├── .gitignore
├── .merlin
├── B0.ml
├── BRZO
├── CHANGES.md
├── DEVEL.md
├── LICENSE.md
├── README.md
├── _tags
├── doc
    ├── cookbook.mld
    ├── index.mld
    ├── notes.mld
    └── tutorial.mld
├── myocamlbuild.ml
├── opam
├── pkg
    ├── META
    └── pkg.ml
├── src
    ├── blake3
    │   ├── bytesrw_blake3.ml
    │   ├── bytesrw_blake3.mli
    │   ├── bytesrw_blake3.mllib
    │   ├── bytesrw_blake3_stubs.c
    │   └── libbytesrw_blake3_stubs.clib
    ├── bytesrw.ml
    ├── bytesrw.mli
    ├── bytesrw.mllib
    ├── bytesrw_fmt.ml
    ├── bytesrw_fmt.mli
    ├── bytesrw_hex.ml
    ├── bytesrw_hex.mli
    ├── bytesrw_utf.ml
    ├── bytesrw_utf.mli
    ├── md
    │   ├── bytesrw_md.ml
    │   ├── bytesrw_md.mli
    │   ├── bytesrw_md.mllib
    │   ├── bytesrw_md_stubs.c
    │   └── libbytesrw_md_stubs.clib
    ├── unix
    │   ├── bytesrw_unix.ml
    │   ├── bytesrw_unix.mli
    │   └── bytesrw_unix.mllib
    ├── xxhash
    │   ├── bytesrw_xxhash.ml
    │   ├── bytesrw_xxhash.mli
    │   ├── bytesrw_xxhash.mllib
    │   ├── bytesrw_xxhash_stubs.c
    │   └── libbytesrw_xxhash_stubs.clib
    ├── zlib
    │   ├── bytesrw_zlib.ml
    │   ├── bytesrw_zlib.mli
    │   ├── bytesrw_zlib.mllib
    │   ├── bytesrw_zlib_stubs.c
    │   └── libbytesrw_zlib_stubs.clib
    └── zstd
    │   ├── bytesrw_zstd.ml
    │   ├── bytesrw_zstd.mli
    │   ├── bytesrw_zstd.mllib
    │   ├── bytesrw_zstd_stubs.c
    │   └── libbytesrw_zstd_stubs.clib
└── test
    ├── blake3tap.ml
    ├── cookbook.ml
    ├── gziptrip.ml
    ├── quickstart.ml
    ├── test_blake3.ml
    ├── test_bytesrw.ml
    ├── test_md.ml
    ├── test_utf.ml
    ├── test_xxhash.ml
    ├── test_zlib.ml
    ├── test_zstd.ml
    ├── utf8codec.ml
    ├── xxh3tap.ml
    └── zstdtrip.ml


/.gitignore:
--------------------------------------------------------------------------------
1 | _b0
2 | _build
3 | tmp
4 | *.install


--------------------------------------------------------------------------------
/.merlin:
--------------------------------------------------------------------------------
1 | PKG b0.kit
2 | S src/**
3 | S test/**
4 | B _b0/**


--------------------------------------------------------------------------------
/B0.ml:
--------------------------------------------------------------------------------
  1 | open B0_kit.V000
  2 | 
  3 | (* Library names *)
  4 | 
  5 | let bytesrw = B0_ocaml.libname "bytesrw"
  6 | let bytesrw_blake3 = B0_ocaml.libname "bytesrw.blake3"
  7 | let bytesrw_md = B0_ocaml.libname "bytesrw.md"
  8 | let bytesrw_unix = B0_ocaml.libname "bytesrw.unix"
  9 | let bytesrw_xxhash = B0_ocaml.libname "bytesrw.xxhash"
 10 | let bytesrw_zlib = B0_ocaml.libname "bytesrw.zlib"
 11 | let bytesrw_zstd = B0_ocaml.libname "bytesrw.zstd"
 12 | 
 13 | let b0_std = B0_ocaml.libname "b0.std"
 14 | let unix = B0_ocaml.libname "unix"
 15 | let cmdliner = B0_ocaml.libname "cmdliner"
 16 | 
 17 | (* Libraries *)
 18 | 
 19 | let bytesrw_lib =
 20 |   let srcs = [ `Dir ~/"src" ] in
 21 |   B0_ocaml.lib bytesrw ~srcs
 22 | 
 23 | let bytesrw_blake3_lib =
 24 |   let doc = "BLAKE3 hashes" in
 25 |   let srcs = [ `Dir ~/"src/blake3" ] in
 26 |   let c_requires = Cmd.arg "-lblake3" in
 27 |   let requires = [bytesrw] and exports = [bytesrw] in
 28 |   B0_ocaml.lib bytesrw_blake3 ~srcs ~requires ~exports ~c_requires ~doc
 29 | 
 30 | let bytesrw_md_lib =
 31 |   let doc = "SHA{1,2} hashes" in
 32 |   let srcs = [ `Dir ~/"src/md" ] in
 33 |   let c_requires = Cmd.arg "-lmd" in
 34 |   let requires = [bytesrw] and exports = [bytesrw] in
 35 |   B0_ocaml.lib bytesrw_md ~srcs ~requires ~exports ~c_requires ~doc
 36 | 
 37 | let bytesrw_unix_lib =
 38 |   let srcs = [ `Dir ~/"src/unix" ] in
 39 |   let requires = [bytesrw; unix] and exports = [bytesrw] in
 40 |   B0_ocaml.lib bytesrw_unix ~srcs ~requires ~exports
 41 | 
 42 | let bytesrw_xxhash_lib =
 43 |   let doc = "XXH hashes" in
 44 |   let srcs = [ `Dir ~/"src/xxhash" ] in
 45 |   let c_requires = Cmd.arg "-lxxhash" in
 46 |   let requires = [bytesrw] and exports = [bytesrw]in
 47 |   B0_ocaml.lib bytesrw_xxhash ~srcs ~requires ~exports ~c_requires ~doc
 48 | 
 49 | let bytesrw_zlib_lib =
 50 |   let doc = "deflate, zlib and gzip streams" in
 51 |   let srcs = [ `Dir ~/"src/zlib" ] in
 52 |   let c_requires = Cmd.arg "-lz" in
 53 |   let requires = [bytesrw] and exports = [bytesrw] in
 54 |   B0_ocaml.lib bytesrw_zlib ~srcs ~requires ~exports ~c_requires ~doc
 55 | 
 56 | let bytesrw_zstd_lib =
 57 |   let doc = "zstd streams" in
 58 |   let srcs = [ `Dir ~/"src/zstd" ] in
 59 |   let c_requires = Cmd.arg "-lzstd" in
 60 |   let requires = [bytesrw] and exports = [bytesrw] in
 61 |   B0_ocaml.lib bytesrw_zstd ~srcs ~requires ~exports ~c_requires ~doc
 62 | 
 63 | (* Tests *)
 64 | 
 65 | let test ?(requires = []) =
 66 |   B0_ocaml.test ~requires:(b0_std :: bytesrw :: requires)
 67 | 
 68 | let utf8codec = test ~/"test/utf8codec.ml" ~long:true
 69 | 
 70 | let test_quickstart =
 71 |   test ~/"test/quickstart.ml" ~requires:[bytesrw_zstd] ~run:false
 72 | 
 73 | let test_cookbook =
 74 |   let requires = [bytesrw_zstd; bytesrw_blake3] in
 75 |   test ~/"test/cookbook.ml" ~requires ~run:false
 76 | 
 77 | let test_bytesrw = test ~/"test/test_bytesrw.ml" ~requires:[]
 78 | let test_utf = test ~/"test/test_utf.ml"
 79 | let test_blake3 = test ~/"test/test_blake3.ml" ~requires:[bytesrw_blake3]
 80 | let test_blake3 = test ~/"test/test_md.ml" ~requires:[bytesrw_md]
 81 | let test_xxhash = test ~/"test/test_xxhash.ml" ~requires:[bytesrw_xxhash]
 82 | let test_zlib = test ~/"test/test_zlib.ml" ~requires:[bytesrw_zlib]
 83 | let test_zstd = test ~/"test/test_zstd.ml" ~requires:[bytesrw_zstd]
 84 | 
 85 | let tool_requires = [cmdliner; unix; bytesrw_unix]
 86 | 
 87 | let blake3tap =
 88 |   let doc = "Hash stdin with blake3" in
 89 |   let requires = bytesrw_blake3 :: tool_requires in
 90 |   test ~/"test/blake3tap.ml" ~run:false ~requires ~doc
 91 | 
 92 | let xxh3tap =
 93 |   let doc = "Hash stdin with xxh3" in
 94 |   let requires = bytesrw_xxhash :: tool_requires in
 95 |   test ~/"test/xxh3tap.ml" ~run:false ~requires ~doc
 96 | 
 97 | let gziptrip =
 98 |   let doc = "Gzip (De)compression from stdin to stdout" in
 99 |   let requires = bytesrw_zlib :: tool_requires in
100 |   test ~/"test/gziptrip.ml" ~run:false ~requires ~doc
101 | 
102 | let zstdtrip =
103 |   let doc = "Zstd (De)compression from stdin to stdout" in
104 |   let requires = bytesrw_zstd :: tool_requires in
105 |   test ~/"test/zstdtrip.ml" ~run:false ~requires ~doc
106 | 
107 | (* Packs *)
108 | 
109 | let default =
110 |  let meta =
111 |    B0_meta.empty
112 |    |> ~~ B0_meta.authors ["The bytesrw programmers"]
113 |    |> ~~ B0_meta.maintainers ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
114 |    |> ~~ B0_meta.homepage "https://erratique.ch/software/bytesrw"
115 |    |> ~~ B0_meta.online_doc "https://erratique.ch/software/bytesrw/doc"
116 |    |> ~~ B0_meta.licenses ["ISC"]
117 |    |> ~~ B0_meta.repo "git+https://erratique.ch/repos/bytesrw.git"
118 |    |> ~~ B0_meta.issues "https://github.com/dbuenzli/bytesrw/issues"
119 |    |> ~~ B0_meta.description_tags
120 |      ["bytes"; "streaming"; "zstd"; "zlib"; "gzip"; "deflate";
121 |       "sha1"; "sha2"; "compression"; "hashing";
122 |       "utf"; "xxhash"; "blake3"; "org:erratique"; ]
123 |    |> ~~ B0_opam.build
124 |      {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"
125 |                  "--with-conf-libblake3" "%{conf-libblake3:installed}%"
126 |                  "--with-conf-libmd" "%{conf-libmd:installed}%"
127 |                  "--with-conf-xxhash" "%{conf-xxhash:installed}%"
128 |                  "--with-conf-zlib" "%{conf-zlib:installed}%"
129 |                  "--with-conf-zstd" "%{conf-zstd:installed}%"]]|}
130 |    |> ~~ B0_opam.depopts ["conf-xxhash", "";
131 |                           "conf-zlib", "";
132 |                           "conf-zstd", "";
133 |                           "conf-libmd", "";
134 |                           "conf-libblake3", ""; ]
135 |    |> ~~ B0_opam.conflicts [ "conf-zstd", {|< "1.3.8"|}] (* should be 1.4 *)
136 |    |> ~~ B0_opam.depends
137 |      [ "ocaml", {|>= "4.14.0"|};
138 |        "ocamlfind", {|build|};
139 |        "ocamlbuild", {|build|};
140 |        "topkg", {|build & >= "1.0.3"|};
141 |      ]
142 |    |> B0_meta.tag B0_opam.tag
143 |  in
144 |  B0_pack.make "default" ~doc:"The bytesrw package" ~meta ~locked:true @@
145 |  B0_unit.list ()
146 | 


--------------------------------------------------------------------------------
/BRZO:
--------------------------------------------------------------------------------
1 | (srcs-x pkg test tmp)


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | 
 2 | - Fix `Bytesrw_xxhash.Xxh64.{to_hex,pp}`. Leading zeros
 3 |   were not being printed (#5).
 4 | - Change unuseful signature of `Slice.break`: do not return 
 5 |   `None` if any of `Slice.take` or `Slice.drop` does. Simply
 6 |   return the result of both operations.
 7 | - Fix wrong bound checks in `Slice.{sub,make}[_or_eod]`. The functions
 8 |   now behave like `Bytes.sub` as far as indexing is allowed. Thanks
 9 |   to Adrián Montesinos González for the report and suggesting the fix (#4).
10 | - `bytesrw.*` libraries are made to export `bytesrw`.
11 | 
12 | v0.1.0 2024-11-28 Zagreb
13 | ------------------------
14 | 
15 | First release.
16 | 
17 | Supported by a grant from the OCaml Software Foundation.
18 | 


--------------------------------------------------------------------------------
/DEVEL.md:
--------------------------------------------------------------------------------
 1 | # Testing
 2 | 
 3 |     b0 test
 4 | 
 5 | # Benchmarking
 6 | 
 7 |     hyperfine "$(b0 --path -- zstdtrip) -d < tmp/webster.zst > /dev/null"
 8 |     hyperfine 'zstd -c -d < tmp/webster.zst > /dev/null'
 9 |     
10 |     hyperfine "$(b0 --path -- gziptrip) -d < tmp/webster.gz > /dev/null"
11 |     hyperfine 'gunzip -c < tmp/webster.gz > /dev/null'
12 |     
13 |     hyperfine "$(b0 --path -- xxh3tap) --sink < tmp/webster"
14 |     hyperfine 'xxhsum -H3 < tmp/webster'
15 | 
16 |     hyperfine "$(b0 --path -- blake3tap) --sink < tmp/webster"
17 |     hyperfine 'b3sum < tmp/webster'
18 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2024 The bytesrw programmers
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 | PERFORMANCE OF THIS SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Bytesrw – Composable byte stream readers and writers for OCaml
 2 | ==============================================================
 3 | 
 4 | Bytesrw extends the OCaml `Bytes` module with composable, memory
 5 | efficient, byte stream readers and writers compatible with effect
 6 | based concurrency.
 7 | 
 8 | Except for byte slice life-times, these abstractions intentionally
 9 | separate away ressource management and the specifics of reading and
10 | writing bytes.
11 | 
12 | Bytesrw distributed under the ISC license. It has no dependencies.
13 | 
14 | Optional support for compressed and hashed bytes depend, at your wish, on 
15 | the C [`zlib`], [`libzstd`], [`blake3`], [`libmd`], [`xxhash`] and
16 | libraries.
17 | 
18 | [`blake3`]: https://blake3.io
19 | [`libzstd`]: http://zstd.net
20 | [`libmd`]: https://www.hadrons.org/software/libmd/
21 | [`xxhash`]: https://xxhash.com/
22 | [`zlib`]: https://zlib.net
23 | 
24 | Homepage: <https://erratique.ch/software/bytesrw/>
25 | 
26 | ## Installation
27 | 
28 | Bytesrw can be installed with `opam`
29 | 
30 |     opam install bytesrw
31 |     
32 |     # Compression support
33 |     opam install bytesrw conf-zlib conf-zstd
34 |     
35 |     # Hashing support
36 |     opam install bytesrw conf-libblake3 conf-libmd conf-xxhash
37 | 
38 | If you don't use `opam` consult the [`opam`](opam) file for build
39 | instructions.
40 | 
41 | ## Documentation
42 | 
43 | The documentation can be consulted [online] or via `odig doc bytesrw`.
44 | 
45 | Questions are welcome but better asked on the [OCaml forum] than on the
46 | issue tracker. 
47 | 
48 | [online]: https://erratique.ch/software/bytesrw/doc
49 | [OCaml forum]: https://discuss.ocaml.org/
50 | 
51 | ## Examples
52 | 
53 | A few examples can be found in the [test](test/) directory. 
54 | 
55 | ## Acknowledgments 
56 | 
57 | A grant from the [OCaml Software Foundation] helped to bring the first
58 | public release of `bytesrw`.
59 | 
60 | [OCaml Software Foundation]: http://ocaml-sf.org/
61 | 


--------------------------------------------------------------------------------
/_tags:
--------------------------------------------------------------------------------
 1 | true : bin_annot, safe_string
 2 | <_b0> : -traverse
 3 | <src> : include
 4 | 
 5 | <src/unix> : include
 6 | <src/**> : package(unix)
 7 | 
 8 | <src/blake3> : include
 9 | <src/blake3/bytesrw_blake3_stubs.c> : use_libblake3
10 | <src/blake3/bytesrw_blake3.{cma,cmxa}> : record_bytesrw_blake3_stubs
11 | <src/blake3/bytesrw_blake3.{cmxs}> : link_bytesrw_blake3_stubs_archive
12 | <src/blake3/libbytesrw_blake3_stubs.*> : use_libblake3
13 | 
14 | <src/md> : include
15 | <src/md/bytesrw_md_stubs.c> : use_libmd
16 | <src/md/bytesrw_md.{cma,cmxa}> : record_bytesrw_md_stubs
17 | <src/md/bytesrw_md.{cmxs}> : link_bytesrw_md_stubs_archive
18 | <src/md/libbytesrw_md_stubs.*> : use_libmd
19 | 
20 | <src/xxhash> : include
21 | <src/xxhash/bytesrw_xxhash_stubs.c> : use_libxxhash
22 | <src/xxhash/bytesrw_xxhash.{cma,cmxa}> : record_bytesrw_xxhash_stubs
23 | <src/xxhash/bytesrw_xxhash.{cmxs}> : link_bytesrw_xxhash_stubs_archive
24 | <src/xxhash/libbytesrw_xxhash_stubs.*> : use_libxxhash
25 | 
26 | <src/zlib> : include
27 | <src/zlib/bytesrw_zlib_stubs.c> : use_zlib
28 | <src/zlib/bytesrw_zlib.{cma,cmxa}> : record_bytesrw_zlib_stubs
29 | <src/zlib/bytesrw_zlib.{cmxs}> : link_bytesrw_zlib_stubs_archive
30 | <src/zlib/libbytesrw_zlib_stubs.*> : use_zlib
31 | 
32 | <src/zstd> : include
33 | <src/zstd/bytesrw_zstd_stubs.c> : use_libzstd
34 | <src/zstd/bytesrw_zstd.{cma,cmxa}> : record_bytesrw_zstd_stubs
35 | <src/zstd/bytesrw_zstd.{cmxs}> : link_bytesrw_zstd_stubs_archive
36 | <src/zstd/libbytesrw_zstd_stubs.*> : use_libzstd
37 | 


--------------------------------------------------------------------------------
/doc/cookbook.mld:
--------------------------------------------------------------------------------
  1 | {0 [Bytesrw] cookbook}
  2 | 
  3 | A few convention and recipes for dealing with byte stream reader and writers.
  4 | 
  5 | {1:conventions Contracts and conventions}
  6 | 
  7 | {2:convention_functions Reader and writer function contract}
  8 | 
  9 | {{!Bytesrw.Bytes.Reader.make}The contract} between a reader and its slice
 10 | enumerating function.
 11 | {{!Bytesrw.Bytes.Writer.make}The contract}
 12 | between a writer and its slice iterating function.
 13 | 
 14 | {2:convention_client Reader and writer client contracts}
 15 | 
 16 | {{!Bytesrw.Bytes.Reader.read}The contract} between a reader and its
 17 | client. {{!Bytesrw.Bytes.Writer.write}The contract} between a
 18 | writer and its client.
 19 | 
 20 | {2:convention_filters Reader and writer filters conventions}
 21 | 
 22 | The {{!Bytesrw.Bytes.Reader.filter}reader
 23 | filter conventions} and the {{!Bytesrw.Bytes.Writer.filters}writer
 24 | filter conventions}.
 25 | 
 26 | {1:string_filtering Applying filters to strings}
 27 | 
 28 | {{!Bytesrw.Bytes.Reader.filters}Reader filters}
 29 | can easily be applied to strings with {!Bytesrw.Bytes.Reader.filter_string}:
 30 | 
 31 | {[
 32 | let id s =
 33 |   let filters = Bytesrw_zstd.[compress_reads (); decompress_reads ()] in
 34 |   Bytes.Reader.filter_string filters s
 35 | ]}
 36 | 
 37 | This can also be done with {{!Bytesrw.Bytes.Writer.filters}writer filters}
 38 | by using {!Bytesrw.Bytes.Writer.filter_string}:
 39 | {[
 40 | let id s =
 41 |   let filters = Bytesrw_zstd.[decompress_writes (); compress_writes ()] in
 42 |   Bytes.Writer.filter_string filters s
 43 | ]}
 44 | 
 45 | {1:checksumming Checksumming streams}
 46 | 
 47 | The pattern for checksumming streams is to apply an identity but
 48 | side-effecting filter (also known as a {{!tapping}tap}) on a reader or
 49 | writer and return a state value.  The state value is updated whenever
 50 | the resulting reader or writer is read or written.
 51 | 
 52 | The example below shows how to combine {{!Bytesrw_blake3}[BLAKE3]}
 53 | checksumming with {{!Bytesrw_zstd}[zstd]} compression.
 54 | 
 55 | This checksums the data before compressing it:
 56 | {[
 57 | let blake3_and_compress ~plain =
 58 |   try
 59 |     let plain, blake3 = Bytesrw_blake3.Blake3.reads plain in
 60 |     let comp = Bytesrw_zstd.compress_reads () plain in
 61 |     let comp = Bytes.Reader.to_string comp in
 62 |     Ok (comp, Bytesrw_blake3.Blake3.value blake3)
 63 |   with
 64 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 65 | ]}
 66 | This checksums the data after decompressing it:
 67 | {[
 68 | let decompress_and_blake3 ~comp =
 69 |   try
 70 |     let plain = Bytesrw_zstd.decompress_reads () comp in
 71 |     let r, blake3 = Bytesrw_blake3.Blake3.reads plain in
 72 |     let s = Bytes.Reader.to_string r in
 73 |     Ok (s, Bytesrw_blake3.Blake3.value blake3)
 74 |   with
 75 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 76 | ]}
 77 | By re-odering the operations in the two functions above you could
 78 | equally have applied the checksum on the compressed data or even have
 79 | checksums for both compressed and decompressed data.
 80 | 
 81 | {1:limiting Limiting streams}
 82 | 
 83 | If you need to limit resource consumption, readers and writers
 84 | can be bounded with {!Bytesrw.Bytes.Reader.limit} and
 85 | {!Bytesrw.Bytes.Writer.limit}.
 86 | 
 87 | For example this makes sure that the {{!Bytesrw_zstd}[zstd]}
 88 | decompressed size of [comp] does not exceed [quota] bytes. If it does
 89 | we still return the truncated decompressed data so far.
 90 | 
 91 | {[
 92 | let limited_decompress ~quota ~comp =
 93 |   let buf = Buffer.create quota in
 94 |   try
 95 |     let plain = Bytesrw_zstd.decompress_reads () comp in
 96 |     let () = Bytes.Reader.add_to_buffer buf (Bytes.Reader.limit quota plain) in
 97 |     Ok (`Data (Buffer.contents buf))
 98 |   with
 99 |   |  Bytes.Stream.Error (Bytes.Stream.Limit _quota, _) ->
100 |       Ok (`Quota_exceeded (Buffer.contents buf))
101 |   |  Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
102 | ]}
103 | 
104 | {1:tapping Tapping streams}
105 | 
106 | Tapping streams allow to observe the slices that fly-by in a stream.
107 | The functions {!Bytesrw.Bytes.Reader.tap} and
108 | {!Bytesrw.Bytes.Writer.tap} can be used to tap readers and
109 | writers. Taps are just identity stream filters with a side-effect.
110 | 
111 | Note that readers that result from tap will not tap the
112 | {{!page-tutorial.reader_push_back}push backs} that are performed on
113 | them.  This is a good property if you are using taps for
114 | {{!checksumming}checksumming}.
115 | 
116 | See also {{!tracing}tracing streams}.
117 | 
118 | {1:tracing Tracing streams}
119 | 
120 | Tracing streams can easily be done by {{!tapping}tapping} them
121 | with {!Bytesrw.Bytes.Slice.tracer}. For example the following
122 | will trace the slices of [r] or [w] on [stderr].
123 | 
124 | {[
125 | let rtrace ~id r = Bytes.Reader.tap (Bytes.Slice.tracer ~id) r
126 | let wtrace ~id w = Bytes.Writer.tap (Bytes.Slice.tracer ~id) w
127 | ]}
128 | 
129 | {1:errors Adding your own stream error}
130 | 
131 | Here is a blueprint you can use to define your own stream error.
132 | 
133 | {[
134 | module Myformat : sig
135 | 
136 |   (** {1:errors Errors} *)
137 | 
138 |   type Bytesrw.Bytes.Stream.error +=
139 |   | Error of string (** *)
140 |   (** The type for [myformat] stream errors. *)
141 | 
142 |   (** {1:streams Streams} *)
143 | 
144 |   (* … *)
145 | end = struct
146 |   type Bytes.Stream.error += Error of string
147 | 
148 |   let format_error =
149 |     let case msg = Error msg in
150 |     let message = function Error msg -> msg | _ -> assert false in
151 |     Bytes.Stream.make_format_error ~format:"myformat" ~case ~message
152 | 
153 |   let error e = Bytes.Stream.error format_error e
154 |   let reader_error r e = Bytes.Reader.error format_error r e
155 |   let writer_error w e = Bytes.Writer.error format_error w e
156 | end
157 | ]}
158 | 
159 | In your code you can now selectively pattern match on these errors with
160 | {[
161 | try … with
162 | | Bytes.Stream.Error (Myformat.Error msg, _) -> …
163 | ]}
164 | More on the design of the stream error system can be found in the
165 | {{!page-notes.errors}design notes}.
166 | 


--------------------------------------------------------------------------------
/doc/index.mld:
--------------------------------------------------------------------------------
  1 |   {0 Bytesrw {%html: <span class="version">%%VERSION%%</span>%}}
  2 | 
  3 | Bytesrw extends the OCaml {!Bytes} module with composable, memory
  4 | efficient, byte stream {{!Bytesrw.Bytes.Reader}readers} and
  5 | {{!Bytesrw.Bytes.Writer}writers} compatible with effect based
  6 | concurrency.
  7 | 
  8 | Except for byte slice {{!Bytesrw.Bytes.Slice.validity}life-times},
  9 | these abstractions intentionally separate away ressource management
 10 | and the specifics of reading and writing bytes.
 11 | 
 12 | {1:manuals Manuals}
 13 | 
 14 | The following manuals are available:
 15 | 
 16 | {ul
 17 | {- The {{!quick}quick start} should do so.}
 18 | {- The {{!tutorial}tutorial} is a conceptual overview of byte stream readers
 19 |    and writers.}
 20 | {- The {{!cookbook}Bytesrw cookbook} has a few conventions and
 21 |    byte stream recipes.}
 22 | {- The {{!page-notes}design notes} explains design choices made by the
 23 |    library.}}
 24 | 
 25 | {1:bytesrw Library [bytesrw]}
 26 | 
 27 | This library has the base definition of byte stream reader and writers as an
 28 | extension of the {!Stdlib.Bytes} module.
 29 | 
 30 | {!modules:
 31 | Bytesrw}
 32 | 
 33 | {!modules:
 34 | Bytesrw.Bytes.Slice
 35 | Bytesrw.Bytes.Stream
 36 | Bytesrw.Bytes.Reader
 37 | Bytesrw.Bytes.Writer}
 38 | 
 39 | The following modules rely only on the [Stdlib]:
 40 | 
 41 | {!modules:
 42 | Bytesrw_utf
 43 | Bytesrw_hex}
 44 | 
 45 | {1:bytesrw_other_libs Libraries
 46 |                       [bytesrw.{blake3,md,unix,xxhash,zlib,zstd}]}
 47 | 
 48 | Each of these modules lives in its corresponding library.
 49 | Compression and hashing libraries depend on their canonical C library.
 50 | 
 51 | {!modules:
 52 | Bytesrw_blake3
 53 | Bytesrw_md
 54 | Bytesrw_unix
 55 | Bytesrw_xxhash
 56 | Bytesrw_zlib
 57 | Bytesrw_zstd
 58 | }
 59 | 
 60 | {1:quick Quick start}
 61 | 
 62 | This example compresses standard input to standard output
 63 | with [zstd] using either a compressing byte stream reader (pull) or a
 64 | compressing byte stream writer (push).
 65 | {[
 66 | cat << 'EOF' > quickstart.ml
 67 | open Bytesrw
 68 | 
 69 | let stdio_compress_reads () =
 70 |   try
 71 |     let stdin = Bytes.Reader.of_in_channel In_channel.stdin in
 72 |     let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in
 73 |     let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in
 74 |     let zstdr = Bytesrw_zstd.compress_reads ~params () stdin in
 75 |     Bytes.Writer.write_reader ~eod:true stdout zstdr;
 76 |     Ok ()
 77 |   with
 78 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 79 |   | Sys_error e -> Error e
 80 | 
 81 | let stdio_compress_writes () =
 82 |   try
 83 |     let stdin = Bytes.Reader.of_in_channel In_channel.stdin in
 84 |     let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in
 85 |     let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in
 86 |     let zstdw = Bytesrw_zstd.compress_writes ~params () ~eod:true stdout in
 87 |     Bytes.Writer.write_reader ~eod:true zstdw stdin;
 88 |     Ok ()
 89 |   with
 90 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 91 |   | Sys_error e -> Error e
 92 | 
 93 | let main () =
 94 |   Result.fold ~ok:(Fun.const 0) ~error:(fun e -> prerr_endline e; 1) @@
 95 |   if Array.exists (String.equal "-w") Sys.argv
 96 |   then stdio_compress_writes ()
 97 |   else stdio_compress_reads ()
 98 | 
 99 | let () = if !Sys.interactive then () else exit (main ())
100 | EOF
101 | ]}
102 | It can be compiled and used with:
103 | {[
104 | ocamlfind ocamlopt -package bytesrw,bytesrw.zstd -linkpkg quickstart.ml
105 | ./a.out < quickstart.ml | zstd -d
106 | ]}
107 | The {{!page-tutorial}tutorial} is a short conceptual overview of byte stream
108 | readers and writers. The {{!page-cookbook}cookbook} has a few more tips and
109 | code snippets.


--------------------------------------------------------------------------------
/doc/notes.mld:
--------------------------------------------------------------------------------
  1 | {0 Design notes}
  2 | 
  3 | {1:wf Well formed streams}
  4 | 
  5 | The design enforces well-formed streams: sequences of {e non-empty}
  6 | slices terminated by a single {!Bytesrw.Bytes.Slice.eod} empty slice.
  7 | 
  8 | We want to ensure that the [read] functions of streams readers do not
  9 | return unexpected empty slices and that they are not called again once
 10 | they returned {!Bytesrw.Bytes.Slice.eod}. Likewise we want to ensure
 11 | that the [write] function of writers is not called with unexpected
 12 | empty slices or that they are called again once they were called with
 13 | {!Bytesrw.Bytes.Slice.eod}.
 14 | 
 15 | For example the {!Bytesrw.Bytes.Slice} creation function like
 16 | {!Bytesrw.Bytes.Slice.make} raise [Invalid_argument] when they
 17 | unexpectedly produce an empty slice and those that can produce them
 18 | have dedicated names that include [_eod]
 19 | (e.g. {!Bytesrw.Bytes.Slice.make_or_eod}). This makes programmers
 20 | well-aware of the points in the code that may terminate streams.
 21 | 
 22 | Besides, readers and writers internally overwrite their [read] and [write]
 23 | function as soon as {!Bytesrw.Bytes.Slice.eod} is seen so that no
 24 | closure is kept longer than it should be.
 25 | 
 26 | {1:errors Error strategy}
 27 | 
 28 | The error strategy may look involved but it caters for:
 29 | 
 30 | - Simple strategies like {!Bytesrw_zstd} where a single case is used
 31 |   for all errors.
 32 | - Complex strategies where a dedicated structured error type is used,
 33 |   possibly distinguishing between setup/reader/writer errors
 34 | 
 35 | while retaining the ability to pattern match on the error of specific
 36 | streams and making sure informative human error messages can always be
 37 | produced.
 38 | 
 39 | For users it means that when they use streams:
 40 | 
 41 | - There is a single exception to guard against and this exception can
 42 |   trivially be turned into an error message.
 43 | - Pattern matching on errors can still be used on a per stream format
 44 |   basis. This is needed for example for stream fromats that deal with
 45 |   substreams of other formats: since streams can be layered on top of
 46 |   each other and a lower level stream could error while you are
 47 |   dealing with your own format, that error should not be caught
 48 |   (XXX but what about ambiguities ?, should we add auto generated
 49 |   stream ids ?).
 50 | 
 51 | To enforce good behaviour by stream reader and writers the following
 52 | type structure is used:
 53 | 
 54 | - There is an extensible {!Bytesrw.Bytes.Stream.type-error} types to
 55 |   which stream formats add they own case.
 56 | - The {!Bytesrw.Bytes.Stream.Error} exception which holds a value of type
 57 |   [error] cannot be directly raised as it demands the creation of a
 58 |   {!Bytesrw.Bytes.Stream.error_context} value for which there is no constructor.
 59 | - This means that all raises go through either
 60 |   {!Bytesrw.Bytes.Stream.val-error},
 61 |   or {!Bytesrw.Bytes.Reader.val-error} or {!Bytesrw.Bytes.Writer.val-error}.
 62 |   These functions ask for a {!Bytesrw.Bytes.Stream.format_error},
 63 |   whose creation asks stream formats to identify themelves and describe
 64 |   how their errors can be stringified.
 65 | 
 66 | Effectively this system can be seen as an OCaml-like exception system
 67 | inside the {!Bytesrw.Bytes.Stream.Error} exception.
 68 | 
 69 | {1:primitives Signature of [read/write] primitives}
 70 | 
 71 | - [write] has to write all the given bytes. Having [write] return the number
 72 |   of written bytes from the slice would be useful for certain scenario, most
 73 |   notably write filters. It would be the moral equivalent of readers' push
 74 |   backs, except in this case it would be inconvenient {e for the client} and
 75 |   leads to things like [really_write], not desirable.
 76 | 
 77 | - [read] does not allow to specify the number bytes to read. Doing so would
 78 |   entail storing the last slice in the reader. Then if too much bytes are
 79 |   requested we either need to start buffering internally because of slice
 80 |   validity or potentially return less. However the latter leads to
 81 |   [really_read] and buffering again, not desirable. The
 82 |   {{:https://github.com/dbuenzli/bytesrw/blob/main/test/utf8codec.ml}UTF-8
 83 |   text codec} example shows that we can really get rid of buffering
 84 |   except for a tiny buffer for overlapping reads. So it seems better to
 85 |   let higher-level abstractions handle that if they really need to.
 86 | 
 87 | {1:names Names}
 88 | 
 89 | - We settle on [length]s rather than [size]s because that's what
 90 |   the [Stdlib] generally uses.
 91 | 
 92 | - We use [first], [last] and [length] for slice indexing and [pos],
 93 |   for streams. That way the terminology does not overlap which makes
 94 |   it for a clearer exposition. Incidentally our usage of [pos] is
 95 |   consistent with the [Stdlib] (though the index vs position terminology
 96 |   is unfortunate in my opinion).
 97 | 
 98 | {1:resolutions Resolved without strong rationale}
 99 | 
100 | The following points were resolved for the first release without
101 | a strong rationale.
102 | 
103 | {ul
104 | {- Push backs. Using {!Bytesrw.Bytes.Reader.push_back} is allowed at the
105 |    beginning of the stream and thus allows to push back to negative positions.
106 |    This could have been disallowed (in which case {!Bytesrw.Bytes.Reader.empty}
107 |    can become a value again). One use can be to push a header before
108 |    a stream, while still having byte positions aligned on the stream
109 |    (using {!Bytesrw.Bytes.Reader.val-append} for that would shift positions).
110 |    Unclear. {b Update.} In fact it is used in Webs_unix.Fd.bytes_reader for
111 |    creating the reader for bodies, after reading the HTTP header we have [n] bytes of
112 |    the body in our buffer, we create a reader at position [n] that reads
113 |    from the fd and push back the initial bytes. So in fact it seems to be
114 |    a good idea in order to start readers at precise positions despite the fact
115 |    that you may have pulled too much data.}
116 | {- For now it was decided not to give access to the reader and writer
117 |    in [read] and [write] functions. This means they can't access
118 |    their properties. Some combinators in [Bytesrw] internally
119 |    mutate the [read] field afterwards because they want to access the reader
120 |    in the definition of the [read] function, this is not possible with the API.
121 |    In filters for reporting read error positions those are generally reported
122 |    by the reader that filters so the underlying reader is available for using
123 |    with {!Bytesrw.Bytes.Reader.error}. It's a bit less clear for writers.}
124 | {- {!Bytesrw.Bytes.Reader.tap} and {!Bytesrw.Bytes.Writer.tap} are
125 |    just {!Fun.id} filters with a side effect, they could be
126 |    removed. We choose to keep them for now, it's doesn't seem to hurt
127 |    to have a name for that.}
128 | {- System errors. When readers and writers use system calls it's
129 |    a bit unclear whether the errors ([Sys_error] and [Unix_error]) should be
130 |    transformed into stream errors. For now we decided not to. That is we
131 |    consider that to be an error of the ressource rather than an error of
132 |    the stream.}}
133 | 
134 | {1:upstream Upstreaming}
135 | 
136 | {b If we determine that the API works well} we should consider
137 | proposing to upstream the extension of {!Bytesrw.Bytes}.
138 | 
139 | In particular it would be an alternative to the
140 | {{:https://github.com/ocaml/RFCs/blob/master/rfcs/modular_io.md}modular
141 | IO proposal}. The core code is mostly trivial it's "just" three data
142 | structures. But added code is more code to maintain.
143 | 
144 | However here are a few things to consider:
145 | 
146 | {ul
147 | {- The labels with lengths are not abbreviated, we have [~length] and
148 |    [~slice_length], the Stdlib uses [~len]. A counter here is to
149 |    mention that this corresponds to the name of the fields
150 |    of the structures and there is little point to multiply the names
151 |    for the same thing (and using [Bytes.Slice.len] would be weird and
152 |    inconsistent with [{String,Bytes}.length]).}
153 | 
154 | {- While most functions use a [first] + [length] for specifying ranges
155 |    a few functions are also provided with the (extremely nice I have to
156 |    say) inclusive [?first], [?last] range mechanism which was was
157 |    {{:https://github.com/ocaml/ocaml/pull/9893}percieved negatively} by
158 |    upstream for strings. For wrong reasons in my opinion revolving
159 |    around aesthetics of [+/-1] (counter 1: clarity; counter 2: depends
160 |    on your use case, you then also need to add +/-1) and provability
161 |    (?). It's a pity since it's a very usable interface that avoids a
162 |    lot of footgunish index computations and brings general code reading
163 |    clarity by requiring less mental labor. It's also consistent with
164 |    OCaml's own inclusive [for] loop and certainly not
165 |    {{:https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(array)#Slicing}an
166 |   anomaly} in the language design space. See the also the discussion
167 |   {{:https://github.com/ocaml/ocaml/pull/10480#issuecomment-887928919}here}.}
168 | 
169 | {- Dependencies. A few things will need to be dispatched differently
170 |    module wise.
171 | 
172 |    Basically the channel stuff should move to the [{In,Out}channel]
173 |    modules and the buffer stuff to the [Buffer] module. The
174 |    formatters won't make it or will end up in the unusably long-winded
175 |    [Format].
176 | 
177 |    Besides that I don't think any functionality would be lost, in
178 |    particular we made sure not to use [Format] in the
179 |    {!Bytesrw.Bytes.Stream.type-error} interface.
180 | 
181 |    One thing I can see not make it is perhaps the nice hex dump
182 |    formatter {!Bytesrw.Bytes.pp_hex} which is maybe too much code for
183 |    upstream to take.}
184 | {- Safety. There are two places where we don't copy a bytes/string
185 |    because we assume that the users abide the Slice validity
186 |    rule. Notably {!Bytesrw.Bytes.Reader.of_string}. Upstream will likely want
187 |    no unsafe usage. These places can be found with [git grep 'Unsafe is
188 |    ok']. We considered prefixing these functions with [unsafe_] but then
189 |    it creeps a bit because for example {!Bytesrw.Bytes.Reader.filter_string}
190 |    uses it.}}


--------------------------------------------------------------------------------
/doc/tutorial.mld:
--------------------------------------------------------------------------------
  1 | {0 Byte stream reader and writer tutorial}
  2 | 
  3 | See also the {{!page-index.quick}quick start} and the {{!page-cookbook}
  4 | cookbook} for short, self-contained, code snippets. This tutorial
  5 | is a conceptual overview of byte stream readers and writers.
  6 | 
  7 | {1:streams Streams}
  8 | 
  9 | In [Bytesrw] you never get to manipulate byte streams directly. You
 10 | observe finite parts of them via stream readers and writers. These
 11 | finite parts are represented by byte slices. A
 12 | {{!Bytesrw.Bytes.Slice}byte slice} is a non-empty, consecutive,
 13 | subrange of a {!Bytes.t} value. There is a single, distinguished,
 14 | empty slice {!Bytesrw.Bytes.Slice.eod} (end-of-data) which is used to
 15 | indicate the end the stream. Once this value is observed no more bytes can
 16 | be observed from a stream.
 17 | 
 18 | To sum up, a byte stream is a sequence of {!Bytesrw.Bytes.Slice.t}
 19 | values ended by a {!Bytesrw.Bytes.Slice.eod} value. Byte stream reader
 20 | and writers give you two different ways of observing this sequence, in
 21 | order, but always only slice by slice:
 22 | 
 23 | - With byte stream readers you get to pull slices one by one from the reader
 24 |   until {!Bytesrw.Bytes.Slice.eod} is pulled.
 25 | - With byte stream writers you get to see slices pushed on the writer
 26 |   one by one until {!Bytesrw.Bytes.Slice.eod} is pushed.
 27 | 
 28 | The system enforces well-formed streams: your readers and writers will
 29 | not hiccup on transient empty slices since they unconditionally
 30 | terminates streams. For this reason the functions in
 31 | {!Bytesrw.Bytes.Slice} always make explicit when
 32 | {!Bytesrw.Bytes.Slice.eod} can be produced by suffixing the function
 33 | names with [_or_eod].
 34 | 
 35 | {1:readers Stream readers}
 36 | 
 37 | Stream {{!Bytesrw.Bytes.Reader}readers} are pull abstractions. They
 38 | provide access to the slices of a stream, in order, on demand, but
 39 | only slice by slice: the slice you get from a reader [r] on
 40 | {!Bytesrw.Bytes.Reader.read} is {{!Bytesrw.Bytes.Slice.validity}valid
 41 | for reading} only until the next slice is read from [r].
 42 | 
 43 | This means that you are {b only} allowed to read those bytes in the range
 44 | defined by the slice until the next call to
 45 | {!Bytesrw.Bytes.Reader.read} on [r]. You are not even allowed
 46 | to mutate the bytes in the range of the slice. If you need to keep the
 47 | data for longer or want to modify it, you need to copy it.
 48 | 
 49 | Readers can be created from {{!Bytesrw.Bytes.Reader.of_bytes} bytes},
 50 | {{!Bytesrw.Bytes.Reader.of_string}strings},
 51 | {{!Bytesrw.Bytes.Reader.of_slice}slices},
 52 | {{!Bytesrw.Bytes.Reader.of_in_channel} input channels},
 53 | {{!Bytesrw_unix.bytes_reader_of_fd}file descriptors}, etc. More generally
 54 | {{!Bytesrw.Bytes.Reader.make}any function} that enumerates a stream's slices
 55 | can be turned into a byte stream reader.
 56 | 
 57 | Readers maintain an informative stream position accessible with
 58 | {!Bytesrw.Bytes.Reader.pos}. The position is the zero based-index of
 59 | the next byte to read or, alternatively, the
 60 | {{!Bytesrw.Bytes.Reader.read_length} number of bytes} that have been
 61 | returned by calls to {!Bytesrw.Bytes.Reader.read}. Positions can be
 62 | used for statistics, for locating errors, for locating substreams or
 63 | computing byte offsets.
 64 | 
 65 | Readers have an informative immutable
 66 | {!Bytesrw.Bytes.Reader.slice_length} property. It is a {b hint} on the
 67 | maximal slice length returned by reads. This can be used by reader
 68 | consumers to adjust their own buffers.
 69 | 
 70 | {2:reader_push_back Push backs}
 71 | 
 72 | Reader {{!Bytesrw.Bytes.Reader.push_back}push backs} provide a limited
 73 | form of look ahead on streams. They should not be used as a general
 74 | buffering mecanism but they allow to
 75 | {{!Bytesrw.Bytes.Reader.sniff}sniff stream content}, for example to
 76 | {{!Bytesrw_utf.guess_reader_encoding}guess encodings}, in order to invoke
 77 | suitable decoders. They are also used to break streams into
 78 | substreams at precise positions when a reader provide you with a
 79 | slice that overlap two substreams.
 80 | 
 81 | {2:reader_filters Filters}
 82 | 
 83 | {{!Bytesrw.Bytes.Reader.filters}Reader filters} are reader
 84 | transformers. They take a reader [r] and return a new reader which,
 85 | when read, reads on [r] and transforms its slices. For example given a
 86 | reader [r] that returns compressed bytes a decompress filter like
 87 | {!Bytesrw_zstd.decompress_reads} returns a new reader which reads and
 88 | decompresses the slices of [r].
 89 | 
 90 | Filters do not necessarily act on a reader forever. For example the
 91 | reader returned by {!Bytesrw_zstd.decompress_reads}[ ~all_frames:false
 92 | () r], decompresses exactly a single [zstd] frame by reading from [r]
 93 | and then returns {!Bytesrw.Bytes.Slice.eod}. After that [r] can be
 94 | used again to read the remaining data after the frame.
 95 | 
 96 | {2:reader_limits Limits}
 97 | 
 98 | The number of bytes returned by a reader can be limited with
 99 | {!Bytesrw.Bytes.Reader.limit}. See {{!page-cookbook.limiting}this
100 | example} in the cookbook.
101 | 
102 | {1:writers Stream writers}
103 | 
104 | Stream {{!Bytesrw.Bytes.Writer}writers} are push abstractions. Clients
105 | push the slices of a stream on a writer [w] with
106 | {!Bytesrw.Bytes.Writer.write}, slice by slice. This allows the write
107 | function of the writer to get a hand on the slice which the client
108 | must guarantee {{!Bytesrw.Bytes.Slice.validity}valid for reading}
109 | until the writer returns.
110 | 
111 | Writers, in their write function, are {b only} allowed to read those
112 | bytes in the range defined by the slice until they return. They are
113 | not allowed to mutate the bytes in the range of the slice. If a
114 | writer needs to keep the data for longer or needs to modify it, it
115 | needs to copy it.
116 | 
117 | Writers can be created to write to
118 | {{!Bytesrw.Bytes.Writer.of_buffer}buffers},
119 | {{!Bytesrw.Bytes.Writer.of_out_channel}output channels},
120 | {{!Bytesrw_unix.bytes_writer_of_fd}file descriptors}, etc. More
121 | generally {{!Bytesrw.Bytes.Writer.make}any slice iterating function}
122 | can be turned into a byte stream writer.
123 | 
124 | Writers maintain an informative stream position accessible with
125 | {!Bytesrw.Bytes.Writer.pos}. The position is the zero based-index of
126 | the next byte to write or, alternatively, the
127 | {{!Bytesrw.Bytes.Writer.written_length}number of bytes} that have been
128 | pushed on the writer by calls to
129 | {!Bytesrw.Bytes.Writer.write}. Positions can be used for statistics,
130 | for locating errors or computing byte offsets.
131 | 
132 | Writers have an informative immutable
133 | {!Bytesrw.Bytes.Writer.slice_length} property. It provides a {b hint}
134 | for clients on the maximal length of slices the writer would like to
135 | receive. This can be used by clients to adjust the sizes of the slices
136 | they write on a writer.
137 | 
138 | {2:writer_filters Filters}
139 | 
140 | {{!Bytesrw.Bytes.Writer.filters}Writer filters} are writer
141 | transformers.  They take a writer [w] and return a new writer which,
142 | when written, transforms the slice and then writes it on [w]. For
143 | example given a writer [w] that writes to an output channel, a filter
144 | like {!Bytesrw_zstd.compress_writes} returns a new writer which
145 | compresses the writes before writing them to [w].
146 | 
147 | Filters do not necessarily act on a writer forever. This is the
148 | purpose of the boolean [eod] argument of
149 | {!Bytesrw.Bytes.Writer.filter}.  When {!Bytesrw.Bytes.Slice.eod} is
150 | written on the filter the end of data slice should only be written to
151 | the underyling writer [w] if [eod] is [true]. If not, the filter
152 | should simply flush its data to [w] and further leave [w] untouched so
153 | that more data can be written on it. For example the writer returned
154 | by {!Bytesrw_zstd.compress_writes}[ () ~eod:false w] compresses writes
155 | until {!Bytesrw.Bytes.Slice.eod} is written. After that [w] can be
156 | used again to write more data after the compressed stream.
157 | 
158 | {2:writer_limits Limits}
159 | 
160 | The number of bytes written on a writer can be limited with
161 | {!Bytesrw.Bytes.Writer.limit}.
162 | 
163 | {1:errors Errors}
164 | 
165 | In general stream readers and writer and their creation function may
166 | raise the extensible {!Bytesrw.Bytes.Stream.Error} exception.
167 | 
168 | These errors should only pertain to byte stream errors, that is they
169 | should mostly be raised by reads and writes on the result of stream
170 | filters. For example the default behaviour on byte stream reader and
171 | writer limits is to raises a byte stream exception with a
172 | {!Bytesrw.Bytes.Stream.Limit} error. Or if you use a [zstd]
173 | decompression filter, any decompression error will be reported by byte
174 | stream exception with a {!Bytesrw_zstd.Error} error. See
175 | {{!page-cookbook.errors}here} for an example on how you can add your
176 | own case to stream errors.
177 | 
178 | If you use byte stream readers and writers to codec a higher-level
179 | data format like JSON that does not result in a byte stream itself,
180 | you should likely have your own error mecanisms and let stream errors
181 | simply fly across your use of readers and writers. For example while
182 | a [zstd] decompression error could occur from the reader you are
183 | decoding your JSON from, it likely doesn't make sense to capture this
184 | error in your decoder and produce it as a JSON codec error.
185 | 
186 | For now the library also decided not to inject {!Sys_error} and
187 | {!Unix.Unix_error} that readers and writers based on standard library
188 | channels and {!Unix} file descriptors may raise into the stream error
189 | exception. The idea is that these errors pertain to the resource being
190 | acted upon, not the byte stream itself (also: we couldn't do the same
191 | for potential unknown third-party system abstractions so it feels
192 | non-compostional). But a bit more practice may be needed to precisely
193 | pin down the strategy here.


--------------------------------------------------------------------------------
/myocamlbuild.ml:
--------------------------------------------------------------------------------
 1 | open Ocamlbuild_plugin
 2 | open Command
 3 | 
 4 | (* Generic pkg-config(1) support. *)
 5 | 
 6 | let pkg_config_exists package =
 7 |   Sys.command ("pkg-config --exists " ^ package) = 0
 8 | 
 9 | let lib_with_clib ~lib ~clib ~has_lib ~src_dir ~stublib =
10 |   let strf = Printf.sprintf in
11 |   let windows = !Ocamlbuild_plugin.Options.ext_lib = "lib" in
12 |   let pkg_config flags package =
13 |     let cmd tmp =
14 |       let pkg_config =
15 |         if not windows then A "pkg-config" else
16 |         S [A "pkg-config"; A "--msvc-syntax"]
17 |       in
18 |       Command.execute ~quiet:true &
19 |       Cmd( S [ pkg_config; A ("--" ^ flags); A package; Sh ">"; A tmp]);
20 |       List.map (fun arg -> A arg) (string_list_of_file tmp)
21 |     in
22 |     with_temp_file "pkgconfig" "pkg-config" cmd
23 |   in
24 |   let ar s = match !Ocamlbuild_plugin.Options.ext_lib with
25 |   | "" -> s ^ ".a" | x -> s ^ "." ^ x
26 |   in
27 |   let make_opt o arg = S [ A o; arg ] in
28 |   let ccopts = List.map (make_opt "-ccopt") in
29 |   let cclibs = List.map (make_opt "-cclib") in
30 |   let dllibs = List.map (make_opt "-dllib") in
31 |   let use_lib = strf "use_%s" lib in
32 |   let use_clib = strf "use_%s" clib in
33 |   let record_stub_lib = strf "record_%s" stublib in
34 |   let link_stub_archive = strf "link_%s_archive" stublib in
35 |   let stub_ar = ar (strf "%s/lib%s" src_dir stublib) in
36 |   let static_stub_l =
37 |     if windows then A (strf "lib%s.lib" stublib) else A (strf "-l%s" stublib)
38 |   in
39 |   let dynamic_stub_l =
40 |     if windows then A (strf "dll%s.dll" stublib) else static_stub_l
41 |   in
42 |   let clib_l = pkg_config "libs-only-l" clib in
43 |   let clib_L =
44 |     let dashldify = function
45 |     | A l when windows -> A (String.subst "/libpath:" "-L" l)
46 |     | arg -> arg
47 |     in
48 |     List.map dashldify (pkg_config "libs-only-L" clib)
49 |   in
50 |   let clib_cflags = ccopts @@ (A has_lib) :: pkg_config "cflags" clib in
51 |   let clib_cclibs = cclibs @@ static_stub_l :: clib_l in
52 |   let clib_ccopts = ccopts @@ clib_L in
53 |   begin
54 |     dep [record_stub_lib] [stub_ar];
55 | 
56 |     flag ["c"; "compile"; use_clib] (S clib_cflags);
57 | 
58 |     flag ["c"; "ocamlmklib"; use_clib] (S (clib_L @ clib_l));
59 | 
60 |     flag ["link"; "ocaml"; "library"; "byte"; record_stub_lib]
61 |       (S (dllibs [dynamic_stub_l] @ clib_ccopts @ clib_cclibs));
62 | 
63 |     flag ["link"; "ocaml"; "library"; "native"; record_stub_lib]
64 |       (S (clib_ccopts @ clib_cclibs));
65 | 
66 |     flag_and_dep ["link"; "ocaml"; link_stub_archive] (P stub_ar);
67 | 
68 |     flag ["link"; "ocaml"; "library"; "shared"; link_stub_archive]
69 |       (S (clib_ccopts @ clib_cclibs));
70 | 
71 |     ocaml_lib ~tag_name:use_lib ~dir:src_dir (strf "%s/%s" src_dir lib)
72 |   end
73 | 
74 | let () =
75 |   dispatch begin function
76 |   | After_rules ->
77 |       if pkg_config_exists "libblake3" then
78 |         lib_with_clib
79 |           ~lib:"bytesrw_blake3" ~clib:"libblake3" ~has_lib:"-DHAS_BLAKE3"
80 |           ~src_dir:"src/blake3" ~stublib:"bytesrw_blake3_stubs";
81 |       if pkg_config_exists "libmd" then
82 |         lib_with_clib
83 |           ~lib:"bytesrw_md" ~clib:"libmd" ~has_lib:"-DHAS_LIBMD"
84 |           ~src_dir:"src/md" ~stublib:"bytesrw_md_stubs";
85 |       if pkg_config_exists "libxxhash" then
86 |         lib_with_clib
87 |           ~lib:"bytesrw_xxhash" ~clib:"libxxhash" ~has_lib:"-DHAS_XXHASH"
88 |           ~src_dir:"src/xxhash" ~stublib:"bytesrw_xxhash_stubs";
89 |       if pkg_config_exists "zlib" then
90 |         lib_with_clib
91 |           ~lib:"bytesrw_zlib" ~clib:"zlib" ~has_lib:"-DHAS_ZLIB"
92 |           ~src_dir:"src/zlib" ~stublib:"bytesrw_zlib_stubs";
93 |       if pkg_config_exists "libzstd" then
94 |         lib_with_clib
95 |           ~lib:"bytesrw_zstd" ~clib:"libzstd" ~has_lib:"-DHAS_ZSTD"
96 |           ~src_dir:"src/zstd" ~stublib:"bytesrw_zstd_stubs";
97 |   | _ -> ()
98 |   end
99 | 


--------------------------------------------------------------------------------
/opam:
--------------------------------------------------------------------------------
 1 | opam-version: "2.0"
 2 | name: "bytesrw"
 3 | synopsis: "Composable byte stream readers and writers for OCaml"
 4 | description: """\
 5 | Bytesrw extends the OCaml `Bytes` module with composable, memory
 6 | efficient, byte stream readers and writers compatible with effect
 7 | based concurrency.
 8 | 
 9 | Except for byte slice life-times, these abstractions intentionally
10 | separate away ressource management and the specifics of reading and
11 | writing bytes.
12 | 
13 | Bytesrw distributed under the ISC license. It has no dependencies.
14 | 
15 | Optional support for compressed and hashed bytes depend, at your wish, on 
16 | the C [`zlib`], [`libzstd`], [`blake3`], [`libmd`], [`xxhash`] and
17 | libraries.
18 | 
19 | [`blake3`]: https://blake3.io
20 | [`libzstd`]: http://zstd.net
21 | [`libmd`]: https://www.hadrons.org/software/libmd/
22 | [`xxhash`]: https://xxhash.com/
23 | [`zlib`]: https://zlib.net
24 | 
25 | Homepage: <https://erratique.ch/software/bytesrw/>"""
26 | maintainer: "Daniel Bünzli <daniel.buenzl i@erratique.ch>"
27 | authors: "The bytesrw programmers"
28 | license: "ISC"
29 | tags: [
30 |   "bytes"
31 |   "streaming"
32 |   "zstd"
33 |   "zlib"
34 |   "gzip"
35 |   "deflate"
36 |   "sha1"
37 |   "sha2"
38 |   "compression"
39 |   "hashing"
40 |   "utf"
41 |   "xxhash"
42 |   "blake3"
43 |   "org:erratique"
44 | ]
45 | homepage: "https://erratique.ch/software/bytesrw"
46 | doc: "https://erratique.ch/software/bytesrw/doc"
47 | bug-reports: "https://github.com/dbuenzli/bytesrw/issues"
48 | depends: [
49 |   "ocaml" {>= "4.14.0"}
50 |   "ocamlfind" {build}
51 |   "ocamlbuild" {build}
52 |   "topkg" {build & >= "1.0.3"}
53 | ]
54 | depopts: [
55 |   "conf-xxhash" "conf-zlib" "conf-zstd" "conf-libmd" "conf-libblake3"
56 | ]
57 | conflicts: [
58 |   "conf-zstd" {< "1.3.8"}
59 | ]
60 | build: [
61 |   "ocaml"
62 |   "pkg/pkg.ml"
63 |   "build"
64 |   "--dev-pkg"
65 |   "%{dev}%"
66 |   "--with-conf-libblake3"
67 |   "%{conf-libblake3:installed}%"
68 |   "--with-conf-libmd"
69 |   "%{conf-libmd:installed}%"
70 |   "--with-conf-xxhash"
71 |   "%{conf-xxhash:installed}%"
72 |   "--with-conf-zlib"
73 |   "%{conf-zlib:installed}%"
74 |   "--with-conf-zstd"
75 |   "%{conf-zstd:installed}%"
76 | ]
77 | dev-repo: "git+https://erratique.ch/repos/bytesrw.git"
78 | 


--------------------------------------------------------------------------------
/pkg/META:
--------------------------------------------------------------------------------
 1 | description = "Composable byte stream readers and writers for OCaml"
 2 | version = "%%VERSION_NUM%%"
 3 | requires = ""
 4 | archive(byte) = "bytesrw.cma"
 5 | archive(native) = "bytesrw.cmxa"
 6 | plugin(byte) = "bytesrw.cma"
 7 | plugin(native) = "bytesrw.cmxs"
 8 | exists_if = "bytesrw.cma bytesrw.cmxa"
 9 | 
10 | package "blake3" (
11 |   directory = "blake3"
12 |   description = "BLAKE3 hashes"
13 |   version = "%%VERSION_NUM%%"
14 |   requires = "bytesrw"
15 |   exports = "bytesrw"
16 |   archive(byte) = "bytesrw_blake3.cma"
17 |   archive(native) = "bytesrw_blake3.cmxa"
18 |   plugin(byte) = "bytesrw_blake3.cma"
19 |   plugin(native) = "bytesrw_blake3.cmxs"
20 |   exists_if = "bytesrw_blake3.cma bytesrw_blake3.cmxa"
21 | )
22 | 
23 | package "md" (
24 |   directory = "md"
25 |   description = "SHA{1,2} hashes"
26 |   version = "%%VERSION_NUM%%"
27 |   requires = "bytesrw"
28 |   exports = "bytesrw"
29 |   archive(byte) = "bytesrw_md.cma"
30 |   archive(native) = "bytesrw_md.cmxa"
31 |   plugin(byte) = "bytesrw_md.cma"
32 |   plugin(native) = "bytesrw_md.cmxs"
33 |   exists_if = "bytesrw_md.cma bytesrw_md.cmxa"
34 | )
35 | 
36 | package "unix" (
37 |   directory = "unix"
38 |   description = "The bytesrw.unix library"
39 |   version = "%%VERSION_NUM%%"
40 |   requires = "bytesrw unix"
41 |   exports = "bytesrw"
42 |   archive(byte) = "bytesrw_unix.cma"
43 |   archive(native) = "bytesrw_unix.cmxa"
44 |   plugin(byte) = "bytesrw_unix.cma"
45 |   plugin(native) = "bytesrw_unix.cmxs"
46 |   exists_if = "bytesrw_unix.cma bytesrw_unix.cmxa"
47 | )
48 | 
49 | package "xxhash" (
50 |   directory = "xxhash"
51 |   description = "XXH hashes"
52 |   version = "%%VERSION_NUM%%"
53 |   requires = "bytesrw"
54 |   exports = "bytesrw"
55 |   archive(byte) = "bytesrw_xxhash.cma"
56 |   archive(native) = "bytesrw_xxhash.cmxa"
57 |   plugin(byte) = "bytesrw_xxhash.cma"
58 |   plugin(native) = "bytesrw_xxhash.cmxs"
59 |   exists_if = "bytesrw_xxhash.cma bytesrw_xxhash.cmxa"
60 | )
61 | 
62 | package "zlib" (
63 |   directory = "zlib"
64 |   description = "deflate, zlib and gzip streams"
65 |   version = "%%VERSION_NUM%%"
66 |   requires = "bytesrw"
67 |   exports = "bytesrw"
68 |   archive(byte) = "bytesrw_zlib.cma"
69 |   archive(native) = "bytesrw_zlib.cmxa"
70 |   plugin(byte) = "bytesrw_zlib.cma"
71 |   plugin(native) = "bytesrw_zlib.cmxs"
72 |   exists_if = "bytesrw_zlib.cma bytesrw_zlib.cmxa"
73 | )
74 | 
75 | package "zstd" (
76 |   directory = "zstd"
77 |   description = "zstd streams"
78 |   version = "%%VERSION_NUM%%"
79 |   requires = "bytesrw"
80 |   exports = "bytesrw"
81 |   archive(byte) = "bytesrw_zstd.cma"
82 |   archive(native) = "bytesrw_zstd.cmxa"
83 |   plugin(byte) = "bytesrw_zstd.cma"
84 |   plugin(native) = "bytesrw_zstd.cmxs"
85 |   exists_if = "bytesrw_zstd.cma bytesrw_zstd.cmxa"
86 | )
87 | 


--------------------------------------------------------------------------------
/pkg/pkg.ml:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ocaml
 2 | #use "topfind"
 3 | #require "topkg"
 4 | open Topkg
 5 | 
 6 | let blake3 = Conf.with_pkg "conf-libblake3"
 7 | let libmd = Conf.with_pkg "conf-libmd"
 8 | let xxhash = Conf.with_pkg "conf-xxhash"
 9 | let zstd = Conf.with_pkg "conf-zstd"
10 | let zlib = Conf.with_pkg "conf-zlib"
11 | let () =
12 |   Pkg.describe "bytesrw" @@ fun c ->
13 |   let blake3 = Conf.value c blake3 in
14 |   let libmd = Conf.value c libmd in
15 |   let xxhash = Conf.value c xxhash in
16 |   let zlib = Conf.value c zlib in
17 |   let zstd = Conf.value c zstd in
18 |   Ok [ Pkg.mllib ~api:["Bytesrw"; "Bytesrw_utf"; "Bytesrw_hex"]
19 |          "src/bytesrw.mllib";
20 |        Pkg.mllib "src/unix/bytesrw_unix.mllib" ~dst_dir:"unix";
21 |        Pkg.mllib ~cond:blake3 "src/blake3/bytesrw_blake3.mllib"
22 |          ~dst_dir:"blake3";
23 |        Pkg.clib ~cond:blake3 "src/blake3/libbytesrw_blake3_stubs.clib"
24 |          ~lib_dst_dir:"blake3";
25 |        Pkg.mllib ~cond:libmd "src/md/bytesrw_md.mllib"
26 |          ~dst_dir:"md";
27 |        Pkg.clib ~cond:libmd "src/md/libbytesrw_md_stubs.clib"
28 |          ~lib_dst_dir:"md";
29 |        Pkg.mllib ~cond:xxhash "src/xxhash/bytesrw_xxhash.mllib"
30 |          ~dst_dir:"xxhash";
31 |        Pkg.clib ~cond:xxhash "src/xxhash/libbytesrw_xxhash_stubs.clib"
32 |          ~lib_dst_dir:"xxhash";
33 |        Pkg.mllib ~cond:zlib "src/zlib/bytesrw_zlib.mllib" ~dst_dir:"zlib";
34 |        Pkg.clib ~cond:zlib "src/zlib/libbytesrw_zlib_stubs.clib"
35 |          ~lib_dst_dir:"zlib";
36 |        Pkg.mllib ~cond:zstd "src/zstd/bytesrw_zstd.mllib" ~dst_dir:"zstd";
37 |        Pkg.clib ~cond:zstd "src/zstd/libbytesrw_zstd_stubs.clib"
38 |          ~lib_dst_dir:"zstd";
39 |        Pkg.doc "doc/index.mld" ~dst:"odoc-pages/index.mld";
40 |        Pkg.doc "doc/cookbook.mld" ~dst:"odoc-pages/cookbook.mld";
41 |        Pkg.doc "doc/notes.mld" ~dst:"odoc-pages/notes.mld";
42 |        Pkg.doc "doc/tutorial.mld" ~dst:"odoc-pages/tutorial.mld";]
43 | 


--------------------------------------------------------------------------------
/src/blake3/bytesrw_blake3.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | (* Library parameters *)
  9 | 
 10 | external version : unit -> string = "ocaml_bytesrw_blake3_version"
 11 | 
 12 | (* The type for blake3 hashes *)
 13 | 
 14 | module type Blake3 = sig
 15 |   val id : string
 16 |   val length : int
 17 |   type t
 18 |   type key = t
 19 |   module State : sig
 20 |     type t
 21 |     val make : ?key:key -> unit -> t
 22 |     val update : t -> Bytes.Slice.t -> unit
 23 |   end
 24 |   val value : State.t -> t
 25 |   val string : ?key:t -> string -> t
 26 |   val bytes : ?key:t -> bytes -> t
 27 |   val slice : ?key:t -> Bytes.Slice.t -> t
 28 |   val reader : ?key:t -> Bytes.Reader.t -> t
 29 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 30 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 31 |   val equal : t -> t -> bool
 32 |   val compare : t -> t -> int
 33 |   val to_binary_string : t -> string
 34 |   val of_binary_string : string -> (t, string) result
 35 |   val to_hex : t -> string
 36 |   val of_hex : string -> (t, string) result
 37 |   val pp : Format.formatter -> t -> unit
 38 | end
 39 | 
 40 | (* BLAKE3 hash *)
 41 | 
 42 | module Blake3_hasher = struct
 43 |   type t  (* Custom value holding a blake3_hasher struct *)
 44 |   type hash = string (* 32 bytes *)
 45 |   type key = hash
 46 |   external create : unit -> t = "ocaml_bytesrw_blake3_create"
 47 |   external init : t -> unit = "ocaml_bytesrw_blake3_init"
 48 |   external init_keyed : t -> key:key -> unit = "ocaml_bytesrw_blake3_init_keyed"
 49 |   external finalize : t -> hash = "ocaml_bytesrw_blake3_finalize"
 50 |   external update : t -> bytes -> int -> int -> unit =
 51 |     "ocaml_bytesrw_blake3_update"
 52 | 
 53 |   external hash : bytes -> int -> int -> hash = "ocaml_bytesrw_blake3_hash"
 54 |   external hash_keyed : key:key -> bytes -> int -> int -> hash =
 55 |     "ocaml_bytesrw_blake3_hash_keyed"
 56 | end
 57 | 
 58 | module Blake3 = struct
 59 |   module State = struct
 60 |     type t = Blake3_hasher.t
 61 | 
 62 |     let make ?key () =
 63 |       let state = Blake3_hasher.create () in
 64 |       begin match key with
 65 |       | None -> Blake3_hasher.init state
 66 |       | Some key -> Blake3_hasher.init_keyed state ~key
 67 |       end;
 68 |       state
 69 | 
 70 |     let update state s =
 71 |       let b = Bytes.Slice.bytes s in
 72 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
 73 |       Blake3_hasher.update state b first last
 74 |   end
 75 | 
 76 |   let id = "blake3"
 77 |   let length = 32
 78 |   type t = Blake3_hasher.hash
 79 |   type key = t
 80 | 
 81 |   let value = Blake3_hasher.finalize
 82 |   let bytes ?key b = match key with
 83 |   | None -> Blake3_hasher.hash b 0 (Bytes.length b)
 84 |   | Some key -> Blake3_hasher.hash_keyed ~key b 0 (Bytes.length b)
 85 | 
 86 |   let string ?key s = bytes ?key (Bytes.unsafe_of_string s)
 87 | 
 88 |   let slice ?key s =
 89 |     let b = Bytes.Slice.bytes s in
 90 |     let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
 91 |     match key with
 92 |     | None -> Blake3_hasher.hash b first length
 93 |     | Some key -> Blake3_hasher.hash_keyed ~key b first length
 94 | 
 95 |   let reader ?key r =
 96 |     let rec loop state r = match Bytes.Reader.read r with
 97 |     | s when Bytes.Slice.is_eod s -> value state
 98 |     | s -> State.update state s; loop state r
 99 |     in
100 |     loop (State.make ?key ()) r
101 | 
102 |   let reads ?(state = State.make ()) r =
103 |     Bytes.Reader.tap (State.update state) r, state
104 | 
105 |   let writes ?(state = State.make ()) w =
106 |     Bytes.Writer.tap (State.update state) w, state
107 | 
108 |   let equal = String.equal
109 |   let compare = String.compare
110 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
111 |   let to_binary_string = Fun.id
112 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
113 |   let pp = Bytesrw_hex.pp_binary_string
114 |   let to_hex = Bytesrw_hex.of_binary_string
115 | end
116 | 


--------------------------------------------------------------------------------
/src/blake3/bytesrw_blake3.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** [BLAKE3] hashes.
  7 | 
  8 |     This module provides support for the {{:https://blake3.io/}BLAKE3}
  9 |     hash with the [libblake3] C library. *)
 10 | 
 11 | open Bytesrw
 12 | 
 13 | (** The type for [BLAKE3] hashes. *)
 14 | module type Blake3 = sig
 15 | 
 16 |   (** {1:hashes Hashes} *)
 17 | 
 18 |   val id : string
 19 |   (** [id] identifies the hash function. *)
 20 | 
 21 |   val length : int
 22 |   (** [length] is the byte length of hashes produced by the function. *)
 23 | 
 24 |   type t
 25 |   (** The type for hashes. *)
 26 | 
 27 |   type key = t
 28 |   (** The type for keys. *)
 29 | 
 30 |   (** Hash state. *)
 31 |   module State : sig
 32 |     type t
 33 |     (** The type for hash state. *)
 34 | 
 35 |     val make : ?key:key -> unit -> t
 36 |     (** [make ?key ()] is an initial hash state with given
 37 |         parameters. If [key] is unspecified the hash is unkeyed. *)
 38 | 
 39 |     val update : t -> Bytes.Slice.t -> unit
 40 |     (** [update state slice] updates [state] with the bytes in the range of
 41 |         [slice]. *)
 42 |   end
 43 | 
 44 |   val value : State.t -> t
 45 |   (** [value state] is the hash of [state]. This has no effect on
 46 |       [state] which can still be {!State.update}d. *)
 47 | 
 48 |   (** {1:hashing Hashing} *)
 49 | 
 50 |   val string : ?key:t -> string -> t
 51 |   (** [string s] is the hash of [s] keyed with [key] (if any). *)
 52 | 
 53 |   val bytes : ?key:t -> bytes -> t
 54 |   (** [bytes b] is the hash of [b] keyed with [key] (if any). *)
 55 | 
 56 |   val slice : ?key:t -> Bytes.Slice.t -> t
 57 |   (** [slice s] is the hash of the bytes in the range of [s] keyed with
 58 |       [key] (if any). *)
 59 | 
 60 |   val reader : ?key:t -> Bytes.Reader.t -> t
 61 |   (** [reader r] is the hash of stream [r] keyed with [key] (if any).
 62 |       This consumes the reader. See also {!reads}. *)
 63 | 
 64 |   (** {1:streaming Hashing streams} *)
 65 | 
 66 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 67 |   (** [reads r] is [hr, hstate] with:
 68 |       {ul
 69 |       {- [hr] a reader that taps the reads of [r] to update [hstate].}
 70 |       {- [hstate], a hash state of the reads made on [hr] so
 71 |          far. This is [state] if explicitely given, otherwise
 72 |          defaults to a fresh {!State.make}.}}
 73 |       To get an intermediate or final hash result use {!value} on
 74 |       [hstate]. *)
 75 | 
 76 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 77 |   (** [writes ?state w] is [hw, hstate] with:
 78 |       {ul
 79 |       {- [hw] a writer that taps the writes to update [hstate] before
 80 |          giving them to [w].}
 81 |       {- [hstate], a hash state of the writes made on [hw] so
 82 |          far. This is [state] if explicitely given, otherwise
 83 |          defaults to a fresh {!State.make}.}}
 84 |       To get an intermediate or final hash result use {!value} on
 85 |       [hstate]. *)
 86 | 
 87 |   (** {1:preds Predicates and comparisons} *)
 88 | 
 89 |   val equal : t -> t -> bool
 90 |   (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *)
 91 | 
 92 |   val compare : t -> t -> int
 93 |   (** [comapre] is a total order on hashes compatible with {!equal}. *)
 94 | 
 95 |   (** {1:converting Converting} *)
 96 | 
 97 |   val to_binary_string : t -> string
 98 |   (** [to_binary_string h] is a big-endian binary representation
 99 |       of [h] of length {!length}. *)
100 | 
101 |   val of_binary_string : string -> (t, string) result
102 |   (** [of_binary_string s] is a hash from the big-endian binary
103 |       representation stored in [s]. *)
104 | 
105 |   val to_hex : t -> string
106 |   (** [to_hex h] is the binary representation of [h] using lowercase
107 |       US-ASCII hex digits. *)
108 | 
109 |   val of_hex : string -> (t, string) result
110 |   (** [of_hex s] parses a sequence of hex digits into a hash. *)
111 | 
112 |   val pp : Format.formatter -> t -> unit
113 |   (** [pp] formats hashes for inspection. *)
114 | end
115 | 
116 | (** [BLAKE3] hash. *)
117 | module Blake3 : Blake3
118 | 
119 | (** {1:library Library parameters} *)
120 | 
121 | val version : unit -> string
122 | (** [version ()] is the version of the [blake3] C library. *)
123 | 


--------------------------------------------------------------------------------
/src/blake3/bytesrw_blake3.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_blake3


--------------------------------------------------------------------------------
/src/blake3/bytesrw_blake3_stubs.c:
--------------------------------------------------------------------------------
 1 | /*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*/
 5 | 
 6 | #include <caml/mlvalues.h>
 7 | #include <caml/alloc.h>
 8 | #include <caml/custom.h>
 9 | 
10 | #include <blake3.h>
11 | 
12 | #define blake3_hasher_val(v) ((blake3_hasher *)Data_custom_val(v))
13 | 
14 | static struct custom_operations bytesrw_blake3_ops =
15 | { "bytesrw_blake3_ops",
16 |   custom_finalize_default, custom_compare_default, custom_compare_ext_default,
17 |   custom_hash_default, custom_serialize_default, custom_deserialize_default };
18 | 
19 | CAMLprim value ocaml_bytesrw_blake3_version (value unit)
20 | { return caml_copy_string (blake3_version ()); }
21 | 
22 | CAMLprim value ocaml_bytesrw_blake3_create (value unit)
23 | { return caml_alloc_custom (&bytesrw_blake3_ops, sizeof (blake3_hasher), 0, 1);}
24 | 
25 | CAMLprim value ocaml_bytesrw_blake3_init (value hst)
26 | { blake3_hasher_init (blake3_hasher_val (hst)); return Val_unit; }
27 | 
28 | CAMLprim value ocaml_bytesrw_blake3_init_keyed (value hst, value key)
29 | {
30 |   blake3_hasher_init_keyed (blake3_hasher_val (hst), Bytes_val (key));
31 |   return Val_unit;
32 | }
33 | 
34 | CAMLprim value ocaml_bytesrw_blake3_update
35 | (value hst, value str, value ofs, value len)
36 | {
37 |   blake3_hasher_update (blake3_hasher_val (hst),
38 |                         Bytes_val (str) + Int_val (ofs), Int_val (len));
39 |   return Val_unit;
40 | }
41 | 
42 | CAMLprim value ocaml_bytesrw_blake3_finalize (value hst)
43 | {
44 |   uint8_t hash[BLAKE3_OUT_LEN];
45 |   blake3_hasher_finalize (blake3_hasher_val (hst), hash, BLAKE3_OUT_LEN);
46 |   return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash);
47 | }
48 | 
49 | CAMLprim value ocaml_bytesrw_blake3_hash
50 | (value str, value ofs, value len)
51 | {
52 |   blake3_hasher h;
53 |   uint8_t hash[BLAKE3_OUT_LEN];
54 |   blake3_hasher_init(&h);
55 |   blake3_hasher_update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
56 |   blake3_hasher_finalize (&h, hash, BLAKE3_OUT_LEN);
57 |   return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash);
58 | }
59 | 
60 | CAMLprim value ocaml_bytesrw_blake3_hash_keyed
61 | (value key, value str, value ofs, value len)
62 | {
63 |   blake3_hasher h;
64 |   uint8_t hash[BLAKE3_OUT_LEN];
65 |   blake3_hasher_init_keyed(&h, Bytes_val (key));
66 |   blake3_hasher_update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
67 |   blake3_hasher_finalize (&h, hash, BLAKE3_OUT_LEN);
68 |   return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash);
69 | }
70 | 


--------------------------------------------------------------------------------
/src/blake3/libbytesrw_blake3_stubs.clib:
--------------------------------------------------------------------------------
1 | bytesrw_blake3_stubs.o


--------------------------------------------------------------------------------
/src/bytesrw.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_fmt
2 | Bytesrw
3 | Bytesrw_utf
4 | Bytesrw_hex
5 | 


--------------------------------------------------------------------------------
/src/bytesrw_fmt.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | let pp_hex_char ppf i = Format.fprintf ppf "%02x" i
  7 | let pp_raw_char ppf c = match Char.code c with
  8 | | 0x0A -> Format.pp_print_string ppf "\n"
  9 | | 0x0D -> Format.pp_print_string ppf "\r"
 10 | | i when i < 0x20 || i = 0x7f -> Format.fprintf ppf "\\x%a" pp_hex_char i
 11 | | _ -> (* XXX We should try to decode utf-8 and if not then escape *)
 12 |     Format.pp_print_char ppf c
 13 | 
 14 | let pp_head_hex count ~first ~len ppf b =
 15 |   let max = first + Int.min count len - 1 in
 16 |   if max < 0 then Format.pp_print_string ppf "<empty>" else begin
 17 |     Format.pp_print_char ppf 'x';
 18 |     for i = first to max
 19 |     do pp_hex_char ppf (Bytes.get_uint8 b i) done;
 20 |     if len - 1 > max then Format.fprintf ppf "@<1>%s" "…";
 21 |   end
 22 | 
 23 | let pp_head_raw count ~first ~len ppf b =
 24 |   let max = first + Int.min count len - 1 in
 25 |   if max < 0 then Format.pp_print_string ppf "<empty>" else begin
 26 |     Format.pp_print_char ppf '\"';
 27 |     for i = first to max do Format.pp_print_char ppf (Bytes.get b i) done;
 28 |     if len - 1 > max then Format.fprintf ppf "@<1>%s" "…";
 29 |     Format.pp_print_char ppf '\"';
 30 |   end
 31 | 
 32 | let pp_raw ~first ~len ppf b =
 33 |   Format.pp_open_vbox ppf 1;
 34 |   Format.pp_print_char ppf '\"';
 35 |   for i = 0 to len - 1 do
 36 |     pp_raw_char ppf (Bytes.get b (first + i));
 37 |     if (i + 1) mod 60 = 0
 38 |     then (Format.pp_print_char ppf '\\'; Format.pp_print_cut ppf ())
 39 |   done;
 40 |   Format.pp_print_char ppf '\"';
 41 |   Format.pp_close_box ppf ()
 42 | 
 43 | (* XXX review this *)
 44 | 
 45 | let strf = Printf.sprintf
 46 | let err_range ~start ~len ~blen =
 47 |   invalid_arg @@
 48 |   strf "range start %d len %d: not in bounds [0;%d]" start len (blen - 1)
 49 | 
 50 | let ilog2 v =
 51 |   let rec loop p v = match v with
 52 |   | 0 -> p
 53 |   | v -> loop (p + 1) (v lsr 1)
 54 |   in
 55 |   loop (-1) v
 56 | 
 57 | let pp_address ~addr ~addr_start ~addr_div ~start ~len =
 58 |   let pp_32 ppf addr = Format.fprintf ppf "%08x  " addr in
 59 |   let pp_64 ppf addr = Format.fprintf ppf "%016x  " addr in
 60 |   if not addr then fun ppf _ -> () else
 61 |   let astart = match addr_start with Some a -> a | None -> start in
 62 |   let amax = astart + len in
 63 |   let pp_address =
 64 |     if Sys.int_size = 31 then pp_32 else
 65 |     if ilog2 amax < 32 then pp_32 else pp_64
 66 |   in
 67 |   fun ppf off -> pp_address ppf ((astart + off) / addr_div)
 68 | 
 69 | let pp_ascii_col ppf get_uint8 b start stop =
 70 |   let pp_ascii_byte ppf b i =
 71 |     let byte = get_uint8 b i in
 72 |     if byte < 0x1F || byte > 0x7E
 73 |     then Format.pp_print_char ppf '.'
 74 |     else Format.pp_print_char ppf (Char.chr byte)
 75 |   in
 76 |   Format.fprintf ppf "  @[<h>@<1>%s" "│";
 77 |   for i = start to stop do pp_ascii_byte ppf b i done;
 78 |   Format.fprintf ppf "@<1>%s@]" "│";
 79 |   ()
 80 | 
 81 | let pp_hex
 82 |     ?(addr = false) ?addr_start ?(addr_div = 1) ?(count = 16) ?(group = 2)
 83 |     ?(ascii = false) ?(start = 0) ?len () ppf b
 84 |   =
 85 |   let blen = Bytes.length b in
 86 |   let len = match len with None -> blen - start | Some len -> len in
 87 |   if len = 0 then () else
 88 |   let bmax = start + len - 1 in
 89 |   match 0 <= start && start <= bmax && bmax < blen with
 90 |   | false -> err_range ~start ~len ~blen
 91 |   | true ->
 92 |       let pp_address = pp_address ~addr ~addr_start ~addr_div ~start ~len in
 93 |       Format.pp_open_vbox ppf 0;
 94 |       pp_address ppf 0;
 95 |       Format.fprintf ppf "%02x" (Bytes.get_uint8 b start);
 96 |       for i = start + 1 to bmax do
 97 |         if i mod count = 0 then
 98 |           begin
 99 |             if ascii
100 |             then pp_ascii_col ppf Bytes.get_uint8 b (i - count) (i - 1);
101 |             Format.pp_print_cut ppf ();
102 |             pp_address ppf i
103 |           end
104 |         else if i mod group = 0 then Format.pp_print_char ppf ' ';
105 |         Format.fprintf ppf "%02x" (Bytes.get_uint8 b i);
106 |       done;
107 |       if ascii then begin (* finish the line *)
108 |         for i = bmax + 1 to bmax + (count - (bmax mod count)) - 1 do
109 |           if i mod group = 0 then Format.pp_print_char ppf ' ';
110 |           Format.fprintf ppf "  ";
111 |         done;
112 |         pp_ascii_col ppf Bytes.get_uint8 b  (bmax - bmax mod count) bmax;
113 |       end;
114 |       if addr then (Format.pp_print_cut ppf (); pp_address ppf len);
115 |       Format.pp_close_box ppf ();
116 |       ()
117 | 


--------------------------------------------------------------------------------
/src/bytesrw_fmt.mli:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | val pp_head_hex :
 7 |   int -> first:int -> len:int -> Format.formatter -> bytes -> unit
 8 | 
 9 | val pp_head_raw :
10 |   int -> first:int -> len:int -> Format.formatter -> bytes -> unit
11 | 
12 | val pp_raw : first:int -> len:int -> Format.formatter -> bytes -> unit
13 | val pp_hex :
14 |   ?addr:bool -> ?addr_start:int -> ?addr_div:int -> ?count:int ->
15 |   ?group:int -> ?ascii:bool -> ?start:int -> ?len:int -> unit ->
16 |   Format.formatter -> bytes -> unit
17 | (** See {!Bytesrw.Bytes.pp_hex}. *)
18 | 


--------------------------------------------------------------------------------
/src/bytesrw_hex.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | (* Binary strings *)
 7 | 
 8 | let strf = Printf.sprintf
 9 | 
10 | let to_binary_string' h = (* raises Failure *)
11 |   let hex_value s i = match s.[i] with
12 |   | '0' .. '9' as c -> Char.code c - 0x30
13 |   | 'A' .. 'F' as c -> 10 + (Char.code c - 0x41)
14 |   | 'a' .. 'f' as c -> 10 + (Char.code c - 0x61)
15 |   | c -> failwith (strf "%d: %C is not an ASCII hexadecimal digit" i c)
16 |   in
17 |   match String.length h with
18 |   | len when len mod 2 <> 0 -> failwith "Missing final hex digit"
19 |   | len ->
20 |       let rec loop max s i h k = match i > max with
21 |       | true -> Bytes.unsafe_to_string s
22 |       | false ->
23 |           let hi = hex_value h k and lo = hex_value h (k + 1) in
24 |           Bytes.set s i (Char.chr @@ (hi lsl 4) lor lo);
25 |           loop max s (i + 1) h (k + 2)
26 |       in
27 |       let s_len = len / 2 in
28 |       let s = Bytes.create s_len in
29 |       loop (s_len - 1) s 0 h 0
30 | 
31 | let err_len ~exp ~fnd =
32 |   strf "Expected %d ASCII hexadecimal digits but found %d characters" exp fnd
33 | 
34 | let to_binary_string ?length hex =
35 |   try match length with
36 |   | None -> Ok (to_binary_string' hex)
37 |   | Some len ->
38 |       let exp = len * 2 in
39 |       let fnd = String.length hex in
40 |       if exp <> fnd then failwith (err_len ~exp ~fnd) else
41 |       Ok (to_binary_string' hex)
42 |   with
43 |   | Failure e -> Error e
44 | 
45 | let pp_binary_string ppf s =
46 |   for i = 0 to String.length s - 1
47 |   do Format.fprintf ppf "%02x" (Char.code (s.[i])) done
48 | 
49 | let of_binary_string s = Format.asprintf "%a" pp_binary_string s
50 | 
51 | let check_binary_string_length ~length s =
52 |   let len = String.length s in
53 |   if len = length then Ok s else
54 |   Error (strf "Expected %d bytes but found %d" length len)
55 | 


--------------------------------------------------------------------------------
/src/bytesrw_hex.mli:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | (** Hexadecimal tools.
 7 | 
 8 |     See also {!Bytesrw.Bytes.pp_hex}. *)
 9 | 
10 | (** {1:binary_strings Binary strings} *)
11 | 
12 | val to_binary_string : ?length:int -> string -> (string, string) result
13 | (** [to_binary_string hex] converts [hex], made of upper or lowercase
14 |     US-ASCII hexadecimal digits to a binary string. If [length] is
15 |     specified, errors if the result is not exactly [length] bytes. *)
16 | 
17 | val of_binary_string : string -> string
18 | (** [of_binary_string s] is the bytes of [s] in lowercase US-ASCII
19 |     hexadecimal digits. *)
20 | 
21 | val pp_binary_string : Format.formatter -> string -> unit
22 | (** [pp_binary_string ppf s] formats the bytes of [s] with lowercase
23 |     US-ASCII hexadecimal digits. *)
24 | 
25 | val check_binary_string_length : length:int -> string -> (string, string) result
26 | (** [check_binary_string_length ~length s] checks that [s] has [length]
27 |     bytes and errors otherwise. *)
28 | 


--------------------------------------------------------------------------------
/src/bytesrw_utf.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | 
 8 | (* XXX add these things to Stdlib.Uchar *)
 9 | 
10 | let uchar_max_utf_8_byte_length = 4
11 | let[@inline] uchar_utf_8_byte_decode_length = function
12 | | '\x00' .. '\x7F' -> 1
13 | | '\x80' .. '\xC1' -> 0
14 | | '\xC2' .. '\xDF' -> 2
15 | | '\xE0' .. '\xEF' -> 3
16 | | '\xF0' .. '\xF4' -> 4
17 | | _ -> 0
18 | 
19 | (* Encodings *)
20 | 
21 | module Encoding = struct
22 |   type t =  [ `Utf_8 | `Utf_16be | `Utf_16le ]
23 | 
24 |   let to_iana_charset = function
25 |   | `Utf_8 -> "UTF-8" | `Utf_16 -> "UTF-16" | `Utf_16be -> "UTF-16BE"
26 |   | `Utf_16le -> "UTF-16LE"
27 | 
28 |   let pp ppf e = Format.pp_print_string ppf (to_iana_charset e)
29 | end
30 | 
31 | (* Encoding guess *)
32 | 
33 | let guess_reader_encoding r = match Bytes.Reader.sniff 3 r with
34 | | s when String.length s <= 1 -> `Utf_8 (* No or little input *)
35 | | "\xEF\xBB\xBF" -> `Utf_8 (* BOM *)
36 | | s when s.[0] = '\xFE' && s.[1] = '\xFF' -> `Utf_16be (* BOM *)
37 | | s when s.[0] = '\xFF' && s.[1] = '\xFE' -> `Utf_16le (* BOM *)
38 | | s when s.[0] = '\x00' && Char.code s.[1] > 0 -> `Utf_16be (* ASCII char *)
39 | | s when Char.code s.[0] > 0 && s.[1] = '\x00' -> `Utf_16le (* ASCII char *)
40 | | s when uchar_utf_8_byte_decode_length s.[0] <> 0 -> `Utf_8
41 | | s -> `Utf_16be (* UTF-16 -> UTF-16BE *)
42 | 
43 | (* Validate
44 | 
45 | let ensure_utf_8_reads ?pos ?slice_length r =
46 |   let read () = failwith "Unimplemented" in
47 |   Bytes.Reader.make ?pos ?slice_length read
48 | 
49 | let ensure_utf_16be_reads ?pos ?slice_length r = failwith "Unimplemented"
50 | let ensure_utf_16le_reads ?pos ?slice_length r = failwith "Unimplemented"
51 | 
52 | let ensure_reads = function
53 | | `Utf_8 -> ensure_utf_8_reads
54 | | `Utf_16be -> ensure_utf_16be_reads
55 | | `Utf_16le -> ensure_utf_16le_reads
56 | *)
57 | 


--------------------------------------------------------------------------------
/src/bytesrw_utf.mli:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | (** UTF streams.
 7 | 
 8 |     A few tools to deal with UTF encoded streams. For now just
 9 |     encoding guessing, more may be added in the future.
10 | 
11 |     Sample code for decoding UTF-8 with position tracking using a
12 |     byte stream reader and encoding UTF-8 with a byte stream writer can be found
13 |     {{:https://github.com/dbuenzli/bytesrw/blob/main/test/utf8codec.ml}here}. *)
14 | 
15 | open Bytesrw
16 | 
17 | (** {1:encodings Encodings} *)
18 | 
19 | (** Encoding specification. *)
20 | module Encoding : sig
21 | 
22 |   type t = [
23 |   | `Utf_8 (** UTF-8 *)
24 |   | `Utf_16be (** UTF-16BE *)
25 |   | `Utf_16le (** UTF-16LE *) ]
26 |   (** The type for UTF encodings. *)
27 | 
28 |   val to_iana_charset : [< t | `Utf_16 ] -> string
29 |   (** [to_iana_charaset e] is [e] as its
30 |       {{:https://www.iana.org/assignments/character-sets/character-sets.xhtml}
31 |       IANA character set name}. *)
32 | 
33 |   val pp : Format.formatter -> [< t | `Utf_16 ] -> unit
34 |   (** [pp] formats encodings with {!to_iana_charset}. *)
35 | end
36 | 
37 | (** {1:encoding_guess Encoding guess} *)
38 | 
39 | val guess_reader_encoding : Bytes.Reader.t -> Encoding.t
40 | (** [guess_reader_encoding r] guesses the encoding at the stream
41 |     position of [r] by {{!Bytesrw.Bytes.Reader.sniff}sniff}ing three
42 |     bytes and applying {{!encoding_guess_heuristic}this
43 |     heuristic} which is subject to change in the future. *)
44 | 
45 | (*
46 | (** {1:validate Validate} *)
47 | 
48 | val ensure_reads : encoding -> Bytes.Reader.filter
49 | (** [ensure_reads encoding r] filters the reads of [r] to make
50 |     sure the stream is a valid [encoding] byte stream. Invalid
51 |     byte sequences *)
52 | *)
53 | 
54 | (** {1:encoding_guess_heuristic Encoding guess heurisitic}
55 | 
56 |     The heuristic is compatible with
57 |     {{:http://unicode.org/glossary/#byte_order_mark}BOM} based
58 |     recognition and the
59 |     {{:http://tools.ietf.org/html/rfc4627#section-3}old} JSON encoding
60 |     recognition (UTF-8 is mandated nowadays) that relies on ASCII
61 |     being present at the beginning of the stream.
62 | 
63 |     The heuristic looks at the first three bytes of input (or less if
64 |     impossible) and takes the {e first} matching byte pattern in the
65 |     table below.
66 | {v
67 | xx = any byte
68 | .. = any byte or no byte (input too small)
69 | pp = positive byte
70 | uu = valid UTF-8 first byte
71 | 
72 | Bytes    | Guess     | Rationale
73 | ---------+-----------+-----------------------------------------------
74 | EF BB BF | `UTF_8    | UTF-8 BOM
75 | FE FF .. | `UTF_16BE | UTF-16BE BOM
76 | FF FE .. | `UTF_16LE | UTF-16LE BOM
77 | 00 pp .. | `UTF_16BE | ASCII UTF-16BE and U+0000 is often forbidden
78 | pp 00 .. | `UTF_16LE | ASCII UTF-16LE and U+0000 is often forbidden
79 | uu .. .. | `UTF_8    | ASCII UTF-8 or valid UTF-8 first byte.
80 | xx xx .. | `UTF_16BE | Not UTF-8 => UTF-16, no BOM => UTF-16BE
81 | .. .. .. | `UTF_8    | Single malformed UTF-8 byte or no input.
82 | v}
83 | *)
84 | 


--------------------------------------------------------------------------------
/src/md/bytesrw_md.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | (* SHA hash signature *)
  9 | 
 10 | module type Sha = sig
 11 |   val id : string
 12 |   val length : int
 13 |   type t
 14 |   module State : sig
 15 |     type t
 16 |     val make : unit -> t
 17 |     val update : t -> Bytes.Slice.t -> unit
 18 |   end
 19 |   val value : State.t -> t
 20 |   val string : string -> t
 21 |   val bytes : bytes -> t
 22 |   val slice : Bytes.Slice.t -> t
 23 |   val reader : Bytes.Reader.t -> t
 24 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 25 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 26 |   val equal : t -> t -> bool
 27 |   val compare : t -> t -> int
 28 |   val to_binary_string : t -> string
 29 |   val of_binary_string : string -> (t, string) result
 30 |   val to_hex : t -> string
 31 |   val of_hex : string -> (t, string) result
 32 |   val pp : Format.formatter -> t -> unit
 33 | end
 34 | 
 35 | (* SHA-1 *)
 36 | 
 37 | module Sha1_ctx = struct
 38 |   type t  (* Custom value whose data holds a SHA1_CTX C struct *)
 39 |   type hash = string (* 20 bytes *)
 40 |   external hash : bytes -> int -> int -> hash = "ocaml_bytesrw_sha1_hash"
 41 |   external init : unit -> t = "ocaml_bytesrw_sha1_init"
 42 |   external final : t -> hash = "ocaml_bytesrw_sha1_final"
 43 |   external update : t -> bytes -> int -> int -> unit =
 44 |     "ocaml_bytesrw_sha1_update"
 45 | end
 46 | 
 47 | module Sha_1 = struct
 48 |   module State = struct
 49 |     type t = Sha1_ctx.t
 50 |     let make () = Sha1_ctx.init ()
 51 |     let update state s =
 52 |       if Bytes.Slice.is_eod s then () else
 53 |       let b = Bytes.Slice.bytes s in
 54 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
 55 |       Sha1_ctx.update state b first last
 56 |   end
 57 | 
 58 |   let id = "sha1"
 59 |   let length = 20
 60 |   type t = Sha1_ctx.hash
 61 |   let value = Sha1_ctx.final
 62 |   let bytes b = Sha1_ctx.hash b 0 (Bytes.length b)
 63 |   let string s = bytes (Bytes.unsafe_of_string s)
 64 |   let slice s =
 65 |     let b = Bytes.Slice.bytes s in
 66 |     let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
 67 |     Sha1_ctx.hash b first length
 68 | 
 69 |   let reader r =
 70 |     let rec loop state r = match Bytes.Reader.read r with
 71 |     | s when Bytes.Slice.is_eod s -> value state
 72 |     | s -> State.update state s; loop state r
 73 |     in
 74 |     loop (State.make ()) r
 75 | 
 76 |   let reads ?(state = State.make ()) r =
 77 |     Bytes.Reader.tap (State.update state) r, state
 78 | 
 79 |   let writes ?(state = State.make ()) w =
 80 |     Bytes.Writer.tap (State.update state) w, state
 81 | 
 82 |   let equal = String.equal
 83 |   let compare = String.compare
 84 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
 85 |   let to_binary_string = Fun.id
 86 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
 87 |   let pp = Bytesrw_hex.pp_binary_string
 88 |   let to_hex = Bytesrw_hex.of_binary_string
 89 | end
 90 | 
 91 | (* SHA-2 *)
 92 | 
 93 | module Sha2_ctx = struct
 94 |   type t (* Custom value whose data holds a SHA2_CTX C struct *)
 95 |   type h256 = string (* 32 bytes *)
 96 |   type h384 = string (* 48 bytes *)
 97 |   type h512 = string (* 64 bytes *)
 98 | 
 99 |   external hash256 : bytes -> int -> int -> h256 = "ocaml_bytesrw_sha256_hash"
100 |   external init256 : unit -> t = "ocaml_bytesrw_sha256_init"
101 |   external final256 : t -> h256 = "ocaml_bytesrw_sha256_final"
102 |   external update256 : t -> bytes -> int -> int -> unit =
103 |     "ocaml_bytesrw_sha256_update"
104 | 
105 |   external hash384 : bytes -> int -> int -> h384 = "ocaml_bytesrw_sha384_hash"
106 |   external init384 : unit -> t = "ocaml_bytesrw_sha384_init"
107 |   external final384 : t -> h384 = "ocaml_bytesrw_sha384_final"
108 |   external update384 : t -> bytes -> int -> int -> unit =
109 |     "ocaml_bytesrw_sha384_update"
110 | 
111 |   external hash512 : bytes -> int -> int -> h512 = "ocaml_bytesrw_sha512_hash"
112 |   external init512 : unit -> t = "ocaml_bytesrw_sha512_init"
113 |   external final512 : t -> h512 = "ocaml_bytesrw_sha512_final"
114 |   external update512 : t -> bytes -> int -> int -> unit =
115 |     "ocaml_bytesrw_sha512_update"
116 | end
117 | 
118 | module Sha_256 = struct
119 |   module State = struct
120 |     type t = Sha2_ctx.t
121 |     let make () = Sha2_ctx.init256 ()
122 |     let update state s =
123 |       if Bytes.Slice.is_eod s then () else
124 |       let b = Bytes.Slice.bytes s in
125 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
126 |       Sha2_ctx.update256 state b first last
127 |   end
128 | 
129 |   let id = "sha-256"
130 |   let length = 32
131 |   type t = Sha2_ctx.h256
132 | 
133 |   let value = Sha2_ctx.final256
134 |   let bytes b = Sha2_ctx.hash256 b 0 (Bytes.length b)
135 |   let string s = bytes (Bytes.unsafe_of_string s)
136 |   let slice s =
137 |     let b = Bytes.Slice.bytes s in
138 |     let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
139 |     Sha2_ctx.hash256 b first length
140 | 
141 |   let reader r =
142 |     let rec loop state r = match Bytes.Reader.read r with
143 |     | s when Bytes.Slice.is_eod s -> value state
144 |     | s -> State.update state s; loop state r
145 |     in
146 |     loop (State.make ()) r
147 | 
148 |   let reads ?(state = State.make ()) r =
149 |     Bytes.Reader.tap (State.update state) r, state
150 | 
151 |   let writes ?(state = State.make ()) w =
152 |     Bytes.Writer.tap (State.update state) w, state
153 | 
154 |   let equal = String.equal
155 |   let compare = String.compare
156 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
157 |   let to_binary_string = Fun.id
158 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
159 |   let pp = Bytesrw_hex.pp_binary_string
160 |   let to_hex = Bytesrw_hex.of_binary_string
161 | end
162 | 
163 | module Sha_384 = struct
164 |   module State = struct
165 |     type t = Sha2_ctx.t
166 |     let make () = Sha2_ctx.init384 ()
167 |     let update state s =
168 |       if Bytes.Slice.is_eod s then () else
169 |       let b = Bytes.Slice.bytes s in
170 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
171 |       Sha2_ctx.update384 state b first last
172 |   end
173 | 
174 |   let id = "sha-384"
175 |   let length = 48
176 |   type t = Sha2_ctx.h384
177 | 
178 |   let value = Sha2_ctx.final384
179 |   let bytes b = Sha2_ctx.hash384 b 0 (Bytes.length b)
180 |   let string s = bytes (Bytes.unsafe_of_string s)
181 |   let slice s =
182 |     let b = Bytes.Slice.bytes s in
183 |     let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
184 |     Sha2_ctx.hash384 b first length
185 | 
186 |   let reader r =
187 |     let rec loop state r = match Bytes.Reader.read r with
188 |     | s when Bytes.Slice.is_eod s -> value state
189 |     | s -> State.update state s; loop state r
190 |     in
191 |     loop (State.make ()) r
192 | 
193 |   let reads ?(state = State.make ()) r =
194 |     Bytes.Reader.tap (State.update state) r, state
195 | 
196 |   let writes ?(state = State.make ()) w =
197 |     Bytes.Writer.tap (State.update state) w, state
198 | 
199 |   let equal = String.equal
200 |   let compare = String.compare
201 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
202 |   let to_binary_string = Fun.id
203 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
204 |   let pp = Bytesrw_hex.pp_binary_string
205 |   let to_hex = Bytesrw_hex.of_binary_string
206 | end
207 | 
208 | module Sha_512 = struct
209 |   module State = struct
210 |     type t = Sha2_ctx.t
211 |     let make () = Sha2_ctx.init512 ()
212 |     let update state s =
213 |       if Bytes.Slice.is_eod s then () else
214 |       let b = Bytes.Slice.bytes s in
215 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
216 |       Sha2_ctx.update512 state b first last
217 |   end
218 | 
219 |   let id = "sha-512"
220 |   let length = 64
221 |   type t = Sha2_ctx.h512
222 | 
223 |   let value = Sha2_ctx.final512
224 |   let bytes b = Sha2_ctx.hash512 b 0 (Bytes.length b)
225 |   let string s = bytes (Bytes.unsafe_of_string s)
226 |   let slice s =
227 |     let b = Bytes.Slice.bytes s in
228 |     let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
229 |     Sha2_ctx.hash512 b first length
230 | 
231 |   let reader r =
232 |     let rec loop state r = match Bytes.Reader.read r with
233 |     | s when Bytes.Slice.is_eod s -> value state
234 |     | s -> State.update state s; loop state r
235 |     in
236 |     loop (State.make ()) r
237 | 
238 |   let reads ?(state = State.make ()) r =
239 |     Bytes.Reader.tap (State.update state) r, state
240 | 
241 |   let writes ?(state = State.make ()) w =
242 |     Bytes.Writer.tap (State.update state) w, state
243 | 
244 |   let equal = String.equal
245 |   let compare = String.compare
246 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
247 |   let to_binary_string = Fun.id
248 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
249 |   let pp = Bytesrw_hex.pp_binary_string
250 |   let to_hex = Bytesrw_hex.of_binary_string
251 | end
252 | 


--------------------------------------------------------------------------------
/src/md/bytesrw_md.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** [SHA-1] and [SHA-2] hashes.
  7 | 
  8 |     This module provides support for the SHA-1 and SHA-2
  9 |     hashes with the {{:https://www.hadrons.org/software/libmd/}
 10 |     [libmd]} C library. *)
 11 | 
 12 | open Bytesrw
 13 | 
 14 | (** The type for [SHA-1] and [SHA-2] hashes. *)
 15 | module type Sha = sig
 16 | 
 17 |     (** {1:hashes Hashes} *)
 18 | 
 19 |   val id : string
 20 |   (** [id] identifies the hash function. *)
 21 | 
 22 |   val length : int
 23 |   (** [length] is the byte length of hashes produced by the function. *)
 24 | 
 25 |   type t
 26 |   (** The type for hashes. *)
 27 | 
 28 |   (** Hash state. *)
 29 |   module State : sig
 30 |     type t
 31 |     (** The type for hash state. *)
 32 | 
 33 |     val make : unit -> t
 34 |     (** [make ()] is an initial hash state. *)
 35 | 
 36 |     val update : t -> Bytes.Slice.t -> unit
 37 |     (** [update state slice] updates [state] with the bytes in the range of
 38 |         [slice]. *)
 39 |   end
 40 | 
 41 |   val value : State.t -> t
 42 |   (** [value state] is the hash of [state]. {b Warning.} This has an effect on
 43 |       [state], it can no longer be {!State.update}d. *)
 44 | 
 45 |   (** {1:hashing Hashing} *)
 46 | 
 47 |   val string : string -> t
 48 |   (** [string s] is the hash of [s]. *)
 49 | 
 50 |   val bytes : bytes -> t
 51 |   (** [bytes b] is the hash of [b]. *)
 52 | 
 53 |   val slice : Bytes.Slice.t -> t
 54 |   (** [slice s] is the hash of the bytes in the range of [s]. *)
 55 | 
 56 |   val reader : Bytes.Reader.t -> t
 57 |   (** [reader r] is the hash of stream [r]. This consumes the reader.
 58 |       See also {!reads}. *)
 59 | 
 60 |   (** {1:streaming Hashing streams} *)
 61 | 
 62 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 63 |   (** [reads r] is [hr, hstate] with:
 64 |       {ul
 65 |       {- [hr] a reader that taps the reads of [r] to update [hstate].}
 66 |       {- [hstate], a hash state of the reads made on [hr] so
 67 |          far. This is [state] if explicitely given, otherwise
 68 |          defaults to a fresh {!State.make}.}}
 69 |       To get the final hash result use {!value} on [hstate] {b once}. *)
 70 | 
 71 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 72 |   (** [writes ?state w] is [hw, hstate] with:
 73 |       {ul
 74 |       {- [hw] a writer that taps the writes to update [hstate] before
 75 |          giving them to [w].}
 76 |       {- [hstate], a hash state of the writes made on [hw] so
 77 |          far. This is [state] if explicitely given, otherwise
 78 |          defaults to a fresh {!State.make}.}}
 79 |       To get the final hash result use {!value} on [hstate] {b once}. *)
 80 | 
 81 |   (** {1:preds Predicates and comparisons} *)
 82 | 
 83 |   val equal : t -> t -> bool
 84 |   (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *)
 85 | 
 86 |   val compare : t -> t -> int
 87 |   (** [comapre] is a total order on hashes compatible with {!equal}. *)
 88 | 
 89 |   (** {1:converting Converting} *)
 90 | 
 91 |   val to_binary_string : t -> string
 92 |   (** [to_binary_string h] is a big-endian binary representation
 93 |       of [h] of length {!length}. *)
 94 | 
 95 |   val of_binary_string : string -> (t, string) result
 96 |   (** [of_binary_string s] is a hash from the big-endian binary
 97 |       representation stored in [s]. *)
 98 | 
 99 |   val to_hex : t -> string
100 |   (** [to_hex h] is the binary representation of [h] using lowercase
101 |       US-ASCII hex digits. *)
102 | 
103 |   val of_hex : string -> (t, string) result
104 |   (** [of_hex s] parses a sequence of hex digits into a hash. *)
105 | 
106 |   val pp : Format.formatter -> t -> unit
107 |   (** [pp] formats hashes for inspection. *)
108 | end
109 | 
110 | (** [SHA-1] hash. *)
111 | module Sha_1 : Sha
112 | 
113 | (** [SHA-256] hash. *)
114 | module Sha_256 : Sha
115 | 
116 | (** [SHA-384] hash. *)
117 | module Sha_384 : Sha
118 | 
119 | (** [SHA-512] hash. *)
120 | module Sha_512 : Sha
121 | 


--------------------------------------------------------------------------------
/src/md/bytesrw_md.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_md


--------------------------------------------------------------------------------
/src/md/bytesrw_md_stubs.c:
--------------------------------------------------------------------------------
  1 | /*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*/
  5 | 
  6 | #include <caml/mlvalues.h>
  7 | #include <caml/alloc.h>
  8 | #include <caml/custom.h>
  9 | 
 10 | #include <sha1.h>
 11 | #include <sha2.h>
 12 | 
 13 | #define sha1_ctx_val(v) ((SHA1_CTX *)Data_custom_val(v))
 14 | #define sha2_ctx_val(v) ((SHA2_CTX *)Data_custom_val(v))
 15 | 
 16 | static struct custom_operations bytesrw_md_ops =
 17 | { "bytesrw_md_ops",
 18 |   custom_finalize_default, custom_compare_default, custom_compare_ext_default,
 19 |   custom_hash_default, custom_serialize_default, custom_deserialize_default };
 20 | 
 21 | /* SHA-1 */
 22 | 
 23 | CAMLprim value ocaml_bytesrw_sha1_init (value unit)
 24 | {
 25 |   value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA1_CTX), 0, 1);
 26 |   SHA1Init (sha1_ctx_val (hctx));
 27 |   return hctx;
 28 | }
 29 | 
 30 | CAMLprim value ocaml_bytesrw_sha1_update
 31 | (value hctx, value str, value ofs, value len)
 32 | {
 33 |   SHA1Update (sha1_ctx_val (hctx), Bytes_val (str) + Int_val (ofs),
 34 |               Int_val (len));
 35 |   return Val_unit;
 36 | }
 37 | 
 38 | CAMLprim value ocaml_bytesrw_sha1_final (value hctx)
 39 | {
 40 |   /* Make a local copy, SHA1Final updates the context */
 41 |   SHA1_CTX ctx = *sha1_ctx_val (hctx);
 42 |   value r = caml_alloc_string (SHA1_DIGEST_LENGTH);
 43 |   SHA1Final (Bytes_val (r), &ctx);
 44 |   return r;
 45 | }
 46 | 
 47 | CAMLprim value ocaml_bytesrw_sha1_hash
 48 | (value str, value ofs, value len)
 49 | {
 50 |   SHA1_CTX h;
 51 |   uint8_t hash[SHA1_DIGEST_LENGTH];
 52 |   SHA1Init (&h);
 53 |   SHA1Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
 54 |   SHA1Final (hash, &h);
 55 |   return caml_alloc_initialized_string (SHA1_DIGEST_LENGTH, (char *)hash);
 56 | }
 57 | 
 58 | /* SHA-256 */
 59 | 
 60 | CAMLprim value ocaml_bytesrw_sha256_init (value unit)
 61 | {
 62 |   value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1);
 63 |   SHA256Init (sha2_ctx_val (hctx));
 64 |   return hctx;
 65 | }
 66 | 
 67 | CAMLprim value ocaml_bytesrw_sha256_update
 68 | (value hctx, value str, value ofs, value len)
 69 | {
 70 |   SHA256Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs),
 71 |                 Int_val (len));
 72 |   return Val_unit;
 73 | }
 74 | 
 75 | CAMLprim value ocaml_bytesrw_sha256_final (value hctx)
 76 | {
 77 |   /* Make a local copy, SHA256Final updates the context */
 78 |   SHA2_CTX ctx = *sha2_ctx_val (hctx);
 79 |   value r = caml_alloc_string (SHA256_DIGEST_LENGTH);
 80 |   SHA256Final (Bytes_val (r), &ctx);
 81 |   return r;
 82 | }
 83 | 
 84 | CAMLprim value ocaml_bytesrw_sha256_hash
 85 | (value str, value ofs, value len)
 86 | {
 87 |   SHA2_CTX h;
 88 |   uint8_t hash[SHA256_DIGEST_LENGTH];
 89 |   SHA256Init (&h);
 90 |   SHA256Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
 91 |   SHA256Final (hash, &h);
 92 |   return caml_alloc_initialized_string (SHA256_DIGEST_LENGTH, (char *)hash);
 93 | }
 94 | 
 95 | /* SHA-384 */
 96 | 
 97 | CAMLprim value ocaml_bytesrw_sha384_init (value unit)
 98 | {
 99 |   value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1);
100 |   SHA384Init (sha2_ctx_val (hctx));
101 |   return hctx;
102 | }
103 | 
104 | CAMLprim value ocaml_bytesrw_sha384_update
105 | (value hctx, value str, value ofs, value len)
106 | {
107 |   SHA384Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs),
108 |                 Int_val (len));
109 |   return Val_unit;
110 | }
111 | 
112 | CAMLprim value ocaml_bytesrw_sha384_final (value hctx)
113 | {
114 |   /* Make a local copy, SHA384Final updates the context */
115 |   SHA2_CTX ctx = *sha2_ctx_val (hctx);
116 |   value r = caml_alloc_string (SHA384_DIGEST_LENGTH);
117 |   SHA384Final (Bytes_val (r), &ctx);
118 |   return r;
119 | }
120 | 
121 | CAMLprim value ocaml_bytesrw_sha384_hash
122 | (value str, value ofs, value len)
123 | {
124 |   SHA2_CTX h;
125 |   uint8_t hash[SHA384_DIGEST_LENGTH];
126 |   SHA384Init (&h);
127 |   SHA384Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
128 |   SHA384Final (hash, &h);
129 |   return caml_alloc_initialized_string (SHA384_DIGEST_LENGTH, (char *)hash);
130 | }
131 | 
132 | /* SHA-512 */
133 | 
134 | CAMLprim value ocaml_bytesrw_sha512_init (value unit)
135 | {
136 |   value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1);
137 |   SHA512Init (sha2_ctx_val (hctx));
138 |   return hctx;
139 | }
140 | 
141 | CAMLprim value ocaml_bytesrw_sha512_update
142 | (value hctx, value str, value ofs, value len)
143 | {
144 |   SHA512Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs),
145 |                 Int_val (len));
146 |   return Val_unit;
147 | }
148 | 
149 | CAMLprim value ocaml_bytesrw_sha512_final (value hctx)
150 | {
151 |   /* Make a local copy, SHA512Final updates the context */
152 |   SHA2_CTX ctx = *sha2_ctx_val (hctx);
153 |   value r = caml_alloc_string (SHA512_DIGEST_LENGTH);
154 |   SHA512Final (Bytes_val (r), &ctx);
155 |   return r;
156 | }
157 | 
158 | CAMLprim value ocaml_bytesrw_sha512_hash
159 | (value str, value ofs, value len)
160 | {
161 |   SHA2_CTX h;
162 |   uint8_t hash[SHA512_DIGEST_LENGTH];
163 |   SHA512Init (&h);
164 |   SHA512Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len));
165 |   SHA512Final (hash, &h);
166 |   return caml_alloc_initialized_string (SHA512_DIGEST_LENGTH, (char *)hash);
167 | }
168 | 


--------------------------------------------------------------------------------
/src/md/libbytesrw_md_stubs.clib:
--------------------------------------------------------------------------------
1 | bytesrw_md_stubs.o


--------------------------------------------------------------------------------
/src/unix/bytesrw_unix.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | 
 8 | (* Readers and writers. *)
 9 | 
10 | let rec current_pos fd = match Unix.lseek fd 0 Unix.SEEK_CUR with
11 | | exception Unix.Unix_error (ESPIPE, _, _) -> 0
12 | | exception Unix.Unix_error (EINTR, _, _) -> current_pos fd
13 | | pos -> if pos < 0 then 0 else pos
14 | 
15 | let bytes_reader_of_fd
16 |     ?pos ?(slice_length = Bytes.Slice.unix_io_buffer_size) fd
17 |   =
18 |   let pos = match pos with Some pos -> pos | None -> current_pos fd in
19 |   let b = Bytes.create (Bytes.Slice.check_length slice_length) in
20 |   let rec read () = match Unix.read fd b 0 slice_length with
21 |   | 0 -> Bytes.Slice.eod
22 |   | count -> Bytes.Slice.make b ~first:0 ~length:count
23 |   | exception Unix.Unix_error (Unix.EINTR, _, _) -> read ()
24 |   in
25 |   Bytes.Reader.make ~pos ~slice_length read
26 | 
27 | let bytes_writer_of_fd
28 |     ?pos ?(slice_length = Bytes.Slice.unix_io_buffer_size) fd
29 |   =
30 |   let pos = match pos with Some pos -> pos | None -> current_pos fd in
31 |   let rec write = function
32 |   | s when Bytes.Slice.is_eod s -> ()
33 |   | s ->
34 |       let b = Bytes.Slice.bytes s in
35 |       let first = Bytes.Slice.first s and length = Bytes.Slice.length s in
36 |       match Unix.single_write fd b first length with
37 |       | count when count = length -> ()
38 |       | count -> write (Option.get (Bytes.Slice.drop count s))
39 |       | exception Unix.Unix_error (Unix.EINTR, _, _) -> write s
40 |   in
41 |   Bytes.Writer.make ~pos ~slice_length write
42 | 


--------------------------------------------------------------------------------
/src/unix/bytesrw_unix.mli:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | (** {!Unix} file descriptor byte stream readers and writers. *)
 7 | 
 8 | open Bytesrw
 9 | 
10 | val bytes_reader_of_fd :
11 |   ?pos:Bytes.Stream.pos -> ?slice_length:Bytes.Slice.length ->
12 |   Unix.file_descr -> Bytes.Reader.t
13 | (** [bytes_reader_of_fd fd] reads bytes from [fd] with slices of maximal
14 |     length [slice_length] (defaults to
15 |     {!Bytesrw.Bytes.Slice.unix_io_buffer_size}).
16 |     [pos] defaults to the [fd] position as determined by {!Unix.lseek}.
17 |     Reads are retried on {!Unix.EINTR} but both this function and the resulting
18 |     reader may raise {!Unix.Unix_error}. *)
19 | 
20 | val bytes_writer_of_fd :
21 |   ?pos:Bytes.Stream.pos -> ?slice_length:Bytes.Slice.length ->
22 |   Unix.file_descr -> Bytes.Writer.t
23 | (** [bytes_writer_of_fd fd] writes bytes to [fd]. The hinted
24 |     [slice_length] defaults to
25 |     {!Bytesrw.Bytes.Slice.unix_io_buffer_size}.  [pos] defaults to the
26 |     [fd] position as determined by {!Unix.lseek}.  Writes are retried
27 |     on {!Unix.EINTR} but both this function and the resulting writer
28 |     may raise {!Unix.Unix_error}. *)
29 | 


--------------------------------------------------------------------------------
/src/unix/bytesrw_unix.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_unix


--------------------------------------------------------------------------------
/src/xxhash/bytesrw_xxhash.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | external get_64u : String.t -> int -> int64 = "%caml_string_get64u"
  9 | external set_64u : Bytes.t -> int -> int64 -> unit = "%caml_string_set64u"
 10 | external swap_64 : int64 -> int64 = "%bswap_int64"
 11 | external noswap : int64 -> int64 = "%identity"
 12 | let layout = if Sys.big_endian then noswap else swap_64
 13 | let u64_to_binary_string t =
 14 |   let b = Bytes.create 8 in set_64u b 0 (layout t); Bytes.unsafe_to_string b
 15 | 
 16 | (* Library parameters *)
 17 | 
 18 | external version : unit -> int = "ocaml_bytesrw_XXH_versionNumber"
 19 | external xxh3_secret_size_min : unit -> int =
 20 |   "ocaml_bytesrw_XXH3_SECRET_SIZE_MIN"
 21 | 
 22 | let version () =
 23 |   let v = version () in
 24 |   let maj = v / (100 * 100) and rem = v mod (100 * 100) in
 25 |   let min = rem / 100 in
 26 |   Printf.sprintf "%d.%d.%d" maj min (rem mod 100)
 27 | 
 28 | (* XXH3 module type *)
 29 | 
 30 | module type Xxh3 = sig
 31 |   val id : string
 32 |   val length : int
 33 |   type seed = int64
 34 |   type secret = string
 35 |   type t
 36 | 
 37 |   module State : sig
 38 |     type t
 39 |     val make : ?secret:secret -> ?seed:seed -> unit -> t
 40 |     val update : t -> Bytes.Slice.t -> unit
 41 |     val copy : t -> t
 42 |   end
 43 | 
 44 |   val value : State.t -> t
 45 |   val string : ?seed:seed -> string -> t
 46 |   val bytes : ?seed:seed -> bytes -> t
 47 |   val slice : ?seed:seed -> Bytes.Slice.t -> t
 48 |   val reader : ?seed:seed -> Bytes.Reader.t -> t
 49 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 50 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 51 |   val equal : t -> t -> bool
 52 |   val compare : t -> t -> int
 53 |   val to_binary_string : t -> string
 54 |   val of_binary_string : string -> (t, string) result
 55 |   val to_hex : t -> string
 56 |   val of_hex : string -> (t, string) result
 57 |   val pp : Format.formatter -> t -> unit
 58 | end
 59 | 
 60 | module Xxh3_state = struct
 61 |   type t  (* Custom value holding a pointer to a finalized XXH3_state_t *)
 62 |   type seed = int64
 63 |   type secret = string
 64 |   type h64_hash = int64
 65 |   type h128_hash = string
 66 |   let no_seed = 0L
 67 | 
 68 |   external create : unit -> t = "ocaml_bytesrw_XXH3_createState"
 69 |   external copy : dst:t -> src:t -> unit = "ocaml_bytesrw_XXH3_copyState"
 70 |   external h64bits_reset : t -> unit = "ocaml_bytesrw_XXH3_64bits_reset"
 71 |   external h64bits_reset_with_seed : t -> seed -> unit =
 72 |     "ocaml_bytesrw_XXH3_64bits_reset_withSeed"
 73 | 
 74 |   external h64bits_reset_with_secret : t -> secret -> unit =
 75 |     "ocaml_bytesrw_XXH3_64bits_reset_withSecret"
 76 | 
 77 |   external h64bits_reset_with_secret_and_seed :
 78 |     t -> secret -> seed -> unit =
 79 |     "ocaml_bytesrw_XXH3_64bits_reset_withSecretandSeed"
 80 | 
 81 |   external h64bits_update : t -> bytes -> int -> int -> unit =
 82 |     "ocaml_bytesrw_XXH3_64bits_update"
 83 | 
 84 |   external h64bits_digest : t -> h64_hash = "ocaml_bytesrw_XXH3_64bits_digest"
 85 | 
 86 |   external seeded_64hash : bytes -> int -> int -> seed -> h64_hash =
 87 |     "ocaml_bytesrw_XXH3_64bits_withSeed"
 88 | 
 89 |   external h128bits_reset : t -> unit = "ocaml_bytesrw_XXH3_128bits_reset"
 90 |   external h128bits_reset_with_seed : t -> seed -> unit =
 91 |     "ocaml_bytesrw_XXH3_128bits_reset_withSeed"
 92 | 
 93 |   external h128bits_reset_with_secret : t -> secret -> unit =
 94 |     "ocaml_bytesrw_XXH3_128bits_reset_withSecret"
 95 | 
 96 |   external h128bits_reset_with_secret_and_seed :
 97 |     t -> secret -> seed -> unit =
 98 |     "ocaml_bytesrw_XXH3_128bits_reset_withSecretandSeed"
 99 | 
100 |   external h128bits_update : t -> bytes -> int -> int -> unit =
101 |     "ocaml_bytesrw_XXH3_128bits_update"
102 | 
103 |   external h128bits_digest : t -> h128_hash =
104 |     "ocaml_bytesrw_XXH3_128bits_digest"
105 | 
106 |   external seeded_128hash : bytes -> int -> int -> seed -> h128_hash =
107 |     "ocaml_bytesrw_XXH3_128bits_withSeed"
108 | end
109 | 
110 | module Xxh3_64 = struct
111 |   module State = struct
112 |     type t = Xxh3_state.t
113 | 
114 |     let make ?secret ?seed () =
115 |       let state = Xxh3_state.create () in
116 |       begin match secret, seed with
117 |       | None, None -> Xxh3_state.h64bits_reset state
118 |       | None, Some seed -> Xxh3_state.h64bits_reset_with_seed state seed
119 |       | Some secret, None -> Xxh3_state.h64bits_reset_with_secret state secret
120 |       | Some seed, Some secret ->
121 |           Xxh3_state.h64bits_reset_with_secret_and_seed state seed secret
122 |       end;
123 |       state
124 | 
125 |     let update state s =
126 |       let b = Bytes.Slice.bytes s in
127 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
128 |       Xxh3_state.h64bits_update state b first last
129 | 
130 |     let copy src =
131 |       let dst = Xxh3_state.create () in
132 |       Xxh3_state.copy ~dst ~src; dst
133 |   end
134 | 
135 |   let id = "xxh3-64"
136 |   let length = 8
137 |   type seed = Xxh3_state.seed
138 |   type secret = Xxh3_state.secret
139 |   type t = Xxh3_state.h64_hash
140 | 
141 |   let value = Xxh3_state.h64bits_digest
142 |   let bytes ?(seed = Xxh3_state.no_seed) b =
143 |     Xxh3_state.seeded_64hash b 0 (Bytes.length b) seed
144 | 
145 |   let string ?seed s = bytes ?seed (Bytes.unsafe_of_string s)
146 | 
147 |   let slice ?(seed = Xxh3_state.no_seed) s =
148 |     let b = Bytes.Slice.bytes s in
149 |     Xxh3_state.seeded_64hash b (Bytes.Slice.first s) (Bytes.Slice.length s) seed
150 | 
151 |   let reader ?seed r =
152 |     let rec loop state r = match Bytes.Reader.read r with
153 |     | s when Bytes.Slice.is_eod s -> value state
154 |     | s -> State.update state s; loop state r
155 |     in
156 |     loop (State.make ?seed ()) r
157 | 
158 |   let reads ?(state = State.make ()) r =
159 |     Bytes.Reader.tap (State.update state) r, state
160 | 
161 |   let writes ?(state = State.make ()) w =
162 |     Bytes.Writer.tap (State.update state) w, state
163 | 
164 |   let equal = Int64.equal
165 |   let compare = Int64.compare
166 |   let of_binary_string s =
167 |     match Bytesrw_hex.check_binary_string_length ~length s with
168 |     | Error _ as e -> e
169 |     | Ok s -> Ok (layout (get_64u s 0))
170 | 
171 |   let to_binary_string = u64_to_binary_string
172 |   let of_hex s = match Bytesrw_hex.to_binary_string ~length s with
173 |   | Error _ as e -> e
174 |   | Ok s -> Ok (layout (get_64u s 0))
175 | 
176 |   let to_hex h = Printf.sprintf "%016Lx" h
177 |   let to_uint64 = Fun.id
178 |   let of_uint64 = Fun.id
179 |   let pp ppf h = Format.fprintf ppf "%016Lx" h
180 | end
181 | 
182 | module Xxh3_128 = struct
183 |   module State = struct
184 |     type t = Xxh3_state.t
185 | 
186 |     let make ?secret ?seed () =
187 |       let state = Xxh3_state.create () in
188 |       begin match secret, seed with
189 |       | None, None -> Xxh3_state.h128bits_reset state
190 |       | None, Some seed -> Xxh3_state.h128bits_reset_with_seed state seed
191 |       | Some secret, None -> Xxh3_state.h128bits_reset_with_secret state secret
192 |       | Some seed, Some secret ->
193 |           Xxh3_state.h128bits_reset_with_secret_and_seed state seed secret
194 |       end;
195 |       state
196 | 
197 |     let update state s =
198 |       let b = Bytes.Slice.bytes s in
199 |       let first = Bytes.Slice.first s and last = Bytes.Slice.length s in
200 |       Xxh3_state.h64bits_update state b first last
201 | 
202 |     let copy src =
203 |       let dst = Xxh3_state.create () in
204 |       Xxh3_state.copy ~dst ~src; dst
205 |   end
206 | 
207 |   let id = "xxh3-128"
208 |   let length = 16
209 |   type seed = Xxh3_state.seed
210 |   type secret = Xxh3_state.secret
211 |   type t = Xxh3_state.h128_hash
212 | 
213 |   let value = Xxh3_state.h128bits_digest
214 | 
215 |   let bytes ?(seed = Xxh3_state.no_seed) b =
216 |     Xxh3_state.seeded_128hash b 0 (Bytes.length b) seed
217 | 
218 |   let string ?seed s = bytes ?seed (Bytes.unsafe_of_string s)
219 | 
220 |   let slice ?(seed = Xxh3_state.no_seed) s =
221 |     let b = Bytes.Slice.bytes s in
222 |     Xxh3_state.seeded_128hash
223 |       b (Bytes.Slice.first s) (Bytes.Slice.length s) seed
224 | 
225 |   let reader ?seed r =
226 |     let rec loop state r = match Bytes.Reader.read r with
227 |     | s when Bytes.Slice.is_eod s -> value state
228 |     | s -> State.update state s; loop state r
229 |     in
230 |     loop (State.make ?seed ()) r
231 | 
232 |   let reads ?(state = State.make ()) r =
233 |     Bytes.Reader.tap (State.update state) r, state
234 | 
235 |   let writes ?(state = State.make ()) w =
236 |     Bytes.Writer.tap (State.update state) w, state
237 | 
238 |   let equal = String.equal
239 |   let compare = String.compare
240 |   let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s
241 |   let to_binary_string = Fun.id
242 |   let of_hex s = Bytesrw_hex.to_binary_string ~length s
243 |   let pp = Bytesrw_hex.pp_binary_string
244 |   let to_hex = Bytesrw_hex.of_binary_string
245 | end
246 | 


--------------------------------------------------------------------------------
/src/xxhash/bytesrw_xxhash.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** [XXH3-64] and [XXH3-128] hashes.
  7 | 
  8 |     This module provides support for the {{:https://xxhash.com/}XXH3}
  9 |     hash family with the [libxxhash] C library. *)
 10 | 
 11 | open Bytesrw
 12 | 
 13 | (** The type for [XXH3] hash functions. *)
 14 | module type Xxh3 = sig
 15 | 
 16 |   (** {1:hashes Hashes} *)
 17 | 
 18 |   val id : string
 19 |   (** [id] identifies the hash function. *)
 20 | 
 21 |   val length : int
 22 |   (** [length] is the byte length of hashes produced by the function. *)
 23 | 
 24 |   type seed = int64
 25 |   (** The type for seeds. *)
 26 | 
 27 |   type secret = string
 28 |   (** The type for secrets. *)
 29 | 
 30 |   type t
 31 |   (** The type for hashes. *)
 32 | 
 33 |   (** Hash state. *)
 34 |   module State : sig
 35 |     type t
 36 |     (** The type for hash state. *)
 37 | 
 38 |     val make : ?secret:secret -> ?seed:seed -> unit -> t
 39 |     (** [make ?secret ?seed ()] is an initial hash state with given
 40 |         parameters. If unspecified the hash is unseeded and there is
 41 |         no secret.
 42 | 
 43 |         Raises [Invalid_argument] if your secret is smaller than
 44 |         {!Bytesrw_xxhash.xxh3_secret_size_min}. *)
 45 | 
 46 |     val update : t -> Bytes.Slice.t -> unit
 47 |     (** [update state slice] updates [state] with the bytes in the range of
 48 |         [slice]. *)
 49 | 
 50 |     val copy : t -> t
 51 |     (** [copy t] is a copy of [t]. *)
 52 |   end
 53 | 
 54 |   val value : State.t -> t
 55 |   (** [value state] is the hash of [state]. This has no effect on
 56 |       [state] which can still be {!State.update}d. *)
 57 | 
 58 |   (** {1:hashing Hashing} *)
 59 | 
 60 |   val string : ?seed:seed -> string -> t
 61 |   (** [string s] is the hash of [s] with seed [seed] (if any). *)
 62 | 
 63 |   val bytes : ?seed:seed -> bytes -> t
 64 |   (** [bytes b] is the hash of [b] with seed [seed] (if any). *)
 65 | 
 66 |   val slice : ?seed:seed -> Bytes.Slice.t -> t
 67 |   (** [slice s] is the hash of the bytes in the range of [s] with seed
 68 |       [seed] (if any). *)
 69 | 
 70 |   val reader : ?seed:seed -> Bytes.Reader.t -> t
 71 |   (** [reader r] is the hash of the stream [r] with seed [seed] (if any).
 72 |       This consumes the reader. See also {!reads}. *)
 73 | 
 74 |   (** {1:streaming Hashing streams} *)
 75 | 
 76 |   val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t
 77 |   (** [reads r] is [hr, hstate] with:
 78 |       {ul
 79 |       {- [hr] a reader that taps the reads of [r] to update [hstate].}
 80 |       {- [hstate], a hash state of the reads made on [hr] so
 81 |          far. This is [state] if explicitely given, otherwise
 82 |          defaults to a fresh {!State.make}.}}
 83 |       To get intermediate or final hash results use {!value} on
 84 |       [hstate]. *)
 85 | 
 86 |   val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t
 87 |   (** [writes ?state w] is [hw, hstate] with:
 88 |       {ul
 89 |       {- [hw] a writer that taps the writes to update [hstate] before
 90 |          giving them to [w].}
 91 |       {- [hstate], a hash state of the writes made on [hw] so
 92 |          far. This is [state] if explicitely given, otherwise
 93 |          defaults to a fresh {!State.make}.}}
 94 |       To get intermediate or final hash results use {!value} on
 95 |       [hstate]. *)
 96 | 
 97 |   (** {1:preds Predicates and comparisons} *)
 98 | 
 99 |   val equal : t -> t -> bool
100 |   (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *)
101 | 
102 |   val compare : t -> t -> int
103 |   (** [compare] is a total order on hashes compatible with {!equal}. *)
104 | 
105 |   (** {1:converting Converting} *)
106 | 
107 |   val to_binary_string : t -> string
108 |   (** [to_binary_string h] is a big-endian binary representation
109 |       of [h] of length {!length}. *)
110 | 
111 |   val of_binary_string : string -> (t, string) result
112 |   (** [of_binary_string s] is a hash from the big-endian binary
113 |       representation stored in [s]. *)
114 | 
115 |   val to_hex : t -> string
116 |   (** [to_hex h] is the binary representation of [h] using lowercase
117 |       US-ASCII hex digits. *)
118 | 
119 |   val of_hex : string -> (t, string) result
120 |   (** [of_hex s] parses a sequence of hex digits into a hash. *)
121 | 
122 |   val pp : Format.formatter -> t -> unit
123 |   (** [pp] formats hashes for inspection. *)
124 | end
125 | 
126 | (** [XXH3-64] hash. *)
127 | module Xxh3_64 : sig
128 |   include Xxh3 (** @inline *)
129 | 
130 |   val to_uint64 : t -> int64
131 |   (** [to_int64 h] is [h] as an unsigned [int64] number. *)
132 | 
133 |   val of_uint64 : int64 -> t
134 |   (** [of_uint64 u] is [u] as a hash. *)
135 | end
136 | 
137 | 
138 | (** [XXH3-128] hash. *)
139 | module Xxh3_128 : Xxh3
140 | 
141 | (** {1:library Library parameters} *)
142 | 
143 | val version : unit -> string
144 | (** [version ()] is the version of the [xxhash] C library. *)
145 | 
146 | val xxh3_secret_size_min : unit -> int
147 | (** [xxh3_secret_size_min ()] is the bare minimum size for a custom
148 |     secret. *)
149 | 


--------------------------------------------------------------------------------
/src/xxhash/bytesrw_xxhash.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_xxhash


--------------------------------------------------------------------------------
/src/xxhash/bytesrw_xxhash_stubs.c:
--------------------------------------------------------------------------------
  1 | /*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*/
  5 | 
  6 | #include <caml/mlvalues.h>
  7 | #include <caml/alloc.h>
  8 | #include <caml/fail.h>
  9 | 
 10 | /* For the _withSecretandSeed funs, we need this. Also when the minimal
 11 |    version is bumped conditionals in these stubs can be removed.  */
 12 | 
 13 | #define XXH_PRIVATE_API /* For XXH_CPU_LITTLE_ENDIAN  */
 14 | #define XXH_STATIC_LINKING_ONLY
 15 | 
 16 | #include <xxhash.h>
 17 | 
 18 | #if XXH_VERSION_NUMBER < 800
 19 | #error "Unsupported libxxhash version, at least 0.8.0 is needed"
 20 | #endif
 21 | 
 22 | #define XXH3_state_t_val(v) (*((XXH3_state_t **) Data_custom_val(v)))
 23 | #define _BE64(v) (XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(v) : v)
 24 | 
 25 | /* Library parameters */
 26 | 
 27 | CAMLprim value ocaml_bytesrw_XXH_versionNumber (value unit)
 28 | { return Val_int (XXH_VERSION_NUMBER); }
 29 | 
 30 | CAMLprim value ocaml_bytesrw_XXH3_SECRET_SIZE_MIN (value unit)
 31 | { return Val_int (XXH3_SECRET_SIZE_MIN); }
 32 | 
 33 | /* Hashing bytes */
 34 | 
 35 | CAMLprim value ocaml_bytesrw_XXH3_64bits_withSeed
 36 | (value str, value ofs, value len, value seed)
 37 | {
 38 |   XXH64_hash_t h = XXH3_64bits_withSeed (Bp_val (str) + Int_val (ofs),
 39 |                                          Int_val (len), Int64_val (seed));
 40 |   return (caml_copy_int64 (h));
 41 | }
 42 | 
 43 | CAMLprim value ocaml_bytesrw_XXH3_128bits_withSeed
 44 | (value str, value ofs, value len, value seed)
 45 | {
 46 |   XXH128_hash_t h = XXH3_128bits_withSeed (Bp_val (str) + Int_val (ofs),
 47 |                                          Int_val (len), Int64_val (seed));
 48 |   value res = caml_alloc_string (16);
 49 |   ((XXH64_hash_t *)Bp_val(res))[0] = _BE64 (h.high64);
 50 |   ((XXH64_hash_t *)(Bp_val(res)))[1] = _BE64 (h.low64);
 51 |   return res;
 52 | }
 53 | 
 54 | /* XXH3_state_t */
 55 | 
 56 | static const char ocaml_bytesrw_err_secret_too_small[] = "Secret too small";
 57 | 
 58 | void ocaml_bytesrw_finalize_XXH3_state_t (value hst)
 59 | { XXH3_freeState (XXH3_state_t_val (hst)); }
 60 | 
 61 | CAMLprim value ocaml_bytesrw_XXH3_createState (value unit)
 62 | {
 63 |   XXH3_state_t *st = XXH3_createState ();
 64 |   if (!st) caml_failwith ("Could not allocate XXH3_state_t");
 65 |   value hst = caml_alloc_final (1, &ocaml_bytesrw_finalize_XXH3_state_t, 0, 1);
 66 |   XXH3_state_t_val(hst) = st;
 67 |   return hst;
 68 | }
 69 | 
 70 | CAMLprim value ocaml_bytesrw_XXH3_copyState (value dst, value src)
 71 | {
 72 |   XXH3_copyState (XXH3_state_t_val (dst), XXH3_state_t_val (src));
 73 |   return Val_unit;
 74 | }
 75 | 
 76 | /* XXH3_64 */
 77 | 
 78 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset (value hst)
 79 | { XXH3_64bits_reset (XXH3_state_t_val (hst)); return Val_unit; }
 80 | 
 81 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSeed
 82 | (value hst, value seed)
 83 | {
 84 |   XXH3_64bits_reset_withSeed (XXH3_state_t_val (hst), Int64_val (seed));
 85 |   return Val_unit;
 86 | }
 87 | 
 88 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSecret
 89 | (value hst, value secret)
 90 | {
 91 |   XXH_errorcode rc =
 92 |     XXH3_64bits_reset_withSecret (XXH3_state_t_val (hst), String_val (secret),
 93 |                                   caml_string_length (secret));
 94 |   if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small);
 95 |   return Val_unit;
 96 | }
 97 | 
 98 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSecretandSeed
 99 | (value hst, value secret, value seed)
100 | {
101 | #if XXH_VERSION_NUMBER < 801
102 |   caml_raise_sys_error
103 |     (caml_copy_string ("Need libxxhash >= 0.8.1 to support secret and hash"));
104 | #else
105 |   XXH_errorcode rc =
106 |     XXH3_64bits_reset_withSecretandSeed(XXH3_state_t_val (hst),
107 |                                         String_val (secret),
108 |                                         caml_string_length (secret),
109 |                                         Int64_val (seed));
110 |   if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small);
111 | #endif
112 |   return Val_unit;
113 | }
114 | 
115 | CAMLprim value ocaml_bytesrw_XXH3_64bits_update
116 | (value hst, value str, value ofs, value len)
117 | {
118 |   XXH3_64bits_update (XXH3_state_t_val (hst),  Bp_val (str) + Int_val (ofs),
119 |                       Int_val (len));
120 |   return Val_unit;
121 | }
122 | 
123 | CAMLprim value ocaml_bytesrw_XXH3_64bits_digest (value hst)
124 | { return caml_copy_int64 (XXH3_64bits_digest (XXH3_state_t_val (hst))); }
125 | 
126 | /* XXH3_128 */
127 | 
128 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset (value hst)
129 | { XXH3_128bits_reset (XXH3_state_t_val (hst)); return Val_unit; }
130 | 
131 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSeed
132 | (value hst, value seed)
133 | {
134 |   XXH3_128bits_reset_withSeed (XXH3_state_t_val (hst), Int64_val (seed));
135 |   return Val_unit;
136 | }
137 | 
138 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSecret
139 | (value hst, value secret)
140 | {
141 |   XXH_errorcode rc =
142 |     XXH3_128bits_reset_withSecret (XXH3_state_t_val (hst), String_val (secret),
143 |                                    caml_string_length (secret));
144 |   if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small);
145 |   return Val_unit;
146 | }
147 | 
148 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSecretandSeed
149 | (value hst, value secret, value seed)
150 | {
151 | #if XXH_VERSION_NUMBER < 801
152 |   caml_raise_sys_error
153 |     (caml_copy_string ("Need libxxhash >= 0.8.1 to support secret and hash"));
154 | #else
155 |   XXH_errorcode rc =
156 |     XXH3_128bits_reset_withSecretandSeed(XXH3_state_t_val (hst),
157 |                                          String_val (secret),
158 |                                          caml_string_length (secret),
159 |                                          Int64_val (seed));
160 |   if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small);
161 | #endif
162 |   return Val_unit;
163 | }
164 | 
165 | CAMLprim value ocaml_bytesrw_XXH3_128bits_update
166 | (value hst, value str, value ofs, value len)
167 | {
168 |   XXH3_128bits_update (XXH3_state_t_val (hst),  Bp_val (str) + Int_val (ofs),
169 |                        Int_val (len));
170 |   return Val_unit;
171 | }
172 | 
173 | CAMLprim value ocaml_bytesrw_XXH3_128bits_digest (value hst)
174 | {
175 |   XXH128_hash_t h = XXH3_128bits_digest (XXH3_state_t_val (hst));
176 |   value res = caml_alloc_string (16);
177 |   ((XXH64_hash_t *)Bp_val(res))[0] = _BE64 (h.high64);
178 |   ((XXH64_hash_t *)(Bp_val(res)))[1] = _BE64 (h.low64);
179 |   return res;
180 | }
181 | 


--------------------------------------------------------------------------------
/src/xxhash/libbytesrw_xxhash_stubs.clib:
--------------------------------------------------------------------------------
1 | bytesrw_xxhash_stubs.o


--------------------------------------------------------------------------------
/src/zlib/bytesrw_zlib.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** [deflate], [zlib] and [gzip] streams.
  7 | 
  8 |     This module provides support for reading and writing
  9 |     {{:https://www.rfc-editor.org/rfc/rfc1951}[deflate]},
 10 |     {{:https://www.rfc-editor.org/rfc/rfc1950}[zlib]} and
 11 |     {{:https://www.rfc-editor.org/rfc/rfc1952}[gzip]}
 12 |     streams with the {{:https://zlib.net/}[zlib]} C library.
 13 | 
 14 |     {b Slice lengths.} The slice length of readers created by
 15 |     filters of this module defaults to {!default_slice_length}. The hinted
 16 |     slice length of writers created by filters of this module defaults to
 17 |     {!default_slice_length} and they write on their writers with
 18 |     slices that respect their desires.
 19 | 
 20 |     {b Positions.} The position of readers and writers created
 21 |     by filters of this module default to [0]. *)
 22 | 
 23 | open Bytesrw
 24 | 
 25 | (** {1:errors Errors} *)
 26 | 
 27 | type Bytes.Stream.error += Error of string (** *)
 28 | (** The type for [deflate], [zlib] and [gzip] stream errors.
 29 | 
 30 |     Except for the {{!library}library parameters}, all functions of this
 31 |     module and resulting reader and writers may raise
 32 |     {!Bytesrw.Bytes.Stream.Error} with this error. *)
 33 | 
 34 | type level = int
 35 | (** The type for compression levels.
 36 | 
 37 |     An integer between [-1] and [9], see these
 38 |     {{!compression_level}constants}. *)
 39 | 
 40 | (** {1:streams Streams} *)
 41 | 
 42 | (** {{:https://www.rfc-editor.org/rfc/rfc1951}[deflate]} streams. *)
 43 | module Deflate : sig
 44 | 
 45 |   (** {1:decompress Decompress} *)
 46 | 
 47 |   val decompress_reads : ?leftover:bool -> unit -> Bytes.Reader.filter
 48 |   (** [decompress_reads () r] filters the reads of [r] by decompressing
 49 |       a [deflate] stream. If [leftover] is:
 50 |       {ul
 51 |       {- [false] (default), the reader errors if there is leftover data after
 52 |          the end of the [deflate] stream.}
 53 |       {- [true] the reader decompresses one [deflate] stream. Once the
 54 |          reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly
 55 |          after the end of the [deflate] stream and can be read again to
 56 |          perform other non-filtered reads.}} *)
 57 | 
 58 |   val decompress_writes : unit -> Bytes.Writer.filter
 59 |   (** [decompress_writes () w ~eod] filters writes on [w] by decompressing
 60 |       a [deflate] stream until {!Bytesrw.Bytes.Slice.eod} is written, if
 61 |       leftover data remains an error is raised. If [eod] is [false],
 62 |       the last {!Bytesrw.Bytes.Slice.eod} is not written on [w] and at this
 63 |       point [w] can be used again to perform other non-filtered
 64 |       writes.  *)
 65 | 
 66 |   (** {1:compress Compress} *)
 67 | 
 68 |   val compress_reads : ?level:level -> unit -> Bytes.Reader.filter
 69 |   (** [compress_reads ~level () r] filters the reads of [r] by compressing
 70 |       them to a [deflate] stream at level [level] (defaults to
 71 |       {!default_compression}). *)
 72 | 
 73 |   val compress_writes : ?level:level -> unit -> Bytes.Writer.filter
 74 |   (** [compress_writes ~level w ~eod] filters writes on [w] by compressing
 75 |       them to a [deflate] stream at level [level] (defaults to
 76 |       {!default_compression}) until {!Bytesrw.Bytes.Slice.eod} is written.
 77 |       If [eod] is false, the latter is not written on [w] and at that point
 78 |       [w] can be used again to perform non-filtered writes. *)
 79 | end
 80 | 
 81 | (** {{:https://www.rfc-editor.org/rfc/rfc1950}[zlib]} streams. *)
 82 | module Zlib : sig
 83 | 
 84 |   (** {1:decompress Decompress} *)
 85 | 
 86 |   val decompress_reads : ?leftover:bool -> unit -> Bytes.Reader.filter
 87 |   (** [decompress_reads () r] filters the reads of [r] by decompressing
 88 |       a [zlib] stream. If [leftover] is:
 89 |       {ul
 90 |       {- [false] (default), the reader errors if there is leftover data after
 91 |          the end of the [zlib] stream.}
 92 |       {- [true] the reader decompresses one [zlib] stream. Once the
 93 |          reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly
 94 |          after the end of the [zlib] stream and can be read again to
 95 |          perform other non-filtered reads.}} *)
 96 | 
 97 |   val decompress_writes : unit -> Bytes.Writer.filter
 98 |   (** [decompress_writes () w ~eod] filters writes on [w] by decompressing a
 99 |       [zlib] stream until {!Bytesrw.Bytes.Slice.eod} is written, if leftover
100 |       data remains an error is raised. If [eod] is [false] the last
101 |       {!Bytes.Slice.eod} is not written on [w] and at this point [w] can be
102 |       used again to perform othe non-filtered writes. *)
103 | 
104 |   (** {1:compress Compress} *)
105 | 
106 |   val compress_reads : ?level:level -> unit -> Bytes.Reader.filter
107 |   (** [compress_reads ~level () r] filters the reads of [r] by compressing
108 |       them to a [zlib] stream at level [level] (defaults to
109 |       {!default_compression}). *)
110 | 
111 |   val compress_writes : ?level:level -> unit -> Bytes.Writer.filter
112 |   (** [compress_writes ~level () w ~eod] filters writes on [w] by compressing
113 |       them to a [zlib] stream at level [level] (defaults to
114 |       {!default_compression}) until {!Bytesrw.Bytes.Slice.eod} is written.
115 |       If [eod] is [false], the latter is not written on [w] and at that point
116 |       [w] can be used again to perform non-filtered writes. *)
117 | end
118 | 
119 | (** {{:https://www.rfc-editor.org/rfc/rfc1952}[gzip]} streams.
120 | 
121 |     {b Note.} In general a [gzip] stream can be made of multiple,
122 |     independently compressed, members. The way the module handles [gzip]
123 |     member headers is described {{!Gzip.member_headers}here}. *)
124 | module Gzip : sig
125 | 
126 |   (** {1:decompress Decompress} *)
127 | 
128 |   val decompress_reads : ?all_members:bool -> unit -> Bytes.Reader.filter
129 |   (** [decompress_reads () r] filters the reads of [r] by decompressing [gzip]
130 |       members. If [all_members] is
131 |       {ul
132 |       {- [true] (default), this concatenates decompressed sequences of [gzip]
133 |          members like [gunzip] would do and errors if there is leftover data.}
134 |       {- [false] this decompresses a single [gzip] member. Once the resulting
135 |          reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly
136 |          after the end of the gzip member and can be used again to perform
137 |          other non-filtered reads (e.g. a new [gzip] member or other unrelated
138 |          data).}} *)
139 | 
140 |   val decompress_writes : unit -> Bytes.Writer.filter
141 |   (** [decompress_writes () w ~eod] filters the writes on [w] by
142 |       decompressing sequences of [gzip] members until
143 |       [Bytesrw.Bytes.Slice.eod] is written. If [eod] is [false], the
144 |       latter is not written on [w] and at this point [w] can be used
145 |       again to perform other non-filtered writes. *)
146 | 
147 |   (** {1:compress Compress} *)
148 | 
149 |   val compress_reads : ?level:level -> unit -> Bytes.Reader.filter
150 |   (** [compress_reads ~level () r] filters the reads of [r] by
151 |       compressing them to as a single [gzip] member at level [level]
152 |       (defaults to {!default_compression}). *)
153 | 
154 |   val compress_writes : ?level:level -> unit -> Bytes.Writer.filter
155 |   (** [compress_writes ~level () w ~eod] filters the writes on [w] by
156 |       compressing them to a single [gzip] member at level [level]
157 |       (defaults to {!default_compression}) until
158 |       {!Bytesrw.Bytes.Slice.eod} is written. If [eod] is [false] the
159 |       latter is not written on [w] and at this point [w] can be used
160 |       again to perform other non-filtered writes.  *)
161 | 
162 | (** {1:member_headers Member headers}
163 | 
164 |     Currently no facility is provided to access [gzip] member
165 |     headers. It seems those are little used in practice. However
166 |     support is provided to read and write [gzip] streams
167 |     member-by-member which is used by certain formats.
168 | 
169 |     On compression the member's header generated in the stream is
170 |     [zlib]'s default header; see documentation of [deflateSetHeader]
171 |     in the {{:https://www.zlib.net/manual.html} the manual}. Note that
172 |     this watermarks the operating system in the stream (at least in
173 |     v1.3.1 as of writing). *)
174 | end
175 | 
176 | (** {1:library Library parameters} *)
177 | 
178 | val version : unit -> string
179 | (** [version ()] is the version of the [zlib] C library. *)
180 | 
181 | val default_slice_length : int
182 | (** [default_slice_length] is [128KB]. Note, this choice is made by
183 |     [Bytesrw_zlib] not the [zlib] library. *)
184 | 
185 | (** {2:compression_level Compression levels} *)
186 | 
187 | val default_compression : level
188 | (** [default_compression] is [-1], the default compression level.  The
189 |     resulting level depends on the [zlib] library. *)
190 | 
191 | val no_compression : level
192 | (** [no_compression] is [0], indicates no compression. *)
193 | 
194 | val best_speed : level
195 | (** [best_speed] is [1], indicates fastest compression. *)
196 | 
197 | val best_compression : level
198 | (** [best_compression] is [9], indicates best compression. *)
199 | 


--------------------------------------------------------------------------------
/src/zlib/bytesrw_zlib.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_zlib


--------------------------------------------------------------------------------
/src/zlib/bytesrw_zlib_stubs.c:
--------------------------------------------------------------------------------
  1 | /*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*/
  5 | 
  6 | #include <caml/mlvalues.h>
  7 | #include <caml/alloc.h>
  8 | #include <caml/memory.h>
  9 | #include <caml/fail.h>
 10 | 
 11 | #include <stdbool.h>
 12 | #include <zlib.h>
 13 | 
 14 | #if ZLIB_VERNUM < 0x12b0
 15 | #error "Unsupported zlib version, at least 1.2.11 is needed"
 16 | #endif
 17 | 
 18 | #define z_streamp_val(v) (*((z_streamp *) Data_custom_val(v)))
 19 | 
 20 | /* OCaml Bytesrw_zlib.flush value map */
 21 | 
 22 | static int ocaml_zlib_flush[] =
 23 | { Z_NO_FLUSH, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH,
 24 |   Z_BLOCK, Z_TREES };
 25 | 
 26 | /* OCaml Bytesrw_zlib.Zbuf.t value fields */
 27 | 
 28 | enum ocaml_zbuf_fields
 29 | { ocaml_zbuf_bytes = 0, ocaml_zbuf_size, ocaml_zbuf_pos };
 30 | 
 31 | /* Library parameters */
 32 | 
 33 | CAMLprim value ocaml_bytesrw_zlib_version (value unit)
 34 | { return (caml_copy_string (zlibVersion ())); }
 35 | 
 36 | /* Inflate */
 37 | 
 38 | void ocaml_bytesrw_finalize_inflate_z_stream (value zs)
 39 | {
 40 |   z_streamp s = z_streamp_val (zs);
 41 |   if (s != NULL) {
 42 |     inflateEnd (s); caml_stat_free (s);
 43 |     z_streamp_val (zs) = NULL;
 44 |   }
 45 | }
 46 | 
 47 | CAMLprim value ocaml_bytesrw_free_inflate_z_stream (value zs)
 48 | { ocaml_bytesrw_finalize_inflate_z_stream (zs); return Val_unit; }
 49 | 
 50 | CAMLprim value ocaml_bytesrw_create_inflate_z_stream (value window_bits)
 51 | {
 52 |   z_streamp s = caml_stat_alloc (sizeof (z_stream));
 53 |   if (s == NULL) caml_failwith ("Could not allocate z_stream");
 54 |   s->zalloc = Z_NULL; s->zfree = Z_NULL; s->opaque = Z_NULL;
 55 |   s->avail_in = 0; s->next_in = Z_NULL;
 56 | 
 57 |   int rc = inflateInit2(s, Int_val (window_bits));
 58 |   if (rc != Z_OK) {
 59 |     value err = caml_copy_string( (s->msg) ? s->msg :
 60 |                                   "Unknown inflateInit2 error");
 61 |     caml_stat_free (s);
 62 |     caml_failwith_value (err);
 63 |   }
 64 |   value zs = caml_alloc_final (1, &ocaml_bytesrw_finalize_inflate_z_stream,
 65 |                                0, 1);
 66 |   z_streamp_val (zs) = s;
 67 |   return zs;
 68 | }
 69 | 
 70 | CAMLprim value ocaml_bytesrw_inflate_reset (value zs)
 71 | {
 72 |   z_streamp s = z_streamp_val (zs);
 73 |   int rc = inflateReset (s);
 74 |   if (rc != Z_OK)
 75 |   { caml_failwith ((s->msg) ? s->msg : "Unknown inflateReset error"); }
 76 |   return Val_unit;
 77 | }
 78 | 
 79 | CAMLprim value ocaml_bytesrw_inflate
 80 | (value zs, value src, value dst)
 81 | {
 82 |   z_streamp s = z_streamp_val (zs);
 83 | 
 84 |   size_t in_pos = Int_val (Field (src, ocaml_zbuf_pos));
 85 |   size_t in_size = Int_val (Field (src, ocaml_zbuf_size));
 86 |   s->next_in = Bytes_val (Field (src, ocaml_zbuf_bytes)) + in_pos;
 87 |   s->avail_in = in_size - in_pos;
 88 | 
 89 |   size_t out_pos = Int_val (Field (dst, ocaml_zbuf_pos));
 90 |   size_t out_size = Int_val (Field (dst, ocaml_zbuf_size));
 91 |   s->next_out = Bytes_val (Field (dst, ocaml_zbuf_bytes)) + out_pos;
 92 |   s->avail_out = out_size - out_pos;
 93 | 
 94 |   int rc = inflate (s, Z_NO_FLUSH);
 95 |   if (rc != Z_OK && rc != Z_STREAM_END && rc != Z_BUF_ERROR)
 96 |   { caml_failwith ((s->msg) ? s->msg : "Unknown inflate error"); }
 97 | 
 98 |   size_t in_consumed = in_size - in_pos - s->avail_in;
 99 |   size_t out_consumed = out_size - out_pos - s->avail_out;
100 | 
101 |   Store_field (src, ocaml_zbuf_pos, Val_int (in_pos + in_consumed));
102 |   Store_field (dst, ocaml_zbuf_pos, Val_int (out_pos + out_consumed));
103 | 
104 |   return Val_bool (rc == Z_STREAM_END);
105 | }
106 | 
107 | /* Deflate */
108 | 
109 | void ocaml_bytesrw_finalize_deflate_z_stream (value zs)
110 | {
111 |   z_streamp s = z_streamp_val (zs);
112 |   if (s != NULL) {
113 |     deflateEnd (s); caml_stat_free (s);
114 |     z_streamp_val (zs) = NULL;
115 |   }
116 | }
117 | 
118 | CAMLprim value ocaml_bytesrw_free_deflate_z_stream (value zs)
119 | { ocaml_bytesrw_finalize_deflate_z_stream (zs); return Val_unit; }
120 | 
121 | CAMLprim value ocaml_bytesrw_create_deflate_z_stream
122 | (value level, value window_bits)
123 | {
124 |   z_streamp s = caml_stat_alloc (sizeof (z_stream));
125 |   if (s == NULL) caml_failwith ("Could not allocate z_stream");
126 |   s->zalloc = Z_NULL; s->zfree = Z_NULL; s->opaque = Z_NULL;
127 |   s->avail_in = 0; s->next_in = Z_NULL;
128 | 
129 |   int rc = deflateInit2(s, Int_val (level), Z_DEFLATED, Int_val (window_bits),
130 |                         8, Z_DEFAULT_STRATEGY);
131 |   if (rc != Z_OK) {
132 |     value err = caml_copy_string( (s->msg) ? s->msg :
133 |                                   "Unknown deflateInit2 error");
134 |     caml_stat_free (s);
135 |     caml_failwith_value (err);
136 |   }
137 |   value zs = caml_alloc_final (1, &ocaml_bytesrw_finalize_deflate_z_stream,
138 |                                0, 1);
139 |   z_streamp_val (zs) = s;
140 |   return zs;
141 | }
142 | 
143 | CAMLprim value ocaml_bytesrw_deflate_reset (value zs)
144 | {
145 |   z_streamp s = z_streamp_val (zs);
146 |   int rc = deflateReset (s);
147 |   if (rc != Z_OK)
148 |   { caml_failwith ((s->msg) ? s->msg : "Unknown deflateReset error"); }
149 |   return Val_unit;
150 | }
151 | 
152 | CAMLprim value ocaml_bytesrw_deflate
153 | (value zs, value src, value dst, value flush_op)
154 | {
155 |   z_streamp s = z_streamp_val (zs);
156 | 
157 |   size_t in_pos = Int_val (Field (src, ocaml_zbuf_pos));
158 |   size_t in_size = Int_val (Field (src, ocaml_zbuf_size));
159 |   s->next_in = Bytes_val (Field (src, ocaml_zbuf_bytes)) + in_pos;
160 |   s->avail_in = in_size - in_pos;
161 | 
162 |   size_t out_pos = Int_val (Field (dst, ocaml_zbuf_pos));
163 |   size_t out_size = Int_val (Field (dst, ocaml_zbuf_size));
164 |   s->next_out = Bytes_val (Field (dst, ocaml_zbuf_bytes)) + out_pos;
165 |   s->avail_out = out_size - out_pos;
166 | 
167 |   int rc = deflate (s, ocaml_zlib_flush [Int_val (flush_op)]);
168 |   if (rc != Z_OK && rc != Z_STREAM_END && rc != Z_BUF_ERROR)
169 |   { caml_failwith ((s->msg) ? s->msg : "Unknown deflate error"); }
170 | 
171 |   size_t in_consumed = in_size - in_pos - s->avail_in;
172 |   size_t out_consumed = out_size - out_pos - s->avail_out;
173 | 
174 |   Store_field (src, ocaml_zbuf_pos, Val_int (in_pos + in_consumed));
175 |   Store_field (dst, ocaml_zbuf_pos, Val_int (out_pos + out_consumed));
176 | 
177 |   return Val_bool (rc == Z_STREAM_END);
178 | }
179 | 


--------------------------------------------------------------------------------
/src/zlib/libbytesrw_zlib_stubs.clib:
--------------------------------------------------------------------------------
1 | bytesrw_zlib_stubs.o


--------------------------------------------------------------------------------
/src/zstd/bytesrw_zstd.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** [zstd] streams.
  7 | 
  8 |     This module provides support for reading and writing
  9 |     {{:https://www.rfc-editor.org/rfc/rfc8878.html}[zstd]} compressed
 10 |     streams with the {{:http://zstd.net/}[libzstd]} C library.
 11 | 
 12 |     {b Positions.} The positions of readers and writers created
 13 |     by filters of this module default to [0]. *)
 14 | 
 15 | open Bytesrw
 16 | 
 17 | (** {1:errors Errors} *)
 18 | 
 19 | type Bytes.Stream.error += Error of string (** *)
 20 | (** The type for [zstd] stream errors.
 21 | 
 22 |     Except for the {{!lib}library parameters}, all functions of this
 23 |     module and resulting reader and writers may raise
 24 |     {!Bytesrw.Bytes.Stream.Error} with this error. *)
 25 | 
 26 | (** {1:decompress Decompress} *)
 27 | 
 28 | (** Decompression parameters. *)
 29 | module Dctx_params : sig
 30 |   type t
 31 |   (** The type for decompression parameters. *)
 32 | 
 33 |   val make : ?init:t -> ?window_log_max:int -> unit -> t
 34 |   (** [make ()] are the given compression parameters. Those unspecfied
 35 |       take the value of [init] which defaults to {!default}. See
 36 |       corresponding accessors for the default values. *)
 37 | 
 38 |   val default : t
 39 |   (** [default] are the default paramaters. See accessors for the
 40 |       default values. *)
 41 | 
 42 |   val window_log_max : t -> int
 43 |   (** [window_log_max] is the maximum back-reference distance in power
 44 |       of two allowed for decoding. Errors if the stream requires
 45 |       more. [0] is the default maximum window. This can be used to
 46 |       limit the memory used for decoding. *)
 47 | 
 48 |   (**/**)
 49 |   val unsafe_param : int -> int -> t -> t
 50 |   (**/**)
 51 | end
 52 | 
 53 | (** Decompression dictionaries. *)
 54 | module Ddict : sig
 55 |   type t
 56 |   (** The type for dictionaries. *)
 57 | 
 58 |   val of_binary_string : string -> t
 59 |   (** [of_binary_string s] is a dictionary from the binary data [s]. *)
 60 | end
 61 | 
 62 | val decompress_reads :
 63 |   ?all_frames:bool -> ?dict:Ddict.t -> ?params:Dctx_params.t -> unit ->
 64 |   Bytes.Reader.filter
 65 | (** [decompress_reads () r] filters the reads of [r] by decompressing
 66 |     [zstd] frames.
 67 |     {ul
 68 |     {- [dict] is the decompression dictionary, if any.}
 69 |     {- [params] defaults to {!Dctx_params.default}}
 70 |     {- [slice_length] defaults to {!dstream_out_size}.}
 71 |     {- If you get to create [r] and it has no constraints on its own
 72 |        use {!dstream_in_size} for its slices.}}
 73 |     If [all_frames] is:
 74 |     {ul
 75 |     {- [true] (default), this decompressses all frames until [r] returns
 76 |        {!Bytesrw.Bytes.Slice.eod} and concatenates the result.}
 77 |     {- [false] this decompresses a single frame. Once the resulting reader
 78 |        returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly after
 79 |        the end of frame and can be used again to perform other non-filtered
 80 |        reads (e.g. a new [zstd] frame or other unrelated data).}} *)
 81 | 
 82 | val decompress_writes :
 83 |   ?dict:Ddict.t -> ?params:Dctx_params.t -> unit -> Bytes.Writer.filter
 84 | (** [decompress_writes () w ~eod] filters the writes on [w] by decompressing
 85 |     sequences of [zstd] frames until {!Bytesrw.Bytes.Slice.eod} is written.
 86 |     If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written
 87 |     on [w] and at this point [w] can be used again to perform other non-filtered
 88 |     writes.
 89 |     {ul
 90 |     {- [dict] is the decompression dictionary, if any.}
 91 |     {- [params] defaults to {!Dctx_params.default}}
 92 |     {- [slice_length] defaults to {!dstream_in_size}}
 93 |     {- Compressed slice lengths abides to [w]'s desire but if you get to
 94 |        create it and it has no constraints on its own use
 95 |        {!dstream_out_size}.}} *)
 96 | 
 97 | (** {1:compress Compress}
 98 | 
 99 |     {b Warning.} The default {!Cctx_params.default} compression
100 |     parameters are those of the C library and do not perform
101 |     checksums. If you want to compress so that the [zstd] command
102 |     line tool can uncompress you need to checksum. See the
103 |     example in the {{!page-index.quick}quick start}. *)
104 | 
105 | (** Compression parameters. *)
106 | module Cctx_params : sig
107 | 
108 |   type clevel = int
109 |   (** The type for compression levels. See {!val-clevel}. *)
110 | 
111 |   type t
112 |   (** The type for compression parameters. *)
113 | 
114 |   val make :
115 |     ?init:t -> ?checksum:bool -> ?clevel:clevel -> ?window_log:int -> unit -> t
116 |   (** [make ()] are the given compression parameters. Those unspecfied
117 |       take the value of [init] which defaults to {!default}. See
118 |       corresponding accessors for the default values. *)
119 | 
120 |   val default : t
121 |   (** [default] are the default parameters. See accessors for the
122 |       default values. *)
123 | 
124 |   val checksum : t -> bool
125 |   (** [checksum p] is [true] if frames are checksumed. Defaults
126 |       to [false] {b Warning.} This mirrors the library default but does
127 |       not mirror the [zstd] tool default. *)
128 | 
129 |   val clevel : t -> clevel
130 |   (** [clevel p] is the compression level. Must be in
131 |       the  {!min_clevel} to {!max_clevel} range. Defaults
132 |       to {!default_clevel}. [0] means default compression level. *)
133 | 
134 |   val window_log : t -> int
135 |   (** [window_log] is the maximal allowed back-reference distance in power
136 |       of [2]. [0] means default window log. *)
137 | 
138 |   (**/**)
139 |   val unsafe_param : int -> int -> t -> t
140 |   (**/**)
141 | end
142 | 
143 | (** Compression dictionaries. *)
144 | module Cdict : sig
145 |   type t
146 |   (** The type for dictionaries. *)
147 | 
148 |   val of_binary_string : string -> t
149 |   (** [of_binary_string s] is a dictionary from the binary data [s]. *)
150 | end
151 | 
152 | val compress_reads :
153 |   ?dict:Cdict.t -> ?params:Cctx_params.t -> unit -> Bytes.Reader.filter
154 | (** [compress_reads () r] filters the reads of [r] by compressing them
155 |     to a single [zstd] frame.
156 |     {ul
157 |     {- [dict] is the compression dictionary, if any.}
158 |     {- [params] defaults to {!Cctx_params.default}.}
159 |     {- [slice_length] defaults to {!cstream_out_size}.}
160 |     {- If you get to create [r] and it has no constraints on its own
161 |        use {!cstream_in_size} for its slices.}} *)
162 | 
163 | val compress_writes :
164 |   ?dict:Cdict.t -> ?params:Cctx_params.t -> unit -> Bytes.Writer.filter
165 | (** [compress_writes () w ~eod] filters the writes on [w] by compressing them
166 |     to a single [zstd] frame until {!Bytesrw.Bytes.Slice.eod} is written.
167 |     If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written
168 |     on [w] and at this point [w] can be used again to perform non-filtered
169 |     writes.
170 |     {ul
171 |     {- [dict] is the compression dictionary, if any.}
172 |     {- [params] defaults to {!Cctx_params.default}.}
173 |     {- [slice_length] defaults to {!cstream_in_size}.}
174 |     {- Decompressed slice length abides to [w]'s desire but if you get to
175 |        create it and it has no constraints on its own use
176 |        {!cstream_out_size}.}} *)
177 | 
178 | (** {1:lib Library parameters} *)
179 | 
180 | val version : unit -> string
181 | (** [version ()] is the version of the [libzstd] C library. *)
182 | 
183 | val min_clevel : unit -> Cctx_params.clevel
184 | (** [min_clevel ()] is the minimum negative compression level allowed. *)
185 | 
186 | val max_clevel : unit -> Cctx_params.clevel
187 | (** [max_clevel ()] is the maximum compression level available. *)
188 | 
189 | val default_clevel : unit -> Cctx_params.clevel
190 | (** [default_clevel ()] is the default compression level. *)
191 | 
192 | val cstream_in_size : unit -> int
193 | (** [cstream_in_size ()] is the recommended length of input slices
194 |     on compression. *)
195 | 
196 | val cstream_out_size : unit -> int
197 | (** [cstream_out_size ()] is the recommended length of output slices
198 |     on compression. *)
199 | 
200 | val dstream_in_size : unit -> int
201 | (** [dstream_in_size ()] is the recommended length of input slices
202 |     on decompression. *)
203 | 
204 | val dstream_out_size : unit -> int
205 | (** [dstream_out_size ()] is the recommended length of output
206 |     slices on decompression. *)
207 | 


--------------------------------------------------------------------------------
/src/zstd/bytesrw_zstd.mllib:
--------------------------------------------------------------------------------
1 | Bytesrw_zstd


--------------------------------------------------------------------------------
/src/zstd/bytesrw_zstd_stubs.c:
--------------------------------------------------------------------------------
  1 | /*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*/
  5 | 
  6 | #include <caml/mlvalues.h>
  7 | #include <caml/alloc.h>
  8 | #include <caml/memory.h>
  9 | #include <caml/fail.h>
 10 | 
 11 | #include <stdbool.h>
 12 | #include <zstd.h>
 13 | 
 14 | #if ZSTD_VERSION_NUMBER < 10400
 15 | #error "Unsupported libzstd version, at least 1.4.0 is needed"
 16 | #endif
 17 | 
 18 | #define ZSTD_CCtx_val(v) (*((ZSTD_CCtx **) Data_custom_val(v)))
 19 | #define ZSTD_DCtx_val(v) (*((ZSTD_DCtx **) Data_custom_val(v)))
 20 | 
 21 | /* OCaml Bytesrw_zstd.end_directive value map */
 22 | 
 23 | static ZSTD_EndDirective ocaml_zstd_end_directive[] =
 24 | { ZSTD_e_continue, ZSTD_e_flush, ZSTD_e_end};
 25 | 
 26 | /* OCaml Bytesrw_zstd.c_parameter value map */
 27 | 
 28 | static ZSTD_dParameter ocaml_zstd_d_parameter[] =
 29 | {  ZSTD_d_windowLogMax};
 30 | 
 31 | /* OCaml Bytesrw_zstd.c_parameter value map */
 32 | 
 33 | static ZSTD_cParameter ocaml_zstd_c_parameter[] =
 34 | { ZSTD_c_compressionLevel, ZSTD_c_windowLog, ZSTD_c_checksumFlag};
 35 | 
 36 | /* OCaml Zbuf.t value fields */
 37 | 
 38 | enum ocaml_zbuf_fields
 39 | { ocaml_zbuf_bytes = 0, ocaml_zbuf_size, ocaml_zbuf_pos };
 40 | 
 41 | /* Library parameters */
 42 | 
 43 | CAMLprim value ocaml_bytesrw_ZSTD_versionString (value unit)
 44 | { return (caml_copy_string (ZSTD_versionString ())); }
 45 | 
 46 | CAMLprim value ocaml_bytesrw_ZSTD_minCLevel (value unit)
 47 | { return (Val_int (ZSTD_minCLevel ())); }
 48 | 
 49 | CAMLprim value ocaml_bytesrw_ZSTD_maxCLevel (value unit)
 50 | { return (Val_int (ZSTD_maxCLevel ())); }
 51 | 
 52 | CAMLprim value ocaml_bytesrw_ZSTD_defaultCLevel (value unit)
 53 | /* Once 1.5.0 is required we can use ZSTD_defaultCLevel () */
 54 | { return (Val_int (ZSTD_CLEVEL_DEFAULT)); }
 55 | 
 56 | CAMLprim value ocaml_bytesrw_ZSTD_CStreamInSize (value unit)
 57 | { return (Val_int (ZSTD_CStreamInSize ())); }
 58 | 
 59 | CAMLprim value ocaml_bytesrw_ZSTD_CStreamOutSize (value unit)
 60 | { return (Val_int (ZSTD_CStreamOutSize ())); }
 61 | 
 62 | CAMLprim value ocaml_bytesrw_ZSTD_DStreamInSize (value unit)
 63 | { return (Val_int (ZSTD_DStreamInSize ())); }
 64 | 
 65 | CAMLprim value ocaml_bytesrw_ZSTD_DStreamOutSize (value unit)
 66 | { return (Val_int (ZSTD_DStreamOutSize ())); }
 67 | 
 68 | /* Decompression */
 69 | 
 70 | void ocaml_bytesrw_finalize_ZSTD_DCtx (value dctx)
 71 | { size_t rc = ZSTD_freeDCtx (ZSTD_DCtx_val (dctx)); /* N.B. accepts NULL */ }
 72 | 
 73 | CAMLprim value ocaml_bytesrw_ZSTD_freeDCtx (value dctx)
 74 | {
 75 |   ZSTD_freeDCtx (ZSTD_DCtx_val (dctx));
 76 |   ZSTD_DCtx_val (dctx) = NULL;
 77 |   return Val_unit;
 78 | }
 79 | 
 80 | CAMLprim value ocaml_bytesrw_ZSTD_createDCtx (value unit)
 81 | {
 82 |   ZSTD_DCtx *ctx = ZSTD_createDCtx ();
 83 |   if (!ctx) caml_failwith ("Could not allocate ZSTD_DCtx");
 84 |   value dctx = caml_alloc_final (1, &ocaml_bytesrw_finalize_ZSTD_DCtx, 0, 1);
 85 |   ZSTD_DCtx_val (dctx) = ctx;
 86 |   return dctx;
 87 | }
 88 | 
 89 | CAMLprim value ocaml_bytesrw_ZSTD_DCtx_setParameter
 90 | (value dctx, value p, value v)
 91 | {
 92 |   ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx);
 93 | 	ZSTD_dParameter param =
 94 | 		(Is_block (p) ?
 95 | 		 Int_val (Field (p, 0)) : ocaml_zstd_d_parameter[Int_val (p)]);
 96 |   size_t rc = ZSTD_DCtx_setParameter (ctx, param, Int_val (v));
 97 |   if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc));
 98 |   return Val_unit;
 99 | }
100 | 
101 | CAMLprim value ocaml_bytesrw_ZSTD_DCtx_loadDictionary(value dctx, value s)
102 | {
103 |   ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx);
104 |   size_t rc = ZSTD_DCtx_loadDictionary (ctx, String_val (s),
105 |                                         caml_string_length (s));
106 |   if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc));
107 |   return Val_unit;
108 | }
109 | 
110 | CAMLprim value ocaml_bytesrw_ZSTD_decompressStream
111 | (value dctx, value src, value dst)
112 | {
113 |   ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx);
114 |   ZSTD_inBuffer bsrc;
115 |   bsrc.src = Bytes_val (Field (src, ocaml_zbuf_bytes));
116 |   bsrc.size = Int_val (Field (src, ocaml_zbuf_size));
117 |   bsrc.pos = Int_val (Field (src, ocaml_zbuf_pos));
118 | 
119 |   ZSTD_outBuffer bdst;
120 |   bdst.dst = Bytes_val (Field (dst, ocaml_zbuf_bytes));
121 |   bdst.size = Int_val (Field (dst, ocaml_zbuf_size));
122 |   bdst.pos = Int_val (Field (dst, ocaml_zbuf_pos));
123 | 
124 |   size_t rc = ZSTD_decompressStream (ctx, &bdst, &bsrc);
125 |   if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc));
126 | 
127 |   Store_field (src, ocaml_zbuf_pos, Val_int (bsrc.pos));
128 |   Store_field (dst, ocaml_zbuf_pos, Val_int (bdst.pos));
129 |   return Val_bool (rc == 0 /* End of frame */);
130 | }
131 | 
132 | /* Compression */
133 | 
134 | void ocaml_bytesrw_finalize_ZSTD_CCtx (value cctx)
135 | { ZSTD_freeCCtx (ZSTD_CCtx_val (cctx)); /* N.B. accepts NULL */ }
136 | 
137 | CAMLprim value ocaml_bytesrw_ZSTD_freeCCtx (value cctx)
138 | {
139 |   ZSTD_freeCCtx (ZSTD_CCtx_val (cctx));
140 |   ZSTD_CCtx_val (cctx) = NULL;
141 |   return Val_unit;
142 | }
143 | 
144 | CAMLprim value ocaml_bytesrw_ZSTD_createCCtx (value unit)
145 | {
146 |   ZSTD_CCtx *ctx = ZSTD_createCCtx ();
147 |   if (!ctx) caml_failwith ("Could not allocate ZSTD_CCtx");
148 |   value cctx = caml_alloc_final (1, &ocaml_bytesrw_finalize_ZSTD_CCtx, 0, 1);
149 |   ZSTD_CCtx_val (cctx) = ctx;
150 |   return cctx;
151 | }
152 | 
153 | CAMLprim value ocaml_bytesrw_ZSTD_CCtx_setParameter
154 | (value cctx, value p, value v)
155 | {
156 |   ZSTD_CCtx *ctx = ZSTD_CCtx_val (cctx);
157 | 	ZSTD_cParameter param =
158 | 		(Is_block (p) ?
159 | 		 Int_val (Field (p, 0)) : ocaml_zstd_c_parameter[Int_val (p)]);
160 | 
161 |   size_t rc = ZSTD_CCtx_setParameter (ctx, param, Int_val (v));
162 |   if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc));
163 |   return Val_unit;
164 | }
165 | 
166 | CAMLprim value ocaml_bytesrw_ZSTD_CCtx_loadDictionary(value cctx, value s)
167 | {
168 |   ZSTD_CCtx *ctx = ZSTD_CCtx_val (cctx);
169 |   size_t rc = ZSTD_CCtx_loadDictionary (ctx, String_val (s),
170 |                                         caml_string_length (s));
171 |   if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc));
172 |   return Val_unit;
173 | }
174 | 
175 | CAMLprim value ocaml_bytesrw_ZSTD_compressStream2
176 | (value dctx, value src, value dst, value edir)
177 | {
178 |   ZSTD_CCtx *ctx = ZSTD_CCtx_val (dctx);
179 | 
180 |   ZSTD_inBuffer bsrc;
181 |   bsrc.src = Bytes_val (Field (src, ocaml_zbuf_bytes));
182 |   bsrc.size = Int_val (Field (src, ocaml_zbuf_size));
183 |   bsrc.pos = Int_val (Field (src, ocaml_zbuf_pos));
184 | 
185 |   ZSTD_outBuffer bdst;
186 |   bdst.dst = Bytes_val (Field (dst, ocaml_zbuf_bytes));
187 |   bdst.size = Int_val (Field (dst, ocaml_zbuf_size));
188 |   bdst.pos = Int_val (Field (dst, ocaml_zbuf_pos));
189 | 
190 |   size_t rem = ZSTD_compressStream2 (ctx, &bdst, &bsrc,
191 |                                      ocaml_zstd_end_directive[Int_val (edir)]);
192 | 
193 |   if (ZSTD_isError (rem)) caml_failwith (ZSTD_getErrorName (rem));
194 |   Store_field (src, ocaml_zbuf_pos, Val_int (bsrc.pos));
195 |   Store_field (dst, ocaml_zbuf_pos, Val_int (bdst.pos));
196 |   return Val_bool (rem == 0 /* end_dir completed */);
197 | }
198 | 


--------------------------------------------------------------------------------
/src/zstd/libbytesrw_zstd_stubs.clib:
--------------------------------------------------------------------------------
1 | bytesrw_zstd_stubs.o


--------------------------------------------------------------------------------
/test/blake3tap.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | 
 8 | let stdin_reader unix ~slice_length = match unix with
 9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin
10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin
11 | 
12 | let stdout_writer unix ~slice_length = match unix with
13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout
14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout
15 | 
16 | let blake3_reader r = Bytesrw_blake3.Blake3.(to_hex (reader r))
17 | let blake3_reads r =
18 |   let i, hash = Bytesrw_blake3.Blake3.reads r in
19 |   i, fun () -> Bytesrw_blake3.Blake3.(to_hex (value hash))
20 | 
21 | let blake3_writes w =
22 |   let w, hash = Bytesrw_blake3.Blake3.writes w in
23 |   w, fun () -> Bytesrw_blake3.Blake3.(to_hex (value hash))
24 | 
25 | let sink processor unix ~slice_length = match processor with
26 | | `Reader ->
27 |     let i = stdin_reader unix ~slice_length in
28 |     i, blake3_reader i
29 | | `Writer ->
30 |     let i = stdin_reader unix ~slice_length in
31 |     let w, get_hash = blake3_writes (Bytes.Writer.ignore ()) in
32 |     let () = Bytes.Writer.write_reader w ~eod:true i in
33 |     i, get_hash ()
34 | 
35 | let filter processor unix ~slice_length = match processor with
36 | | `Reader ->
37 |     let i = stdin_reader unix ~slice_length in
38 |     let i, get_hash = blake3_reads i in
39 |     let o = stdout_writer unix ~slice_length in
40 |     Bytes.Writer.write_reader ~eod:true o i;
41 |     i, get_hash ()
42 | | `Writer ->
43 |     let i = stdin_reader unix ~slice_length in
44 |     let o = stdout_writer unix ~slice_length in
45 |     let o, get_hash = blake3_writes o in
46 |     Bytes.Writer.write_reader ~eod:true o i;
47 |     i, get_hash ()
48 | 
49 | let log_count i = Printf.eprintf "i:%d\n%!" (Bytes.Reader.read_length i)
50 | 
51 | let tap mode processor slice_length show_count unix =
52 |   try
53 |     let i, hash = match mode with
54 |     | `Sink -> sink processor unix ~slice_length
55 |     | `Filter -> filter processor unix ~slice_length
56 |     in
57 |     if show_count then log_count i;
58 |     Printf.eprintf "%s\n%!" hash;
59 |     Ok 0
60 |   with
61 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
62 | 
63 | open Cmdliner
64 | 
65 | let cmd =
66 |   let doc = "Rewrite stdin to stdout and report the BLAKE3 hash of data" in
67 |   let mode =
68 |     let c = `Sink, Arg.info ["sink"] ~doc:"Only read stdin." in
69 |     let d = `Filter, Arg.info ["filter"] ~doc:"Write stdin to stdout." in
70 |     Arg.(value & vflag `Filter [c; d])
71 |   in
72 |   let processor =
73 |     let r = `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader tap." in
74 |     let w = `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer tap." in
75 |     Arg.(value & vflag `Reader [r; w])
76 |   in
77 |   let slice_length =
78 |     let doc = "IO byte slices size." in
79 |     Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE")
80 |   in
81 |   let show_count =
82 |     let doc = "Show on $(b,stderr) final amount of bytes read." in
83 |     Arg.(value & flag & info ["show-count"] ~doc)
84 |   in
85 |   let unix =
86 |     let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in
87 |     Arg.(value & flag & info ["unix-io"] ~doc)
88 |   in
89 |   Cmd.v (Cmd.info "blake3tap" ~version:"%%VERSION%%" ~doc) @@
90 |   Term.(const tap $ mode $ processor $ slice_length $ show_count $ unix)
91 | 
92 | let main () = Cmd.eval_result' cmd
93 | let () = if !Sys.interactive then () else exit (main ())
94 | 


--------------------------------------------------------------------------------
/test/cookbook.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: CC0-1.0
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | 
 8 | (* Applying filters to strings *)
 9 | 
10 | let id s =
11 |   let filters = Bytesrw_zstd.[compress_reads (); decompress_reads ()] in
12 |   Bytes.Reader.filter_string filters s
13 | 
14 | let id s =
15 |   let filters = Bytesrw_zstd.[decompress_writes (); compress_writes ()] in
16 |   Bytes.Writer.filter_string filters s
17 | 
18 | (* Checksumming streams *)
19 | 
20 | let blake3_and_compress ~plain =
21 |   try
22 |     let plain, blake3 = Bytesrw_blake3.Blake3.reads plain in
23 |     let comp = Bytesrw_zstd.compress_reads () plain in
24 |     let comp = Bytes.Reader.to_string comp in
25 |     Ok (comp, Bytesrw_blake3.Blake3.value blake3)
26 |   with
27 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
28 | 
29 | let decompress_and_blake3 ~comp =
30 |   try
31 |     let plain = Bytesrw_zstd.decompress_reads () comp in
32 |     let r, blake3 = Bytesrw_blake3.Blake3.reads plain in
33 |     let s = Bytes.Reader.to_string r in
34 |     Ok (s, Bytesrw_blake3.Blake3.value blake3)
35 |   with
36 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
37 | 
38 | (* Limiting streams *)
39 | 
40 | let limited_decompress ~quota ~comp =
41 |   let buf = Buffer.create quota in
42 |   try
43 |     let plain = Bytesrw_zstd.decompress_reads () comp in
44 |     let () = Bytes.Reader.add_to_buffer buf (Bytes.Reader.limit quota plain) in
45 |     Ok (`Data (Buffer.contents buf))
46 |   with
47 |   |  Bytes.Stream.Error (Bytes.Stream.Limit _quota, _) ->
48 |       Ok (`Quota_exceeded (Buffer.contents buf))
49 |   |  Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
50 | 
51 | (* Tracing streams *)
52 | 
53 | let rtrace ~id r = Bytes.Reader.tap (Bytes.Slice.tracer ~id) r
54 | let wtrace ~id w = Bytes.Writer.tap (Bytes.Slice.tracer ~id) w
55 | 
56 | (* Adding your own stream error *)
57 | 
58 | module Myformat : sig
59 | 
60 |   (** {1:errors Errors} *)
61 | 
62 |   type Bytesrw.Bytes.Stream.error +=
63 |   | Error of string (** *)
64 |   (** The type for [myformat] stream errors. *)
65 | 
66 |   (** {1:streams Streams} *)
67 | 
68 |   (* … *)
69 | end = struct
70 |   type Bytes.Stream.error += Error of string
71 | 
72 |   let format_error =
73 |     let case msg = Error msg in
74 |     let message = function Error msg -> msg | _ -> assert false in
75 |     Bytes.Stream.make_format_error ~format:"myformat" ~case ~message
76 | 
77 |   let error e = Bytes.Stream.error format_error e
78 |   let reader_error r e = Bytes.Reader.error format_error r e
79 |   let writer_error w e = Bytes.Writer.error format_error w e
80 | end
81 | 


--------------------------------------------------------------------------------
/test/gziptrip.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | let stdin_reader unix ~slice_length = match unix with
  9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin
 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin
 11 | 
 12 | let stdout_writer unix ~slice_length = match unix with
 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout
 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout
 15 | 
 16 | let filter_stdio_with_reader_filter unix ~slice_length filter =
 17 |   let i = stdin_reader unix ~slice_length in
 18 |   let o = stdout_writer unix ~slice_length in
 19 |   Bytes.Writer.write_reader ~eod:true o (filter i);
 20 |   i, o
 21 | 
 22 | let filter_stdio_with_writer_filter unix ~slice_length filter =
 23 |   let i = stdin_reader unix ~slice_length in
 24 |   let o = stdout_writer unix ~slice_length in
 25 |   Bytes.Writer.write_reader ~eod:true (filter o) i;
 26 |   Bytes.Writer.write_eod o; (* Writer filters do not write eod *)
 27 |   i, o
 28 | 
 29 | let decompress processor unix ~slice_length = match processor with
 30 | | `Reader ->
 31 |     let d = Bytesrw_zlib.Gzip.decompress_reads () in
 32 |     filter_stdio_with_reader_filter unix ~slice_length d
 33 | | `Writer ->
 34 |     let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true in
 35 |     filter_stdio_with_writer_filter unix ~slice_length d
 36 | 
 37 | let compress processor unix ~slice_length = match processor with
 38 | | `Reader ->
 39 |     let c = Bytesrw_zlib.Gzip.compress_reads () in
 40 |     filter_stdio_with_reader_filter unix ~slice_length c
 41 | | `Writer ->
 42 |     let c = Bytesrw_zlib.Gzip.compress_writes () ~eod:true in
 43 |     filter_stdio_with_writer_filter unix ~slice_length c
 44 | 
 45 | let log_count i o =
 46 |   let i = Bytes.Reader.read_length i in
 47 |   let o = Bytes.Writer.written_length o in
 48 |   let pct = Float.to_int ((float o /. float i) *. 100.) in
 49 |   Printf.eprintf "i:%d o:%d o/i:%d%%\n%!" i o pct
 50 | 
 51 | let trip mode clevel processor slice_length show_count unix =
 52 |   try
 53 |     let i, o = match mode with
 54 |     | `Decompress -> decompress processor unix ~slice_length
 55 |     | `Compress -> compress processor unix ~slice_length
 56 |     in
 57 |     if show_count then log_count i o;
 58 |     Ok 0
 59 |   with
 60 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 61 | 
 62 | open Cmdliner
 63 | 
 64 | let cmd =
 65 |   let doc = "Gzip (De)compression from stdin to stdout" in
 66 |   let mode =
 67 |     let c = `Compress, Arg.info ["z"; "compress"] ~doc:"Compress." in
 68 |     let d = `Decompress, Arg.info ["d"; "decompress"] ~doc:"Decompress." in
 69 |     Arg.(value & vflag `Compress [c; d])
 70 |   in
 71 |   let processor =
 72 |     let r =
 73 |       `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader processor." in
 74 |     let w =
 75 |       `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer processor." in
 76 |     Arg.(value & vflag `Reader [r; w])
 77 |   in
 78 |   let slice_length =
 79 |     let doc = "IO byte slices size." in
 80 |     Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE")
 81 |   in
 82 |   let clevel =
 83 |     let doc =
 84 |       Printf.sprintf "Use compression level $(docv) (%d-%d)"
 85 |         (Bytesrw_zlib.default_compression) (Bytesrw_zlib.best_compression)
 86 |     in
 87 |     Arg.(value & opt int (Bytesrw_zlib.default_compression) &
 88 |          info ["l"] ~doc ~docv:"LEVEL")
 89 |   in
 90 |   let show_count =
 91 |     let doc = "Show on $(b,stderr) final amount of bytes read and written." in
 92 |     Arg.(value & flag & info ["show-count"] ~doc)
 93 |   in
 94 |   let unix =
 95 |     let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in
 96 |     Arg.(value & flag & info ["unix-io"] ~doc)
 97 |   in
 98 |   Cmd.v (Cmd.info "gziptrip" ~version:"%%VERSION%%" ~doc) @@
 99 |   Term.(const trip $ mode $ clevel $ processor $
100 |         slice_length $ show_count $ unix)
101 | 
102 | let main () = Cmd.eval_result' cmd
103 | let () = if !Sys.interactive then () else exit (main ())
104 | 


--------------------------------------------------------------------------------
/test/quickstart.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: CC0-1.0
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | (* Compile with:
 7 |    ocamlfind ocamlopt -package bytesrw,bytesrw.zstd -linkpkg quickstart.ml *)
 8 | 
 9 | open Bytesrw
10 | 
11 | let stdio_compress_reads () =
12 |   try
13 |     let stdin = Bytes.Reader.of_in_channel In_channel.stdin in
14 |     let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in
15 |     let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in
16 |     let zstdr = Bytesrw_zstd.compress_reads ~params () stdin in
17 |     Bytes.Writer.write_reader ~eod:true stdout zstdr;
18 |     Ok ()
19 |   with
20 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
21 |   | Sys_error e -> Error e
22 | 
23 | let stdio_compress_writes () =
24 |   try
25 |     let stdin = Bytes.Reader.of_in_channel In_channel.stdin in
26 |     let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in
27 |     let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in
28 |     let zstdw = Bytesrw_zstd.compress_writes ~params () ~eod:true stdout in
29 |     Bytes.Writer.write_reader ~eod:true zstdw stdin;
30 |     Ok ()
31 |   with
32 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
33 |   | Sys_error e -> Error e
34 | 
35 | let main () =
36 |   Result.fold ~ok:(Fun.const 0) ~error:(fun e -> prerr_endline e; 1) @@
37 |   if Array.exists (String.equal "-w") Sys.argv
38 |   then stdio_compress_writes ()
39 |   else stdio_compress_reads ()
40 | 
41 | let () = if !Sys.interactive then () else exit (main ())
42 | 


--------------------------------------------------------------------------------
/test/test_blake3.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | open B0_testing
 8 | 
 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n
10 | 
11 | (* Test vectors *)
12 | 
13 | type t =
14 |   { data : string;
15 |     blake3 : string }
16 | 
17 | let t0 =
18 |   { data = "";
19 |     blake3 =
20 |       "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"; }
21 | 
22 | let t1 =
23 |   { data =
24 |       "BLAKE3 is based on an optimized instance of the established \
25 |        hash function BLAKE2 and on the original Bao tree mode.";
26 |     blake3 =
27 |       "d352deef3f9b5aff803f7f2ab3aa4a15a0f21f4babce3534451057084155a280" }
28 | 
29 | (* Testing *)
30 | 
31 | let test_blake3 =
32 |   Test.test "Bytesrw_blake3.Blake3" @@ fun () ->
33 |   let module H = Bytesrw_blake3.Blake3 in
34 |   let testh t = t.blake3 and hex = H.to_hex in
35 |   assert (H.(hex (string t0.data) = testh t0));
36 |   assert (H.(hex (string t1.data) = testh t1));
37 |   begin repeat 5 @@ fun n ->
38 |     let r = Bytes.Reader.of_string ~slice_length:n t1.data in
39 |     let r, st = H.reads r in
40 |     let () = Bytes.Reader.discard r in
41 |     assert (H.(hex (value st) = testh t1));
42 |     let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in
43 |     let w, st = H.writes w in
44 |     let () = Bytes.Writer.write_string w t1.data in
45 |     assert (H.(hex (value st) = testh t1));
46 |     let () = Bytes.Writer.write_eod w in
47 |     assert (H.(hex (value st) = testh t1));
48 |     let () = Bytes.Writer.write_eod w in
49 |     assert (H.(hex (value st) = testh t1));
50 |   end;
51 |   let r, st = H.reads (Bytes.Reader.empty ()) in
52 |   let () = Bytes.Reader.discard r in
53 |   assert (H.(hex (value st) = testh t0));
54 |   let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in
55 |   let w, st = H.writes w in
56 |   let () = Bytes.Writer.write_eod w in
57 |   assert (H.(hex (value st) = testh t0));
58 |   let h = H.(string t1.data) in
59 |   assert (hex h = testh t1);
60 |   assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok));
61 |   assert (H.equal h
62 |             (H.of_binary_string (H.to_binary_string h) |> Result.get_ok));
63 |   ()
64 | 
65 | let main () =
66 |   Test.main @@ fun () ->
67 |   Test.log "Using libblake3 %s" (Bytesrw_blake3.version ());
68 |   Test.autorun ();
69 |   Gc.full_major ()
70 | 
71 | let () = if !Sys.interactive then () else exit (main ())
72 | 


--------------------------------------------------------------------------------
/test/test_md.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open B0_testing
  7 | open Bytesrw
  8 | 
  9 | let repeat ?__POS__ n f =
 10 |   Test.range ?__POS__ ~kind:"slice_length" ~first:1 ~last:n f
 11 | 
 12 | 
 13 | (* Test vectors *)
 14 | 
 15 | type t =
 16 |   { data : string; sha_1 : string; sha_256 : string; sha_384 : string;
 17 |     sha_512 : string; }
 18 | 
 19 | let t0 =
 20 |   { data = "";
 21 |     sha_1 = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
 22 |     sha_256 =
 23 |       "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
 24 |     sha_384 =
 25 |       "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da\
 26 |        274edebfe76f65fbd51ad2f14898b95b";
 27 |     sha_512 =
 28 |       "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce\
 29 |        47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; }
 30 | 
 31 | let t1 =
 32 |   { data =
 33 |       "SHA-1 is used to generate a condensed representation of a \
 34 |        message called a message digest.";
 35 |     sha_1 = "a2ee13f265a9b1d7ce14e6b539a6371710e8e4cd";
 36 |     sha_256 =
 37 |       "aa97506c73d3b4207b7bc50faf8bf04c9918524db0ac827073e13067807815f9";
 38 |     sha_384 =
 39 |       "e7a9d7ff5f40240d0747d7d387dd59b3cf4275ba13f5465668b96c357aee6b47\
 40 |        20356901245d4d1339211ca4faff5fde";
 41 |     sha_512 =
 42 |       "08d45fb0542f32fd4db91767eaad372374f754c035e4920cae61e778d9118430\
 43 |        a0807f26aa8eab5b1f9425cabf96378694d0a0fadaad14ea30f5d1077d7e910b" }
 44 | 
 45 | let test_mod (module H : Bytesrw_md.Sha) testh =
 46 |   let hex = H.to_hex in
 47 |   assert (H.(hex (string t0.data) = testh t0));
 48 |   assert (H.(hex (string t1.data) = testh t1));
 49 |   begin repeat 5 @@ fun n ->
 50 |     let r = Bytes.Reader.of_string ~slice_length:n t1.data in
 51 |     let r, st = H.reads r in
 52 |     let () = Bytes.Reader.discard r in
 53 |     assert (H.(hex (value st) = testh t1));
 54 |     let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in
 55 |     let w, st = H.writes w in
 56 |     let () = Bytes.Writer.write_string w t1.data in
 57 |     assert (H.(hex (value st) = testh t1));
 58 |     let () = Bytes.Writer.write_eod w in
 59 |     assert (H.(hex (value st) = testh t1));
 60 |     let () = Bytes.Writer.write_eod w in
 61 |     assert (H.(hex (value st) = testh t1));
 62 |   end;
 63 |   let r, st = H.reads (Bytes.Reader.empty ()) in
 64 |   let () = Bytes.Reader.discard r in
 65 |   assert (H.(hex (value st) = testh t0));
 66 |   let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in
 67 |   let w, st = H.writes w in
 68 |   let () = Bytes.Writer.write_eod w in
 69 |   assert (H.(hex (value st) = testh t0));
 70 |   let h = H.(string t1.data) in
 71 |   assert (hex h = testh t1);
 72 |   assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok));
 73 |   assert (H.equal h
 74 |             (H.of_binary_string (H.to_binary_string h) |> Result.get_ok));
 75 |   ()
 76 | 
 77 | let test_sha_1 =
 78 |   Test.test "Bytesrw_md.Sha_1" @@ fun () ->
 79 |   test_mod (module Bytesrw_md.Sha_1) (fun t -> t.sha_1);
 80 |   ()
 81 | 
 82 | let test_sha_256 =
 83 |   Test.test "Bytesrw_md.Sha_256" @@ fun () ->
 84 |   test_mod (module Bytesrw_md.Sha_256) (fun t -> t.sha_256);
 85 |   ()
 86 | 
 87 | let test_sha_384 =
 88 |   Test.test "Bytesrw_md.Sha_384" @@ fun () ->
 89 |   test_mod (module Bytesrw_md.Sha_384) (fun t -> t.sha_384);
 90 |   ()
 91 | 
 92 | let test_sha_512 =
 93 |   Test.test "Bytesrw_md.Sha_512" @@ fun () ->
 94 |   test_mod (module Bytesrw_md.Sha_512) (fun t -> t.sha_512);
 95 |   ()
 96 | 
 97 | (* Tests *)
 98 | 
 99 | let main () = Test.main @@ fun () -> Test.autorun (); Gc.full_major ()
100 | let () = if !Sys.interactive then () else exit (main ())
101 | 


--------------------------------------------------------------------------------
/test/test_utf.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Bytesrw
 7 | open B0_testing
 8 | 
 9 | let tracer = Bytes.Slice.tracer ~ppf:Format.std_formatter
10 | let reader_of_list ss =
11 |   Bytes.Reader.of_slice_seq (List.to_seq (List.map Bytes.Slice.of_string ss))
12 | 
13 | let test_guess_reader_encoding =
14 |   Test.test "Test Bytesrw_utf.guess_reader_encoding" @@ fun () ->
15 |   let test (s, exp) =
16 |     let r = Bytes.Reader.of_string s in
17 |     let g = Bytesrw_utf.guess_reader_encoding r in
18 |     let pp_enc = Bytesrw_utf.Encoding.pp in
19 |     if exp <> g then begin
20 |       Test.fail "expected: %a found: %a" pp_enc exp pp_enc g;
21 |     end else (Test.string (Bytes.Reader.to_string r) s)
22 |   in
23 |   (* This was taken from the uutf suite which also tested subsequent
24 |      malformed data. *)
25 |   (* UTF-8 guess *)
26 |   test ("", `Utf_8);
27 |   test ("\xEF", `Utf_8); (* malformed *)
28 |   test ("\xEF\xBB", `Utf_8); (* malformed *)
29 |   test ("\xEF\xBB\x00", `Utf_8); (* malformed *)
30 |   test ("\xEF\xBB\xBF\xEF\xBB\xBF", `Utf_8);
31 |   test ("\n\r\n", `Utf_8);
32 |   test ("\n\x80\xEF\xBB\xBF\n", `Utf_8); (* malformed *)
33 |   test ("\n\n\xEF\xBB\x00\n", `Utf_8); (* malformed *)
34 |   test ("\n\xC8\x99", `Utf_8);
35 |   test ("\xC8\x99\n", `Utf_8);
36 |   test ("\xC8\x99\n\n", `Utf_8);
37 |   test ("\xC8\x99\xC8\x99", `Utf_8);
38 |   test ("\xC8\x99\xF0\x9F\x90\xAB", `Utf_8);
39 |   test ("\xF0\x9F\x90\xAB\n", `Utf_8);
40 |   (* UTF-16BE guess *)
41 |   test ("\xFE\xFF\xDB\xFF\xDF\xFF\x00\x0A", `Utf_16be);
42 |   test ("\xFE\xFF\xDB\xFF\x00\x0A\x00\x0A", `Utf_16be); (* malformed *)
43 |   test ("\xFE\xFF\xDB\xFF\xDF", `Utf_16be); (* malformed *)
44 |   test ("\x80\x81\xDB\xFF\xDF\xFF\xFE\xFF\xDF\xFF\xDB\xFF",
45 |         `Utf_16be); (* malformed *)
46 |   test ("\x80\x81\xDF\xFF\xDB\xFF\xFE", `Utf_16be); (* malformred *)
47 |   test ("\x00\x0A", `Utf_16be);
48 |   test ("\x00\x0A\xDB", `Utf_16be); (* malformed *)
49 |   test ("\x00\x0A\xDB\xFF", `Utf_16be); (* malformed *)
50 |   test ("\x00\x0A\xDB\xFF\xDF", `Utf_16be); (* malformed *)
51 |   test ("\x00\x0A\xDB\xFF\xDF\xFF", `Utf_16be);
52 |   test ("\x00\x0A\x00\x0A", `Utf_16be);
53 |   (* UTF-16LE guess *)
54 |   test ("\xFF\xFE\xFF\xDB\xFF\xDF\x0A\x00", `Utf_16le);
55 |   test ("\xFF\xFE\xFF\xDB\x0A\x00\x0A\x00", `Utf_16le); (* malformed *)
56 |   test ("\xFF\xFE\xFF\xDB\xDF", `Utf_16le); (* malformed *)
57 |   test ("\x0A\x00", `Utf_16le);
58 |   test ("\x0A\x00\xDB", `Utf_16le); (* malformed *)
59 |   test ("\x0A\x00\xFF\xDB", `Utf_16le); (* malformed *)
60 |   test ("\x0A\x00\xFF\xDB\xDF", `Utf_16le); (* malformed *)
61 |   test ("\x0A\x00\xFF\xDB\xFF\xDF", `Utf_16le);
62 |   test ("\x0A\x00\x0A\x00", `Utf_16le);
63 |   ()
64 | 
65 | let main () = Test.main @@ fun () -> Test.autorun ()
66 | let () = if !Sys.interactive then () else exit (main ())
67 | 


--------------------------------------------------------------------------------
/test/test_xxhash.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open B0_testing
 7 | open Bytesrw
 8 | 
 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n
10 | 
11 | (* Test vectors *)
12 | 
13 | type t = { data : string; xxh3_64 : string; xxh3_128 : string; }
14 | let t0 =
15 |   { data = "";
16 |     xxh3_64 = "2d06800538d394c2";
17 |     xxh3_128 = "99aa06d3014798d86001c324468d497f"; }
18 | 
19 | let t1 =
20 |   { data = "xxHash is an extremely fast non-cryptographic hash algorithm";
21 |     xxh3_64 = "339d1954a9e06117";
22 |     xxh3_128 = "07b0640ae1f202e6990373bfcc1e5c75" }
23 | 
24 | let t2 =
25 |   { data = "abc\n";
26 |     xxh3_64 = "079364cbfdf9f4cb";
27 |     xxh3_128 = "158bbebef0c159c99914d27c01087efa"; }
28 | 
29 | let test_module (module H : Bytesrw_xxhash.Xxh3) testh =
30 |   let hex = H.to_hex in
31 |   assert (H.(hex (string t0.data) = testh t0));
32 |   assert (H.(hex (string t1.data) = testh t1));
33 |   assert (H.(hex (string t1.data) = testh t1));
34 |   begin repeat 5 @@ fun n ->
35 |     let r = Bytes.Reader.of_string ~slice_length:n t1.data in
36 |     let r, st = H.reads r in
37 |     let () = Bytes.Reader.discard r in
38 |     assert (H.(hex (value st) = testh t1));
39 |     let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in
40 |     let w, st = H.writes w in
41 |     let () = Bytes.Writer.write_string w t1.data in
42 |     assert (H.(hex (value st) = testh t1));
43 |     let () = Bytes.Writer.write_eod w in
44 |     assert (H.(hex (value st) = testh t1));
45 |     let () = Bytes.Writer.write_eod w in
46 |     assert (H.(hex (value st) = testh t1));
47 |   end;
48 |   let r, st = H.reads (Bytes.Reader.empty ()) in
49 |   let () = Bytes.Reader.discard r in
50 |   assert (H.(hex (value st) = testh t0));
51 |   let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in
52 |   let w, st = H.writes w in
53 |   let () = Bytes.Writer.write_eod w in
54 |   assert (H.(hex (value st) = testh t0));
55 |   let f h =
56 |     assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok));
57 |     assert (H.equal h
58 |               (H.of_binary_string (H.to_binary_string h) |> Result.get_ok));
59 |   in
60 |   f H.(string t0.data);
61 |   f H.(string t1.data);
62 |   f H.(string t2.data);
63 |   ()
64 | 
65 | let test_xxh3_64 =
66 |   Test.test "Bytesrw_xxhash.Xxh3_64" @@ fun () ->
67 |   test_module (module Bytesrw_xxhash.Xxh3_64) (fun t -> t.xxh3_64);
68 |   let h = Bytesrw_xxhash.Xxh3_64.(string t1.data) in
69 |   assert (Printf.sprintf "%Lx" (Bytesrw_xxhash.Xxh3_64.to_uint64 h) =
70 |           t1.xxh3_64);
71 |   ()
72 | 
73 | let test_xxh3_128 =
74 |   Test.test "Testing Bytesrw_xxhash.Xxh3_128" @@ fun () ->
75 |   test_module (module Bytesrw_xxhash.Xxh3_128) (fun t -> t.xxh3_128);
76 |   ()
77 | 
78 | (* Tests *)
79 | 
80 | let main () =
81 |   Test.main @@ fun () ->
82 |   Test.log "Using libxxhash %s" (Bytesrw_xxhash.version ());
83 |   Test.autorun ();
84 |   Gc.full_major ()
85 | 
86 | let () = if !Sys.interactive then () else exit (main ())
87 | 


--------------------------------------------------------------------------------
/test/test_zlib.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | open B0_testing
  8 | 
  9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n
 10 | 
 11 | let test_stream_error f =
 12 |   let is_exn = function Bytes.Stream.Error _ -> true | _ -> false in
 13 |   Test.catch f @@ function fnd -> Test.holds (is_exn fnd)
 14 | 
 15 | (* Test vectors *)
 16 | 
 17 | let a30_deflate =
 18 |   "\x4b\xc4\x0b\x00", String.make 30 'a'
 19 | 
 20 | let a30_zlib =
 21 |   "\x78\x9c\x4b\xc4\x0b\x00\xb0\x4f\x0b\x5f", String.make 30 'a'
 22 | 
 23 | let a_gz =
 24 |   "\x1f\x8b\x08\x08\x94\x58\x2d\x66\x00\x03\x61\x2e\x74\x78\x74\x00\x4b\x4c\
 25 |    \x84\x01\x00\xf0\xcd\x11\x4c\x0a\x00\x00\x00", String.make 10 'a'
 26 | 
 27 | let b_gz =
 28 |   "\x1f\x8b\x08\x08\x8c\x58\x2d\x66\x00\x03\x62\x2e\x74\x78\x74\x00\x4b\x4a\
 29 |    \x82\x01\x00\xf8\x4c\x2f\x42\x0a\x00\x00\x00", String.make 10 'b'
 30 | 
 31 | let more = "moreatthedoor"
 32 | 
 33 | (* Tests *)
 34 | 
 35 | let test_deflate_decompress_reads =
 36 |   Test.test "Bytesrw_zlib.Deflate.decompress_reads" @@ fun () ->
 37 |   begin repeat 5 @@ fun n -> (* One stream *)
 38 |   let c = Bytes.Reader.of_string ~slice_length:n (fst a30_deflate) in
 39 |   let d = Bytesrw_zlib.Deflate.decompress_reads () c in
 40 |   assert (Bytes.Reader.to_string d = snd a30_deflate)
 41 |   end;
 42 |   begin repeat 5 @@ fun n -> (* One stream with unexpected leftover data *)
 43 |     let c = fst a30_deflate ^ more in
 44 |     let c = Bytes.Reader.of_string ~slice_length:n c in
 45 |     let d = Bytesrw_zlib.Deflate.decompress_reads () c in
 46 |     test_stream_error @@ fun () -> Bytes.Reader.to_string d
 47 |   end;
 48 |   begin repeat 5 @@ fun n -> (* One stream with expected leftover data *)
 49 |     let c = (fst a30_deflate) ^ more in
 50 |     let c = Bytes.Reader.of_string ~slice_length:n c in
 51 |     let d = Bytesrw_zlib.Deflate.decompress_reads ~leftover:true () c in
 52 |     assert (Bytes.Reader.to_string d = snd a30_deflate);
 53 |     assert (Bytes.Reader.pos c = String.length (fst a30_deflate));
 54 |     assert (Bytes.Reader.to_string c = more);
 55 |   end;
 56 |   begin repeat 5 @@ fun n -> (* One stream with expected leftover data but "" *)
 57 |     let c = (fst a30_deflate) in
 58 |     let c = Bytes.Reader.of_string ~slice_length:n c in
 59 |     let d = Bytesrw_zlib.Deflate.decompress_reads ~leftover:true () c in
 60 |     assert (Bytes.Reader.to_string d = snd a30_deflate);
 61 |     assert (Bytes.Reader.pos c = String.length (fst a30_deflate));
 62 |     assert (Bytes.Reader.to_string c = "");
 63 |   end;
 64 |   ()
 65 | 
 66 | let test_deflate_decompress_writes =
 67 |   Test.test "Bytesrw_zlib.Deflate.decompress_writes" @@ fun () ->
 68 |   begin repeat 5 @@ fun n -> (* One stream. *)
 69 |     let b = Buffer.create 255 in
 70 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
 71 |     let d = Bytesrw_zlib.Deflate.decompress_writes ~eod:true () w in
 72 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_deflate) in
 73 |     let () = Bytes.Writer.write_reader ~eod:true d c in
 74 |     assert (Buffer.contents b = snd a30_deflate);
 75 |   end;
 76 |   begin repeat 5 @@ fun n -> (* One stream with unexpected leftover *)
 77 |     let b = Buffer.create 255 in
 78 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
 79 |     let d = Bytesrw_zlib.Deflate.decompress_writes ~eod:true () w in
 80 |     let c = fst a30_deflate in
 81 |     let c = Bytes.Reader.of_string ~slice_length:n (c ^ c) in
 82 |     test_stream_error @@ fun () ->
 83 |     Bytes.Writer.write_reader ~eod:true d c
 84 |   end;
 85 |   ()
 86 | 
 87 | let test_deflate_compress_reads =
 88 |   Test.test "Bytesrw_zlib.Deflate.compress_reads" @@ fun () ->
 89 |   begin repeat 5 @@ fun n ->
 90 |     let data = snd a30_deflate in
 91 |     let d = Bytes.Reader.of_string ~slice_length:n data in
 92 |     let c = Bytesrw_zlib.Deflate.compress_reads () ~slice_length:n d in
 93 |     let trip = Bytesrw_zlib.Deflate.decompress_reads () ~slice_length:n c in
 94 |     assert (Bytes.Reader.to_string trip = data)
 95 |   end
 96 | 
 97 | let test_deflate_compress_writes =
 98 |   Test.test "Bytesrw_zlib.Deflate.compress_writes" @@ fun () ->
 99 |   begin repeat 5 @@ fun n ->
100 |     let b = Buffer.create 255 in
101 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
102 |     let trip =
103 |       Bytesrw_zlib.Deflate.compress_writes () ~slice_length:n ~eod:true @@
104 |       Bytesrw_zlib.Deflate.decompress_writes () ~slice_length:n ~eod:true @@
105 |       w
106 |     in
107 |     let data = snd a30_deflate in
108 |     let r = Bytes.Reader.of_string ~slice_length:n data in
109 |     let () = Bytes.Writer.write_reader ~eod:true trip r in
110 |     assert (Buffer.contents b = data)
111 |   end
112 | 
113 | let test_zlib_decompress_reads =
114 |   Test.test "Bytesrw_zlib.Zlib.decompress_reads" @@ fun () ->
115 |   begin repeat 5 @@ fun n -> (* One stream *)
116 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zlib) in
117 |     let d = Bytesrw_zlib.Zlib.decompress_reads () c in
118 |     assert (Bytes.Reader.to_string d = snd a30_zlib);
119 |   end;
120 |   begin repeat 5 @@ fun n -> (* One stream with expected leftover data *)
121 |     let data = fst a30_zlib ^ more in
122 |     let c = Bytes.Reader.of_string ~slice_length:n data in
123 |     let d = Bytesrw_zlib.Zlib.decompress_reads ~leftover:true () c in
124 |     assert (Bytes.Reader.to_string d = snd a30_zlib);
125 |     assert (Bytes.Reader.pos c = String.length (fst a30_zlib));
126 |     assert (Bytes.Reader.to_string c = more);
127 |   end;
128 |   ()
129 | 
130 | let test_zlib_decompress_writes =
131 |   Test.test "Bytesrw_zlib.Zlib.decompress_writes" @@ fun () ->
132 |   begin repeat 5 @@ fun n -> (* One stream *)
133 |     let b = Buffer.create 255 in
134 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
135 |     let d = Bytesrw_zlib.Zlib.decompress_writes () ~eod:true w in
136 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zlib) in
137 |     let () = Bytes.Writer.write_reader ~eod:true d c in
138 |     assert (Buffer.contents b = snd a30_zlib);
139 |   end
140 | 
141 | let test_zlib_compress_reads =
142 |   Test.test "Bytesrw_zlib.Zlib.compress_reads" @@ fun () ->
143 |   begin repeat 5 @@ fun n ->
144 |     let data = snd a30_zlib in
145 |     let d = Bytes.Reader.of_string ~slice_length:n data in
146 |     let c = Bytesrw_zlib.Zlib.compress_reads () ~slice_length:n d in
147 |     let trip = Bytesrw_zlib.Zlib.decompress_reads () ~slice_length:n c in
148 |     assert (Bytes.Reader.to_string trip = data)
149 |   end
150 | 
151 | let test_zlib_compress_writes =
152 |   Test.test "Bytesrw_zlib.Zlib.compress_writes" @@ fun () ->
153 |   begin repeat 5 @@ fun n ->
154 |     let b = Buffer.create 255 in
155 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
156 |     let trip =
157 |       Bytesrw_zlib.Zlib.compress_writes () ~slice_length:n ~eod:true @@
158 |       Bytesrw_zlib.Zlib.decompress_writes () ~slice_length:n ~eod:true @@
159 |       w
160 |     in
161 |     let data = snd a30_zlib in
162 |     let r = Bytes.Reader.of_string ~slice_length:n data in
163 |     let () = Bytes.Writer.write_reader ~eod:true trip r in
164 |     assert (Buffer.contents b = data)
165 |   end
166 | 
167 | let test_gzip_decompress_reads =
168 |   Test.test "Bytesrw_zlib.Gzip.decompress_reads" @@ fun () ->
169 |   begin repeat 5 @@ fun n -> (* One member *)
170 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a_gz) in
171 |     let d0 = Bytesrw_zlib.Gzip.decompress_reads () c in
172 |     assert (Bytes.Reader.to_string d0 = snd a_gz);
173 |   end;
174 |   begin repeat 5 @@ fun n -> (* Two members, one shot *)
175 |     let data = (fst a_gz) ^ (fst b_gz) in
176 |     let c = Bytes.Reader.of_string ~slice_length:n data in
177 |     let d0 = Bytesrw_zlib.Gzip.decompress_reads () c in
178 |     assert (Bytes.Reader.to_string d0 = snd a_gz ^ snd b_gz);
179 |   end;
180 |   begin repeat 5 @@ fun n -> (* One member, leftover data *)
181 |     let data = (fst a_gz) ^ more in
182 |     let c = Bytes.Reader.of_string ~slice_length:n data in
183 |     let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in
184 |     assert (Bytes.Reader.to_string d0 = snd a_gz);
185 |     assert (Bytes.Reader.pos c = String.length (fst a_gz));
186 |     assert (Bytes.Reader.to_string c = more)
187 |   end;
188 |   begin repeat 5 @@ fun n -> (* One member, empty leftover data *)
189 |     let c = Bytes.Reader.of_string ~slice_length:n (fst b_gz) in
190 |     let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in
191 |     assert (Bytes.Reader.to_string d0 = snd b_gz);
192 |     assert (Bytes.Reader.pos c = String.length (fst b_gz));
193 |     assert (Bytes.Reader.to_string c = "")
194 |   end;
195 |   begin repeat 5 @@ fun n -> (* Two members, two shots *)
196 |     let cdata = (fst a_gz) ^ (fst b_gz) in
197 |     let dlen = String.length cdata in
198 |     let c = Bytes.Reader.of_string ~slice_length:n (cdata ^ more) in
199 |     let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in
200 |     assert (Bytes.Reader.to_string d0 = snd a_gz);
201 |     assert (Bytes.Reader.pos c = String.length (fst a_gz));
202 |     let d1 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in
203 |     assert (Bytes.Reader.to_string d1 = snd b_gz);
204 |     assert (Bytes.Reader.pos c = dlen);
205 |     assert (Bytes.Reader.to_string c = more);
206 |   end;
207 |   ()
208 | 
209 | let test_gzip_decompress_writes =
210 |   Test.test "Bytesrw_zlib.Gzip.decompress_writes" @@ fun () ->
211 |   begin repeat 5 @@ fun n ->
212 |     let b = Buffer.create 255 in
213 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
214 |     let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true w in
215 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a_gz) in
216 |     let () = Bytes.Writer.write_reader ~eod:true d c in
217 |     assert (Buffer.contents b = snd a_gz);
218 |   end;
219 |   begin repeat 5 @@ fun n ->
220 |     let data = (fst a_gz) ^ (fst b_gz) in
221 |     let res = (snd a_gz) ^ (snd b_gz) in
222 |     let b = Buffer.create 255 in
223 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
224 |     let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true w in
225 |     let c = Bytes.Reader.of_string ~slice_length:n data in
226 |     let () = Bytes.Writer.write_reader ~eod:true d c in
227 |     assert (Buffer.contents b = res);
228 |   end
229 | 
230 | let test_gzip_compress_reads =
231 |   Test.test "Bytesrw_zlib.Gzip.compress_reads" @@ fun () ->
232 |   begin repeat 5 @@ fun n ->
233 |     let data = snd a_gz in
234 |     let d = Bytes.Reader.of_string ~slice_length:n data in
235 |     let c = Bytesrw_zlib.Gzip.compress_reads () ~slice_length:n d in
236 |     let trip = Bytesrw_zlib.Gzip.decompress_reads () ~slice_length:n c in
237 |     assert (Bytes.Reader.to_string trip = data)
238 |   end
239 | 
240 | let test_gzip_compress_writes =
241 |   Test.test "Bytesrw_zlib.Gzip.compress_writes" @@ fun () ->
242 |   begin repeat 5 @@ fun n ->
243 |     let b = Buffer.create 255 in
244 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
245 |     let trip =
246 |       Bytesrw_zlib.Gzip.compress_writes () ~slice_length:n ~eod:true @@
247 |       Bytesrw_zlib.Gzip.decompress_writes () ~slice_length:n ~eod:true @@
248 |       w
249 |     in
250 |     let data = snd a_gz in
251 |     let r = Bytes.Reader.of_string ~slice_length:n data in
252 |     let () = Bytes.Writer.write_reader ~eod:true trip r in
253 |     assert (Buffer.contents b = data)
254 |   end
255 | 
256 | let main () =
257 |   Test.main @@ fun () ->
258 |   Test.log "Using zlib %s" (Bytesrw_zlib.version ());
259 |   Test.autorun ();
260 |   Gc.full_major ()
261 | 
262 | let () = if !Sys.interactive then () else exit (main ())
263 | 


--------------------------------------------------------------------------------
/test/test_zstd.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | open B0_testing
  8 | 
  9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n
 10 | 
 11 | let test_stream_error f =
 12 |   let is_exn = function Bytes.Stream.Error _ -> true | _ -> false in
 13 |   Test.catch f @@ fun fnd -> Test.holds (is_exn fnd)
 14 | 
 15 | (* Test vectors *)
 16 | 
 17 | let a30_zstd = (* Note this compressed data has a checksum. *)
 18 |   "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x61\x61\x01\x00\x0c\xc0\x02\x61\
 19 |    \x36\xf8\xbb", String.make 30 'a'
 20 | 
 21 | let b30_zstd = (* Note this compressed data has a checksum. *)
 22 |   "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x62\x62\x01\x00\x0c\xc0\x02\xb3\
 23 |    \x56\x1f\x2e", String.make 30 'b'
 24 | 
 25 | let more = "moreatthedoor"
 26 | 
 27 | (* Tests *)
 28 | 
 29 | let test_decompress_reads =
 30 |   Test.test "Bytesrw_zstd.decompress_reads" @@ fun () ->
 31 |   begin repeat 5 @@ fun n -> (* one frame *)
 32 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd) in
 33 |     let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in
 34 |     assert (Bytes.Reader.to_string d = snd a30_zstd)
 35 |   end;
 36 |   begin repeat 5 @@ fun n -> (* one frame with unexpected leftover data *)
 37 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd ^ more) in
 38 |     let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in
 39 |     test_stream_error @@ fun () -> Bytes.Reader.to_string d
 40 |   end;
 41 |   begin repeat 5 @@ fun n -> (* one frame with expected leftover data *)
 42 |     let c = fst a30_zstd ^ more in
 43 |     let c = Bytes.Reader.of_string ~slice_length:n c in
 44 |     let d =
 45 |       Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c
 46 |     in
 47 |     assert (Bytes.Reader.to_string d = snd a30_zstd);
 48 |     assert (Bytes.Reader.pos c = String.length (fst a30_zstd));
 49 |     assert (Bytes.Reader.to_string c = more);
 50 |   end;
 51 |   begin repeat 5 @@ fun n -> (* two frames, one shot *)
 52 |     let data = (fst a30_zstd) ^ (fst b30_zstd) in
 53 |     let c = Bytes.Reader.of_string ~slice_length:n data in
 54 |     let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in
 55 |     assert (Bytes.Reader.to_string d = snd a30_zstd ^ snd b30_zstd)
 56 |   end;
 57 |   begin repeat 5 @@ fun n -> (* two frames, two shots *)
 58 |     let data = (fst a30_zstd) ^ (fst b30_zstd) in
 59 |     let c = Bytes.Reader.of_string ~slice_length:n data in
 60 |     let d =
 61 |       Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c
 62 |     in
 63 |     assert (Bytes.Reader.to_string d = snd a30_zstd);
 64 |     assert (Bytes.Reader.pos c = String.length (fst a30_zstd));
 65 |     let d =
 66 |       Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c
 67 |     in
 68 |     assert (Bytes.Reader.to_string d = snd b30_zstd);
 69 |     assert (Bytes.Reader.to_string c = "");
 70 |   end;
 71 |   ()
 72 | 
 73 | let test_decompress_writes =
 74 |   Test.test "Bytesrw_zstd.decompress_writes" @@ fun () ->
 75 |   begin repeat 5 @@ fun n -> (* one frame *)
 76 |     let b = Buffer.create 255 in
 77 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
 78 |     let d = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in
 79 |     let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd) in
 80 |     let () = Bytes.Writer.write_reader ~eod:true d c in
 81 |     assert (Buffer.contents b = snd a30_zstd)
 82 |   end;
 83 |   begin repeat 5 @@ fun n -> (* one with unexpected leftover *)
 84 |     let b = Buffer.create 255 in
 85 |     let w = Bytes.Writer.of_buffer ~slice_length:n b in
 86 |     let d = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in
 87 |     let c = Bytes.Reader.of_string ~slice_length:n ((fst a30_zstd) ^ more) in
 88 |     test_stream_error @@ fun () -> Bytes.Writer.write_reader ~eod:true d c;
 89 |   end;
 90 |   ()
 91 | 
 92 | let test_compress_reads =
 93 |   Test.test "Bytesrw_zstd.compress_reads" @@ fun () ->
 94 |   repeat 5 @@ fun n ->
 95 |   let data = snd a30_zstd in
 96 |   let d = Bytes.Reader.of_string ~slice_length:n data in
 97 |   let c = Bytesrw_zstd.compress_reads () ~slice_length:n d in
 98 |   let trip = Bytesrw_zstd.decompress_reads () ~slice_length:n c in
 99 |   assert (Bytes.Reader.to_string trip = data)
100 | 
101 | let test_compress_writes =
102 |   Test.test "Bytesrw_zstd.compress_writes" @@ fun () ->
103 |   repeat 5 @@ fun n ->
104 |   let data = snd a30_zstd in
105 |   let b = Buffer.create 255 in
106 |   let w = Bytes.Writer.of_buffer ~slice_length:n b in
107 |   let dw = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in
108 |   let c = Bytesrw_zstd.compress_writes () ~slice_length:n ~eod:true dw in
109 |   let rdata = Bytes.Reader.of_string ~slice_length:n data in
110 |   let () = Bytes.Writer.write_reader ~eod:true c rdata in
111 |   assert (Buffer.contents b = data)
112 | 
113 | let test_dictionary_support =
114 |   Test.test "dictionary support" @@ fun () ->
115 |   repeat 5 @@ fun n ->
116 |   let dict = "aaaaaaaa" in
117 |   let data = "aaaaaaaabbbbbbbb" ^ "aaaaaaaa" ^ "aaaaaaaa" ^ "aaaaaaaa"in
118 |   let cdict = Bytesrw_zstd.Cdict.of_binary_string dict in
119 |   let ddict = Bytesrw_zstd.Ddict.of_binary_string dict in
120 |   let datar = Bytes.Reader.of_string data ~slice_length:n in
121 |   let c = Bytesrw_zstd.compress_reads ~dict:cdict () ~slice_length:n datar in
122 |   let d = Bytesrw_zstd.decompress_reads ~dict:ddict () ~slice_length:n c in
123 |   assert (Bytes.Reader.to_string d = data);
124 |   ()
125 | 
126 | let main () =
127 |   Test.main @@ fun () ->
128 |   Test.log "Using libsztd %s" (Bytesrw_zstd.version ());
129 |   Test.autorun ();
130 |   Gc.full_major ()
131 | 
132 | let () = if !Sys.interactive then () else exit (main ())
133 | 


--------------------------------------------------------------------------------
/test/utf8codec.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (* UTF-8 decoder with position tracking using Bytes.Reader.t and the
  7 |    Stdlib codecs.
  8 | 
  9 |    FIXME: integrate the optimizations there were done in Jsont_bytesrw. *)
 10 | 
 11 | open Bytesrw
 12 | 
 13 | (* XXX add these things to Stdlib.Uchar *)
 14 | 
 15 | let uchar_max_utf_8_byte_length = 4
 16 | let uchar_utf_8_byte_decode_length byte = (* or utf_8_byte_length_of_byte *)
 17 |   if byte < 0x80 then 1 else if byte < 0xC2 then 0 else
 18 |   if byte < 0xE0 then 2 else if byte < 0xF0 then 3 else
 19 |   if byte < 0xF5 then 4 else 0
 20 | 
 21 | (* Decoder *)
 22 | 
 23 | let sot = 0x1A0000  (* start of text U+10FFFF + 1 *)
 24 | let eot = 0x1A0001  (*   end of text U+10FFFF + 2 *)
 25 | 
 26 | type decoder =
 27 |   { file : string;
 28 |     reader : Bytes.Reader.t;
 29 |     mutable i : Bytes.t; (* Current input slice. *)
 30 |     mutable i_max : int; (* Maximum byte index in [i]. *)
 31 |     mutable i_next : int; (* Next byte index to read in [i]. *)
 32 |     mutable overlap : Bytes.t; (* Buffer for overlapping decodes. *)
 33 |     mutable u : int; (* Current Unicode scalar value or sot or eot. *)
 34 |     mutable byte_count : int; (* Global byte count. *)
 35 |     mutable line : int; (* Current line number. *)
 36 |     mutable line_start : int; (* Current line first global byte position. *) }
 37 | 
 38 | let make_decoder ?(file = "-") reader =
 39 |   let overlap = Bytes.create uchar_max_utf_8_byte_length in
 40 |   { file; reader; i = overlap (* overwritten by initial refill *);
 41 |     i_max = 0; i_next = 1 (* triggers an initial refill *);
 42 |     overlap; u = sot; byte_count = 0; line = 1; line_start = 0; }
 43 | 
 44 | (* Decoder position and errors *)
 45 | 
 46 | let col_next d = d.byte_count - d.line_start
 47 | let err_loc d line col fmt =
 48 |   Format.kasprintf failwith ("%s:%d:%d: " ^^ fmt) d.file line col
 49 | 
 50 | let err_malformed_utf_8 d =
 51 |   err_loc d d.line (col_next d)
 52 |     "UTF-8 decoding error at input byte %d" d.byte_count
 53 | 
 54 | (* Next character *)
 55 | 
 56 | let[@inline] is_eoslice d = d.i_next > d.i_max
 57 | let[@inline] is_eod d = d.i_max = - 1 (* Only happens on Slice.eod *)
 58 | let[@inline] available d = d.i_max - d.i_next + 1
 59 | let[@inline] next_utf_8_length d =
 60 |   uchar_utf_8_byte_decode_length (Bytes.get_uint8 d.i d.i_next)
 61 | 
 62 | let set_slice d slice =
 63 |   d.i <- Bytes.Slice.bytes slice;
 64 |   d.i_next <- Bytes.Slice.first slice;
 65 |   d.i_max <- d.i_next + Bytes.Slice.length slice - 1
 66 | 
 67 | let rec setup_overlap d start need = match need with
 68 | | 0 ->
 69 |     let slice = match available d with
 70 |     | 0 -> Bytes.Reader.read d.reader
 71 |     | length -> Bytes.Slice.make d.i ~first:d.i_next ~length
 72 |     in
 73 |     d.i <- d.overlap; d.i_next <- 0; d.i_max <- start; slice
 74 | | need ->
 75 |     if is_eoslice d then set_slice d (Bytes.Reader.read d.reader);
 76 |     if is_eod d
 77 |     then (d.byte_count <- d.byte_count - start; err_malformed_utf_8 d);
 78 |     let available = available d in
 79 |     let take = Int.min need available in
 80 |     for i = 0 to take - 1 do
 81 |       Bytes.set d.overlap (start + i) (Bytes.get d.i (d.i_next + i))
 82 |     done;
 83 |     d.i_next <- d.i_next + take; d.byte_count <- d.byte_count + take;
 84 |     setup_overlap d (start + take) (need - take)
 85 | 
 86 | let rec nextc d = match available d with
 87 | | a when a <= 0 ->
 88 |     if is_eod d then d.u <- eot else
 89 |     (set_slice d (Bytes.Reader.read d.reader); nextc d)
 90 | | a when a < uchar_max_utf_8_byte_length && a < next_utf_8_length d ->
 91 |     let s = setup_overlap d 0 (next_utf_8_length d) in nextc d; set_slice d s
 92 | | _ ->
 93 |     let udec = Bytes.get_utf_8_uchar d.i d.i_next in
 94 |     if not (Uchar.utf_decode_is_valid udec) then err_malformed_utf_8 d else
 95 |     let u = Uchar.to_int (Uchar.utf_decode_uchar udec) in
 96 |     let ulen = Uchar.utf_decode_length udec in
 97 |     d.i_next <- d.i_next + ulen; d.byte_count <- d.byte_count + ulen;
 98 |     begin match u with
 99 |     | 0x000D (* CR *) -> d.line_start <- d.byte_count; d.line <- d.line + 1;
100 |     | 0x000A (* LF *) ->
101 |         d.line_start <- d.byte_count;
102 |         if d.u <> 0x000D then d.line <- d.line + 1;
103 |     | _ -> ()
104 |     end;
105 |     d.u <- u
106 | 
107 | (* UTF-8 encoder. *)
108 | 
109 | type encoder =
110 |   { writer : Bytes.Writer.t; (* Destination of bytes. *)
111 |     o : Stdlib.Bytes.t; (* Buffer for slices. *)
112 |     o_max : int; (* Max index in [o]. *)
113 |     mutable o_next : int; (* Next writable index in [o]. *) }
114 | 
115 | let[@inline] rem_len e = e.o_max - e.o_next + 1
116 | 
117 | let make_encoder ?buf:(o = Bytes.create 65535) writer =
118 |   let len = Bytes.length o in
119 |   if len < 4 then invalid_arg "encoder bytes buffer length < 4" else
120 |   { writer; o; o_max = len - 1; o_next = 0 }
121 | 
122 | let flush e =
123 |   Bytes.Writer.write e.writer (Bytes.Slice.make e.o ~first:0 ~length:e.o_next);
124 |   e.o_next <- 0
125 | 
126 | let encode_eot e = flush e; Bytes.Writer.write_eod e.writer
127 | let encode_char e c =
128 |   if e.o_next > e.o_max then flush e;
129 |   Bytes.set e.o e.o_next c; e.o_next <- e.o_next + 1
130 | 
131 | let rec encode_uchar e u =
132 |   let rem_len = rem_len e in
133 |   if rem_len < 4 && Uchar.utf_8_byte_length u > rem_len
134 |   then (flush e; encode_uchar e u)
135 |   else (e.o_next <- e.o_next + Bytes.set_utf_8_uchar e.o e.o_next u)
136 | 
137 | let rec encode_substring e s first length =
138 |   if length = 0 then () else
139 |   let len = Int.min (rem_len e) length in
140 |   if len = 0 then (flush e; encode_substring e s first length) else
141 |   begin
142 |     Stdlib.Bytes.blit_string s first e.o e.o_next len;
143 |     e.o_next <- e.o_next + len;
144 |     encode_substring e s (first + len) (length - len)
145 |   end
146 | 
147 | let encode_string e s = encode_substring e s 0 (String.length s)
148 | 
149 | (* Testing *)
150 | 
151 | let uchars_of_string s =
152 |   let rec loop acc s i max =
153 |     if i > max then List.rev acc else
154 |     let d = String.get_utf_8_uchar s i in
155 |     let u = Uchar.to_int (Uchar.utf_decode_uchar d) in
156 |     loop (u :: acc) s (i + Uchar.utf_decode_length d) max
157 |   in
158 |   loop [] s 0 (String.length s - 1)
159 | 
160 | let ustr u = Printf.sprintf "U+%04X" u
161 | let strf = Printf.sprintf
162 | let exp exp fnd =
163 |   if (exp <> fnd)
164 |   then failwith (strf "expected %s found: %s" (ustr exp) (ustr fnd))
165 | 
166 | let rec assert_dec d = function
167 | | [] -> nextc d; exp eot d.u
168 | | u :: us -> nextc d; exp u d.u; assert_dec d us
169 | 
170 | let test s =
171 |   let uchars = uchars_of_string s in
172 |   for slice_length = 1 to Int.max (String.length s) 1 do
173 |     let d = make_decoder (Bytes.Reader.of_string ~slice_length s) in
174 |     exp sot d.u; assert_dec d uchars
175 |   done
176 | 
177 | let uchar_utf8 u =
178 |   let b = Bytes.create (Uchar.utf_8_byte_length u) in
179 |   ignore (Bytes.set_utf_8_uchar b 0 u); Bytes.unsafe_to_string b
180 | 
181 | let test_uchars () =
182 |   let rec loop u =
183 |     if Uchar.equal u Uchar.max then () else
184 |     let s = "abé" ^ (uchar_utf8 u) ^ "🐫" in
185 |     let s' = "abé" ^ (uchar_utf8 u) in
186 |     (test s; test s'; loop (Uchar.succ u))
187 |   in
188 |   loop Uchar.min
189 | 
190 | let test_encoder () =
191 |   let b = Buffer.create 255 in
192 |   let w = Bytes.Writer.of_buffer b in
193 |   let e = make_encoder ~buf:(Bytes.create 4) w in
194 |   encode_char e 'a';
195 |   encode_uchar e (Uchar.of_int 0x1F42B);
196 |   encode_string e "0123456789";
197 |   encode_eot e;
198 |   assert (Buffer.contents b = "a🐫0123456789");
199 |   ()
200 | 
201 | let main () =
202 |   B0_testing.Test.main @@ fun () ->
203 |   test "";
204 |   test "a";
205 |   test "abcd";
206 |   test "abécd";
207 |   test "🐫";
208 |   test_uchars ();
209 |   test_encoder ()
210 | 
211 | let () = if !Sys.interactive then () else exit (main ())
212 | 


--------------------------------------------------------------------------------
/test/xxh3tap.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | let stdin_reader unix ~slice_length = match unix with
  9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin
 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin
 11 | 
 12 | let stdout_writer unix ~slice_length = match unix with
 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout
 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout
 15 | 
 16 | let xxh3_64_reader r = Bytesrw_xxhash.Xxh3_64.(to_hex (reader r))
 17 | let xxh3_64_reads r =
 18 |   let i, hash = Bytesrw_xxhash.Xxh3_64.reads r in
 19 |   i, (fun () -> Bytesrw_xxhash.Xxh3_64.(to_hex (value hash)))
 20 | 
 21 | let xxh3_64_writes w =
 22 |   let w, hash = Bytesrw_xxhash.Xxh3_64.writes w in
 23 |   w, (fun () -> Bytesrw_xxhash.Xxh3_64.(to_hex (value hash)))
 24 | 
 25 | let xxh3_128_reader r = Bytesrw_xxhash.Xxh3_128.(to_hex (reader r))
 26 | let xxh3_128_reads r =
 27 |   let i, hash = Bytesrw_xxhash.Xxh3_128.reads r in
 28 |   i, (fun () -> Bytesrw_xxhash.Xxh3_128.(to_hex (value hash)))
 29 | 
 30 | let xxh3_128_writes w =
 31 |   let w, hash = Bytesrw_xxhash.Xxh3_128.writes w in
 32 |   w, (fun () -> Bytesrw_xxhash.Xxh3_128.(to_hex (value hash)))
 33 | 
 34 | let hash_reader = function
 35 | | `Xxh3_64 -> xxh3_64_reader | `Xxh3_128 -> xxh3_128_reader
 36 | 
 37 | let hash_reads = function
 38 | | `Xxh3_64 -> xxh3_64_reads | `Xxh3_128 -> xxh3_128_reads
 39 | 
 40 | let hash_writes = function
 41 | | `Xxh3_64 -> xxh3_64_writes | `Xxh3_128 -> xxh3_128_writes
 42 | 
 43 | let sink hash processor unix ~slice_length = match processor with
 44 | | `Reader ->
 45 |     let i = stdin_reader unix ~slice_length in
 46 |     i, hash_reader hash i
 47 | | `Writer ->
 48 |     let i = stdin_reader unix ~slice_length in
 49 |     let w, get_hash = hash_writes hash (Bytes.Writer.ignore ()) in
 50 |     let () = Bytes.Writer.write_reader w ~eod:true i in
 51 |     i, get_hash ()
 52 | 
 53 | let filter hash processor unix ~slice_length = match processor with
 54 | | `Reader ->
 55 |     let i = stdin_reader unix ~slice_length in
 56 |     let i, get_hash = hash_reads hash i in
 57 |     let o = stdout_writer unix ~slice_length in
 58 |     Bytes.Writer.write_reader ~eod:true o i;
 59 |     i, get_hash ()
 60 | | `Writer ->
 61 |     let i = stdin_reader unix ~slice_length in
 62 |     let o = stdout_writer unix ~slice_length in
 63 |     let o, get_hash = hash_writes hash o in
 64 |     Bytes.Writer.write_reader ~eod:true o i;
 65 |     i, get_hash ()
 66 | 
 67 | let log_count i = Printf.eprintf "i:%d\n%!" (Bytes.Reader.read_length i)
 68 | 
 69 | let tap mode hash processor slice_length show_count unix =
 70 |   try
 71 |     let i, hash = match mode with
 72 |     | `Sink -> sink hash processor unix ~slice_length
 73 |     | `Filter -> filter hash processor unix ~slice_length
 74 |     in
 75 |     if show_count then log_count i;
 76 |     Printf.eprintf "%s\n%!" hash;
 77 |     Ok 0
 78 |   with
 79 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 80 | 
 81 | open Cmdliner
 82 | 
 83 | let cmd =
 84 |   let doc = "Rewrite stdin to stdout and report the xxh3-{64,128} of data" in
 85 |   let mode =
 86 |     let c = `Sink, Arg.info ["sink"] ~doc:"Only read stdin." in
 87 |     let d = `Filter, Arg.info ["filter"] ~doc:"Write stdin to stdout." in
 88 |     Arg.(value & vflag `Filter [c; d])
 89 |   in
 90 |   let processor =
 91 |     let r = `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader tap." in
 92 |     let w = `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer tap." in
 93 |     Arg.(value & vflag `Reader [r; w])
 94 |   in
 95 |   let slice_length =
 96 |     let doc = "IO byte slices size." in
 97 |     Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE")
 98 |   in
 99 |   let hash =
100 |     let h64 = `Xxh3_64, Arg.info ["xxh3-64"] ~doc:"Use xxh3-64." in
101 |     let h128 = `Xxh3_128, Arg.info ["xxh3-128"] ~doc:"Use xxh3-128." in
102 |     Arg.(value & vflag `Xxh3_64 [h64; h128])
103 |   in
104 |   let show_count =
105 |     let doc = "Show on $(b,stderr) final amount of bytes read." in
106 |     Arg.(value & flag & info ["show-count"] ~doc)
107 |   in
108 |   let unix =
109 |     let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in
110 |     Arg.(value & flag & info ["unix-io"] ~doc)
111 |   in
112 |   Cmd.v (Cmd.info "xxh3tap" ~version:"%%VERSION%%" ~doc) @@
113 |   Term.(const tap $ mode $ hash $ processor $ slice_length $ show_count $ unix)
114 | 
115 | let main () = Cmd.eval_result' cmd
116 | let () = if !Sys.interactive then () else exit (main ())
117 | 


--------------------------------------------------------------------------------
/test/zstdtrip.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2024 The bytesrw programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Bytesrw
  7 | 
  8 | let stdin_reader unix ~slice_length = match unix with
  9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin
 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin
 11 | 
 12 | let stdout_writer unix ~slice_length = match unix with
 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout
 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout
 15 | 
 16 | let filter_stdio_with_reader_filter unix ~slice_length filter =
 17 |   let i = stdin_reader unix ~slice_length in
 18 |   let o = stdout_writer unix ~slice_length in
 19 |   Bytes.Writer.write_reader ~eod:true o (filter i);
 20 |   i, o
 21 | 
 22 | let filter_stdio_with_writer_filter unix ~slice_length filter =
 23 |   let i = stdin_reader unix ~slice_length in
 24 |   let o = stdout_writer unix ~slice_length in
 25 |   Bytes.Writer.write_reader ~eod:true (filter o) i;
 26 |   i, o
 27 | 
 28 | let decompress processor params unix ~slice_length = match processor with
 29 | | `Reader ->
 30 |     let d = Bytesrw_zstd.decompress_reads ~params () in
 31 |     filter_stdio_with_reader_filter unix ~slice_length d
 32 | | `Writer ->
 33 |     let d = Bytesrw_zstd.decompress_writes ~params () ~eod:true in
 34 |     filter_stdio_with_writer_filter unix ~slice_length d
 35 | 
 36 | let compress processor params unix ~slice_length = match processor with
 37 | | `Reader ->
 38 |     let c = Bytesrw_zstd.compress_reads ~params () in
 39 |     filter_stdio_with_reader_filter unix ~slice_length c
 40 | | `Writer ->
 41 |     let c = Bytesrw_zstd.compress_writes ~params () ~eod:true in
 42 |     filter_stdio_with_writer_filter unix ~slice_length c
 43 | 
 44 | let log_count i o =
 45 |   let i = Bytes.Reader.read_length i in
 46 |   let o = Bytes.Writer.written_length o in
 47 |   let pct = Float.to_int ((float o /. float i) *. 100.) in
 48 |   Printf.eprintf "i:%d o:%d o/i:%d%%\n%!" i o pct
 49 | 
 50 | let trip mode clevel no_checksum processor slice_length show_count unix =
 51 |   try
 52 |     let i, o = match mode with
 53 |     | `Decompress ->
 54 |         let params = Bytesrw_zstd.Dctx_params.make () in
 55 |         decompress processor params unix ~slice_length
 56 |     | `Compress ->
 57 |         let checksum = not no_checksum in
 58 |         let params = Bytesrw_zstd.Cctx_params.make ~clevel ~checksum () in
 59 |         compress processor params unix ~slice_length
 60 |     in
 61 |     if show_count then log_count i o;
 62 |     Ok 0
 63 |   with
 64 |   | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e
 65 | 
 66 | open Cmdliner
 67 | 
 68 | let cmd =
 69 |   let doc = "Zstd (De)compression from stdin to stdout" in
 70 |   let mode =
 71 |     let c = `Compress, Arg.info ["z"; "compress"] ~doc:"Compress." in
 72 |     let d = `Decompress, Arg.info ["d"; "decompress"] ~doc:"Decompress." in
 73 |     Arg.(value & vflag `Compress [c; d])
 74 |   in
 75 |   let processor =
 76 |     let r =
 77 |       `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader processor."
 78 |     in
 79 |     let w =
 80 |       `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer processor."
 81 |     in
 82 |     Arg.(value & vflag `Reader [r; w])
 83 |   in
 84 |   let slice_length =
 85 |     let doc = "IO byte slices size." in
 86 |     Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE")
 87 |   in
 88 |   let clevel =
 89 |     let doc =
 90 |       Printf.sprintf "Use compression level $(docv) (%d-%d)"
 91 |         (Bytesrw_zstd.min_clevel ()) (Bytesrw_zstd.max_clevel ())
 92 |     in
 93 |     Arg.(value & opt int (Bytesrw_zstd.default_clevel ()) &
 94 |          info ["l"] ~doc ~docv:"LEVEL")
 95 |   in
 96 |   let no_checksum =
 97 |     let doc = "Do not add integrity checksums" in
 98 |     Arg.(value & flag & info ["no-check"] ~doc)
 99 |   in
100 |   let show_count =
101 |     let doc = "Show on $(b,stderr) final amount of bytes read and written." in
102 |     Arg.(value & flag & info ["show-count"] ~doc)
103 |   in
104 |   let unix =
105 |     let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in
106 |     Arg.(value & flag & info ["unix-io"] ~doc)
107 |   in
108 |   Cmd.v (Cmd.info "zstdtrip" ~version:"%%VERSION%%" ~doc) @@
109 |   Term.(const trip $ mode $ clevel $ no_checksum $ processor $
110 |         slice_length $ show_count $ unix)
111 | 
112 | let main () = Cmd.eval_result' cmd
113 | let () = if !Sys.interactive then () else exit (main ())
114 | 


--------------------------------------------------------------------------------