├── .gitignore ├── .merlin ├── B0.ml ├── BRZO ├── CHANGES.md ├── DEVEL.md ├── LICENSE.md ├── README.md ├── _tags ├── doc ├── cookbook.mld ├── index.mld ├── notes.mld └── tutorial.mld ├── myocamlbuild.ml ├── opam ├── pkg ├── META └── pkg.ml ├── src ├── blake3 │ ├── bytesrw_blake3.ml │ ├── bytesrw_blake3.mli │ ├── bytesrw_blake3.mllib │ ├── bytesrw_blake3_stubs.c │ └── libbytesrw_blake3_stubs.clib ├── bytesrw.ml ├── bytesrw.mli ├── bytesrw.mllib ├── bytesrw_fmt.ml ├── bytesrw_fmt.mli ├── bytesrw_hex.ml ├── bytesrw_hex.mli ├── bytesrw_utf.ml ├── bytesrw_utf.mli ├── md │ ├── bytesrw_md.ml │ ├── bytesrw_md.mli │ ├── bytesrw_md.mllib │ ├── bytesrw_md_stubs.c │ └── libbytesrw_md_stubs.clib ├── unix │ ├── bytesrw_unix.ml │ ├── bytesrw_unix.mli │ └── bytesrw_unix.mllib ├── xxhash │ ├── bytesrw_xxhash.ml │ ├── bytesrw_xxhash.mli │ ├── bytesrw_xxhash.mllib │ ├── bytesrw_xxhash_stubs.c │ └── libbytesrw_xxhash_stubs.clib ├── zlib │ ├── bytesrw_zlib.ml │ ├── bytesrw_zlib.mli │ ├── bytesrw_zlib.mllib │ ├── bytesrw_zlib_stubs.c │ └── libbytesrw_zlib_stubs.clib └── zstd │ ├── bytesrw_zstd.ml │ ├── bytesrw_zstd.mli │ ├── bytesrw_zstd.mllib │ ├── bytesrw_zstd_stubs.c │ └── libbytesrw_zstd_stubs.clib └── test ├── blake3tap.ml ├── cookbook.ml ├── gziptrip.ml ├── quickstart.ml ├── test_blake3.ml ├── test_bytesrw.ml ├── test_md.ml ├── test_utf.ml ├── test_xxhash.ml ├── test_zlib.ml ├── test_zstd.ml ├── utf8codec.ml ├── xxh3tap.ml └── zstdtrip.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _b0 2 | _build 3 | tmp 4 | *.install -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | PKG b0.kit 2 | S src/** 3 | S test/** 4 | B _b0/** -------------------------------------------------------------------------------- /B0.ml: -------------------------------------------------------------------------------- 1 | open B0_kit.V000 2 | 3 | (* Library names *) 4 | 5 | let bytesrw = B0_ocaml.libname "bytesrw" 6 | let bytesrw_blake3 = B0_ocaml.libname "bytesrw.blake3" 7 | let bytesrw_md = B0_ocaml.libname "bytesrw.md" 8 | let bytesrw_unix = B0_ocaml.libname "bytesrw.unix" 9 | let bytesrw_xxhash = B0_ocaml.libname "bytesrw.xxhash" 10 | let bytesrw_zlib = B0_ocaml.libname "bytesrw.zlib" 11 | let bytesrw_zstd = B0_ocaml.libname "bytesrw.zstd" 12 | 13 | let b0_std = B0_ocaml.libname "b0.std" 14 | let unix = B0_ocaml.libname "unix" 15 | let cmdliner = B0_ocaml.libname "cmdliner" 16 | 17 | (* Libraries *) 18 | 19 | let bytesrw_lib = 20 | let srcs = [ `Dir ~/"src" ] in 21 | B0_ocaml.lib bytesrw ~srcs 22 | 23 | let bytesrw_blake3_lib = 24 | let doc = "BLAKE3 hashes" in 25 | let srcs = [ `Dir ~/"src/blake3" ] in 26 | let c_requires = Cmd.arg "-lblake3" in 27 | let requires = [bytesrw] and exports = [bytesrw] in 28 | B0_ocaml.lib bytesrw_blake3 ~srcs ~requires ~exports ~c_requires ~doc 29 | 30 | let bytesrw_md_lib = 31 | let doc = "SHA{1,2} hashes" in 32 | let srcs = [ `Dir ~/"src/md" ] in 33 | let c_requires = Cmd.arg "-lmd" in 34 | let requires = [bytesrw] and exports = [bytesrw] in 35 | B0_ocaml.lib bytesrw_md ~srcs ~requires ~exports ~c_requires ~doc 36 | 37 | let bytesrw_unix_lib = 38 | let srcs = [ `Dir ~/"src/unix" ] in 39 | let requires = [bytesrw; unix] and exports = [bytesrw] in 40 | B0_ocaml.lib bytesrw_unix ~srcs ~requires ~exports 41 | 42 | let bytesrw_xxhash_lib = 43 | let doc = "XXH hashes" in 44 | let srcs = [ `Dir ~/"src/xxhash" ] in 45 | let c_requires = Cmd.arg "-lxxhash" in 46 | let requires = [bytesrw] and exports = [bytesrw]in 47 | B0_ocaml.lib bytesrw_xxhash ~srcs ~requires ~exports ~c_requires ~doc 48 | 49 | let bytesrw_zlib_lib = 50 | let doc = "deflate, zlib and gzip streams" in 51 | let srcs = [ `Dir ~/"src/zlib" ] in 52 | let c_requires = Cmd.arg "-lz" in 53 | let requires = [bytesrw] and exports = [bytesrw] in 54 | B0_ocaml.lib bytesrw_zlib ~srcs ~requires ~exports ~c_requires ~doc 55 | 56 | let bytesrw_zstd_lib = 57 | let doc = "zstd streams" in 58 | let srcs = [ `Dir ~/"src/zstd" ] in 59 | let c_requires = Cmd.arg "-lzstd" in 60 | let requires = [bytesrw] and exports = [bytesrw] in 61 | B0_ocaml.lib bytesrw_zstd ~srcs ~requires ~exports ~c_requires ~doc 62 | 63 | (* Tests *) 64 | 65 | let test ?(requires = []) = 66 | B0_ocaml.test ~requires:(b0_std :: bytesrw :: requires) 67 | 68 | let utf8codec = test ~/"test/utf8codec.ml" ~long:true 69 | 70 | let test_quickstart = 71 | test ~/"test/quickstart.ml" ~requires:[bytesrw_zstd] ~run:false 72 | 73 | let test_cookbook = 74 | let requires = [bytesrw_zstd; bytesrw_blake3] in 75 | test ~/"test/cookbook.ml" ~requires ~run:false 76 | 77 | let test_bytesrw = test ~/"test/test_bytesrw.ml" ~requires:[] 78 | let test_utf = test ~/"test/test_utf.ml" 79 | let test_blake3 = test ~/"test/test_blake3.ml" ~requires:[bytesrw_blake3] 80 | let test_blake3 = test ~/"test/test_md.ml" ~requires:[bytesrw_md] 81 | let test_xxhash = test ~/"test/test_xxhash.ml" ~requires:[bytesrw_xxhash] 82 | let test_zlib = test ~/"test/test_zlib.ml" ~requires:[bytesrw_zlib] 83 | let test_zstd = test ~/"test/test_zstd.ml" ~requires:[bytesrw_zstd] 84 | 85 | let tool_requires = [cmdliner; unix; bytesrw_unix] 86 | 87 | let blake3tap = 88 | let doc = "Hash stdin with blake3" in 89 | let requires = bytesrw_blake3 :: tool_requires in 90 | test ~/"test/blake3tap.ml" ~run:false ~requires ~doc 91 | 92 | let xxh3tap = 93 | let doc = "Hash stdin with xxh3" in 94 | let requires = bytesrw_xxhash :: tool_requires in 95 | test ~/"test/xxh3tap.ml" ~run:false ~requires ~doc 96 | 97 | let gziptrip = 98 | let doc = "Gzip (De)compression from stdin to stdout" in 99 | let requires = bytesrw_zlib :: tool_requires in 100 | test ~/"test/gziptrip.ml" ~run:false ~requires ~doc 101 | 102 | let zstdtrip = 103 | let doc = "Zstd (De)compression from stdin to stdout" in 104 | let requires = bytesrw_zstd :: tool_requires in 105 | test ~/"test/zstdtrip.ml" ~run:false ~requires ~doc 106 | 107 | (* Packs *) 108 | 109 | let default = 110 | let meta = 111 | B0_meta.empty 112 | |> ~~ B0_meta.authors ["The bytesrw programmers"] 113 | |> ~~ B0_meta.maintainers ["Daniel Bünzli "] 114 | |> ~~ B0_meta.homepage "https://erratique.ch/software/bytesrw" 115 | |> ~~ B0_meta.online_doc "https://erratique.ch/software/bytesrw/doc" 116 | |> ~~ B0_meta.licenses ["ISC"] 117 | |> ~~ B0_meta.repo "git+https://erratique.ch/repos/bytesrw.git" 118 | |> ~~ B0_meta.issues "https://github.com/dbuenzli/bytesrw/issues" 119 | |> ~~ B0_meta.description_tags 120 | ["bytes"; "streaming"; "zstd"; "zlib"; "gzip"; "deflate"; 121 | "sha1"; "sha2"; "compression"; "hashing"; 122 | "utf"; "xxhash"; "blake3"; "org:erratique"; ] 123 | |> ~~ B0_opam.build 124 | {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%" 125 | "--with-conf-libblake3" "%{conf-libblake3:installed}%" 126 | "--with-conf-libmd" "%{conf-libmd:installed}%" 127 | "--with-conf-xxhash" "%{conf-xxhash:installed}%" 128 | "--with-conf-zlib" "%{conf-zlib:installed}%" 129 | "--with-conf-zstd" "%{conf-zstd:installed}%"]]|} 130 | |> ~~ B0_opam.depopts ["conf-xxhash", ""; 131 | "conf-zlib", ""; 132 | "conf-zstd", ""; 133 | "conf-libmd", ""; 134 | "conf-libblake3", ""; ] 135 | |> ~~ B0_opam.conflicts [ "conf-zstd", {|< "1.3.8"|}] (* should be 1.4 *) 136 | |> ~~ B0_opam.depends 137 | [ "ocaml", {|>= "4.14.0"|}; 138 | "ocamlfind", {|build|}; 139 | "ocamlbuild", {|build|}; 140 | "topkg", {|build & >= "1.0.3"|}; 141 | ] 142 | |> B0_meta.tag B0_opam.tag 143 | in 144 | B0_pack.make "default" ~doc:"The bytesrw package" ~meta ~locked:true @@ 145 | B0_unit.list () 146 | -------------------------------------------------------------------------------- /BRZO: -------------------------------------------------------------------------------- 1 | (srcs-x pkg test tmp) -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2 | - Fix `Bytesrw_xxhash.Xxh64.{to_hex,pp}`. Leading zeros 3 | were not being printed (#5). 4 | - Change unuseful signature of `Slice.break`: do not return 5 | `None` if any of `Slice.take` or `Slice.drop` does. Simply 6 | return the result of both operations. 7 | - Fix wrong bound checks in `Slice.{sub,make}[_or_eod]`. The functions 8 | now behave like `Bytes.sub` as far as indexing is allowed. Thanks 9 | to Adrián Montesinos González for the report and suggesting the fix (#4). 10 | - `bytesrw.*` libraries are made to export `bytesrw`. 11 | 12 | v0.1.0 2024-11-28 Zagreb 13 | ------------------------ 14 | 15 | First release. 16 | 17 | Supported by a grant from the OCaml Software Foundation. 18 | -------------------------------------------------------------------------------- /DEVEL.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | b0 test 4 | 5 | # Benchmarking 6 | 7 | hyperfine "$(b0 --path -- zstdtrip) -d < tmp/webster.zst > /dev/null" 8 | hyperfine 'zstd -c -d < tmp/webster.zst > /dev/null' 9 | 10 | hyperfine "$(b0 --path -- gziptrip) -d < tmp/webster.gz > /dev/null" 11 | hyperfine 'gunzip -c < tmp/webster.gz > /dev/null' 12 | 13 | hyperfine "$(b0 --path -- xxh3tap) --sink < tmp/webster" 14 | hyperfine 'xxhsum -H3 < tmp/webster' 15 | 16 | hyperfine "$(b0 --path -- blake3tap) --sink < tmp/webster" 17 | hyperfine 'b3sum < tmp/webster' 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2024 The bytesrw programmers 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 | PERFORMANCE OF THIS SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Bytesrw – Composable byte stream readers and writers for OCaml 2 | ============================================================== 3 | 4 | Bytesrw extends the OCaml `Bytes` module with composable, memory 5 | efficient, byte stream readers and writers compatible with effect 6 | based concurrency. 7 | 8 | Except for byte slice life-times, these abstractions intentionally 9 | separate away ressource management and the specifics of reading and 10 | writing bytes. 11 | 12 | Bytesrw distributed under the ISC license. It has no dependencies. 13 | 14 | Optional support for compressed and hashed bytes depend, at your wish, on 15 | the C [`zlib`], [`libzstd`], [`blake3`], [`libmd`], [`xxhash`] and 16 | libraries. 17 | 18 | [`blake3`]: https://blake3.io 19 | [`libzstd`]: http://zstd.net 20 | [`libmd`]: https://www.hadrons.org/software/libmd/ 21 | [`xxhash`]: https://xxhash.com/ 22 | [`zlib`]: https://zlib.net 23 | 24 | Homepage: 25 | 26 | ## Installation 27 | 28 | Bytesrw can be installed with `opam` 29 | 30 | opam install bytesrw 31 | 32 | # Compression support 33 | opam install bytesrw conf-zlib conf-zstd 34 | 35 | # Hashing support 36 | opam install bytesrw conf-libblake3 conf-libmd conf-xxhash 37 | 38 | If you don't use `opam` consult the [`opam`](opam) file for build 39 | instructions. 40 | 41 | ## Documentation 42 | 43 | The documentation can be consulted [online] or via `odig doc bytesrw`. 44 | 45 | Questions are welcome but better asked on the [OCaml forum] than on the 46 | issue tracker. 47 | 48 | [online]: https://erratique.ch/software/bytesrw/doc 49 | [OCaml forum]: https://discuss.ocaml.org/ 50 | 51 | ## Examples 52 | 53 | A few examples can be found in the [test](test/) directory. 54 | 55 | ## Acknowledgments 56 | 57 | A grant from the [OCaml Software Foundation] helped to bring the first 58 | public release of `bytesrw`. 59 | 60 | [OCaml Software Foundation]: http://ocaml-sf.org/ 61 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true : bin_annot, safe_string 2 | <_b0> : -traverse 3 | : include 4 | 5 | : include 6 | : package(unix) 7 | 8 | : include 9 | : use_libblake3 10 | : record_bytesrw_blake3_stubs 11 | : link_bytesrw_blake3_stubs_archive 12 | : use_libblake3 13 | 14 | : include 15 | : use_libmd 16 | : record_bytesrw_md_stubs 17 | : link_bytesrw_md_stubs_archive 18 | : use_libmd 19 | 20 | : include 21 | : use_libxxhash 22 | : record_bytesrw_xxhash_stubs 23 | : link_bytesrw_xxhash_stubs_archive 24 | : use_libxxhash 25 | 26 | : include 27 | : use_zlib 28 | : record_bytesrw_zlib_stubs 29 | : link_bytesrw_zlib_stubs_archive 30 | : use_zlib 31 | 32 | : include 33 | : use_libzstd 34 | : record_bytesrw_zstd_stubs 35 | : link_bytesrw_zstd_stubs_archive 36 | : use_libzstd 37 | -------------------------------------------------------------------------------- /doc/cookbook.mld: -------------------------------------------------------------------------------- 1 | {0 [Bytesrw] cookbook} 2 | 3 | A few convention and recipes for dealing with byte stream reader and writers. 4 | 5 | {1:conventions Contracts and conventions} 6 | 7 | {2:convention_functions Reader and writer function contract} 8 | 9 | {{!Bytesrw.Bytes.Reader.make}The contract} between a reader and its slice 10 | enumerating function. 11 | {{!Bytesrw.Bytes.Writer.make}The contract} 12 | between a writer and its slice iterating function. 13 | 14 | {2:convention_client Reader and writer client contracts} 15 | 16 | {{!Bytesrw.Bytes.Reader.read}The contract} between a reader and its 17 | client. {{!Bytesrw.Bytes.Writer.write}The contract} between a 18 | writer and its client. 19 | 20 | {2:convention_filters Reader and writer filters conventions} 21 | 22 | The {{!Bytesrw.Bytes.Reader.filter}reader 23 | filter conventions} and the {{!Bytesrw.Bytes.Writer.filters}writer 24 | filter conventions}. 25 | 26 | {1:string_filtering Applying filters to strings} 27 | 28 | {{!Bytesrw.Bytes.Reader.filters}Reader filters} 29 | can easily be applied to strings with {!Bytesrw.Bytes.Reader.filter_string}: 30 | 31 | {[ 32 | let id s = 33 | let filters = Bytesrw_zstd.[compress_reads (); decompress_reads ()] in 34 | Bytes.Reader.filter_string filters s 35 | ]} 36 | 37 | This can also be done with {{!Bytesrw.Bytes.Writer.filters}writer filters} 38 | by using {!Bytesrw.Bytes.Writer.filter_string}: 39 | {[ 40 | let id s = 41 | let filters = Bytesrw_zstd.[decompress_writes (); compress_writes ()] in 42 | Bytes.Writer.filter_string filters s 43 | ]} 44 | 45 | {1:checksumming Checksumming streams} 46 | 47 | The pattern for checksumming streams is to apply an identity but 48 | side-effecting filter (also known as a {{!tapping}tap}) on a reader or 49 | writer and return a state value. The state value is updated whenever 50 | the resulting reader or writer is read or written. 51 | 52 | The example below shows how to combine {{!Bytesrw_blake3}[BLAKE3]} 53 | checksumming with {{!Bytesrw_zstd}[zstd]} compression. 54 | 55 | This checksums the data before compressing it: 56 | {[ 57 | let blake3_and_compress ~plain = 58 | try 59 | let plain, blake3 = Bytesrw_blake3.Blake3.reads plain in 60 | let comp = Bytesrw_zstd.compress_reads () plain in 61 | let comp = Bytes.Reader.to_string comp in 62 | Ok (comp, Bytesrw_blake3.Blake3.value blake3) 63 | with 64 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 65 | ]} 66 | This checksums the data after decompressing it: 67 | {[ 68 | let decompress_and_blake3 ~comp = 69 | try 70 | let plain = Bytesrw_zstd.decompress_reads () comp in 71 | let r, blake3 = Bytesrw_blake3.Blake3.reads plain in 72 | let s = Bytes.Reader.to_string r in 73 | Ok (s, Bytesrw_blake3.Blake3.value blake3) 74 | with 75 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 76 | ]} 77 | By re-odering the operations in the two functions above you could 78 | equally have applied the checksum on the compressed data or even have 79 | checksums for both compressed and decompressed data. 80 | 81 | {1:limiting Limiting streams} 82 | 83 | If you need to limit resource consumption, readers and writers 84 | can be bounded with {!Bytesrw.Bytes.Reader.limit} and 85 | {!Bytesrw.Bytes.Writer.limit}. 86 | 87 | For example this makes sure that the {{!Bytesrw_zstd}[zstd]} 88 | decompressed size of [comp] does not exceed [quota] bytes. If it does 89 | we still return the truncated decompressed data so far. 90 | 91 | {[ 92 | let limited_decompress ~quota ~comp = 93 | let buf = Buffer.create quota in 94 | try 95 | let plain = Bytesrw_zstd.decompress_reads () comp in 96 | let () = Bytes.Reader.add_to_buffer buf (Bytes.Reader.limit quota plain) in 97 | Ok (`Data (Buffer.contents buf)) 98 | with 99 | | Bytes.Stream.Error (Bytes.Stream.Limit _quota, _) -> 100 | Ok (`Quota_exceeded (Buffer.contents buf)) 101 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 102 | ]} 103 | 104 | {1:tapping Tapping streams} 105 | 106 | Tapping streams allow to observe the slices that fly-by in a stream. 107 | The functions {!Bytesrw.Bytes.Reader.tap} and 108 | {!Bytesrw.Bytes.Writer.tap} can be used to tap readers and 109 | writers. Taps are just identity stream filters with a side-effect. 110 | 111 | Note that readers that result from tap will not tap the 112 | {{!page-tutorial.reader_push_back}push backs} that are performed on 113 | them. This is a good property if you are using taps for 114 | {{!checksumming}checksumming}. 115 | 116 | See also {{!tracing}tracing streams}. 117 | 118 | {1:tracing Tracing streams} 119 | 120 | Tracing streams can easily be done by {{!tapping}tapping} them 121 | with {!Bytesrw.Bytes.Slice.tracer}. For example the following 122 | will trace the slices of [r] or [w] on [stderr]. 123 | 124 | {[ 125 | let rtrace ~id r = Bytes.Reader.tap (Bytes.Slice.tracer ~id) r 126 | let wtrace ~id w = Bytes.Writer.tap (Bytes.Slice.tracer ~id) w 127 | ]} 128 | 129 | {1:errors Adding your own stream error} 130 | 131 | Here is a blueprint you can use to define your own stream error. 132 | 133 | {[ 134 | module Myformat : sig 135 | 136 | (** {1:errors Errors} *) 137 | 138 | type Bytesrw.Bytes.Stream.error += 139 | | Error of string (** *) 140 | (** The type for [myformat] stream errors. *) 141 | 142 | (** {1:streams Streams} *) 143 | 144 | (* … *) 145 | end = struct 146 | type Bytes.Stream.error += Error of string 147 | 148 | let format_error = 149 | let case msg = Error msg in 150 | let message = function Error msg -> msg | _ -> assert false in 151 | Bytes.Stream.make_format_error ~format:"myformat" ~case ~message 152 | 153 | let error e = Bytes.Stream.error format_error e 154 | let reader_error r e = Bytes.Reader.error format_error r e 155 | let writer_error w e = Bytes.Writer.error format_error w e 156 | end 157 | ]} 158 | 159 | In your code you can now selectively pattern match on these errors with 160 | {[ 161 | try … with 162 | | Bytes.Stream.Error (Myformat.Error msg, _) -> … 163 | ]} 164 | More on the design of the stream error system can be found in the 165 | {{!page-notes.errors}design notes}. 166 | -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 Bytesrw {%html: %%VERSION%%%}} 2 | 3 | Bytesrw extends the OCaml {!Bytes} module with composable, memory 4 | efficient, byte stream {{!Bytesrw.Bytes.Reader}readers} and 5 | {{!Bytesrw.Bytes.Writer}writers} compatible with effect based 6 | concurrency. 7 | 8 | Except for byte slice {{!Bytesrw.Bytes.Slice.validity}life-times}, 9 | these abstractions intentionally separate away ressource management 10 | and the specifics of reading and writing bytes. 11 | 12 | {1:manuals Manuals} 13 | 14 | The following manuals are available: 15 | 16 | {ul 17 | {- The {{!quick}quick start} should do so.} 18 | {- The {{!tutorial}tutorial} is a conceptual overview of byte stream readers 19 | and writers.} 20 | {- The {{!cookbook}Bytesrw cookbook} has a few conventions and 21 | byte stream recipes.} 22 | {- The {{!page-notes}design notes} explains design choices made by the 23 | library.}} 24 | 25 | {1:bytesrw Library [bytesrw]} 26 | 27 | This library has the base definition of byte stream reader and writers as an 28 | extension of the {!Stdlib.Bytes} module. 29 | 30 | {!modules: 31 | Bytesrw} 32 | 33 | {!modules: 34 | Bytesrw.Bytes.Slice 35 | Bytesrw.Bytes.Stream 36 | Bytesrw.Bytes.Reader 37 | Bytesrw.Bytes.Writer} 38 | 39 | The following modules rely only on the [Stdlib]: 40 | 41 | {!modules: 42 | Bytesrw_utf 43 | Bytesrw_hex} 44 | 45 | {1:bytesrw_other_libs Libraries 46 | [bytesrw.{blake3,md,unix,xxhash,zlib,zstd}]} 47 | 48 | Each of these modules lives in its corresponding library. 49 | Compression and hashing libraries depend on their canonical C library. 50 | 51 | {!modules: 52 | Bytesrw_blake3 53 | Bytesrw_md 54 | Bytesrw_unix 55 | Bytesrw_xxhash 56 | Bytesrw_zlib 57 | Bytesrw_zstd 58 | } 59 | 60 | {1:quick Quick start} 61 | 62 | This example compresses standard input to standard output 63 | with [zstd] using either a compressing byte stream reader (pull) or a 64 | compressing byte stream writer (push). 65 | {[ 66 | cat << 'EOF' > quickstart.ml 67 | open Bytesrw 68 | 69 | let stdio_compress_reads () = 70 | try 71 | let stdin = Bytes.Reader.of_in_channel In_channel.stdin in 72 | let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in 73 | let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in 74 | let zstdr = Bytesrw_zstd.compress_reads ~params () stdin in 75 | Bytes.Writer.write_reader ~eod:true stdout zstdr; 76 | Ok () 77 | with 78 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 79 | | Sys_error e -> Error e 80 | 81 | let stdio_compress_writes () = 82 | try 83 | let stdin = Bytes.Reader.of_in_channel In_channel.stdin in 84 | let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in 85 | let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in 86 | let zstdw = Bytesrw_zstd.compress_writes ~params () ~eod:true stdout in 87 | Bytes.Writer.write_reader ~eod:true zstdw stdin; 88 | Ok () 89 | with 90 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 91 | | Sys_error e -> Error e 92 | 93 | let main () = 94 | Result.fold ~ok:(Fun.const 0) ~error:(fun e -> prerr_endline e; 1) @@ 95 | if Array.exists (String.equal "-w") Sys.argv 96 | then stdio_compress_writes () 97 | else stdio_compress_reads () 98 | 99 | let () = if !Sys.interactive then () else exit (main ()) 100 | EOF 101 | ]} 102 | It can be compiled and used with: 103 | {[ 104 | ocamlfind ocamlopt -package bytesrw,bytesrw.zstd -linkpkg quickstart.ml 105 | ./a.out < quickstart.ml | zstd -d 106 | ]} 107 | The {{!page-tutorial}tutorial} is a short conceptual overview of byte stream 108 | readers and writers. The {{!page-cookbook}cookbook} has a few more tips and 109 | code snippets. -------------------------------------------------------------------------------- /doc/notes.mld: -------------------------------------------------------------------------------- 1 | {0 Design notes} 2 | 3 | {1:wf Well formed streams} 4 | 5 | The design enforces well-formed streams: sequences of {e non-empty} 6 | slices terminated by a single {!Bytesrw.Bytes.Slice.eod} empty slice. 7 | 8 | We want to ensure that the [read] functions of streams readers do not 9 | return unexpected empty slices and that they are not called again once 10 | they returned {!Bytesrw.Bytes.Slice.eod}. Likewise we want to ensure 11 | that the [write] function of writers is not called with unexpected 12 | empty slices or that they are called again once they were called with 13 | {!Bytesrw.Bytes.Slice.eod}. 14 | 15 | For example the {!Bytesrw.Bytes.Slice} creation function like 16 | {!Bytesrw.Bytes.Slice.make} raise [Invalid_argument] when they 17 | unexpectedly produce an empty slice and those that can produce them 18 | have dedicated names that include [_eod] 19 | (e.g. {!Bytesrw.Bytes.Slice.make_or_eod}). This makes programmers 20 | well-aware of the points in the code that may terminate streams. 21 | 22 | Besides, readers and writers internally overwrite their [read] and [write] 23 | function as soon as {!Bytesrw.Bytes.Slice.eod} is seen so that no 24 | closure is kept longer than it should be. 25 | 26 | {1:errors Error strategy} 27 | 28 | The error strategy may look involved but it caters for: 29 | 30 | - Simple strategies like {!Bytesrw_zstd} where a single case is used 31 | for all errors. 32 | - Complex strategies where a dedicated structured error type is used, 33 | possibly distinguishing between setup/reader/writer errors 34 | 35 | while retaining the ability to pattern match on the error of specific 36 | streams and making sure informative human error messages can always be 37 | produced. 38 | 39 | For users it means that when they use streams: 40 | 41 | - There is a single exception to guard against and this exception can 42 | trivially be turned into an error message. 43 | - Pattern matching on errors can still be used on a per stream format 44 | basis. This is needed for example for stream fromats that deal with 45 | substreams of other formats: since streams can be layered on top of 46 | each other and a lower level stream could error while you are 47 | dealing with your own format, that error should not be caught 48 | (XXX but what about ambiguities ?, should we add auto generated 49 | stream ids ?). 50 | 51 | To enforce good behaviour by stream reader and writers the following 52 | type structure is used: 53 | 54 | - There is an extensible {!Bytesrw.Bytes.Stream.type-error} types to 55 | which stream formats add they own case. 56 | - The {!Bytesrw.Bytes.Stream.Error} exception which holds a value of type 57 | [error] cannot be directly raised as it demands the creation of a 58 | {!Bytesrw.Bytes.Stream.error_context} value for which there is no constructor. 59 | - This means that all raises go through either 60 | {!Bytesrw.Bytes.Stream.val-error}, 61 | or {!Bytesrw.Bytes.Reader.val-error} or {!Bytesrw.Bytes.Writer.val-error}. 62 | These functions ask for a {!Bytesrw.Bytes.Stream.format_error}, 63 | whose creation asks stream formats to identify themelves and describe 64 | how their errors can be stringified. 65 | 66 | Effectively this system can be seen as an OCaml-like exception system 67 | inside the {!Bytesrw.Bytes.Stream.Error} exception. 68 | 69 | {1:primitives Signature of [read/write] primitives} 70 | 71 | - [write] has to write all the given bytes. Having [write] return the number 72 | of written bytes from the slice would be useful for certain scenario, most 73 | notably write filters. It would be the moral equivalent of readers' push 74 | backs, except in this case it would be inconvenient {e for the client} and 75 | leads to things like [really_write], not desirable. 76 | 77 | - [read] does not allow to specify the number bytes to read. Doing so would 78 | entail storing the last slice in the reader. Then if too much bytes are 79 | requested we either need to start buffering internally because of slice 80 | validity or potentially return less. However the latter leads to 81 | [really_read] and buffering again, not desirable. The 82 | {{:https://github.com/dbuenzli/bytesrw/blob/main/test/utf8codec.ml}UTF-8 83 | text codec} example shows that we can really get rid of buffering 84 | except for a tiny buffer for overlapping reads. So it seems better to 85 | let higher-level abstractions handle that if they really need to. 86 | 87 | {1:names Names} 88 | 89 | - We settle on [length]s rather than [size]s because that's what 90 | the [Stdlib] generally uses. 91 | 92 | - We use [first], [last] and [length] for slice indexing and [pos], 93 | for streams. That way the terminology does not overlap which makes 94 | it for a clearer exposition. Incidentally our usage of [pos] is 95 | consistent with the [Stdlib] (though the index vs position terminology 96 | is unfortunate in my opinion). 97 | 98 | {1:resolutions Resolved without strong rationale} 99 | 100 | The following points were resolved for the first release without 101 | a strong rationale. 102 | 103 | {ul 104 | {- Push backs. Using {!Bytesrw.Bytes.Reader.push_back} is allowed at the 105 | beginning of the stream and thus allows to push back to negative positions. 106 | This could have been disallowed (in which case {!Bytesrw.Bytes.Reader.empty} 107 | can become a value again). One use can be to push a header before 108 | a stream, while still having byte positions aligned on the stream 109 | (using {!Bytesrw.Bytes.Reader.val-append} for that would shift positions). 110 | Unclear. {b Update.} In fact it is used in Webs_unix.Fd.bytes_reader for 111 | creating the reader for bodies, after reading the HTTP header we have [n] bytes of 112 | the body in our buffer, we create a reader at position [n] that reads 113 | from the fd and push back the initial bytes. So in fact it seems to be 114 | a good idea in order to start readers at precise positions despite the fact 115 | that you may have pulled too much data.} 116 | {- For now it was decided not to give access to the reader and writer 117 | in [read] and [write] functions. This means they can't access 118 | their properties. Some combinators in [Bytesrw] internally 119 | mutate the [read] field afterwards because they want to access the reader 120 | in the definition of the [read] function, this is not possible with the API. 121 | In filters for reporting read error positions those are generally reported 122 | by the reader that filters so the underlying reader is available for using 123 | with {!Bytesrw.Bytes.Reader.error}. It's a bit less clear for writers.} 124 | {- {!Bytesrw.Bytes.Reader.tap} and {!Bytesrw.Bytes.Writer.tap} are 125 | just {!Fun.id} filters with a side effect, they could be 126 | removed. We choose to keep them for now, it's doesn't seem to hurt 127 | to have a name for that.} 128 | {- System errors. When readers and writers use system calls it's 129 | a bit unclear whether the errors ([Sys_error] and [Unix_error]) should be 130 | transformed into stream errors. For now we decided not to. That is we 131 | consider that to be an error of the ressource rather than an error of 132 | the stream.}} 133 | 134 | {1:upstream Upstreaming} 135 | 136 | {b If we determine that the API works well} we should consider 137 | proposing to upstream the extension of {!Bytesrw.Bytes}. 138 | 139 | In particular it would be an alternative to the 140 | {{:https://github.com/ocaml/RFCs/blob/master/rfcs/modular_io.md}modular 141 | IO proposal}. The core code is mostly trivial it's "just" three data 142 | structures. But added code is more code to maintain. 143 | 144 | However here are a few things to consider: 145 | 146 | {ul 147 | {- The labels with lengths are not abbreviated, we have [~length] and 148 | [~slice_length], the Stdlib uses [~len]. A counter here is to 149 | mention that this corresponds to the name of the fields 150 | of the structures and there is little point to multiply the names 151 | for the same thing (and using [Bytes.Slice.len] would be weird and 152 | inconsistent with [{String,Bytes}.length]).} 153 | 154 | {- While most functions use a [first] + [length] for specifying ranges 155 | a few functions are also provided with the (extremely nice I have to 156 | say) inclusive [?first], [?last] range mechanism which was was 157 | {{:https://github.com/ocaml/ocaml/pull/9893}percieved negatively} by 158 | upstream for strings. For wrong reasons in my opinion revolving 159 | around aesthetics of [+/-1] (counter 1: clarity; counter 2: depends 160 | on your use case, you then also need to add +/-1) and provability 161 | (?). It's a pity since it's a very usable interface that avoids a 162 | lot of footgunish index computations and brings general code reading 163 | clarity by requiring less mental labor. It's also consistent with 164 | OCaml's own inclusive [for] loop and certainly not 165 | {{:https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(array)#Slicing}an 166 | anomaly} in the language design space. See the also the discussion 167 | {{:https://github.com/ocaml/ocaml/pull/10480#issuecomment-887928919}here}.} 168 | 169 | {- Dependencies. A few things will need to be dispatched differently 170 | module wise. 171 | 172 | Basically the channel stuff should move to the [{In,Out}channel] 173 | modules and the buffer stuff to the [Buffer] module. The 174 | formatters won't make it or will end up in the unusably long-winded 175 | [Format]. 176 | 177 | Besides that I don't think any functionality would be lost, in 178 | particular we made sure not to use [Format] in the 179 | {!Bytesrw.Bytes.Stream.type-error} interface. 180 | 181 | One thing I can see not make it is perhaps the nice hex dump 182 | formatter {!Bytesrw.Bytes.pp_hex} which is maybe too much code for 183 | upstream to take.} 184 | {- Safety. There are two places where we don't copy a bytes/string 185 | because we assume that the users abide the Slice validity 186 | rule. Notably {!Bytesrw.Bytes.Reader.of_string}. Upstream will likely want 187 | no unsafe usage. These places can be found with [git grep 'Unsafe is 188 | ok']. We considered prefixing these functions with [unsafe_] but then 189 | it creeps a bit because for example {!Bytesrw.Bytes.Reader.filter_string} 190 | uses it.}} -------------------------------------------------------------------------------- /doc/tutorial.mld: -------------------------------------------------------------------------------- 1 | {0 Byte stream reader and writer tutorial} 2 | 3 | See also the {{!page-index.quick}quick start} and the {{!page-cookbook} 4 | cookbook} for short, self-contained, code snippets. This tutorial 5 | is a conceptual overview of byte stream readers and writers. 6 | 7 | {1:streams Streams} 8 | 9 | In [Bytesrw] you never get to manipulate byte streams directly. You 10 | observe finite parts of them via stream readers and writers. These 11 | finite parts are represented by byte slices. A 12 | {{!Bytesrw.Bytes.Slice}byte slice} is a non-empty, consecutive, 13 | subrange of a {!Bytes.t} value. There is a single, distinguished, 14 | empty slice {!Bytesrw.Bytes.Slice.eod} (end-of-data) which is used to 15 | indicate the end the stream. Once this value is observed no more bytes can 16 | be observed from a stream. 17 | 18 | To sum up, a byte stream is a sequence of {!Bytesrw.Bytes.Slice.t} 19 | values ended by a {!Bytesrw.Bytes.Slice.eod} value. Byte stream reader 20 | and writers give you two different ways of observing this sequence, in 21 | order, but always only slice by slice: 22 | 23 | - With byte stream readers you get to pull slices one by one from the reader 24 | until {!Bytesrw.Bytes.Slice.eod} is pulled. 25 | - With byte stream writers you get to see slices pushed on the writer 26 | one by one until {!Bytesrw.Bytes.Slice.eod} is pushed. 27 | 28 | The system enforces well-formed streams: your readers and writers will 29 | not hiccup on transient empty slices since they unconditionally 30 | terminates streams. For this reason the functions in 31 | {!Bytesrw.Bytes.Slice} always make explicit when 32 | {!Bytesrw.Bytes.Slice.eod} can be produced by suffixing the function 33 | names with [_or_eod]. 34 | 35 | {1:readers Stream readers} 36 | 37 | Stream {{!Bytesrw.Bytes.Reader}readers} are pull abstractions. They 38 | provide access to the slices of a stream, in order, on demand, but 39 | only slice by slice: the slice you get from a reader [r] on 40 | {!Bytesrw.Bytes.Reader.read} is {{!Bytesrw.Bytes.Slice.validity}valid 41 | for reading} only until the next slice is read from [r]. 42 | 43 | This means that you are {b only} allowed to read those bytes in the range 44 | defined by the slice until the next call to 45 | {!Bytesrw.Bytes.Reader.read} on [r]. You are not even allowed 46 | to mutate the bytes in the range of the slice. If you need to keep the 47 | data for longer or want to modify it, you need to copy it. 48 | 49 | Readers can be created from {{!Bytesrw.Bytes.Reader.of_bytes} bytes}, 50 | {{!Bytesrw.Bytes.Reader.of_string}strings}, 51 | {{!Bytesrw.Bytes.Reader.of_slice}slices}, 52 | {{!Bytesrw.Bytes.Reader.of_in_channel} input channels}, 53 | {{!Bytesrw_unix.bytes_reader_of_fd}file descriptors}, etc. More generally 54 | {{!Bytesrw.Bytes.Reader.make}any function} that enumerates a stream's slices 55 | can be turned into a byte stream reader. 56 | 57 | Readers maintain an informative stream position accessible with 58 | {!Bytesrw.Bytes.Reader.pos}. The position is the zero based-index of 59 | the next byte to read or, alternatively, the 60 | {{!Bytesrw.Bytes.Reader.read_length} number of bytes} that have been 61 | returned by calls to {!Bytesrw.Bytes.Reader.read}. Positions can be 62 | used for statistics, for locating errors, for locating substreams or 63 | computing byte offsets. 64 | 65 | Readers have an informative immutable 66 | {!Bytesrw.Bytes.Reader.slice_length} property. It is a {b hint} on the 67 | maximal slice length returned by reads. This can be used by reader 68 | consumers to adjust their own buffers. 69 | 70 | {2:reader_push_back Push backs} 71 | 72 | Reader {{!Bytesrw.Bytes.Reader.push_back}push backs} provide a limited 73 | form of look ahead on streams. They should not be used as a general 74 | buffering mecanism but they allow to 75 | {{!Bytesrw.Bytes.Reader.sniff}sniff stream content}, for example to 76 | {{!Bytesrw_utf.guess_reader_encoding}guess encodings}, in order to invoke 77 | suitable decoders. They are also used to break streams into 78 | substreams at precise positions when a reader provide you with a 79 | slice that overlap two substreams. 80 | 81 | {2:reader_filters Filters} 82 | 83 | {{!Bytesrw.Bytes.Reader.filters}Reader filters} are reader 84 | transformers. They take a reader [r] and return a new reader which, 85 | when read, reads on [r] and transforms its slices. For example given a 86 | reader [r] that returns compressed bytes a decompress filter like 87 | {!Bytesrw_zstd.decompress_reads} returns a new reader which reads and 88 | decompresses the slices of [r]. 89 | 90 | Filters do not necessarily act on a reader forever. For example the 91 | reader returned by {!Bytesrw_zstd.decompress_reads}[ ~all_frames:false 92 | () r], decompresses exactly a single [zstd] frame by reading from [r] 93 | and then returns {!Bytesrw.Bytes.Slice.eod}. After that [r] can be 94 | used again to read the remaining data after the frame. 95 | 96 | {2:reader_limits Limits} 97 | 98 | The number of bytes returned by a reader can be limited with 99 | {!Bytesrw.Bytes.Reader.limit}. See {{!page-cookbook.limiting}this 100 | example} in the cookbook. 101 | 102 | {1:writers Stream writers} 103 | 104 | Stream {{!Bytesrw.Bytes.Writer}writers} are push abstractions. Clients 105 | push the slices of a stream on a writer [w] with 106 | {!Bytesrw.Bytes.Writer.write}, slice by slice. This allows the write 107 | function of the writer to get a hand on the slice which the client 108 | must guarantee {{!Bytesrw.Bytes.Slice.validity}valid for reading} 109 | until the writer returns. 110 | 111 | Writers, in their write function, are {b only} allowed to read those 112 | bytes in the range defined by the slice until they return. They are 113 | not allowed to mutate the bytes in the range of the slice. If a 114 | writer needs to keep the data for longer or needs to modify it, it 115 | needs to copy it. 116 | 117 | Writers can be created to write to 118 | {{!Bytesrw.Bytes.Writer.of_buffer}buffers}, 119 | {{!Bytesrw.Bytes.Writer.of_out_channel}output channels}, 120 | {{!Bytesrw_unix.bytes_writer_of_fd}file descriptors}, etc. More 121 | generally {{!Bytesrw.Bytes.Writer.make}any slice iterating function} 122 | can be turned into a byte stream writer. 123 | 124 | Writers maintain an informative stream position accessible with 125 | {!Bytesrw.Bytes.Writer.pos}. The position is the zero based-index of 126 | the next byte to write or, alternatively, the 127 | {{!Bytesrw.Bytes.Writer.written_length}number of bytes} that have been 128 | pushed on the writer by calls to 129 | {!Bytesrw.Bytes.Writer.write}. Positions can be used for statistics, 130 | for locating errors or computing byte offsets. 131 | 132 | Writers have an informative immutable 133 | {!Bytesrw.Bytes.Writer.slice_length} property. It provides a {b hint} 134 | for clients on the maximal length of slices the writer would like to 135 | receive. This can be used by clients to adjust the sizes of the slices 136 | they write on a writer. 137 | 138 | {2:writer_filters Filters} 139 | 140 | {{!Bytesrw.Bytes.Writer.filters}Writer filters} are writer 141 | transformers. They take a writer [w] and return a new writer which, 142 | when written, transforms the slice and then writes it on [w]. For 143 | example given a writer [w] that writes to an output channel, a filter 144 | like {!Bytesrw_zstd.compress_writes} returns a new writer which 145 | compresses the writes before writing them to [w]. 146 | 147 | Filters do not necessarily act on a writer forever. This is the 148 | purpose of the boolean [eod] argument of 149 | {!Bytesrw.Bytes.Writer.filter}. When {!Bytesrw.Bytes.Slice.eod} is 150 | written on the filter the end of data slice should only be written to 151 | the underyling writer [w] if [eod] is [true]. If not, the filter 152 | should simply flush its data to [w] and further leave [w] untouched so 153 | that more data can be written on it. For example the writer returned 154 | by {!Bytesrw_zstd.compress_writes}[ () ~eod:false w] compresses writes 155 | until {!Bytesrw.Bytes.Slice.eod} is written. After that [w] can be 156 | used again to write more data after the compressed stream. 157 | 158 | {2:writer_limits Limits} 159 | 160 | The number of bytes written on a writer can be limited with 161 | {!Bytesrw.Bytes.Writer.limit}. 162 | 163 | {1:errors Errors} 164 | 165 | In general stream readers and writer and their creation function may 166 | raise the extensible {!Bytesrw.Bytes.Stream.Error} exception. 167 | 168 | These errors should only pertain to byte stream errors, that is they 169 | should mostly be raised by reads and writes on the result of stream 170 | filters. For example the default behaviour on byte stream reader and 171 | writer limits is to raises a byte stream exception with a 172 | {!Bytesrw.Bytes.Stream.Limit} error. Or if you use a [zstd] 173 | decompression filter, any decompression error will be reported by byte 174 | stream exception with a {!Bytesrw_zstd.Error} error. See 175 | {{!page-cookbook.errors}here} for an example on how you can add your 176 | own case to stream errors. 177 | 178 | If you use byte stream readers and writers to codec a higher-level 179 | data format like JSON that does not result in a byte stream itself, 180 | you should likely have your own error mecanisms and let stream errors 181 | simply fly across your use of readers and writers. For example while 182 | a [zstd] decompression error could occur from the reader you are 183 | decoding your JSON from, it likely doesn't make sense to capture this 184 | error in your decoder and produce it as a JSON codec error. 185 | 186 | For now the library also decided not to inject {!Sys_error} and 187 | {!Unix.Unix_error} that readers and writers based on standard library 188 | channels and {!Unix} file descriptors may raise into the stream error 189 | exception. The idea is that these errors pertain to the resource being 190 | acted upon, not the byte stream itself (also: we couldn't do the same 191 | for potential unknown third-party system abstractions so it feels 192 | non-compostional). But a bit more practice may be needed to precisely 193 | pin down the strategy here. -------------------------------------------------------------------------------- /myocamlbuild.ml: -------------------------------------------------------------------------------- 1 | open Ocamlbuild_plugin 2 | open Command 3 | 4 | (* Generic pkg-config(1) support. *) 5 | 6 | let pkg_config_exists package = 7 | Sys.command ("pkg-config --exists " ^ package) = 0 8 | 9 | let lib_with_clib ~lib ~clib ~has_lib ~src_dir ~stublib = 10 | let strf = Printf.sprintf in 11 | let windows = !Ocamlbuild_plugin.Options.ext_lib = "lib" in 12 | let pkg_config flags package = 13 | let cmd tmp = 14 | let pkg_config = 15 | if not windows then A "pkg-config" else 16 | S [A "pkg-config"; A "--msvc-syntax"] 17 | in 18 | Command.execute ~quiet:true & 19 | Cmd( S [ pkg_config; A ("--" ^ flags); A package; Sh ">"; A tmp]); 20 | List.map (fun arg -> A arg) (string_list_of_file tmp) 21 | in 22 | with_temp_file "pkgconfig" "pkg-config" cmd 23 | in 24 | let ar s = match !Ocamlbuild_plugin.Options.ext_lib with 25 | | "" -> s ^ ".a" | x -> s ^ "." ^ x 26 | in 27 | let make_opt o arg = S [ A o; arg ] in 28 | let ccopts = List.map (make_opt "-ccopt") in 29 | let cclibs = List.map (make_opt "-cclib") in 30 | let dllibs = List.map (make_opt "-dllib") in 31 | let use_lib = strf "use_%s" lib in 32 | let use_clib = strf "use_%s" clib in 33 | let record_stub_lib = strf "record_%s" stublib in 34 | let link_stub_archive = strf "link_%s_archive" stublib in 35 | let stub_ar = ar (strf "%s/lib%s" src_dir stublib) in 36 | let static_stub_l = 37 | if windows then A (strf "lib%s.lib" stublib) else A (strf "-l%s" stublib) 38 | in 39 | let dynamic_stub_l = 40 | if windows then A (strf "dll%s.dll" stublib) else static_stub_l 41 | in 42 | let clib_l = pkg_config "libs-only-l" clib in 43 | let clib_L = 44 | let dashldify = function 45 | | A l when windows -> A (String.subst "/libpath:" "-L" l) 46 | | arg -> arg 47 | in 48 | List.map dashldify (pkg_config "libs-only-L" clib) 49 | in 50 | let clib_cflags = ccopts @@ (A has_lib) :: pkg_config "cflags" clib in 51 | let clib_cclibs = cclibs @@ static_stub_l :: clib_l in 52 | let clib_ccopts = ccopts @@ clib_L in 53 | begin 54 | dep [record_stub_lib] [stub_ar]; 55 | 56 | flag ["c"; "compile"; use_clib] (S clib_cflags); 57 | 58 | flag ["c"; "ocamlmklib"; use_clib] (S (clib_L @ clib_l)); 59 | 60 | flag ["link"; "ocaml"; "library"; "byte"; record_stub_lib] 61 | (S (dllibs [dynamic_stub_l] @ clib_ccopts @ clib_cclibs)); 62 | 63 | flag ["link"; "ocaml"; "library"; "native"; record_stub_lib] 64 | (S (clib_ccopts @ clib_cclibs)); 65 | 66 | flag_and_dep ["link"; "ocaml"; link_stub_archive] (P stub_ar); 67 | 68 | flag ["link"; "ocaml"; "library"; "shared"; link_stub_archive] 69 | (S (clib_ccopts @ clib_cclibs)); 70 | 71 | ocaml_lib ~tag_name:use_lib ~dir:src_dir (strf "%s/%s" src_dir lib) 72 | end 73 | 74 | let () = 75 | dispatch begin function 76 | | After_rules -> 77 | if pkg_config_exists "libblake3" then 78 | lib_with_clib 79 | ~lib:"bytesrw_blake3" ~clib:"libblake3" ~has_lib:"-DHAS_BLAKE3" 80 | ~src_dir:"src/blake3" ~stublib:"bytesrw_blake3_stubs"; 81 | if pkg_config_exists "libmd" then 82 | lib_with_clib 83 | ~lib:"bytesrw_md" ~clib:"libmd" ~has_lib:"-DHAS_LIBMD" 84 | ~src_dir:"src/md" ~stublib:"bytesrw_md_stubs"; 85 | if pkg_config_exists "libxxhash" then 86 | lib_with_clib 87 | ~lib:"bytesrw_xxhash" ~clib:"libxxhash" ~has_lib:"-DHAS_XXHASH" 88 | ~src_dir:"src/xxhash" ~stublib:"bytesrw_xxhash_stubs"; 89 | if pkg_config_exists "zlib" then 90 | lib_with_clib 91 | ~lib:"bytesrw_zlib" ~clib:"zlib" ~has_lib:"-DHAS_ZLIB" 92 | ~src_dir:"src/zlib" ~stublib:"bytesrw_zlib_stubs"; 93 | if pkg_config_exists "libzstd" then 94 | lib_with_clib 95 | ~lib:"bytesrw_zstd" ~clib:"libzstd" ~has_lib:"-DHAS_ZSTD" 96 | ~src_dir:"src/zstd" ~stublib:"bytesrw_zstd_stubs"; 97 | | _ -> () 98 | end 99 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "bytesrw" 3 | synopsis: "Composable byte stream readers and writers for OCaml" 4 | description: """\ 5 | Bytesrw extends the OCaml `Bytes` module with composable, memory 6 | efficient, byte stream readers and writers compatible with effect 7 | based concurrency. 8 | 9 | Except for byte slice life-times, these abstractions intentionally 10 | separate away ressource management and the specifics of reading and 11 | writing bytes. 12 | 13 | Bytesrw distributed under the ISC license. It has no dependencies. 14 | 15 | Optional support for compressed and hashed bytes depend, at your wish, on 16 | the C [`zlib`], [`libzstd`], [`blake3`], [`libmd`], [`xxhash`] and 17 | libraries. 18 | 19 | [`blake3`]: https://blake3.io 20 | [`libzstd`]: http://zstd.net 21 | [`libmd`]: https://www.hadrons.org/software/libmd/ 22 | [`xxhash`]: https://xxhash.com/ 23 | [`zlib`]: https://zlib.net 24 | 25 | Homepage: """ 26 | maintainer: "Daniel Bünzli " 27 | authors: "The bytesrw programmers" 28 | license: "ISC" 29 | tags: [ 30 | "bytes" 31 | "streaming" 32 | "zstd" 33 | "zlib" 34 | "gzip" 35 | "deflate" 36 | "sha1" 37 | "sha2" 38 | "compression" 39 | "hashing" 40 | "utf" 41 | "xxhash" 42 | "blake3" 43 | "org:erratique" 44 | ] 45 | homepage: "https://erratique.ch/software/bytesrw" 46 | doc: "https://erratique.ch/software/bytesrw/doc" 47 | bug-reports: "https://github.com/dbuenzli/bytesrw/issues" 48 | depends: [ 49 | "ocaml" {>= "4.14.0"} 50 | "ocamlfind" {build} 51 | "ocamlbuild" {build} 52 | "topkg" {build & >= "1.0.3"} 53 | ] 54 | depopts: [ 55 | "conf-xxhash" "conf-zlib" "conf-zstd" "conf-libmd" "conf-libblake3" 56 | ] 57 | conflicts: [ 58 | "conf-zstd" {< "1.3.8"} 59 | ] 60 | build: [ 61 | "ocaml" 62 | "pkg/pkg.ml" 63 | "build" 64 | "--dev-pkg" 65 | "%{dev}%" 66 | "--with-conf-libblake3" 67 | "%{conf-libblake3:installed}%" 68 | "--with-conf-libmd" 69 | "%{conf-libmd:installed}%" 70 | "--with-conf-xxhash" 71 | "%{conf-xxhash:installed}%" 72 | "--with-conf-zlib" 73 | "%{conf-zlib:installed}%" 74 | "--with-conf-zstd" 75 | "%{conf-zstd:installed}%" 76 | ] 77 | dev-repo: "git+https://erratique.ch/repos/bytesrw.git" 78 | -------------------------------------------------------------------------------- /pkg/META: -------------------------------------------------------------------------------- 1 | description = "Composable byte stream readers and writers for OCaml" 2 | version = "%%VERSION_NUM%%" 3 | requires = "" 4 | archive(byte) = "bytesrw.cma" 5 | archive(native) = "bytesrw.cmxa" 6 | plugin(byte) = "bytesrw.cma" 7 | plugin(native) = "bytesrw.cmxs" 8 | exists_if = "bytesrw.cma bytesrw.cmxa" 9 | 10 | package "blake3" ( 11 | directory = "blake3" 12 | description = "BLAKE3 hashes" 13 | version = "%%VERSION_NUM%%" 14 | requires = "bytesrw" 15 | exports = "bytesrw" 16 | archive(byte) = "bytesrw_blake3.cma" 17 | archive(native) = "bytesrw_blake3.cmxa" 18 | plugin(byte) = "bytesrw_blake3.cma" 19 | plugin(native) = "bytesrw_blake3.cmxs" 20 | exists_if = "bytesrw_blake3.cma bytesrw_blake3.cmxa" 21 | ) 22 | 23 | package "md" ( 24 | directory = "md" 25 | description = "SHA{1,2} hashes" 26 | version = "%%VERSION_NUM%%" 27 | requires = "bytesrw" 28 | exports = "bytesrw" 29 | archive(byte) = "bytesrw_md.cma" 30 | archive(native) = "bytesrw_md.cmxa" 31 | plugin(byte) = "bytesrw_md.cma" 32 | plugin(native) = "bytesrw_md.cmxs" 33 | exists_if = "bytesrw_md.cma bytesrw_md.cmxa" 34 | ) 35 | 36 | package "unix" ( 37 | directory = "unix" 38 | description = "The bytesrw.unix library" 39 | version = "%%VERSION_NUM%%" 40 | requires = "bytesrw unix" 41 | exports = "bytesrw" 42 | archive(byte) = "bytesrw_unix.cma" 43 | archive(native) = "bytesrw_unix.cmxa" 44 | plugin(byte) = "bytesrw_unix.cma" 45 | plugin(native) = "bytesrw_unix.cmxs" 46 | exists_if = "bytesrw_unix.cma bytesrw_unix.cmxa" 47 | ) 48 | 49 | package "xxhash" ( 50 | directory = "xxhash" 51 | description = "XXH hashes" 52 | version = "%%VERSION_NUM%%" 53 | requires = "bytesrw" 54 | exports = "bytesrw" 55 | archive(byte) = "bytesrw_xxhash.cma" 56 | archive(native) = "bytesrw_xxhash.cmxa" 57 | plugin(byte) = "bytesrw_xxhash.cma" 58 | plugin(native) = "bytesrw_xxhash.cmxs" 59 | exists_if = "bytesrw_xxhash.cma bytesrw_xxhash.cmxa" 60 | ) 61 | 62 | package "zlib" ( 63 | directory = "zlib" 64 | description = "deflate, zlib and gzip streams" 65 | version = "%%VERSION_NUM%%" 66 | requires = "bytesrw" 67 | exports = "bytesrw" 68 | archive(byte) = "bytesrw_zlib.cma" 69 | archive(native) = "bytesrw_zlib.cmxa" 70 | plugin(byte) = "bytesrw_zlib.cma" 71 | plugin(native) = "bytesrw_zlib.cmxs" 72 | exists_if = "bytesrw_zlib.cma bytesrw_zlib.cmxa" 73 | ) 74 | 75 | package "zstd" ( 76 | directory = "zstd" 77 | description = "zstd streams" 78 | version = "%%VERSION_NUM%%" 79 | requires = "bytesrw" 80 | exports = "bytesrw" 81 | archive(byte) = "bytesrw_zstd.cma" 82 | archive(native) = "bytesrw_zstd.cmxa" 83 | plugin(byte) = "bytesrw_zstd.cma" 84 | plugin(native) = "bytesrw_zstd.cmxs" 85 | exists_if = "bytesrw_zstd.cma bytesrw_zstd.cmxa" 86 | ) 87 | -------------------------------------------------------------------------------- /pkg/pkg.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #use "topfind" 3 | #require "topkg" 4 | open Topkg 5 | 6 | let blake3 = Conf.with_pkg "conf-libblake3" 7 | let libmd = Conf.with_pkg "conf-libmd" 8 | let xxhash = Conf.with_pkg "conf-xxhash" 9 | let zstd = Conf.with_pkg "conf-zstd" 10 | let zlib = Conf.with_pkg "conf-zlib" 11 | let () = 12 | Pkg.describe "bytesrw" @@ fun c -> 13 | let blake3 = Conf.value c blake3 in 14 | let libmd = Conf.value c libmd in 15 | let xxhash = Conf.value c xxhash in 16 | let zlib = Conf.value c zlib in 17 | let zstd = Conf.value c zstd in 18 | Ok [ Pkg.mllib ~api:["Bytesrw"; "Bytesrw_utf"; "Bytesrw_hex"] 19 | "src/bytesrw.mllib"; 20 | Pkg.mllib "src/unix/bytesrw_unix.mllib" ~dst_dir:"unix"; 21 | Pkg.mllib ~cond:blake3 "src/blake3/bytesrw_blake3.mllib" 22 | ~dst_dir:"blake3"; 23 | Pkg.clib ~cond:blake3 "src/blake3/libbytesrw_blake3_stubs.clib" 24 | ~lib_dst_dir:"blake3"; 25 | Pkg.mllib ~cond:libmd "src/md/bytesrw_md.mllib" 26 | ~dst_dir:"md"; 27 | Pkg.clib ~cond:libmd "src/md/libbytesrw_md_stubs.clib" 28 | ~lib_dst_dir:"md"; 29 | Pkg.mllib ~cond:xxhash "src/xxhash/bytesrw_xxhash.mllib" 30 | ~dst_dir:"xxhash"; 31 | Pkg.clib ~cond:xxhash "src/xxhash/libbytesrw_xxhash_stubs.clib" 32 | ~lib_dst_dir:"xxhash"; 33 | Pkg.mllib ~cond:zlib "src/zlib/bytesrw_zlib.mllib" ~dst_dir:"zlib"; 34 | Pkg.clib ~cond:zlib "src/zlib/libbytesrw_zlib_stubs.clib" 35 | ~lib_dst_dir:"zlib"; 36 | Pkg.mllib ~cond:zstd "src/zstd/bytesrw_zstd.mllib" ~dst_dir:"zstd"; 37 | Pkg.clib ~cond:zstd "src/zstd/libbytesrw_zstd_stubs.clib" 38 | ~lib_dst_dir:"zstd"; 39 | Pkg.doc "doc/index.mld" ~dst:"odoc-pages/index.mld"; 40 | Pkg.doc "doc/cookbook.mld" ~dst:"odoc-pages/cookbook.mld"; 41 | Pkg.doc "doc/notes.mld" ~dst:"odoc-pages/notes.mld"; 42 | Pkg.doc "doc/tutorial.mld" ~dst:"odoc-pages/tutorial.mld";] 43 | -------------------------------------------------------------------------------- /src/blake3/bytesrw_blake3.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | (* Library parameters *) 9 | 10 | external version : unit -> string = "ocaml_bytesrw_blake3_version" 11 | 12 | (* The type for blake3 hashes *) 13 | 14 | module type Blake3 = sig 15 | val id : string 16 | val length : int 17 | type t 18 | type key = t 19 | module State : sig 20 | type t 21 | val make : ?key:key -> unit -> t 22 | val update : t -> Bytes.Slice.t -> unit 23 | end 24 | val value : State.t -> t 25 | val string : ?key:t -> string -> t 26 | val bytes : ?key:t -> bytes -> t 27 | val slice : ?key:t -> Bytes.Slice.t -> t 28 | val reader : ?key:t -> Bytes.Reader.t -> t 29 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 30 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 31 | val equal : t -> t -> bool 32 | val compare : t -> t -> int 33 | val to_binary_string : t -> string 34 | val of_binary_string : string -> (t, string) result 35 | val to_hex : t -> string 36 | val of_hex : string -> (t, string) result 37 | val pp : Format.formatter -> t -> unit 38 | end 39 | 40 | (* BLAKE3 hash *) 41 | 42 | module Blake3_hasher = struct 43 | type t (* Custom value holding a blake3_hasher struct *) 44 | type hash = string (* 32 bytes *) 45 | type key = hash 46 | external create : unit -> t = "ocaml_bytesrw_blake3_create" 47 | external init : t -> unit = "ocaml_bytesrw_blake3_init" 48 | external init_keyed : t -> key:key -> unit = "ocaml_bytesrw_blake3_init_keyed" 49 | external finalize : t -> hash = "ocaml_bytesrw_blake3_finalize" 50 | external update : t -> bytes -> int -> int -> unit = 51 | "ocaml_bytesrw_blake3_update" 52 | 53 | external hash : bytes -> int -> int -> hash = "ocaml_bytesrw_blake3_hash" 54 | external hash_keyed : key:key -> bytes -> int -> int -> hash = 55 | "ocaml_bytesrw_blake3_hash_keyed" 56 | end 57 | 58 | module Blake3 = struct 59 | module State = struct 60 | type t = Blake3_hasher.t 61 | 62 | let make ?key () = 63 | let state = Blake3_hasher.create () in 64 | begin match key with 65 | | None -> Blake3_hasher.init state 66 | | Some key -> Blake3_hasher.init_keyed state ~key 67 | end; 68 | state 69 | 70 | let update state s = 71 | let b = Bytes.Slice.bytes s in 72 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 73 | Blake3_hasher.update state b first last 74 | end 75 | 76 | let id = "blake3" 77 | let length = 32 78 | type t = Blake3_hasher.hash 79 | type key = t 80 | 81 | let value = Blake3_hasher.finalize 82 | let bytes ?key b = match key with 83 | | None -> Blake3_hasher.hash b 0 (Bytes.length b) 84 | | Some key -> Blake3_hasher.hash_keyed ~key b 0 (Bytes.length b) 85 | 86 | let string ?key s = bytes ?key (Bytes.unsafe_of_string s) 87 | 88 | let slice ?key s = 89 | let b = Bytes.Slice.bytes s in 90 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 91 | match key with 92 | | None -> Blake3_hasher.hash b first length 93 | | Some key -> Blake3_hasher.hash_keyed ~key b first length 94 | 95 | let reader ?key r = 96 | let rec loop state r = match Bytes.Reader.read r with 97 | | s when Bytes.Slice.is_eod s -> value state 98 | | s -> State.update state s; loop state r 99 | in 100 | loop (State.make ?key ()) r 101 | 102 | let reads ?(state = State.make ()) r = 103 | Bytes.Reader.tap (State.update state) r, state 104 | 105 | let writes ?(state = State.make ()) w = 106 | Bytes.Writer.tap (State.update state) w, state 107 | 108 | let equal = String.equal 109 | let compare = String.compare 110 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 111 | let to_binary_string = Fun.id 112 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 113 | let pp = Bytesrw_hex.pp_binary_string 114 | let to_hex = Bytesrw_hex.of_binary_string 115 | end 116 | -------------------------------------------------------------------------------- /src/blake3/bytesrw_blake3.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** [BLAKE3] hashes. 7 | 8 | This module provides support for the {{:https://blake3.io/}BLAKE3} 9 | hash with the [libblake3] C library. *) 10 | 11 | open Bytesrw 12 | 13 | (** The type for [BLAKE3] hashes. *) 14 | module type Blake3 = sig 15 | 16 | (** {1:hashes Hashes} *) 17 | 18 | val id : string 19 | (** [id] identifies the hash function. *) 20 | 21 | val length : int 22 | (** [length] is the byte length of hashes produced by the function. *) 23 | 24 | type t 25 | (** The type for hashes. *) 26 | 27 | type key = t 28 | (** The type for keys. *) 29 | 30 | (** Hash state. *) 31 | module State : sig 32 | type t 33 | (** The type for hash state. *) 34 | 35 | val make : ?key:key -> unit -> t 36 | (** [make ?key ()] is an initial hash state with given 37 | parameters. If [key] is unspecified the hash is unkeyed. *) 38 | 39 | val update : t -> Bytes.Slice.t -> unit 40 | (** [update state slice] updates [state] with the bytes in the range of 41 | [slice]. *) 42 | end 43 | 44 | val value : State.t -> t 45 | (** [value state] is the hash of [state]. This has no effect on 46 | [state] which can still be {!State.update}d. *) 47 | 48 | (** {1:hashing Hashing} *) 49 | 50 | val string : ?key:t -> string -> t 51 | (** [string s] is the hash of [s] keyed with [key] (if any). *) 52 | 53 | val bytes : ?key:t -> bytes -> t 54 | (** [bytes b] is the hash of [b] keyed with [key] (if any). *) 55 | 56 | val slice : ?key:t -> Bytes.Slice.t -> t 57 | (** [slice s] is the hash of the bytes in the range of [s] keyed with 58 | [key] (if any). *) 59 | 60 | val reader : ?key:t -> Bytes.Reader.t -> t 61 | (** [reader r] is the hash of stream [r] keyed with [key] (if any). 62 | This consumes the reader. See also {!reads}. *) 63 | 64 | (** {1:streaming Hashing streams} *) 65 | 66 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 67 | (** [reads r] is [hr, hstate] with: 68 | {ul 69 | {- [hr] a reader that taps the reads of [r] to update [hstate].} 70 | {- [hstate], a hash state of the reads made on [hr] so 71 | far. This is [state] if explicitely given, otherwise 72 | defaults to a fresh {!State.make}.}} 73 | To get an intermediate or final hash result use {!value} on 74 | [hstate]. *) 75 | 76 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 77 | (** [writes ?state w] is [hw, hstate] with: 78 | {ul 79 | {- [hw] a writer that taps the writes to update [hstate] before 80 | giving them to [w].} 81 | {- [hstate], a hash state of the writes made on [hw] so 82 | far. This is [state] if explicitely given, otherwise 83 | defaults to a fresh {!State.make}.}} 84 | To get an intermediate or final hash result use {!value} on 85 | [hstate]. *) 86 | 87 | (** {1:preds Predicates and comparisons} *) 88 | 89 | val equal : t -> t -> bool 90 | (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *) 91 | 92 | val compare : t -> t -> int 93 | (** [comapre] is a total order on hashes compatible with {!equal}. *) 94 | 95 | (** {1:converting Converting} *) 96 | 97 | val to_binary_string : t -> string 98 | (** [to_binary_string h] is a big-endian binary representation 99 | of [h] of length {!length}. *) 100 | 101 | val of_binary_string : string -> (t, string) result 102 | (** [of_binary_string s] is a hash from the big-endian binary 103 | representation stored in [s]. *) 104 | 105 | val to_hex : t -> string 106 | (** [to_hex h] is the binary representation of [h] using lowercase 107 | US-ASCII hex digits. *) 108 | 109 | val of_hex : string -> (t, string) result 110 | (** [of_hex s] parses a sequence of hex digits into a hash. *) 111 | 112 | val pp : Format.formatter -> t -> unit 113 | (** [pp] formats hashes for inspection. *) 114 | end 115 | 116 | (** [BLAKE3] hash. *) 117 | module Blake3 : Blake3 118 | 119 | (** {1:library Library parameters} *) 120 | 121 | val version : unit -> string 122 | (** [version ()] is the version of the [blake3] C library. *) 123 | -------------------------------------------------------------------------------- /src/blake3/bytesrw_blake3.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_blake3 -------------------------------------------------------------------------------- /src/blake3/bytesrw_blake3_stubs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #define blake3_hasher_val(v) ((blake3_hasher *)Data_custom_val(v)) 13 | 14 | static struct custom_operations bytesrw_blake3_ops = 15 | { "bytesrw_blake3_ops", 16 | custom_finalize_default, custom_compare_default, custom_compare_ext_default, 17 | custom_hash_default, custom_serialize_default, custom_deserialize_default }; 18 | 19 | CAMLprim value ocaml_bytesrw_blake3_version (value unit) 20 | { return caml_copy_string (blake3_version ()); } 21 | 22 | CAMLprim value ocaml_bytesrw_blake3_create (value unit) 23 | { return caml_alloc_custom (&bytesrw_blake3_ops, sizeof (blake3_hasher), 0, 1);} 24 | 25 | CAMLprim value ocaml_bytesrw_blake3_init (value hst) 26 | { blake3_hasher_init (blake3_hasher_val (hst)); return Val_unit; } 27 | 28 | CAMLprim value ocaml_bytesrw_blake3_init_keyed (value hst, value key) 29 | { 30 | blake3_hasher_init_keyed (blake3_hasher_val (hst), Bytes_val (key)); 31 | return Val_unit; 32 | } 33 | 34 | CAMLprim value ocaml_bytesrw_blake3_update 35 | (value hst, value str, value ofs, value len) 36 | { 37 | blake3_hasher_update (blake3_hasher_val (hst), 38 | Bytes_val (str) + Int_val (ofs), Int_val (len)); 39 | return Val_unit; 40 | } 41 | 42 | CAMLprim value ocaml_bytesrw_blake3_finalize (value hst) 43 | { 44 | uint8_t hash[BLAKE3_OUT_LEN]; 45 | blake3_hasher_finalize (blake3_hasher_val (hst), hash, BLAKE3_OUT_LEN); 46 | return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash); 47 | } 48 | 49 | CAMLprim value ocaml_bytesrw_blake3_hash 50 | (value str, value ofs, value len) 51 | { 52 | blake3_hasher h; 53 | uint8_t hash[BLAKE3_OUT_LEN]; 54 | blake3_hasher_init(&h); 55 | blake3_hasher_update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 56 | blake3_hasher_finalize (&h, hash, BLAKE3_OUT_LEN); 57 | return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash); 58 | } 59 | 60 | CAMLprim value ocaml_bytesrw_blake3_hash_keyed 61 | (value key, value str, value ofs, value len) 62 | { 63 | blake3_hasher h; 64 | uint8_t hash[BLAKE3_OUT_LEN]; 65 | blake3_hasher_init_keyed(&h, Bytes_val (key)); 66 | blake3_hasher_update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 67 | blake3_hasher_finalize (&h, hash, BLAKE3_OUT_LEN); 68 | return caml_alloc_initialized_string (BLAKE3_OUT_LEN, (char *)hash); 69 | } 70 | -------------------------------------------------------------------------------- /src/blake3/libbytesrw_blake3_stubs.clib: -------------------------------------------------------------------------------- 1 | bytesrw_blake3_stubs.o -------------------------------------------------------------------------------- /src/bytesrw.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_fmt 2 | Bytesrw 3 | Bytesrw_utf 4 | Bytesrw_hex 5 | -------------------------------------------------------------------------------- /src/bytesrw_fmt.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | let pp_hex_char ppf i = Format.fprintf ppf "%02x" i 7 | let pp_raw_char ppf c = match Char.code c with 8 | | 0x0A -> Format.pp_print_string ppf "\n" 9 | | 0x0D -> Format.pp_print_string ppf "\r" 10 | | i when i < 0x20 || i = 0x7f -> Format.fprintf ppf "\\x%a" pp_hex_char i 11 | | _ -> (* XXX We should try to decode utf-8 and if not then escape *) 12 | Format.pp_print_char ppf c 13 | 14 | let pp_head_hex count ~first ~len ppf b = 15 | let max = first + Int.min count len - 1 in 16 | if max < 0 then Format.pp_print_string ppf "" else begin 17 | Format.pp_print_char ppf 'x'; 18 | for i = first to max 19 | do pp_hex_char ppf (Bytes.get_uint8 b i) done; 20 | if len - 1 > max then Format.fprintf ppf "@<1>%s" "…"; 21 | end 22 | 23 | let pp_head_raw count ~first ~len ppf b = 24 | let max = first + Int.min count len - 1 in 25 | if max < 0 then Format.pp_print_string ppf "" else begin 26 | Format.pp_print_char ppf '\"'; 27 | for i = first to max do Format.pp_print_char ppf (Bytes.get b i) done; 28 | if len - 1 > max then Format.fprintf ppf "@<1>%s" "…"; 29 | Format.pp_print_char ppf '\"'; 30 | end 31 | 32 | let pp_raw ~first ~len ppf b = 33 | Format.pp_open_vbox ppf 1; 34 | Format.pp_print_char ppf '\"'; 35 | for i = 0 to len - 1 do 36 | pp_raw_char ppf (Bytes.get b (first + i)); 37 | if (i + 1) mod 60 = 0 38 | then (Format.pp_print_char ppf '\\'; Format.pp_print_cut ppf ()) 39 | done; 40 | Format.pp_print_char ppf '\"'; 41 | Format.pp_close_box ppf () 42 | 43 | (* XXX review this *) 44 | 45 | let strf = Printf.sprintf 46 | let err_range ~start ~len ~blen = 47 | invalid_arg @@ 48 | strf "range start %d len %d: not in bounds [0;%d]" start len (blen - 1) 49 | 50 | let ilog2 v = 51 | let rec loop p v = match v with 52 | | 0 -> p 53 | | v -> loop (p + 1) (v lsr 1) 54 | in 55 | loop (-1) v 56 | 57 | let pp_address ~addr ~addr_start ~addr_div ~start ~len = 58 | let pp_32 ppf addr = Format.fprintf ppf "%08x " addr in 59 | let pp_64 ppf addr = Format.fprintf ppf "%016x " addr in 60 | if not addr then fun ppf _ -> () else 61 | let astart = match addr_start with Some a -> a | None -> start in 62 | let amax = astart + len in 63 | let pp_address = 64 | if Sys.int_size = 31 then pp_32 else 65 | if ilog2 amax < 32 then pp_32 else pp_64 66 | in 67 | fun ppf off -> pp_address ppf ((astart + off) / addr_div) 68 | 69 | let pp_ascii_col ppf get_uint8 b start stop = 70 | let pp_ascii_byte ppf b i = 71 | let byte = get_uint8 b i in 72 | if byte < 0x1F || byte > 0x7E 73 | then Format.pp_print_char ppf '.' 74 | else Format.pp_print_char ppf (Char.chr byte) 75 | in 76 | Format.fprintf ppf " @[@<1>%s" "│"; 77 | for i = start to stop do pp_ascii_byte ppf b i done; 78 | Format.fprintf ppf "@<1>%s@]" "│"; 79 | () 80 | 81 | let pp_hex 82 | ?(addr = false) ?addr_start ?(addr_div = 1) ?(count = 16) ?(group = 2) 83 | ?(ascii = false) ?(start = 0) ?len () ppf b 84 | = 85 | let blen = Bytes.length b in 86 | let len = match len with None -> blen - start | Some len -> len in 87 | if len = 0 then () else 88 | let bmax = start + len - 1 in 89 | match 0 <= start && start <= bmax && bmax < blen with 90 | | false -> err_range ~start ~len ~blen 91 | | true -> 92 | let pp_address = pp_address ~addr ~addr_start ~addr_div ~start ~len in 93 | Format.pp_open_vbox ppf 0; 94 | pp_address ppf 0; 95 | Format.fprintf ppf "%02x" (Bytes.get_uint8 b start); 96 | for i = start + 1 to bmax do 97 | if i mod count = 0 then 98 | begin 99 | if ascii 100 | then pp_ascii_col ppf Bytes.get_uint8 b (i - count) (i - 1); 101 | Format.pp_print_cut ppf (); 102 | pp_address ppf i 103 | end 104 | else if i mod group = 0 then Format.pp_print_char ppf ' '; 105 | Format.fprintf ppf "%02x" (Bytes.get_uint8 b i); 106 | done; 107 | if ascii then begin (* finish the line *) 108 | for i = bmax + 1 to bmax + (count - (bmax mod count)) - 1 do 109 | if i mod group = 0 then Format.pp_print_char ppf ' '; 110 | Format.fprintf ppf " "; 111 | done; 112 | pp_ascii_col ppf Bytes.get_uint8 b (bmax - bmax mod count) bmax; 113 | end; 114 | if addr then (Format.pp_print_cut ppf (); pp_address ppf len); 115 | Format.pp_close_box ppf (); 116 | () 117 | -------------------------------------------------------------------------------- /src/bytesrw_fmt.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val pp_head_hex : 7 | int -> first:int -> len:int -> Format.formatter -> bytes -> unit 8 | 9 | val pp_head_raw : 10 | int -> first:int -> len:int -> Format.formatter -> bytes -> unit 11 | 12 | val pp_raw : first:int -> len:int -> Format.formatter -> bytes -> unit 13 | val pp_hex : 14 | ?addr:bool -> ?addr_start:int -> ?addr_div:int -> ?count:int -> 15 | ?group:int -> ?ascii:bool -> ?start:int -> ?len:int -> unit -> 16 | Format.formatter -> bytes -> unit 17 | (** See {!Bytesrw.Bytes.pp_hex}. *) 18 | -------------------------------------------------------------------------------- /src/bytesrw_hex.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Binary strings *) 7 | 8 | let strf = Printf.sprintf 9 | 10 | let to_binary_string' h = (* raises Failure *) 11 | let hex_value s i = match s.[i] with 12 | | '0' .. '9' as c -> Char.code c - 0x30 13 | | 'A' .. 'F' as c -> 10 + (Char.code c - 0x41) 14 | | 'a' .. 'f' as c -> 10 + (Char.code c - 0x61) 15 | | c -> failwith (strf "%d: %C is not an ASCII hexadecimal digit" i c) 16 | in 17 | match String.length h with 18 | | len when len mod 2 <> 0 -> failwith "Missing final hex digit" 19 | | len -> 20 | let rec loop max s i h k = match i > max with 21 | | true -> Bytes.unsafe_to_string s 22 | | false -> 23 | let hi = hex_value h k and lo = hex_value h (k + 1) in 24 | Bytes.set s i (Char.chr @@ (hi lsl 4) lor lo); 25 | loop max s (i + 1) h (k + 2) 26 | in 27 | let s_len = len / 2 in 28 | let s = Bytes.create s_len in 29 | loop (s_len - 1) s 0 h 0 30 | 31 | let err_len ~exp ~fnd = 32 | strf "Expected %d ASCII hexadecimal digits but found %d characters" exp fnd 33 | 34 | let to_binary_string ?length hex = 35 | try match length with 36 | | None -> Ok (to_binary_string' hex) 37 | | Some len -> 38 | let exp = len * 2 in 39 | let fnd = String.length hex in 40 | if exp <> fnd then failwith (err_len ~exp ~fnd) else 41 | Ok (to_binary_string' hex) 42 | with 43 | | Failure e -> Error e 44 | 45 | let pp_binary_string ppf s = 46 | for i = 0 to String.length s - 1 47 | do Format.fprintf ppf "%02x" (Char.code (s.[i])) done 48 | 49 | let of_binary_string s = Format.asprintf "%a" pp_binary_string s 50 | 51 | let check_binary_string_length ~length s = 52 | let len = String.length s in 53 | if len = length then Ok s else 54 | Error (strf "Expected %d bytes but found %d" length len) 55 | -------------------------------------------------------------------------------- /src/bytesrw_hex.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Hexadecimal tools. 7 | 8 | See also {!Bytesrw.Bytes.pp_hex}. *) 9 | 10 | (** {1:binary_strings Binary strings} *) 11 | 12 | val to_binary_string : ?length:int -> string -> (string, string) result 13 | (** [to_binary_string hex] converts [hex], made of upper or lowercase 14 | US-ASCII hexadecimal digits to a binary string. If [length] is 15 | specified, errors if the result is not exactly [length] bytes. *) 16 | 17 | val of_binary_string : string -> string 18 | (** [of_binary_string s] is the bytes of [s] in lowercase US-ASCII 19 | hexadecimal digits. *) 20 | 21 | val pp_binary_string : Format.formatter -> string -> unit 22 | (** [pp_binary_string ppf s] formats the bytes of [s] with lowercase 23 | US-ASCII hexadecimal digits. *) 24 | 25 | val check_binary_string_length : length:int -> string -> (string, string) result 26 | (** [check_binary_string_length ~length s] checks that [s] has [length] 27 | bytes and errors otherwise. *) 28 | -------------------------------------------------------------------------------- /src/bytesrw_utf.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | (* XXX add these things to Stdlib.Uchar *) 9 | 10 | let uchar_max_utf_8_byte_length = 4 11 | let[@inline] uchar_utf_8_byte_decode_length = function 12 | | '\x00' .. '\x7F' -> 1 13 | | '\x80' .. '\xC1' -> 0 14 | | '\xC2' .. '\xDF' -> 2 15 | | '\xE0' .. '\xEF' -> 3 16 | | '\xF0' .. '\xF4' -> 4 17 | | _ -> 0 18 | 19 | (* Encodings *) 20 | 21 | module Encoding = struct 22 | type t = [ `Utf_8 | `Utf_16be | `Utf_16le ] 23 | 24 | let to_iana_charset = function 25 | | `Utf_8 -> "UTF-8" | `Utf_16 -> "UTF-16" | `Utf_16be -> "UTF-16BE" 26 | | `Utf_16le -> "UTF-16LE" 27 | 28 | let pp ppf e = Format.pp_print_string ppf (to_iana_charset e) 29 | end 30 | 31 | (* Encoding guess *) 32 | 33 | let guess_reader_encoding r = match Bytes.Reader.sniff 3 r with 34 | | s when String.length s <= 1 -> `Utf_8 (* No or little input *) 35 | | "\xEF\xBB\xBF" -> `Utf_8 (* BOM *) 36 | | s when s.[0] = '\xFE' && s.[1] = '\xFF' -> `Utf_16be (* BOM *) 37 | | s when s.[0] = '\xFF' && s.[1] = '\xFE' -> `Utf_16le (* BOM *) 38 | | s when s.[0] = '\x00' && Char.code s.[1] > 0 -> `Utf_16be (* ASCII char *) 39 | | s when Char.code s.[0] > 0 && s.[1] = '\x00' -> `Utf_16le (* ASCII char *) 40 | | s when uchar_utf_8_byte_decode_length s.[0] <> 0 -> `Utf_8 41 | | s -> `Utf_16be (* UTF-16 -> UTF-16BE *) 42 | 43 | (* Validate 44 | 45 | let ensure_utf_8_reads ?pos ?slice_length r = 46 | let read () = failwith "Unimplemented" in 47 | Bytes.Reader.make ?pos ?slice_length read 48 | 49 | let ensure_utf_16be_reads ?pos ?slice_length r = failwith "Unimplemented" 50 | let ensure_utf_16le_reads ?pos ?slice_length r = failwith "Unimplemented" 51 | 52 | let ensure_reads = function 53 | | `Utf_8 -> ensure_utf_8_reads 54 | | `Utf_16be -> ensure_utf_16be_reads 55 | | `Utf_16le -> ensure_utf_16le_reads 56 | *) 57 | -------------------------------------------------------------------------------- /src/bytesrw_utf.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** UTF streams. 7 | 8 | A few tools to deal with UTF encoded streams. For now just 9 | encoding guessing, more may be added in the future. 10 | 11 | Sample code for decoding UTF-8 with position tracking using a 12 | byte stream reader and encoding UTF-8 with a byte stream writer can be found 13 | {{:https://github.com/dbuenzli/bytesrw/blob/main/test/utf8codec.ml}here}. *) 14 | 15 | open Bytesrw 16 | 17 | (** {1:encodings Encodings} *) 18 | 19 | (** Encoding specification. *) 20 | module Encoding : sig 21 | 22 | type t = [ 23 | | `Utf_8 (** UTF-8 *) 24 | | `Utf_16be (** UTF-16BE *) 25 | | `Utf_16le (** UTF-16LE *) ] 26 | (** The type for UTF encodings. *) 27 | 28 | val to_iana_charset : [< t | `Utf_16 ] -> string 29 | (** [to_iana_charaset e] is [e] as its 30 | {{:https://www.iana.org/assignments/character-sets/character-sets.xhtml} 31 | IANA character set name}. *) 32 | 33 | val pp : Format.formatter -> [< t | `Utf_16 ] -> unit 34 | (** [pp] formats encodings with {!to_iana_charset}. *) 35 | end 36 | 37 | (** {1:encoding_guess Encoding guess} *) 38 | 39 | val guess_reader_encoding : Bytes.Reader.t -> Encoding.t 40 | (** [guess_reader_encoding r] guesses the encoding at the stream 41 | position of [r] by {{!Bytesrw.Bytes.Reader.sniff}sniff}ing three 42 | bytes and applying {{!encoding_guess_heuristic}this 43 | heuristic} which is subject to change in the future. *) 44 | 45 | (* 46 | (** {1:validate Validate} *) 47 | 48 | val ensure_reads : encoding -> Bytes.Reader.filter 49 | (** [ensure_reads encoding r] filters the reads of [r] to make 50 | sure the stream is a valid [encoding] byte stream. Invalid 51 | byte sequences *) 52 | *) 53 | 54 | (** {1:encoding_guess_heuristic Encoding guess heurisitic} 55 | 56 | The heuristic is compatible with 57 | {{:http://unicode.org/glossary/#byte_order_mark}BOM} based 58 | recognition and the 59 | {{:http://tools.ietf.org/html/rfc4627#section-3}old} JSON encoding 60 | recognition (UTF-8 is mandated nowadays) that relies on ASCII 61 | being present at the beginning of the stream. 62 | 63 | The heuristic looks at the first three bytes of input (or less if 64 | impossible) and takes the {e first} matching byte pattern in the 65 | table below. 66 | {v 67 | xx = any byte 68 | .. = any byte or no byte (input too small) 69 | pp = positive byte 70 | uu = valid UTF-8 first byte 71 | 72 | Bytes | Guess | Rationale 73 | ---------+-----------+----------------------------------------------- 74 | EF BB BF | `UTF_8 | UTF-8 BOM 75 | FE FF .. | `UTF_16BE | UTF-16BE BOM 76 | FF FE .. | `UTF_16LE | UTF-16LE BOM 77 | 00 pp .. | `UTF_16BE | ASCII UTF-16BE and U+0000 is often forbidden 78 | pp 00 .. | `UTF_16LE | ASCII UTF-16LE and U+0000 is often forbidden 79 | uu .. .. | `UTF_8 | ASCII UTF-8 or valid UTF-8 first byte. 80 | xx xx .. | `UTF_16BE | Not UTF-8 => UTF-16, no BOM => UTF-16BE 81 | .. .. .. | `UTF_8 | Single malformed UTF-8 byte or no input. 82 | v} 83 | *) 84 | -------------------------------------------------------------------------------- /src/md/bytesrw_md.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | (* SHA hash signature *) 9 | 10 | module type Sha = sig 11 | val id : string 12 | val length : int 13 | type t 14 | module State : sig 15 | type t 16 | val make : unit -> t 17 | val update : t -> Bytes.Slice.t -> unit 18 | end 19 | val value : State.t -> t 20 | val string : string -> t 21 | val bytes : bytes -> t 22 | val slice : Bytes.Slice.t -> t 23 | val reader : Bytes.Reader.t -> t 24 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 25 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 26 | val equal : t -> t -> bool 27 | val compare : t -> t -> int 28 | val to_binary_string : t -> string 29 | val of_binary_string : string -> (t, string) result 30 | val to_hex : t -> string 31 | val of_hex : string -> (t, string) result 32 | val pp : Format.formatter -> t -> unit 33 | end 34 | 35 | (* SHA-1 *) 36 | 37 | module Sha1_ctx = struct 38 | type t (* Custom value whose data holds a SHA1_CTX C struct *) 39 | type hash = string (* 20 bytes *) 40 | external hash : bytes -> int -> int -> hash = "ocaml_bytesrw_sha1_hash" 41 | external init : unit -> t = "ocaml_bytesrw_sha1_init" 42 | external final : t -> hash = "ocaml_bytesrw_sha1_final" 43 | external update : t -> bytes -> int -> int -> unit = 44 | "ocaml_bytesrw_sha1_update" 45 | end 46 | 47 | module Sha_1 = struct 48 | module State = struct 49 | type t = Sha1_ctx.t 50 | let make () = Sha1_ctx.init () 51 | let update state s = 52 | if Bytes.Slice.is_eod s then () else 53 | let b = Bytes.Slice.bytes s in 54 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 55 | Sha1_ctx.update state b first last 56 | end 57 | 58 | let id = "sha1" 59 | let length = 20 60 | type t = Sha1_ctx.hash 61 | let value = Sha1_ctx.final 62 | let bytes b = Sha1_ctx.hash b 0 (Bytes.length b) 63 | let string s = bytes (Bytes.unsafe_of_string s) 64 | let slice s = 65 | let b = Bytes.Slice.bytes s in 66 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 67 | Sha1_ctx.hash b first length 68 | 69 | let reader r = 70 | let rec loop state r = match Bytes.Reader.read r with 71 | | s when Bytes.Slice.is_eod s -> value state 72 | | s -> State.update state s; loop state r 73 | in 74 | loop (State.make ()) r 75 | 76 | let reads ?(state = State.make ()) r = 77 | Bytes.Reader.tap (State.update state) r, state 78 | 79 | let writes ?(state = State.make ()) w = 80 | Bytes.Writer.tap (State.update state) w, state 81 | 82 | let equal = String.equal 83 | let compare = String.compare 84 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 85 | let to_binary_string = Fun.id 86 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 87 | let pp = Bytesrw_hex.pp_binary_string 88 | let to_hex = Bytesrw_hex.of_binary_string 89 | end 90 | 91 | (* SHA-2 *) 92 | 93 | module Sha2_ctx = struct 94 | type t (* Custom value whose data holds a SHA2_CTX C struct *) 95 | type h256 = string (* 32 bytes *) 96 | type h384 = string (* 48 bytes *) 97 | type h512 = string (* 64 bytes *) 98 | 99 | external hash256 : bytes -> int -> int -> h256 = "ocaml_bytesrw_sha256_hash" 100 | external init256 : unit -> t = "ocaml_bytesrw_sha256_init" 101 | external final256 : t -> h256 = "ocaml_bytesrw_sha256_final" 102 | external update256 : t -> bytes -> int -> int -> unit = 103 | "ocaml_bytesrw_sha256_update" 104 | 105 | external hash384 : bytes -> int -> int -> h384 = "ocaml_bytesrw_sha384_hash" 106 | external init384 : unit -> t = "ocaml_bytesrw_sha384_init" 107 | external final384 : t -> h384 = "ocaml_bytesrw_sha384_final" 108 | external update384 : t -> bytes -> int -> int -> unit = 109 | "ocaml_bytesrw_sha384_update" 110 | 111 | external hash512 : bytes -> int -> int -> h512 = "ocaml_bytesrw_sha512_hash" 112 | external init512 : unit -> t = "ocaml_bytesrw_sha512_init" 113 | external final512 : t -> h512 = "ocaml_bytesrw_sha512_final" 114 | external update512 : t -> bytes -> int -> int -> unit = 115 | "ocaml_bytesrw_sha512_update" 116 | end 117 | 118 | module Sha_256 = struct 119 | module State = struct 120 | type t = Sha2_ctx.t 121 | let make () = Sha2_ctx.init256 () 122 | let update state s = 123 | if Bytes.Slice.is_eod s then () else 124 | let b = Bytes.Slice.bytes s in 125 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 126 | Sha2_ctx.update256 state b first last 127 | end 128 | 129 | let id = "sha-256" 130 | let length = 32 131 | type t = Sha2_ctx.h256 132 | 133 | let value = Sha2_ctx.final256 134 | let bytes b = Sha2_ctx.hash256 b 0 (Bytes.length b) 135 | let string s = bytes (Bytes.unsafe_of_string s) 136 | let slice s = 137 | let b = Bytes.Slice.bytes s in 138 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 139 | Sha2_ctx.hash256 b first length 140 | 141 | let reader r = 142 | let rec loop state r = match Bytes.Reader.read r with 143 | | s when Bytes.Slice.is_eod s -> value state 144 | | s -> State.update state s; loop state r 145 | in 146 | loop (State.make ()) r 147 | 148 | let reads ?(state = State.make ()) r = 149 | Bytes.Reader.tap (State.update state) r, state 150 | 151 | let writes ?(state = State.make ()) w = 152 | Bytes.Writer.tap (State.update state) w, state 153 | 154 | let equal = String.equal 155 | let compare = String.compare 156 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 157 | let to_binary_string = Fun.id 158 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 159 | let pp = Bytesrw_hex.pp_binary_string 160 | let to_hex = Bytesrw_hex.of_binary_string 161 | end 162 | 163 | module Sha_384 = struct 164 | module State = struct 165 | type t = Sha2_ctx.t 166 | let make () = Sha2_ctx.init384 () 167 | let update state s = 168 | if Bytes.Slice.is_eod s then () else 169 | let b = Bytes.Slice.bytes s in 170 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 171 | Sha2_ctx.update384 state b first last 172 | end 173 | 174 | let id = "sha-384" 175 | let length = 48 176 | type t = Sha2_ctx.h384 177 | 178 | let value = Sha2_ctx.final384 179 | let bytes b = Sha2_ctx.hash384 b 0 (Bytes.length b) 180 | let string s = bytes (Bytes.unsafe_of_string s) 181 | let slice s = 182 | let b = Bytes.Slice.bytes s in 183 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 184 | Sha2_ctx.hash384 b first length 185 | 186 | let reader r = 187 | let rec loop state r = match Bytes.Reader.read r with 188 | | s when Bytes.Slice.is_eod s -> value state 189 | | s -> State.update state s; loop state r 190 | in 191 | loop (State.make ()) r 192 | 193 | let reads ?(state = State.make ()) r = 194 | Bytes.Reader.tap (State.update state) r, state 195 | 196 | let writes ?(state = State.make ()) w = 197 | Bytes.Writer.tap (State.update state) w, state 198 | 199 | let equal = String.equal 200 | let compare = String.compare 201 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 202 | let to_binary_string = Fun.id 203 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 204 | let pp = Bytesrw_hex.pp_binary_string 205 | let to_hex = Bytesrw_hex.of_binary_string 206 | end 207 | 208 | module Sha_512 = struct 209 | module State = struct 210 | type t = Sha2_ctx.t 211 | let make () = Sha2_ctx.init512 () 212 | let update state s = 213 | if Bytes.Slice.is_eod s then () else 214 | let b = Bytes.Slice.bytes s in 215 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 216 | Sha2_ctx.update512 state b first last 217 | end 218 | 219 | let id = "sha-512" 220 | let length = 64 221 | type t = Sha2_ctx.h512 222 | 223 | let value = Sha2_ctx.final512 224 | let bytes b = Sha2_ctx.hash512 b 0 (Bytes.length b) 225 | let string s = bytes (Bytes.unsafe_of_string s) 226 | let slice s = 227 | let b = Bytes.Slice.bytes s in 228 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 229 | Sha2_ctx.hash512 b first length 230 | 231 | let reader r = 232 | let rec loop state r = match Bytes.Reader.read r with 233 | | s when Bytes.Slice.is_eod s -> value state 234 | | s -> State.update state s; loop state r 235 | in 236 | loop (State.make ()) r 237 | 238 | let reads ?(state = State.make ()) r = 239 | Bytes.Reader.tap (State.update state) r, state 240 | 241 | let writes ?(state = State.make ()) w = 242 | Bytes.Writer.tap (State.update state) w, state 243 | 244 | let equal = String.equal 245 | let compare = String.compare 246 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 247 | let to_binary_string = Fun.id 248 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 249 | let pp = Bytesrw_hex.pp_binary_string 250 | let to_hex = Bytesrw_hex.of_binary_string 251 | end 252 | -------------------------------------------------------------------------------- /src/md/bytesrw_md.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** [SHA-1] and [SHA-2] hashes. 7 | 8 | This module provides support for the SHA-1 and SHA-2 9 | hashes with the {{:https://www.hadrons.org/software/libmd/} 10 | [libmd]} C library. *) 11 | 12 | open Bytesrw 13 | 14 | (** The type for [SHA-1] and [SHA-2] hashes. *) 15 | module type Sha = sig 16 | 17 | (** {1:hashes Hashes} *) 18 | 19 | val id : string 20 | (** [id] identifies the hash function. *) 21 | 22 | val length : int 23 | (** [length] is the byte length of hashes produced by the function. *) 24 | 25 | type t 26 | (** The type for hashes. *) 27 | 28 | (** Hash state. *) 29 | module State : sig 30 | type t 31 | (** The type for hash state. *) 32 | 33 | val make : unit -> t 34 | (** [make ()] is an initial hash state. *) 35 | 36 | val update : t -> Bytes.Slice.t -> unit 37 | (** [update state slice] updates [state] with the bytes in the range of 38 | [slice]. *) 39 | end 40 | 41 | val value : State.t -> t 42 | (** [value state] is the hash of [state]. {b Warning.} This has an effect on 43 | [state], it can no longer be {!State.update}d. *) 44 | 45 | (** {1:hashing Hashing} *) 46 | 47 | val string : string -> t 48 | (** [string s] is the hash of [s]. *) 49 | 50 | val bytes : bytes -> t 51 | (** [bytes b] is the hash of [b]. *) 52 | 53 | val slice : Bytes.Slice.t -> t 54 | (** [slice s] is the hash of the bytes in the range of [s]. *) 55 | 56 | val reader : Bytes.Reader.t -> t 57 | (** [reader r] is the hash of stream [r]. This consumes the reader. 58 | See also {!reads}. *) 59 | 60 | (** {1:streaming Hashing streams} *) 61 | 62 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 63 | (** [reads r] is [hr, hstate] with: 64 | {ul 65 | {- [hr] a reader that taps the reads of [r] to update [hstate].} 66 | {- [hstate], a hash state of the reads made on [hr] so 67 | far. This is [state] if explicitely given, otherwise 68 | defaults to a fresh {!State.make}.}} 69 | To get the final hash result use {!value} on [hstate] {b once}. *) 70 | 71 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 72 | (** [writes ?state w] is [hw, hstate] with: 73 | {ul 74 | {- [hw] a writer that taps the writes to update [hstate] before 75 | giving them to [w].} 76 | {- [hstate], a hash state of the writes made on [hw] so 77 | far. This is [state] if explicitely given, otherwise 78 | defaults to a fresh {!State.make}.}} 79 | To get the final hash result use {!value} on [hstate] {b once}. *) 80 | 81 | (** {1:preds Predicates and comparisons} *) 82 | 83 | val equal : t -> t -> bool 84 | (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *) 85 | 86 | val compare : t -> t -> int 87 | (** [comapre] is a total order on hashes compatible with {!equal}. *) 88 | 89 | (** {1:converting Converting} *) 90 | 91 | val to_binary_string : t -> string 92 | (** [to_binary_string h] is a big-endian binary representation 93 | of [h] of length {!length}. *) 94 | 95 | val of_binary_string : string -> (t, string) result 96 | (** [of_binary_string s] is a hash from the big-endian binary 97 | representation stored in [s]. *) 98 | 99 | val to_hex : t -> string 100 | (** [to_hex h] is the binary representation of [h] using lowercase 101 | US-ASCII hex digits. *) 102 | 103 | val of_hex : string -> (t, string) result 104 | (** [of_hex s] parses a sequence of hex digits into a hash. *) 105 | 106 | val pp : Format.formatter -> t -> unit 107 | (** [pp] formats hashes for inspection. *) 108 | end 109 | 110 | (** [SHA-1] hash. *) 111 | module Sha_1 : Sha 112 | 113 | (** [SHA-256] hash. *) 114 | module Sha_256 : Sha 115 | 116 | (** [SHA-384] hash. *) 117 | module Sha_384 : Sha 118 | 119 | (** [SHA-512] hash. *) 120 | module Sha_512 : Sha 121 | -------------------------------------------------------------------------------- /src/md/bytesrw_md.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_md -------------------------------------------------------------------------------- /src/md/bytesrw_md_stubs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #define sha1_ctx_val(v) ((SHA1_CTX *)Data_custom_val(v)) 14 | #define sha2_ctx_val(v) ((SHA2_CTX *)Data_custom_val(v)) 15 | 16 | static struct custom_operations bytesrw_md_ops = 17 | { "bytesrw_md_ops", 18 | custom_finalize_default, custom_compare_default, custom_compare_ext_default, 19 | custom_hash_default, custom_serialize_default, custom_deserialize_default }; 20 | 21 | /* SHA-1 */ 22 | 23 | CAMLprim value ocaml_bytesrw_sha1_init (value unit) 24 | { 25 | value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA1_CTX), 0, 1); 26 | SHA1Init (sha1_ctx_val (hctx)); 27 | return hctx; 28 | } 29 | 30 | CAMLprim value ocaml_bytesrw_sha1_update 31 | (value hctx, value str, value ofs, value len) 32 | { 33 | SHA1Update (sha1_ctx_val (hctx), Bytes_val (str) + Int_val (ofs), 34 | Int_val (len)); 35 | return Val_unit; 36 | } 37 | 38 | CAMLprim value ocaml_bytesrw_sha1_final (value hctx) 39 | { 40 | /* Make a local copy, SHA1Final updates the context */ 41 | SHA1_CTX ctx = *sha1_ctx_val (hctx); 42 | value r = caml_alloc_string (SHA1_DIGEST_LENGTH); 43 | SHA1Final (Bytes_val (r), &ctx); 44 | return r; 45 | } 46 | 47 | CAMLprim value ocaml_bytesrw_sha1_hash 48 | (value str, value ofs, value len) 49 | { 50 | SHA1_CTX h; 51 | uint8_t hash[SHA1_DIGEST_LENGTH]; 52 | SHA1Init (&h); 53 | SHA1Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 54 | SHA1Final (hash, &h); 55 | return caml_alloc_initialized_string (SHA1_DIGEST_LENGTH, (char *)hash); 56 | } 57 | 58 | /* SHA-256 */ 59 | 60 | CAMLprim value ocaml_bytesrw_sha256_init (value unit) 61 | { 62 | value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1); 63 | SHA256Init (sha2_ctx_val (hctx)); 64 | return hctx; 65 | } 66 | 67 | CAMLprim value ocaml_bytesrw_sha256_update 68 | (value hctx, value str, value ofs, value len) 69 | { 70 | SHA256Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs), 71 | Int_val (len)); 72 | return Val_unit; 73 | } 74 | 75 | CAMLprim value ocaml_bytesrw_sha256_final (value hctx) 76 | { 77 | /* Make a local copy, SHA256Final updates the context */ 78 | SHA2_CTX ctx = *sha2_ctx_val (hctx); 79 | value r = caml_alloc_string (SHA256_DIGEST_LENGTH); 80 | SHA256Final (Bytes_val (r), &ctx); 81 | return r; 82 | } 83 | 84 | CAMLprim value ocaml_bytesrw_sha256_hash 85 | (value str, value ofs, value len) 86 | { 87 | SHA2_CTX h; 88 | uint8_t hash[SHA256_DIGEST_LENGTH]; 89 | SHA256Init (&h); 90 | SHA256Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 91 | SHA256Final (hash, &h); 92 | return caml_alloc_initialized_string (SHA256_DIGEST_LENGTH, (char *)hash); 93 | } 94 | 95 | /* SHA-384 */ 96 | 97 | CAMLprim value ocaml_bytesrw_sha384_init (value unit) 98 | { 99 | value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1); 100 | SHA384Init (sha2_ctx_val (hctx)); 101 | return hctx; 102 | } 103 | 104 | CAMLprim value ocaml_bytesrw_sha384_update 105 | (value hctx, value str, value ofs, value len) 106 | { 107 | SHA384Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs), 108 | Int_val (len)); 109 | return Val_unit; 110 | } 111 | 112 | CAMLprim value ocaml_bytesrw_sha384_final (value hctx) 113 | { 114 | /* Make a local copy, SHA384Final updates the context */ 115 | SHA2_CTX ctx = *sha2_ctx_val (hctx); 116 | value r = caml_alloc_string (SHA384_DIGEST_LENGTH); 117 | SHA384Final (Bytes_val (r), &ctx); 118 | return r; 119 | } 120 | 121 | CAMLprim value ocaml_bytesrw_sha384_hash 122 | (value str, value ofs, value len) 123 | { 124 | SHA2_CTX h; 125 | uint8_t hash[SHA384_DIGEST_LENGTH]; 126 | SHA384Init (&h); 127 | SHA384Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 128 | SHA384Final (hash, &h); 129 | return caml_alloc_initialized_string (SHA384_DIGEST_LENGTH, (char *)hash); 130 | } 131 | 132 | /* SHA-512 */ 133 | 134 | CAMLprim value ocaml_bytesrw_sha512_init (value unit) 135 | { 136 | value hctx = caml_alloc_custom (&bytesrw_md_ops, sizeof (SHA2_CTX), 0, 1); 137 | SHA512Init (sha2_ctx_val (hctx)); 138 | return hctx; 139 | } 140 | 141 | CAMLprim value ocaml_bytesrw_sha512_update 142 | (value hctx, value str, value ofs, value len) 143 | { 144 | SHA512Update (sha2_ctx_val (hctx), Bytes_val (str) + Int_val (ofs), 145 | Int_val (len)); 146 | return Val_unit; 147 | } 148 | 149 | CAMLprim value ocaml_bytesrw_sha512_final (value hctx) 150 | { 151 | /* Make a local copy, SHA512Final updates the context */ 152 | SHA2_CTX ctx = *sha2_ctx_val (hctx); 153 | value r = caml_alloc_string (SHA512_DIGEST_LENGTH); 154 | SHA512Final (Bytes_val (r), &ctx); 155 | return r; 156 | } 157 | 158 | CAMLprim value ocaml_bytesrw_sha512_hash 159 | (value str, value ofs, value len) 160 | { 161 | SHA2_CTX h; 162 | uint8_t hash[SHA512_DIGEST_LENGTH]; 163 | SHA512Init (&h); 164 | SHA512Update (&h, Bytes_val (str) + Int_val (ofs), Int_val (len)); 165 | SHA512Final (hash, &h); 166 | return caml_alloc_initialized_string (SHA512_DIGEST_LENGTH, (char *)hash); 167 | } 168 | -------------------------------------------------------------------------------- /src/md/libbytesrw_md_stubs.clib: -------------------------------------------------------------------------------- 1 | bytesrw_md_stubs.o -------------------------------------------------------------------------------- /src/unix/bytesrw_unix.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | (* Readers and writers. *) 9 | 10 | let rec current_pos fd = match Unix.lseek fd 0 Unix.SEEK_CUR with 11 | | exception Unix.Unix_error (ESPIPE, _, _) -> 0 12 | | exception Unix.Unix_error (EINTR, _, _) -> current_pos fd 13 | | pos -> if pos < 0 then 0 else pos 14 | 15 | let bytes_reader_of_fd 16 | ?pos ?(slice_length = Bytes.Slice.unix_io_buffer_size) fd 17 | = 18 | let pos = match pos with Some pos -> pos | None -> current_pos fd in 19 | let b = Bytes.create (Bytes.Slice.check_length slice_length) in 20 | let rec read () = match Unix.read fd b 0 slice_length with 21 | | 0 -> Bytes.Slice.eod 22 | | count -> Bytes.Slice.make b ~first:0 ~length:count 23 | | exception Unix.Unix_error (Unix.EINTR, _, _) -> read () 24 | in 25 | Bytes.Reader.make ~pos ~slice_length read 26 | 27 | let bytes_writer_of_fd 28 | ?pos ?(slice_length = Bytes.Slice.unix_io_buffer_size) fd 29 | = 30 | let pos = match pos with Some pos -> pos | None -> current_pos fd in 31 | let rec write = function 32 | | s when Bytes.Slice.is_eod s -> () 33 | | s -> 34 | let b = Bytes.Slice.bytes s in 35 | let first = Bytes.Slice.first s and length = Bytes.Slice.length s in 36 | match Unix.single_write fd b first length with 37 | | count when count = length -> () 38 | | count -> write (Option.get (Bytes.Slice.drop count s)) 39 | | exception Unix.Unix_error (Unix.EINTR, _, _) -> write s 40 | in 41 | Bytes.Writer.make ~pos ~slice_length write 42 | -------------------------------------------------------------------------------- /src/unix/bytesrw_unix.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** {!Unix} file descriptor byte stream readers and writers. *) 7 | 8 | open Bytesrw 9 | 10 | val bytes_reader_of_fd : 11 | ?pos:Bytes.Stream.pos -> ?slice_length:Bytes.Slice.length -> 12 | Unix.file_descr -> Bytes.Reader.t 13 | (** [bytes_reader_of_fd fd] reads bytes from [fd] with slices of maximal 14 | length [slice_length] (defaults to 15 | {!Bytesrw.Bytes.Slice.unix_io_buffer_size}). 16 | [pos] defaults to the [fd] position as determined by {!Unix.lseek}. 17 | Reads are retried on {!Unix.EINTR} but both this function and the resulting 18 | reader may raise {!Unix.Unix_error}. *) 19 | 20 | val bytes_writer_of_fd : 21 | ?pos:Bytes.Stream.pos -> ?slice_length:Bytes.Slice.length -> 22 | Unix.file_descr -> Bytes.Writer.t 23 | (** [bytes_writer_of_fd fd] writes bytes to [fd]. The hinted 24 | [slice_length] defaults to 25 | {!Bytesrw.Bytes.Slice.unix_io_buffer_size}. [pos] defaults to the 26 | [fd] position as determined by {!Unix.lseek}. Writes are retried 27 | on {!Unix.EINTR} but both this function and the resulting writer 28 | may raise {!Unix.Unix_error}. *) 29 | -------------------------------------------------------------------------------- /src/unix/bytesrw_unix.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_unix -------------------------------------------------------------------------------- /src/xxhash/bytesrw_xxhash.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | external get_64u : String.t -> int -> int64 = "%caml_string_get64u" 9 | external set_64u : Bytes.t -> int -> int64 -> unit = "%caml_string_set64u" 10 | external swap_64 : int64 -> int64 = "%bswap_int64" 11 | external noswap : int64 -> int64 = "%identity" 12 | let layout = if Sys.big_endian then noswap else swap_64 13 | let u64_to_binary_string t = 14 | let b = Bytes.create 8 in set_64u b 0 (layout t); Bytes.unsafe_to_string b 15 | 16 | (* Library parameters *) 17 | 18 | external version : unit -> int = "ocaml_bytesrw_XXH_versionNumber" 19 | external xxh3_secret_size_min : unit -> int = 20 | "ocaml_bytesrw_XXH3_SECRET_SIZE_MIN" 21 | 22 | let version () = 23 | let v = version () in 24 | let maj = v / (100 * 100) and rem = v mod (100 * 100) in 25 | let min = rem / 100 in 26 | Printf.sprintf "%d.%d.%d" maj min (rem mod 100) 27 | 28 | (* XXH3 module type *) 29 | 30 | module type Xxh3 = sig 31 | val id : string 32 | val length : int 33 | type seed = int64 34 | type secret = string 35 | type t 36 | 37 | module State : sig 38 | type t 39 | val make : ?secret:secret -> ?seed:seed -> unit -> t 40 | val update : t -> Bytes.Slice.t -> unit 41 | val copy : t -> t 42 | end 43 | 44 | val value : State.t -> t 45 | val string : ?seed:seed -> string -> t 46 | val bytes : ?seed:seed -> bytes -> t 47 | val slice : ?seed:seed -> Bytes.Slice.t -> t 48 | val reader : ?seed:seed -> Bytes.Reader.t -> t 49 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 50 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 51 | val equal : t -> t -> bool 52 | val compare : t -> t -> int 53 | val to_binary_string : t -> string 54 | val of_binary_string : string -> (t, string) result 55 | val to_hex : t -> string 56 | val of_hex : string -> (t, string) result 57 | val pp : Format.formatter -> t -> unit 58 | end 59 | 60 | module Xxh3_state = struct 61 | type t (* Custom value holding a pointer to a finalized XXH3_state_t *) 62 | type seed = int64 63 | type secret = string 64 | type h64_hash = int64 65 | type h128_hash = string 66 | let no_seed = 0L 67 | 68 | external create : unit -> t = "ocaml_bytesrw_XXH3_createState" 69 | external copy : dst:t -> src:t -> unit = "ocaml_bytesrw_XXH3_copyState" 70 | external h64bits_reset : t -> unit = "ocaml_bytesrw_XXH3_64bits_reset" 71 | external h64bits_reset_with_seed : t -> seed -> unit = 72 | "ocaml_bytesrw_XXH3_64bits_reset_withSeed" 73 | 74 | external h64bits_reset_with_secret : t -> secret -> unit = 75 | "ocaml_bytesrw_XXH3_64bits_reset_withSecret" 76 | 77 | external h64bits_reset_with_secret_and_seed : 78 | t -> secret -> seed -> unit = 79 | "ocaml_bytesrw_XXH3_64bits_reset_withSecretandSeed" 80 | 81 | external h64bits_update : t -> bytes -> int -> int -> unit = 82 | "ocaml_bytesrw_XXH3_64bits_update" 83 | 84 | external h64bits_digest : t -> h64_hash = "ocaml_bytesrw_XXH3_64bits_digest" 85 | 86 | external seeded_64hash : bytes -> int -> int -> seed -> h64_hash = 87 | "ocaml_bytesrw_XXH3_64bits_withSeed" 88 | 89 | external h128bits_reset : t -> unit = "ocaml_bytesrw_XXH3_128bits_reset" 90 | external h128bits_reset_with_seed : t -> seed -> unit = 91 | "ocaml_bytesrw_XXH3_128bits_reset_withSeed" 92 | 93 | external h128bits_reset_with_secret : t -> secret -> unit = 94 | "ocaml_bytesrw_XXH3_128bits_reset_withSecret" 95 | 96 | external h128bits_reset_with_secret_and_seed : 97 | t -> secret -> seed -> unit = 98 | "ocaml_bytesrw_XXH3_128bits_reset_withSecretandSeed" 99 | 100 | external h128bits_update : t -> bytes -> int -> int -> unit = 101 | "ocaml_bytesrw_XXH3_128bits_update" 102 | 103 | external h128bits_digest : t -> h128_hash = 104 | "ocaml_bytesrw_XXH3_128bits_digest" 105 | 106 | external seeded_128hash : bytes -> int -> int -> seed -> h128_hash = 107 | "ocaml_bytesrw_XXH3_128bits_withSeed" 108 | end 109 | 110 | module Xxh3_64 = struct 111 | module State = struct 112 | type t = Xxh3_state.t 113 | 114 | let make ?secret ?seed () = 115 | let state = Xxh3_state.create () in 116 | begin match secret, seed with 117 | | None, None -> Xxh3_state.h64bits_reset state 118 | | None, Some seed -> Xxh3_state.h64bits_reset_with_seed state seed 119 | | Some secret, None -> Xxh3_state.h64bits_reset_with_secret state secret 120 | | Some seed, Some secret -> 121 | Xxh3_state.h64bits_reset_with_secret_and_seed state seed secret 122 | end; 123 | state 124 | 125 | let update state s = 126 | let b = Bytes.Slice.bytes s in 127 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 128 | Xxh3_state.h64bits_update state b first last 129 | 130 | let copy src = 131 | let dst = Xxh3_state.create () in 132 | Xxh3_state.copy ~dst ~src; dst 133 | end 134 | 135 | let id = "xxh3-64" 136 | let length = 8 137 | type seed = Xxh3_state.seed 138 | type secret = Xxh3_state.secret 139 | type t = Xxh3_state.h64_hash 140 | 141 | let value = Xxh3_state.h64bits_digest 142 | let bytes ?(seed = Xxh3_state.no_seed) b = 143 | Xxh3_state.seeded_64hash b 0 (Bytes.length b) seed 144 | 145 | let string ?seed s = bytes ?seed (Bytes.unsafe_of_string s) 146 | 147 | let slice ?(seed = Xxh3_state.no_seed) s = 148 | let b = Bytes.Slice.bytes s in 149 | Xxh3_state.seeded_64hash b (Bytes.Slice.first s) (Bytes.Slice.length s) seed 150 | 151 | let reader ?seed r = 152 | let rec loop state r = match Bytes.Reader.read r with 153 | | s when Bytes.Slice.is_eod s -> value state 154 | | s -> State.update state s; loop state r 155 | in 156 | loop (State.make ?seed ()) r 157 | 158 | let reads ?(state = State.make ()) r = 159 | Bytes.Reader.tap (State.update state) r, state 160 | 161 | let writes ?(state = State.make ()) w = 162 | Bytes.Writer.tap (State.update state) w, state 163 | 164 | let equal = Int64.equal 165 | let compare = Int64.compare 166 | let of_binary_string s = 167 | match Bytesrw_hex.check_binary_string_length ~length s with 168 | | Error _ as e -> e 169 | | Ok s -> Ok (layout (get_64u s 0)) 170 | 171 | let to_binary_string = u64_to_binary_string 172 | let of_hex s = match Bytesrw_hex.to_binary_string ~length s with 173 | | Error _ as e -> e 174 | | Ok s -> Ok (layout (get_64u s 0)) 175 | 176 | let to_hex h = Printf.sprintf "%016Lx" h 177 | let to_uint64 = Fun.id 178 | let of_uint64 = Fun.id 179 | let pp ppf h = Format.fprintf ppf "%016Lx" h 180 | end 181 | 182 | module Xxh3_128 = struct 183 | module State = struct 184 | type t = Xxh3_state.t 185 | 186 | let make ?secret ?seed () = 187 | let state = Xxh3_state.create () in 188 | begin match secret, seed with 189 | | None, None -> Xxh3_state.h128bits_reset state 190 | | None, Some seed -> Xxh3_state.h128bits_reset_with_seed state seed 191 | | Some secret, None -> Xxh3_state.h128bits_reset_with_secret state secret 192 | | Some seed, Some secret -> 193 | Xxh3_state.h128bits_reset_with_secret_and_seed state seed secret 194 | end; 195 | state 196 | 197 | let update state s = 198 | let b = Bytes.Slice.bytes s in 199 | let first = Bytes.Slice.first s and last = Bytes.Slice.length s in 200 | Xxh3_state.h64bits_update state b first last 201 | 202 | let copy src = 203 | let dst = Xxh3_state.create () in 204 | Xxh3_state.copy ~dst ~src; dst 205 | end 206 | 207 | let id = "xxh3-128" 208 | let length = 16 209 | type seed = Xxh3_state.seed 210 | type secret = Xxh3_state.secret 211 | type t = Xxh3_state.h128_hash 212 | 213 | let value = Xxh3_state.h128bits_digest 214 | 215 | let bytes ?(seed = Xxh3_state.no_seed) b = 216 | Xxh3_state.seeded_128hash b 0 (Bytes.length b) seed 217 | 218 | let string ?seed s = bytes ?seed (Bytes.unsafe_of_string s) 219 | 220 | let slice ?(seed = Xxh3_state.no_seed) s = 221 | let b = Bytes.Slice.bytes s in 222 | Xxh3_state.seeded_128hash 223 | b (Bytes.Slice.first s) (Bytes.Slice.length s) seed 224 | 225 | let reader ?seed r = 226 | let rec loop state r = match Bytes.Reader.read r with 227 | | s when Bytes.Slice.is_eod s -> value state 228 | | s -> State.update state s; loop state r 229 | in 230 | loop (State.make ?seed ()) r 231 | 232 | let reads ?(state = State.make ()) r = 233 | Bytes.Reader.tap (State.update state) r, state 234 | 235 | let writes ?(state = State.make ()) w = 236 | Bytes.Writer.tap (State.update state) w, state 237 | 238 | let equal = String.equal 239 | let compare = String.compare 240 | let of_binary_string s = Bytesrw_hex.check_binary_string_length ~length s 241 | let to_binary_string = Fun.id 242 | let of_hex s = Bytesrw_hex.to_binary_string ~length s 243 | let pp = Bytesrw_hex.pp_binary_string 244 | let to_hex = Bytesrw_hex.of_binary_string 245 | end 246 | -------------------------------------------------------------------------------- /src/xxhash/bytesrw_xxhash.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** [XXH3-64] and [XXH3-128] hashes. 7 | 8 | This module provides support for the {{:https://xxhash.com/}XXH3} 9 | hash family with the [libxxhash] C library. *) 10 | 11 | open Bytesrw 12 | 13 | (** The type for [XXH3] hash functions. *) 14 | module type Xxh3 = sig 15 | 16 | (** {1:hashes Hashes} *) 17 | 18 | val id : string 19 | (** [id] identifies the hash function. *) 20 | 21 | val length : int 22 | (** [length] is the byte length of hashes produced by the function. *) 23 | 24 | type seed = int64 25 | (** The type for seeds. *) 26 | 27 | type secret = string 28 | (** The type for secrets. *) 29 | 30 | type t 31 | (** The type for hashes. *) 32 | 33 | (** Hash state. *) 34 | module State : sig 35 | type t 36 | (** The type for hash state. *) 37 | 38 | val make : ?secret:secret -> ?seed:seed -> unit -> t 39 | (** [make ?secret ?seed ()] is an initial hash state with given 40 | parameters. If unspecified the hash is unseeded and there is 41 | no secret. 42 | 43 | Raises [Invalid_argument] if your secret is smaller than 44 | {!Bytesrw_xxhash.xxh3_secret_size_min}. *) 45 | 46 | val update : t -> Bytes.Slice.t -> unit 47 | (** [update state slice] updates [state] with the bytes in the range of 48 | [slice]. *) 49 | 50 | val copy : t -> t 51 | (** [copy t] is a copy of [t]. *) 52 | end 53 | 54 | val value : State.t -> t 55 | (** [value state] is the hash of [state]. This has no effect on 56 | [state] which can still be {!State.update}d. *) 57 | 58 | (** {1:hashing Hashing} *) 59 | 60 | val string : ?seed:seed -> string -> t 61 | (** [string s] is the hash of [s] with seed [seed] (if any). *) 62 | 63 | val bytes : ?seed:seed -> bytes -> t 64 | (** [bytes b] is the hash of [b] with seed [seed] (if any). *) 65 | 66 | val slice : ?seed:seed -> Bytes.Slice.t -> t 67 | (** [slice s] is the hash of the bytes in the range of [s] with seed 68 | [seed] (if any). *) 69 | 70 | val reader : ?seed:seed -> Bytes.Reader.t -> t 71 | (** [reader r] is the hash of the stream [r] with seed [seed] (if any). 72 | This consumes the reader. See also {!reads}. *) 73 | 74 | (** {1:streaming Hashing streams} *) 75 | 76 | val reads : ?state:State.t -> Bytes.Reader.t -> Bytes.Reader.t * State.t 77 | (** [reads r] is [hr, hstate] with: 78 | {ul 79 | {- [hr] a reader that taps the reads of [r] to update [hstate].} 80 | {- [hstate], a hash state of the reads made on [hr] so 81 | far. This is [state] if explicitely given, otherwise 82 | defaults to a fresh {!State.make}.}} 83 | To get intermediate or final hash results use {!value} on 84 | [hstate]. *) 85 | 86 | val writes : ?state:State.t -> Bytes.Writer.t -> Bytes.Writer.t * State.t 87 | (** [writes ?state w] is [hw, hstate] with: 88 | {ul 89 | {- [hw] a writer that taps the writes to update [hstate] before 90 | giving them to [w].} 91 | {- [hstate], a hash state of the writes made on [hw] so 92 | far. This is [state] if explicitely given, otherwise 93 | defaults to a fresh {!State.make}.}} 94 | To get intermediate or final hash results use {!value} on 95 | [hstate]. *) 96 | 97 | (** {1:preds Predicates and comparisons} *) 98 | 99 | val equal : t -> t -> bool 100 | (** [equal h0 h1] is [true] iff [h0] and [h1] are equal. *) 101 | 102 | val compare : t -> t -> int 103 | (** [compare] is a total order on hashes compatible with {!equal}. *) 104 | 105 | (** {1:converting Converting} *) 106 | 107 | val to_binary_string : t -> string 108 | (** [to_binary_string h] is a big-endian binary representation 109 | of [h] of length {!length}. *) 110 | 111 | val of_binary_string : string -> (t, string) result 112 | (** [of_binary_string s] is a hash from the big-endian binary 113 | representation stored in [s]. *) 114 | 115 | val to_hex : t -> string 116 | (** [to_hex h] is the binary representation of [h] using lowercase 117 | US-ASCII hex digits. *) 118 | 119 | val of_hex : string -> (t, string) result 120 | (** [of_hex s] parses a sequence of hex digits into a hash. *) 121 | 122 | val pp : Format.formatter -> t -> unit 123 | (** [pp] formats hashes for inspection. *) 124 | end 125 | 126 | (** [XXH3-64] hash. *) 127 | module Xxh3_64 : sig 128 | include Xxh3 (** @inline *) 129 | 130 | val to_uint64 : t -> int64 131 | (** [to_int64 h] is [h] as an unsigned [int64] number. *) 132 | 133 | val of_uint64 : int64 -> t 134 | (** [of_uint64 u] is [u] as a hash. *) 135 | end 136 | 137 | 138 | (** [XXH3-128] hash. *) 139 | module Xxh3_128 : Xxh3 140 | 141 | (** {1:library Library parameters} *) 142 | 143 | val version : unit -> string 144 | (** [version ()] is the version of the [xxhash] C library. *) 145 | 146 | val xxh3_secret_size_min : unit -> int 147 | (** [xxh3_secret_size_min ()] is the bare minimum size for a custom 148 | secret. *) 149 | -------------------------------------------------------------------------------- /src/xxhash/bytesrw_xxhash.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_xxhash -------------------------------------------------------------------------------- /src/xxhash/bytesrw_xxhash_stubs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*/ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | /* For the _withSecretandSeed funs, we need this. Also when the minimal 11 | version is bumped conditionals in these stubs can be removed. */ 12 | 13 | #define XXH_PRIVATE_API /* For XXH_CPU_LITTLE_ENDIAN */ 14 | #define XXH_STATIC_LINKING_ONLY 15 | 16 | #include 17 | 18 | #if XXH_VERSION_NUMBER < 800 19 | #error "Unsupported libxxhash version, at least 0.8.0 is needed" 20 | #endif 21 | 22 | #define XXH3_state_t_val(v) (*((XXH3_state_t **) Data_custom_val(v))) 23 | #define _BE64(v) (XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(v) : v) 24 | 25 | /* Library parameters */ 26 | 27 | CAMLprim value ocaml_bytesrw_XXH_versionNumber (value unit) 28 | { return Val_int (XXH_VERSION_NUMBER); } 29 | 30 | CAMLprim value ocaml_bytesrw_XXH3_SECRET_SIZE_MIN (value unit) 31 | { return Val_int (XXH3_SECRET_SIZE_MIN); } 32 | 33 | /* Hashing bytes */ 34 | 35 | CAMLprim value ocaml_bytesrw_XXH3_64bits_withSeed 36 | (value str, value ofs, value len, value seed) 37 | { 38 | XXH64_hash_t h = XXH3_64bits_withSeed (Bp_val (str) + Int_val (ofs), 39 | Int_val (len), Int64_val (seed)); 40 | return (caml_copy_int64 (h)); 41 | } 42 | 43 | CAMLprim value ocaml_bytesrw_XXH3_128bits_withSeed 44 | (value str, value ofs, value len, value seed) 45 | { 46 | XXH128_hash_t h = XXH3_128bits_withSeed (Bp_val (str) + Int_val (ofs), 47 | Int_val (len), Int64_val (seed)); 48 | value res = caml_alloc_string (16); 49 | ((XXH64_hash_t *)Bp_val(res))[0] = _BE64 (h.high64); 50 | ((XXH64_hash_t *)(Bp_val(res)))[1] = _BE64 (h.low64); 51 | return res; 52 | } 53 | 54 | /* XXH3_state_t */ 55 | 56 | static const char ocaml_bytesrw_err_secret_too_small[] = "Secret too small"; 57 | 58 | void ocaml_bytesrw_finalize_XXH3_state_t (value hst) 59 | { XXH3_freeState (XXH3_state_t_val (hst)); } 60 | 61 | CAMLprim value ocaml_bytesrw_XXH3_createState (value unit) 62 | { 63 | XXH3_state_t *st = XXH3_createState (); 64 | if (!st) caml_failwith ("Could not allocate XXH3_state_t"); 65 | value hst = caml_alloc_final (1, &ocaml_bytesrw_finalize_XXH3_state_t, 0, 1); 66 | XXH3_state_t_val(hst) = st; 67 | return hst; 68 | } 69 | 70 | CAMLprim value ocaml_bytesrw_XXH3_copyState (value dst, value src) 71 | { 72 | XXH3_copyState (XXH3_state_t_val (dst), XXH3_state_t_val (src)); 73 | return Val_unit; 74 | } 75 | 76 | /* XXH3_64 */ 77 | 78 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset (value hst) 79 | { XXH3_64bits_reset (XXH3_state_t_val (hst)); return Val_unit; } 80 | 81 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSeed 82 | (value hst, value seed) 83 | { 84 | XXH3_64bits_reset_withSeed (XXH3_state_t_val (hst), Int64_val (seed)); 85 | return Val_unit; 86 | } 87 | 88 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSecret 89 | (value hst, value secret) 90 | { 91 | XXH_errorcode rc = 92 | XXH3_64bits_reset_withSecret (XXH3_state_t_val (hst), String_val (secret), 93 | caml_string_length (secret)); 94 | if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small); 95 | return Val_unit; 96 | } 97 | 98 | CAMLprim value ocaml_bytesrw_XXH3_64bits_reset_withSecretandSeed 99 | (value hst, value secret, value seed) 100 | { 101 | #if XXH_VERSION_NUMBER < 801 102 | caml_raise_sys_error 103 | (caml_copy_string ("Need libxxhash >= 0.8.1 to support secret and hash")); 104 | #else 105 | XXH_errorcode rc = 106 | XXH3_64bits_reset_withSecretandSeed(XXH3_state_t_val (hst), 107 | String_val (secret), 108 | caml_string_length (secret), 109 | Int64_val (seed)); 110 | if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small); 111 | #endif 112 | return Val_unit; 113 | } 114 | 115 | CAMLprim value ocaml_bytesrw_XXH3_64bits_update 116 | (value hst, value str, value ofs, value len) 117 | { 118 | XXH3_64bits_update (XXH3_state_t_val (hst), Bp_val (str) + Int_val (ofs), 119 | Int_val (len)); 120 | return Val_unit; 121 | } 122 | 123 | CAMLprim value ocaml_bytesrw_XXH3_64bits_digest (value hst) 124 | { return caml_copy_int64 (XXH3_64bits_digest (XXH3_state_t_val (hst))); } 125 | 126 | /* XXH3_128 */ 127 | 128 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset (value hst) 129 | { XXH3_128bits_reset (XXH3_state_t_val (hst)); return Val_unit; } 130 | 131 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSeed 132 | (value hst, value seed) 133 | { 134 | XXH3_128bits_reset_withSeed (XXH3_state_t_val (hst), Int64_val (seed)); 135 | return Val_unit; 136 | } 137 | 138 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSecret 139 | (value hst, value secret) 140 | { 141 | XXH_errorcode rc = 142 | XXH3_128bits_reset_withSecret (XXH3_state_t_val (hst), String_val (secret), 143 | caml_string_length (secret)); 144 | if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small); 145 | return Val_unit; 146 | } 147 | 148 | CAMLprim value ocaml_bytesrw_XXH3_128bits_reset_withSecretandSeed 149 | (value hst, value secret, value seed) 150 | { 151 | #if XXH_VERSION_NUMBER < 801 152 | caml_raise_sys_error 153 | (caml_copy_string ("Need libxxhash >= 0.8.1 to support secret and hash")); 154 | #else 155 | XXH_errorcode rc = 156 | XXH3_128bits_reset_withSecretandSeed(XXH3_state_t_val (hst), 157 | String_val (secret), 158 | caml_string_length (secret), 159 | Int64_val (seed)); 160 | if (rc != XXH_OK) caml_invalid_argument (ocaml_bytesrw_err_secret_too_small); 161 | #endif 162 | return Val_unit; 163 | } 164 | 165 | CAMLprim value ocaml_bytesrw_XXH3_128bits_update 166 | (value hst, value str, value ofs, value len) 167 | { 168 | XXH3_128bits_update (XXH3_state_t_val (hst), Bp_val (str) + Int_val (ofs), 169 | Int_val (len)); 170 | return Val_unit; 171 | } 172 | 173 | CAMLprim value ocaml_bytesrw_XXH3_128bits_digest (value hst) 174 | { 175 | XXH128_hash_t h = XXH3_128bits_digest (XXH3_state_t_val (hst)); 176 | value res = caml_alloc_string (16); 177 | ((XXH64_hash_t *)Bp_val(res))[0] = _BE64 (h.high64); 178 | ((XXH64_hash_t *)(Bp_val(res)))[1] = _BE64 (h.low64); 179 | return res; 180 | } 181 | -------------------------------------------------------------------------------- /src/xxhash/libbytesrw_xxhash_stubs.clib: -------------------------------------------------------------------------------- 1 | bytesrw_xxhash_stubs.o -------------------------------------------------------------------------------- /src/zlib/bytesrw_zlib.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** [deflate], [zlib] and [gzip] streams. 7 | 8 | This module provides support for reading and writing 9 | {{:https://www.rfc-editor.org/rfc/rfc1951}[deflate]}, 10 | {{:https://www.rfc-editor.org/rfc/rfc1950}[zlib]} and 11 | {{:https://www.rfc-editor.org/rfc/rfc1952}[gzip]} 12 | streams with the {{:https://zlib.net/}[zlib]} C library. 13 | 14 | {b Slice lengths.} The slice length of readers created by 15 | filters of this module defaults to {!default_slice_length}. The hinted 16 | slice length of writers created by filters of this module defaults to 17 | {!default_slice_length} and they write on their writers with 18 | slices that respect their desires. 19 | 20 | {b Positions.} The position of readers and writers created 21 | by filters of this module default to [0]. *) 22 | 23 | open Bytesrw 24 | 25 | (** {1:errors Errors} *) 26 | 27 | type Bytes.Stream.error += Error of string (** *) 28 | (** The type for [deflate], [zlib] and [gzip] stream errors. 29 | 30 | Except for the {{!library}library parameters}, all functions of this 31 | module and resulting reader and writers may raise 32 | {!Bytesrw.Bytes.Stream.Error} with this error. *) 33 | 34 | type level = int 35 | (** The type for compression levels. 36 | 37 | An integer between [-1] and [9], see these 38 | {{!compression_level}constants}. *) 39 | 40 | (** {1:streams Streams} *) 41 | 42 | (** {{:https://www.rfc-editor.org/rfc/rfc1951}[deflate]} streams. *) 43 | module Deflate : sig 44 | 45 | (** {1:decompress Decompress} *) 46 | 47 | val decompress_reads : ?leftover:bool -> unit -> Bytes.Reader.filter 48 | (** [decompress_reads () r] filters the reads of [r] by decompressing 49 | a [deflate] stream. If [leftover] is: 50 | {ul 51 | {- [false] (default), the reader errors if there is leftover data after 52 | the end of the [deflate] stream.} 53 | {- [true] the reader decompresses one [deflate] stream. Once the 54 | reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly 55 | after the end of the [deflate] stream and can be read again to 56 | perform other non-filtered reads.}} *) 57 | 58 | val decompress_writes : unit -> Bytes.Writer.filter 59 | (** [decompress_writes () w ~eod] filters writes on [w] by decompressing 60 | a [deflate] stream until {!Bytesrw.Bytes.Slice.eod} is written, if 61 | leftover data remains an error is raised. If [eod] is [false], 62 | the last {!Bytesrw.Bytes.Slice.eod} is not written on [w] and at this 63 | point [w] can be used again to perform other non-filtered 64 | writes. *) 65 | 66 | (** {1:compress Compress} *) 67 | 68 | val compress_reads : ?level:level -> unit -> Bytes.Reader.filter 69 | (** [compress_reads ~level () r] filters the reads of [r] by compressing 70 | them to a [deflate] stream at level [level] (defaults to 71 | {!default_compression}). *) 72 | 73 | val compress_writes : ?level:level -> unit -> Bytes.Writer.filter 74 | (** [compress_writes ~level w ~eod] filters writes on [w] by compressing 75 | them to a [deflate] stream at level [level] (defaults to 76 | {!default_compression}) until {!Bytesrw.Bytes.Slice.eod} is written. 77 | If [eod] is false, the latter is not written on [w] and at that point 78 | [w] can be used again to perform non-filtered writes. *) 79 | end 80 | 81 | (** {{:https://www.rfc-editor.org/rfc/rfc1950}[zlib]} streams. *) 82 | module Zlib : sig 83 | 84 | (** {1:decompress Decompress} *) 85 | 86 | val decompress_reads : ?leftover:bool -> unit -> Bytes.Reader.filter 87 | (** [decompress_reads () r] filters the reads of [r] by decompressing 88 | a [zlib] stream. If [leftover] is: 89 | {ul 90 | {- [false] (default), the reader errors if there is leftover data after 91 | the end of the [zlib] stream.} 92 | {- [true] the reader decompresses one [zlib] stream. Once the 93 | reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly 94 | after the end of the [zlib] stream and can be read again to 95 | perform other non-filtered reads.}} *) 96 | 97 | val decompress_writes : unit -> Bytes.Writer.filter 98 | (** [decompress_writes () w ~eod] filters writes on [w] by decompressing a 99 | [zlib] stream until {!Bytesrw.Bytes.Slice.eod} is written, if leftover 100 | data remains an error is raised. If [eod] is [false] the last 101 | {!Bytes.Slice.eod} is not written on [w] and at this point [w] can be 102 | used again to perform othe non-filtered writes. *) 103 | 104 | (** {1:compress Compress} *) 105 | 106 | val compress_reads : ?level:level -> unit -> Bytes.Reader.filter 107 | (** [compress_reads ~level () r] filters the reads of [r] by compressing 108 | them to a [zlib] stream at level [level] (defaults to 109 | {!default_compression}). *) 110 | 111 | val compress_writes : ?level:level -> unit -> Bytes.Writer.filter 112 | (** [compress_writes ~level () w ~eod] filters writes on [w] by compressing 113 | them to a [zlib] stream at level [level] (defaults to 114 | {!default_compression}) until {!Bytesrw.Bytes.Slice.eod} is written. 115 | If [eod] is [false], the latter is not written on [w] and at that point 116 | [w] can be used again to perform non-filtered writes. *) 117 | end 118 | 119 | (** {{:https://www.rfc-editor.org/rfc/rfc1952}[gzip]} streams. 120 | 121 | {b Note.} In general a [gzip] stream can be made of multiple, 122 | independently compressed, members. The way the module handles [gzip] 123 | member headers is described {{!Gzip.member_headers}here}. *) 124 | module Gzip : sig 125 | 126 | (** {1:decompress Decompress} *) 127 | 128 | val decompress_reads : ?all_members:bool -> unit -> Bytes.Reader.filter 129 | (** [decompress_reads () r] filters the reads of [r] by decompressing [gzip] 130 | members. If [all_members] is 131 | {ul 132 | {- [true] (default), this concatenates decompressed sequences of [gzip] 133 | members like [gunzip] would do and errors if there is leftover data.} 134 | {- [false] this decompresses a single [gzip] member. Once the resulting 135 | reader returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly 136 | after the end of the gzip member and can be used again to perform 137 | other non-filtered reads (e.g. a new [gzip] member or other unrelated 138 | data).}} *) 139 | 140 | val decompress_writes : unit -> Bytes.Writer.filter 141 | (** [decompress_writes () w ~eod] filters the writes on [w] by 142 | decompressing sequences of [gzip] members until 143 | [Bytesrw.Bytes.Slice.eod] is written. If [eod] is [false], the 144 | latter is not written on [w] and at this point [w] can be used 145 | again to perform other non-filtered writes. *) 146 | 147 | (** {1:compress Compress} *) 148 | 149 | val compress_reads : ?level:level -> unit -> Bytes.Reader.filter 150 | (** [compress_reads ~level () r] filters the reads of [r] by 151 | compressing them to as a single [gzip] member at level [level] 152 | (defaults to {!default_compression}). *) 153 | 154 | val compress_writes : ?level:level -> unit -> Bytes.Writer.filter 155 | (** [compress_writes ~level () w ~eod] filters the writes on [w] by 156 | compressing them to a single [gzip] member at level [level] 157 | (defaults to {!default_compression}) until 158 | {!Bytesrw.Bytes.Slice.eod} is written. If [eod] is [false] the 159 | latter is not written on [w] and at this point [w] can be used 160 | again to perform other non-filtered writes. *) 161 | 162 | (** {1:member_headers Member headers} 163 | 164 | Currently no facility is provided to access [gzip] member 165 | headers. It seems those are little used in practice. However 166 | support is provided to read and write [gzip] streams 167 | member-by-member which is used by certain formats. 168 | 169 | On compression the member's header generated in the stream is 170 | [zlib]'s default header; see documentation of [deflateSetHeader] 171 | in the {{:https://www.zlib.net/manual.html} the manual}. Note that 172 | this watermarks the operating system in the stream (at least in 173 | v1.3.1 as of writing). *) 174 | end 175 | 176 | (** {1:library Library parameters} *) 177 | 178 | val version : unit -> string 179 | (** [version ()] is the version of the [zlib] C library. *) 180 | 181 | val default_slice_length : int 182 | (** [default_slice_length] is [128KB]. Note, this choice is made by 183 | [Bytesrw_zlib] not the [zlib] library. *) 184 | 185 | (** {2:compression_level Compression levels} *) 186 | 187 | val default_compression : level 188 | (** [default_compression] is [-1], the default compression level. The 189 | resulting level depends on the [zlib] library. *) 190 | 191 | val no_compression : level 192 | (** [no_compression] is [0], indicates no compression. *) 193 | 194 | val best_speed : level 195 | (** [best_speed] is [1], indicates fastest compression. *) 196 | 197 | val best_compression : level 198 | (** [best_compression] is [9], indicates best compression. *) 199 | -------------------------------------------------------------------------------- /src/zlib/bytesrw_zlib.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_zlib -------------------------------------------------------------------------------- /src/zlib/bytesrw_zlib_stubs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #if ZLIB_VERNUM < 0x12b0 15 | #error "Unsupported zlib version, at least 1.2.11 is needed" 16 | #endif 17 | 18 | #define z_streamp_val(v) (*((z_streamp *) Data_custom_val(v))) 19 | 20 | /* OCaml Bytesrw_zlib.flush value map */ 21 | 22 | static int ocaml_zlib_flush[] = 23 | { Z_NO_FLUSH, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH, 24 | Z_BLOCK, Z_TREES }; 25 | 26 | /* OCaml Bytesrw_zlib.Zbuf.t value fields */ 27 | 28 | enum ocaml_zbuf_fields 29 | { ocaml_zbuf_bytes = 0, ocaml_zbuf_size, ocaml_zbuf_pos }; 30 | 31 | /* Library parameters */ 32 | 33 | CAMLprim value ocaml_bytesrw_zlib_version (value unit) 34 | { return (caml_copy_string (zlibVersion ())); } 35 | 36 | /* Inflate */ 37 | 38 | void ocaml_bytesrw_finalize_inflate_z_stream (value zs) 39 | { 40 | z_streamp s = z_streamp_val (zs); 41 | if (s != NULL) { 42 | inflateEnd (s); caml_stat_free (s); 43 | z_streamp_val (zs) = NULL; 44 | } 45 | } 46 | 47 | CAMLprim value ocaml_bytesrw_free_inflate_z_stream (value zs) 48 | { ocaml_bytesrw_finalize_inflate_z_stream (zs); return Val_unit; } 49 | 50 | CAMLprim value ocaml_bytesrw_create_inflate_z_stream (value window_bits) 51 | { 52 | z_streamp s = caml_stat_alloc (sizeof (z_stream)); 53 | if (s == NULL) caml_failwith ("Could not allocate z_stream"); 54 | s->zalloc = Z_NULL; s->zfree = Z_NULL; s->opaque = Z_NULL; 55 | s->avail_in = 0; s->next_in = Z_NULL; 56 | 57 | int rc = inflateInit2(s, Int_val (window_bits)); 58 | if (rc != Z_OK) { 59 | value err = caml_copy_string( (s->msg) ? s->msg : 60 | "Unknown inflateInit2 error"); 61 | caml_stat_free (s); 62 | caml_failwith_value (err); 63 | } 64 | value zs = caml_alloc_final (1, &ocaml_bytesrw_finalize_inflate_z_stream, 65 | 0, 1); 66 | z_streamp_val (zs) = s; 67 | return zs; 68 | } 69 | 70 | CAMLprim value ocaml_bytesrw_inflate_reset (value zs) 71 | { 72 | z_streamp s = z_streamp_val (zs); 73 | int rc = inflateReset (s); 74 | if (rc != Z_OK) 75 | { caml_failwith ((s->msg) ? s->msg : "Unknown inflateReset error"); } 76 | return Val_unit; 77 | } 78 | 79 | CAMLprim value ocaml_bytesrw_inflate 80 | (value zs, value src, value dst) 81 | { 82 | z_streamp s = z_streamp_val (zs); 83 | 84 | size_t in_pos = Int_val (Field (src, ocaml_zbuf_pos)); 85 | size_t in_size = Int_val (Field (src, ocaml_zbuf_size)); 86 | s->next_in = Bytes_val (Field (src, ocaml_zbuf_bytes)) + in_pos; 87 | s->avail_in = in_size - in_pos; 88 | 89 | size_t out_pos = Int_val (Field (dst, ocaml_zbuf_pos)); 90 | size_t out_size = Int_val (Field (dst, ocaml_zbuf_size)); 91 | s->next_out = Bytes_val (Field (dst, ocaml_zbuf_bytes)) + out_pos; 92 | s->avail_out = out_size - out_pos; 93 | 94 | int rc = inflate (s, Z_NO_FLUSH); 95 | if (rc != Z_OK && rc != Z_STREAM_END && rc != Z_BUF_ERROR) 96 | { caml_failwith ((s->msg) ? s->msg : "Unknown inflate error"); } 97 | 98 | size_t in_consumed = in_size - in_pos - s->avail_in; 99 | size_t out_consumed = out_size - out_pos - s->avail_out; 100 | 101 | Store_field (src, ocaml_zbuf_pos, Val_int (in_pos + in_consumed)); 102 | Store_field (dst, ocaml_zbuf_pos, Val_int (out_pos + out_consumed)); 103 | 104 | return Val_bool (rc == Z_STREAM_END); 105 | } 106 | 107 | /* Deflate */ 108 | 109 | void ocaml_bytesrw_finalize_deflate_z_stream (value zs) 110 | { 111 | z_streamp s = z_streamp_val (zs); 112 | if (s != NULL) { 113 | deflateEnd (s); caml_stat_free (s); 114 | z_streamp_val (zs) = NULL; 115 | } 116 | } 117 | 118 | CAMLprim value ocaml_bytesrw_free_deflate_z_stream (value zs) 119 | { ocaml_bytesrw_finalize_deflate_z_stream (zs); return Val_unit; } 120 | 121 | CAMLprim value ocaml_bytesrw_create_deflate_z_stream 122 | (value level, value window_bits) 123 | { 124 | z_streamp s = caml_stat_alloc (sizeof (z_stream)); 125 | if (s == NULL) caml_failwith ("Could not allocate z_stream"); 126 | s->zalloc = Z_NULL; s->zfree = Z_NULL; s->opaque = Z_NULL; 127 | s->avail_in = 0; s->next_in = Z_NULL; 128 | 129 | int rc = deflateInit2(s, Int_val (level), Z_DEFLATED, Int_val (window_bits), 130 | 8, Z_DEFAULT_STRATEGY); 131 | if (rc != Z_OK) { 132 | value err = caml_copy_string( (s->msg) ? s->msg : 133 | "Unknown deflateInit2 error"); 134 | caml_stat_free (s); 135 | caml_failwith_value (err); 136 | } 137 | value zs = caml_alloc_final (1, &ocaml_bytesrw_finalize_deflate_z_stream, 138 | 0, 1); 139 | z_streamp_val (zs) = s; 140 | return zs; 141 | } 142 | 143 | CAMLprim value ocaml_bytesrw_deflate_reset (value zs) 144 | { 145 | z_streamp s = z_streamp_val (zs); 146 | int rc = deflateReset (s); 147 | if (rc != Z_OK) 148 | { caml_failwith ((s->msg) ? s->msg : "Unknown deflateReset error"); } 149 | return Val_unit; 150 | } 151 | 152 | CAMLprim value ocaml_bytesrw_deflate 153 | (value zs, value src, value dst, value flush_op) 154 | { 155 | z_streamp s = z_streamp_val (zs); 156 | 157 | size_t in_pos = Int_val (Field (src, ocaml_zbuf_pos)); 158 | size_t in_size = Int_val (Field (src, ocaml_zbuf_size)); 159 | s->next_in = Bytes_val (Field (src, ocaml_zbuf_bytes)) + in_pos; 160 | s->avail_in = in_size - in_pos; 161 | 162 | size_t out_pos = Int_val (Field (dst, ocaml_zbuf_pos)); 163 | size_t out_size = Int_val (Field (dst, ocaml_zbuf_size)); 164 | s->next_out = Bytes_val (Field (dst, ocaml_zbuf_bytes)) + out_pos; 165 | s->avail_out = out_size - out_pos; 166 | 167 | int rc = deflate (s, ocaml_zlib_flush [Int_val (flush_op)]); 168 | if (rc != Z_OK && rc != Z_STREAM_END && rc != Z_BUF_ERROR) 169 | { caml_failwith ((s->msg) ? s->msg : "Unknown deflate error"); } 170 | 171 | size_t in_consumed = in_size - in_pos - s->avail_in; 172 | size_t out_consumed = out_size - out_pos - s->avail_out; 173 | 174 | Store_field (src, ocaml_zbuf_pos, Val_int (in_pos + in_consumed)); 175 | Store_field (dst, ocaml_zbuf_pos, Val_int (out_pos + out_consumed)); 176 | 177 | return Val_bool (rc == Z_STREAM_END); 178 | } 179 | -------------------------------------------------------------------------------- /src/zlib/libbytesrw_zlib_stubs.clib: -------------------------------------------------------------------------------- 1 | bytesrw_zlib_stubs.o -------------------------------------------------------------------------------- /src/zstd/bytesrw_zstd.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** [zstd] streams. 7 | 8 | This module provides support for reading and writing 9 | {{:https://www.rfc-editor.org/rfc/rfc8878.html}[zstd]} compressed 10 | streams with the {{:http://zstd.net/}[libzstd]} C library. 11 | 12 | {b Positions.} The positions of readers and writers created 13 | by filters of this module default to [0]. *) 14 | 15 | open Bytesrw 16 | 17 | (** {1:errors Errors} *) 18 | 19 | type Bytes.Stream.error += Error of string (** *) 20 | (** The type for [zstd] stream errors. 21 | 22 | Except for the {{!lib}library parameters}, all functions of this 23 | module and resulting reader and writers may raise 24 | {!Bytesrw.Bytes.Stream.Error} with this error. *) 25 | 26 | (** {1:decompress Decompress} *) 27 | 28 | (** Decompression parameters. *) 29 | module Dctx_params : sig 30 | type t 31 | (** The type for decompression parameters. *) 32 | 33 | val make : ?init:t -> ?window_log_max:int -> unit -> t 34 | (** [make ()] are the given compression parameters. Those unspecfied 35 | take the value of [init] which defaults to {!default}. See 36 | corresponding accessors for the default values. *) 37 | 38 | val default : t 39 | (** [default] are the default paramaters. See accessors for the 40 | default values. *) 41 | 42 | val window_log_max : t -> int 43 | (** [window_log_max] is the maximum back-reference distance in power 44 | of two allowed for decoding. Errors if the stream requires 45 | more. [0] is the default maximum window. This can be used to 46 | limit the memory used for decoding. *) 47 | 48 | (**/**) 49 | val unsafe_param : int -> int -> t -> t 50 | (**/**) 51 | end 52 | 53 | (** Decompression dictionaries. *) 54 | module Ddict : sig 55 | type t 56 | (** The type for dictionaries. *) 57 | 58 | val of_binary_string : string -> t 59 | (** [of_binary_string s] is a dictionary from the binary data [s]. *) 60 | end 61 | 62 | val decompress_reads : 63 | ?all_frames:bool -> ?dict:Ddict.t -> ?params:Dctx_params.t -> unit -> 64 | Bytes.Reader.filter 65 | (** [decompress_reads () r] filters the reads of [r] by decompressing 66 | [zstd] frames. 67 | {ul 68 | {- [dict] is the decompression dictionary, if any.} 69 | {- [params] defaults to {!Dctx_params.default}} 70 | {- [slice_length] defaults to {!dstream_out_size}.} 71 | {- If you get to create [r] and it has no constraints on its own 72 | use {!dstream_in_size} for its slices.}} 73 | If [all_frames] is: 74 | {ul 75 | {- [true] (default), this decompressses all frames until [r] returns 76 | {!Bytesrw.Bytes.Slice.eod} and concatenates the result.} 77 | {- [false] this decompresses a single frame. Once the resulting reader 78 | returns {!Bytesrw.Bytes.Slice.eod}, [r] is positioned exactly after 79 | the end of frame and can be used again to perform other non-filtered 80 | reads (e.g. a new [zstd] frame or other unrelated data).}} *) 81 | 82 | val decompress_writes : 83 | ?dict:Ddict.t -> ?params:Dctx_params.t -> unit -> Bytes.Writer.filter 84 | (** [decompress_writes () w ~eod] filters the writes on [w] by decompressing 85 | sequences of [zstd] frames until {!Bytesrw.Bytes.Slice.eod} is written. 86 | If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written 87 | on [w] and at this point [w] can be used again to perform other non-filtered 88 | writes. 89 | {ul 90 | {- [dict] is the decompression dictionary, if any.} 91 | {- [params] defaults to {!Dctx_params.default}} 92 | {- [slice_length] defaults to {!dstream_in_size}} 93 | {- Compressed slice lengths abides to [w]'s desire but if you get to 94 | create it and it has no constraints on its own use 95 | {!dstream_out_size}.}} *) 96 | 97 | (** {1:compress Compress} 98 | 99 | {b Warning.} The default {!Cctx_params.default} compression 100 | parameters are those of the C library and do not perform 101 | checksums. If you want to compress so that the [zstd] command 102 | line tool can uncompress you need to checksum. See the 103 | example in the {{!page-index.quick}quick start}. *) 104 | 105 | (** Compression parameters. *) 106 | module Cctx_params : sig 107 | 108 | type clevel = int 109 | (** The type for compression levels. See {!val-clevel}. *) 110 | 111 | type t 112 | (** The type for compression parameters. *) 113 | 114 | val make : 115 | ?init:t -> ?checksum:bool -> ?clevel:clevel -> ?window_log:int -> unit -> t 116 | (** [make ()] are the given compression parameters. Those unspecfied 117 | take the value of [init] which defaults to {!default}. See 118 | corresponding accessors for the default values. *) 119 | 120 | val default : t 121 | (** [default] are the default parameters. See accessors for the 122 | default values. *) 123 | 124 | val checksum : t -> bool 125 | (** [checksum p] is [true] if frames are checksumed. Defaults 126 | to [false] {b Warning.} This mirrors the library default but does 127 | not mirror the [zstd] tool default. *) 128 | 129 | val clevel : t -> clevel 130 | (** [clevel p] is the compression level. Must be in 131 | the {!min_clevel} to {!max_clevel} range. Defaults 132 | to {!default_clevel}. [0] means default compression level. *) 133 | 134 | val window_log : t -> int 135 | (** [window_log] is the maximal allowed back-reference distance in power 136 | of [2]. [0] means default window log. *) 137 | 138 | (**/**) 139 | val unsafe_param : int -> int -> t -> t 140 | (**/**) 141 | end 142 | 143 | (** Compression dictionaries. *) 144 | module Cdict : sig 145 | type t 146 | (** The type for dictionaries. *) 147 | 148 | val of_binary_string : string -> t 149 | (** [of_binary_string s] is a dictionary from the binary data [s]. *) 150 | end 151 | 152 | val compress_reads : 153 | ?dict:Cdict.t -> ?params:Cctx_params.t -> unit -> Bytes.Reader.filter 154 | (** [compress_reads () r] filters the reads of [r] by compressing them 155 | to a single [zstd] frame. 156 | {ul 157 | {- [dict] is the compression dictionary, if any.} 158 | {- [params] defaults to {!Cctx_params.default}.} 159 | {- [slice_length] defaults to {!cstream_out_size}.} 160 | {- If you get to create [r] and it has no constraints on its own 161 | use {!cstream_in_size} for its slices.}} *) 162 | 163 | val compress_writes : 164 | ?dict:Cdict.t -> ?params:Cctx_params.t -> unit -> Bytes.Writer.filter 165 | (** [compress_writes () w ~eod] filters the writes on [w] by compressing them 166 | to a single [zstd] frame until {!Bytesrw.Bytes.Slice.eod} is written. 167 | If [eod] is [false] the last {!Bytesrw.Bytes.Slice.eod} is not written 168 | on [w] and at this point [w] can be used again to perform non-filtered 169 | writes. 170 | {ul 171 | {- [dict] is the compression dictionary, if any.} 172 | {- [params] defaults to {!Cctx_params.default}.} 173 | {- [slice_length] defaults to {!cstream_in_size}.} 174 | {- Decompressed slice length abides to [w]'s desire but if you get to 175 | create it and it has no constraints on its own use 176 | {!cstream_out_size}.}} *) 177 | 178 | (** {1:lib Library parameters} *) 179 | 180 | val version : unit -> string 181 | (** [version ()] is the version of the [libzstd] C library. *) 182 | 183 | val min_clevel : unit -> Cctx_params.clevel 184 | (** [min_clevel ()] is the minimum negative compression level allowed. *) 185 | 186 | val max_clevel : unit -> Cctx_params.clevel 187 | (** [max_clevel ()] is the maximum compression level available. *) 188 | 189 | val default_clevel : unit -> Cctx_params.clevel 190 | (** [default_clevel ()] is the default compression level. *) 191 | 192 | val cstream_in_size : unit -> int 193 | (** [cstream_in_size ()] is the recommended length of input slices 194 | on compression. *) 195 | 196 | val cstream_out_size : unit -> int 197 | (** [cstream_out_size ()] is the recommended length of output slices 198 | on compression. *) 199 | 200 | val dstream_in_size : unit -> int 201 | (** [dstream_in_size ()] is the recommended length of input slices 202 | on decompression. *) 203 | 204 | val dstream_out_size : unit -> int 205 | (** [dstream_out_size ()] is the recommended length of output 206 | slices on decompression. *) 207 | -------------------------------------------------------------------------------- /src/zstd/bytesrw_zstd.mllib: -------------------------------------------------------------------------------- 1 | Bytesrw_zstd -------------------------------------------------------------------------------- /src/zstd/bytesrw_zstd_stubs.c: -------------------------------------------------------------------------------- 1 | /*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #if ZSTD_VERSION_NUMBER < 10400 15 | #error "Unsupported libzstd version, at least 1.4.0 is needed" 16 | #endif 17 | 18 | #define ZSTD_CCtx_val(v) (*((ZSTD_CCtx **) Data_custom_val(v))) 19 | #define ZSTD_DCtx_val(v) (*((ZSTD_DCtx **) Data_custom_val(v))) 20 | 21 | /* OCaml Bytesrw_zstd.end_directive value map */ 22 | 23 | static ZSTD_EndDirective ocaml_zstd_end_directive[] = 24 | { ZSTD_e_continue, ZSTD_e_flush, ZSTD_e_end}; 25 | 26 | /* OCaml Bytesrw_zstd.c_parameter value map */ 27 | 28 | static ZSTD_dParameter ocaml_zstd_d_parameter[] = 29 | { ZSTD_d_windowLogMax}; 30 | 31 | /* OCaml Bytesrw_zstd.c_parameter value map */ 32 | 33 | static ZSTD_cParameter ocaml_zstd_c_parameter[] = 34 | { ZSTD_c_compressionLevel, ZSTD_c_windowLog, ZSTD_c_checksumFlag}; 35 | 36 | /* OCaml Zbuf.t value fields */ 37 | 38 | enum ocaml_zbuf_fields 39 | { ocaml_zbuf_bytes = 0, ocaml_zbuf_size, ocaml_zbuf_pos }; 40 | 41 | /* Library parameters */ 42 | 43 | CAMLprim value ocaml_bytesrw_ZSTD_versionString (value unit) 44 | { return (caml_copy_string (ZSTD_versionString ())); } 45 | 46 | CAMLprim value ocaml_bytesrw_ZSTD_minCLevel (value unit) 47 | { return (Val_int (ZSTD_minCLevel ())); } 48 | 49 | CAMLprim value ocaml_bytesrw_ZSTD_maxCLevel (value unit) 50 | { return (Val_int (ZSTD_maxCLevel ())); } 51 | 52 | CAMLprim value ocaml_bytesrw_ZSTD_defaultCLevel (value unit) 53 | /* Once 1.5.0 is required we can use ZSTD_defaultCLevel () */ 54 | { return (Val_int (ZSTD_CLEVEL_DEFAULT)); } 55 | 56 | CAMLprim value ocaml_bytesrw_ZSTD_CStreamInSize (value unit) 57 | { return (Val_int (ZSTD_CStreamInSize ())); } 58 | 59 | CAMLprim value ocaml_bytesrw_ZSTD_CStreamOutSize (value unit) 60 | { return (Val_int (ZSTD_CStreamOutSize ())); } 61 | 62 | CAMLprim value ocaml_bytesrw_ZSTD_DStreamInSize (value unit) 63 | { return (Val_int (ZSTD_DStreamInSize ())); } 64 | 65 | CAMLprim value ocaml_bytesrw_ZSTD_DStreamOutSize (value unit) 66 | { return (Val_int (ZSTD_DStreamOutSize ())); } 67 | 68 | /* Decompression */ 69 | 70 | void ocaml_bytesrw_finalize_ZSTD_DCtx (value dctx) 71 | { size_t rc = ZSTD_freeDCtx (ZSTD_DCtx_val (dctx)); /* N.B. accepts NULL */ } 72 | 73 | CAMLprim value ocaml_bytesrw_ZSTD_freeDCtx (value dctx) 74 | { 75 | ZSTD_freeDCtx (ZSTD_DCtx_val (dctx)); 76 | ZSTD_DCtx_val (dctx) = NULL; 77 | return Val_unit; 78 | } 79 | 80 | CAMLprim value ocaml_bytesrw_ZSTD_createDCtx (value unit) 81 | { 82 | ZSTD_DCtx *ctx = ZSTD_createDCtx (); 83 | if (!ctx) caml_failwith ("Could not allocate ZSTD_DCtx"); 84 | value dctx = caml_alloc_final (1, &ocaml_bytesrw_finalize_ZSTD_DCtx, 0, 1); 85 | ZSTD_DCtx_val (dctx) = ctx; 86 | return dctx; 87 | } 88 | 89 | CAMLprim value ocaml_bytesrw_ZSTD_DCtx_setParameter 90 | (value dctx, value p, value v) 91 | { 92 | ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx); 93 | ZSTD_dParameter param = 94 | (Is_block (p) ? 95 | Int_val (Field (p, 0)) : ocaml_zstd_d_parameter[Int_val (p)]); 96 | size_t rc = ZSTD_DCtx_setParameter (ctx, param, Int_val (v)); 97 | if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc)); 98 | return Val_unit; 99 | } 100 | 101 | CAMLprim value ocaml_bytesrw_ZSTD_DCtx_loadDictionary(value dctx, value s) 102 | { 103 | ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx); 104 | size_t rc = ZSTD_DCtx_loadDictionary (ctx, String_val (s), 105 | caml_string_length (s)); 106 | if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc)); 107 | return Val_unit; 108 | } 109 | 110 | CAMLprim value ocaml_bytesrw_ZSTD_decompressStream 111 | (value dctx, value src, value dst) 112 | { 113 | ZSTD_DCtx *ctx = ZSTD_DCtx_val (dctx); 114 | ZSTD_inBuffer bsrc; 115 | bsrc.src = Bytes_val (Field (src, ocaml_zbuf_bytes)); 116 | bsrc.size = Int_val (Field (src, ocaml_zbuf_size)); 117 | bsrc.pos = Int_val (Field (src, ocaml_zbuf_pos)); 118 | 119 | ZSTD_outBuffer bdst; 120 | bdst.dst = Bytes_val (Field (dst, ocaml_zbuf_bytes)); 121 | bdst.size = Int_val (Field (dst, ocaml_zbuf_size)); 122 | bdst.pos = Int_val (Field (dst, ocaml_zbuf_pos)); 123 | 124 | size_t rc = ZSTD_decompressStream (ctx, &bdst, &bsrc); 125 | if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc)); 126 | 127 | Store_field (src, ocaml_zbuf_pos, Val_int (bsrc.pos)); 128 | Store_field (dst, ocaml_zbuf_pos, Val_int (bdst.pos)); 129 | return Val_bool (rc == 0 /* End of frame */); 130 | } 131 | 132 | /* Compression */ 133 | 134 | void ocaml_bytesrw_finalize_ZSTD_CCtx (value cctx) 135 | { ZSTD_freeCCtx (ZSTD_CCtx_val (cctx)); /* N.B. accepts NULL */ } 136 | 137 | CAMLprim value ocaml_bytesrw_ZSTD_freeCCtx (value cctx) 138 | { 139 | ZSTD_freeCCtx (ZSTD_CCtx_val (cctx)); 140 | ZSTD_CCtx_val (cctx) = NULL; 141 | return Val_unit; 142 | } 143 | 144 | CAMLprim value ocaml_bytesrw_ZSTD_createCCtx (value unit) 145 | { 146 | ZSTD_CCtx *ctx = ZSTD_createCCtx (); 147 | if (!ctx) caml_failwith ("Could not allocate ZSTD_CCtx"); 148 | value cctx = caml_alloc_final (1, &ocaml_bytesrw_finalize_ZSTD_CCtx, 0, 1); 149 | ZSTD_CCtx_val (cctx) = ctx; 150 | return cctx; 151 | } 152 | 153 | CAMLprim value ocaml_bytesrw_ZSTD_CCtx_setParameter 154 | (value cctx, value p, value v) 155 | { 156 | ZSTD_CCtx *ctx = ZSTD_CCtx_val (cctx); 157 | ZSTD_cParameter param = 158 | (Is_block (p) ? 159 | Int_val (Field (p, 0)) : ocaml_zstd_c_parameter[Int_val (p)]); 160 | 161 | size_t rc = ZSTD_CCtx_setParameter (ctx, param, Int_val (v)); 162 | if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc)); 163 | return Val_unit; 164 | } 165 | 166 | CAMLprim value ocaml_bytesrw_ZSTD_CCtx_loadDictionary(value cctx, value s) 167 | { 168 | ZSTD_CCtx *ctx = ZSTD_CCtx_val (cctx); 169 | size_t rc = ZSTD_CCtx_loadDictionary (ctx, String_val (s), 170 | caml_string_length (s)); 171 | if (ZSTD_isError (rc)) caml_failwith (ZSTD_getErrorName (rc)); 172 | return Val_unit; 173 | } 174 | 175 | CAMLprim value ocaml_bytesrw_ZSTD_compressStream2 176 | (value dctx, value src, value dst, value edir) 177 | { 178 | ZSTD_CCtx *ctx = ZSTD_CCtx_val (dctx); 179 | 180 | ZSTD_inBuffer bsrc; 181 | bsrc.src = Bytes_val (Field (src, ocaml_zbuf_bytes)); 182 | bsrc.size = Int_val (Field (src, ocaml_zbuf_size)); 183 | bsrc.pos = Int_val (Field (src, ocaml_zbuf_pos)); 184 | 185 | ZSTD_outBuffer bdst; 186 | bdst.dst = Bytes_val (Field (dst, ocaml_zbuf_bytes)); 187 | bdst.size = Int_val (Field (dst, ocaml_zbuf_size)); 188 | bdst.pos = Int_val (Field (dst, ocaml_zbuf_pos)); 189 | 190 | size_t rem = ZSTD_compressStream2 (ctx, &bdst, &bsrc, 191 | ocaml_zstd_end_directive[Int_val (edir)]); 192 | 193 | if (ZSTD_isError (rem)) caml_failwith (ZSTD_getErrorName (rem)); 194 | Store_field (src, ocaml_zbuf_pos, Val_int (bsrc.pos)); 195 | Store_field (dst, ocaml_zbuf_pos, Val_int (bdst.pos)); 196 | return Val_bool (rem == 0 /* end_dir completed */); 197 | } 198 | -------------------------------------------------------------------------------- /src/zstd/libbytesrw_zstd_stubs.clib: -------------------------------------------------------------------------------- 1 | bytesrw_zstd_stubs.o -------------------------------------------------------------------------------- /test/blake3tap.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | let stdin_reader unix ~slice_length = match unix with 9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin 11 | 12 | let stdout_writer unix ~slice_length = match unix with 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout 15 | 16 | let blake3_reader r = Bytesrw_blake3.Blake3.(to_hex (reader r)) 17 | let blake3_reads r = 18 | let i, hash = Bytesrw_blake3.Blake3.reads r in 19 | i, fun () -> Bytesrw_blake3.Blake3.(to_hex (value hash)) 20 | 21 | let blake3_writes w = 22 | let w, hash = Bytesrw_blake3.Blake3.writes w in 23 | w, fun () -> Bytesrw_blake3.Blake3.(to_hex (value hash)) 24 | 25 | let sink processor unix ~slice_length = match processor with 26 | | `Reader -> 27 | let i = stdin_reader unix ~slice_length in 28 | i, blake3_reader i 29 | | `Writer -> 30 | let i = stdin_reader unix ~slice_length in 31 | let w, get_hash = blake3_writes (Bytes.Writer.ignore ()) in 32 | let () = Bytes.Writer.write_reader w ~eod:true i in 33 | i, get_hash () 34 | 35 | let filter processor unix ~slice_length = match processor with 36 | | `Reader -> 37 | let i = stdin_reader unix ~slice_length in 38 | let i, get_hash = blake3_reads i in 39 | let o = stdout_writer unix ~slice_length in 40 | Bytes.Writer.write_reader ~eod:true o i; 41 | i, get_hash () 42 | | `Writer -> 43 | let i = stdin_reader unix ~slice_length in 44 | let o = stdout_writer unix ~slice_length in 45 | let o, get_hash = blake3_writes o in 46 | Bytes.Writer.write_reader ~eod:true o i; 47 | i, get_hash () 48 | 49 | let log_count i = Printf.eprintf "i:%d\n%!" (Bytes.Reader.read_length i) 50 | 51 | let tap mode processor slice_length show_count unix = 52 | try 53 | let i, hash = match mode with 54 | | `Sink -> sink processor unix ~slice_length 55 | | `Filter -> filter processor unix ~slice_length 56 | in 57 | if show_count then log_count i; 58 | Printf.eprintf "%s\n%!" hash; 59 | Ok 0 60 | with 61 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 62 | 63 | open Cmdliner 64 | 65 | let cmd = 66 | let doc = "Rewrite stdin to stdout and report the BLAKE3 hash of data" in 67 | let mode = 68 | let c = `Sink, Arg.info ["sink"] ~doc:"Only read stdin." in 69 | let d = `Filter, Arg.info ["filter"] ~doc:"Write stdin to stdout." in 70 | Arg.(value & vflag `Filter [c; d]) 71 | in 72 | let processor = 73 | let r = `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader tap." in 74 | let w = `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer tap." in 75 | Arg.(value & vflag `Reader [r; w]) 76 | in 77 | let slice_length = 78 | let doc = "IO byte slices size." in 79 | Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE") 80 | in 81 | let show_count = 82 | let doc = "Show on $(b,stderr) final amount of bytes read." in 83 | Arg.(value & flag & info ["show-count"] ~doc) 84 | in 85 | let unix = 86 | let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in 87 | Arg.(value & flag & info ["unix-io"] ~doc) 88 | in 89 | Cmd.v (Cmd.info "blake3tap" ~version:"%%VERSION%%" ~doc) @@ 90 | Term.(const tap $ mode $ processor $ slice_length $ show_count $ unix) 91 | 92 | let main () = Cmd.eval_result' cmd 93 | let () = if !Sys.interactive then () else exit (main ()) 94 | -------------------------------------------------------------------------------- /test/cookbook.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: CC0-1.0 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | (* Applying filters to strings *) 9 | 10 | let id s = 11 | let filters = Bytesrw_zstd.[compress_reads (); decompress_reads ()] in 12 | Bytes.Reader.filter_string filters s 13 | 14 | let id s = 15 | let filters = Bytesrw_zstd.[decompress_writes (); compress_writes ()] in 16 | Bytes.Writer.filter_string filters s 17 | 18 | (* Checksumming streams *) 19 | 20 | let blake3_and_compress ~plain = 21 | try 22 | let plain, blake3 = Bytesrw_blake3.Blake3.reads plain in 23 | let comp = Bytesrw_zstd.compress_reads () plain in 24 | let comp = Bytes.Reader.to_string comp in 25 | Ok (comp, Bytesrw_blake3.Blake3.value blake3) 26 | with 27 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 28 | 29 | let decompress_and_blake3 ~comp = 30 | try 31 | let plain = Bytesrw_zstd.decompress_reads () comp in 32 | let r, blake3 = Bytesrw_blake3.Blake3.reads plain in 33 | let s = Bytes.Reader.to_string r in 34 | Ok (s, Bytesrw_blake3.Blake3.value blake3) 35 | with 36 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 37 | 38 | (* Limiting streams *) 39 | 40 | let limited_decompress ~quota ~comp = 41 | let buf = Buffer.create quota in 42 | try 43 | let plain = Bytesrw_zstd.decompress_reads () comp in 44 | let () = Bytes.Reader.add_to_buffer buf (Bytes.Reader.limit quota plain) in 45 | Ok (`Data (Buffer.contents buf)) 46 | with 47 | | Bytes.Stream.Error (Bytes.Stream.Limit _quota, _) -> 48 | Ok (`Quota_exceeded (Buffer.contents buf)) 49 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 50 | 51 | (* Tracing streams *) 52 | 53 | let rtrace ~id r = Bytes.Reader.tap (Bytes.Slice.tracer ~id) r 54 | let wtrace ~id w = Bytes.Writer.tap (Bytes.Slice.tracer ~id) w 55 | 56 | (* Adding your own stream error *) 57 | 58 | module Myformat : sig 59 | 60 | (** {1:errors Errors} *) 61 | 62 | type Bytesrw.Bytes.Stream.error += 63 | | Error of string (** *) 64 | (** The type for [myformat] stream errors. *) 65 | 66 | (** {1:streams Streams} *) 67 | 68 | (* … *) 69 | end = struct 70 | type Bytes.Stream.error += Error of string 71 | 72 | let format_error = 73 | let case msg = Error msg in 74 | let message = function Error msg -> msg | _ -> assert false in 75 | Bytes.Stream.make_format_error ~format:"myformat" ~case ~message 76 | 77 | let error e = Bytes.Stream.error format_error e 78 | let reader_error r e = Bytes.Reader.error format_error r e 79 | let writer_error w e = Bytes.Writer.error format_error w e 80 | end 81 | -------------------------------------------------------------------------------- /test/gziptrip.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | let stdin_reader unix ~slice_length = match unix with 9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin 11 | 12 | let stdout_writer unix ~slice_length = match unix with 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout 15 | 16 | let filter_stdio_with_reader_filter unix ~slice_length filter = 17 | let i = stdin_reader unix ~slice_length in 18 | let o = stdout_writer unix ~slice_length in 19 | Bytes.Writer.write_reader ~eod:true o (filter i); 20 | i, o 21 | 22 | let filter_stdio_with_writer_filter unix ~slice_length filter = 23 | let i = stdin_reader unix ~slice_length in 24 | let o = stdout_writer unix ~slice_length in 25 | Bytes.Writer.write_reader ~eod:true (filter o) i; 26 | Bytes.Writer.write_eod o; (* Writer filters do not write eod *) 27 | i, o 28 | 29 | let decompress processor unix ~slice_length = match processor with 30 | | `Reader -> 31 | let d = Bytesrw_zlib.Gzip.decompress_reads () in 32 | filter_stdio_with_reader_filter unix ~slice_length d 33 | | `Writer -> 34 | let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true in 35 | filter_stdio_with_writer_filter unix ~slice_length d 36 | 37 | let compress processor unix ~slice_length = match processor with 38 | | `Reader -> 39 | let c = Bytesrw_zlib.Gzip.compress_reads () in 40 | filter_stdio_with_reader_filter unix ~slice_length c 41 | | `Writer -> 42 | let c = Bytesrw_zlib.Gzip.compress_writes () ~eod:true in 43 | filter_stdio_with_writer_filter unix ~slice_length c 44 | 45 | let log_count i o = 46 | let i = Bytes.Reader.read_length i in 47 | let o = Bytes.Writer.written_length o in 48 | let pct = Float.to_int ((float o /. float i) *. 100.) in 49 | Printf.eprintf "i:%d o:%d o/i:%d%%\n%!" i o pct 50 | 51 | let trip mode clevel processor slice_length show_count unix = 52 | try 53 | let i, o = match mode with 54 | | `Decompress -> decompress processor unix ~slice_length 55 | | `Compress -> compress processor unix ~slice_length 56 | in 57 | if show_count then log_count i o; 58 | Ok 0 59 | with 60 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 61 | 62 | open Cmdliner 63 | 64 | let cmd = 65 | let doc = "Gzip (De)compression from stdin to stdout" in 66 | let mode = 67 | let c = `Compress, Arg.info ["z"; "compress"] ~doc:"Compress." in 68 | let d = `Decompress, Arg.info ["d"; "decompress"] ~doc:"Decompress." in 69 | Arg.(value & vflag `Compress [c; d]) 70 | in 71 | let processor = 72 | let r = 73 | `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader processor." in 74 | let w = 75 | `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer processor." in 76 | Arg.(value & vflag `Reader [r; w]) 77 | in 78 | let slice_length = 79 | let doc = "IO byte slices size." in 80 | Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE") 81 | in 82 | let clevel = 83 | let doc = 84 | Printf.sprintf "Use compression level $(docv) (%d-%d)" 85 | (Bytesrw_zlib.default_compression) (Bytesrw_zlib.best_compression) 86 | in 87 | Arg.(value & opt int (Bytesrw_zlib.default_compression) & 88 | info ["l"] ~doc ~docv:"LEVEL") 89 | in 90 | let show_count = 91 | let doc = "Show on $(b,stderr) final amount of bytes read and written." in 92 | Arg.(value & flag & info ["show-count"] ~doc) 93 | in 94 | let unix = 95 | let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in 96 | Arg.(value & flag & info ["unix-io"] ~doc) 97 | in 98 | Cmd.v (Cmd.info "gziptrip" ~version:"%%VERSION%%" ~doc) @@ 99 | Term.(const trip $ mode $ clevel $ processor $ 100 | slice_length $ show_count $ unix) 101 | 102 | let main () = Cmd.eval_result' cmd 103 | let () = if !Sys.interactive then () else exit (main ()) 104 | -------------------------------------------------------------------------------- /test/quickstart.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: CC0-1.0 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Compile with: 7 | ocamlfind ocamlopt -package bytesrw,bytesrw.zstd -linkpkg quickstart.ml *) 8 | 9 | open Bytesrw 10 | 11 | let stdio_compress_reads () = 12 | try 13 | let stdin = Bytes.Reader.of_in_channel In_channel.stdin in 14 | let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in 15 | let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in 16 | let zstdr = Bytesrw_zstd.compress_reads ~params () stdin in 17 | Bytes.Writer.write_reader ~eod:true stdout zstdr; 18 | Ok () 19 | with 20 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 21 | | Sys_error e -> Error e 22 | 23 | let stdio_compress_writes () = 24 | try 25 | let stdin = Bytes.Reader.of_in_channel In_channel.stdin in 26 | let stdout = Bytes.Writer.of_out_channel Out_channel.stdout in 27 | let params = Bytesrw_zstd.Cctx_params.make ~checksum:true () in 28 | let zstdw = Bytesrw_zstd.compress_writes ~params () ~eod:true stdout in 29 | Bytes.Writer.write_reader ~eod:true zstdw stdin; 30 | Ok () 31 | with 32 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 33 | | Sys_error e -> Error e 34 | 35 | let main () = 36 | Result.fold ~ok:(Fun.const 0) ~error:(fun e -> prerr_endline e; 1) @@ 37 | if Array.exists (String.equal "-w") Sys.argv 38 | then stdio_compress_writes () 39 | else stdio_compress_reads () 40 | 41 | let () = if !Sys.interactive then () else exit (main ()) 42 | -------------------------------------------------------------------------------- /test/test_blake3.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | open B0_testing 8 | 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n 10 | 11 | (* Test vectors *) 12 | 13 | type t = 14 | { data : string; 15 | blake3 : string } 16 | 17 | let t0 = 18 | { data = ""; 19 | blake3 = 20 | "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"; } 21 | 22 | let t1 = 23 | { data = 24 | "BLAKE3 is based on an optimized instance of the established \ 25 | hash function BLAKE2 and on the original Bao tree mode."; 26 | blake3 = 27 | "d352deef3f9b5aff803f7f2ab3aa4a15a0f21f4babce3534451057084155a280" } 28 | 29 | (* Testing *) 30 | 31 | let test_blake3 = 32 | Test.test "Bytesrw_blake3.Blake3" @@ fun () -> 33 | let module H = Bytesrw_blake3.Blake3 in 34 | let testh t = t.blake3 and hex = H.to_hex in 35 | assert (H.(hex (string t0.data) = testh t0)); 36 | assert (H.(hex (string t1.data) = testh t1)); 37 | begin repeat 5 @@ fun n -> 38 | let r = Bytes.Reader.of_string ~slice_length:n t1.data in 39 | let r, st = H.reads r in 40 | let () = Bytes.Reader.discard r in 41 | assert (H.(hex (value st) = testh t1)); 42 | let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in 43 | let w, st = H.writes w in 44 | let () = Bytes.Writer.write_string w t1.data in 45 | assert (H.(hex (value st) = testh t1)); 46 | let () = Bytes.Writer.write_eod w in 47 | assert (H.(hex (value st) = testh t1)); 48 | let () = Bytes.Writer.write_eod w in 49 | assert (H.(hex (value st) = testh t1)); 50 | end; 51 | let r, st = H.reads (Bytes.Reader.empty ()) in 52 | let () = Bytes.Reader.discard r in 53 | assert (H.(hex (value st) = testh t0)); 54 | let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in 55 | let w, st = H.writes w in 56 | let () = Bytes.Writer.write_eod w in 57 | assert (H.(hex (value st) = testh t0)); 58 | let h = H.(string t1.data) in 59 | assert (hex h = testh t1); 60 | assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok)); 61 | assert (H.equal h 62 | (H.of_binary_string (H.to_binary_string h) |> Result.get_ok)); 63 | () 64 | 65 | let main () = 66 | Test.main @@ fun () -> 67 | Test.log "Using libblake3 %s" (Bytesrw_blake3.version ()); 68 | Test.autorun (); 69 | Gc.full_major () 70 | 71 | let () = if !Sys.interactive then () else exit (main ()) 72 | -------------------------------------------------------------------------------- /test/test_md.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_testing 7 | open Bytesrw 8 | 9 | let repeat ?__POS__ n f = 10 | Test.range ?__POS__ ~kind:"slice_length" ~first:1 ~last:n f 11 | 12 | 13 | (* Test vectors *) 14 | 15 | type t = 16 | { data : string; sha_1 : string; sha_256 : string; sha_384 : string; 17 | sha_512 : string; } 18 | 19 | let t0 = 20 | { data = ""; 21 | sha_1 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"; 22 | sha_256 = 23 | "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; 24 | sha_384 = 25 | "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da\ 26 | 274edebfe76f65fbd51ad2f14898b95b"; 27 | sha_512 = 28 | "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce\ 29 | 47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"; } 30 | 31 | let t1 = 32 | { data = 33 | "SHA-1 is used to generate a condensed representation of a \ 34 | message called a message digest."; 35 | sha_1 = "a2ee13f265a9b1d7ce14e6b539a6371710e8e4cd"; 36 | sha_256 = 37 | "aa97506c73d3b4207b7bc50faf8bf04c9918524db0ac827073e13067807815f9"; 38 | sha_384 = 39 | "e7a9d7ff5f40240d0747d7d387dd59b3cf4275ba13f5465668b96c357aee6b47\ 40 | 20356901245d4d1339211ca4faff5fde"; 41 | sha_512 = 42 | "08d45fb0542f32fd4db91767eaad372374f754c035e4920cae61e778d9118430\ 43 | a0807f26aa8eab5b1f9425cabf96378694d0a0fadaad14ea30f5d1077d7e910b" } 44 | 45 | let test_mod (module H : Bytesrw_md.Sha) testh = 46 | let hex = H.to_hex in 47 | assert (H.(hex (string t0.data) = testh t0)); 48 | assert (H.(hex (string t1.data) = testh t1)); 49 | begin repeat 5 @@ fun n -> 50 | let r = Bytes.Reader.of_string ~slice_length:n t1.data in 51 | let r, st = H.reads r in 52 | let () = Bytes.Reader.discard r in 53 | assert (H.(hex (value st) = testh t1)); 54 | let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in 55 | let w, st = H.writes w in 56 | let () = Bytes.Writer.write_string w t1.data in 57 | assert (H.(hex (value st) = testh t1)); 58 | let () = Bytes.Writer.write_eod w in 59 | assert (H.(hex (value st) = testh t1)); 60 | let () = Bytes.Writer.write_eod w in 61 | assert (H.(hex (value st) = testh t1)); 62 | end; 63 | let r, st = H.reads (Bytes.Reader.empty ()) in 64 | let () = Bytes.Reader.discard r in 65 | assert (H.(hex (value st) = testh t0)); 66 | let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in 67 | let w, st = H.writes w in 68 | let () = Bytes.Writer.write_eod w in 69 | assert (H.(hex (value st) = testh t0)); 70 | let h = H.(string t1.data) in 71 | assert (hex h = testh t1); 72 | assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok)); 73 | assert (H.equal h 74 | (H.of_binary_string (H.to_binary_string h) |> Result.get_ok)); 75 | () 76 | 77 | let test_sha_1 = 78 | Test.test "Bytesrw_md.Sha_1" @@ fun () -> 79 | test_mod (module Bytesrw_md.Sha_1) (fun t -> t.sha_1); 80 | () 81 | 82 | let test_sha_256 = 83 | Test.test "Bytesrw_md.Sha_256" @@ fun () -> 84 | test_mod (module Bytesrw_md.Sha_256) (fun t -> t.sha_256); 85 | () 86 | 87 | let test_sha_384 = 88 | Test.test "Bytesrw_md.Sha_384" @@ fun () -> 89 | test_mod (module Bytesrw_md.Sha_384) (fun t -> t.sha_384); 90 | () 91 | 92 | let test_sha_512 = 93 | Test.test "Bytesrw_md.Sha_512" @@ fun () -> 94 | test_mod (module Bytesrw_md.Sha_512) (fun t -> t.sha_512); 95 | () 96 | 97 | (* Tests *) 98 | 99 | let main () = Test.main @@ fun () -> Test.autorun (); Gc.full_major () 100 | let () = if !Sys.interactive then () else exit (main ()) 101 | -------------------------------------------------------------------------------- /test/test_utf.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | open B0_testing 8 | 9 | let tracer = Bytes.Slice.tracer ~ppf:Format.std_formatter 10 | let reader_of_list ss = 11 | Bytes.Reader.of_slice_seq (List.to_seq (List.map Bytes.Slice.of_string ss)) 12 | 13 | let test_guess_reader_encoding = 14 | Test.test "Test Bytesrw_utf.guess_reader_encoding" @@ fun () -> 15 | let test (s, exp) = 16 | let r = Bytes.Reader.of_string s in 17 | let g = Bytesrw_utf.guess_reader_encoding r in 18 | let pp_enc = Bytesrw_utf.Encoding.pp in 19 | if exp <> g then begin 20 | Test.fail "expected: %a found: %a" pp_enc exp pp_enc g; 21 | end else (Test.string (Bytes.Reader.to_string r) s) 22 | in 23 | (* This was taken from the uutf suite which also tested subsequent 24 | malformed data. *) 25 | (* UTF-8 guess *) 26 | test ("", `Utf_8); 27 | test ("\xEF", `Utf_8); (* malformed *) 28 | test ("\xEF\xBB", `Utf_8); (* malformed *) 29 | test ("\xEF\xBB\x00", `Utf_8); (* malformed *) 30 | test ("\xEF\xBB\xBF\xEF\xBB\xBF", `Utf_8); 31 | test ("\n\r\n", `Utf_8); 32 | test ("\n\x80\xEF\xBB\xBF\n", `Utf_8); (* malformed *) 33 | test ("\n\n\xEF\xBB\x00\n", `Utf_8); (* malformed *) 34 | test ("\n\xC8\x99", `Utf_8); 35 | test ("\xC8\x99\n", `Utf_8); 36 | test ("\xC8\x99\n\n", `Utf_8); 37 | test ("\xC8\x99\xC8\x99", `Utf_8); 38 | test ("\xC8\x99\xF0\x9F\x90\xAB", `Utf_8); 39 | test ("\xF0\x9F\x90\xAB\n", `Utf_8); 40 | (* UTF-16BE guess *) 41 | test ("\xFE\xFF\xDB\xFF\xDF\xFF\x00\x0A", `Utf_16be); 42 | test ("\xFE\xFF\xDB\xFF\x00\x0A\x00\x0A", `Utf_16be); (* malformed *) 43 | test ("\xFE\xFF\xDB\xFF\xDF", `Utf_16be); (* malformed *) 44 | test ("\x80\x81\xDB\xFF\xDF\xFF\xFE\xFF\xDF\xFF\xDB\xFF", 45 | `Utf_16be); (* malformed *) 46 | test ("\x80\x81\xDF\xFF\xDB\xFF\xFE", `Utf_16be); (* malformred *) 47 | test ("\x00\x0A", `Utf_16be); 48 | test ("\x00\x0A\xDB", `Utf_16be); (* malformed *) 49 | test ("\x00\x0A\xDB\xFF", `Utf_16be); (* malformed *) 50 | test ("\x00\x0A\xDB\xFF\xDF", `Utf_16be); (* malformed *) 51 | test ("\x00\x0A\xDB\xFF\xDF\xFF", `Utf_16be); 52 | test ("\x00\x0A\x00\x0A", `Utf_16be); 53 | (* UTF-16LE guess *) 54 | test ("\xFF\xFE\xFF\xDB\xFF\xDF\x0A\x00", `Utf_16le); 55 | test ("\xFF\xFE\xFF\xDB\x0A\x00\x0A\x00", `Utf_16le); (* malformed *) 56 | test ("\xFF\xFE\xFF\xDB\xDF", `Utf_16le); (* malformed *) 57 | test ("\x0A\x00", `Utf_16le); 58 | test ("\x0A\x00\xDB", `Utf_16le); (* malformed *) 59 | test ("\x0A\x00\xFF\xDB", `Utf_16le); (* malformed *) 60 | test ("\x0A\x00\xFF\xDB\xDF", `Utf_16le); (* malformed *) 61 | test ("\x0A\x00\xFF\xDB\xFF\xDF", `Utf_16le); 62 | test ("\x0A\x00\x0A\x00", `Utf_16le); 63 | () 64 | 65 | let main () = Test.main @@ fun () -> Test.autorun () 66 | let () = if !Sys.interactive then () else exit (main ()) 67 | -------------------------------------------------------------------------------- /test/test_xxhash.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_testing 7 | open Bytesrw 8 | 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n 10 | 11 | (* Test vectors *) 12 | 13 | type t = { data : string; xxh3_64 : string; xxh3_128 : string; } 14 | let t0 = 15 | { data = ""; 16 | xxh3_64 = "2d06800538d394c2"; 17 | xxh3_128 = "99aa06d3014798d86001c324468d497f"; } 18 | 19 | let t1 = 20 | { data = "xxHash is an extremely fast non-cryptographic hash algorithm"; 21 | xxh3_64 = "339d1954a9e06117"; 22 | xxh3_128 = "07b0640ae1f202e6990373bfcc1e5c75" } 23 | 24 | let t2 = 25 | { data = "abc\n"; 26 | xxh3_64 = "079364cbfdf9f4cb"; 27 | xxh3_128 = "158bbebef0c159c99914d27c01087efa"; } 28 | 29 | let test_module (module H : Bytesrw_xxhash.Xxh3) testh = 30 | let hex = H.to_hex in 31 | assert (H.(hex (string t0.data) = testh t0)); 32 | assert (H.(hex (string t1.data) = testh t1)); 33 | assert (H.(hex (string t1.data) = testh t1)); 34 | begin repeat 5 @@ fun n -> 35 | let r = Bytes.Reader.of_string ~slice_length:n t1.data in 36 | let r, st = H.reads r in 37 | let () = Bytes.Reader.discard r in 38 | assert (H.(hex (value st) = testh t1)); 39 | let w = Bytes.Writer.of_buffer ~slice_length:n (Buffer.create 255) in 40 | let w, st = H.writes w in 41 | let () = Bytes.Writer.write_string w t1.data in 42 | assert (H.(hex (value st) = testh t1)); 43 | let () = Bytes.Writer.write_eod w in 44 | assert (H.(hex (value st) = testh t1)); 45 | let () = Bytes.Writer.write_eod w in 46 | assert (H.(hex (value st) = testh t1)); 47 | end; 48 | let r, st = H.reads (Bytes.Reader.empty ()) in 49 | let () = Bytes.Reader.discard r in 50 | assert (H.(hex (value st) = testh t0)); 51 | let w = Bytes.Writer.of_buffer ~slice_length:2 (Buffer.create 255) in 52 | let w, st = H.writes w in 53 | let () = Bytes.Writer.write_eod w in 54 | assert (H.(hex (value st) = testh t0)); 55 | let f h = 56 | assert (H.equal h (H.of_hex (H.to_hex h) |> Result.get_ok)); 57 | assert (H.equal h 58 | (H.of_binary_string (H.to_binary_string h) |> Result.get_ok)); 59 | in 60 | f H.(string t0.data); 61 | f H.(string t1.data); 62 | f H.(string t2.data); 63 | () 64 | 65 | let test_xxh3_64 = 66 | Test.test "Bytesrw_xxhash.Xxh3_64" @@ fun () -> 67 | test_module (module Bytesrw_xxhash.Xxh3_64) (fun t -> t.xxh3_64); 68 | let h = Bytesrw_xxhash.Xxh3_64.(string t1.data) in 69 | assert (Printf.sprintf "%Lx" (Bytesrw_xxhash.Xxh3_64.to_uint64 h) = 70 | t1.xxh3_64); 71 | () 72 | 73 | let test_xxh3_128 = 74 | Test.test "Testing Bytesrw_xxhash.Xxh3_128" @@ fun () -> 75 | test_module (module Bytesrw_xxhash.Xxh3_128) (fun t -> t.xxh3_128); 76 | () 77 | 78 | (* Tests *) 79 | 80 | let main () = 81 | Test.main @@ fun () -> 82 | Test.log "Using libxxhash %s" (Bytesrw_xxhash.version ()); 83 | Test.autorun (); 84 | Gc.full_major () 85 | 86 | let () = if !Sys.interactive then () else exit (main ()) 87 | -------------------------------------------------------------------------------- /test/test_zlib.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | open B0_testing 8 | 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n 10 | 11 | let test_stream_error f = 12 | let is_exn = function Bytes.Stream.Error _ -> true | _ -> false in 13 | Test.catch f @@ function fnd -> Test.holds (is_exn fnd) 14 | 15 | (* Test vectors *) 16 | 17 | let a30_deflate = 18 | "\x4b\xc4\x0b\x00", String.make 30 'a' 19 | 20 | let a30_zlib = 21 | "\x78\x9c\x4b\xc4\x0b\x00\xb0\x4f\x0b\x5f", String.make 30 'a' 22 | 23 | let a_gz = 24 | "\x1f\x8b\x08\x08\x94\x58\x2d\x66\x00\x03\x61\x2e\x74\x78\x74\x00\x4b\x4c\ 25 | \x84\x01\x00\xf0\xcd\x11\x4c\x0a\x00\x00\x00", String.make 10 'a' 26 | 27 | let b_gz = 28 | "\x1f\x8b\x08\x08\x8c\x58\x2d\x66\x00\x03\x62\x2e\x74\x78\x74\x00\x4b\x4a\ 29 | \x82\x01\x00\xf8\x4c\x2f\x42\x0a\x00\x00\x00", String.make 10 'b' 30 | 31 | let more = "moreatthedoor" 32 | 33 | (* Tests *) 34 | 35 | let test_deflate_decompress_reads = 36 | Test.test "Bytesrw_zlib.Deflate.decompress_reads" @@ fun () -> 37 | begin repeat 5 @@ fun n -> (* One stream *) 38 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_deflate) in 39 | let d = Bytesrw_zlib.Deflate.decompress_reads () c in 40 | assert (Bytes.Reader.to_string d = snd a30_deflate) 41 | end; 42 | begin repeat 5 @@ fun n -> (* One stream with unexpected leftover data *) 43 | let c = fst a30_deflate ^ more in 44 | let c = Bytes.Reader.of_string ~slice_length:n c in 45 | let d = Bytesrw_zlib.Deflate.decompress_reads () c in 46 | test_stream_error @@ fun () -> Bytes.Reader.to_string d 47 | end; 48 | begin repeat 5 @@ fun n -> (* One stream with expected leftover data *) 49 | let c = (fst a30_deflate) ^ more in 50 | let c = Bytes.Reader.of_string ~slice_length:n c in 51 | let d = Bytesrw_zlib.Deflate.decompress_reads ~leftover:true () c in 52 | assert (Bytes.Reader.to_string d = snd a30_deflate); 53 | assert (Bytes.Reader.pos c = String.length (fst a30_deflate)); 54 | assert (Bytes.Reader.to_string c = more); 55 | end; 56 | begin repeat 5 @@ fun n -> (* One stream with expected leftover data but "" *) 57 | let c = (fst a30_deflate) in 58 | let c = Bytes.Reader.of_string ~slice_length:n c in 59 | let d = Bytesrw_zlib.Deflate.decompress_reads ~leftover:true () c in 60 | assert (Bytes.Reader.to_string d = snd a30_deflate); 61 | assert (Bytes.Reader.pos c = String.length (fst a30_deflate)); 62 | assert (Bytes.Reader.to_string c = ""); 63 | end; 64 | () 65 | 66 | let test_deflate_decompress_writes = 67 | Test.test "Bytesrw_zlib.Deflate.decompress_writes" @@ fun () -> 68 | begin repeat 5 @@ fun n -> (* One stream. *) 69 | let b = Buffer.create 255 in 70 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 71 | let d = Bytesrw_zlib.Deflate.decompress_writes ~eod:true () w in 72 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_deflate) in 73 | let () = Bytes.Writer.write_reader ~eod:true d c in 74 | assert (Buffer.contents b = snd a30_deflate); 75 | end; 76 | begin repeat 5 @@ fun n -> (* One stream with unexpected leftover *) 77 | let b = Buffer.create 255 in 78 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 79 | let d = Bytesrw_zlib.Deflate.decompress_writes ~eod:true () w in 80 | let c = fst a30_deflate in 81 | let c = Bytes.Reader.of_string ~slice_length:n (c ^ c) in 82 | test_stream_error @@ fun () -> 83 | Bytes.Writer.write_reader ~eod:true d c 84 | end; 85 | () 86 | 87 | let test_deflate_compress_reads = 88 | Test.test "Bytesrw_zlib.Deflate.compress_reads" @@ fun () -> 89 | begin repeat 5 @@ fun n -> 90 | let data = snd a30_deflate in 91 | let d = Bytes.Reader.of_string ~slice_length:n data in 92 | let c = Bytesrw_zlib.Deflate.compress_reads () ~slice_length:n d in 93 | let trip = Bytesrw_zlib.Deflate.decompress_reads () ~slice_length:n c in 94 | assert (Bytes.Reader.to_string trip = data) 95 | end 96 | 97 | let test_deflate_compress_writes = 98 | Test.test "Bytesrw_zlib.Deflate.compress_writes" @@ fun () -> 99 | begin repeat 5 @@ fun n -> 100 | let b = Buffer.create 255 in 101 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 102 | let trip = 103 | Bytesrw_zlib.Deflate.compress_writes () ~slice_length:n ~eod:true @@ 104 | Bytesrw_zlib.Deflate.decompress_writes () ~slice_length:n ~eod:true @@ 105 | w 106 | in 107 | let data = snd a30_deflate in 108 | let r = Bytes.Reader.of_string ~slice_length:n data in 109 | let () = Bytes.Writer.write_reader ~eod:true trip r in 110 | assert (Buffer.contents b = data) 111 | end 112 | 113 | let test_zlib_decompress_reads = 114 | Test.test "Bytesrw_zlib.Zlib.decompress_reads" @@ fun () -> 115 | begin repeat 5 @@ fun n -> (* One stream *) 116 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zlib) in 117 | let d = Bytesrw_zlib.Zlib.decompress_reads () c in 118 | assert (Bytes.Reader.to_string d = snd a30_zlib); 119 | end; 120 | begin repeat 5 @@ fun n -> (* One stream with expected leftover data *) 121 | let data = fst a30_zlib ^ more in 122 | let c = Bytes.Reader.of_string ~slice_length:n data in 123 | let d = Bytesrw_zlib.Zlib.decompress_reads ~leftover:true () c in 124 | assert (Bytes.Reader.to_string d = snd a30_zlib); 125 | assert (Bytes.Reader.pos c = String.length (fst a30_zlib)); 126 | assert (Bytes.Reader.to_string c = more); 127 | end; 128 | () 129 | 130 | let test_zlib_decompress_writes = 131 | Test.test "Bytesrw_zlib.Zlib.decompress_writes" @@ fun () -> 132 | begin repeat 5 @@ fun n -> (* One stream *) 133 | let b = Buffer.create 255 in 134 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 135 | let d = Bytesrw_zlib.Zlib.decompress_writes () ~eod:true w in 136 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zlib) in 137 | let () = Bytes.Writer.write_reader ~eod:true d c in 138 | assert (Buffer.contents b = snd a30_zlib); 139 | end 140 | 141 | let test_zlib_compress_reads = 142 | Test.test "Bytesrw_zlib.Zlib.compress_reads" @@ fun () -> 143 | begin repeat 5 @@ fun n -> 144 | let data = snd a30_zlib in 145 | let d = Bytes.Reader.of_string ~slice_length:n data in 146 | let c = Bytesrw_zlib.Zlib.compress_reads () ~slice_length:n d in 147 | let trip = Bytesrw_zlib.Zlib.decompress_reads () ~slice_length:n c in 148 | assert (Bytes.Reader.to_string trip = data) 149 | end 150 | 151 | let test_zlib_compress_writes = 152 | Test.test "Bytesrw_zlib.Zlib.compress_writes" @@ fun () -> 153 | begin repeat 5 @@ fun n -> 154 | let b = Buffer.create 255 in 155 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 156 | let trip = 157 | Bytesrw_zlib.Zlib.compress_writes () ~slice_length:n ~eod:true @@ 158 | Bytesrw_zlib.Zlib.decompress_writes () ~slice_length:n ~eod:true @@ 159 | w 160 | in 161 | let data = snd a30_zlib in 162 | let r = Bytes.Reader.of_string ~slice_length:n data in 163 | let () = Bytes.Writer.write_reader ~eod:true trip r in 164 | assert (Buffer.contents b = data) 165 | end 166 | 167 | let test_gzip_decompress_reads = 168 | Test.test "Bytesrw_zlib.Gzip.decompress_reads" @@ fun () -> 169 | begin repeat 5 @@ fun n -> (* One member *) 170 | let c = Bytes.Reader.of_string ~slice_length:n (fst a_gz) in 171 | let d0 = Bytesrw_zlib.Gzip.decompress_reads () c in 172 | assert (Bytes.Reader.to_string d0 = snd a_gz); 173 | end; 174 | begin repeat 5 @@ fun n -> (* Two members, one shot *) 175 | let data = (fst a_gz) ^ (fst b_gz) in 176 | let c = Bytes.Reader.of_string ~slice_length:n data in 177 | let d0 = Bytesrw_zlib.Gzip.decompress_reads () c in 178 | assert (Bytes.Reader.to_string d0 = snd a_gz ^ snd b_gz); 179 | end; 180 | begin repeat 5 @@ fun n -> (* One member, leftover data *) 181 | let data = (fst a_gz) ^ more in 182 | let c = Bytes.Reader.of_string ~slice_length:n data in 183 | let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in 184 | assert (Bytes.Reader.to_string d0 = snd a_gz); 185 | assert (Bytes.Reader.pos c = String.length (fst a_gz)); 186 | assert (Bytes.Reader.to_string c = more) 187 | end; 188 | begin repeat 5 @@ fun n -> (* One member, empty leftover data *) 189 | let c = Bytes.Reader.of_string ~slice_length:n (fst b_gz) in 190 | let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in 191 | assert (Bytes.Reader.to_string d0 = snd b_gz); 192 | assert (Bytes.Reader.pos c = String.length (fst b_gz)); 193 | assert (Bytes.Reader.to_string c = "") 194 | end; 195 | begin repeat 5 @@ fun n -> (* Two members, two shots *) 196 | let cdata = (fst a_gz) ^ (fst b_gz) in 197 | let dlen = String.length cdata in 198 | let c = Bytes.Reader.of_string ~slice_length:n (cdata ^ more) in 199 | let d0 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in 200 | assert (Bytes.Reader.to_string d0 = snd a_gz); 201 | assert (Bytes.Reader.pos c = String.length (fst a_gz)); 202 | let d1 = Bytesrw_zlib.Gzip.decompress_reads ~all_members:false () c in 203 | assert (Bytes.Reader.to_string d1 = snd b_gz); 204 | assert (Bytes.Reader.pos c = dlen); 205 | assert (Bytes.Reader.to_string c = more); 206 | end; 207 | () 208 | 209 | let test_gzip_decompress_writes = 210 | Test.test "Bytesrw_zlib.Gzip.decompress_writes" @@ fun () -> 211 | begin repeat 5 @@ fun n -> 212 | let b = Buffer.create 255 in 213 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 214 | let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true w in 215 | let c = Bytes.Reader.of_string ~slice_length:n (fst a_gz) in 216 | let () = Bytes.Writer.write_reader ~eod:true d c in 217 | assert (Buffer.contents b = snd a_gz); 218 | end; 219 | begin repeat 5 @@ fun n -> 220 | let data = (fst a_gz) ^ (fst b_gz) in 221 | let res = (snd a_gz) ^ (snd b_gz) in 222 | let b = Buffer.create 255 in 223 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 224 | let d = Bytesrw_zlib.Gzip.decompress_writes () ~eod:true w in 225 | let c = Bytes.Reader.of_string ~slice_length:n data in 226 | let () = Bytes.Writer.write_reader ~eod:true d c in 227 | assert (Buffer.contents b = res); 228 | end 229 | 230 | let test_gzip_compress_reads = 231 | Test.test "Bytesrw_zlib.Gzip.compress_reads" @@ fun () -> 232 | begin repeat 5 @@ fun n -> 233 | let data = snd a_gz in 234 | let d = Bytes.Reader.of_string ~slice_length:n data in 235 | let c = Bytesrw_zlib.Gzip.compress_reads () ~slice_length:n d in 236 | let trip = Bytesrw_zlib.Gzip.decompress_reads () ~slice_length:n c in 237 | assert (Bytes.Reader.to_string trip = data) 238 | end 239 | 240 | let test_gzip_compress_writes = 241 | Test.test "Bytesrw_zlib.Gzip.compress_writes" @@ fun () -> 242 | begin repeat 5 @@ fun n -> 243 | let b = Buffer.create 255 in 244 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 245 | let trip = 246 | Bytesrw_zlib.Gzip.compress_writes () ~slice_length:n ~eod:true @@ 247 | Bytesrw_zlib.Gzip.decompress_writes () ~slice_length:n ~eod:true @@ 248 | w 249 | in 250 | let data = snd a_gz in 251 | let r = Bytes.Reader.of_string ~slice_length:n data in 252 | let () = Bytes.Writer.write_reader ~eod:true trip r in 253 | assert (Buffer.contents b = data) 254 | end 255 | 256 | let main () = 257 | Test.main @@ fun () -> 258 | Test.log "Using zlib %s" (Bytesrw_zlib.version ()); 259 | Test.autorun (); 260 | Gc.full_major () 261 | 262 | let () = if !Sys.interactive then () else exit (main ()) 263 | -------------------------------------------------------------------------------- /test/test_zstd.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | open B0_testing 8 | 9 | let repeat n = Test.range ~kind:"slice_length" ~first:1 ~last:n 10 | 11 | let test_stream_error f = 12 | let is_exn = function Bytes.Stream.Error _ -> true | _ -> false in 13 | Test.catch f @@ fun fnd -> Test.holds (is_exn fnd) 14 | 15 | (* Test vectors *) 16 | 17 | let a30_zstd = (* Note this compressed data has a checksum. *) 18 | "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x61\x61\x01\x00\x0c\xc0\x02\x61\ 19 | \x36\xf8\xbb", String.make 30 'a' 20 | 21 | let b30_zstd = (* Note this compressed data has a checksum. *) 22 | "\x28\xb5\x2f\xfd\x04\x58\x45\x00\x00\x10\x62\x62\x01\x00\x0c\xc0\x02\xb3\ 23 | \x56\x1f\x2e", String.make 30 'b' 24 | 25 | let more = "moreatthedoor" 26 | 27 | (* Tests *) 28 | 29 | let test_decompress_reads = 30 | Test.test "Bytesrw_zstd.decompress_reads" @@ fun () -> 31 | begin repeat 5 @@ fun n -> (* one frame *) 32 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd) in 33 | let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in 34 | assert (Bytes.Reader.to_string d = snd a30_zstd) 35 | end; 36 | begin repeat 5 @@ fun n -> (* one frame with unexpected leftover data *) 37 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd ^ more) in 38 | let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in 39 | test_stream_error @@ fun () -> Bytes.Reader.to_string d 40 | end; 41 | begin repeat 5 @@ fun n -> (* one frame with expected leftover data *) 42 | let c = fst a30_zstd ^ more in 43 | let c = Bytes.Reader.of_string ~slice_length:n c in 44 | let d = 45 | Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c 46 | in 47 | assert (Bytes.Reader.to_string d = snd a30_zstd); 48 | assert (Bytes.Reader.pos c = String.length (fst a30_zstd)); 49 | assert (Bytes.Reader.to_string c = more); 50 | end; 51 | begin repeat 5 @@ fun n -> (* two frames, one shot *) 52 | let data = (fst a30_zstd) ^ (fst b30_zstd) in 53 | let c = Bytes.Reader.of_string ~slice_length:n data in 54 | let d = Bytesrw_zstd.decompress_reads () ~slice_length:n c in 55 | assert (Bytes.Reader.to_string d = snd a30_zstd ^ snd b30_zstd) 56 | end; 57 | begin repeat 5 @@ fun n -> (* two frames, two shots *) 58 | let data = (fst a30_zstd) ^ (fst b30_zstd) in 59 | let c = Bytes.Reader.of_string ~slice_length:n data in 60 | let d = 61 | Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c 62 | in 63 | assert (Bytes.Reader.to_string d = snd a30_zstd); 64 | assert (Bytes.Reader.pos c = String.length (fst a30_zstd)); 65 | let d = 66 | Bytesrw_zstd.decompress_reads ~all_frames:false () ~slice_length:n c 67 | in 68 | assert (Bytes.Reader.to_string d = snd b30_zstd); 69 | assert (Bytes.Reader.to_string c = ""); 70 | end; 71 | () 72 | 73 | let test_decompress_writes = 74 | Test.test "Bytesrw_zstd.decompress_writes" @@ fun () -> 75 | begin repeat 5 @@ fun n -> (* one frame *) 76 | let b = Buffer.create 255 in 77 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 78 | let d = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in 79 | let c = Bytes.Reader.of_string ~slice_length:n (fst a30_zstd) in 80 | let () = Bytes.Writer.write_reader ~eod:true d c in 81 | assert (Buffer.contents b = snd a30_zstd) 82 | end; 83 | begin repeat 5 @@ fun n -> (* one with unexpected leftover *) 84 | let b = Buffer.create 255 in 85 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 86 | let d = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in 87 | let c = Bytes.Reader.of_string ~slice_length:n ((fst a30_zstd) ^ more) in 88 | test_stream_error @@ fun () -> Bytes.Writer.write_reader ~eod:true d c; 89 | end; 90 | () 91 | 92 | let test_compress_reads = 93 | Test.test "Bytesrw_zstd.compress_reads" @@ fun () -> 94 | repeat 5 @@ fun n -> 95 | let data = snd a30_zstd in 96 | let d = Bytes.Reader.of_string ~slice_length:n data in 97 | let c = Bytesrw_zstd.compress_reads () ~slice_length:n d in 98 | let trip = Bytesrw_zstd.decompress_reads () ~slice_length:n c in 99 | assert (Bytes.Reader.to_string trip = data) 100 | 101 | let test_compress_writes = 102 | Test.test "Bytesrw_zstd.compress_writes" @@ fun () -> 103 | repeat 5 @@ fun n -> 104 | let data = snd a30_zstd in 105 | let b = Buffer.create 255 in 106 | let w = Bytes.Writer.of_buffer ~slice_length:n b in 107 | let dw = Bytesrw_zstd.decompress_writes () ~slice_length:n ~eod:true w in 108 | let c = Bytesrw_zstd.compress_writes () ~slice_length:n ~eod:true dw in 109 | let rdata = Bytes.Reader.of_string ~slice_length:n data in 110 | let () = Bytes.Writer.write_reader ~eod:true c rdata in 111 | assert (Buffer.contents b = data) 112 | 113 | let test_dictionary_support = 114 | Test.test "dictionary support" @@ fun () -> 115 | repeat 5 @@ fun n -> 116 | let dict = "aaaaaaaa" in 117 | let data = "aaaaaaaabbbbbbbb" ^ "aaaaaaaa" ^ "aaaaaaaa" ^ "aaaaaaaa"in 118 | let cdict = Bytesrw_zstd.Cdict.of_binary_string dict in 119 | let ddict = Bytesrw_zstd.Ddict.of_binary_string dict in 120 | let datar = Bytes.Reader.of_string data ~slice_length:n in 121 | let c = Bytesrw_zstd.compress_reads ~dict:cdict () ~slice_length:n datar in 122 | let d = Bytesrw_zstd.decompress_reads ~dict:ddict () ~slice_length:n c in 123 | assert (Bytes.Reader.to_string d = data); 124 | () 125 | 126 | let main () = 127 | Test.main @@ fun () -> 128 | Test.log "Using libsztd %s" (Bytesrw_zstd.version ()); 129 | Test.autorun (); 130 | Gc.full_major () 131 | 132 | let () = if !Sys.interactive then () else exit (main ()) 133 | -------------------------------------------------------------------------------- /test/utf8codec.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* UTF-8 decoder with position tracking using Bytes.Reader.t and the 7 | Stdlib codecs. 8 | 9 | FIXME: integrate the optimizations there were done in Jsont_bytesrw. *) 10 | 11 | open Bytesrw 12 | 13 | (* XXX add these things to Stdlib.Uchar *) 14 | 15 | let uchar_max_utf_8_byte_length = 4 16 | let uchar_utf_8_byte_decode_length byte = (* or utf_8_byte_length_of_byte *) 17 | if byte < 0x80 then 1 else if byte < 0xC2 then 0 else 18 | if byte < 0xE0 then 2 else if byte < 0xF0 then 3 else 19 | if byte < 0xF5 then 4 else 0 20 | 21 | (* Decoder *) 22 | 23 | let sot = 0x1A0000 (* start of text U+10FFFF + 1 *) 24 | let eot = 0x1A0001 (* end of text U+10FFFF + 2 *) 25 | 26 | type decoder = 27 | { file : string; 28 | reader : Bytes.Reader.t; 29 | mutable i : Bytes.t; (* Current input slice. *) 30 | mutable i_max : int; (* Maximum byte index in [i]. *) 31 | mutable i_next : int; (* Next byte index to read in [i]. *) 32 | mutable overlap : Bytes.t; (* Buffer for overlapping decodes. *) 33 | mutable u : int; (* Current Unicode scalar value or sot or eot. *) 34 | mutable byte_count : int; (* Global byte count. *) 35 | mutable line : int; (* Current line number. *) 36 | mutable line_start : int; (* Current line first global byte position. *) } 37 | 38 | let make_decoder ?(file = "-") reader = 39 | let overlap = Bytes.create uchar_max_utf_8_byte_length in 40 | { file; reader; i = overlap (* overwritten by initial refill *); 41 | i_max = 0; i_next = 1 (* triggers an initial refill *); 42 | overlap; u = sot; byte_count = 0; line = 1; line_start = 0; } 43 | 44 | (* Decoder position and errors *) 45 | 46 | let col_next d = d.byte_count - d.line_start 47 | let err_loc d line col fmt = 48 | Format.kasprintf failwith ("%s:%d:%d: " ^^ fmt) d.file line col 49 | 50 | let err_malformed_utf_8 d = 51 | err_loc d d.line (col_next d) 52 | "UTF-8 decoding error at input byte %d" d.byte_count 53 | 54 | (* Next character *) 55 | 56 | let[@inline] is_eoslice d = d.i_next > d.i_max 57 | let[@inline] is_eod d = d.i_max = - 1 (* Only happens on Slice.eod *) 58 | let[@inline] available d = d.i_max - d.i_next + 1 59 | let[@inline] next_utf_8_length d = 60 | uchar_utf_8_byte_decode_length (Bytes.get_uint8 d.i d.i_next) 61 | 62 | let set_slice d slice = 63 | d.i <- Bytes.Slice.bytes slice; 64 | d.i_next <- Bytes.Slice.first slice; 65 | d.i_max <- d.i_next + Bytes.Slice.length slice - 1 66 | 67 | let rec setup_overlap d start need = match need with 68 | | 0 -> 69 | let slice = match available d with 70 | | 0 -> Bytes.Reader.read d.reader 71 | | length -> Bytes.Slice.make d.i ~first:d.i_next ~length 72 | in 73 | d.i <- d.overlap; d.i_next <- 0; d.i_max <- start; slice 74 | | need -> 75 | if is_eoslice d then set_slice d (Bytes.Reader.read d.reader); 76 | if is_eod d 77 | then (d.byte_count <- d.byte_count - start; err_malformed_utf_8 d); 78 | let available = available d in 79 | let take = Int.min need available in 80 | for i = 0 to take - 1 do 81 | Bytes.set d.overlap (start + i) (Bytes.get d.i (d.i_next + i)) 82 | done; 83 | d.i_next <- d.i_next + take; d.byte_count <- d.byte_count + take; 84 | setup_overlap d (start + take) (need - take) 85 | 86 | let rec nextc d = match available d with 87 | | a when a <= 0 -> 88 | if is_eod d then d.u <- eot else 89 | (set_slice d (Bytes.Reader.read d.reader); nextc d) 90 | | a when a < uchar_max_utf_8_byte_length && a < next_utf_8_length d -> 91 | let s = setup_overlap d 0 (next_utf_8_length d) in nextc d; set_slice d s 92 | | _ -> 93 | let udec = Bytes.get_utf_8_uchar d.i d.i_next in 94 | if not (Uchar.utf_decode_is_valid udec) then err_malformed_utf_8 d else 95 | let u = Uchar.to_int (Uchar.utf_decode_uchar udec) in 96 | let ulen = Uchar.utf_decode_length udec in 97 | d.i_next <- d.i_next + ulen; d.byte_count <- d.byte_count + ulen; 98 | begin match u with 99 | | 0x000D (* CR *) -> d.line_start <- d.byte_count; d.line <- d.line + 1; 100 | | 0x000A (* LF *) -> 101 | d.line_start <- d.byte_count; 102 | if d.u <> 0x000D then d.line <- d.line + 1; 103 | | _ -> () 104 | end; 105 | d.u <- u 106 | 107 | (* UTF-8 encoder. *) 108 | 109 | type encoder = 110 | { writer : Bytes.Writer.t; (* Destination of bytes. *) 111 | o : Stdlib.Bytes.t; (* Buffer for slices. *) 112 | o_max : int; (* Max index in [o]. *) 113 | mutable o_next : int; (* Next writable index in [o]. *) } 114 | 115 | let[@inline] rem_len e = e.o_max - e.o_next + 1 116 | 117 | let make_encoder ?buf:(o = Bytes.create 65535) writer = 118 | let len = Bytes.length o in 119 | if len < 4 then invalid_arg "encoder bytes buffer length < 4" else 120 | { writer; o; o_max = len - 1; o_next = 0 } 121 | 122 | let flush e = 123 | Bytes.Writer.write e.writer (Bytes.Slice.make e.o ~first:0 ~length:e.o_next); 124 | e.o_next <- 0 125 | 126 | let encode_eot e = flush e; Bytes.Writer.write_eod e.writer 127 | let encode_char e c = 128 | if e.o_next > e.o_max then flush e; 129 | Bytes.set e.o e.o_next c; e.o_next <- e.o_next + 1 130 | 131 | let rec encode_uchar e u = 132 | let rem_len = rem_len e in 133 | if rem_len < 4 && Uchar.utf_8_byte_length u > rem_len 134 | then (flush e; encode_uchar e u) 135 | else (e.o_next <- e.o_next + Bytes.set_utf_8_uchar e.o e.o_next u) 136 | 137 | let rec encode_substring e s first length = 138 | if length = 0 then () else 139 | let len = Int.min (rem_len e) length in 140 | if len = 0 then (flush e; encode_substring e s first length) else 141 | begin 142 | Stdlib.Bytes.blit_string s first e.o e.o_next len; 143 | e.o_next <- e.o_next + len; 144 | encode_substring e s (first + len) (length - len) 145 | end 146 | 147 | let encode_string e s = encode_substring e s 0 (String.length s) 148 | 149 | (* Testing *) 150 | 151 | let uchars_of_string s = 152 | let rec loop acc s i max = 153 | if i > max then List.rev acc else 154 | let d = String.get_utf_8_uchar s i in 155 | let u = Uchar.to_int (Uchar.utf_decode_uchar d) in 156 | loop (u :: acc) s (i + Uchar.utf_decode_length d) max 157 | in 158 | loop [] s 0 (String.length s - 1) 159 | 160 | let ustr u = Printf.sprintf "U+%04X" u 161 | let strf = Printf.sprintf 162 | let exp exp fnd = 163 | if (exp <> fnd) 164 | then failwith (strf "expected %s found: %s" (ustr exp) (ustr fnd)) 165 | 166 | let rec assert_dec d = function 167 | | [] -> nextc d; exp eot d.u 168 | | u :: us -> nextc d; exp u d.u; assert_dec d us 169 | 170 | let test s = 171 | let uchars = uchars_of_string s in 172 | for slice_length = 1 to Int.max (String.length s) 1 do 173 | let d = make_decoder (Bytes.Reader.of_string ~slice_length s) in 174 | exp sot d.u; assert_dec d uchars 175 | done 176 | 177 | let uchar_utf8 u = 178 | let b = Bytes.create (Uchar.utf_8_byte_length u) in 179 | ignore (Bytes.set_utf_8_uchar b 0 u); Bytes.unsafe_to_string b 180 | 181 | let test_uchars () = 182 | let rec loop u = 183 | if Uchar.equal u Uchar.max then () else 184 | let s = "abé" ^ (uchar_utf8 u) ^ "🐫" in 185 | let s' = "abé" ^ (uchar_utf8 u) in 186 | (test s; test s'; loop (Uchar.succ u)) 187 | in 188 | loop Uchar.min 189 | 190 | let test_encoder () = 191 | let b = Buffer.create 255 in 192 | let w = Bytes.Writer.of_buffer b in 193 | let e = make_encoder ~buf:(Bytes.create 4) w in 194 | encode_char e 'a'; 195 | encode_uchar e (Uchar.of_int 0x1F42B); 196 | encode_string e "0123456789"; 197 | encode_eot e; 198 | assert (Buffer.contents b = "a🐫0123456789"); 199 | () 200 | 201 | let main () = 202 | B0_testing.Test.main @@ fun () -> 203 | test ""; 204 | test "a"; 205 | test "abcd"; 206 | test "abécd"; 207 | test "🐫"; 208 | test_uchars (); 209 | test_encoder () 210 | 211 | let () = if !Sys.interactive then () else exit (main ()) 212 | -------------------------------------------------------------------------------- /test/xxh3tap.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | let stdin_reader unix ~slice_length = match unix with 9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin 11 | 12 | let stdout_writer unix ~slice_length = match unix with 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout 15 | 16 | let xxh3_64_reader r = Bytesrw_xxhash.Xxh3_64.(to_hex (reader r)) 17 | let xxh3_64_reads r = 18 | let i, hash = Bytesrw_xxhash.Xxh3_64.reads r in 19 | i, (fun () -> Bytesrw_xxhash.Xxh3_64.(to_hex (value hash))) 20 | 21 | let xxh3_64_writes w = 22 | let w, hash = Bytesrw_xxhash.Xxh3_64.writes w in 23 | w, (fun () -> Bytesrw_xxhash.Xxh3_64.(to_hex (value hash))) 24 | 25 | let xxh3_128_reader r = Bytesrw_xxhash.Xxh3_128.(to_hex (reader r)) 26 | let xxh3_128_reads r = 27 | let i, hash = Bytesrw_xxhash.Xxh3_128.reads r in 28 | i, (fun () -> Bytesrw_xxhash.Xxh3_128.(to_hex (value hash))) 29 | 30 | let xxh3_128_writes w = 31 | let w, hash = Bytesrw_xxhash.Xxh3_128.writes w in 32 | w, (fun () -> Bytesrw_xxhash.Xxh3_128.(to_hex (value hash))) 33 | 34 | let hash_reader = function 35 | | `Xxh3_64 -> xxh3_64_reader | `Xxh3_128 -> xxh3_128_reader 36 | 37 | let hash_reads = function 38 | | `Xxh3_64 -> xxh3_64_reads | `Xxh3_128 -> xxh3_128_reads 39 | 40 | let hash_writes = function 41 | | `Xxh3_64 -> xxh3_64_writes | `Xxh3_128 -> xxh3_128_writes 42 | 43 | let sink hash processor unix ~slice_length = match processor with 44 | | `Reader -> 45 | let i = stdin_reader unix ~slice_length in 46 | i, hash_reader hash i 47 | | `Writer -> 48 | let i = stdin_reader unix ~slice_length in 49 | let w, get_hash = hash_writes hash (Bytes.Writer.ignore ()) in 50 | let () = Bytes.Writer.write_reader w ~eod:true i in 51 | i, get_hash () 52 | 53 | let filter hash processor unix ~slice_length = match processor with 54 | | `Reader -> 55 | let i = stdin_reader unix ~slice_length in 56 | let i, get_hash = hash_reads hash i in 57 | let o = stdout_writer unix ~slice_length in 58 | Bytes.Writer.write_reader ~eod:true o i; 59 | i, get_hash () 60 | | `Writer -> 61 | let i = stdin_reader unix ~slice_length in 62 | let o = stdout_writer unix ~slice_length in 63 | let o, get_hash = hash_writes hash o in 64 | Bytes.Writer.write_reader ~eod:true o i; 65 | i, get_hash () 66 | 67 | let log_count i = Printf.eprintf "i:%d\n%!" (Bytes.Reader.read_length i) 68 | 69 | let tap mode hash processor slice_length show_count unix = 70 | try 71 | let i, hash = match mode with 72 | | `Sink -> sink hash processor unix ~slice_length 73 | | `Filter -> filter hash processor unix ~slice_length 74 | in 75 | if show_count then log_count i; 76 | Printf.eprintf "%s\n%!" hash; 77 | Ok 0 78 | with 79 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 80 | 81 | open Cmdliner 82 | 83 | let cmd = 84 | let doc = "Rewrite stdin to stdout and report the xxh3-{64,128} of data" in 85 | let mode = 86 | let c = `Sink, Arg.info ["sink"] ~doc:"Only read stdin." in 87 | let d = `Filter, Arg.info ["filter"] ~doc:"Write stdin to stdout." in 88 | Arg.(value & vflag `Filter [c; d]) 89 | in 90 | let processor = 91 | let r = `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader tap." in 92 | let w = `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer tap." in 93 | Arg.(value & vflag `Reader [r; w]) 94 | in 95 | let slice_length = 96 | let doc = "IO byte slices size." in 97 | Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE") 98 | in 99 | let hash = 100 | let h64 = `Xxh3_64, Arg.info ["xxh3-64"] ~doc:"Use xxh3-64." in 101 | let h128 = `Xxh3_128, Arg.info ["xxh3-128"] ~doc:"Use xxh3-128." in 102 | Arg.(value & vflag `Xxh3_64 [h64; h128]) 103 | in 104 | let show_count = 105 | let doc = "Show on $(b,stderr) final amount of bytes read." in 106 | Arg.(value & flag & info ["show-count"] ~doc) 107 | in 108 | let unix = 109 | let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in 110 | Arg.(value & flag & info ["unix-io"] ~doc) 111 | in 112 | Cmd.v (Cmd.info "xxh3tap" ~version:"%%VERSION%%" ~doc) @@ 113 | Term.(const tap $ mode $ hash $ processor $ slice_length $ show_count $ unix) 114 | 115 | let main () = Cmd.eval_result' cmd 116 | let () = if !Sys.interactive then () else exit (main ()) 117 | -------------------------------------------------------------------------------- /test/zstdtrip.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2024 The bytesrw programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Bytesrw 7 | 8 | let stdin_reader unix ~slice_length = match unix with 9 | | true -> Bytesrw_unix.bytes_reader_of_fd ?slice_length Unix.stdin 10 | | false -> Bytes.Reader.of_in_channel ?slice_length In_channel.stdin 11 | 12 | let stdout_writer unix ~slice_length = match unix with 13 | | true -> Bytesrw_unix.bytes_writer_of_fd ?slice_length Unix.stdout 14 | | false -> Bytes.Writer.of_out_channel ?slice_length Out_channel.stdout 15 | 16 | let filter_stdio_with_reader_filter unix ~slice_length filter = 17 | let i = stdin_reader unix ~slice_length in 18 | let o = stdout_writer unix ~slice_length in 19 | Bytes.Writer.write_reader ~eod:true o (filter i); 20 | i, o 21 | 22 | let filter_stdio_with_writer_filter unix ~slice_length filter = 23 | let i = stdin_reader unix ~slice_length in 24 | let o = stdout_writer unix ~slice_length in 25 | Bytes.Writer.write_reader ~eod:true (filter o) i; 26 | i, o 27 | 28 | let decompress processor params unix ~slice_length = match processor with 29 | | `Reader -> 30 | let d = Bytesrw_zstd.decompress_reads ~params () in 31 | filter_stdio_with_reader_filter unix ~slice_length d 32 | | `Writer -> 33 | let d = Bytesrw_zstd.decompress_writes ~params () ~eod:true in 34 | filter_stdio_with_writer_filter unix ~slice_length d 35 | 36 | let compress processor params unix ~slice_length = match processor with 37 | | `Reader -> 38 | let c = Bytesrw_zstd.compress_reads ~params () in 39 | filter_stdio_with_reader_filter unix ~slice_length c 40 | | `Writer -> 41 | let c = Bytesrw_zstd.compress_writes ~params () ~eod:true in 42 | filter_stdio_with_writer_filter unix ~slice_length c 43 | 44 | let log_count i o = 45 | let i = Bytes.Reader.read_length i in 46 | let o = Bytes.Writer.written_length o in 47 | let pct = Float.to_int ((float o /. float i) *. 100.) in 48 | Printf.eprintf "i:%d o:%d o/i:%d%%\n%!" i o pct 49 | 50 | let trip mode clevel no_checksum processor slice_length show_count unix = 51 | try 52 | let i, o = match mode with 53 | | `Decompress -> 54 | let params = Bytesrw_zstd.Dctx_params.make () in 55 | decompress processor params unix ~slice_length 56 | | `Compress -> 57 | let checksum = not no_checksum in 58 | let params = Bytesrw_zstd.Cctx_params.make ~clevel ~checksum () in 59 | compress processor params unix ~slice_length 60 | in 61 | if show_count then log_count i o; 62 | Ok 0 63 | with 64 | | Bytes.Stream.Error e -> Bytes.Stream.error_to_result e 65 | 66 | open Cmdliner 67 | 68 | let cmd = 69 | let doc = "Zstd (De)compression from stdin to stdout" in 70 | let mode = 71 | let c = `Compress, Arg.info ["z"; "compress"] ~doc:"Compress." in 72 | let d = `Decompress, Arg.info ["d"; "decompress"] ~doc:"Decompress." in 73 | Arg.(value & vflag `Compress [c; d]) 74 | in 75 | let processor = 76 | let r = 77 | `Reader, Arg.info ["reader"] ~doc:"Use a byte stream reader processor." 78 | in 79 | let w = 80 | `Writer, Arg.info ["writer"] ~doc:"Use a byte stream writer processor." 81 | in 82 | Arg.(value & vflag `Reader [r; w]) 83 | in 84 | let slice_length = 85 | let doc = "IO byte slices size." in 86 | Arg.(value & opt (some int) None & info ["io-size"] ~doc ~docv:"SIZE") 87 | in 88 | let clevel = 89 | let doc = 90 | Printf.sprintf "Use compression level $(docv) (%d-%d)" 91 | (Bytesrw_zstd.min_clevel ()) (Bytesrw_zstd.max_clevel ()) 92 | in 93 | Arg.(value & opt int (Bytesrw_zstd.default_clevel ()) & 94 | info ["l"] ~doc ~docv:"LEVEL") 95 | in 96 | let no_checksum = 97 | let doc = "Do not add integrity checksums" in 98 | Arg.(value & flag & info ["no-check"] ~doc) 99 | in 100 | let show_count = 101 | let doc = "Show on $(b,stderr) final amount of bytes read and written." in 102 | Arg.(value & flag & info ["show-count"] ~doc) 103 | in 104 | let unix = 105 | let doc = "Use OCaml Unix library I/O instead of Stdlib channels" in 106 | Arg.(value & flag & info ["unix-io"] ~doc) 107 | in 108 | Cmd.v (Cmd.info "zstdtrip" ~version:"%%VERSION%%" ~doc) @@ 109 | Term.(const trip $ mode $ clevel $ no_checksum $ processor $ 110 | slice_length $ show_count $ unix) 111 | 112 | let main () = Cmd.eval_result' cmd 113 | let () = if !Sys.interactive then () else exit (main ()) 114 | --------------------------------------------------------------------------------