├── .gitignore
├── .merlin
├── .ocp-indent
├── B0.ml
├── BRZO
├── CHANGES.md
├── DEVEL.md
├── LICENSE.md
├── NOTES.md
├── README.md
├── TODO.md
├── _tags
├── abnf
    ├── json.abnf
    ├── sexp.abnf
    └── toml.abnf
├── doc
    └── index.mld
├── opam
├── pkg
    ├── META
    └── pkg.ml
├── src
    ├── serialkit.mllib
    ├── serialkit_json.ml
    ├── serialkit_json.mli
    ├── serialkit_sexp.ml
    ├── serialkit_sexp.mli
    ├── serialkit_text.ml
    ├── serialkit_text.mli
    ├── serialkit_toml.ml
    └── serialkit_toml.mli
├── test
    ├── expect
    │   ├── test.sexp
    │   └── test.toml
    ├── test.ml
    └── test_toml.ml
└── tool
    ├── cmd_main.ml
    ├── cmd_main.mli
    ├── cmd_sexp.ml
    ├── cmd_sexp.mli
    ├── cmd_toml.ml
    ├── cmd_toml.mli
    ├── std.ml
    └── std.mli


/.gitignore:
--------------------------------------------------------------------------------
1 | _b0
2 | _build
3 | tmp
4 | *~
5 | \.\#*
6 | \#*#
7 | *.install
8 | *.native
9 | *.byte


--------------------------------------------------------------------------------
/.merlin:
--------------------------------------------------------------------------------
1 | PKG cmdliner b0.kit
2 | S src
3 | S test
4 | S tool
5 | B _b0/**
6 | 


--------------------------------------------------------------------------------
/.ocp-indent:
--------------------------------------------------------------------------------
1 | strict_with=always,match_clause=4,strict_else=never
2 | 


--------------------------------------------------------------------------------
/B0.ml:
--------------------------------------------------------------------------------
 1 | open B0_kit.V000
 2 | open Result.Syntax
 3 | 
 4 | (* OCaml library names *)
 5 | 
 6 | let cmdliner = B0_ocaml.libname "cmdliner"
 7 | let serialkit = B0_ocaml.libname "serialkit"
 8 | 
 9 | (* Libraries *)
10 | 
11 | let serialkit_lib =
12 |   let srcs = [ `Dir (Fpath.v "src") ] in
13 |   let name = "serialkit-lib" in
14 |   B0_ocaml.lib ~name serialkit ~doc:"serialkit library" ~srcs ~requires:[]
15 | 
16 | (* Tools *)
17 | 
18 | let serialkit_tool =
19 |   let srcs = Fpath.[`Dir (v "tool")] in
20 |   let requires = [cmdliner; serialkit] in
21 |   B0_ocaml.exe "serialkit" ~public:true ~doc:"serialkit tool" ~srcs ~requires
22 | 
23 | (* Tests *)
24 | 
25 | let test_spec =
26 |   let srcs = Fpath.[`File (v "test/test.ml")] in
27 |   let requires = [serialkit] in
28 |   B0_ocaml.exe "test" ~doc:"Tests" ~srcs ~requires
29 | 
30 | let test_toml =
31 |   let srcs = Fpath.[`File (v "test/test_toml.ml")] in
32 |   let requires = [serialkit] in
33 |   B0_ocaml.exe "test-toml" ~doc:"TOML Tests" ~srcs ~requires
34 | 
35 | (* Expectation tests *)
36 | 
37 | let expect_serialk_runs ctx =
38 |   let runs cmd = (* command, output suffix *)
39 |     [ Cmd.(arg cmd % "locs"), ".locs"; ]
40 |   in
41 |   let test_run ctx serialk file (cmd, ext) =
42 |     let cwd = B0_expect.base ctx and stdout = Fpath.(file -+ ext) in
43 |     B0_expect.stdout ctx ~cwd ~stdout Cmd.(serialk %% cmd)
44 |   in
45 |   let test_file ctx serialk file =
46 |     let cmd = String.subrange ~first:1 (Fpath.get_ext file) in
47 |     List.iter (test_run ctx serialk file) (runs cmd)
48 |   in
49 |   let serialk = B0_expect.get_unit_exe_file_cmd ctx serialkit_tool in
50 |   let test_files =
51 |     let base_files = B0_expect.base_files ctx ~rel:true ~recurse:false in
52 |     let input f = match Fpath.get_ext ~multi:true f with
53 |     | ".json" | ".sexp" | ".toml" | ".cbor" | ".xml" -> true
54 |     | _ -> false
55 |     in
56 |     List.filter input base_files
57 |   in
58 |   List.iter (test_file ctx serialk) test_files
59 | 
60 | let expect =
61 |   B0_unit.of_action'
62 |     "expect" ~units:[serialkit_tool] ~doc:"Test expectations" @@
63 |   B0_expect.action_func ~base:(Fpath.v "test/expect") @@ fun ctx ->
64 |   expect_serialk_runs ctx;
65 |   ()
66 | 
67 | (* Packs *)
68 | 
69 | let default =
70 |   let meta =
71 |     B0_meta.empty
72 |     |> ~~ B0_meta.authors ["The serialkit programmers"]
73 |     |> ~~ B0_meta.maintainers ["Daniel Bünzli <daniel.buenzl i@erratique.ch>"]
74 |     |> ~~ B0_meta.homepage "https://erratique.ch/software/serialkit"
75 |     |> ~~ B0_meta.online_doc "https://erratique.ch/software/serialkit/doc"
76 |     |> ~~ B0_meta.licenses ["ISC"]
77 |     |> ~~ B0_meta.repo "git+https://erratique.ch/repos/serialkit.git"
78 |     |> ~~ B0_meta.issues "https://github.com/dbuenzli/serialkit/issues"
79 |     |> ~~ B0_meta.description_tags
80 |       ["codec"; "json"; "sexp"; "toml"; "query"; "org:erratique"; ]
81 |     |> ~~ B0_opam.build
82 |       {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"]]|}
83 |     |> ~~ B0_opam.depends [
84 |       "ocaml", {|>= "4.14.0"|};
85 |       "ocamlfind", {|build|};
86 |       "ocamlbuild", {|build|};
87 |       "topkg", {|build & >= "1.0.3"|};
88 |       "cmdliner", {|>= "1.1.0"|}]
89 |     |> B0_meta.tag B0_release.tag
90 |     |> B0_meta.tag B0_opam.tag
91 |   in
92 |   B0_pack.make "default" ~doc:"serialkit package" ~meta ~locked:true @@
93 |   B0_unit.list ()
94 | 


--------------------------------------------------------------------------------
/BRZO:
--------------------------------------------------------------------------------
1 | (srcs-x pkg test B0.ml tool)


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
1 | vX.Y.Z YYYY-MM-DD Location
2 | --------------------------
3 | 
4 | First release. 
5 | 


--------------------------------------------------------------------------------
/DEVEL.md:
--------------------------------------------------------------------------------
 1 | # Expectation tests 
 2 | 
 3 | To add a new test add a `.{json,sexp,toml,xml,cbor}` file in `test/expect`,
 4 | run the tests and add the new generated files to the repo. 
 5 | 
 6 | ```sh
 7 | b0 -- expect 
 8 | b0 -- expect --help
 9 | ```
10 | 
11 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2019 The serialkit programmers
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any
 4 | purpose with or without fee is hereby granted, provided that the above
 5 | copyright notice and this permission notice appear in all copies.
 6 | 
 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 | 


--------------------------------------------------------------------------------
/NOTES.md:
--------------------------------------------------------------------------------
 1 | Layout preserving edits via AST 
 2 | -------------------------------
 3 | 
 4 | Goal, have a natural, high-level data structure for processing. But
 5 | still allow user layout preserving edits.
 6 | 
 7 | * In sexpm the idea was to patch/diff sequences of lexemes with a lexeme
 8 |   for whitespace. Not sure this is a good approach. It was not fully
 9 |   pursued but it seems hard to reconcline in the high-level API.
10 |   
11 | * In serialk for now we keep very precise AST location and we do raw string 
12 |   surgery. Though this can be encapsulated in higher-level abstractions it 
13 |   remains rather brittle and subject to bugs in location tracking. In the
14 |   latter case it makes it easy to generate syntax errors.
15 | 
16 | * A better approach may be to define a notion of whitespace
17 |   attachement and replace the `Sexp.loc` type in the `Sexp.t` case by
18 |   a `Sexp.src` which has both the location and the attached whitespace
19 |   (and the info whether an atom was quoted or not). Now we can we have
20 |   a `preserve`ing output procedure that spits out the attached
21 |   whitespace when it serializes.
22 |       
23 |   Whitespace attachement likely remains before/after lexeme based. The only 
24 |   problem is how to disambuigate them so that they are uniquely attached: 
25 |   ```
26 |   (a b c ; here's a comment
27 |   )
28 |   ```
29 |   Tentative:
30 |   1. Before/After '(' -> list before/after lsep
31 |   2. Before ')' -> list before rsep
32 |   3. Before atom -> before atom 
33 |   
34 | * Look into `ocamlformat`
35 | 
36 | * Having syntactically inexisting lists e.g. toplevel sexp list and or
37 |   sexp key bindings as defined as serialk make things more difficult. 
38 |   But they seem more natural to the end user.
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | serialkit — Serialization formats toolkit for OCaml
 2 | ===================================================
 3 | %%VERSION%%
 4 | 
 5 | Serialkit provides easy to use OCaml APIs to query, update and
 6 | generate data in generic serialization data formats.
 7 | 
 8 | The supported data formats are JSON text, s-expressions and TOML.
 9 | 
10 | Serialkit is distributed under the ISC license. It has no dependencies.
11 | 
12 | Homepage: <https://erratique.ch/software/serialkit>
13 | 
14 | ## Installation
15 | 
16 | serialkit can be installed with `opam`:
17 | 
18 |     opam install serialkit
19 |     opam install serialkit cmdliner # for the serialkit tool
20 | 
21 | If you don't use `opam` consult the [`opam`](opam) file for build
22 | instructions.
23 | 
24 | ## Documentation
25 | 
26 | The documentation can be consulted [online] or via `odig doc serialkit`.
27 | 
28 | Questions are welcome but better asked on the [OCaml forum] than on
29 | the issue tracker. 
30 | 
31 | [online]: https://erratique.ch/software/serialkit/doc
32 | [OCaml forum]: https://discuss.ocaml.org/
33 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | Change `opt_mem` to 
2 | 
3 | val mem' : absent:'a -> string -> 'a t -> 'a t
4 | 


--------------------------------------------------------------------------------
/_tags:
--------------------------------------------------------------------------------
1 | true : bin_annot, safe_string
2 | <src> : include
3 | <test> : include
4 | <test/*> : package(cmdliner)
5 | <tool/*> : package(cmdliner)
6 | <_b0> : -traverse


--------------------------------------------------------------------------------
/abnf/json.abnf:
--------------------------------------------------------------------------------
 1 | ;; As found in https://www.rfc-editor.org/rfc/rfc8259
 2 | 
 3 | JSON-text = ws value ws
 4 | 
 5 | begin-array     = ws %x5B ws  ; [ left square bracket
 6 | begin-object    = ws %x7B ws  ; { left curly bracket
 7 | end-array       = ws %x5D ws  ; ] right square bracket
 8 | end-object      = ws %x7D ws  ; } right curly bracket
 9 | name-separator  = ws %x3A ws  ; : colon
10 | value-separator = ws %x2C ws  ; , comma
11 | 
12 | ws = *(%x20 /              ; Space
13 |        %x09 /              ; Horizontal tab
14 |        %x0A /              ; Line feed or New line
15 |        %x0D )              ; Carriage return
16 | 
17 | ; Values
18 | 
19 | value = false / null / true / object / array / number / string
20 | false = %x66.61.6c.73.65   ; false
21 | null  = %x6e.75.6c.6c      ; null
22 | true  = %x74.72.75.65      ; true
23 | 
24 | ; Objects
25 | 
26 | object = begin-object [ member *( value-separator member ) ]
27 |          end-object
28 | 
29 | member = string name-separator value
30 | 
31 | ; Arrays
32 | 
33 | array = begin-array [ value *( value-separator value ) ] end-array
34 | 
35 | ; Numbers
36 | 
37 | number = [ minus ] int [ frac ] [ exp ]
38 | decimal-point = %x2E       ; .
39 | digit1-9 = %x31-39         ; 1-9
40 | e = %x65 / %x45            ; e E
41 | exp = e [ minus / plus ] 1*DIGIT
42 | frac = decimal-point 1*DIGIT
43 | int = zero / ( digit1-9 *DIGIT )
44 | minus = %x2D               ; -
45 | plus = %x2B                ; +
46 | zero = %x30                ; 0
47 | 
48 | ; Strings
49 | 
50 | string = quotation-mark *char quotation-mark
51 | char = unescaped /
52 |        escape (
53 |          %x22 /          ; "    quotation mark  U+0022
54 |          %x5C /          ; \    reverse solidus U+005C
55 |          %x2F /          ; /    solidus         U+002F
56 |          %x62 /          ; b    backspace       U+0008
57 |          %x66 /          ; f    form feed       U+000C
58 |          %x6E /          ; n    line feed       U+000A
59 |          %x72 /          ; r    carriage return U+000D
60 |          %x74 /          ; t    tab             U+0009
61 |          %x75 4HEXDIG )  ; uXXXX                U+XXXX
62 | 
63 | escape = %x5C              ; \
64 | quotation-mark = %x22      ; "
65 | unescaped = %x20-21 / %x23-5B / %x5D-10FFFF


--------------------------------------------------------------------------------
/abnf/sexp.abnf:
--------------------------------------------------------------------------------
 1 | ;; This document describes the s-expression syntax parsed by
 2 | ;; serialk using the ABNF format of RFC 5234.
 3 | ;;
 4 | ;; A few constraints are not expressed by this grammar:
 5 | ;;
 6 | ;; 1. `unum` once interpreted as an hexadecimal number must
 7 | ;;    be a Unicode scalar value
 8 | ;; 2. A `comment can` be ended by the end of input
 9 | ;;
10 | 
11 |  sexp-seq = *(ws / comment / sexp)
12 |      sexp = atom / list
13 |      list = %x0028 sexp-seq %x0029
14 |      atom = token / qtoken
15 |     token = t-char *(t-char)
16 |    qtoken = %x0022 *(q-char / escape / cont) %x0022
17 |    escape = %x005E (%x0020 / %x0022 / %x005E / %x006E / %x0072 /
18 |                     %x0075 %x007B unum %x007D)
19 |      unum = 1*6(HEXDIG)
20 |      cont = %x005E nl ws
21 |        ws = *(ws-char)
22 |   comment = %x003B *(c-char) nl
23 |        nl = %x000A / %x000D / %x000D %x000A
24 |    t-char = %x0021 / %x0023-0027 / %x002A-%x003A / %x003C-%x005D /
25 |             %x005F-%x007E / %x0080-D7FF / %xE000-10FFFF
26 |    q-char = t-char / ws-char / %x0028 / %x0029 / %x003B
27 |   ws-char = %x0020 / %x0009 / %x000A / %x000B / %x000C / %x000D
28 |    c-char = %x0009 / %x000B / %x000C / %x0020-D7FF / %xE000-10FFFF
29 | 


--------------------------------------------------------------------------------
/abnf/toml.abnf:
--------------------------------------------------------------------------------
  1 | ;; This document describes TOML's syntax, using the ABNF format (defined in
  2 | ;; RFC 5234 -- https://www.ietf.org/rfc/rfc5234.txt).
  3 | ;;
  4 | ;; All valid TOML documents will match this description, however certain
  5 | ;; invalid documents would need to be rejected as per the semantics described
  6 | ;; in the supporting text description.
  7 | 
  8 | ;; It is possible to try this grammar interactively, using instaparse.
  9 | ;;     http://instaparse.mojombo.com/
 10 | ;;
 11 | ;; To do so, in the lower right, click on Options and change `:input-format` to
 12 | ;; ':abnf'. Then paste this entire ABNF document into the grammar entry box
 13 | ;; (above the options). Then you can type or paste a sample TOML document into
 14 | ;; the beige box on the left. Tada!
 15 | 
 16 | ;; Overall Structure
 17 | 
 18 | toml = expression *( newline expression )
 19 | 
 20 | expression =  ws [ comment ]
 21 | expression =/ ws keyval ws [ comment ]
 22 | expression =/ ws table ws [ comment ]
 23 | 
 24 | ;; Whitespace
 25 | 
 26 | ws = *wschar
 27 | wschar =  %x20  ; Space
 28 | wschar =/ %x09  ; Horizontal tab
 29 | 
 30 | ;; Newline
 31 | 
 32 | newline =  %x0A     ; LF
 33 | newline =/ %x0D.0A  ; CRLF
 34 | 
 35 | ;; Comment
 36 | 
 37 | comment-start-symbol = %x23 ; #
 38 | non-ascii = %x80-D7FF / %xE000-10FFFF
 39 | non-eol = %x09 / %x20-7E / non-ascii
 40 | 
 41 | comment = comment-start-symbol *non-eol
 42 | 
 43 | ;; Key-Value pairs
 44 | 
 45 | keyval = key keyval-sep val
 46 | 
 47 | key = simple-key / dotted-key
 48 | simple-key = quoted-key / unquoted-key
 49 | 
 50 | unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
 51 | quoted-key = basic-string / literal-string
 52 | dotted-key = simple-key 1*( dot-sep simple-key )
 53 | 
 54 | dot-sep   = ws %x2E ws  ; . Period
 55 | keyval-sep = ws %x3D ws ; =
 56 | 
 57 | val = string / boolean / array / inline-table / date-time / float / integer
 58 | 
 59 | ;; String
 60 | 
 61 | string = ml-basic-string / basic-string / ml-literal-string / literal-string
 62 | 
 63 | ;; Basic String
 64 | 
 65 | basic-string = quotation-mark *basic-char quotation-mark
 66 | 
 67 | quotation-mark = %x22            ; "
 68 | 
 69 | basic-char = basic-unescaped / escaped
 70 | basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
 71 | escaped = escape escape-seq-char
 72 | 
 73 | escape = %x5C                   ; \
 74 | escape-seq-char =  %x22         ; "    quotation mark  U+0022
 75 | escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
 76 | escape-seq-char =/ %x62         ; b    backspace       U+0008
 77 | escape-seq-char =/ %x66         ; f    form feed       U+000C
 78 | escape-seq-char =/ %x6E         ; n    line feed       U+000A
 79 | escape-seq-char =/ %x72         ; r    carriage return U+000D
 80 | escape-seq-char =/ %x74         ; t    tab             U+0009
 81 | escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
 82 | escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
 83 | 
 84 | ;; Multiline Basic String
 85 | 
 86 | ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
 87 |                   ml-basic-string-delim
 88 | ml-basic-string-delim = 3quotation-mark
 89 | ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
 90 | 
 91 | mlb-content = mlb-char / newline / mlb-escaped-nl
 92 | mlb-char = mlb-unescaped / escaped
 93 | mlb-quotes = 1*2quotation-mark
 94 | mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
 95 | mlb-escaped-nl = escape ws newline *( wschar / newline )
 96 | 
 97 | ;; Literal String
 98 | 
 99 | literal-string = apostrophe *literal-char apostrophe
100 | 
101 | apostrophe = %x27 ; ' apostrophe
102 | 
103 | literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
104 | 
105 | ;; Multiline Literal String
106 | 
107 | ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
108 |                     ml-literal-string-delim
109 | ml-literal-string-delim = 3apostrophe
110 | ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
111 | 
112 | mll-content = mll-char / newline
113 | mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
114 | mll-quotes = 1*2apostrophe
115 | 
116 | ;; Integer
117 | 
118 | integer = dec-int / hex-int / oct-int / bin-int
119 | 
120 | minus = %x2D                       ; -
121 | plus = %x2B                        ; +
122 | underscore = %x5F                  ; _
123 | digit1-9 = %x31-39                 ; 1-9
124 | digit0-7 = %x30-37                 ; 0-7
125 | digit0-1 = %x30-31                 ; 0-1
126 | 
127 | hex-prefix = %x30.78               ; 0x
128 | oct-prefix = %x30.6F               ; 0o
129 | bin-prefix = %x30.62               ; 0b
130 | 
131 | dec-int = [ minus / plus ] unsigned-dec-int
132 | unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT )
133 | 
134 | hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG )
135 | oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 )
136 | bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 )
137 | 
138 | ;; Float
139 | 
140 | float = float-int-part ( exp / frac [ exp ] )
141 | float =/ special-float
142 | 
143 | float-int-part = dec-int
144 | frac = decimal-point zero-prefixable-int
145 | decimal-point = %x2E               ; .
146 | zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT )
147 | 
148 | exp = "e" float-exp-part
149 | float-exp-part = [ minus / plus ] zero-prefixable-int
150 | 
151 | special-float = [ minus / plus ] ( inf / nan )
152 | inf = %x69.6e.66  ; inf
153 | nan = %x6e.61.6e  ; nan
154 | 
155 | ;; Boolean
156 | 
157 | boolean = true / false
158 | 
159 | true    = %x74.72.75.65     ; true
160 | false   = %x66.61.6C.73.65  ; false
161 | 
162 | ;; Date and Time (as defined in RFC 3339)
163 | 
164 | date-time      = offset-date-time / local-date-time / local-date / local-time
165 | 
166 | date-fullyear  = 4DIGIT
167 | date-month     = 2DIGIT  ; 01-12
168 | date-mday      = 2DIGIT  ; 01-28, 01-29, 01-30, 01-31 based on month/year
169 | time-delim     = "T" / %x20 ; T, t, or space
170 | time-hour      = 2DIGIT  ; 00-23
171 | time-minute    = 2DIGIT  ; 00-59
172 | time-second    = 2DIGIT  ; 00-58, 00-59, 00-60 based on leap second rules
173 | time-secfrac   = "." 1*DIGIT
174 | time-numoffset = ( "+" / "-" ) time-hour ":" time-minute
175 | time-offset    = "Z" / time-numoffset
176 | 
177 | partial-time   = time-hour ":" time-minute ":" time-second [ time-secfrac ]
178 | full-date      = date-fullyear "-" date-month "-" date-mday
179 | full-time      = partial-time time-offset
180 | 
181 | ;; Offset Date-Time
182 | 
183 | offset-date-time = full-date time-delim full-time
184 | 
185 | ;; Local Date-Time
186 | 
187 | local-date-time = full-date time-delim partial-time
188 | 
189 | ;; Local Date
190 | 
191 | local-date = full-date
192 | 
193 | ;; Local Time
194 | 
195 | local-time = partial-time
196 | 
197 | ;; Array
198 | 
199 | array = array-open [ array-values ] ws-comment-newline array-close
200 | 
201 | array-open =  %x5B ; [
202 | array-close = %x5D ; ]
203 | 
204 | array-values =  ws-comment-newline val ws-comment-newline array-sep array-values
205 | array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
206 | 
207 | array-sep = %x2C  ; , Comma
208 | 
209 | ws-comment-newline = *( wschar / [ comment ] newline )
210 | 
211 | ;; Table
212 | 
213 | table = std-table / array-table
214 | 
215 | ;; Standard Table
216 | 
217 | std-table = std-table-open key std-table-close
218 | 
219 | std-table-open  = %x5B ws     ; [ Left square bracket
220 | std-table-close = ws %x5D     ; ] Right square bracket
221 | 
222 | ;; Inline Table
223 | 
224 | inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
225 | 
226 | inline-table-open  = %x7B ws     ; {
227 | inline-table-close = ws %x7D     ; }
228 | inline-table-sep   = ws %x2C ws  ; , Comma
229 | 
230 | inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
231 | 
232 | ;; Array Table
233 | 
234 | array-table = array-table-open key array-table-close
235 | 
236 | array-table-open  = %x5B.5B ws  ; [[ Double left square bracket
237 | array-table-close = ws %x5D.5D  ; ]] Double right square bracket
238 | 
239 | ;; Built-in ABNF terms, reproduced here for clarity
240 | 
241 | ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
242 | DIGIT = %x30-39 ; 0-9
243 | HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
244 | 


--------------------------------------------------------------------------------
/doc/index.mld:
--------------------------------------------------------------------------------
 1 | {0 Serialkit {%html: <span class="version">%%VERSION%%</span>%}}
 2 | 
 3 | Serialkit provides easy to use APIs to query, edit and (de)serialize
 4 | data serialisation formats.
 5 | 
 6 | {b Warning.} Serialkit codecs work on complete, in-memory,
 7 | representations of the data. If you need streaming support pass your
 8 | way. Besides at the moment IO is performed directly from and to OCaml
 9 | strings but that constraint may be lifted in the future.
10 | 
11 | {1:lib Library [serialkit]}
12 | 
13 | {!modules:
14 | Serialkit_text
15 | Serialkit_json
16 | Serialkit_sexp
17 | Serialkit_toml
18 | }
19 | 


--------------------------------------------------------------------------------
/opam:
--------------------------------------------------------------------------------
 1 | opam-version: "2.0"
 2 | name: "serialkit"
 3 | synopsis: "Serialization formats toolkit for OCaml"
 4 | description: """\
 5 | Serialkit provides easy to use OCaml APIs to query, update and
 6 | generate data in generic serialization data formats.
 7 | 
 8 | The supported data formats are JSON text, s-expressions and TOML.
 9 | 
10 | Serialkit is distributed under the ISC license. It has no dependencies.
11 | 
12 | Homepage: <https://erratique.ch/software/serialkit>"""
13 | maintainer: "Daniel Bünzli <daniel.buenzl i@erratique.ch>"
14 | authors: "The serialkit programmers"
15 | license: "ISC"
16 | tags: ["codec" "json" "sexp" "toml" "query" "org:erratique"]
17 | homepage: "https://erratique.ch/software/serialkit"
18 | doc: "https://erratique.ch/software/serialkit/doc"
19 | bug-reports: "https://github.com/dbuenzli/serialkit/issues"
20 | depends: [
21 |   "ocaml" {>= "4.14.0"}
22 |   "ocamlfind" {build}
23 |   "ocamlbuild" {build}
24 |   "topkg" {build & >= "1.0.3"}
25 |   "cmdliner" {>= "1.1.0"}
26 | ]
27 | build: ["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%"]
28 | dev-repo: "git+https://erratique.ch/repos/serialkit.git"
29 | 


--------------------------------------------------------------------------------
/pkg/META:
--------------------------------------------------------------------------------
1 | description = "Serialization formats toolkit for OCaml"
2 | version = "%%VERSION_NUM%%"
3 | requires = ""
4 | archive(byte) = "serialkit.cma"
5 | archive(native) = "serialkit.cmxa"
6 | plugin(byte) = "serialkit.cma"
7 | plugin(native) = "serialkit.cmxs"
8 | exists_if = "serialkit.cma serialkit.cmxa"
9 | 


--------------------------------------------------------------------------------
/pkg/pkg.ml:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ocaml
 2 | #use "topfind"
 3 | #require "topkg"
 4 | open Topkg
 5 | 
 6 | let () =
 7 |   Pkg.describe "serialkit" @@ fun c ->
 8 |   Ok [ Pkg.mllib "src/serialkit.mllib";
 9 |        Pkg.test "test/test";
10 |        Pkg.bin "tool/cmd_main" ~dst:"serialkit" ]
11 | 


--------------------------------------------------------------------------------
/src/serialkit.mllib:
--------------------------------------------------------------------------------
1 | Serialkit_text
2 | Serialkit_json
3 | Serialkit_sexp
4 | Serialkit_toml
5 | 


--------------------------------------------------------------------------------
/src/serialkit_json.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2016 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Serialkit_text
  7 | 
  8 | module Json = struct
  9 | 
 10 |   (* JSON text *)
 11 | 
 12 |   type loc = Textloc.t
 13 |   type mem = (string * loc) * t
 14 |   and t =
 15 |   [ `Null of loc
 16 |   | `Bool of bool * loc
 17 |   | `Float of float * loc
 18 |   | `String of string * loc
 19 |   | `A of t list * loc
 20 |   | `O of mem list * loc ]
 21 | 
 22 |   let loc_nil = Textloc.none
 23 |   let loc = function
 24 |   | `Null l | `Bool (_, l) | `Float (_, l) | `String (_, l) | `A (_, l)
 25 |   | `O (_, l) -> l
 26 | 
 27 |   (* Constructors *)
 28 | 
 29 |   let null = `Null loc_nil
 30 |   let bool b = `Bool (b, loc_nil)
 31 |   let float f = `Float (f, loc_nil)
 32 |   let string s = `String (s, loc_nil)
 33 |   let array vs = `A (vs, loc_nil)
 34 |   let mem n v = ((n, loc_nil), v)
 35 |   let obj mems = `O (mems, loc_nil)
 36 | 
 37 |   (* Accessors *)
 38 | 
 39 |   let kind = function
 40 |   | `Null _ -> "null" | `Bool _ -> "bool" | `Float _ -> "float"
 41 |   | `String _ -> "string" | `A _ -> "array" | `O _ -> "object"
 42 | 
 43 |   let err_exp exp fnd =
 44 |     Format.asprintf "%a: %s but expected %s" Textloc.pp (loc fnd) (kind fnd) exp
 45 | 
 46 |   let err_exp_null = err_exp "null"
 47 |   let err_exp_bool = err_exp "bool"
 48 |   let err_exp_float = err_exp "number"
 49 |   let err_exp_string = err_exp "string"
 50 |   let err_exp_array = err_exp "array"
 51 |   let err_exp_obj = err_exp "object"
 52 | 
 53 |   let err e = Error e
 54 |   let to_null = function `Null _ -> Ok () | j -> err (err_exp_null j)
 55 |   let to_bool = function `Bool (b, _) -> Ok b | j -> err (err_exp_bool j)
 56 |   let to_float = function `Float (f, _) -> Ok f | j -> err (err_exp_float j)
 57 |   let to_string = function `String (s,_) -> Ok s | j -> err (err_exp_string j)
 58 |   let to_array = function `A (vs, _) -> Ok vs | j -> err (err_exp_array j)
 59 |   let to_obj = function `O (mems, _) -> Ok mems | j -> err (err_exp_obj j)
 60 | 
 61 |   let err = invalid_arg
 62 |   let get_null = function `Null _ -> () | j -> err (err_exp_null j)
 63 |   let get_bool = function `Bool (b, _) -> b | j -> err (err_exp_bool j)
 64 |   let get_float = function `Float (f, _) -> f | j -> err (err_exp_float j)
 65 |   let get_string = function `String (s,_) -> s | j -> err (err_exp_string j)
 66 |   let get_array = function `A (vs, _) -> vs | j -> err (err_exp_array j)
 67 |   let get_obj = function `O (mems, _) -> mems | j -> err (err_exp_obj j)
 68 | 
 69 |   (* Decode *)
 70 | 
 71 |   (* FIXME add positions and reuse Tlex. *)
 72 | 
 73 |   type decoder = { t : Buffer.t; i : string; mutable pos : int; }
 74 |   let decoder s = { t = Buffer.create 255; i = s; pos = 0 }
 75 |   let accept d = d.pos <- d.pos + 1 [@@ ocaml.inline]
 76 |   let treset d = Buffer.reset d.t [@@ ocaml.inline]
 77 |   let taccept d = Buffer.add_char d.t d.i.[d.pos]; accept d; [@@ ocaml.inline]
 78 |   let taddc d c = Buffer.add_char d.t c [@@ ocaml.inline]
 79 |   let taddu d u = Buffer.add_utf_8_uchar d.t u
 80 |   let token d = Buffer.contents d.t [@@ ocaml.inline]
 81 |   let eoi d = d.pos = String.length d.i [@@ ocaml.inline]
 82 |   let byte d = match eoi d with
 83 |   | true -> 0xFFF
 84 |   | false -> Char.code d.i.[d.pos]
 85 |   [@@ ocaml.inline]
 86 | 
 87 |   let err d fmt =
 88 |     Format.kasprintf (fun s -> raise_notrace (Failure s)) ("%d: " ^^ fmt) d.pos
 89 | 
 90 |   let pp_byte ppf d = match byte d with
 91 |   | 0xFFF -> Format.fprintf ppf "end of input"
 92 |   | b -> Format.fprintf ppf "%C" (Char.chr b)
 93 | 
 94 |   type utf_8_case =
 95 |   | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E
 96 | 
 97 |   let utf_8_case =
 98 | (*
 99 |   (* See https://tools.ietf.org/html/rfc3629#section-4 *)
100 |   Printf.printf "[|";
101 |   for i = 0 to 255 do
102 |     if i mod 16 = 0 then Printf.printf "\n";
103 |     if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else
104 |     if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else
105 |     if 0xE0 = i then Printf.printf "L3_E0; " else
106 |     if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF
107 |     then Printf.printf "L3_E1_EC_or_EE_EF; " else
108 |     if 0xED = i then Printf.printf "L3_ED;" else
109 |     if 0xF0 = i then Printf.printf "L4_F0; " else
110 |     if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else
111 |     if 0xF4 = i then Printf.printf "L4_F4; " else
112 |     Printf.printf "E; "
113 |   done;
114 |   Printf.printf "\n|]"
115 | *)
116 |   [|
117 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
118 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
119 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
120 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
121 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
122 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
123 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
124 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
125 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
126 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
127 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
128 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
129 |     E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
130 |     L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
131 |     L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
132 |     L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
133 |     L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
134 |     L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
135 |     L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
136 |   |]
137 | 
138 |   let taccept_utf_8 d =
139 |     let err d = err d "expected UTF-8 byte found: %a" pp_byte d in
140 |     let b = byte d in
141 |     let accept_tail d =
142 |       if (byte d lsr 6 = 0b10) then taccept d else err d [@@ocaml.inline]
143 |   in
144 |   match utf_8_case.(b) with
145 |   | L1 -> taccept d
146 |   | L2 -> taccept d; accept_tail d
147 |   | L3_E0 ->
148 |       taccept d;
149 |       if (byte d - 0xA0 < 0xBF - 0xA0) then taccept d else err d;
150 |       accept_tail d
151 |   | L3_E1_EC_or_EE_EF -> taccept d; accept_tail d; accept_tail d
152 |   | L3_ED ->
153 |       taccept d;
154 |       if (byte d - 0x80 < 0x9F - 0x80) then taccept d else err d;
155 |       accept_tail d
156 |   | L4_F0 ->
157 |       taccept d;
158 |       if (byte d - 0x90 < 0xBF - 0x90) then taccept d else err d;
159 |       accept_tail d; accept_tail d
160 |   | L4_F1_F3 -> taccept d; accept_tail d; accept_tail d; accept_tail d;
161 |   | L4_F4 ->
162 |       taccept d;
163 |       if (byte d - 0x80 < 0x8F - 0x80) then taccept d else err d;
164 |   | E -> err d
165 | 
166 |   let accept_bytes d bytes = (* first byte already checked *)
167 |     let max = String.length bytes - 1 in
168 |     let rec loop i = match i > max with
169 |     | true -> ()
170 |     | false ->
171 |         match Char.code bytes.[i] = byte d with
172 |         | true -> accept d; loop (i + 1)
173 |         | false ->
174 |             err d "expected %C found: %a while parsing '%s'"
175 |               bytes.[i] pp_byte d bytes
176 |     in
177 |     accept d; loop 1
178 | 
179 |   let rec skip_ws d = match byte d with
180 |   | 0x20 | 0x09 | 0x0A | 0x0D -> accept d; skip_ws d
181 |   | _ -> ()
182 | 
183 |   let parse_true d = accept_bytes d "true"; `Bool (true, loc_nil)
184 |   let parse_false d = accept_bytes d "false"; `Bool (false, loc_nil)
185 |   let parse_null d = accept_bytes d "null"; `Null loc_nil
186 |   let parse_number d = (* not fully compliant *)
187 |     let conv d = try `Float (float_of_string (token d), loc_nil) with
188 |     | Failure e -> err d "could not parse a float from: %S" (token d)
189 |     in
190 |     let rec taccept_non_sep d = match byte d with
191 |     | 0x20 | 0x09 | 0x0A | 0x0D | 0x2C | 0x5D | 0x7D | 0xFFF -> conv d
192 |     | _ -> taccept d; taccept_non_sep d
193 |     in
194 |     treset d; taccept d; taccept_non_sep d
195 | 
196 |   let rec parse_uescape d hi u count =
197 |     let pp_ucp ppf d = Format.fprintf ppf "U+%04X" d in
198 |     let err_not_lo d u = err d "not a low surrogate %a" pp_ucp u in
199 |     let err_lo d u = err d "lone low surrogate %a" pp_ucp u in
200 |     let err_hi d u = err d "lone high surrogate %a" pp_ucp u in
201 |     match count > 0 with
202 |     | true ->
203 |         begin match byte d with
204 |         | c when 0x30 <= c && c <= 0x39 ->
205 |             accept d; parse_uescape d hi (u * 16 + c - 0x30) (count - 1)
206 |         | c when 0x41 <= c && c <= 0x46 ->
207 |             accept d; parse_uescape d hi (u * 16 + c - 0x37) (count - 1)
208 |         | c when 0x61 <= c && c <= 0x66 ->
209 |             accept d; parse_uescape d hi (u * 16 + c - 0x57) (count - 1)
210 |         | c ->
211 |             err d "expected hex digit found: %C" (Char.chr c)
212 |         end
213 |     | false ->
214 |         match hi with
215 |         | Some hi -> (* combine high and low surrogate into scalar value. *)
216 |             if u < 0xDC00 || u > 0xDFFF then err_not_lo d u else
217 |             let u = ((((hi land 0x3FF) lsl 10) lor (u land 0x3FF)) + 0x10000) in
218 |             taddu d (Uchar.unsafe_of_int u)
219 |         | None ->
220 |             if u < 0xD800 || u > 0xDFFF then taddu d (Uchar.unsafe_of_int u)
221 |             else if u > 0xDBFF then err_lo d u else
222 |             match byte d with
223 |             | 0x5C ->
224 |                 accept d;
225 |                 begin match byte d with
226 |                 | 0x75 -> accept d; parse_uescape d (Some u) 0 4
227 |                 | _ -> err_hi d u
228 |                 end
229 |             | _ -> err_hi d u
230 | 
231 |   let parse_string d =
232 |     let parse_escape d = match byte d with
233 |     | (0x22 | 0x5C | 0x2F as b) -> taddc d (Char.chr b); accept d;
234 |     | 0x62 -> taddc d '\x08'; accept d;
235 |     | 0x66 -> taddc d '\x0C'; accept d;
236 |     | 0x6E -> taddc d '\x0A'; accept d;
237 |     | 0x72 -> taddc d '\x0D'; accept d;
238 |     | 0x74 -> taddc d '\x09'; accept d;
239 |     | 0x75 ->
240 |         accept d; parse_uescape d None 0 4
241 |     | _ -> err d "expected escape found: %a" pp_byte d
242 |     in
243 |     let rec loop d = match byte d with
244 |     | 0x5C (* '\' *) -> accept d; parse_escape d; loop d
245 |     | 0x22 (* '"' *) -> accept d; `String ((token d), loc_nil)
246 |     | 0xFFF -> err d "unclosed string"
247 |     | _ -> taccept_utf_8 d; loop d
248 |     in
249 |     accept d; treset d; loop d
250 | 
251 |   let rec parse_object d = match (accept d; skip_ws d; byte d) with
252 |   | 0x7D (* '}' *) -> accept d; `O ([], loc_nil)
253 |   | _ ->
254 |       let parse_name d =
255 |         let `String name = match (skip_ws d; byte d) with
256 |         | 0x22 (* '"' *) -> parse_string d
257 |         | _ -> err d "expected '\"' found: %a" pp_byte d
258 |         in
259 |         skip_ws d; name
260 |       in
261 |       let rec loop acc d =
262 |         let name = parse_name d in
263 |         match byte d with
264 |         | 0x3A (* ':' *) ->
265 |             let v = (accept d; parse_value d) in
266 |             begin match byte d with
267 |             | 0x2C (* ',' *) -> accept d; loop ((name, v) :: acc) d
268 |             | 0x7D (* '}' *) -> accept d; `O (List.rev ((name, v) :: acc),
269 |                                               loc_nil)
270 |             | _ -> err d "expected ',' or '}' found: %a" pp_byte d
271 |             end
272 |         | _ -> err d "expected ':' found: %a" pp_byte d
273 |       in
274 |       loop [] d
275 | 
276 |   and parse_array d = match (accept d; skip_ws d; byte d) with
277 |   | 0x5D (* ']' *) -> accept d; `A ([], loc_nil)
278 |   | _ ->
279 |       let rec loop acc d =
280 |         let v = parse_value d in
281 |         match byte d with
282 |         | 0x2C (* ',' *) -> accept d; loop (v :: acc) d
283 |         | 0x5D (* ']' *) -> accept d; `A (List.rev (v :: acc), loc_nil)
284 |         | _ -> err d "expected ',' or ']' found: %a" pp_byte d
285 |       in
286 |       loop [] d
287 | 
288 |   and parse_value d : t =
289 |     let v = match (skip_ws d; byte d) with
290 |     | 0x22 (* '"' *) -> parse_string d
291 |     | 0x74 (* 't' *) -> parse_true d
292 |     | 0x66 (* 'f' *) -> parse_false d
293 |     | 0x6E (* 'n' *) -> parse_null d
294 |     | 0x7B (* '{' *) -> parse_object d
295 |     | 0x5B (* '[' *) -> parse_array d
296 |     | 0x2D (* '-' *) -> parse_number d
297 |     | b when 0x30 (* '0' *) <= b && b <= 0x39 (* '9' *) -> parse_number d
298 |     | _ -> err d "expected a JSON value found: %a" pp_byte d
299 |     in
300 |     skip_ws d;
301 |     v
302 | 
303 |   let of_string ?(file = Textloc.file_none) s =
304 |     try
305 |       let d = decoder s in
306 |       let v = parse_value d in
307 |       match byte d with
308 |       | 0xFFF (* eoi *) -> Ok v
309 |       | _ -> err d "expected end of input found: %a" pp_byte d
310 |     with
311 |     | Failure e -> Error e
312 | 
313 |   (* JSON generation *)
314 | 
315 |   module G = struct
316 |     (* Not T.R. we could CPS. *)
317 | 
318 |     type enc = { mutable sep : bool; b : Buffer.t }
319 |     type t = enc -> unit
320 | 
321 |     let addc c enc = Buffer.add_char enc.b c
322 |     let adds s enc = Buffer.add_string enc.b s
323 |     let adds_esc s enc =
324 |       let is_control =
325 |         function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false
326 |       in
327 |       let len = String.length s in
328 |       let max_idx = len - 1 in
329 |       let flush b start i =
330 |         if start < len then Buffer.add_substring b s start (i - start);
331 |       in
332 |       let rec loop start i = match i > max_idx with
333 |       | true -> flush enc.b start i
334 |       | false ->
335 |           let next = i + 1 in
336 |           match String.get s i with
337 |           | '"' -> flush enc.b start i; adds "\\\"" enc; loop next next
338 |           | '\\' -> flush enc.b start i; adds "\\\\" enc; loop next next
339 |           | c when is_control c ->
340 |               flush enc.b start i;
341 |               adds (Format.asprintf "\\u%04X" (Char.code c)) enc;
342 |               loop next next
343 |           | c -> loop start next
344 |       in
345 |       loop 0 0
346 | 
347 |     let null enc = adds "null" enc
348 |     let bool b enc = adds (if b then "true" else "false") enc
349 |     let int i enc = adds (string_of_int i) enc
350 |     let float f enc = adds (Format.asprintf "%.16g" f) enc
351 |     let string s enc = addc '"' enc; adds_esc s enc; addc '"' enc
352 | 
353 |     let nosep enc = enc.sep <- false
354 |     let sep enc = enc.sep
355 |     let set_sep sep enc = enc.sep <- sep
356 |     let if_sep enc = if not enc.sep then enc.sep <- true else addc ',' enc
357 | 
358 |     type array = t
359 |     let array enc = ()
360 |     let array_end els enc =
361 |       let sep = sep enc in
362 |       addc '[' enc; nosep enc; els enc; addc ']' enc; set_sep sep enc
363 | 
364 |     let el e arr enc = arr enc; if_sep enc; e enc
365 |     let el_if c e arr enc = if c then el (e ()) arr enc else arr enc
366 | 
367 |     type obj = t
368 |     let obj enc = ()
369 |     let obj_end mems enc =
370 |       let sep = sep enc in
371 |       addc '{' enc; nosep enc; mems enc; addc '}' enc; set_sep sep enc
372 | 
373 |     let mem m v obj enc = obj enc; if_sep enc; string m enc; addc ':' enc; v enc
374 |     let mem_if c m v obj enc = if c then mem m (v ()) obj enc else obj enc
375 | 
376 |     (* Derived generators *)
377 | 
378 |     let strf fmt = Format.kasprintf string fmt
379 |     let list elv l =
380 |       array_end (List.fold_left (fun a v -> el (elv v) a) array l)
381 | 
382 |     let option some o = match o with None -> null | Some v -> some v
383 |     let rec json = function
384 |     | `Null _ -> null
385 |     | `Bool (b, _) -> bool b
386 |     | `Float (f, _) -> float f
387 |     | `String (s, _) -> string s
388 |     | `A (a, _) ->
389 |         array_end @@ List.fold_left (fun a e -> el (json e) a) array a
390 |     | `O (o, _) ->
391 |         obj_end @@ List.fold_left (fun o ((m, _), v) -> mem m (json v) o) obj o
392 | 
393 |     (* Output generated values *)
394 | 
395 |     let buffer_add b g = g { sep = true; b }
396 |     let to_string g =
397 |       let b = Buffer.create 65535 in
398 |       (buffer_add b g; Buffer.contents b)
399 |   end
400 | 
401 |   let to_string v = G.to_string (G.json v)
402 | 
403 |   let pp ppf (v : t) = (* FIXME not T.R. *)
404 |     let pp_string ppf s = (* FIXME quick & dirty escaping *)
405 |       Format.pp_print_string ppf (G.to_string (G.json ((`String (s, loc_nil)))))
406 |     in
407 |     let pp_comma ppf () =
408 |       Format.(pp_print_char ppf ','; pp_print_space ppf ())
409 |     in
410 |     let rec loop ppf = function
411 |     | `Null _ -> Format.pp_print_string ppf "null"
412 |     | `Bool (b,_ ) -> Format.pp_print_string ppf (if b then "true" else "false")
413 |     | `Float (f, _) -> Format.fprintf ppf "%.16g" f
414 |     | `String (s, _) -> pp_string ppf s
415 |     | `A (a, _) ->
416 |         Format.pp_open_box ppf 1;
417 |         Format.pp_print_char ppf '[';
418 |         Format.pp_print_list ~pp_sep:pp_comma loop ppf a;
419 |         Format.pp_print_char ppf ']';
420 |         Format.pp_close_box ppf ();
421 |     | `O (o, _) ->
422 |         let pp_mem ppf ((m, _), v) =
423 |           Format.pp_open_box ppf 1;
424 |           pp_string ppf m;
425 |           Format.pp_print_char ppf ':'; Format.pp_print_space ppf ();
426 |           loop ppf v;
427 |           Format.pp_close_box ppf ();
428 |         in
429 |         Format.pp_open_vbox ppf 1;
430 |         Format.pp_print_char ppf '{';
431 |         Format.pp_print_list ~pp_sep:pp_comma pp_mem ppf o;
432 |         Format.pp_print_char ppf '}';
433 |         Format.pp_close_box ppf ();
434 |     in
435 |     loop ppf v
436 | end
437 | 
438 | module Jsong = Json.G
439 | module Jsonq = struct
440 | 
441 |   module Sset = Set.Make (String)
442 |   module Smap = Map.Make (String)
443 | 
444 |   let pp_quote ppf s = Format.fprintf ppf "'%s'" s
445 |   let pp_mem = pp_quote
446 | 
447 |   type path = (* Paths in JSON values, array and object member traversals. *)
448 |     ([`A | `O of string] * Json.loc) list (* in reverse order *)
449 | 
450 |   let path_to_string p =
451 |     let seg = function `A -> "[]" | `O n -> "." ^ n in
452 |     String.concat "" (List.rev_map seg p)
453 | 
454 |   let path_to_trace ?(pp_mem = pp_mem) p =
455 |     let seg = function
456 |     | `A, l -> Format.asprintf "%a: in array" Textloc.pp l
457 |     | `O m, l -> Format.asprintf "%a: in key %a" Textloc.pp l pp_mem m
458 |     in
459 |     String.concat "\n" (List.map seg p)
460 | 
461 |   (* Errors *)
462 | 
463 |   exception Err of path * Textloc.t * string
464 | 
465 |   let err p l msg = raise_notrace (Err (p, l, msg))
466 |   let errf p l fmt = Format.kasprintf (err p l) fmt
467 |   let err_exp exp p fnd =
468 |     errf p (Json.loc fnd) "found %s but expected %s" (Json.kind fnd) exp
469 | 
470 |   let err_exp_null = err_exp "null"
471 |   let err_exp_bool = err_exp "bool"
472 |   let err_exp_float = err_exp "number"
473 |   let err_exp_string = err_exp "string"
474 |   let err_exp_array = err_exp "array"
475 |   let err_exp_obj = err_exp "object"
476 |   let err_empty_array p l = errf p l "unexpected empty array"
477 |   let err_miss_mem p l n = errf p l "member %a unbound in object" pp_mem n
478 |   let err_to_string ?pp_mem p loc msg =
479 |     let pp_lines ppf s =
480 |       Format.fprintf ppf "@[<v>%a@]"
481 |         (Format.pp_print_list Format.pp_print_string)
482 |         (String.split_on_char '\n' s)
483 |     in
484 |     match p with
485 |     | [] -> Format.asprintf "%a:@\n%a" Textloc.pp loc pp_lines msg
486 |     | p ->
487 |         Format.asprintf "%a:@\n%a@\n  @[%a@]"
488 |           Textloc.pp loc pp_lines msg pp_lines (path_to_trace p)
489 | 
490 |   (* Queries *)
491 | 
492 |   type 'a t = path -> Json.t -> 'a
493 | 
494 |   let query q s = try Ok (q [] s) with
495 |   | Err (p, l, m) -> Error (err_to_string p l m)
496 | 
497 |   (* Succeeding and failing queries *)
498 | 
499 |   let succeed v p j = v
500 |   let fail msg p j = err p (Json.loc j) msg
501 |   let failf fmt = Format.kasprintf fail fmt
502 | 
503 |   (* Query combinators *)
504 | 
505 |   let app fq q p j = fq p j (q p j)
506 |   let ( $ ) = app
507 |   let pair q0 q1 p j = let v0 = q0 p j in v0, q1 p j
508 |   let bind q f p j = f (q p j) p j
509 |   let map f q p j = f (q p j)
510 |   let some q p j = Some (q p j)
511 | 
512 |   (* JSON queries *)
513 | 
514 |   let fold ~null ~bool ~float ~string ~array ~obj p = function
515 |   | `Null _ as j -> null p j
516 |   | `Bool _ as j -> bool p j
517 |   | `Float _ as j -> float p j
518 |   | `String _ as j -> string p j
519 |   | `A _ as j -> array p j
520 |   | `O _ as j -> obj p j
521 | 
522 |   let partial_fold ?null ?bool ?float ?string ?array ?obj () p j =
523 |     let with_q q p j = match q with
524 |     | None ->
525 |         let kind k = function None -> "" | Some _ -> k  in
526 |         let kinds = [ kind "null" null; kind "bool" bool;
527 |                       kind "number" float; kind "string" string;
528 |                       kind "array" array; kind "obj" obj ]
529 |         in
530 |         let kinds = List.filter (fun s -> s <> "") kinds in
531 |         let kinds = String.concat ", " kinds in
532 |         (* FIXME use error messages from Err_msg *)
533 |         let kinds = if kinds = "" then "nothing" else "one of " ^ kinds in
534 |         err_exp kinds p j
535 |     | Some q -> q p j
536 |     in
537 |     match j with
538 |     | `Null _ as j -> with_q null p j
539 |     | `Bool _ as j -> with_q bool p j
540 |     | `Float _ as j -> with_q float p j
541 |     | `String _ as j -> with_q string p j
542 |     | `A _ as j -> with_q array p j
543 |     | `O _ as j -> with_q obj p j
544 | 
545 |   let json p s = s
546 |   let loc p s = Json.loc s
547 |   let with_loc q p s = (q p s), Json.loc s
548 | 
549 |   (* Nulls *)
550 | 
551 |   let is_null p = function `Null _ -> true | j -> false
552 |   let null p = function `Null _ -> () | j -> err_exp_null p j
553 |   let nullable q p = function `Null _ -> None | j -> Some (q p j)
554 | 
555 |   (* Atomic values *)
556 | 
557 |   let bool p = function `Bool (b, _) -> b | j -> err_exp_bool p j
558 |   let float p = function `Float (f, _) -> f | j -> err_exp_float p j
559 |   let int = map truncate float
560 |   let string p = function `String (s, _) -> s | j -> err_exp_string p j
561 | 
562 |   let string_to ~kind parse p = function
563 |   | `String (s, _) as j ->
564 |       (match parse s with Ok v -> v | Error m -> fail m p j)
565 |   | j -> err_exp kind p j
566 | 
567 |   let enum ~kind ss p = function
568 |   | `String (s, _) when Sset.mem s ss -> s
569 |   | `String (s, l) ->
570 |       let ss = Sset.elements ss in
571 |       let hint, ss = match Textdec.err_suggest ss s with
572 |       | [] -> Textdec.pp_must_be, ss
573 |       | ss -> Textdec.pp_did_you_mean, ss
574 |       in
575 |       let kind ppf () = Format.pp_print_string ppf kind in
576 |       let pp_v = Format.pp_print_string in
577 |       errf p l "%a" (Textdec.pp_unknown' ~kind pp_v ~hint) (s, ss)
578 |   | j -> err_exp kind p j
579 | 
580 |   let enum_map ~kind sm p = function
581 |   | `String (s, l) ->
582 |       begin match Smap.find s sm with
583 |       | v -> v
584 |       | exception Not_found ->
585 |           let ss = Smap.fold (fun k _ acc -> k :: acc) sm [] in
586 |           let hint, ss = match Textdec.err_suggest ss s with
587 |           | [] -> Textdec.pp_must_be, ss
588 |           | ss -> Textdec.pp_did_you_mean, ss
589 |           in
590 |           let kind ppf () = Format.pp_print_string ppf kind in
591 |           let pp_v = Format.pp_print_string in
592 |           errf p l "%a" (Textdec.pp_unknown' ~kind pp_v ~hint) (s, ss)
593 |       end
594 |   | j -> err_exp kind p j
595 | 
596 |   (* Array *)
597 | 
598 |   let is_empty_array p = function `A (a, _) -> a = [] | j -> err_exp_array p j
599 |   let hd q p = function
600 |   | `A ([], l) -> err_empty_array p l
601 |   | `A (v :: _, l) -> q ((`A, l) :: p) v
602 |   | j -> err_exp_array p j
603 | 
604 |   let tl q p = function
605 |   | `A ([], l) -> err_empty_array p l
606 |   | `A (_ :: [], l) -> q p (`A ([], Textloc.to_last l))
607 |   | `A (_ :: (v :: _ as a), l) ->
608 |       let l = Textloc.reloc ~first:(Textloc.to_first (Json.loc v)) ~last:l in
609 |       q p (`A (a, l))
610 |   | j -> err_exp_array p j
611 | 
612 |   let nth ?absent n q p = function
613 |   | `A (vs, l) ->
614 |       let p = (`A, l) :: p in
615 |       let k, vs = if n < 0 then - n - 1, List.rev vs else n, vs in
616 |       let rec loop k = function
617 |       | v :: vs when k = 0 -> q p v
618 |       | _ :: vs -> loop (k - 1) vs
619 |       | [] ->
620 |           match absent with
621 |           | None -> errf p l "%d: no such index in array" n
622 |           | Some absent -> absent
623 |       in
624 |       loop k vs
625 |   | j -> err_exp_array p j
626 | 
627 |   let fold_array f q acc p = function
628 |   | `A (vs, l) ->
629 |       let p = (`A, l) :: p in
630 |       let add p acc v = f (q p v) acc in
631 |       List.fold_left (add p) acc vs
632 |   | j -> err_exp_array p j
633 | 
634 |   let array qv = map List.rev (fold_array (fun v acc -> v :: acc) qv [])
635 | 
636 |   (* Objects *)
637 | 
638 |   let rec mem_find n = function
639 |   | ((n', _), j) :: ms when String.equal n' n -> Some j
640 |   | _  :: ms -> mem_find n ms
641 |   | [] -> None
642 | 
643 |   let mem : string -> 'a t -> 'a t = fun n q p -> function
644 |   | `O (ms, l) ->
645 |       begin match mem_find n ms with
646 |       | None -> err_miss_mem p l n
647 |       | Some j -> q  ((`O n, l) :: p) j
648 |       end
649 |   | j -> err_exp_obj p j
650 | 
651 |   let opt_mem n q ~absent p = function
652 |   | `O (ms, l) ->
653 |       begin match mem_find n ms with
654 |       | None -> absent
655 |       | Some j -> q ((`O n, l) :: p) j
656 |       end
657 |   | j -> err_exp_obj p j
658 | 
659 |   let mem_dom ~validate p = function
660 |   | `O (ms, l) ->
661 |       let add_mem = match validate with
662 |       | None -> fun acc ((n, _), _) -> Sset.add n acc
663 |       | Some dom ->
664 |           fun acc ((n, _), _) -> match Sset.mem n dom with
665 |           | true -> Sset.add n acc
666 |           | false ->
667 |               let ns = Sset.elements dom in
668 |               let hint, ss = match Textdec.err_suggest ns n with
669 |               | [] -> Textdec.pp_must_be, ns
670 |               | ss -> Textdec.pp_did_you_mean, ss
671 |               in
672 |               let kind ppf () = Format.pp_print_string ppf "member" in
673 |               let pp_v = Format.pp_print_string in
674 |               errf p l "%a" (Textdec.pp_unknown' ~kind pp_v ~hint) (n, ss)
675 |       in
676 |       List.fold_left add_mem Sset.empty ms
677 |   | j -> err_exp_obj p j
678 | 
679 |   let fold_obj f mem q acc p = function
680 |   | `O (ms, l) ->
681 |       let f acc ((n, nl), j) = match mem n with
682 |       | Ok k -> f k (q ((`O n, l) :: p) j) acc
683 |       | Error e -> err ((`O n, l) :: p) nl e
684 |       in
685 |       List.fold_left f acc ms
686 |   | j -> err_exp_obj p j
687 | end
688 | 


--------------------------------------------------------------------------------
/src/serialkit_json.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2016 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** JSON text support.
  7 | 
  8 |     As specified in {{:https://tools.ietf.org/html/rfc8259}RFC8259}.
  9 | 
 10 |     Open this module to use it, this only introduces modules in your scope. *)
 11 | 
 12 | open Serialkit_text
 13 | 
 14 | (** JSON text definitions and codec. *)
 15 | module Json : sig
 16 | 
 17 |   (** {1:json JSON text} *)
 18 | 
 19 |   type loc = Textloc.t
 20 |   (** The type for text locations. *)
 21 | 
 22 |   val loc_nil : loc
 23 |   (** [loc_nil] is an invalid input location. *)
 24 | 
 25 |   type mem = (string * loc) * t
 26 |   (** The type for JSON object members. *)
 27 | 
 28 |   and t =
 29 |   [ `Null of loc
 30 |   | `Bool of bool * loc
 31 |   | `Float of float * loc
 32 |   | `String of string * loc
 33 |   | `A of t list * loc
 34 |   | `O of mem list * loc ]
 35 |   (** The type for generic JSON text representations. *)
 36 | 
 37 |   val loc : t -> loc
 38 |   (** [loc j] is [j]'s input location. *)
 39 | 
 40 |   (** {1:cons Constructors} *)
 41 | 
 42 |   val null : t
 43 |   (** [null] is [`Null loc_nil]. *)
 44 | 
 45 |   val bool : bool -> t
 46 |   (** [bool b] is [`Bool (b, loc_nil)]. *)
 47 | 
 48 |   val float : float -> t
 49 |   (** [float b] is [`Float (f, loc_nil)]. *)
 50 | 
 51 |   val string : string -> t
 52 |   (** [string s] is [`String (s, loc_nil)]. *)
 53 | 
 54 |   val array : t list -> t
 55 |   (** [a vs] is [`A (vs, loc_nil)]. *)
 56 | 
 57 |   val mem : string -> t -> mem
 58 |   (** [mem n v] is [((n, loc_nil), v)]. *)
 59 | 
 60 |   val obj : mem list -> t
 61 |   (** [obj mems] is [`O (mems, loc_nil)]. *)
 62 | 
 63 |   (** {1:access Accessors} *)
 64 | 
 65 |   val to_null : t -> (unit, string) result
 66 |   (** [to_null j] extracts a null from [j]. If [j] is not a null an
 67 |       error with the location formatted according to {!Tloc.pp}
 68 |       is returned. *)
 69 | 
 70 |   val to_bool : t -> (bool, string) result
 71 |   (** [to_bool j] extracts a bool from [j]. If [j] is not a bool an
 72 |       error with the location formatted according to {!Tloc.pp}
 73 |       is returned. *)
 74 | 
 75 |   val to_float : t -> (float, string) result
 76 |   (** [to_float j] extracts a float from [j]. If [j] is not a float an
 77 |       error with the location formatted according to {!Tloc.pp}
 78 |       is returned. *)
 79 | 
 80 |   val to_string : t -> (string, string) result
 81 |   (** [to_string j] extracts a string from [j]. If [j] is not a string an
 82 |       error with the location formatted according to {!Tloc.pp}
 83 |       is returned. *)
 84 | 
 85 |   val to_array : t -> (t list, string) result
 86 |   (** [to_array j] extracts a array from [j]. If [j] is not a array an
 87 |       error with the location formatted according to {!Tloc.pp}
 88 |       is returned. *)
 89 | 
 90 |   val to_obj : t -> (mem list, string) result
 91 |   (** [to_obj j] extracts a array from [j]. If [j] is not a array an
 92 |       error with the location formatted according to {!Tloc.pp}
 93 |       is returned. *)
 94 | 
 95 |   val get_null : t -> unit
 96 |   (** [get_null j] is like {!to_null} but raises {!Invalid_argument}
 97 |       if [j] is not a null. *)
 98 | 
 99 |   val get_bool : t -> bool
100 |   (** [get_bool j] is like {!to_bool} but raises {!Invalid_argument}
101 |       if [j] is not a bool. *)
102 | 
103 |   val get_float : t -> float
104 |   (** [get_float j] is like {!to_float} but raises {!Invalid_argument}
105 |       if [j] is not a float. *)
106 | 
107 |   val get_string : t -> string
108 |   (** [get_string j] is like {!to_string} but raises {!Invalid_argument}
109 |       if [j] is not a string. *)
110 | 
111 |   val get_array : t -> t list
112 |   (** [get_array j] is like {!to_array} but raises {!Invalid_argument}
113 |       if [j] is not a array. *)
114 | 
115 |   val get_obj : t -> mem list
116 |   (** [get_obj j] is like {!to_obj} but raises {!Invalid_argument}
117 |       if [j] is not a array. *)
118 | 
119 |   (** {1:fmt Formatters} *)
120 | 
121 |   val pp : Format.formatter -> t -> unit
122 |   (** [pp] formats JSON text.
123 | 
124 |       {b Warning.} Assumes all OCaml strings in the formatted value
125 |       are UTF-8 encoded. *)
126 | 
127 |   (** {1:codec Codec} *)
128 | 
129 |   val of_string : ?file:Textloc.fpath -> string -> (t, string) result
130 |   (** [of_string s] parses JSON text from [s] according to
131 |       {{:https://tools.ietf.org/html/rfc8259}RFC8259} with the following
132 |       limitations:
133 |       {ul
134 |       {- Numbers are parsed with [string_of_float] which is not
135 |          compliant.}
136 |       {- TODO Unicode escapes are left unparsed (this will not round trip
137 |          with {!to_string}).}}
138 | 
139 |       {b Note.} All OCaml strings returned by this function are UTF-8
140 |       encoded. *)
141 | 
142 |   val to_string : t -> string
143 |   (** [to_string j] is [j] as JSON text, encoded according to
144 |       {{:https://tools.ietf.org/html/rfc8259}RFC8259}.
145 | 
146 |       {b Warning.} Assumes all OCaml strings in [j] are UTF-8 encoded. *)
147 | end
148 | 
149 | (** JSON value generation. *)
150 | module Jsong : sig
151 | 
152 |   (** {1:gen Generation} *)
153 | 
154 |   type t
155 |   (** The type for generated JSON values. *)
156 | 
157 |   val null : t
158 |   (** [null] is the generated JSON null value. *)
159 | 
160 |   val bool : bool -> t
161 |   (** [bool b] is [b] as a generated JSON boolean value. *)
162 | 
163 |   val int : int -> t
164 |   (** [int i] is [i] as a generated JSON number. *)
165 | 
166 |   val float : float -> t
167 |   (** [float f] is [f] as a generated JSON number. *)
168 | 
169 |   val string : string -> t
170 |   (** [str s] is [s] as a generated JSON string value. *)
171 | 
172 |   type array
173 |   (** The type for generated JSON arrays. *)
174 | 
175 |   val array : array
176 |   (** [array] is an empty array. *)
177 | 
178 |   val array_end : array -> t
179 |   (** [array_end els] is arr a a generated JSON value. *)
180 | 
181 |   val el : t -> array -> array
182 |   (** [el e arr] is array [arr] wit [e] added at the end. *)
183 | 
184 |   val el_if : bool -> (unit -> t) -> array -> array
185 |   (** [el cond v arr] is [el (v ()) arr] if [cond] is [true] and
186 |       [arr] otherwise. *)
187 | 
188 |   type obj
189 |   (** The type for generated JSON objects. *)
190 | 
191 |   val obj : obj
192 |   (** [obj] is an empty object. *)
193 | 
194 |   val obj_end : obj -> t
195 |   (** [obj_end o] is [o] as a generated JSON value. *)
196 | 
197 |   val mem : string -> t -> obj -> obj
198 |   (** [mem name v o] is [o] with member [name] bound to value [v]
199 |       added. *)
200 | 
201 |   val mem_if : bool -> string -> (unit -> t) -> obj -> obj
202 |   (** [mem_if cond name v o] is [mem name (v ()) o] if [cond] is [true]
203 |       and [o] otherwise. *)
204 | 
205 |   (** {1:derived Derived generators} *)
206 | 
207 |   val strf : ('a, Format.formatter, unit, t) format4 -> 'a
208 |   (** [strf fmt ...] is a JSON string generated value formatted according
209 |       to [fmt]. *)
210 | 
211 |   val option : ('a -> t) -> 'a option -> t
212 |   (** [option some o] is [o] as a generated JSON value which is
213 |       {!null} if [o] is [None] and [some v] if [o] is [some v]. *)
214 | 
215 |   val list : ('a -> t) -> 'a list -> t
216 |   (** [list el l] is [l] as a generated JSON array whose elements
217 |       are generated using [el]. *)
218 | 
219 |   val json : Json.t -> t
220 |   (** [of_json v] is the JSON value [v] as a generated value. *)
221 | 
222 |   (** {1:output Output} *)
223 | 
224 |   val buffer_add : Buffer.t -> t -> unit
225 |   (** [buffer_add b g] adds the generated JSON value [g] to [b]. *)
226 | 
227 |   val to_string : t -> string
228 |   (** [to_string g] is the generated JSON value [g] as a string. *)
229 | end
230 | 
231 | (** JSON value queries.
232 | 
233 |     {b TODO} maybe we could expose a bit more options for error
234 |     reporting. In particular the internal [path] type and a combinator
235 |     in the vein of {!loc} to report back the path trace. Basically
236 |     see {!Serialk_sexp}. *)
237 | module Jsonq : sig
238 | 
239 |   (** {1:query Queries} *)
240 | 
241 |   type 'a t
242 |   (** The type JSON value queries. A query either fails or succeeds against
243 |       a JSON value returning a value of type ['a]. *)
244 | 
245 |   val query : 'a t -> Json.t -> ('a, string) result
246 |   (** [query q j] is [Ok v] if que query [q] succeeds on [s] and
247 |       a (multiline) [Error e] otherwise. *)
248 | 
249 |   (** {1:success Success and failure} *)
250 | 
251 |   val succeed : 'a -> 'a t
252 |   (** [succeed v] is a query that succeeds with value [v] on any
253 |       JSON value. *)
254 | 
255 |   val fail : string -> 'a t
256 |   (** [fail msg] is a query that fails on any JSON value with message
257 |       [msg]. Do not include position information in [msg], this
258 |       is automatically handled by the module. *)
259 | 
260 |   val failf : ('a, Format.formatter, unit, 'b t) format4 -> 'a
261 |   (** [failf fmt ...] is like {!fail} but formats the message
262 |       according to [fmt]. *)
263 | 
264 |   (** {1:qcomb Query combinators} *)
265 | 
266 |   val app : ('a -> 'b) t -> 'a t -> 'b t
267 |   (** [app fq q] queries a s-expression first with [fq] and then with [q]
268 |       and applies the result of latter to the former. *)
269 | 
270 |   val ( $ ) : ('a -> 'b) t -> 'a t -> 'b t
271 |   (** [f $ v] is [app f v]. *)
272 | 
273 |   val pair : 'a t -> 'b t -> ('a * 'b) t
274 |   (** [pair q0 q1] queries first with [q0] and then with [q1] and returns
275 |       the pair of their result. *)
276 | 
277 |   val bind : 'a t -> ('a -> 'b t) -> 'b t
278 |   (** [bind q f] queries a s-expression with [q], applies the result to
279 |       [f] and re-queries the s-expression with the result. *)
280 | 
281 |   val map : ('a -> 'b) -> 'a t -> 'b t
282 |   (** [map f q] is [app (succeed f) q]. *)
283 | 
284 |   val some : 'a t -> 'a option t
285 |   (** [some q] is [map Option.some q]. *)
286 | 
287 |   (** {1:json JSON queries} *)
288 | 
289 |   val fold :
290 |     null:'a t -> bool:'a t -> float:'a t -> string:'a t ->  array:'a t ->
291 |     obj:'a t -> 'a t
292 |   (** [fold] queries JSON values according to their kind using the
293 |       provided queries. *)
294 | 
295 |   val partial_fold :
296 |     ?null:'a t -> ?bool:'a t -> ?float:'a t -> ?string:'a t -> ?array:'a t ->
297 |     ?obj:'a t -> unit -> 'a t
298 |   (** [partial_fold] is like {!fold} but only queries the kinds that
299 |       are explicitely specified. It errors on other kinds. *)
300 | 
301 |   val json : Json.t t
302 |   (** [json] queries any JSON value and returns it. *)
303 | 
304 |   val loc : Json.loc t
305 |   (** [loc]is [map Sexp.loc sexp]. *)
306 | 
307 |   val with_loc : 'a t -> ('a * Json.loc) t
308 |   (** [with_loc q] queries with [q] and returns the result with the
309 |       location of the queried JSON value. *)
310 | 
311 |   (** {1:nulls Nulls} *)
312 | 
313 |   val is_null : bool t
314 |   (** [is_null] tests for a JSON null value. *)
315 | 
316 |   val null : unit t
317 |   (** [null] queries JSON null as unit and fails otherwise. *)
318 | 
319 |   val nullable : 'a t -> 'a option t
320 |   (** [nullable q] is None on JSON null and otherwise queries the value
321 |       with [q]. *)
322 | 
323 |   (** {1:atoms Atomic values} *)
324 | 
325 |   val bool : bool t
326 |   (** [bool] queries JSON bool values as a [bool] value and fails otherwise. *)
327 | 
328 |   val float : float t
329 |   (** [float] queries JSON number values as a [float] value and fails
330 |       otherwise. *)
331 | 
332 |   val int : int t
333 |   (** [int] is [map truncate float]. *)
334 | 
335 |   val string : string t
336 |   (** [string] queries JSON string values as a [string] value and
337 |       fails otherwise. *)
338 | 
339 |   val string_to : kind:string -> (string -> ('a, string) result) -> 'a t
340 |   (** [string_to ~kind parse] queries a JSON string and parses it
341 |       with [p]. In case of [Error m] error {!fail}s with [m]. [kind]
342 |       is the kind of value parsed, it is used for the error in case no
343 |       JSON string is found. *)
344 | 
345 |   val enum : kind:string -> Set.Make(String).t -> string t
346 |   (** [enum ~kind ss] queries a JSON string for one of the elements of [ss]
347 |       and fails otherwise. [kind] is for the kind of elements in [ss],
348 |       it used for error reporting. *)
349 | 
350 |   val enum_map : kind:string -> 'a Map.Make(String).t -> 'a t
351 |   (** [enum_map ~kind sm] queries a string for it's map in [sm] and fails
352 |       if the string is not bound in [sm]. [kind] is for the kind elements
353 |       in [sm], it is used for error reporting. *)
354 | 
355 |   (** {1:arrays Arrays}
356 | 
357 |       These queries only succeed on JSON array values. *)
358 | 
359 |   val is_empty_array : bool t
360 |   (** [is_empty_array] queries an array for emptyness. *)
361 | 
362 |   val hd : 'a t -> 'a t
363 |   (** [hd q] queries the first element of an array with [q]. Fails on empty
364 |       arrays. *)
365 | 
366 |   val tl : 'a t -> 'a t
367 |   (** [tail q] queries the tail of an array with [q]. Fails on empty
368 |       arrays. *)
369 | 
370 |   val fold_array : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b t
371 |   (** [fold_array f q acc] queries the elements of an array from left to
372 |       right with [q] and folds the result with [f] starting with [acc]. *)
373 | 
374 |   val array : 'a t -> 'a list t
375 |   (** [array q] queries the elements of an array with [q]. *)
376 | 
377 |   (** {2:array_index Array index queries} *)
378 | 
379 |   val nth : ?absent:'a -> int -> 'a t -> 'a t
380 |   (** [nth ?absent n q] queries the [n]th element of an array with [q]. If
381 |       [n] is negative counts from the end of the array, so [-1] is the
382 |       last array element. If the element does not exist this fails if
383 |       [absent] is [None] and succeeds with [v] if [absent] is [Some v]. *)
384 | 
385 |   (** {1:objects Objects}
386 | 
387 |       These queries only succeed on JSON object values. *)
388 | 
389 |   val mem : string -> 'a t -> 'a t
390 |   (** [mem n q] queries the member [n] of a JSON object with [q]. The
391 |       query fails if [n] is unbound in the object. *)
392 | 
393 |   val opt_mem : string -> 'a t -> absent:'a -> 'a t
394 |   (** [opt_mem n q ~absent] queries the member [n] of a JSON object with [q].
395 |       absent is returned if [n] is unbound in the object. *)
396 | 
397 |   val mem_dom : validate:Set.Make(String).t option -> Set.Make(String).t t
398 |   (** [mem_dom ~validate] queries the member domain of a JSON object.
399 |       If [validate] is [Some dom], the query fails if a member name is not in
400 |       [dom]. *)
401 | 
402 |   val fold_obj :
403 |     ('m -> 'a -> 'b -> 'b) -> (string -> ('m, string) result) ->
404 |     'a t -> 'b -> 'b t
405 |   (** [fold_obj f m q acc] parses each member with [m], queries its
406 |       value with [q] and folds the result with [f] starting with [acc]. *)
407 | end
408 | 


--------------------------------------------------------------------------------
/src/serialkit_sexp.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2019 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** S-expression support.
  7 | 
  8 |     The module {!Sexp} has an s-expression codec and general
  9 |     definitions for working with them. {!Sexpg} generates
 10 |     s-expressions without going through a generic
 11 |     representation. {!Sexpq} queries and updates generic
 12 |     representations with combinators.
 13 | 
 14 |     Consult a {{!sexp_syntax}short introduction} to s-expressions and
 15 |     the syntax parsed by the codec, the encoding of
 16 |     {{!sexp_dict}key-value dictionaries} supported by this module and
 17 |     the end-user syntax for {{!sexp_path_caret}addressing and
 18 |     updating} s-expressions.
 19 | 
 20 |     Open this module to use it, this only introduces modules in your
 21 |     scope.
 22 | 
 23 |     {b Warning.} Serialization functions always assumes all OCaml
 24 |     strings in the data you provide is UTF-8 encoded. This is not
 25 |     checked by the module. *)
 26 | 
 27 | (** {1:api API} *)
 28 | 
 29 | open Serialkit_text
 30 | 
 31 | (** S-expression definitions and codec. *)
 32 | module Sexp : sig
 33 | 
 34 |   (** {1:meta Meta information} *)
 35 | 
 36 |   type 'a fmt = Format.formatter -> 'a -> unit
 37 |   (** The type for formatting functions. *)
 38 | 
 39 |   type loc = Textloc.t
 40 |   (** The type for source text locations. *)
 41 | 
 42 |   val loc_nil : Textloc.t
 43 |   (** [loc_nil] is a source text location for non-parsed s-expressions. *)
 44 | 
 45 |   val pp_loc : loc fmt
 46 |   (** [pp_loc] is {!Tloc.pp}. *)
 47 | 
 48 |   type a_meta
 49 |   (** The type for meta information about atoms. *)
 50 | 
 51 |   type l_meta
 52 |   (** The type for meta information about lists. *)
 53 | 
 54 |   val a_meta_nil : a_meta
 55 |   (** [a_meta_nil] is parse information for non-parsed atoms. *)
 56 | 
 57 |   val l_meta_nil : l_meta
 58 |   (** [l_meta_nil] is parse information for non-parsed lists. *)
 59 | 
 60 |   (** {1:sexp S-expressions} *)
 61 | 
 62 |   type t = [ `A of string * a_meta | `L of t list * l_meta ]
 63 |   (** The type for generic s-expression representations. Either an
 64 |       atom or a list. *)
 65 | 
 66 |   val atom : string -> t
 67 |   (** [atom a] is [`A (a, a_meta_nil)]. *)
 68 | 
 69 |   val list : t list -> t
 70 |   (** [list l] is [`L (l, l_meta_nil)]. *)
 71 | 
 72 |   (** {1:access Accessors} *)
 73 | 
 74 |   val loc : t -> loc
 75 |   (** [loc s] is [s]'s source text location. *)
 76 | 
 77 |   val to_atom : t -> (string, string) result
 78 |   (** [to_atom s] extracts an atom from [s].  If [s] is a list an error
 79 |       with the location formatted according to {!Tloc.pp} is
 80 |       returned. *)
 81 | 
 82 |   val get_atom : t -> string
 83 |   (** [get_atom s] is like {!to_atom} but raises {!Invalid_argument}
 84 |       if [s] is not an atom. *)
 85 | 
 86 |   val to_list : t -> (t list, string) result
 87 |   (** [to_list s] extracts a list from [s]. If [s] is an atom an error
 88 |       with the location formatted according to {!Tloc.pp} is
 89 |       returned. *)
 90 | 
 91 |   val get_list : t -> t list
 92 |   (** [get_atom s] is like {!to_list} but raises {!Invalid_argument}
 93 |       if [s] is not an list. *)
 94 | 
 95 |   val to_splice : t -> t list
 96 |   (** [to_splice s] is the either the list of [s] if [s] is or or
 97 |       [[s]] if [s] is an atom. *)
 98 | 
 99 |   (** {1:fmt Formatting} *)
100 | 
101 |   val pp : t fmt
102 |   (** [pp] formats an s-expression. *)
103 | 
104 |   val pp_layout : t fmt
105 |   (** [pp_layout ppf l] is like {!pp} but uses layout information. *)
106 | 
107 |   val pp_seq : t fmt
108 |   (** [pp_seq] formats an s-expression but if it is a list the
109 |       outer list separators are not formatted in the output.
110 | 
111 |       {b Warning.} Assumes all OCaml strings in the formatted value are
112 |       UTF-8 encoded. *)
113 | 
114 |   val pp_seq_layout : t fmt
115 |   (** [pp_seq_layout] is like {!pp_seq} but uses layout information. *)
116 | 
117 |   (** {1:codec Codec} *)
118 | 
119 |   type error_kind
120 |   (** The type for kinds of decoding error. *)
121 | 
122 |   val pp_error_kind : unit -> error_kind fmt
123 |   (** [pp_error_kind ()] formats an error kind. *)
124 | 
125 |   type error = error_kind * loc
126 |   (** The type for decoding errors. *)
127 | 
128 |   val pp_error :
129 |     ?pp_loc:loc fmt -> ?pp_error_kind:error_kind fmt ->
130 |     ?pp_prefix:unit fmt -> unit -> error fmt
131 |   (** [pp_error ~pp_loc ~pp_error_kind ~pp_prefix ()] formats errors
132 |       using [pp_loc] (defaults to {!pp_loc}), [pp_error_kind]
133 |       (defaults to {!pp_error_kind}) and [pp_prefix] (defaults formats
134 |       ["Error: "]). *)
135 | 
136 |   val error_to_string :
137 |     ?pp_error:error fmt -> ('a, error) result -> ('a, string) result
138 |   (** [error_to_string r] converts an error to a string using [pp_error]
139 |       (defaults to {!pp_error}). *)
140 | 
141 |   val seq_of_string : ?file:Textloc.fpath -> string -> (t, error) result
142 |   (** [seq_of_string ?file s] parses a {e sequence} of s-expressions from
143 |       [s]. [file] is the file for locations, defaults to ["-"]. The
144 |       sequence is returned as a fake s-expression list that spans from
145 |       the start of the first s-expression in [s] to the end of the
146 |       last one; note that this list does not exist syntactically in
147 |       [s].
148 | 
149 |       If there are no s-expression in [s] the list is empty its
150 |       location has both start and end positions at byte [0] (which may
151 |       not exist).
152 | 
153 |       {b Note.} All OCaml strings returned by this function are UTF-8
154 |       encoded. *)
155 | 
156 |   val seq_of_string' :
157 |     ?pp_error:error fmt -> ?file:Textloc.fpath -> string -> (t, string) result
158 |   (** [seq_of_string'] s {!seq_of_string} composed with {!error_to_string}. *)
159 | 
160 |   val seq_to_string : t -> string
161 |   (** [seq_to_string s] encodes [s] to a sequence of s-expressions. If [s]
162 |       is an s-expression list this wrapping list is not syntactically
163 |       represented in the output (see also {!seq_of_string}), use
164 |       [to_string (list [l])] if you want to output [l] as a list.
165 | 
166 |       {b Warning.} Assumes all OCaml strings in [s] are UTF-8 encoded. *)
167 | 
168 |   (** {1:sexp_index S-expression indices} *)
169 | 
170 |   type index =
171 |   | Nth of int (** *)
172 |   | Key of string (** *)
173 |   (** The type for s-expression indexing operations.
174 |       {ul
175 |       {- [Nth n], lookup zero-based element [n] in a list. If [n] is
176 |          negative, counts the number of elements from the end of the
177 |          list, i.e. [-1] is the last list element.}
178 |       {- [Key k], lookup binding [k] in an s-expression
179 |          {{!sexp_dict}dictionary.}}} *)
180 | 
181 |   val pp_key : string fmt
182 |   (** [pp_key] formats a key, this is {!Format.pp_print_string}. *)
183 | 
184 |   val pp_index : ?pp_key:string fmt -> unit -> index fmt
185 |   (** [pp_index] formats indices. Keys are unbracketed and formatted
186 |       with [pp_key], defaults to {!pp_key}. *)
187 | 
188 |   (** {1:sexp_path S-expression paths} *)
189 | 
190 |   type path = index list
191 |   (** The type for paths, a sequence of indexing operations in {b reverse}
192 |       order. *)
193 | 
194 |   val path_of_string : string -> (path, string) result
195 |   (** [path_of_string] parses a path from [s] according to the syntax
196 |       {{!sexp_path_caret}given here}. *)
197 | 
198 |   val pp_path : ?pp_key:string fmt -> unit -> path fmt
199 |   (** [pp_path ?pp_key ()] is a formatter for paths using [pp_key] to
200 |       format keys (defaults to {!pp_key}). *)
201 | 
202 |   (** {1:carets Carets} *)
203 | 
204 |   type caret_loc =
205 |   | Before (** The void before the s-expression found by the path. *)
206 |   | Over  (** The s-expression found by the path. *)
207 |   | After (** The void after the s-expression found by the path. *)
208 |   (** The type for caret locations. *)
209 | 
210 |   type caret = caret_loc * path
211 |   (** The type for carets. A caret location and the path at which it
212 |       applies. *)
213 | 
214 |   val caret_of_string : string -> (caret, string) result
215 |   (** [caret_of_string s] parses a caret from [s] according to the
216 |       syntax {{!sexp_path_caret}given here}. *)
217 | 
218 |   val pp_caret : ?pp_key:string fmt -> unit -> caret fmt
219 |   (** [pp_caret ?pp_key ()] is a formatter for carets using [pp_key]
220 |       to format keys (defaults to {!pp_key}). *)
221 | end
222 | 
223 | (** S-expression generation. *)
224 | module Sexpg : sig
225 | 
226 |   (** {1:gen Generation} *)
227 | 
228 |   type t
229 |   (** The type for generated s-expressions. *)
230 | 
231 |   val atom : string -> t
232 |   (** [atom s] is [s] as an atom. *)
233 | 
234 |   type lyst
235 |   (** The type for generated s-expression lists. *)
236 | 
237 |   val ls : lyst
238 |   (** [ls] starts a list. *)
239 | 
240 |   val le : lyst -> t
241 |   (** [le l] ends lists [l]. *)
242 | 
243 |   val el : t -> lyst -> lyst
244 |   (** [el e l] is list [l] with [e] added at the end. *)
245 | 
246 |   val el_if : bool -> (unit -> t) -> lyst -> lyst
247 |   (** [el cond v l] is [el (v ()) l] if [cond] is [true] and
248 |       [l] otherwise. *)
249 | 
250 |   (** {1:derived Derived generators} *)
251 | 
252 |   val atomf : ('a, Format.formatter, unit, t) format4 -> 'a
253 |   (** [atomf fmt ...] is an atom formatted according to [fmt]. *)
254 | 
255 |   val bool : bool -> t
256 |   (** [bool b] is [atomf "%b" b]. *)
257 | 
258 |   val int : int -> t
259 |   (** [int i] is [atomf "%d" i]. *)
260 | 
261 |   val float : float -> t
262 |   (** [float f] is [atomf "%g" f]. *)
263 | 
264 |   val float_hex : float -> t
265 |   (** [float_hex f] is [atomf "%h" f]. *)
266 | 
267 |   val string : string -> t
268 |   (** [string s] is {!atom}. *)
269 | 
270 |   val option : ('a -> t) -> 'a option -> t
271 |   (** [option some o] is [o] as the [none] atom if [o] is
272 |       [none] and a list starting with [some] atom followed by [some v]
273 |       if [o] is [Some v]. *)
274 | 
275 |   val list : ('a -> t) -> 'a list -> t
276 |   (** [list el l] is [l] as a list whose elements are generated using
277 |       [el]. *)
278 | 
279 |   val sexp : Sexp.t -> t
280 |   (** [sexp s] is the s-expression [s] as a generated value. *)
281 | 
282 |   (** {1:output Output} *)
283 | 
284 |   val buffer_add : Buffer.t -> t -> unit
285 |   (** [buffer_add b g] adds the generated s-expression value [g] to [b]. *)
286 | 
287 |   val to_string : t -> string
288 |   (** [to_string g] is the generated s-expression value [g] as a string. *)
289 | end
290 | 
291 | (** S-expression queries. *)
292 | module Sexpq : sig
293 | 
294 |   (** {1:query_results Result paths} *)
295 | 
296 |   type path = (Sexp.index * Sexp.loc) list
297 |   (** The type for result paths. This is a sequence of indexing
298 |       operations tupled with the source text location of the indexed
299 |       s-expression in {b reverse} order. *)
300 | 
301 |   val pp_path :
302 |     ?pp_loc:Sexp.loc Sexp.fmt -> ?pp_key:string Sexp.fmt -> unit ->
303 |     path Sexp.fmt
304 |   (** [pp_path ~pp_loc ~pp_key ()] formats paths using [pp_loc]
305 |       (defaults to {!Sexp.pp_loc}) and [pp_key] to format the keys
306 |       (defaults to {!Sexp.pp_key}). *)
307 | 
308 |   (** {1:query_errors Query errors} *)
309 | 
310 |   type error_kind =
311 |   [ `Key_unbound of string * string list
312 |   | `Msg of string
313 |   | `Nth_unbound of int * int
314 |   | `Out_of_dom of string * string * string list ]
315 |   (** The type for kinds of errors.
316 |       {ul
317 |       {- [`Key_unbound (k, dom)] on [k] that should have been in [dom]
318 |          (if not empty).}
319 |       {- [`Msg m] an arbitrary message [m] (should not
320 |          include position information).}
321 |       {- [`Nth_unbound (n, len)] on [n] an out of bound index in a list
322 |          of length [len].}
323 |       {- [`Out_of_dom (kind, v, dom)] on [v] of kind [kind] that
324 |          should have been in [dom]}} *)
325 | 
326 |   val pp_error_kind :
327 |     ?pp_em:string Sexp.fmt -> ?pp_key:string Sexp.fmt -> unit ->
328 |     error_kind Sexp.fmt
329 |   (** [pp_error_kind ~pp_loc ~pp_em ~pp_key ()] formats error kinds
330 |       using [pp_loc] for locations, [pp_em] for emphasis and [pp_key]
331 |       for keys. *)
332 | 
333 |   type error = error_kind * (path * Sexp.loc)
334 |   (** The type for query errors. The error kind tupled with the path
335 |       to the offending s-expression and the location of the
336 |       s-expression. *)
337 | 
338 |   val pp_error :
339 |     ?pp_loc:Sexp.loc Sexp.fmt -> ?pp_path:path Sexp.fmt ->
340 |     ?pp_error_kind:error_kind Sexp.fmt -> ?pp_prefix:unit Sexp.fmt -> unit ->
341 |     error Sexp.fmt
342 |   (** [pp_error ~pp_loc ~pp_path ~pp_error_kind ~pp_prefix ()] formats
343 |       errors using [pp_loc], [pp_path] (defaults to {!pp_path}),
344 |       [pp_error_kind] (defaults to {!pp_error_kind}) and [pp_prefix]
345 |       (defaults formats ["Error: "]). *)
346 | 
347 |   val error_to_string :
348 |     ?pp_error:error Sexp.fmt -> ('a, error) result -> ('a, string) result
349 |   (** [error_to_string ~pp_error r] converts an error in [r] to a string using
350 |       [pp_error], defaults to {!pp_error}. *)
351 | 
352 |   (** {1:queries Queries} *)
353 | 
354 |   type 'a t
355 |   (** The type for s-expression queries. A query either succeeds
356 |       against an s-expression with a value of type ['a] or it
357 |       fails. *)
358 | 
359 |   val query : 'a t -> Sexp.t -> ('a, error) result
360 |   (** [query q s] is [Ok v] if the query [q] succeeds on [s] and
361 |       [Error e] otherwise. *)
362 | 
363 |   val query_at_path : 'a t -> (Sexp.t * path) -> ('a, error) result
364 |   (** [query_at_path q (s, p)] is like {!query} except it assumes [s]
365 |       is at path [p]. Use to further query s-expressions obtained with
366 |       {!sexp_with_path} so that errors return the full path to errors. *)
367 | 
368 |   val query' : ?pp_error:error Sexp.fmt -> 'a t -> Sexp.t -> ('a, string) result
369 |   (** [query' q s] is like {!query} except the result is composed with
370 |       {!error_to_string}. *)
371 | 
372 |   (** {1:outcome Success and failure} *)
373 | 
374 |   val succeed : 'a -> 'a t
375 |   (** [succeed v] is a query that succeeds with value [v] on any
376 |       s-expression. *)
377 | 
378 |   val fail : error_kind -> 'a t
379 |   (** [fail k] is a query that fails on any s-expression with error
380 |       kind [k]. *)
381 | 
382 |   val failf : ('a, Format.formatter, unit, 'b t) format4 -> 'a
383 |   (** [failf fmt ...] is [fail (`Msg m)] with [m] formatted according
384 |       to [fmt]. *)
385 | 
386 |   (** {1:qcomb Query combinators} *)
387 | 
388 |   val app : ('a -> 'b) t -> 'a t -> 'b t
389 |   (** [app fq q] queries an s-expression first with [fq] and then with [q]
390 |       and applies the result of latter to the former. *)
391 | 
392 |   val ( $ ) : ('a -> 'b) t -> 'a t -> 'b t
393 |   (** [f $ v] is [app f v]. *)
394 | 
395 |   val pair : 'a t -> 'b t -> ('a * 'b) t
396 |   (** [pair q0 q1] queries an s-expression first with [q0] and then with [q1]
397 |       and returns the pair of their result. *)
398 | 
399 |   val bind : 'a t -> ('a -> 'b t) -> 'b t
400 |   (** [bind q f] queries an s-expression with [q], applies the result to
401 |       [f] and re-queries the s-expression with the result. *)
402 | 
403 |   val map : ('a -> 'b) -> 'a t -> 'b t
404 |   (** [map f q] is [app (succeed f) q]. *)
405 | 
406 |   val some : 'a t -> 'a option t
407 |   (** [some q] is [map Option.some q]. *)
408 | 
409 |   val loc : 'a t -> ('a * (path * Sexp.loc)) t
410 |   (** [loc q] queries with [q] an returns the result with the
411 |       query path and source text location to the queried
412 |       s-expression. *)
413 | 
414 |   (** {1:qsexp S-expression queries}
415 | 
416 |       Queries for s-expressions. These queries never fail. *)
417 | 
418 |   val fold : atom:'a t -> list:'a t -> 'a t
419 |   (** [fold ~atom ~list] queries atoms with [atom] and lists with [list]. *)
420 | 
421 |   val sexp : Sexp.t t
422 |   (** [sexp] queries any s-expression and returns its generic representation. *)
423 | 
424 |   val sexp_with_path : (Sexp.t * path) t
425 |   (** [sexp_with_path] is like {!sexp} but also returns the path to
426 |       s-expression. *)
427 | 
428 |   (** {1:qatom Atom queries}
429 | 
430 |       Queries for atoms. These queries fail on lists. *)
431 | 
432 |   val atom : string t
433 |   (** [atom] queries an atom as a string. *)
434 | 
435 |   val atom_to : kind:string -> (string -> ('a, string) result) -> 'a t
436 |   (** [atom_to ~kind p] queries an atom and parses it with [p]. In
437 |       case of [Error m] fails with message [m]. [kind] is the kind of
438 |       value parsed, used for the error in case a list is found. *)
439 | 
440 |   (**  {b TODO.} Maybe combinators to devise an approriate parse
441 |        function for {!atom_to} are a better idea than the following
442 |        two combinators. *)
443 | 
444 |   val enum : kind:string -> Set.Make(String).t -> string t
445 |   (** [enum ~kind ss] queries an atom for one of the element of [ss]
446 |       and fails otherwise. [kind] is for the kind of elements in [ss],
447 |       it used for error reporting. *)
448 | 
449 |   val enum_map : kind:string -> 'a Map.Make(String).t -> 'a t
450 |   (** [enum_map ~pp_elt ~kind sm] queries an atom for it's map in [sm]
451 |       and fails if the atom is not bound in [sm]. [kind] is for the
452 |       kind of elements in [sm], it used for error reporting. *)
453 | 
454 |   val bool : bool t
455 |   (** [bool] queries an atom for one of [true] or [false]. *)
456 | 
457 |   val int : int t
458 |   (** [int] queries an atom for an integer value parsed with
459 |       {!int_of_string}. *)
460 | 
461 |   val int32 : int32 t
462 |   (** [int32] queries an atom for an integer value parsed with
463 |       {!Int32.of_string}. *)
464 | 
465 |   val int64 : int64 t
466 |   (** [int64] queries an atom for an integer value parsed with
467 |       {!Int64.of_string}. *)
468 | 
469 |   val float : float t
470 |   (** [float] queries an atom for a float value parsed with
471 |       {!float_of_string}. *)
472 | 
473 |   (** {1:qlist List queries}
474 | 
475 |       Queries for s-expression lists. These queries fail on atoms. *)
476 | 
477 |   val is_empty : bool t
478 |   (** [is_empty] queries a list for emptyness. *)
479 | 
480 |   val hd : 'a t -> 'a t
481 |   (** [hd q] queries the head of a list with [q]. Fails on empty lists. *)
482 | 
483 |   val tl : 'a t -> 'a t
484 |   (** [tail q] queries the tail of a list with [q]. Fails on empty lists. *)
485 | 
486 |   val fold_list : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b t
487 |   (** [fold_list f q acc] queries the elements of a list from left to
488 |       right with [q] and folds the result with [f] starting with
489 |       [acc]. *)
490 | 
491 |   val list : 'a t -> 'a list t
492 |   (** [list q] queries the elements of a list with [q]. *)
493 | 
494 |   (** {2:qlist List index queries} *)
495 | 
496 |   val nth : ?absent:'a -> int -> 'a t -> 'a t
497 |   (** [nth ?absent n q] queries the [n]th index of a list with [q]. If
498 |       [n] is negative counts from the end of the list, so [-1] is the
499 |       last list element. If the element does not exist this fails if
500 |       [absent] is [None] and succeeds with [v] if [absent] is [Some
501 |       v]. *)
502 | 
503 |   val delete_nth : must_exist:bool -> int -> Sexp.t t
504 |   (** [delete_nth ~must_exist n] deletes the [n]th element of the
505 |       list. If the element does not exist this fails when [must_exist]
506 |       is [true] or returns the list unchanged when [must_exist] is
507 |       [false]. *)
508 | 
509 |   (** {1:qdict Dictionary queries}
510 | 
511 |       Queries for s-expression {{!sexp}dictionaries}. These queries
512 |       fail on atoms. *)
513 | 
514 |   val key : ?absent:'a -> string -> 'a t -> 'a t
515 |   (** [key ?absent k q] queries the value of key [k] of a dictionary
516 |       with [q].  If [k] is not bound this fails if [absent] is [None]
517 |       and succeeds with [v] if [absent] is [Some v]. *)
518 | 
519 |   val delete_key : must_exist:bool -> string -> Sexp.t t
520 |   (** [delete_key ~must_exist k] deletes key [k] from the dictionary.
521 |       If [k] is not bound this fails when [must_exist] is [true] or
522 |       returns the dictionary unchanged when [must_exist] is
523 |       [false]. *)
524 | 
525 |   val key_dom : validate:Set.Make(String).t option -> Set.Make(String).t t
526 |   (** [key_dom validate] queries the key domain of a list of bindings.
527 |       If [validate] is [Some dom], the query fails if a key is not in
528 |       [dom]. The query also fails if a binding is not well-formed.
529 |       [pp_key] is used to format keys.
530 | 
531 |       {b TODO.} Not really happy about this function, we lose the key
532 |       locations which is useful for further deconstruction. Also maybe
533 |       we rather want binding folds. *)
534 | 
535 |   val atomic : 'a t -> 'a t
536 |   (** [atomic q] queries an atom or the atom of a singleton list with
537 |       [q]. It fails on empty lists or non-singleton lists.
538 | 
539 |       This is useful for singleton {{!sexp_dict}dictionary}
540 |       bindings. In error reporting treats the list as if it doesn't
541 |       exist syntactically which is the case in dictionary bindings. *)
542 | 
543 |   (** {1:indices Index queries} *)
544 | 
545 |   val index : ?absent:'a -> Sexp.index -> 'a t -> 'a t
546 |   (** [index ?absent i q] queries the s-expression index [i] with [q] using
547 |       {!nth} or {!key} according to [i]. Fails on atoms. *)
548 | 
549 |   val delete_index : must_exist:bool -> Sexp.index -> Sexp.t t
550 |   (** [delete_index ~must_exist i] deletes the s-expression index [i]
551 |       using {!delete_nth} or {!delete_key} according to [i]. *)
552 | 
553 |   (** {1:path_caret Path and caret queries}
554 | 
555 |       These queries fail on indexing errors, that is if an atom gets
556 |       indexed. *)
557 | 
558 |   val path : ?absent:'a -> Sexp.path -> 'a t -> 'a t
559 |   (** [path p q] queries the s-expression found by [p] using [q]. If
560 |       [p] can't be found this fails if [absent] is [None] and succeeds
561 |       with [v] if [absent] is [Some v]. *)
562 | 
563 |   val probe_path : Sexp.path -> (path * Sexp.t * Sexp.path) t
564 |   (** [probe_path p] is a query that probes for [p]'s existence. Except
565 |       for indexing errors it always succeeds with [(sp, s, rem)]:
566 |       {ul
567 |       {- If [p] is found, this is the path to
568 |          [sp] to the found expression [s] and [rem] is empty.}
569 |       {- If [p] is not found, this is the path [sp] that leads
570 |          to the s-expression [s] that could not be indexed and [rem]
571 |          has the indexes that could not be performed.}} *)
572 | 
573 |   val delete_at_path : must_exist:bool -> Sexp.path -> Sexp.t t
574 |   (** [delete_at_path ~must_exist p] deletes the s-expression found by
575 |       [p] from the queried s-expression. If the path does not exist
576 |       this fails if [must_exist] is [true] and returns the
577 |       s-expression itself if [must_exist] is [false]. *)
578 | 
579 |   val splice_at_path :
580 |     ?stub:Sexp.t -> must_exist:bool -> Sexp.path -> rep:Sexp.t -> Sexp.t t
581 |   (** [splice_at_path ?stub ~must_exist p ~rep] replaces the s-expression
582 |       found at [p] by splicing [rep]. If the path does not exist this fails
583 |       if [must_exist] is [true] and the non-existing part of
584 |       the path is created if [must_exist] is [false]. If elements need
585 |       to be created [stub] (defaults to [Sexp.atom ""]) is used. *)
586 | 
587 |   val splice_at_caret :
588 |     ?stub:Sexp.t -> must_exist:bool -> Sexp.caret -> rep:Sexp.t -> Sexp.t t
589 |   (** [splice_caret ?stub ~must_exist p rep] splices the s-expression [rep]
590 |       at the caret [p] of the s-expression. If path of the caret does not
591 |       exist this fails if [must_exist] is [true] and the non-existing part of
592 |       the path is created if [must_exist] is [false]. If atoms
593 |       need to be create [stub] (defaults to [Sexp.atom ""]) is used. *)
594 | 
595 |   (** {1:ocaml OCaml datatype encoding queries} *)
596 | 
597 |   val option : 'a t -> 'a option t
598 |   (** [option q] queries with [q] the value of an option represented
599 |       according the encoding of {!Sexpg.option}. *)
600 | end
601 | 
602 | (** {1:sexp_dict Dictionaries}
603 | 
604 |     An s-expression {e dictionary} is a list of bindings. A {e
605 |     binding} is a list that starts with a {e key} and the remaining
606 |     elements of the list are the binding's {e value}. For example in this
607 |     binding:
608 | {v
609 | (key v0 v1 ...)
610 | v}
611 |     The key is [key] and the value the possibly empty list [v0], [v1],
612 |     ... of s-expressions. The {{!Sexpq.qdict}API} for dictionaries
613 |     represents the value by a fake (doesn't exist syntactically)
614 |     s-expression list whose text location starts at the first element
615 |     of the value.
616 | 
617 |     {1:sexp_path_caret Path & caret syntax}
618 | 
619 |     Path and carets provide a way for end users to address
620 |     s-expressions and edit locations.
621 | 
622 |     A {e path} is a sequence of {{!sexp_dict}key} and list indexing
623 |     operations. Applying the path to an s-expression leads to an
624 |     s-expression or nothing if one of the indices does not exist, or
625 |     an error if ones tries to index an atom.
626 | 
627 |     A {e caret} is a path and a spatial specification for the
628 |     s-expression found by the path. The caret indicates either the
629 |     void before that expression, the expression itself (over caret) or the
630 |     void after it.
631 | 
632 |     Here are a few examples of paths and carets, syntactically the
633 |     charater ['v'] is used to denote the caret's insertion point before or
634 |     after a path. There's no distinction between a path an over caret.
635 | 
636 | {v
637 | Ocaml.libs        # value of key 'libs' of dictionary 'ocaml'
638 | ocaml.v[libs]     # before the key binding (if any)
639 | ocaml.[libs]v     # after the key binding (if any)
640 | 
641 | ocaml.libs.[0]    # first element of key 'libs' of dictionary 'ocaml'
642 | ocaml.libs.v[0]   # before first element (if any)
643 | ocaml.libs.[0]v   # after first element (if any)
644 | 
645 | ocaml.libs.[-1]   # last element of key 'libs' of dictionary 'ocaml'
646 | ocaml.libs.v[-1]  # before last element (if any)
647 | ocaml.libs.[-1]v  # after last element (if any)
648 | v}
649 | 
650 |     More formally a {e path} is a [.] seperated list of indices.
651 | 
652 |     An {e index} is written [[i]]. [i] can a zero-based list index
653 |     with negative indices counting from the end of the list ([-1] is
654 |     the last element). Or [i] can be a dictionary key [key]. If there
655 |     is no ambiguity, the surrounding brackets can be dropped.
656 | 
657 |     A caret is a path whose last index brackets can be prefixed or
658 |     suffixed by an insertion point, represented by the character
659 |     ['v'].  This respectively denote the void before or after the
660 |     s-expression found by the path.
661 | 
662 |     {b Note.} The syntax has no form of quoting at the moment this
663 |     means key names can't contain, [\[], [\]], or start with a number.
664 | 
665 |     {1:sexp_syntax S-expression syntax}
666 | 
667 |     S-expressions are a general way of describing data via atoms
668 |     (sequences of characters) and lists delimited by parentheses.
669 |     Here are a few examples of s-expressions and their syntax:
670 | 
671 | {v
672 | this-is-an_atom
673 | (this is a list of seven atoms)
674 | (this list contains (a nested) list)
675 | 
676 | ; This is a comment
677 | ; Anything that follows a semi-colon is ignored until the next line
678 | 
679 | (this list ; has three atoms and an embeded ()
680 |  comment)
681 | 
682 | "this is a quoted atom, it can contain spaces ; and ()"
683 | 
684 | "quoted atoms can be split ^
685 |  across lines or contain Unicode esc^u{0061}pes"
686 | v}
687 | 
688 |     We define the syntax of s-expressions over a sequence of
689 |     {{:http://unicode.org/glossary/#unicode_scalar_value}Unicode
690 |     characters} in which all US-ASCII control characters
691 |     (U+0000..U+001F and U+007F) except {{!whitespace}whitespace} are
692 |     forbidden in unescaped form.
693 | 
694 |     {2:sexp S-expressions}
695 | 
696 |     An {e s-expression} is either an {{!atoms}{e atom}} or a
697 |     {{!lists}{e list}} of s-expressions interspaced with
698 |     {{!whitespace}{e whitespace}} and {{!comments}{e comments}}. A {e
699 |     sequence of s-expressions} is a succession of s-expressions
700 |     interspaced with whitespace and comments.
701 | 
702 |     These elements are informally described below and finally made
703 |     precise via an ABNF {{!grammar}grammar}.
704 | 
705 |     {2:whitespace Whitespace}
706 | 
707 |     Whitespace is a sequence of whitespace characters, namely, space
708 |     [' '] (U+0020), tab ['\t'] (U+0009), line feed ['\n'] (U+000A),
709 |     vertical tab ['\t'] (U+000B), form feed (U+000C) and carriage return
710 |     ['\r'] (U+000D).
711 | 
712 |     {2:comments Comments}
713 | 
714 |     Unless it occurs inside an atom in quoted form (see below)
715 |     anything that follows a semicolon [';'] (U+003B) is ignored until
716 |     the next {e end of line}, that is either a line feed ['\n'] (U+000A), a
717 |     carriage return ['\r']  (U+000D) or a carriage return and a line feed
718 |     ["\r\n"] (<U+000D,U+000A>).
719 | 
720 | {v
721 | (this is not a comment) ; This is a comment
722 | (this is not a comment)
723 | v}
724 | 
725 |     {2:atoms Atoms}
726 | 
727 |     An atom represents ground data as a string of Unicode characters.
728 |     It can, via escapes, represent any sequence of Unicode characters,
729 |     including control characters and U+0000. It cannot represent an
730 |     arbitrary byte sequence except via a client-defined encoding
731 |     convention (e.g. Base64 or hex encoding).
732 | 
733 |     Atoms can be specified either via an unquoted or a quoted form. In
734 |     unquoted form the atom is written without delimiters. In quoted
735 |     form the atom is delimited by double quote ['"'] (U+0022)
736 |     characters, it is mandatory for atoms that contain
737 |     {{!whitespace}whitespace}, parentheses ['('] [')'], semicolons
738 |     [';'], quotes ['"'], carets ['^'] or characters that need to be
739 |     escaped.
740 | 
741 | {v
742 | abc        ; a token for the atom "abc"
743 | "abc"      ; a quoted token for the atom "abc"
744 | "abc; (d"  ; a quoted token for the atom "abc; (d"
745 | ""         ; the quoted token for the atom ""
746 | v}
747 | 
748 |     For atoms that do not need to be quoted, both their unquoted and
749 |     quoted form represent the same string; e.g. the string ["true"]
750 |     can be represented both by the atoms {e true} and {e "true"}. The
751 |     empty string can only be represented in quoted form by {e ""}.
752 | 
753 |     In quoted form escapes are introduced by a caret ['^']. Double
754 |     quotes ['"'] and carets ['^'] must always be escaped.
755 | 
756 | {v
757 | "^^"             ; atom for ^
758 | "^n"             ; atom for line feed U+000A
759 | "^u{0000}"       ; atom for U+0000
760 | "^"^u{1F42B}^""  ; atom with a quote, U+1F42B and a quote
761 | v}
762 | 
763 |     The following escape sequences are recognized:
764 |     {ul
765 |     {- ["^ "] (<U+005E,U+0020>) for space [' '] (U+0020)}
766 |     {- ["^\""] (<U+005E,U+0022>) for double quote ['"'] (U+0022)
767 |        {b mandatory}}
768 |     {- ["^^"] (<U+005E,U+005E>) for caret ['^'] (U+005E) {b mandatory}}
769 |     {- ["^n"] (<U+005E,U+006E>) for line feed ['\n'] (U+000A)}
770 |     {- ["^r"] (<U+005E,U+0072>) for carriage return ['\r'] (U+000D)}
771 |     {- ["^u{X}"] with [X] is from 1 to at most 6 upper or lower case
772 |        hexadecimal digits standing for the corresponding
773 |        {{:http://unicode.org/glossary/#unicode_scalar_value}Unicode character}
774 |          U+X.}
775 |     {- Any other character except line feed ['\n'] (U+000A) or
776 |        carriage return ['\r'] (U+000D), following a caret is an
777 |        illegal sequence of characters. In the two former cases the
778 |        atom continues on the next line and white space is ignored.}}
779 | 
780 |     An atom in quoted form can be split across lines by using a caret
781 |     ['^'] (U+005E) followed by a line feed ['\n'] (U+000A) or a
782 |     carriage return ['\r'] (U+000D); any subsequent
783 |     {{!whitespace}whitespace} is ignored.
784 | 
785 | {v
786 | "^
787 |   a^
788 |   ^ " ; the atom "a "
789 | v}
790 | 
791 |     The character ^ (U+005E) is used as an escape character rather
792 |     than the usual \ (U+005C) in order to make quoted Windows®
793 |     file paths decently readable and, not the least, utterly please
794 |     DKM.
795 | 
796 |     {2:lists Lists}
797 | 
798 |     Lists are delimited by left ['('] (U+0028) and right [')']
799 |     (U+0029) parentheses. Their elements are s-expressions separated
800 |     by optional {{!whitespace}whitespace} and
801 |     {{!comments}comments}. For example:
802 | 
803 | {v
804 | (a list (of four) expressions)
805 | (a list(of four)expressions)
806 | ("a"list("of"four)expressions)
807 | (a list (of ; This is a comment
808 | four) expressions)
809 | () ; the empty list
810 | v}
811 | 
812 |     {2:grammar Formal grammar}
813 | 
814 |     The following {{:https://tools.ietf.org/html/rfc5234}RFC 5234}
815 |     ABNF grammar is defined on a sequence of
816 |     {{:http://unicode.org/glossary/#unicode_scalar_value}Unicode characters}.
817 | 
818 | {v
819 |  sexp-seq = *(ws / comment / sexp)
820 |      sexp = atom / list
821 |      list = %x0028 sexp-seq %x0029
822 |      atom = token / qtoken
823 |     token = t-char *(t-char)
824 |    qtoken = %x0022 *(q-char / escape / cont) %x0022
825 |    escape = %x005E (%x0020 / %x0022 / %x005E / %x006E / %x0072 /
826 |                     %x0075 %x007B unum %x007D)
827 |      unum = 1*6(HEXDIG)
828 |      cont = %x005E nl ws
829 |        ws = *(ws-char)
830 |   comment = %x003B *(c-char) nl
831 |        nl = %x000A / %x000D / %x000D %x000A
832 |    t-char = %x0021 / %x0023-0027 / %x002A-%x003A / %x003C-%x005D /
833 |             %x005F-%x007E / %x0080-D7FF / %xE000-10FFFF
834 |    q-char = t-char / ws-char / %x0028 / %x0029 / %x003B
835 |   ws-char = %x0020 / %x0009 / %x000A / %x000B / %x000C / %x000D
836 |    c-char = %x0009 / %x000B / %x000C / %x0020-D7FF / %xE000-10FFFF
837 | v}
838 | 
839 |     A few additional constraints not expressed by the grammar:
840 |     {ul
841 |     {- [unum] once interpreted as an hexadecimal number must be a
842 |        {{:http://unicode.org/glossary/#unicode_scalar_value}Unicode scalar
843 |        value.}}
844 |     {- A comment can be ended by the end of the character sequence rather
845 |        than [nl]. }}
846 | *)
847 | 


--------------------------------------------------------------------------------
/src/serialkit_text.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2019 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | 
  7 | let unsafe_get = String.unsafe_get
  8 | module String_set = Set.Make (String)
  9 | 
 10 | (* Heterogeneous dictionaries *)
 11 | 
 12 | module Dict = struct
 13 |   (* Type identifiers, can be deleted once we require 5.1 *)
 14 |   module Type = struct
 15 |     type (_, _) eq = Equal : ('a, 'a) eq
 16 |     module Id = struct
 17 |       type _ id = ..
 18 |       module type ID = sig type t type _ id += Id : t id end
 19 |       type 'a t = (module ID with type t = 'a)
 20 | 
 21 |       let make (type a) () : a t =
 22 |         (module struct type t = a type _ id += Id : t id end)
 23 | 
 24 |       let provably_equal
 25 |           (type a b) ((module A) : a t) ((module B) : b t) : (a, b) eq option
 26 |         =
 27 |         match A.Id with B.Id -> Some Equal | _ -> None
 28 | 
 29 |       let uid (type a) ((module A) : a t) =
 30 |         Obj.Extension_constructor.id (Obj.Extension_constructor.of_val A.Id)
 31 |     end
 32 |   end
 33 | 
 34 |   module M = Map.Make (Int)
 35 |   type 'a key = 'a Type.Id.t
 36 |   type binding = B : 'a key * 'a -> binding
 37 |   type t = binding M.t
 38 | 
 39 |   let key = Type.Id.make
 40 |   let empty = M.empty
 41 |   let mem k m = M.mem (Type.Id.uid k) m
 42 |   let add k v m = M.add (Type.Id.uid k) (B (k, v)) m
 43 |   let tag k m = add k () m
 44 |   let remove k m = M.remove (Type.Id.uid k) m
 45 |   let find : type a. a key -> t -> a option =
 46 |   fun k m -> match M.find_opt (Type.Id.uid k) m with
 47 |   | None -> None
 48 |   | Some B (k', v) ->
 49 |       match Type.Id.provably_equal k k' with
 50 |       | None -> assert false | Some Type.Equal -> Some v
 51 | end
 52 | 
 53 | (* Text locations *)
 54 | 
 55 | module Textloc = struct
 56 | 
 57 |   (* File paths *)
 58 | 
 59 |   type fpath = string
 60 |   let file_none = "-"
 61 |   let pp_path = Format.pp_print_string
 62 | 
 63 |   (* Byte positions *)
 64 | 
 65 |   type byte_pos = int (* zero-based *)
 66 |   let byte_pos_none = -1
 67 | 
 68 |   (* Lines *)
 69 | 
 70 |   type line_num = int (* one-based *)
 71 |   let line_num_none = -1
 72 | 
 73 |   (* Line positions
 74 | 
 75 |      We keep the byte position of the first element on the line. This
 76 |      first element may not exist and be equal to the text length if
 77 |      the input ends with a newline. Editors expect tools to compute
 78 |      visual columns (not a very good idea). By keeping these byte
 79 |      positions we can approximate columns by subtracting the line byte
 80 |      position data byte location. This will only be correct on
 81 |      US-ASCII data. *)
 82 | 
 83 |   type line_pos = line_num * byte_pos
 84 |   let line_pos_first = 1, 0
 85 |   let line_pos_none = line_num_none, byte_pos_none
 86 | 
 87 |   (* Text locations *)
 88 | 
 89 |   type t =
 90 |     { file : fpath;
 91 |       first_byte : byte_pos; last_byte : byte_pos;
 92 |       first_line : line_pos; last_line : line_pos }
 93 | 
 94 |   let v ~file ~first_byte ~last_byte ~first_line ~last_line =
 95 |     { file; first_byte; last_byte; first_line; last_line }
 96 | 
 97 |   let file l = l.file
 98 |   let first_byte l = l.first_byte
 99 |   let last_byte l = l.last_byte
100 |   let first_line l = l.first_line
101 |   let last_line l = l.last_line
102 |   let none =
103 |     let first_byte = byte_pos_none and last_byte = byte_pos_none in
104 |     let first_line = line_pos_none and last_line = line_pos_none in
105 |     v ~file:file_none ~first_byte ~last_byte ~first_line ~last_line
106 | 
107 |   (* Predicates and comparisons *)
108 | 
109 |   let is_none l = l.first_byte < 0
110 |   let is_empty l = l.first_byte > l.last_byte
111 |   let equal l0 l1 =
112 |     String.equal l0.file l1.file &&
113 |     Int.equal l0.first_byte l1.first_byte &&
114 |     Int.equal l0.last_byte l1.last_byte
115 | 
116 |   let compare l0 l1 =
117 |     let c = String.compare l0.file l1.file in
118 |    if c <> 0 then c else
119 |     let c = Int.compare l0.first_byte l1.first_byte in
120 |     if c <> 0 then c else
121 |     Int.compare l0.last_byte l1.last_byte
122 | 
123 |   (* Shrink and stretch *)
124 | 
125 |   let set_first l ~first_byte ~first_line = { l with first_byte; first_line }
126 |   let set_last l ~last_byte ~last_line = { l with last_byte; last_line }
127 | 
128 |   [@@@warning "-6"]
129 |   let to_first l = v l.file l.first_byte l.first_byte l.first_line l.first_line
130 |   let to_last l = v l.file l.last_byte l.last_byte l.last_line l.last_line
131 |   let before l = v l.file l.first_byte byte_pos_none l.first_line line_pos_none
132 |   let after l =
133 |     v l.file (l.first_byte + 1) byte_pos_none l.last_line line_pos_none
134 |   [@@@warning "+6"]
135 | 
136 |   let span l0 l1 =
137 |     let first_byte, first_line =
138 |       if l0.first_byte < l1.first_byte
139 |       then l0.first_byte, l0.first_line
140 |       else l1.first_byte, l1.first_line
141 |     in
142 |     let last_byte, last_line, file =
143 |       if l0.last_byte < l1.last_byte
144 |       then l1.last_byte, l1.last_line, l1.file
145 |       else l0.last_byte, l0.last_line, l0.file
146 |     in
147 |     v ~file ~first_byte ~first_line ~last_byte ~last_line
148 | 
149 |   [@@@warning "-6"]
150 |   let reloc ~first ~last =
151 |     v last.file first.first_byte last.last_byte first.first_line last.last_line
152 |   [@@@warning "+6"]
153 |   (* Formatters *)
154 | 
155 |   let pf = Format.fprintf
156 |   let pp_ocaml ppf l = match is_none l with
157 |   | true -> pf ppf "File \"%a\"" pp_path l.file
158 |   | false ->
159 |       let pp_lines ppf l = match fst l.first_line = fst l.last_line with
160 |       | true -> pf ppf "line %d" (fst l.first_line)
161 |       | false -> pf ppf "lines %d-%d" (fst l.first_line) (fst l.last_line)
162 |       in
163 |       (* "characters" represent positions (insertion points) not columns *)
164 |       let pos_s = l.first_byte - snd l.first_line in
165 |       let pos_e = l.last_byte - snd l.last_line + 1 in
166 |       if pos_s = 0 && pos_e = 0
167 |       then pf ppf "File \"%a\", %a" pp_path l.file pp_lines l
168 |       else pf ppf "File \"%a\", %a, characters %d-%d"
169 |           pp_path l.file pp_lines l pos_s pos_e
170 | 
171 |   let pp_gnu ppf l = match is_none l with
172 |   | true -> pf ppf "%a:" pp_path l.file
173 |   | false ->
174 |       let pp_lines ppf l =
175 |         let col_s = l.first_byte - snd l.first_line + 1 in
176 |         let col_e = l.last_byte - snd l.last_line + 1 in
177 |         match fst l.first_line = fst l.last_line with
178 |         | true ->  pf ppf "%d.%d-%d" (fst l.first_line) col_s col_e
179 |         | false ->
180 |             pf ppf "%d.%d-%d.%d"
181 |               (fst l.first_line) col_s (fst l.last_line) col_e
182 |       in
183 |       pf ppf "%a:%a" pp_path l.file pp_lines l
184 | 
185 |   let pp = pp_gnu
186 | 
187 |   let pp_dump ppf l =
188 |     pf ppf "file:%s bytes:%d-%d lines:%d-%d lines-bytes:%d-%d]"
189 |       l.file l.first_byte l.last_byte (fst l.first_line) (fst l.last_line)
190 |       (snd l.first_line) (snd l.last_line)
191 | end
192 | 
193 | (* Node meta data *)
194 | 
195 | module Meta = struct
196 |   type id = int
197 |   type t = { textloc : Textloc.t; id : id; dict : Dict.t }
198 | 
199 |   let new_id = let id = Atomic.make 0 in fun () -> Atomic.fetch_and_add id 1
200 |   let make ?(textloc = Textloc.none) () =
201 |     { textloc; id = new_id (); dict = Dict.empty }
202 | 
203 |   let none = make ()
204 |   let id m = m.id
205 |   let textloc m = m.textloc
206 |   let with_textloc ~keep_id m textloc = match keep_id with
207 |   | true -> { m with textloc }
208 |   | false -> { m with textloc; id = new_id () }
209 | 
210 |   let equal m0 m1 = Int.equal m0.id m1.id
211 |   let compare m0 m1 = Int.compare m0.id m1.id
212 |   let is_none m = equal none m
213 | 
214 |   type 'a key = 'a Dict.key
215 |   let key = Dict.key
216 |   let mem k m = Dict.mem k m.dict
217 |   let add k v m = { m with dict = Dict.add k v m.dict }
218 |   let tag k m = add k () m
219 |   let remove k m = { m with dict = Dict.remove k m.dict }
220 |   let find k m = Dict.find k m.dict
221 | end
222 | 
223 | (* UTF tools
224 | 
225 |    This can be killed once we require OCaml 4.14.
226 |    This is stricly equivalent to what was upstream to the Stdlib. *)
227 | 
228 | module Uchar = struct
229 |   include Uchar
230 | 
231 |   (* UTF codecs tools *)
232 | 
233 |   type utf_decode = int
234 |   (* This is an int [0xDUUUUUU] decomposed as follows:
235 |      - [D] is four bits for decode information, the highest bit is set if the
236 |      decode is valid. The three lower bits indicate the number of elements
237 |      from the source that were consumed by the decode.
238 |      - [UUUUUU] is the decoded Unicode character or the Unicode replacement
239 |      character U+FFFD if for invalid decodes. *)
240 | 
241 |   let valid_bit = 27
242 |   let decode_bits = 24
243 | 
244 |   let[@inline] utf_decode_is_valid d = (d lsr valid_bit) = 1
245 |   let[@inline] utf_decode_length d = (d lsr decode_bits) land 0b111
246 |   let[@inline] utf_decode_uchar d = unsafe_of_int (d land 0xFFFFFF)
247 |   let[@inline] utf_decode n u = ((8 lor n) lsl decode_bits) lor (to_int u)
248 |   let[@inline] utf_decode_invalid n = (n lsl decode_bits) lor (Uchar.to_int rep)
249 | end
250 | 
251 | (* UTF-8 *)
252 | 
253 | let dec_invalid = Uchar.utf_decode_invalid
254 | let[@inline] dec_ret n u = Uchar.utf_decode n (Uchar.unsafe_of_int u)
255 | 
256 | let[@inline] not_in_x80_to_xBF b = b lsr 6 <> 0b10
257 | let[@inline] not_in_xA0_to_xBF b = b lsr 5 <> 0b101
258 | let[@inline] not_in_x80_to_x9F b = b lsr 5 <> 0b100
259 | let[@inline] not_in_x90_to_xBF b = b < 0x90 || 0xBF < b
260 | let[@inline] not_in_x80_to_x8F b = b lsr 4 <> 0x8
261 | 
262 | let[@inline] utf_8_uchar_2 b0 b1 =
263 |   ((b0 land 0x1F) lsl 6) lor
264 |   ((b1 land 0x3F))
265 | 
266 | let[@inline] utf_8_uchar_3 b0 b1 b2 =
267 |   ((b0 land 0x0F) lsl 12) lor
268 |   ((b1 land 0x3F) lsl 6) lor
269 |   ((b2 land 0x3F))
270 | 
271 | let[@inline] utf_8_uchar_4 b0 b1 b2 b3 =
272 |   ((b0 land 0x07) lsl 18) lor
273 |   ((b1 land 0x3F) lsl 12) lor
274 |   ((b2 land 0x3F) lsl 6) lor
275 |   ((b3 land 0x3F))
276 | 
277 | external unsafe_get_uint8 : string -> int -> int = "%bytes_unsafe_get"
278 | 
279 | let string_get_utf_8_uchar b i =
280 |   let b0 = Bytes.get_uint8 (Bytes.unsafe_of_string b) i in
281 |   let get = unsafe_get_uint8 in
282 |   let max = String.length b - 1 in
283 |   match Char.unsafe_chr b0 with (* See The Unicode Standard, Table 3.7 *)
284 |   | '\x00' .. '\x7F' -> dec_ret 1 b0
285 |   | '\xC2' .. '\xDF' ->
286 |       let i = i + 1 in if i > max then dec_invalid 1 else
287 |       let b1 = get b i in if not_in_x80_to_xBF b1 then dec_invalid 1 else
288 |       dec_ret 2 (utf_8_uchar_2 b0 b1)
289 |   | '\xE0' ->
290 |       let i = i + 1 in if i > max then dec_invalid 1 else
291 |       let b1 = get b i in if not_in_xA0_to_xBF b1 then dec_invalid 1 else
292 |       let i = i + 1 in if i > max then dec_invalid 2 else
293 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
294 |       dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
295 |   | '\xE1' .. '\xEC' | '\xEE' .. '\xEF' ->
296 |       let i = i + 1 in if i > max then dec_invalid 1 else
297 |       let b1 = get b i in if not_in_x80_to_xBF b1 then dec_invalid 1 else
298 |       let i = i + 1 in if i > max then dec_invalid 2 else
299 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
300 |       dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
301 |   | '\xED' ->
302 |       let i = i + 1 in if i > max then dec_invalid 1 else
303 |       let b1 = get b i in if not_in_x80_to_x9F b1 then dec_invalid 1 else
304 |       let i = i + 1 in if i > max then dec_invalid 2 else
305 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
306 |       dec_ret 3 (utf_8_uchar_3 b0 b1 b2)
307 |   | '\xF0' ->
308 |       let i = i + 1 in if i > max then dec_invalid 1 else
309 |       let b1 = get b i in if not_in_x90_to_xBF b1 then dec_invalid 1 else
310 |       let i = i + 1 in if i > max then dec_invalid 2 else
311 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
312 |       let i = i + 1 in if i > max then dec_invalid 3 else
313 |       let b3 = get b i in if not_in_x80_to_xBF b3 then dec_invalid 3 else
314 |       dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
315 |   | '\xF1' .. '\xF3' ->
316 |       let i = i + 1 in if i > max then dec_invalid 1 else
317 |       let b1 = get b i in if not_in_x80_to_xBF b1 then dec_invalid 1 else
318 |       let i = i + 1 in if i > max then dec_invalid 2 else
319 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
320 |       let i = i + 1 in if i > max then dec_invalid 3 else
321 |       let b3 = get b i in if not_in_x80_to_xBF b3 then dec_invalid 3 else
322 |       dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
323 |   | '\xF4' ->
324 |       let i = i + 1 in if i > max then dec_invalid 1 else
325 |       let b1 = get b i in if not_in_x80_to_x8F b1 then dec_invalid 1 else
326 |       let i = i + 1 in if i > max then dec_invalid 2 else
327 |       let b2 = get b i in if not_in_x80_to_xBF b2 then dec_invalid 2 else
328 |       let i = i + 1 in if i > max then dec_invalid 3 else
329 |       let b3 = get b i in if not_in_x80_to_xBF b3 then dec_invalid 3 else
330 |       dec_ret 4 (utf_8_uchar_4 b0 b1 b2 b3)
331 |   | _ -> dec_invalid 1
332 | 
333 | 
334 | (* Error message helpers. *)
335 | 
336 | module Err_msg = struct
337 |   let pf = Format.fprintf
338 |   let pp_sp = Format.pp_print_space
339 |   let pp_nop _ () = ()
340 |   let pp_any fmt ppf _ = pf ppf fmt
341 |   let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function
342 |   | [] -> empty ppf ()
343 |   | [v] -> pp_v ppf v
344 |   | _ as vs ->
345 |       let rec loop ppf = function
346 |       | [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1
347 |       | v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs
348 |       | [] -> assert false
349 |       in
350 |       loop ppf vs
351 | 
352 |   let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
353 |   let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs
354 |   let pp_did_you_mean pp_v ppf = function
355 |   | [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs
356 | 
357 |   let pp_must_be pp_v ppf = function
358 |   | [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs
359 | 
360 |   let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v
361 |   let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with
362 |   | [] -> pp_unknown ~kind pp_v ppf v
363 |   | hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints
364 | 
365 |   let edit_distance s0 s1 =
366 |     let minimum (a : int) (b : int) (c : int) : int = min a (min b c) in
367 |     let s0,s1 = if String.length s0 <= String.length s1 then s0,s1 else s1,s0 in
368 |     let m = String.length s0 and n = String.length s1 in
369 |     let rec rows row0 row i = match i > n with
370 |     | true -> row0.(m)
371 |     | false ->
372 |         row.(0) <- i;
373 |         for j = 1 to m do
374 |           if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else
375 |           row.(j) <- minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1)
376 |         done;
377 |         rows row row0 (i + 1)
378 |     in
379 |     rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1
380 | 
381 |   let suggest ?(dist = 2) candidates s =
382 |     let add (min, acc) name =
383 |       let d = edit_distance s name in
384 |       if d = min then min, (name :: acc) else
385 |       if d < min then d, [name] else
386 |       min, acc
387 |     in
388 |     let d, suggs = List.fold_left add (max_int, []) candidates in
389 |     if d <= dist (* suggest only if not too far *) then List.rev suggs else []
390 | end
391 | 
392 | (* UTF-8 decoding table. *)
393 | 
394 | module Utf_8 = struct
395 |   type case =
396 |   | L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E
397 | 
398 |   let case =
399 | (*
400 |   (* See https://tools.ietf.org/html/rfc3629#section-4 *)
401 |   Printf.printf "[|";
402 |   for i = 0 to 255 do
403 |     if i mod 16 = 0 then Printf.printf "\n";
404 |     if 0x00 <= i && i <= 0x7F then Printf.printf "L1; " else
405 |     if 0xC2 <= i && i <= 0xDF then Printf.printf "L2; " else
406 |     if 0xE0 = i then Printf.printf "L3_E0; " else
407 |     if 0xE1 <= i && i <= 0xEC || 0xEE <= i && i <= 0xEF
408 |     then Printf.printf "L3_E1_EC_or_EE_EF; " else
409 |     if 0xED = i then Printf.printf "L3_ED;" else
410 |     if 0xF0 = i then Printf.printf "L4_F0; " else
411 |     if 0xF1 <= i && i <= 0xF3 then Printf.printf "L4_F1_F3; " else
412 |     if 0xF4 = i then Printf.printf "L4_F4; " else
413 |     Printf.printf "E; "
414 |   done;
415 |   Printf.printf "\n|]"
416 | *)
417 |   [|
418 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
419 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
420 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
421 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
422 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
423 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
424 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
425 |     L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
426 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
427 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
428 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
429 |     E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
430 |     E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
431 |     L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
432 |     L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
433 |     L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
434 |     L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
435 |     L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
436 |     L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
437 |   |]
438 | end
439 | 
440 | (* UTF-8 text decoder *)
441 | 
442 | module Textdec = struct
443 |   type 'a fmt = Format.formatter -> 'a -> unit
444 |   let pp_did_you_mean = Err_msg.pp_did_you_mean
445 |   let pp_and_enum = Err_msg.pp_and_enum
446 |   let pp_or_enum = Err_msg.pp_or_enum
447 |   let pp_did_you_mean = Err_msg.pp_did_you_mean
448 |   let pp_must_be = Err_msg.pp_must_be
449 |   let pp_unknown = Err_msg.pp_unknown
450 |   let pp_unknown' = Err_msg.pp_unknown'
451 | 
452 |   (* Decoders *)
453 | 
454 |   type t =
455 |     { file : Textloc.fpath; i : string; tok : Buffer.t;
456 |       mutable pos : int; mutable line : int; mutable line_pos : int; }
457 | 
458 |   let from_string ?(file = Textloc.file_none) i =
459 |     { file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }
460 | 
461 |   (* Location *)
462 | 
463 |   let file d = d.file
464 |   let pos d = d.pos
465 |   let line d = d.line, d.line_pos
466 |   let loc d ~first_byte ~last_byte ~first_line ~last_line =
467 |     Textloc.v ~file:d.file ~first_byte ~last_byte ~first_line ~last_line
468 | 
469 |   let loc_to_here d ~first_byte ~first_line =
470 |     let last_line = (d.line, d.line_pos) in
471 |     loc d ~first_byte ~last_byte:d.pos ~first_line ~last_line
472 | 
473 |   let loc_here d =
474 |     loc_to_here d ~first_byte:d.pos ~first_line:(d.line, d.line_pos)
475 | 
476 |   (* Errors *)
477 | 
478 |   exception Err of Textloc.t * string
479 | 
480 |   let err loc msg = raise_notrace (Err (loc, msg))
481 |   let err_to_here d ~first_byte ~first_line fmt =
482 |     Format.kasprintf (err (loc_to_here d ~first_byte ~first_line)) fmt
483 | 
484 |   let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
485 |   let err_suggest = Err_msg.suggest
486 | 
487 |   (* Lexing *)
488 | 
489 |   let[@inline] incr_line d = match d.i.[d.pos] with (* assert (not (eoi d)) *)
490 |   | '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
491 |   | '\n' ->
492 |       (if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
493 |       d.line_pos <- d.pos + 1;
494 |   | _ -> ()
495 | 
496 |   let[@inline] eoi d = d.pos >= String.length d.i
497 |   let[@inline] byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos]
498 |   let[@inline] accept_byte d = incr_line d; d.pos <- d.pos + 1
499 | 
500 |   let accept_utf_8 accept d =
501 |     let err d = match byte d with
502 |     | 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
503 |     | b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
504 |     in
505 |     let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in
506 |     match byte d with
507 |     | 0xFFFF -> err d
508 |     | b ->
509 |         (* If a subsequent [byte d] invocation is 0xFFFF we get to [err]. *)
510 |         match Utf_8.case.(b) with
511 |         | L1 -> accept d
512 |         | L2 -> accept d; accept_tail d
513 |         | L3_E0 ->
514 |             accept d;
515 |             if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d;
516 |             accept_tail d
517 |         | L3_E1_EC_or_EE_EF ->
518 |             accept d; accept_tail d; accept_tail d
519 |         | L3_ED ->
520 |             accept d;
521 |             if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d;
522 |             accept_tail d
523 |         | L4_F0 ->
524 |             accept d;
525 |             if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d;
526 |             accept_tail d; accept_tail d
527 |         | L4_F1_F3 ->
528 |             accept d;
529 |             accept_tail d; accept_tail d; accept_tail d;
530 |         | L4_F4 ->
531 |             accept d;
532 |             if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d;
533 |         | E -> err d
534 | 
535 |   let accept_uchar d = accept_utf_8 accept_byte d
536 | 
537 |   (* Tokenizer *)
538 | 
539 |   let[@inline] lex_clear d = Buffer.clear d.tok
540 |   let[@inline] lex_pop d = let t = Buffer.contents d.tok in lex_clear d; t
541 |   let[@inline] lex_add_byte d b = Buffer.add_char d.tok (Char.chr b)
542 |   let[@inline] lex_add_bytes d s = Buffer.add_string d.tok s
543 |   let[@inline] lex_add_char d c = Buffer.add_char d.tok c
544 |   let[@inline] lex_add_uchar d u = Buffer.add_utf_8_uchar d.tok u
545 |   let[@inline] lex_accept_byte d =
546 |     Buffer.add_char d.tok d.i.[d.pos]; accept_byte d
547 | 
548 |   let[@inline] lex_accept_uchar d = accept_utf_8 lex_accept_byte d
549 | 
550 |   (* Insertions and substitutions *)
551 | 
552 |   let string_subrange ?(first = 0) ?last s =
553 |     let max = String.length s - 1 in
554 |     let last = match last with
555 |     | None -> max
556 |     | Some l when l > max -> max
557 |     | Some l -> l
558 |     in
559 |     let first = if first < 0 then 0 else first in
560 |     if first > last then "" else
561 |     String.sub s first (last - first + 1)
562 | 
563 |   let string_replace ~start ~stop ~rep s =
564 |     let len = String.length s in
565 |     if stop < start || start < 0 || start > len || stop < 0 || stop > len
566 |     then invalid_arg (Printf.sprintf "invalid start:%d stop:%d" start stop) else
567 |     let b = String.sub s 0 start in
568 |     let a = String.sub s stop (len - stop) in
569 |     String.concat "" [b; rep; a]
570 | end
571 | 


--------------------------------------------------------------------------------
/src/serialkit_text.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2019 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** UTF-8 text based formats tools.
  7 | 
  8 |     Open this module to use it defines only module in your scope. *)
  9 | 
 10 | (**/**)
 11 | module Dict : sig
 12 |   type 'a key
 13 |   val key : unit -> 'a key
 14 |   type t
 15 |   val empty : t
 16 |   val mem : 'a key -> t -> bool
 17 |   val add : 'a key -> 'a -> t -> t
 18 |   val tag : unit key -> t -> t
 19 |   val remove : 'a key -> t -> t
 20 |   val find : 'a key -> t -> 'a option
 21 | end
 22 | (**/**)
 23 | 
 24 | (** Text locations.
 25 | 
 26 |     A text location identifies a text span in a given UTF-8 encoded file
 27 |     by an inclusive range of absolute {{!Textloc.type-byte_pos}byte} positions
 28 |     and the {{!Textloc.type-line_pos}line positions} on which those occur. *)
 29 | module Textloc : sig
 30 | 
 31 |   (** {1:fpath File paths} *)
 32 | 
 33 |   type fpath = string
 34 |   (** The type for file paths. *)
 35 | 
 36 |   val file_none : fpath
 37 |   (** [file_none] is ["-"]. A file path to use when there is none. *)
 38 | 
 39 |   (** {1:pos Positions} *)
 40 | 
 41 |   (** {2:byte_pos Byte positions} *)
 42 | 
 43 |   type byte_pos = int
 44 |   (** The type for zero-based, absolute, byte positions in text. If
 45 |       the text has [n] bytes, [0] is the first position and [n-1] is
 46 |       the last position. *)
 47 | 
 48 |   val byte_pos_none : byte_pos
 49 |   (** [byte_pos_none] is [-1]. A position to use when there is none. *)
 50 | 
 51 |   (** {2:lines Lines} *)
 52 | 
 53 |   type line_num = int
 54 |   (** The type for one-based, line numbers in the text. Lines
 55 |       increment after a {e newline} which is either a line feed ['\n']
 56 |       (U+000A), a carriage return ['\r'] (U+000D) or a carriage return
 57 |       and a line feed ["\r\n"] (<U+000D,U+000A>). *)
 58 | 
 59 |   val line_num_none : line_num
 60 |   (** [line_num_none] is [-1]. A line number to use when there is none. *)
 61 | 
 62 |   (** {2:line_pos Line positions} *)
 63 | 
 64 |   type line_pos = line_num * byte_pos
 65 |   (** The type for line positions. This identifies a line by its line
 66 |       number and the absolute byte position following its newline
 67 |       (or the start of text for the first line). That byte position:
 68 |       {ul
 69 |       {- Indexes the first byte of text of the line if the line is non-empty.}
 70 |       {- Indexes the first byte of the next newline if the line is empty.}
 71 |       {- Is out of bounds and equal to the text's length for a last empty
 72 |          line (this includes when the text is empty).}} *)
 73 | 
 74 |   val line_pos_first : line_pos
 75 |   (** [line_pos_first] is [1, 0]. Note that this is the only line position
 76 |       of the empty text. *)
 77 | 
 78 |   val line_pos_none : line_pos
 79 |   (** [line_pos_none] is [(line_none, pos_none)]. *)
 80 | 
 81 |   (** {1:tloc Text locations} *)
 82 | 
 83 |   type t
 84 |   (** The type for text locations. A text location identifies a text
 85 |       span in an UTF-8 encoded file by an inclusive range of absolute
 86 |       {{!type-byte_pos}byte positions} and the {{!type-line_pos}line positions}
 87 |       on which they occur.
 88 | 
 89 |       If the first byte equals the last byte the range contains
 90 |       exactly that byte. If the first byte is greater than the last
 91 |       byte this represents an insertion point before the first byte. In
 92 |       this case information about the last position should be ignored:
 93 |       it can contain anything. *)
 94 | 
 95 |   val none : t
 96 |   (** [none] is a position to use when there is none. *)
 97 | 
 98 |   val v :
 99 |     file:fpath -> first_byte:byte_pos -> last_byte:byte_pos ->
100 |     first_line:line_pos -> last_line:line_pos -> t
101 |   (** [v ~file ~first_byte ~last_byte ~first_line ~last_line] is a text
102 |       location with the given arguments, see corresponding accessors for
103 |       the semantics. If you don't have a file use {!file_none}. *)
104 | 
105 |   val file : t -> fpath
106 |   (** [file l] is [l]'s file. *)
107 | 
108 |   val first_byte : t -> byte_pos
109 |   (** [first_byte l] is [l]'s first byte. Irrelevant if {!is_none} is
110 |       [true]. *)
111 | 
112 |   val last_byte : t -> byte_pos
113 |   (** [last_byte l] is [l]'s last byte. Irrelevant if {!is_none} or {!is_empty}
114 |       is [true]. *)
115 | 
116 |   val first_line : t -> line_pos
117 |   (** [first_line l] is the line position on which [first_byte l] lies.
118 |       Irrelevant if {!is_none} is [true].*)
119 | 
120 |   val last_line : t -> line_pos
121 |   (** [last_line l] is the line position on which [last_byte l] lies.
122 |       Irrelevant if {!is_none} or {!is_empty} is [true].*)
123 | 
124 |   (** {2:preds Predicates and comparisons} *)
125 | 
126 |   val is_none : t -> bool
127 |   (** [is_none t] is [true] iff [first_byte < 0]. *)
128 | 
129 |   val is_empty : t -> bool
130 |   (** [is_empty t] is [true] iff [first_byte t > last_byte t]. *)
131 | 
132 |   val equal : t -> t -> bool
133 |   (** [equal t0 t1] is [true] iff [t0] and [t1] are equal. This checks
134 |       that {!file}, {!first_byte} and {!last_byte} are equal. Line information
135 |       is ignored. *)
136 | 
137 |   val compare : t -> t -> int
138 |   (** [compare t0 t1] orders [t0] and [t1]. The order is compatible
139 |       with {!equal}. Comparison starts with {!file}, follows with {!first_byte}
140 |       and ends, if needed, with {!last_byte}. Line information is ignored. *)
141 | 
142 |   (** {2:shrink_and_stretch Shrink and stretch} *)
143 | 
144 |   val set_first : t -> first_byte:byte_pos -> first_line:line_pos -> t
145 |   (** [set_first l ~first_byte ~first_line] sets the the first position of
146 |       [l] to given values. *)
147 | 
148 |   val set_last : t -> last_byte:byte_pos -> last_line:line_pos -> t
149 |   (** [set_last l ~last_byte ~last_line] sets the last position of [l]
150 |       to given values. *)
151 | 
152 |   val to_first : t -> t
153 |   (** [to_first l] has both first and last positions set to [l]'s first
154 |       position. The range spans {!first_byte}. See also {!before}. *)
155 | 
156 |   val to_last : t -> t
157 |   (** [to_last l] has both first and last positions set to [l]'s last
158 |       position. The range spans {!last_byte}. See also {!after}. *)
159 | 
160 |   val before : t -> t
161 |   (** [before t] is the {{!is_empty}empty} text location starting at
162 |       {!first_byte}. *)
163 | 
164 |   val after : t -> t
165 |   (** [after t] is the empty {{!is_empty}empty} location starting at
166 |       [last_byte t + 1]; note that at the end of input this may be an
167 |       invalid byte {e index}. The {!first_line} and {!last_line} of the
168 |       result is [last_line t]. *)
169 | 
170 |   val span : t -> t -> t
171 |   (** [span l0 l1] is the span from the smallest byte position of [l0] and
172 |       [l1] to the largest byte position of [l0] and [l1]. The file path is
173 |       taken from the greatest byte position. *)
174 | 
175 |   val reloc : first:t -> last:t -> t
176 |   (** [reloc ~first ~last] uses the first position of [first], the
177 |       last position of [last] and the file of [last]. *)
178 | 
179 |   (** {2:fmt Formatting} *)
180 | 
181 |   val pp_ocaml : Format.formatter -> t -> unit
182 |   (** [pp_ocaml] formats text locations like the OCaml compiler. *)
183 | 
184 |   val pp_gnu : Format.formatter -> t -> unit
185 |   (** [pp_gnu] formats text locations according to the
186 |       {{:https://www.gnu.org/prep/standards/standards.html#Errors}GNU
187 |       convention}. *)
188 | 
189 |   val pp : Format.formatter -> t -> unit
190 |   (** [pp] is {!pp_gnu}. *)
191 | 
192 |   val pp_dump : Format.formatter -> t -> unit
193 |   (** [pp_dump] formats raw data for debugging. *)
194 | end
195 | 
196 | (** Text node metadata.
197 | 
198 |     Holds text locations and custom, client-defined metadata. *)
199 | module Meta : sig
200 | 
201 |   type id = int
202 |   (** The type for non-negative metadata identifiers. *)
203 | 
204 |   type t
205 |   (** The type for abstract syntax tree node metadata. *)
206 | 
207 |   val none : t
208 |   (** [none] is metadata for when there is none, its {!textloc} is
209 |       {!Textloc.none}. *)
210 | 
211 |   val make : ?textloc:Textloc.t -> unit -> t
212 |   (** [make textloc] is metadata with text location [textloc] (defaults
213 |       to {!Textloc.none}) and a fresh identifier (see {!val-id}). *)
214 | 
215 |   val id : t -> id
216 |   (** [id m] is an identifier for the metadata. Depending on how you
217 |       process the abstract syntax tree this may become non-unique but
218 |       the metadata values in an abstract syntax tree returned by
219 |       {!Doc.of_string} with [locs:true] have distinct identifiers. *)
220 | 
221 |   val textloc : t -> Textloc.t
222 |   (** [textloc m] is the source location of the syntactic construct [m]
223 |       is attached to. *)
224 | 
225 |   val with_textloc : keep_id:bool -> t -> Textloc.t -> t
226 |   (** [with_textloc ~keep_id m textloc] is metadata [m] with text location
227 |       [textloc] and a fresh id, unless [keep_id] is [true]. *)
228 | 
229 |   (** {1:preds Predicates and comparisons} *)
230 | 
231 |   val equal : t -> t -> bool
232 |   (** [equal m0 m1] is [true] if [m0] and [m1] have the same {!val-id}.
233 |       Note that they may have different {{!custom}metadata.} *)
234 | 
235 |   val compare : t -> t -> int
236 |   (** [compare m0 m1] is a total order on metadata {!val-id}s compatible with
237 |       {!equal}. *)
238 | 
239 |   val is_none : t -> bool
240 |   (** [is_none m] is [equal none m]. *)
241 | 
242 |   (** {1:custom Custom metadata}
243 | 
244 |       {b Warning.} Operating on custom metadata never changes
245 |       {!val-id}. It is possible for two meta values to have the same
246 |       id and different metadata. *)
247 | 
248 |   type 'a key
249 |   (** The type for custom metadata keys. *)
250 | 
251 |   val key : unit -> 'a key
252 |   (** [key ()] is a new metadata key. *)
253 | 
254 |   val mem : 'a key -> t -> bool
255 |   (** [mem k m] is [true] iff [k] is bound in [m]. *)
256 | 
257 |   val add : 'a key -> 'a -> t -> t
258 |   (** [add k v m] is [m] with key [k] bound to [v]. *)
259 | 
260 |   val tag : unit key -> t -> t
261 |   (** [tag k m] is [add k () m]. *)
262 | 
263 |   val remove : 'a key -> t -> t
264 |   (** [remove k m] is [m] with key [k] unbound in [v]. *)
265 | 
266 |   val find : 'a key -> t -> 'a option
267 |   (** [find k m] the value of [k] in [m], if any. *)
268 | end
269 | 
270 | (** UTF-8 text decoder.
271 | 
272 |     A decoder inputs {e valid} UTF-8 text, maintains {{!Textloc}text locations}
273 |     according to advances in the input and has a {{!Textdec.lex}lexeme buffer}
274 |     for lexing. *)
275 | module Textdec : sig
276 | 
277 |   (** {1:dec Decoder} *)
278 | 
279 |   type t
280 |   (** The type for UTF-8 text decoders. *)
281 | 
282 |   val from_string : ?file:Textloc.fpath -> string -> t
283 |   (** [from_string ~file s] decodes [s] using [file] (defaults to
284 |       {!Tloc.no_file}) for text location. *)
285 | 
286 |   (** {1:loc Locations} *)
287 | 
288 |   val file : t -> Textloc.fpath
289 |   (** [file d] is the input file. *)
290 | 
291 |   val pos : t -> Textloc.byte_pos
292 |   (** [pos d] is the current decoding byte position. *)
293 | 
294 |   val line : t -> Textloc.line_pos
295 |   (** [line d] is the current decoding line position. Lines increment as
296 |       described {{!Tloc.line}here}. *)
297 | 
298 |   val loc :
299 |     t -> first_byte:Textloc.byte_pos -> last_byte:Textloc.byte_pos ->
300 |     first_line:Textloc.line_pos -> last_line:Textloc.line_pos -> Textloc.t
301 |   (** [loc d ~first_byte ~last_bytex ~first_line ~last_line] is
302 |       {!Tloc.v} using [file d] for the file. *)
303 | 
304 |   val loc_to_here :
305 |     t -> first_byte:Textloc.byte_pos -> first_line:Textloc.line_pos -> Textloc.t
306 |   (** [loc_to_here d ~first_byte ~first_line] is a location that starts at
307 |       [~first_byte] and [~first_line] and ends at the current decoding
308 |       position as determined by {!pos} and {!line}. *)
309 | 
310 |   val loc_here : t -> Textloc.t
311 |   (** [loc_here d] is like {!loc_to_here} with the start position
312 |       at the current decoding position as determined by
313 |       {!pos} and {!line}. *)
314 | 
315 |   (** {1:err Errors} *)
316 | 
317 |   exception Err of Textloc.t * string
318 |   (** The exception for errors. A location and an english error message *)
319 | 
320 |   val err : Textloc.t -> string -> 'b
321 |   (** [err loc msg] raises [Err (loc, msg)] with no trace. *)
322 | 
323 |   val err_to_here :
324 |     t -> first_byte:Textloc.byte_pos -> first_line:Textloc.line_pos ->
325 |     ('a, Format.formatter, unit, 'b) format4 -> 'a
326 |   (** [err_to_here d ~first_byte ~first_line fmt ...] is
327 |       [err d (loc_to_here d ~first_byte ~first_line) fmt ...] *)
328 | 
329 |   val err_here : t -> ('a, Format.formatter, unit, 'b) format4 -> 'a
330 |   (** [err_here d] is [err d (loc_here d) fmt ...]. *)
331 | 
332 |   (** {2:err_msg Error message helpers} *)
333 | 
334 |   val err_suggest : ?dist:int -> string list -> string -> string list
335 |   (** [err_suggest ~dist candidates s] are the elements of
336 |       [candidates] whose edit distance is the smallest to [s] and at
337 |       most at a distance of [dist] of [s] (defaults to [2]). If
338 |       multiple results are returned the order of [candidates] is
339 |       preserved. *)
340 | 
341 |   type 'a fmt = Format.formatter -> 'a -> unit
342 |   (** The type for formatters. *)
343 | 
344 |   val pp_and_enum : ?empty:unit fmt -> 'a fmt -> 'a list fmt
345 |   (** [and_enum ~empty pp_v ppf l] formats [l] according to its length.
346 |       {ul
347 |       {- [0], formats {!empty} (defaults to {!nop}).}
348 |       {- [1], formats the element with [pp_v].}
349 |       {- [2], formats ["%a and %a"] with the list elements}
350 |       {- [n], formats ["%a, ... and %a"] with the list elements}} *)
351 | 
352 |   val pp_or_enum : ?empty:unit fmt -> 'a fmt -> 'a list fmt
353 |   (** [or_enum] is like {!and_enum} but uses "or" instead of "and". *)
354 | 
355 |   val pp_did_you_mean : 'a fmt -> 'a list fmt
356 |   (** [did_you_mean pp_v] formats ["Did you mean %a ?"] with {!or_enum}
357 |       if the list is non-empty and {!nop} otherwise. *)
358 | 
359 |   val pp_must_be : 'a fmt -> 'a list fmt
360 |   (** [must_be pp_v] formats ["Must be %a."] with {!or_enum} if the list
361 |       is non-empty and {!nop} otherwise. *)
362 | 
363 |   val pp_unknown : kind:unit fmt -> 'a fmt -> 'a fmt
364 |   (** [unknown ~kind pp_v] formats ["Unknown %a %a." kind () pp_v]. *)
365 | 
366 |   val pp_unknown' :
367 |     kind:unit fmt -> 'a fmt -> hint:('a fmt -> 'a list fmt) ->
368 |     ('a * 'a list) fmt
369 |   (** [unknown ~kind pp_v ~hint (v, hints)] formats {!unknown} followed
370 |       by a space and [hint pp_v hints] if [hints] is non-empty. *)
371 | 
372 |   (** {1:dec Decoding} *)
373 | 
374 |   val eoi : t -> bool
375 |   (** [eoi d] is [true] iff the decoder is at the end of input. *)
376 | 
377 |   val byte : t -> int
378 |   (** [byte d] is the byte at current position or [0xFFFF] if
379 |       [eoi d] is [true]. *)
380 | 
381 |   val accept_uchar : t -> unit
382 |   (** [accept_uchar d] accepts an UTF-8 encoded character starting at
383 |       the current position and moves to the byte location after it. Raises
384 |       {!Err} in case of UTF-8 decoding error. *)
385 | 
386 |   val accept_byte : t -> unit
387 |   (** [accept_byte d] accepts the byte at the current position and
388 |       moves to the byte location after it. {b Warning.} Faster than
389 |       {!accept_uchar} but the client needs to make sure it's not
390 |       accepting invalid UTF-8 data, i.e. that [byte d] is an US-ASCII
391 |       encoded character (i.e. [<= 0x7F]). *)
392 | 
393 |   (** {1:lex Lexeme buffer} *)
394 | 
395 |   val lex_clear : t -> unit
396 |   (** [lex_clear d] sets the lexeme to the empty string. *)
397 | 
398 |   val lex_pop : t -> string
399 |   (** [lex_pop d] is the lexeme and {!lex_clear}s it. *)
400 | 
401 |   val lex_add_byte : t -> int -> unit
402 |   (** [lex_add_byte d b] adds byte [b] to the lexen. *)
403 | 
404 |   val lex_add_bytes : t -> string -> unit
405 |   (** [lex_add_byte d s] adds bytes [s] to the lexen. *)
406 | 
407 |   val lex_add_char : t -> char -> unit
408 |   (** [lex_add_char d c] adds character [c] to the lexen. *)
409 | 
410 |   val lex_add_uchar : t -> Uchar.t -> unit
411 |   (** [lex_add_uchar t u] adds the UTF-8 encoding of character [u]
412 |       to the lexen. *)
413 | 
414 |   val lex_accept_uchar : t -> unit
415 |   (** [lex_accept_uchar d] is like {!accept_uchar} but also adds the
416 |       UTF-8 byte sequence to the lexeme. *)
417 | 
418 |   val lex_accept_byte : t -> unit
419 |   (** [lex_accept_byte d] is like {!accept_byte} but also adds the
420 |       byte to the lexeme. {b Warning.} {!accept_byte}'s warning
421 |       applies. *)
422 | 
423 |   (** {1:text String substitutions and insertions}
424 | 
425 |       Strictly speaking this doesn't belong here but here you go. *)
426 | 
427 |   val string_subrange : ?first:int -> ?last:int -> string -> string
428 |   (** [string_subrange ~first ~last s] are the consecutive bytes of [s]
429 |       whose indices exist in the range \[[first];[last]\].
430 | 
431 |       [first] defaults to [0] and last to [String.length s - 1].
432 | 
433 |       Note that both [first] and [last] can be any integer. If
434 |       [first > last] the interval is empty and the empty string is
435 |       returned. *)
436 | 
437 |   val string_replace : start:int -> stop:int -> rep:string -> string -> string
438 |   (** [string_replace ~start ~stop ~rep s] replaces the index range
439 |       \[[start];stop-1\] of [s] with [rep] as follows. If [start = stop]
440 |       the [rep] is inserted before [start]. [start] and [stop] must be
441 |       in range \[[0];[String.length s]\] and [start <= stop] or
442 |       [Invalid_argument] is raised. *)
443 | end
444 | 


--------------------------------------------------------------------------------
/src/serialkit_toml.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2021 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Serialkit_text
  7 | 
  8 | let max_month_day =           (* max day number in a given year's month. *)
  9 |   let is_leap_year y = (y mod 4 = 0) && (y mod 100 <> 0 || y mod 400 = 0) in
 10 |   let mlen = [|31; 28 (* or not *); 31; 30; 31; 30; 31; 31; 30; 31; 30; 31|] in
 11 |   fun y m -> if (m = 2 && is_leap_year y) then 29 else mlen.(m - 1)
 12 | 
 13 | let is_date_valid (y, m, d) =
 14 |   0 <= y && y <= 9999 &&
 15 |   1 <= m && m <= 12 &&
 16 |   1 <= d && d <= max_month_day y m
 17 | 
 18 | let is_time_valid (hh, mm, ss) =
 19 |   0 <= hh && hh <= 23 &&
 20 |   0 <= mm && mm <= 59 &&
 21 |   0 <= ss && ss <= 60
 22 | 
 23 | module Toml = struct
 24 |   let file_ext = ".toml"
 25 |   let mime_type = "application/toml"
 26 |   let version = "1.0.0"
 27 | 
 28 |   module Textloc = Serialkit_text.Textloc
 29 |   module Meta = Serialkit_text.Meta
 30 | 
 31 |   type 'a node = 'a * Meta.t
 32 | 
 33 |   type tz_offset_s = int
 34 |   type date = int * int * int
 35 | 
 36 |   type time = (int * int * int) * float option * tz_offset_s option
 37 |   type date_time = date option * time option
 38 |   type table = (string node * value) node list
 39 | 
 40 |   and value =
 41 |   [ `Boolean of bool node
 42 |   | `Integer of int64 node
 43 |   | `Float of float node
 44 |   | `String of string node
 45 |   | `Array of value list node
 46 |   | `Date_time of date_time node
 47 |   | `Table of table node ]
 48 | 
 49 |   type t = table node
 50 | 
 51 |   (* Formatting *)
 52 | 
 53 |   type 'a fmt = Format.formatter -> 'a -> unit
 54 |   let pf = Format.fprintf
 55 |   let pp_char = Format.pp_print_char
 56 |   let pp_str = Format.pp_print_string
 57 |   let pp_sp = Format.pp_print_space
 58 |   let pp_date ppf (y, m, d) = pf ppf "%04d-%02d-%02d" y m d
 59 |   let pp_time ppf ((hh, mm, ss), frac, tz) = failwith "TODO"
 60 |   let pp_date_time ppf (date, time) = match date, time with
 61 |   | None, None -> assert false
 62 |   | Some d, None -> pp_date ppf d
 63 |   | None, Some t -> pp_time ppf t
 64 |   | Some d, Some t -> pp_date ppf d; pp_char ppf ' '; pp_time ppf t
 65 | 
 66 |   let rec pp_value : value fmt = fun ppf -> function
 67 |   | `Boolean (b, _) -> Format.pp_print_bool ppf b
 68 |   | `Integer (i, _) -> pf ppf "%Ld" i
 69 |   | `Float (f, _) -> pf ppf "%.16g" f
 70 |   | `String (s, _) -> pf ppf "%S" s
 71 |   | `Array (l, _) ->
 72 |       let pp_sep ppf () = pp_char ppf ','; pp_sp ppf () in
 73 |       Format.pp_open_box ppf 2;
 74 |       Format.pp_print_list ~pp_sep pp_value ppf l;
 75 |       Format.pp_close_box ppf ()
 76 |   | `Date_time (dt, _) -> pp_date_time ppf dt
 77 |   | `Table (t, _) ->
 78 |       let pp_binding ppf (((k, _), v), _) =
 79 |         pf ppf "@[<h>%s =@ %a@]" k pp_value v
 80 |       in
 81 |       Format.pp_print_list ~pp_sep:Format.pp_print_cut pp_binding ppf t
 82 | 
 83 |   let pp ppf v = pp_value ppf (`Table v)
 84 |   let pp_layout = (* TODO *) pp
 85 | 
 86 |   (* Decode errors *)
 87 | 
 88 |   module Error = struct
 89 |     type expected =
 90 |     | Basic_char
 91 |     | Bin_digit
 92 |     | Char of char
 93 |     | Comment
 94 |     | Comment_char
 95 |     | Date_time
 96 |     | Dec_digit
 97 |     | Eoi
 98 |     | Equal_char
 99 |     | False
100 |     | Float
101 |     | Float_exp
102 |     | Float_frac
103 |     | Hex_digit
104 |     | Inf
105 |     | Integer
106 |     | Key
107 |     | Keyval
108 |     | Lf
109 |     | Literal_char
110 |     | Nan
111 |     | Newline
112 |     | Oct_digit
113 |     | Table
114 |     | Tz_offset
115 |     | True
116 |     | Value
117 | 
118 |     let pp_expected ppf = function
119 |     | Basic_char -> pp_str ppf "a basic string character"
120 |     | Bin_digit -> pp_str ppf "a binary digit"
121 |     | Char c -> pp_str ppf "a '%C' character"
122 |     | Comment -> pp_str ppf "a comment"
123 |     | Comment_char -> pp_str ppf "a comment character"
124 |     | Date_time -> pp_str ppf "a date-time"
125 |     | Dec_digit -> pp_str ppf "a decimal digit"
126 |     | Eoi -> pp_str ppf "end of input"
127 |     | Equal_char -> pp_str ppf "an equal character ('=')"
128 |     | Hex_digit -> pp_str ppf "an hexadecimal digit"
129 |     | False -> pp_str ppf "'true'"
130 |     | Float -> pp_str ppf "float"
131 |     | Float_exp -> pp_str ppf "float exponent"
132 |     | Float_frac -> pp_str ppf "float fractional part"
133 |     | Inf -> pp_str ppf "'inf'"
134 |     | Integer -> pp_str ppf "integer"
135 |     | Key -> pp_str ppf "a key"
136 |     | Keyval -> pp_str ppf "a key/value pair"
137 |     | Lf -> pp_str ppf "a line feed (\\n)"
138 |     | Literal_char -> pp_str ppf "a string literal character"
139 |     | Nan -> pp_str ppf "'nan'"
140 |     | Newline -> pp_str ppf "a newline ('\\n')"
141 |     | Oct_digit -> pp_str ppf "an octal digit"
142 |     | Table -> pp_str ppf "a table"
143 |     | Tz_offset -> pp_str ppf "a timezone offset"
144 |     | True -> pp_str ppf "'true'"
145 |     | Value -> pp_str ppf "a value"
146 | 
147 |     type unclosed =
148 |     | Basic_string | Basic_multiline_string | Literal_string
149 |     | Literal_multiline_string
150 | 
151 |     let pp_unclosed ppf = function
152 |     | Basic_string -> pp_str ppf "Unclosed basic string"
153 |     | Basic_multiline_string -> pp_str ppf "Unclosed multiline basic string"
154 |     | Literal_string -> pp_str ppf "Unclosed literal string"
155 |     | Literal_multiline_string -> pp_str ppf "Unclosed multiline literal string"
156 | 
157 |     type escape =
158 |     | Not_escape_char of char
159 |     | Not_uchar_value of int
160 |     | Invalid_uchar_escape
161 | 
162 |     let pp_escape ppf = function
163 |     | Not_escape_char c -> pf ppf "%C is not a valid escape character" c
164 |     | Not_uchar_value i -> pf ppf "%04X is not a Unicode character" i
165 |     | Invalid_uchar_escape -> pf ppf "Invalid Unicode character escape"
166 | 
167 |     type invalid =
168 |     | Leading_zeros
169 |     | Int64_overflow
170 |     | Date of date
171 |     | Hour of int
172 |     | Minute of int
173 |     | Second of int
174 | 
175 |     let pp_invalid ppf = function
176 |     | Leading_zeros -> pf ppf "Leading zeros are not allowed"
177 |     | Int64_overflow -> pf ppf "Integer overflows 64-bit integers"
178 |     | Date (y, m, d) -> pf ppf "%04d-%02d-%02d is not a valid date" y m d
179 |     | Hour i -> pf ppf "%02d is not a valid hour (00-23)" i
180 |     | Minute i -> pf ppf "%02d is not a valid minute (00-59)" i
181 |     | Second i -> pf ppf "%02d is not a valid second (00-60)" i
182 | 
183 |     type kind =
184 |     | Escape of escape
185 |     | Expected of expected list
186 |     | Invalid of invalid
187 |     | Unclosed of unclosed
188 | 
189 |     let pp_kind () ppf = function
190 |     | Escape esc -> pp_escape ppf esc
191 |     | Expected exps -> Textdec.pp_or_enum pp_expected ppf exps
192 |     | Invalid inv -> pp_invalid ppf inv
193 |     | Unclosed uncl -> pp_unclosed ppf uncl
194 | 
195 |     type t = kind * Textloc.t
196 |     let pp_prefix ppf () = Format.pp_print_string ppf "Error: "
197 |     let pp
198 |         ?(pp_loc = Textloc.pp) ?(pp_kind = pp_kind ())
199 |         ?(pp_prefix = pp_prefix) () ppf (k, l)
200 |       =
201 |       pf ppf "@[<v>%a:@,%a%a@]" pp_loc l pp_prefix () pp_kind k
202 | 
203 |     let to_string ?(pp = pp ()) = function
204 |     | Ok _ as v -> v | Error e -> Error (Format.asprintf "%a" pp e)
205 |   end
206 | 
207 |   (* Decoder *)
208 | 
209 |   type decoder =
210 |     { file : Textloc.fpath;
211 |       s : string;
212 |       nolocs : bool;
213 |       nolayout : bool;
214 |       max_pos : int; (* String.length i - 1 *)
215 |       mutable pos : int; (* current byte position. *)
216 |       mutable line_pos : Textloc.line_pos; (* line position of [pos] *)
217 |       buf : Buffer.t; }
218 | 
219 |   let decoder ?(layout = false) ?(locs = true) ?(file = Textloc.file_none) s =
220 |     let nolocs = not locs and nolayout = not layout in
221 | 
222 |     { file; s; nolocs; nolayout; max_pos = String.length s - 1;
223 |       pos = 0; line_pos = Textloc.line_pos_first;
224 |       buf = Buffer.create 255 }
225 | 
226 |   let tloc_line_span d ~first ~last =
227 |     Textloc.v ~file:d.file ~first_byte:first ~last_byte:last
228 |       ~first_line:d.line_pos ~last_line:d.line_pos
229 | 
230 |   (* Decoding errors *)
231 | 
232 |   exception Err of Error.t
233 | 
234 |   let dec_error_escape d ~first ~last esc =
235 |     let tloc = tloc_line_span d ~first ~last in
236 |     raise (Err (Escape esc, tloc))
237 | 
238 |   let dec_error_expected d ~first ?(last = first) exps =
239 |     let tloc = tloc_line_span d ~first ~last in
240 |     raise (Err (Expected exps, tloc))
241 | 
242 |   let dec_error_invalid d ~first ?(last = first) inv =
243 |     let tloc = tloc_line_span d ~first ~last in
244 |     raise (Err (Invalid inv, tloc))
245 | 
246 |   let dec_error_unclosed d ~first uncl =
247 |     let tloc = tloc_line_span d ~first ~last:d.max_pos in
248 |     raise (Err (Unclosed uncl, tloc))
249 | 
250 |   let dec_error_eoi d ~first exps =
251 |     let tloc = tloc_line_span d ~first ~last:d.max_pos in
252 |     raise (Err (Expected exps, tloc))
253 | 
254 |   (* AST *)
255 | 
256 |   (* The type structure below closely mirrors the TOML ABNF. *)
257 | 
258 |   type comment = Textloc.t
259 |   (* if non empty includes both the # and the newline *)
260 |   type ws = Textloc.t (* may include newlines if there's no comment or data. *)
261 |   type ws_comment_nl = ws * comment (* either has the newline, or none if eoi *)
262 |   type basic_string = string
263 |   type literal_string = string
264 | 
265 |   type simple_key = [`Bare | `Quoted_basic | `Quoted_literal ] * string
266 |   type key = (ws * simple_key * ws) list
267 | 
268 |   type keyval = key * ws * tval
269 |   and tval =
270 |   | String
271 |   | Boolean of bool
272 |   | Array of ws_comment_nl * tval
273 |   | Inline_table of ws * (keyval * ws) list
274 |   | Date_time of date_time
275 |   | Float of float
276 |   | Integer of int64
277 | 
278 |   type expression =
279 |   | Whitespace of ws_comment_nl
280 |   | Keyval' of keyval * ws_comment_nl
281 |   | Table' of ws * [`Std | `Array] * key * ws_comment_nl
282 | 
283 |   type toml = expression list
284 | 
285 |   (* Decoding functions *)
286 | 
287 |   let dec_ws d = (* ws *)
288 |     let rec loop s max i =
289 |       if i > max then i else match s.[i] with
290 |       | ' ' | '\t' -> loop s max (i + 1)
291 |       | _ -> i
292 |     in
293 |     let next = loop d.s d.max_pos d.pos in
294 |     let ws = tloc_line_span d ~first:d.pos ~last:(next - 1) in
295 |     d.pos <- next;
296 |     ws
297 | 
298 |   let dec_newline d ws = (* newline, first newline checked, added to ws *)
299 |     let next =
300 |       if d.s.[d.pos] = '\n' then d.pos + 1 else (* '\r' *)
301 |       let next = d.pos + 1 in
302 |       if next > d.max_pos || d.s.[next] <> '\n'
303 |       then dec_error_expected d ~first:next [Lf]
304 |       else next + 1
305 |     in
306 |     let line_pos = fst d.line_pos + 1, next in
307 |     let ws = tloc_line_span d ~first:(Textloc.first_byte ws) ~last:(next - 1) in
308 |     d.pos <- next;
309 |     d.line_pos <- line_pos;
310 |     ws
311 | 
312 |   let dec_comment_newline d = (* [comment] newline, # checked *)
313 |     let rec loop s max i =
314 |       if i > max then i else match s.[i] with
315 |       | '\n' -> i + 1
316 |       | '\r' ->
317 |           let i = i + 1 in
318 |           if i > d.max_pos || d.s.[i] <> '\n'
319 |           then dec_error_expected ~first:i d [Lf]
320 |           else i + 1
321 |       | '\x09' | '\x20' .. '\x7E' -> loop s max (i + 1)
322 |       | '\x00' .. '\x08' | '\x0A' .. '\x1F' | '\x7F' ->
323 |           dec_error_expected d ~first:i [Comment_char]
324 |       | _ ->
325 |           let udec = String.get_utf_8_uchar s i in
326 |           match Uchar.utf_decode_is_valid udec with
327 |           | true -> loop s max (i + Uchar.utf_decode_length udec)
328 |           | false -> dec_error_expected d ~first:i [Comment_char]
329 |     in
330 |     let next = loop d.s d.max_pos d.pos in
331 |     let line_pos =
332 |       if next > d.max_pos then d.line_pos else
333 |       fst d.line_pos + 1, next
334 |     in
335 |     let comment = tloc_line_span d ~first:d.pos ~last:(next - 1) in
336 |     d.pos <- next;
337 |     d.line_pos <- line_pos;
338 |     comment
339 | 
340 |   let dec_ws_comment_newline d =
341 |     if d.pos > d.max_pos then Textloc.none, Textloc.none else
342 |     let ws = dec_ws d in
343 |     if d.pos > d.max_pos then ws, Textloc.none else
344 |     match d.s.[d.pos] with
345 |     | '#' -> ws, dec_comment_newline d
346 |     | '\n' | '\r' -> dec_newline d ws, Textloc.none
347 |     | _ -> dec_error_expected d ~first:d.pos [Comment; Newline]
348 | 
349 |   let dec_table d ws = failwith "TODO"
350 | 
351 |   let dec_literal_string d = (* apostrophe checked. *)
352 |     let rec loop s max i =
353 |       if i > max then dec_error_unclosed d ~first:d.pos Literal_string else
354 |       match s.[i] with
355 |       | '\'' ->
356 |           let first = d.pos + 1 and last = i - 1 in
357 |           let s = String.sub s first (last - first + 1) in
358 |           d.pos <- i + 1;
359 |           s
360 |       | '\x09' | '\x20' .. '\x26' | '\x28' .. '\x7E' -> loop s max (i + 1)
361 |       | '\x00' .. '\x08' | '\x0A' .. '\x1F' | '\x7F' ->
362 |           dec_error_expected d ~first:i [Literal_char]
363 |       | _ ->
364 |         let udec = String.get_utf_8_uchar s i in
365 |         match Uchar.utf_decode_is_valid udec with
366 |         | true -> loop s max (i + Uchar.utf_decode_length udec)
367 |         | false -> dec_error_expected d ~first:i [Literal_char]
368 |     in
369 |     loop d.s d.max_pos (d.pos + 1)
370 | 
371 |   let rec dec_uchar_escape d ~first count u i = match count with
372 |   | 0 ->
373 |       if Uchar.is_valid u then Uchar.unsafe_of_int u, i else
374 |       dec_error_escape d ~first ~last:(i - 1) (Not_uchar_value u)
375 |   | count when i > d.max_pos ->
376 |       dec_error_escape d ~first ~last:(i - 1) Invalid_uchar_escape
377 |   | count ->
378 |       match d.s.[i] with
379 |       | '0' .. '9' as c ->
380 |           let u = u * 16 + (Char.code c - 0x30) in
381 |           dec_uchar_escape d ~first (count - 1) u (i + 1)
382 |       | 'A' .. 'F' as c ->
383 |           let u = u * 16 + (Char.code c - 0x37) in
384 |           dec_uchar_escape d ~first (count - 1) u (i + 1)
385 |       | c ->
386 |           dec_error_escape d ~first ~last:(i - 1) Invalid_uchar_escape
387 | 
388 |   let dec_basic_string_with_escapes d first_esc =
389 |     let flush b s max start i =
390 |       if start <= max then Buffer.add_substring b s start (i - start)
391 |     in
392 |     let rec loop b s max start i =
393 |       if i > max then dec_error_unclosed d ~first:d.pos Basic_string else
394 |       match s.[i] with
395 |       | '\"' ->
396 |           flush b s max start i; d.pos <- i + 1; Buffer.contents b
397 |       | '\x09' | '\x20' .. '\x21' | '\x23' .. '\x5B' | '\x5D' .. '\x7E' ->
398 |           loop b s max start (i + 1)
399 |       | '\x00' .. '\x08' | '\x0A' .. '\x1F' | '\x7F' ->
400 |           dec_error_expected d ~first:i [Basic_char]
401 |       | '\x5C' ->
402 |           flush b s max start i;
403 |           let i = i + 1 in
404 |           if i > max then dec_error_unclosed d ~first:d.pos Basic_string else
405 |           begin match s.[i] with
406 |           | '\x22' | '\x5C' | '\x62' | '\x66' | '\x6E' | '\x72' | '\x74' as c ->
407 |               let next = i + 1 in
408 |               Buffer.add_char b c; loop b s max next next
409 |           | '\x75' ->
410 |               let u, next = dec_uchar_escape d ~first:(i - 1) 4 0 i in
411 |               Buffer.add_utf_8_uchar b u; loop b s max next next
412 |           | '\x55' ->
413 |               let u, next = dec_uchar_escape d ~first:(i - 1) 8 0 i in
414 |               Buffer.add_utf_8_uchar b u; loop b s max next next
415 |           | c ->
416 |               let first = i - 1 and last = i + 1 in
417 |               dec_error_escape d ~first ~last (Not_escape_char c)
418 |           end
419 |       | _ ->
420 |         let udec = String.get_utf_8_uchar s i in
421 |         match Uchar.utf_decode_is_valid udec with
422 |         | true -> loop b s max start (i + Uchar.utf_decode_length udec)
423 |         | false -> dec_error_expected d ~first:i [Literal_char]
424 |     in
425 |     Buffer.reset d.buf;
426 |     flush d.buf d.s d.max_pos d.pos first_esc;
427 |     loop d.buf d.s d.max_pos first_esc first_esc
428 | 
429 |   let dec_basic_string d = (* quote checked *)
430 |     let rec loop s max i =
431 |       if i > max then dec_error_unclosed d ~first:d.pos Basic_string else
432 |       match s.[i] with
433 |       | '\"' ->
434 |           let first = d.pos + 1 and last = i - 1 in
435 |           let s = String.sub s first (last - first + 1) in
436 |           d.pos <- i + 1;
437 |           s
438 |       | '\x09' | '\x20' .. '\x21' | '\x23' .. '\x5B' | '\x5D' .. '\x7E' ->
439 |           loop s max (i + 1)
440 |       | '\x00' .. '\x08' | '\x0A' .. '\x1F' | '\x7F' ->
441 |           dec_error_expected d ~first:i [Basic_char]
442 |       | '\x5C' ->
443 |           dec_basic_string_with_escapes d i
444 |       | _ ->
445 |         let udec = String.get_utf_8_uchar s i in
446 |         match Uchar.utf_decode_is_valid udec with
447 |         | true -> loop s max (i + Uchar.utf_decode_length udec)
448 |         | false -> dec_error_expected d ~first:i [Literal_char]
449 |     in
450 |     loop d.s d.max_pos d.pos
451 | 
452 |   let is_unquoted_key_char = function
453 |   | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | '_' -> true | _ -> false
454 | 
455 |   let dec_unquoted_key d =
456 |     let rec loop s max i =
457 |       if i > max || not (is_unquoted_key_char s.[i]) then i else
458 |       loop s max (i + 1)
459 |     in
460 |     let next = loop d.s d.max_pos d.pos in
461 |     let first = d.pos and last = next - 1 in
462 |     let s = String.sub d.s first (last - first + 1) in
463 |     d.pos <- next;
464 |     s
465 | 
466 |   let dec_simple_key d =
467 |     if d.pos > d.max_pos then dec_error_expected d ~first:d.pos [Key] else
468 |     match d.s.[d.pos] with
469 |     | '\'' -> let s = dec_literal_string d in `Quoted_literal, s
470 |     | '\"' -> let s = dec_basic_string d in `Quoted_basic, s
471 |     | c when is_unquoted_key_char c -> `Bare, dec_unquoted_key d
472 |     | _ -> dec_error_expected d ~first:d.pos [Key]
473 | 
474 |   let dec_key d ws =
475 |     let rec loop d acc wsl =
476 |       let k = dec_simple_key d in
477 |       let wsr = dec_ws d in
478 |       let acc = (wsl, k, wsr) :: acc in
479 |       if d.pos > d.max_pos || d.s.[d.pos] <> '.' then List.rev acc else
480 |       let ws = dec_ws d in
481 |       loop d acc ws
482 |     in
483 |     loop d [] ws
484 | 
485 |   let dec_true d = (* 't' checked *)
486 |     let last = d.pos + 3 in
487 |     if last > d.max_pos then dec_error_eoi d ~first:d.pos [True] else
488 |     if d.s.[d.pos + 1] = 'r' && d.s.[d.pos + 2] = 'u' && d.s.[d.pos + 3] = 'e'
489 |     then (d.pos <- last + 1; Boolean true)
490 |     else dec_error_expected d ~first:d.pos ~last [True]
491 | 
492 |   let dec_false d = (* 'f' checked *)
493 |     let last = d.pos + 4 in
494 |     if last > d.max_pos then dec_error_eoi d ~first:d.pos [False] else
495 |     if d.s.[d.pos + 1] = 'a' && d.s.[d.pos + 2] = 'l' &&
496 |        d.s.[d.pos + 3] = 's' && d.s.[d.pos + 4] = 'e'
497 |     then (d.pos <- last + 1; Boolean false)
498 |     else dec_error_expected d ~first:d.pos ~last [False]
499 | 
500 |   let parse_float d ~first ~last =
501 |     (* [float_of_string] is compatible with what we accept. *)
502 |     try Float (float_of_string (String.sub d.s first (last - first + 1))) with
503 |     | Failure _ -> assert false
504 | 
505 |   let parse_int64 d ~first ~last =
506 |     try Integer (Int64.of_string (String.sub d.s first (last - first + 1))) with
507 |     | Failure _ ->
508 |         (* [Int64.of_string] is compatible with what our parser checked. If
509 |            we get here, it should be only because of overflow. *)
510 |         dec_error_invalid d ~first ~last Int64_overflow
511 | 
512 |   let dec_inf d ~first = (* 'i' checked *)
513 |     let last = d.pos + 2 in
514 |     if last > d.max_pos then dec_error_eoi d ~first [Inf] else
515 |     if d.s.[d.pos + 1] = 'n' && d.s.[d.pos + 2] = 'f'
516 |     then (d.pos <- last + 1; parse_float d ~first ~last)
517 |     else dec_error_expected d ~first ~last [Inf]
518 | 
519 |   let dec_nan d ~first = (* 'n' checked *)
520 |     let last = d.pos + 2 in
521 |     if last > d.max_pos then dec_error_eoi d ~first [Nan] else
522 |     if d.s.[d.pos + 1] = 'a' && d.s.[d.pos + 2] = 'n'
523 |     then (d.pos <- last + 1; parse_float d ~first ~last)
524 |     else dec_error_expected d ~first ~last [Nan]
525 | 
526 |   let rec dec_float_exp d ~first = (* 'e' checked *)
527 |     let rec loop d ~last_is_digit ~first pos =
528 |       if pos > d.max_pos then parse_float d ~first ~last:(pos - 1) else
529 |       match d.s.[pos] with
530 |       | '0' .. '9' -> loop d ~last_is_digit:true ~first (pos + 1)
531 |       | '_' when last_is_digit -> loop d ~last_is_digit:false ~first (pos + 1)
532 |       | _ when last_is_digit ->
533 |           d.pos <- pos; parse_float d ~first ~last:(pos - 1)
534 |       | _ -> dec_error_expected d ~first:pos [Dec_digit]
535 |     in
536 |     let next = d.pos + 1 in
537 |     if next > d.max_pos then dec_error_eoi d ~first [Float_exp] else
538 |     match d.s.[next] with
539 |     | '-' | '+' -> d.pos <- next; dec_float_exp d ~first
540 |     | _ ->  loop d ~last_is_digit:false ~first next
541 | 
542 |   let dec_float_frac d ~first = (* '.' checked *)
543 |     let rec loop d ~last_is_digit ~first pos =
544 |       if pos > d.max_pos then parse_float d ~first ~last:(pos - 1) else
545 |       match d.s.[pos] with
546 |       | '0' .. '9' -> loop d ~last_is_digit:true ~first (pos + 1)
547 |       | 'e' -> d.pos <- pos; dec_float_exp d ~first
548 |       | '_' when last_is_digit -> loop d ~last_is_digit:false ~first (pos + 1)
549 |       | _ when last_is_digit ->
550 |           d.pos <- pos; parse_float d ~first ~last:(pos - 1)
551 |       | _ -> dec_error_expected d ~first:pos [Dec_digit]
552 |     in
553 |     let next = d.pos + 1 in
554 |     if next > d.max_pos then dec_error_eoi d ~first [Float_frac] else
555 |     loop d ~last_is_digit:false ~first next
556 | 
557 |   let rec dec_dec_integer_or_float d ~first =
558 |     if d.pos > d.max_pos then dec_error_eoi d ~first [Integer; Float] else
559 |     match d.s.[d.pos] with
560 |     | 'i' -> dec_inf d ~first
561 |     | 'n' -> dec_nan d ~first
562 |     | '0' ->
563 |         let next = d.pos + 1 in
564 |         begin match d.s.[next] with
565 |         | '.' -> d.pos <- next; dec_float_frac d ~first
566 |         | 'e' -> d.pos <- next; dec_float_exp d ~first
567 |         | '0' .. '9' -> dec_error_invalid d ~first Leading_zeros
568 |         | _ -> Integer 0L
569 |         end
570 |     | '1' .. '9' ->
571 |         let rec loop d ~last_is_digit ~first pos =
572 |           if pos > d.max_pos then parse_int64 d ~first ~last:(pos - 1) else
573 |           match d.s.[pos] with
574 |           | '0' .. '9' -> loop d ~last_is_digit:true ~first (pos + 1)
575 |           | '.' -> d.pos <- pos; dec_float_frac d ~first
576 |           | 'e' -> d.pos <- pos; dec_float_exp d ~first
577 |           | '_' when last_is_digit ->
578 |               loop d ~last_is_digit:false ~first (pos + 1)
579 |           | _ when last_is_digit ->
580 |               d.pos <- pos; parse_int64 d ~first ~last:(pos - 1)
581 |           | _ -> dec_error_expected d ~first:pos [Dec_digit]
582 |         in
583 |         loop d ~last_is_digit:false ~first d.pos
584 |     | _ -> dec_error_expected d ~first:d.pos [Integer; Float]
585 | 
586 |   let dec_hex_integer d ~first =
587 |     if d.pos > d.max_pos then dec_error_eoi d ~first [Hex_digit] else
588 |     let rec loop d ~last_is_digit ~first pos =
589 |       if pos > d.max_pos then parse_int64 d ~first ~last:(pos - 1) else
590 |       match d.s.[pos] with
591 |       | '0' .. '9' | 'A' .. 'F' -> loop d ~last_is_digit:true ~first (pos + 1)
592 |       | '_' when last_is_digit -> loop d ~last_is_digit:false ~first (pos + 1)
593 |       | _ when last_is_digit ->
594 |           d.pos <- pos; parse_int64 d ~first ~last:(pos - 1)
595 |       | _ -> dec_error_expected d ~first:pos [Hex_digit]
596 |     in
597 |     loop d ~last_is_digit:false ~first d.pos
598 | 
599 |   let dec_oct_integer d ~first =
600 |     if d.pos > d.max_pos then dec_error_eoi d ~first [Oct_digit] else
601 |     let rec loop d ~last_is_digit ~first pos =
602 |       if pos > d.max_pos then parse_int64 d ~first ~last:(pos - 1) else
603 |       match d.s.[pos] with
604 |       | '0' .. '7' -> loop d ~last_is_digit:true ~first (pos + 1)
605 |       | '_' when last_is_digit -> loop d ~last_is_digit:false ~first (pos + 1)
606 |       | _ when last_is_digit ->
607 |           d.pos <- pos; parse_int64 d ~first ~last:(pos - 1)
608 |       | _ -> dec_error_expected d ~first:pos [Oct_digit]
609 |     in
610 |     loop d ~last_is_digit:false ~first d.pos
611 | 
612 |   let dec_bin_integer d ~first =
613 |     if d.pos > d.max_pos then dec_error_eoi d ~first [Bin_digit] else
614 |     let rec loop d ~last_is_digit ~first pos =
615 |       if pos > d.max_pos then parse_int64 d ~first ~last:(pos - 1) else
616 |       match d.s.[pos] with
617 |       | '0' .. '1' -> loop d ~last_is_digit:true ~first (pos + 1)
618 |       | '_' when last_is_digit -> loop d ~last_is_digit:false ~first (pos + 1)
619 |       | _ when last_is_digit ->
620 |           d.pos <- pos; parse_int64 d ~first ~last:(pos - 1)
621 |       | _ -> dec_error_expected d ~first:pos [Bin_digit]
622 |     in
623 |     loop d ~last_is_digit:false ~first d.pos
624 | 
625 |   let dec_integer_or_float d ~first = (* first digit checked *)
626 |     d.pos <- d.pos + 1;
627 |     if d.pos > d.max_pos then parse_int64 d ~first ~last:(d.pos - 1) else
628 |     match d.s.[d.pos] with
629 |     | 'x' -> d.pos <- d.pos + 1; dec_hex_integer d ~first
630 |     | 'o' -> d.pos <- d.pos + 1; dec_oct_integer d ~first
631 |     | 'b' -> d.pos <- d.pos + 1; dec_bin_integer d ~first
632 |     | '0' .. '9' -> dec_dec_integer_or_float d ~first
633 |     | _ -> dec_error_expected d ~first ~last:d.pos [Integer]
634 | 
635 |   let n_digits d ~n =
636 |     let rec loop d n i v =
637 |       if n = 0 then (d.pos <- i; v) else
638 |       if i > d.max_pos then dec_error_eoi d ~first:i [Dec_digit] else
639 |       match d.s.[i] with
640 |       | '0' .. '9' as c ->
641 |           loop d (n - 1) (i + 1) (v * 10 + (Char.code c - 0x30))
642 |       | _ -> dec_error_expected d ~first:i [Dec_digit]
643 |     in
644 |     loop d n d.pos 0
645 | 
646 |   let dec_char d ~c =
647 |     if d.pos > d.max_pos then dec_error_eoi d ~first:d.pos [Char c] else
648 |     if d.s.[d.pos] = c then (d.pos <- d.pos + 1) else
649 |     dec_error_expected d ~first:d.pos [Char c]
650 | 
651 |   let dec_full_date d =
652 |     let first = d.pos in
653 |     let yyyy = n_digits d ~n:4 in
654 |     dec_char d ~c:'-';
655 |     let mm = n_digits d ~n:2 in
656 |     dec_char d ~c:'-';
657 |     let dd = n_digits d ~n:2 in
658 |     let date = yyyy, mm, dd in
659 |     if is_date_valid date then date else
660 |     dec_error_invalid d ~first ~last:(d.pos - 1) (Date date)
661 | 
662 |   let dec_hour d =
663 |     let first = d.pos in
664 |     let hh = n_digits d ~n:2 in
665 |     if 00 <= hh && hh <= 23 then hh else
666 |     dec_error_invalid d ~first ~last:(d.pos - 1) (Hour hh)
667 | 
668 |   let dec_minute d =
669 |     let first = d.pos in
670 |     let mm = n_digits d ~n:2 in
671 |     if 00 <= mm && mm <= 59 then mm else
672 |     dec_error_invalid d ~first ~last:(d.pos - 1) (Minute mm)
673 | 
674 |   let dec_sec d =
675 |     let first = d.pos in
676 |     let ss = n_digits d ~n:2 in
677 |     if 00 <= ss && ss <= 60 then ss else
678 |     dec_error_invalid d ~first ~last:(d.pos - 1) (Second ss)
679 | 
680 |   let dec_secfrac d = (* . checked *)
681 |     d.pos <- d.pos + 1;
682 |     let rec loop d first i =
683 |       if i > d.max_pos ||
684 |          match d.s.[i] with '0' .. '9' -> false | _ -> true
685 |       then (float_of_string ("0." ^ String.sub d.s first (i - first)))
686 |       else loop d first (i + 1)
687 |     in
688 |     loop d d.pos d.pos
689 | 
690 |   let dec_partial_time d =
691 |     let hh = dec_hour d in
692 |     dec_char d ~c:':';
693 |     let mm = dec_minute d in
694 |     dec_char d ~c:':';
695 |     let ss = n_digits d ~n:2 in
696 |     match d.s.[d.pos] with
697 |     | '.' -> (hh, mm, ss), Some (dec_secfrac d)
698 |     | _ -> (hh, mm, ss), None
699 | 
700 |   let dec_time_offset d =
701 |     if d.pos > d.max_pos then dec_error_eoi d ~first:d.pos [Tz_offset] else
702 |     match d.s.[d.pos] with
703 |     | 'Z' -> d.pos <- d.pos + 1; 0
704 |     | '+' | '-' as sign ->
705 |         d.pos <- d.pos + 1;
706 |         let hh = dec_hour d in
707 |         dec_char d ~c:':';
708 |         let mm = dec_minute d in
709 |         (if sign = '+' then 1 else -1) * (hh * 3600 + mm * 60)
710 |     | _ ->
711 |         dec_error_eoi d ~first:d.pos [Tz_offset]
712 | 
713 |   let dec_full_time d =
714 |     let time, frac = dec_partial_time d in
715 |     time, frac, Some (dec_time_offset d)
716 | 
717 |   let dec_local_time d =
718 |     let time, frac = dec_partial_time d in
719 |     Date_time (None, Some (time, frac, None))
720 | 
721 |   let dec_date_time d =
722 |     let date = dec_full_date d in
723 |     if d.pos > d.max_pos then Date_time (Some date, None) else
724 |     match d.s.[d.pos] with
725 |     | 'T' ->
726 |         d.pos <- d.pos + 1;
727 |         let time = dec_full_time d in
728 |         Date_time (Some date, Some time)
729 |     | ' ' ->
730 |         d.pos <- d.pos + 1;
731 |         begin match d.s.[d.pos] with
732 |         | '0' .. '9' -> Date_time (Some date, Some (dec_full_time d))
733 |         | _ -> Date_time (Some date, None)
734 |         end
735 |     | _ ->
736 |         Date_time (Some date, None)
737 | 
738 |   let dec_val d =
739 |     if d.pos > d.max_pos then dec_error_expected d ~first:d.pos [Value] else
740 |     match d.s.[d.pos] with
741 |     | '+' | '-' ->
742 |         let first = d.pos in
743 |         d.pos <- d.pos + 1; dec_dec_integer_or_float d ~first
744 |     | 'i' -> dec_inf d ~first:d.pos
745 |     | 'n' -> dec_nan d ~first:d.pos
746 |     | '0' .. '9' ->
747 |         let colon_pos = d.pos + 2 in
748 |         if colon_pos > d.max_pos then dec_integer_or_float d ~first:d.pos else
749 |         if d.s.[colon_pos] = ':' then dec_local_time d else
750 |         let dash_pos = d.pos + 4 in
751 |         if dash_pos > d.max_pos then dec_integer_or_float d ~first:d.pos else
752 |         if d.s.[dash_pos] = '-' then dec_date_time d else
753 |         dec_integer_or_float d ~first:d.pos
754 |     | '\'' -> failwith "TODO"
755 |     | '\"' -> failwith "TODO"
756 |     | 't' -> dec_true d
757 |     | 'f' -> dec_false d
758 |     | '[' -> failwith "TODO"
759 |     | _ -> dec_error_expected d ~first:d.pos [Value]
760 | 
761 |   let dec_keyval_sep d =
762 |     if d.pos > d.max_pos || d.s.[d.pos] <> '='
763 |     then dec_error_expected d ~first:d.pos [Equal_char]
764 |     else dec_ws d
765 | 
766 |   let dec_keyval d ws =
767 |     let key = dec_key d ws in
768 |     let ws = dec_keyval_sep d in
769 |     let v = dec_val d in
770 |     key, ws, v
771 | 
772 |   let dec_keyval_expression d ws =
773 |     let kv = dec_keyval d ws in
774 |     Keyval' (kv, dec_ws_comment_newline d)
775 | 
776 |   let dec_expression d =
777 |     let ws = dec_ws d in
778 |     if d.pos > d.max_pos then Whitespace (ws, Textloc.none) else
779 |     match d.s.[d.pos] with
780 |     | '#' -> Whitespace (ws, dec_comment_newline d)
781 |     | '\n' | '\r' -> Whitespace (dec_newline d ws, Textloc.none)
782 |     | '[' -> dec_table d ws
783 |     | '\'' | '\"' -> dec_keyval_expression d ws
784 |     | c when is_unquoted_key_char c -> dec_keyval_expression d ws
785 |     | _ ->
786 |         let exps = Error.[Keyval; Table; Comment; Newline; Eoi] in
787 |         dec_error_expected d ~first:d.pos exps
788 | 
789 |   let dec_toml d =
790 |     let rec loop d acc =
791 |       if d.pos > d.max_pos then List.rev acc else
792 |       loop d (dec_expression d :: acc)
793 |     in
794 |     loop d []
795 | 
796 |   let of_string ?layout ?locs ?file i =
797 |     let d = decoder ?layout ?locs ?file i in
798 |     match dec_toml d with
799 |     | _ -> Ok ([], Meta.none)
800 |     | exception Err (err, loc) -> Error (err, loc)
801 | 
802 |   let of_string' ?pp_error ?file i =
803 |     Error.to_string ?pp:pp_error (of_string ?file i)
804 | 
805 |   let to_string t = failwith "TODO"
806 | 
807 | 
808 |   (* FIXME cut and paste from sexp *)
809 | 
810 |   (* Indices *)
811 | 
812 |   type index = Nth of int | Key of string
813 | 
814 |   let pp_key = Format.pp_print_string
815 |   let pp_index ?(pp_key = pp_key) () ppf = function
816 |   | Nth n -> pf ppf "[%d]" n
817 |   | Key k -> pp_key ppf k
818 | 
819 |   let pp_bracketed_index ?(pp_key = pp_key) () ppf = function
820 |   | Nth n -> pf ppf "[%d]" n
821 |   | Key k -> pf ppf "[%a]" pp_key k
822 | 
823 |   (* Paths *)
824 | 
825 |   type path = index list (* reversed *)
826 | 
827 |   let path_err i fmt = Format.kasprintf failwith ("%d: " ^^ fmt) i
828 |   let path_err_unexp_eoi i = path_err i "unexpected end of input"
829 |   let path_err_unexp_char i s = path_err i "unexpected character: %C" s.[i]
830 |   let path_err_illegal_char i s = path_err i "illegal character here: %C" s.[i]
831 |   let err_unexp i s =
832 |     path_err i "unexpected input: %S" (Textdec.string_subrange ~first:i s)
833 | 
834 |   let path_parse_eoi s i max = if i > max then () else err_unexp i s
835 |   let path_parse_index p s i max =
836 |     let first, stop = match s.[i] with '[' -> i + 1, ']' | _ -> i, '.' in
837 |     let last, next =
838 |       let rec loop stop s i max = match i > max with
839 |       | true -> if stop = ']' then path_err_unexp_eoi i else (i - 1), i
840 |       | false ->
841 |           let illegal = s.[i] = '[' || (s.[i] = ']' && stop = '.') in
842 |           if illegal then path_err_illegal_char i s else
843 |           if s.[i] <> stop then loop stop s (i + 1) max else
844 |           (i - 1), if stop = ']' then i + 1 else i
845 |       in
846 |       loop stop s first max
847 |     in
848 |     let idx = Textdec.string_subrange ~first ~last s in
849 |     if idx = "" then path_err first "illegal empty index" else
850 |     match int_of_string idx with
851 |     | exception Failure _ -> next, (Key idx) :: p
852 |     | idx -> next, (Nth idx) :: p
853 | 
854 |   let path_of_string s =
855 |     let rec loop p s i max =
856 |       if i > max then p else
857 |       let next, p = path_parse_index p s i max in
858 |       if next > max then p else
859 |       if s.[next] <> '.' then path_err_unexp_char next s else
860 |       if next + 1 <= max then loop p s (next + 1) max else
861 |       path_err_unexp_eoi next
862 |     in
863 |     try
864 |       if s = "" then Ok [] else
865 |       let start = if s.[0] = '.' then 1 else 0 in
866 |       Ok (loop [] s start (String.length s - 1))
867 |     with Failure e -> Error e
868 | 
869 |   let pp_path ?pp_key () ppf is =
870 |     let pp_sep ppf () = Format.pp_print_char ppf '.' in
871 |     Format.pp_print_list ~pp_sep (pp_index ?pp_key ()) ppf (List.rev is)
872 | 
873 |   (* Carets *)
874 | 
875 |   type caret_loc = Before | Over | After
876 |   type caret = caret_loc * path
877 | 
878 |   let caret_path (_, p) = p
879 |   let caret_of_string s =
880 |     let rec loop p s i max =
881 |       if i > max then Over, p else
882 |       let next = i + 1 in
883 |       match s.[i] with
884 |       | 'v' when next <= max && s.[next] = '[' ->
885 |           let next, p = path_parse_index p s next max in
886 |           path_parse_eoi s next max; Before, p
887 |       | c ->
888 |           let next, p = path_parse_index p s i max in
889 |           if next > max then Over, p else
890 |           if s.[next] = 'v'
891 |           then (path_parse_eoi s (next + 1) max; After, p) else
892 |           if s.[next] <> '.' then path_err_unexp_char next s else
893 |           if next + 1 <= max then loop p s (next + 1) max else
894 |           path_err_unexp_eoi next
895 |     in
896 |     try
897 |       if s = "" then Ok (Over, []) else
898 |       let start = if s.[0] = '.' then 1 else 0 in
899 |       Ok (loop [] s start (String.length s - 1))
900 |     with Failure e -> Error e
901 | 
902 |   let pp_caret ?pp_key () ppf = function
903 |   | Over, p -> (pp_path ?pp_key ()) ppf p
904 |   | Before, (c :: p) ->
905 |       (pp_path ?pp_key ()) ppf p;
906 |       (if p <> [] then Format.pp_print_char ppf '.');
907 |       Format.pp_print_char ppf 'v'; (pp_bracketed_index ?pp_key ()) ppf c
908 |   | After, (c :: p) ->
909 |       (pp_path ?pp_key ()) ppf p;
910 |       (if p <> [] then Format.pp_print_char ppf '.');
911 |       (pp_bracketed_index ?pp_key ()) ppf c; Format.pp_print_char ppf 'v'
912 |   | _ -> ()
913 | end
914 | 


--------------------------------------------------------------------------------
/src/serialkit_toml.mli:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2021 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | (** TOML support.
  7 | 
  8 |     As specified in {{:https://toml.io/en/v1.0.0}TOML v1.0.0}.
  9 | 
 10 |     Open this module to use it, this only introduces modules in your scope. *)
 11 | 
 12 | open Serialkit_text
 13 | 
 14 | (** TOML definitions and codec.
 15 | 
 16 |     {b References}
 17 |     {ul
 18 |     {- Tom Preston-Werner et al,
 19 |        {:https://toml.io/en/v1.0.0}{e TOML v1.0.0}}.}} *)
 20 | module Toml : sig
 21 | 
 22 |   (** {1:toml TOML} *)
 23 | 
 24 |   val file_ext : string
 25 |   (** [file_ext] is [".toml"]. *)
 26 | 
 27 |   val mime_type : string
 28 |   (** [mime_type] is ["application/toml"]. *)
 29 | 
 30 |   val version : string
 31 |   (** [version] is the supported TOML version. *)
 32 | 
 33 |   (** Text locations. *)
 34 |   module Textloc = Serialkit_text.Textloc
 35 | 
 36 |   (** Node metadata. *)
 37 |   module Meta = Serialkit_text.Meta
 38 | 
 39 |   type 'a node = 'a * Meta.t
 40 |   (** The type for abstract syntax tree nodes. The data of type ['a] and its
 41 |       metadata. *)
 42 | 
 43 |   type tz_offset_s = int
 44 |   (** The type for time zone offsets between local and UTC timelines
 45 |       in seconds. This is the signed difference in seconds between
 46 |       the local timeline and the UTC timeline. *)
 47 | 
 48 |   type date = int * int * int
 49 |   (** The type dates. A year, a month and a day. *)
 50 | 
 51 |   type time = (int * int * int) * float option * tz_offset_s option
 52 |   type date_time = date option * time option
 53 |   type table = (string node * value) node list
 54 | 
 55 |   and value =
 56 |   [ `Boolean of bool node
 57 |   | `Integer of int64 node
 58 |   | `Float of float node
 59 |   | `String of string node
 60 |   | `Array of value list node
 61 |   | `Date_time of date_time node
 62 |   | `Table of table node ]
 63 | 
 64 |   type t = table node
 65 |   (** The type for TOML documents. *)
 66 | 
 67 |   (** {1:fmt Formatting} *)
 68 | 
 69 |   type 'a fmt = Format.formatter -> 'a -> unit
 70 |   (** The type for formatting functions. *)
 71 | 
 72 |   val pp : t fmt
 73 |   (** [pp] formats TOML. *)
 74 | 
 75 |   val pp_layout : t fmt
 76 |   (** [pp_layout] is like [pp] but uses layout information. *)
 77 | 
 78 |   val pp_value : value fmt
 79 |   (** [pp_value] formats TOML values. *)
 80 | 
 81 |   (** {1:codec Codec} *)
 82 | 
 83 |   (** Decoding errors. *)
 84 |   module Error : sig
 85 | 
 86 |     type kind
 87 |     (** The type for kinds of decoding errors. *)
 88 | 
 89 |     val pp_kind : unit -> kind fmt
 90 |     (** [pp_error_kind ()] formats an error kind. *)
 91 | 
 92 |     type t = kind * Textloc.t
 93 |     (** The type for decoding errors. The error kind and its location in
 94 |         text. *)
 95 | 
 96 |     val pp :
 97 |       ?pp_loc:Textloc.t fmt -> ?pp_kind:kind fmt ->
 98 |       ?pp_prefix:unit fmt -> unit -> t fmt
 99 |     (** [pp ~pp_loc ~pp_error_kind ~pp_prefix ()] formats errors
100 |         using [pp_loc] (defaults to {!pp_loc}), [pp_error_kind]
101 |         (defaults to {!pp_error_kind}) and [pp_prefix] (defaults formats
102 |         ["Error: "]). *)
103 | 
104 |     val to_string : ?pp:t fmt -> ('a, t) result -> ('a, string) result
105 |     (** [to_string ~pp r] converts an error to a string using [pp]
106 |         (defaults to {!pp}). *)
107 |   end
108 | 
109 |   val of_string :
110 |     ?layout:bool -> ?locs:bool -> ?file:Textloc.fpath -> string ->
111 |     (t, Error.t) result
112 |   (** [of_string s] parses TOML from [s].
113 | 
114 |       {ul
115 |       {- [file] is the file path from which [s] is assumed to have been
116 |          read (defaults to {!Textloc.none}).}
117 |       {- If [locs] is [true] (default) locations are stored in nodes of the
118 |          abstract syntax tree in {{!Meta.id}individually identifified} {!Meta.t}
119 |          values. If [false] node meta values are all {!Meta.none} whose text
120 |          location is {!Textloc.none}.}
121 |       {- If [layout] is [false] (default) layout values cannot be relied
122 |          and do not in general represent source layout.}}
123 | 
124 |       The parser has the following limitations.
125 | 
126 |       {ul
127 |       {- The ranges of date and time {e fields} are checked but
128 |          dates are not checked for validity. Use your favourite
129 |          date-time library to validate them.}}
130 | 
131 |       {b Note.} All OCaml strings returned by this function are UTF-8
132 |       encoded. *)
133 | 
134 |   val of_string' :
135 |     ?pp_error:Error.t fmt -> ?file:Textloc.fpath -> string ->
136 |     (t, string) result
137 |   (** [of_string'] is [of_string] composed with {!error_to_string}. *)
138 | 
139 |   val to_string : t -> string
140 |   (** [to_string t] is [t] as TOML.
141 | 
142 |       {b Warning.} Assumes all OCaml strings in [t] are UTF-8 encoded. *)
143 | 
144 |   (** {1:toml_index TOML indices} *)
145 | 
146 |   type index =
147 |   | Nth of int (** *)
148 |   | Key of string (** *)
149 |   (** The type for TOML indexing operations.
150 |       {ul
151 |       {- [Nth n], lookup zero-based element [n] in a list. If [n] is
152 |          negative, counts the number of elements from the end of the
153 |          list, i.e. [-1] is the last list element.}
154 |       {- [Key k], lookup binding [k] in an s-expression
155 |          {{!sexp_dict}dictionary.}}} *)
156 | 
157 |   val pp_key : string fmt
158 |   (** [pp_key] formats a key, this is {!Format.pp_print_string}. *)
159 | 
160 |   val pp_index : ?pp_key:string fmt -> unit -> index fmt
161 |   (** [pp_index] formats indices. Keys are unbracketed and formatted
162 |       with [pp_key], defaults to {!pp_key}. *)
163 | 
164 |   (** {1:toml_path TOML paths} *)
165 | 
166 |   type path = index list
167 |   (** The type for paths, a sequence of indexing operations in {b reverse}
168 |       order. *)
169 | 
170 |   val path_of_string : string -> (path, string) result
171 |   (** [path_of_string] parses a path from [s] according to the syntax
172 |       {{!sexp_path_caret}given here}. *)
173 | 
174 |   val pp_path : ?pp_key:string fmt -> unit -> path fmt
175 |   (** [pp_path ?pp_key ()] is a formatter for paths using [pp_key] to
176 |       format keys (defaults to {!pp_key}). *)
177 | 
178 |   (** {1:carets Carets} *)
179 | 
180 |   type caret_loc =
181 |   | Before (** The void before the TOML found by the path. *)
182 |   | Over  (** The TOML found by the path. *)
183 |   | After (** The void after the TOML found by the path. *)
184 |   (** The type for caret locations. *)
185 | 
186 |   type caret = caret_loc * path
187 |   (** The type for carets. A caret location and the path at which it
188 |       applies. *)
189 | 
190 |   val caret_of_string : string -> (caret, string) result
191 |   (** [caret_of_string s] parses a caret from [s] according to the
192 |       syntax {{!sexp_path_caret}given here}. *)
193 | 
194 |   val pp_caret : ?pp_key:string fmt -> unit -> caret fmt
195 |   (** [pp_caret ?pp_key ()] is a formatter for carets using [pp_key]
196 |       to format keys (defaults to {!pp_key}). *)
197 | end
198 | 


--------------------------------------------------------------------------------
/test/expect/test.sexp:
--------------------------------------------------------------------------------
 1 | ;;
 2 | 
 3 | testabc adf
 4 | (goet-list' bla bli blo blu)
 5 | (hey
 6 |   gluu
 7 |   (hi blu))
 8 | 
 9 | (get-atom atom)
10 | 
11 | (l0
12 |   (l00 a b c d)
13 |   (l00)
14 |   (l01 a b c d)
15 |   (l02 a
16 |        (l03 bla bli) ; Hahahahaha
17 |       );asdf
18 |       ;
19 | )
20 | 
21 | ;; asdf
22 | 


--------------------------------------------------------------------------------
/test/expect/test.toml:
--------------------------------------------------------------------------------
 1 | # This is a TOML document
 2 | 
 3 | title = "TOML Example"
 4 | 
 5 | [owner]
 6 | name = "Tom Preston-Werner"
 7 | dob = 1979-05-27T07:32:00-08:00
 8 | 
 9 | [database]
10 | enabled = true
11 | ports = [ 8000, 8001, 8002 ]
12 | data = [ ["delta", "phi"], [3.14] ]
13 | temp_targets = { cpu = 79.5, case = 72.0 }
14 | 
15 | [servers]
16 | 
17 | [servers.alpha]
18 | ip = "10.0.0.1"
19 | role = "frontend"
20 | 
21 | [servers.beta]
22 | ip = "10.0.0.2"
23 | role = "backend"
24 | 


--------------------------------------------------------------------------------
/test/test.ml:
--------------------------------------------------------------------------------
1 | (*---------------------------------------------------------------------------
2 |    Copyright (c) 2019 The serialkit programmers. All rights reserved.
3 |    SPDX-License-Identifier: ISC
4 |   ---------------------------------------------------------------------------*)
5 | 


--------------------------------------------------------------------------------
/test/test_toml.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2021 The serialkit programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | let log fmt = Format.kfprintf (Fun.const ()) Format.std_formatter (fmt ^^ "@.")
 7 | let log_fail pp ~exp ~fnd =
 8 |   log "@[<v3>Expected: @[%a@]@,Found: @[%a@]@]" pp exp pp fnd
 9 | 
10 | let main () =
11 |   log "All tests succeeded."
12 | 
13 | let () = if !Sys.interactive then () else main ()
14 | 


--------------------------------------------------------------------------------
/tool/cmd_main.ml:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | open Std
 7 | open Cmdliner
 8 | 
 9 | let cmds = [ Cmd_sexp.v; Cmd_toml.v; ]
10 | 
11 | let serialkit =
12 |   let doc = "Process serialization formats" in
13 |   let exits = Exit.exits_with_err_diff in
14 |   let man = [
15 |     `S Manpage.s_description;
16 |     `P "$(mname) process various serialization formats";
17 |     `Blocks Cli.common_man; ]
18 |   in
19 |   Cmd.group (Cmd.info "serialkit" ~version:"%%VERSION%%" ~doc ~exits ~man) @@
20 |   cmds
21 | 
22 | let main () = exit (Cmd.eval' serialkit)
23 | let () = if !Sys.interactive then () else main ()
24 | 


--------------------------------------------------------------------------------
/tool/cmd_main.mli:
--------------------------------------------------------------------------------
1 | (*---------------------------------------------------------------------------
2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 |    SPDX-License-Identifier: ISC
4 |   ---------------------------------------------------------------------------*)
5 | 


--------------------------------------------------------------------------------
/tool/cmd_sexp.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2019 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Std
  7 | open Serialkit_sexp
  8 | 
  9 | let err_file = 1
 10 | let err_sexp = 2
 11 | let err_query = 3
 12 | 
 13 | let exec = Filename.basename Sys.executable_name
 14 | let log_err fmt =
 15 |   Format.fprintf Format.err_formatter ("%s: @[" ^^ fmt ^^ "@]@.") exec
 16 | 
 17 | let log_on_error ~exit:code r f = match r with
 18 | | Error e -> log_err "%s" e; code | Ok v -> f v
 19 | 
 20 | let delete file path =
 21 |   let query = Sexpq.delete_at_path ~must_exist:false path in
 22 |   log_on_error ~exit:err_file (Os.read_file file) @@ fun content ->
 23 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 24 |   log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ fun sexp ->
 25 |   Format.printf "@[%a@]" Sexp.pp_seq_layout sexp; 0
 26 | 
 27 | let get file path =
 28 |   log_on_error ~exit:err_file (Os.read_file file) @@ fun content ->
 29 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 30 |   match path with
 31 |   | None -> Format.printf "@[%a@]" Sexp.pp_seq_layout sexp; 0
 32 |   | Some path ->
 33 |       let query = Sexpq.path path Sexpq.sexp in
 34 |       log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ function
 35 |       | `A (a, _) | `L ([`A (a, _)], _) -> Format.printf "%s@." a; 0
 36 |       | `L _ as l -> Format.printf "@[%a@]@." Sexp.pp l; 0
 37 | 
 38 | let locs file =
 39 |   let pp_loc ppf l =
 40 |     Serialkit_text.Textloc.pp ppf l; Format.pp_print_char ppf ':'
 41 |   in
 42 |   let rec pp_locs ppf = function
 43 |   | `A (_, _) as s -> pp_loc ppf (Sexp.loc s)
 44 |   | `L (vs, _) as s ->
 45 |       pp_loc ppf (Sexp.loc s); Format.pp_print_cut ppf ();
 46 |       Format.pp_print_list pp_locs ppf vs
 47 |   in
 48 |   log_on_error ~exit:err_file (Os.read_file file) @@ fun content ->
 49 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 50 |   Format.printf "@[<v>%a@]@." pp_locs sexp; 0
 51 | 
 52 | let set file caret v =
 53 |   log_on_error ~exit:err_file (Os.read_file file) @@ fun content ->
 54 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 55 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' v) @@ fun v ->
 56 |   let query = Sexpq.splice_at_caret ~must_exist:false caret ~rep:v in
 57 |   log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ fun sexp ->
 58 |   Format.printf "@[%a@]" Sexp.pp_seq_layout sexp; 0
 59 | 
 60 | (* Command line interface *)
 61 | 
 62 | open Cmdliner
 63 | 
 64 | let file_arg =
 65 |   let doc = "$(docv) is the s-expression file. Use $(b,-) for stdin." in
 66 |   Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"FILE")
 67 | 
 68 | let path_arg =
 69 |   let parse s = match Sexp.path_of_string s with
 70 |   | Ok _ as v -> v | Error e -> Error (`Msg e)
 71 |   in
 72 |   Arg.conv ~docv:"SPATH" (parse, Sexp.pp_path ())
 73 | 
 74 | let caret_arg =
 75 |   let parse s = match Sexp.caret_of_string s with
 76 |   | Ok _ as v -> v | Error e -> Error (`Msg e)
 77 |   in
 78 |   Arg.conv ~docv:"CARET" (parse, Sexp.pp_caret ())
 79 | 
 80 | let exits =
 81 |   Cmd.Exit.info err_file ~doc:"on file read errors" ::
 82 |   Cmd.Exit.info err_sexp ~doc:"on s-expression parse errors" ::
 83 |   Cmd.Exit.info err_query ~doc:"on path query errors" ::
 84 |   Cmd.Exit.defaults
 85 | 
 86 | let common_man =
 87 |   [ `S Manpage.s_bugs;
 88 |     `P "This program is distributed with the serialk OCaml library. \
 89 |         See $(i,%%PKG_HOMEPAGE%%) for contact information."; ]
 90 | 
 91 | let delete_cmd =
 92 |   let doc = "Delete an s-expression path" in
 93 |   let sdocs = Manpage.s_common_options in
 94 |   let man = [
 95 |     `S Manpage.s_description;
 96 |     `P "$(tname) returns the value of an s-expression file key.";
 97 |     `S Manpage.s_examples;
 98 |     `P "TODO";
 99 |     `Blocks common_man; ]
100 |   in
101 |   let path_arg =
102 |     let doc = "Delete s-expression path $(docv)." in
103 |     Arg.(required & pos 1 (some path_arg) None & info [] ~doc ~docv:"SPATH")
104 |   in
105 |   Cmd.v (Cmd.info "delete" ~doc ~sdocs ~exits ~man)
106 |     Term.(const delete $ file_arg $ path_arg)
107 | 
108 | let get_cmd, get_term =
109 |   let doc = "Extract an s-expression path (default)" in
110 |   let sdocs = Manpage.s_common_options in
111 |   let man = [
112 |     `S Manpage.s_description;
113 |     `P "$(tname) returns the value of an s-expression file key.";
114 |     `S Manpage.s_examples;
115 |     `P "TODO";
116 |     `Blocks common_man; ]
117 |   in
118 |   let key_opt_arg =
119 |     let doc = "Extract s-expression path $(docv)." in
120 |     Arg.(value & pos 1 (some path_arg) None & info [] ~doc ~docv:"SPATH")
121 |   in
122 |   let term = Term.(const get $ file_arg $ key_opt_arg) in
123 |   Cmd.v (Cmd.info "get" ~doc ~sdocs ~exits ~man) term, term
124 | 
125 | let set_cmd =
126 |   let doc = "Edit an s-expression path" in
127 |   let sdocs = Manpage.s_common_options in
128 |   let man = [
129 |     `S Manpage.s_description;
130 |     `P "$(tname) sets the value of an s-expression file key.";
131 |     `S Manpage.s_examples;
132 |     `P "TODO";
133 |     `Blocks common_man; ]
134 |   in
135 |   let caret_arg =
136 |     let doc = "Set caret $(docv)." in
137 |     Arg.(required & pos 1 (some caret_arg) None & info [] ~doc ~docv:"CARET")
138 |   in
139 |   let sexp =
140 |     let doc = "$(docv) to insert or substitute" in
141 |     Arg.(required & pos 2 (some string) None & info [] ~doc ~docv:"SEXP")
142 |   in
143 |   Cmd.v (Cmd.info "set" ~doc ~sdocs ~exits ~man)
144 |     Term.(const set $ file_arg $ caret_arg $ sexp)
145 | 
146 | let locs_cmd =
147 |   let doc = "Show s-expression parse locations" in
148 |   let sdocs = Manpage.s_common_options in
149 |   let man = [
150 |     `S Manpage.s_description;
151 |     `P "$(tname) outputs s-expression parse locations.";
152 |     `Blocks common_man; ]
153 |   in
154 |   Cmd.v (Cmd.info "locs" ~doc ~sdocs ~exits ~man)
155 |     Term.(const locs $ file_arg)
156 | 
157 | let v =
158 |   let doc = "Process s-expressions" in
159 |   let sdocs = Manpage.s_common_options in
160 |   let man = [
161 |     `S Manpage.s_description;
162 |     `P "$(mname) edits s-expression files";
163 |     `S Manpage.s_examples;
164 |     `S Manpage.s_bugs;
165 |     `P "This program is distributed with the serialk OCaml library. \
166 |         See $(i,%%PKG_HOMEPAGE%%) for contact information."; ]
167 |   in
168 |   Cmd.group
169 |     (Cmd.info "sexp" ~version:"%%VERSION%%" ~doc ~sdocs ~exits ~man)
170 |     ~default:get_term [get_cmd; delete_cmd; locs_cmd; set_cmd;]
171 | 


--------------------------------------------------------------------------------
/tool/cmd_sexp.mli:
--------------------------------------------------------------------------------
1 | (*---------------------------------------------------------------------------
2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 |    SPDX-License-Identifier: ISC
4 |   ---------------------------------------------------------------------------*)
5 | 
6 | val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t
7 | (** [v] is the command line for [commonmark]. *)
8 | 


--------------------------------------------------------------------------------
/tool/cmd_toml.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2021 The serialkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | open Std
  7 | open Serialkit_toml
  8 | 
  9 | let err_file = 1
 10 | let err_toml = 2
 11 | let err_query = 3
 12 | 
 13 | let exec = Filename.basename Sys.executable_name
 14 | let log_err fmt =
 15 |   Format.fprintf Format.err_formatter ("%s: @[" ^^ fmt ^^ "@]@.") exec
 16 | 
 17 | let log_on_error ~exit:code r f = match r with
 18 | | Error e -> log_err "%s" e; code | Ok v -> f v
 19 | 
 20 | let delete file path =
 21 |   failwith "TODO"
 22 | (*
 23 |   let query = Sexpq.delete_at_path ~must_exist:false path in
 24 |   log_on_error ~exit:err_file (File.read file) @@ fun content ->
 25 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 26 |   log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ fun sexp ->
 27 |   Format.printf "@[%a@]" Sexp.pp_seq_layout sexp; 0
 28 | *)
 29 | 
 30 | let get file path =
 31 |   log_on_error ~exit:err_file (Os.read_file file) @@ fun content ->
 32 |   log_on_error ~exit:err_toml (Toml.of_string' ~file content) @@ fun toml ->
 33 |   match path with
 34 |   | None -> Format.printf "@[%a@]" Toml.pp_layout toml; 0
 35 |   | Some path ->
 36 |       failwith "TODO"
 37 | (*
 38 |       let query = Sexpq.path path Sexpq.sexp in
 39 |       log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ function
 40 |       | `A (a, _) | `L ([`A (a, _)], _) -> Format.printf "%s@." a; 0
 41 |       | `L _ as l -> Format.printf "@[%a@]@." Sexp.pp l; 0
 42 | *)
 43 | 
 44 | let locs file =
 45 |   failwith "TODO"
 46 | (*
 47 |   let pp_loc ppf l = Serialk_text.Tloc.pp ppf l; Format.pp_print_char ppf ':' in
 48 |   let rec pp_locs ppf = function
 49 |   | `A (_, _) as s -> pp_loc ppf (Sexp.loc s)
 50 |   | `L (vs, _) as s ->
 51 |       pp_loc ppf (Sexp.loc s); Format.pp_print_cut ppf ();
 52 |       Format.pp_print_list pp_locs ppf vs
 53 |   in
 54 |   log_on_error ~exit:err_file (File.read file) @@ fun content ->
 55 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 56 |   Format.printf "@[<v>%a@]@." pp_locs sexp; 0
 57 | *)
 58 | 
 59 | let set file caret v =
 60 |   failwith "TODO"
 61 | (*
 62 |   log_on_error ~exit:err_file (File.read file) @@ fun content ->
 63 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' ~file content) @@ fun sexp ->
 64 |   log_on_error ~exit:err_sexp (Sexp.seq_of_string' v) @@ fun v ->
 65 |   let query = Sexpq.splice_at_caret ~must_exist:false caret ~rep:v in
 66 |   log_on_error ~exit:err_query (Sexpq.query' query sexp) @@ fun sexp ->
 67 |   Format.printf "@[%a@]" Sexp.pp_seq_layout sexp; 0
 68 | *)
 69 | 
 70 | (* Command line interface *)
 71 | 
 72 | open Cmdliner
 73 | 
 74 | let file_arg =
 75 |   let doc = "$(docv) is the TOML file. Use $(b,-) for stdin." in
 76 |   Arg.(required & pos 0 (some string) None & info [] ~doc ~docv:"FILE")
 77 | 
 78 | let path_arg =
 79 |   let parse s = match Toml.path_of_string s with
 80 |   | Ok _ as v -> v | Error e -> Error (`Msg e)
 81 |   in
 82 |   Arg.conv ~docv:"TPATH" (parse, Toml.pp_path ())
 83 | 
 84 | let caret_arg =
 85 |   let parse s = match Toml.caret_of_string s with
 86 |   | Ok _ as v -> v | Error e -> Error (`Msg e)
 87 |   in
 88 |   Arg.conv ~docv:"CARET" (parse, Toml.pp_caret ())
 89 | 
 90 | let exits =
 91 |   Cmd.Exit.info err_file ~doc:"on file read errors" ::
 92 |   Cmd.Exit.info err_toml ~doc:"on TOML parse errors" ::
 93 |   Cmd.Exit.info err_query ~doc:"on path query errors" ::
 94 |   Cmd.Exit.defaults
 95 | 
 96 | let common_man =
 97 |   [ `S Manpage.s_bugs;
 98 |     `P "This program is distributed with the serialk OCaml library.
 99 |      See $(i,%%PKG_HOMEPAGE%%) for contact information."; ]
100 | 
101 | let delete_cmd =
102 |   let doc = "Delete an s-expression path" in
103 |   let sdocs = Manpage.s_common_options in
104 |   let man = [
105 |     `S Manpage.s_description;
106 |     `P "$(tname) returns the value of an s-expression file key.";
107 |     `S Manpage.s_examples;
108 |     `P "TODO";
109 |     `Blocks common_man; ]
110 |   in
111 |   let path_arg =
112 |     let doc = "Delete s-expression path $(docv)." in
113 |     Arg.(required & pos 1 (some path_arg) None & info [] ~doc ~docv:"SPATH")
114 |   in
115 |   Cmd.v (Cmd.info "delete" ~doc ~sdocs ~exits ~man) @@
116 |   Term.(const delete $ file_arg $ path_arg)
117 | 
118 | let get_cmd =
119 |   let doc = "Extract a TOML path (default)" in
120 |   let sdocs = Manpage.s_common_options in
121 |   let man = [
122 |     `S Manpage.s_description;
123 |     `P "$(tname) returns the value of a TOML file key.";
124 |     `S Manpage.s_examples;
125 |     `P "TODO";
126 |     `Blocks common_man; ]
127 |   in
128 |   let key_opt_arg =
129 |     let doc = "Extract TOML path $(docv)." in
130 |     Arg.(value & pos 1 (some path_arg) None & info [] ~doc ~docv:"SPATH")
131 |   in
132 |   Cmd.v (Cmd.info "get" ~doc ~sdocs ~exits ~man) @@
133 |   Term.(const get $ file_arg $ key_opt_arg)
134 | 
135 | let set_cmd =
136 |   let doc = "Edit a TOML file key" in
137 |   let sdocs = Manpage.s_common_options in
138 |   let man = [
139 |     `S Manpage.s_description;
140 |     `P "$(tname) sets the value of an TOML file key.";
141 |     `S Manpage.s_examples;
142 |     `P "TODO";
143 |     `Blocks common_man; ]
144 |   in
145 |   let caret_arg =
146 |     let doc = "Set caret $(docv)." in
147 |     Arg.(required & pos 1 (some caret_arg) None & info [] ~doc ~docv:"CARET")
148 |   in
149 |   let toml =
150 |     let doc = "$(docv) to insert or substitute" in
151 |     Arg.(required & pos 2 (some string) None & info [] ~doc ~docv:"TOML")
152 |   in
153 |   Cmd.v (Cmd.info "set" ~doc ~sdocs ~exits ~man) @@
154 |   Term.(const set $ file_arg $ caret_arg $ toml)
155 | 
156 | let locs_cmd =
157 |   let doc = "Show TOML parse locations" in
158 |   let sdocs = Manpage.s_common_options in
159 |   let man = [
160 |     `S Manpage.s_description;
161 |     `P "$(tname) outputs TOML parse locations.";
162 |     `Blocks common_man; ]
163 |   in
164 |   Cmd.v (Cmd.info "locs" ~doc ~sdocs ~exits ~man) @@
165 |   Term.(const locs $ file_arg)
166 | 
167 | let cmds = [get_cmd; delete_cmd; locs_cmd; set_cmd;]
168 | let v =
169 |   let doc = "Process TOML" in
170 |   let sdocs = Manpage.s_common_options in
171 |   let man = [
172 |     `S Manpage.s_description;
173 |     `P "$(mname) edits TOML files";
174 |     `S Manpage.s_examples;
175 |     `S Manpage.s_bugs;
176 |     `P "This program is distributed with the serialk OCaml library.
177 |         See $(i,%%PKG_HOMEPAGE%%) for contact information."; ]
178 |   in
179 |   Cmd.group
180 |     (Cmd.info "toml" ~version:"%%VERSION%%" ~doc ~sdocs ~exits ~man) cmds
181 | 


--------------------------------------------------------------------------------
/tool/cmd_toml.mli:
--------------------------------------------------------------------------------
1 | (*---------------------------------------------------------------------------
2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 |    SPDX-License-Identifier: ISC
4 |   ---------------------------------------------------------------------------*)
5 | 
6 | val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t
7 | (** [v] is the command line for [toml]. *)
8 | 


--------------------------------------------------------------------------------
/tool/std.ml:
--------------------------------------------------------------------------------
  1 | (*---------------------------------------------------------------------------
  2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
  3 |    SPDX-License-Identifier: ISC
  4 |   ---------------------------------------------------------------------------*)
  5 | 
  6 | type fpath = string
  7 | 
  8 | module Result = struct
  9 |   include Result
 10 |   let error_to_failure = function Ok v -> v | Error err -> failwith err
 11 |   module Syntax = struct
 12 |     let ( let* ) = Result.bind
 13 |   end
 14 | end
 15 | 
 16 | module Log = struct
 17 |   let exec = Filename.basename Sys.executable_name
 18 | 
 19 |   let err fmt =
 20 |     Format.fprintf Format.err_formatter ("%s: @[" ^^ fmt ^^ "@]@.") exec
 21 | 
 22 |   let warn fmt =
 23 |     Format.fprintf Format.err_formatter ("@[" ^^ fmt ^^ "@]@.")
 24 | 
 25 |   let on_error ~use r f = match r with
 26 |   | Ok v -> f v | Error e -> err "%s" e; use
 27 | end
 28 | 
 29 | module Os = struct
 30 | 
 31 |   (* Emulate B0_std.Os functionality to eschew the dep *)
 32 | 
 33 |   let read_file file =
 34 |     try
 35 |       let ic = if file = "-" then stdin else open_in_bin file in
 36 |       let finally () = if file = "-" then () else close_in_noerr ic in
 37 |       Fun.protect ~finally @@ fun () -> Ok (In_channel.input_all ic)
 38 |     with
 39 |     | Sys_error err -> Error err
 40 | 
 41 |   let write_file file s =
 42 |     try
 43 |       let oc = if file = "-" then stdout else open_out_bin file in
 44 |       let finally () = if file = "-" then () else close_out_noerr oc in
 45 |       Fun.protect ~finally @@ fun () -> Ok (Out_channel.output_string oc s)
 46 |     with
 47 |     | Sys_error err -> Error err
 48 | 
 49 |   let with_tmp_dir f =
 50 |     try
 51 |       let tmpdir =
 52 |         let file = Filename.temp_file "cmarkit" "dir" in
 53 |         (Sys.remove file; Sys.mkdir file 0o700; file)
 54 |       in
 55 |       let finally () = try Sys.rmdir tmpdir with Sys_error _ -> () in
 56 |       Fun.protect ~finally @@ fun () -> Ok (f tmpdir)
 57 |     with
 58 |     | Sys_error err -> Error ("Making temporary dir: " ^ err)
 59 | 
 60 |   let with_cwd cwd f =
 61 |     try
 62 |       let curr = Sys.getcwd () in
 63 |       let () = Sys.chdir cwd in
 64 |       let finally () = try Sys.chdir curr with Sys_error _ -> () in
 65 |       Fun.protect ~finally @@ fun () -> Ok (f ())
 66 |     with
 67 |     | Sys_error err -> Error ("With cwd: " ^ err)
 68 | end
 69 | 
 70 | module Exit = struct
 71 |   open Cmdliner
 72 | 
 73 |   type code = Cmdliner.Cmd.Exit.code
 74 |   let err_file = 1
 75 |   let err_diff = 2
 76 | 
 77 |   let exits =
 78 |     Cmd.Exit.info err_file ~doc:"on file read errors." ::
 79 |     Cmd.Exit.defaults
 80 | 
 81 |   let exits_with_err_diff =
 82 |     Cmd.Exit.info err_diff ~doc:"on render differences." :: exits
 83 | end
 84 | 
 85 | let process_files f files =
 86 |   let rec loop = function
 87 |   | [] -> 0
 88 |   | file :: files ->
 89 |       Log.on_error ~use:Exit.err_file (Os.read_file file) @@ fun content ->
 90 |       f ~file content; loop files
 91 |   in
 92 |   loop files
 93 | 
 94 | module Cli = struct
 95 |   open Cmdliner
 96 | 
 97 |   let common_man =
 98 |     [ `S Manpage.s_bugs;
 99 |       `P "This program is distributed with the $(b,cmarkit) OCaml library. \
100 |           See $(i,https://erratique.ch/software/cmarkit) for contact \
101 |           information.";
102 |       `S Manpage.s_see_also;
103 |       `P "More information about the renderers can be found in the \
104 |           documentation of the $(b,cmarkit) OCaml library. Consult \
105 |           $(b,odig doc cmarkit) or the online documentation." ]
106 | end
107 | 


--------------------------------------------------------------------------------
/tool/std.mli:
--------------------------------------------------------------------------------
 1 | (*---------------------------------------------------------------------------
 2 |    Copyright (c) 2023 The cmarkit programmers. All rights reserved.
 3 |    SPDX-License-Identifier: ISC
 4 |   ---------------------------------------------------------------------------*)
 5 | 
 6 | type fpath = string
 7 | 
 8 | module Result : sig
 9 |   include module type of Result
10 |   val error_to_failure : ('a, string) result -> 'a
11 | 
12 |   module Syntax : sig
13 |     val (let*) : ('a, 'e) result -> ('a -> ('b, 'e) result) -> ('b, 'e) result
14 |   end
15 | end
16 | 
17 | module Log : sig
18 |   val err : ('a, Format.formatter, unit, unit) format4 -> 'a
19 |   val warn : ('a, Format.formatter, unit, unit) format4 -> 'a
20 |   val on_error : use:'a -> ('b, string) result -> ('b -> 'a) -> 'a
21 | end
22 | 
23 | module Os : sig
24 |   val read_file : fpath -> (string, string) result
25 |   val write_file : fpath -> string -> (unit, string) result
26 |   val with_tmp_dir : (fpath -> 'a) -> ('a, string) result
27 |   val with_cwd : fpath -> (unit -> 'a) -> ('a, string) result
28 | end
29 | 
30 | module Exit : sig
31 |   type code = Cmdliner.Cmd.Exit.code
32 |   val err_file : code
33 |   val err_diff : code
34 |   val exits : Cmdliner.Cmd.Exit.info list
35 |   val exits_with_err_diff : Cmdliner.Cmd.Exit.info list
36 | end
37 | 
38 | val process_files : (file:fpath -> string -> 'a) -> string list -> Exit.code
39 | 
40 | module Cli : sig
41 |   open Cmdliner
42 |   val common_man : Manpage.block list
43 | end
44 | 


--------------------------------------------------------------------------------