├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── .ocamlformat
├── CHANGES.md
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── doc
    ├── README.md
    ├── build.sh
    ├── contributing.mld
    ├── dune
    └── index.mld
├── dune
├── dune-project
├── odoc-parser.opam
├── odoc-parser.opam.template
├── package.json
├── src
    ├── ast.ml
    ├── compat.ml
    ├── compat.mli
    ├── dune
    ├── lexer.mli
    ├── lexer.mll
    ├── loc.ml
    ├── loc.mli
    ├── odoc_parser.ml
    ├── odoc_parser.mli
    ├── parse_error.ml
    ├── syntax.ml
    ├── syntax.mli
    ├── token.ml
    └── warning.ml
└── test
    ├── dune
    ├── test.ml
    ├── test.mli
    └── test_tables.ml


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: "Build"
 2 | 
 3 | on:
 4 |   - push
 5 |   - pull_request
 6 | 
 7 | jobs:
 8 |   build: # Check build on various OSes
 9 | 
10 |     strategy:
11 |       matrix:
12 |         os:
13 |           - macos-latest
14 |           - ubuntu-latest
15 |           # - windows-latest Windows doesn't work yet
16 |         ocaml-version:
17 |           # Don't include every versions. OCaml-CI already covers that
18 |           - 4.11.1
19 |         include:
20 |           - os: ubuntu-latest # Enable coverage only on a single build
21 |             send-coverage: true
22 |       fail-fast: false
23 | 
24 |     runs-on: ${{ matrix.os }}
25 | 
26 |     steps:
27 |       # Clone the project
28 |       - uses: actions/checkout@v2
29 | 
30 |       # Setup
31 |       - name: Setup OCaml ${{ matrix.ocaml-version }}
32 |         uses: ocaml/setup-ocaml@v2
33 |         with:
34 |           ocaml-compiler: ${{ matrix.ocaml-version }}
35 |           opam-local-packages: |
36 |             odoc-parser.opam
37 | 
38 |       - name: Install dependencies
39 |         run: opam install -y --deps-only -t ./odoc-parser.opam
40 | 
41 |       - name: dune runtest
42 |         run: opam exec -- dune runtest
43 | 
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dune output.
 2 | _build
 3 | *.install
 4 | *.merlin
 5 | 
 6 | # Editor config
 7 | .vscode
 8 | 
 9 | # Bisect_ppx output.
10 | _coverage/
11 | 
12 | # For local experiments.
13 | scratch/
14 | 
15 | # opam v2
16 | _opam/
17 | 
18 | # Esy outputs
19 | _release
20 | _esy
21 | 
22 | # Generated docs
23 | docs/
24 | 
25 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | version=0.22.4
2 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | 2.0.0
 2 | -----
 3 | 
 4 | - New inline and display math markup (@giltho, #5)
 5 | 
 6 | 1.0.1
 7 | -----
 8 | 
 9 | - OCaml 5.0 support (@talex5, #6)
10 | 
11 | 1.0.0
12 | -----
13 | 
14 | - New syntax to allow associating metadata with code blocks 
15 |   (@Julow, #2, #3)
16 | 
17 | 0.9.0
18 | -----
19 | 
20 | - Extracted from odoc repository
21 | 
22 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | ## Building and testing
 4 | 
 5 | `odoc-parser` uses [dune](https://dune.build/) to build and run tests. We use
 6 | inline expect tests with [ppx_expect](https://github.com/janestreet/ppx_expect) -
 7 | see the [test file](test/test.ml).
 8 | 
 9 | We require that the parser builds and tests pass on all versions of the OCaml
10 | compiler from 4.02.3 onwards. This is tested on each pull request by
11 | [ocaml-ci](https://github.com/ocurrent/ocaml-ci).
12 | 
13 | Build the project using
14 | 
15 | ```sh
16 | $ dune build
17 | ```
18 | 
19 | and test using
20 | 
21 | ```sh
22 | $ dune runtest
23 | ```
24 | 
25 | if the code has changed and the tests require updating, use `dune promote`.
26 | 
27 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright 2017-2021 the odoc-parser programmers.
2 | 
3 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
4 | 
5 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # odoc-parser 
 2 | 
 3 | Odoc-parser is a parser for odoc markup, which is an extension of the original markup
 4 | language parsed by [ocamldoc](https://ocaml.org/releases/4.12/htmlman/ocamldoc.html).
 5 | 
 6 | OCaml code can contain specially formatted comments that are used to document the
 7 | interfaces of modules. These comments are delimited by `(**` and `*)`. This parser
 8 | is intended to be used to parse the contents of these comments.
 9 | 
10 | The parser is part of the [odoc](https://github.com/ocaml/odoc/) project.
11 | 
12 | Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details of the development process.
13 | 
14 | ## Example usage:
15 | 
16 | ```ocaml
17 | # #require "odoc-parser";;
18 | # let location = {Lexing.pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0};;
19 | val location : Lexing.position =
20 |   {Lexing.pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0}
21 | # let p = Odoc_parser.parse_comment ~location ~text:"{b Bold!}[unfinished";;
22 | val p : Odoc_parser.t = <abstr>
23 | # let w = Odoc_parser.warnings p;;
24 | val w : Odoc_parser.Warning.t list =
25 |   [{Odoc_parser.Warning.location =
26 |      {Odoc_parser.Loc.file = "";
27 |       start = {Odoc_parser.Loc.line = 1; column = 20};
28 |       end_ = {Odoc_parser.Loc.line = 1; column = 20}};
29 |     message = "End of text is not allowed in '[...]' (code)."}]
30 | # Odoc_parser.ast p;;
31 | - : Odoc_parser.Ast.t =
32 | [{Odoc_parser__.Loc.location =
33 |    {Odoc_parser__.Loc.file = "";
34 |     start = {Odoc_parser__.Loc.line = 1; column = 0};
35 |     end_ = {Odoc_parser__.Loc.line = 1; column = 20}};
36 |   value =
37 |    `Paragraph
38 |      [{Odoc_parser__.Loc.location =
39 |         {Odoc_parser__.Loc.file = "";
40 |          start = {Odoc_parser__.Loc.line = 1; column = 0};
41 |          end_ = {Odoc_parser__.Loc.line = 1; column = 9}};
42 |        value =
43 |         `Styled
44 |           (`Bold,
45 |            [{Odoc_parser__.Loc.location =
46 |               {Odoc_parser__.Loc.file = "";
47 |                start = {Odoc_parser__.Loc.line = 1; column = 3};
48 |                end_ = {Odoc_parser__.Loc.line = 1; column = 8}};
49 |              value = `Word "Bold!"}])};
50 |       {Odoc_parser__.Loc.location =
51 |         {Odoc_parser__.Loc.file = "";
52 |          start = {Odoc_parser__.Loc.line = 1; column = 9};
53 |          end_ = {Odoc_parser__.Loc.line = 1; column = 20}};
54 |        value = `Code_span "unfinished"}]}]
55 | ```
56 | 
57 | 


--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
 1 | # Docs website
 2 | 
 3 | To build the gh-pages branch, run the script 'build.sh' from the
 4 | root of the repository:
 5 | 
 6 | ```sh
 7 | cd odoc-parser
 8 | doc/build.sh
 9 | ```
10 | 
11 | The script will build the odocl file corresponding to the API docs,
12 | then switch to the gh-pages branch and output the HTML files for
13 | them.
14 | 
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/doc/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | dune build @docgen
 4 | 
 5 | git checkout gh-pages
 6 | 
 7 | odoc html-generate -o . _build/default/doc/odoc_parser.odocl --indent
 8 | odoc html-generate -o . _build/default/doc/page-index.odocl --indent
 9 | odoc html-generate -o . _build/default/doc/page-contributing.odocl --indent
10 | odoc support-files -o .
11 | 
12 | 


--------------------------------------------------------------------------------
/doc/contributing.mld:
--------------------------------------------------------------------------------
 1 | {0 Contributing}
 2 | 
 3 | {1 Building and testing}
 4 | 
 5 | [odoc-parser] uses {{: https://dune.build/} dune} to build and run tests. We use
 6 | inline expect tests with {{: https://github.com/janestreet/ppx_expect} ppx_expect} -
 7 | see the {{:https://github.com/ocaml-doc/odoc-parser/blob/main/test/test.ml}test file}.
 8 | 
 9 | We require that the parser builds and tests pass on all versions of the OCaml
10 | compiler from 4.02.3 onwards. This is tested on each pull request by
11 | {{: https://github.com/ocurrent/ocaml-ci} ocaml-ci}.
12 | 
13 | Build the project using
14 | 
15 | {[
16 | $ dune build
17 | ]}
18 | 
19 | and test using
20 | 
21 | {[
22 | $ dune runtest
23 | ]}
24 | 
25 | if the code has changed and the tests require updating, use [dune promote].
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/doc/dune:
--------------------------------------------------------------------------------
 1 | (rule
 2 |  (target odoc_parser__.odoc)
 3 |  (deps ../src/.odoc_parser.objs/byte/odoc_parser__.cmt)
 4 |  (action
 5 |   (run odoc compile -o %{target} %{deps})))
 6 | 
 7 | (rule
 8 |  (target odoc_parser__Loc.odoc)
 9 |  (deps
10 |   (:cmti ../src/.odoc_parser.objs/byte/odoc_parser__Loc.cmti)
11 |   odoc_parser__.odoc)
12 |  (action
13 |   (run odoc compile -o %{target} -I . %{cmti})))
14 | 
15 | (rule
16 |  (target odoc_parser__Warning.odoc)
17 |  (deps
18 |   (:cmti ../src/.odoc_parser.objs/byte/odoc_parser__Warning.cmt)
19 |   odoc_parser__.odoc
20 |   odoc_parser__Loc.odoc)
21 |  (action
22 |   (run odoc compile -o %{target} -I . %{cmti})))
23 | 
24 | (rule
25 |  (target odoc_parser__Ast.odoc)
26 |  (deps
27 |   (:cmti ../src/.odoc_parser.objs/byte/odoc_parser__Ast.cmt)
28 |   odoc_parser__.odoc
29 |   odoc_parser__Loc.odoc)
30 |  (action
31 |   (run odoc compile -o %{target} -I . %{cmti})))
32 | 
33 | (rule
34 |  (target odoc_parser.odoc)
35 |  (deps
36 |   (:cmti ../src/.odoc_parser.objs/byte/odoc_parser.cmti)
37 |   odoc_parser__.odoc
38 |   odoc_parser__Loc.odoc
39 |   odoc_parser__Warning.odoc
40 |   odoc_parser__Ast.odoc)
41 |  (action
42 |   (run odoc compile -o %{target} -I . %{cmti})))
43 | 
44 | (rule
45 |  (target page-index.odoc)
46 |  (deps index.mld)
47 |  (action
48 |   (run odoc compile -o %{target} -I . %{deps})))
49 | 
50 | (rule
51 |  (target page-contributing.odoc)
52 |  (deps contributing.mld)
53 |  (action
54 |   (run odoc compile -o %{target} -I . %{deps})))
55 | 
56 | (rule
57 |  (alias docgen)
58 |  (target odoc_parser.odocl)
59 |  (deps
60 |   (:odoc odoc_parser.odoc))
61 |  (action
62 |   (run odoc link -o %{target} -I . %{odoc})))
63 | 
64 | (rule
65 |  (alias docgen)
66 |  (target page-index.odocl)
67 |  (deps
68 |   odoc_parser.odocl
69 |   page-contributing.odoc
70 |   (:odoc page-index.odoc))
71 |  (action
72 |   (run odoc link -o %{target} -I . %{odoc})))
73 | 
74 | (rule
75 |  (alias docgen)
76 |  (target page-contributing.odocl)
77 |  (deps
78 |   (:odoc page-contributing.odoc))
79 |  (action
80 |   (run odoc link -o %{target} -I . %{odoc})))
81 | 


--------------------------------------------------------------------------------
/doc/index.mld:
--------------------------------------------------------------------------------
 1 | {0 odoc-parser}
 2 | 
 3 | Odoc-parser is a parser for odoc markup, which is an extension of the original markup
 4 | language parsed by {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html}ocamldoc}.
 5 | 
 6 | OCaml code can contain specially formatted comments that are used to document the
 7 | interfaces of modules. These comments are delimited by [(**] and [*)]. This parser
 8 | is intended to be used to parse the contents of these comments.
 9 | 
10 | The parser is part of the {{:https://github.com/ocaml/odoc/}odoc} project.
11 | 
12 | Please see {{!page-contributing}Contributing} for details of the development process.
13 | 
14 | {{!Odoc_parser}here are the API docs} for version 0.9.0.
15 | 
16 | {1 Example usage}
17 | 
18 | {[
19 | # #require "odoc-parser";;
20 | # let location = {Lexing.pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0};;
21 | val location : Lexing.position =
22 |   {Lexing.pos_fname = ""; pos_lnum = 1; pos_bol = 0; pos_cnum = 0}
23 | # let p = Odoc_parser.parse_comment ~location ~text:"{b Bold!}[unfinished";;
24 | val p : Odoc_parser.t = <abstr>
25 | # let w = Odoc_parser.warnings p;;
26 | val w : Odoc_parser.Warning.t list =
27 |   [{Odoc_parser.Warning.location =
28 |      {Odoc_parser.Loc.file = "";
29 |       start = {Odoc_parser.Loc.line = 1; column = 20};
30 |       end_ = {Odoc_parser.Loc.line = 1; column = 20}};
31 |     message = "End of text is not allowed in '[...]' (code)."}]
32 | # Odoc_parser.ast p;;
33 | - : Odoc_parser.Ast.t =
34 | [{Odoc_parser__.Loc.location =
35 |    {Odoc_parser__.Loc.file = "";
36 |     start = {Odoc_parser__.Loc.line = 1; column = 0};
37 |     end_ = {Odoc_parser__.Loc.line = 1; column = 20}};
38 |   value =
39 |    `Paragraph
40 |      [{Odoc_parser__.Loc.location =
41 |         {Odoc_parser__.Loc.file = "";
42 |          start = {Odoc_parser__.Loc.line = 1; column = 0};
43 |          end_ = {Odoc_parser__.Loc.line = 1; column = 9}};
44 |        value =
45 |         `Styled
46 |           (`Bold,
47 |            [{Odoc_parser__.Loc.location =
48 |               {Odoc_parser__.Loc.file = "";
49 |                start = {Odoc_parser__.Loc.line = 1; column = 3};
50 |                end_ = {Odoc_parser__.Loc.line = 1; column = 8}};
51 |              value = `Word "Bold!"}])};
52 |       {Odoc_parser__.Loc.location =
53 |         {Odoc_parser__.Loc.file = "";
54 |          start = {Odoc_parser__.Loc.line = 1; column = 9};
55 |          end_ = {Odoc_parser__.Loc.line = 1; column = 20}};
56 |        value = `Code_span "unfinished"} ] } ]
57 | ]}
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/dune:
--------------------------------------------------------------------------------
 1 | ; Note: We disable warning 18 here to support compiler versions < 4.03
 2 | ; See https://caml.inria.fr/mantis/view.php?id=7135 for details
 3 | 
 4 | (env
 5 |  (dev
 6 |   (flags
 7 |    (:standard -g -w -18-53)))
 8 |  (release
 9 |   (flags
10 |    (:standard -g -w -18-53))))
11 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 2.8)
 2 | (name odoc-parser)
 3 | (version 2.0.0) 
 4 | 
 5 | (generate_opam_files true)
 6 | 
 7 | (source (github ocaml-doc/odoc-parser))
 8 | (license ISC)
 9 | (authors "Anton Bachin <antonbachin@yahoo.com>")
10 | (maintainers "Jon Ludlam <jon@recoil.org>")
11 | 
12 | (package
13 |  (name odoc-parser)
14 |  (synopsis "Parser for ocaml documentation comments")
15 |  (description "Odoc_parser is a library for parsing the contents of OCaml documentation
16 | comments, formatted using 'odoc' syntax, an extension of the language
17 | understood by ocamldoc.")
18 |  (depends
19 |   dune
20 |   (ocaml (>= 4.02.0))
21 |   astring
22 |   result
23 |   camlp-streams
24 |   (ppx_expect :with-test)))
25 | 
26 | 


--------------------------------------------------------------------------------
/odoc-parser.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | version: "2.0.0"
 4 | synopsis: "Parser for ocaml documentation comments"
 5 | description: """
 6 | Odoc_parser is a library for parsing the contents of OCaml documentation
 7 | comments, formatted using 'odoc' syntax, an extension of the language
 8 | understood by ocamldoc."""
 9 | maintainer: ["Jon Ludlam <jon@recoil.org>"]
10 | authors: ["Anton Bachin <antonbachin@yahoo.com>"]
11 | license: "ISC"
12 | homepage: "https://github.com/ocaml-doc/odoc-parser"
13 | bug-reports: "https://github.com/ocaml-doc/odoc-parser/issues"
14 | dev-repo: "git+https://github.com/ocaml-doc/odoc-parser.git"
15 | # This template exists because without it dune pop is dependencies and build rules
16 | # involving odoc. Since odoc depends on this package, this doesn't work.
17 | doc: "https://ocaml-doc.github.io/odoc-parser/"
18 | depends: [
19 |   "dune" {>= "2.8"}
20 |   "ocaml" {>= "4.02.0"}
21 |   "astring"
22 |   "result"
23 |   "camlp-streams"
24 |   "ppx_expect" {with-test}
25 |   ("ocaml" {< "4.04.1" & with-test} | "sexplib0" {with-test})
26 | ]
27 | build: [
28 |   ["dune" "subst"] {dev}
29 |   [
30 |     "dune"
31 |     "build"
32 |     "-p"
33 |     name
34 |     "-j"
35 |     jobs
36 |     "@install"
37 |     "@runtest" {with-test}
38 |   ]
39 | ]
40 | 
41 | 


--------------------------------------------------------------------------------
/odoc-parser.opam.template:
--------------------------------------------------------------------------------
 1 | # This template exists because without it dune pop is dependencies and build rules
 2 | # involving odoc. Since odoc depends on this package, this doesn't work.
 3 | doc: "https://ocaml-doc.github.io/odoc-parser/"
 4 | depends: [
 5 |   "dune" {>= "2.8"}
 6 |   "ocaml" {>= "4.02.0"}
 7 |   "astring"
 8 |   "result"
 9 |   "camlp-streams"
10 |   "ppx_expect" {with-test}
11 |   ("ocaml" {< "4.04.1" & with-test} | "sexplib0" {with-test})
12 | ]
13 | build: [
14 |   ["dune" "subst"] {dev}
15 |   [
16 |     "dune"
17 |     "build"
18 |     "-p"
19 |     name
20 |     "-j"
21 |     jobs
22 |     "@install"
23 |     "@runtest" {with-test}
24 |   ]
25 | ]
26 | 
27 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "odoc-parser",
 3 |   "version": "2.0.0",
 4 |   "description": "A parser for odoc markup",
 5 |   "repository": "https://github.com/ocaml-doc/odoc-parser",
 6 |   "authors": [
 7 |     "Anton Bachin <antonbachin@yahoo.com>"
 8 |   ],
 9 |   "esy": {
10 |     "build": "dune build -p odoc-parser",
11 |     "buildsInSource": "_build"
12 |   },
13 |   "dependencies": {
14 |     "@opam/astring": "^0.8.3",
15 |     "@opam/dune": "^2.8.5",
16 |     "@opam/result": "*",
17 |     "@opam/ppx_expect": "*",
18 |     "@opam/camlp-streams": "*",
19 |     "ocaml": "^4.02.3"
20 |   },
21 |   "license": "ISC",
22 |   "private": false
23 | }
24 | 


--------------------------------------------------------------------------------
/src/ast.ml:
--------------------------------------------------------------------------------
 1 | (** Abstract syntax tree representing ocamldoc comments *)
 2 | 
 3 | (** This is a syntactic representation of ocamldoc comments. See
 4 |     {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html}The manual} for a detailed
 5 |     description of the syntax understood. Note that there is no attempt at semantic
 6 |     analysis, and hence these types are capable of representing values that will
 7 |     be rejected by further stages, for example, invalid references or headings that
 8 |     are out of range. *)
 9 | 
10 | type 'a with_location = 'a Loc.with_location
11 | type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ]
12 | type alignment = [ `Left | `Center | `Right ]
13 | 
14 | type reference_kind = [ `Simple | `With_text ]
15 | (** References in doc comments can be of two kinds: [{!simple}] or [{{!ref}With text}]. *)
16 | 
17 | type inline_element =
18 |   [ `Space of string
19 |   | `Word of string
20 |   | `Code_span of string
21 |   | `Raw_markup of string option * string
22 |   | `Styled of style * inline_element with_location list
23 |   | `Reference of
24 |     reference_kind * string with_location * inline_element with_location list
25 |   | `Link of string * inline_element with_location list
26 |   | `Math_span of string  (** @since 2.0.0 *) ]
27 | (** Inline elements are equivalent to what would be found in a [span] in HTML.
28 |     Mostly these are straightforward. The [`Reference] constructor takes a triple
29 |     whose second element is the reference itself, and the third the replacement
30 |     text. Similarly the [`Link] constructor has the link itself as first parameter
31 |     and the second is the replacement text. *)
32 | 
33 | type 'a cell = 'a with_location list * [ `Header | `Data ]
34 | type 'a row = 'a cell list
35 | type 'a grid = 'a row list
36 | type 'a abstract_table = 'a grid * alignment option list option
37 | 
38 | type code_block_meta = {
39 |   language : string with_location;
40 |   tags : string with_location option;
41 | }
42 | 
43 | type code_block = {
44 |   meta : code_block_meta option;
45 |   delimiter : string option;
46 |   content : string with_location;
47 |   output : nestable_block_element with_location list option;
48 | }
49 | 
50 | and nestable_block_element =
51 |   [ `Paragraph of inline_element with_location list
52 |   | `Code_block of code_block
53 |   | `Verbatim of string
54 |   | `Modules of string with_location list
55 |   | `List of
56 |     [ `Unordered | `Ordered ]
57 |     * [ `Light | `Heavy ]
58 |     * nestable_block_element with_location list list
59 |   | `Table of table
60 |   | `Math_block of string  (** @since 2.0.0 *) ]
61 | (** Some block elements may be nested within lists or tags, but not all.
62 |     The [`List] constructor has a parameter of type [\[`Light | `Heavy\]].
63 |     This corresponds to the syntactic constructor used (see the
64 |     {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html#sss:ocamldoc-list}manual}).
65 |     *)
66 | 
67 | and table = nestable_block_element abstract_table * [ `Light | `Heavy ]
68 | 
69 | type internal_tag =
70 |   [ `Canonical of string with_location | `Inline | `Open | `Closed | `Hidden ]
71 | (** Internal tags are used to exercise fine control over the output of odoc. They
72 |     are never rendered in the output *)
73 | 
74 | type ocamldoc_tag =
75 |   [ `Author of string
76 |   | `Deprecated of nestable_block_element with_location list
77 |   | `Param of string * nestable_block_element with_location list
78 |   | `Raise of string * nestable_block_element with_location list
79 |   | `Return of nestable_block_element with_location list
80 |   | `See of
81 |     [ `Url | `File | `Document ]
82 |     * string
83 |     * nestable_block_element with_location list
84 |   | `Since of string
85 |   | `Before of string * nestable_block_element with_location list
86 |   | `Version of string ]
87 | (** ocamldoc tags are those that are specified in the {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html#ss:ocamldoc-tags}manual}) *)
88 | 
89 | type tag = [ ocamldoc_tag | internal_tag ]
90 | type heading = int * string option * inline_element with_location list
91 | 
92 | type block_element =
93 |   [ nestable_block_element | `Heading of heading | `Tag of tag ]
94 | 
95 | type t = block_element with_location list
96 | 


--------------------------------------------------------------------------------
/src/compat.ml:
--------------------------------------------------------------------------------
 1 | module Option = struct
 2 |   type 'a t = 'a option = None | Some of 'a
 3 | 
 4 |   let is_some = function None -> false | Some _ -> true
 5 |   let value ~default = function None -> default | Some x -> x
 6 | 
 7 |   let join_list l =
 8 |     let rec loop acc = function
 9 |       | [] -> Some (List.rev acc)
10 |       | Some a :: q -> loop (a :: acc) q
11 |       | None :: _ -> None
12 |     in
13 |     loop [] l
14 | end
15 | 
16 | module Char = struct
17 |   include Char
18 | 
19 |   let equal (x : char) y = x = y
20 | end
21 | 
22 | module String = struct
23 |   include String
24 | 
25 |   let for_all f str =
26 |     let rec aux i =
27 |       if i >= String.length str then true
28 |       else if f (String.get str i) then aux (i + 1)
29 |       else false
30 |     in
31 |     aux 0
32 | end
33 | 


--------------------------------------------------------------------------------
/src/compat.mli:
--------------------------------------------------------------------------------
 1 | (** @since 4.08 *)
 2 | module Option : sig
 3 |   type 'a t = 'a option = None | Some of 'a
 4 | 
 5 |   val is_some : 'a option -> bool
 6 |   (** [is_some o] is [true] if and only if [o] is [Some o]. *)
 7 | 
 8 |   val value : default:'a -> 'a option -> 'a
 9 |   val join_list : 'a option list -> 'a list option
10 | end
11 | 
12 | module Char : sig
13 |   include module type of Char
14 | 
15 |   val equal : t -> t -> bool
16 |   (** The equal function for chars.
17 |       @since 4.03.0 *)
18 | end
19 | 
20 | module String : sig
21 |   include module type of String
22 | 
23 |   val for_all : (char -> bool) -> string -> bool
24 |   (** [for_all p s] checks if all characters in [s] satisfy the preficate [p].
25 |       @since 4.13.0 *)
26 | end
27 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (ocamllex lexer)
 2 | 
 3 | (library
 4 |  (name odoc_parser)
 5 |  (public_name odoc-parser)
 6 |  (instrumentation
 7 |   (backend bisect_ppx))
 8 |  (flags
 9 |   (:standard -w -50))
10 |  (libraries astring result camlp-streams))
11 | 


--------------------------------------------------------------------------------
/src/lexer.mli:
--------------------------------------------------------------------------------
 1 | (* Internal module, not exposed *)
 2 | 
 3 | type input = {
 4 |   file : string;
 5 |   offset_to_location : int -> Loc.point;
 6 |   warnings : Warning.t list ref;
 7 |   lexbuf : Lexing.lexbuf;
 8 | }
 9 | 
10 | val token : input -> Lexing.lexbuf -> Token.t Loc.with_location
11 | 


--------------------------------------------------------------------------------
/src/lexer.mll:
--------------------------------------------------------------------------------
  1 | {
  2 | 
  3 | let unescape_word : string -> string = fun s ->
  4 |   (* The common case is that there are no escape sequences. *)
  5 |   match String.index s '\\' with
  6 |   | exception Not_found -> s
  7 |   | _ ->
  8 |     let buffer = Buffer.create (String.length s) in
  9 |     let rec scan_word index =
 10 |       if index >= String.length s then
 11 |         ()
 12 |       else
 13 |         let c = s.[index] in
 14 |         let c, increment =
 15 |           match c with
 16 |           | '\\' ->
 17 |             if index + 1 < String.length s then
 18 |               match s.[index + 1] with
 19 |               | '{' | '}' | '[' | ']' | '@' as c -> c, 2
 20 |               | _ -> c, 1
 21 |             else c, 1
 22 |           | _ -> c, 1
 23 |         in
 24 |         Buffer.add_char buffer c;
 25 |         scan_word (index + increment)
 26 |     in
 27 |     scan_word 0;
 28 |     Buffer.contents buffer
 29 | 
 30 | type math_kind =
 31 |   Inline | Block
 32 | 
 33 | let math_constr kind x =
 34 |   match kind with
 35 |   | Inline -> `Math_span x
 36 |   | Block -> `Math_block x
 37 | 
 38 | (* This is used for code and verbatim blocks. It can be done with a regular
 39 |    expression, but the regexp gets quite ugly, so a function is easier to
 40 |    understand. *)
 41 | let trim_leading_blank_lines : string -> string = fun s ->
 42 |   let rec scan_for_last_newline : int -> int -> int =
 43 |       fun index trim_until ->
 44 |     if index >= String.length s then
 45 |       String.length s
 46 |     else
 47 |       match s.[index] with
 48 |       | ' ' | '\t' | '\r' -> scan_for_last_newline (index + 1) trim_until
 49 |       | '\n' -> scan_for_last_newline (index + 1) (index + 1)
 50 |       | _ -> trim_until
 51 |   in
 52 |   let trim_until = scan_for_last_newline 0 0 in
 53 |   String.sub s trim_until (String.length s - trim_until)
 54 | 
 55 | let trim_trailing_blank_lines : string -> string = fun s ->
 56 |   let rec scan_for_last_newline : int -> int option -> int option =
 57 |       fun index trim_from ->
 58 |     if index < 0 then
 59 |       Some 0
 60 |     else
 61 |       match s.[index] with
 62 |       | ' ' | '\t' | '\r' -> scan_for_last_newline (index - 1) trim_from
 63 |       | '\n' -> scan_for_last_newline (index - 1) (Some index)
 64 |       | _ -> trim_from
 65 |   in
 66 |   let last = String.length s - 1 in
 67 |   match scan_for_last_newline last None with
 68 |   | None ->
 69 |     s
 70 |   | Some trim_from ->
 71 |     let trim_from =
 72 |       if trim_from > 0 && s.[trim_from - 1] = '\r' then
 73 |         trim_from - 1
 74 |       else
 75 |         trim_from
 76 |     in
 77 |     String.sub s 0 trim_from
 78 | 
 79 | (** Returns [None] for an empty, [Some ident] for an indented line. *)
 80 | let trim_leading_whitespace : first_line_offset:int -> string -> string =
 81 |  fun ~first_line_offset s ->
 82 |   let count_leading_whitespace line =
 83 |     let rec count_leading_whitespace' index len =
 84 |       if index = len then None
 85 |       else
 86 |         match line.[index] with
 87 |         | ' ' | '\t' -> count_leading_whitespace' (index + 1) len
 88 |         | _ -> Some index
 89 |     in
 90 |     let len = String.length line in
 91 |     (* '\r' may remain because we only split on '\n' below. This is important
 92 |        for the first line, which would be considered not empty without this check. *)
 93 |     let len = if len > 0 && line.[len - 1] = '\r' then len - 1 else len in
 94 |     count_leading_whitespace' 0 len
 95 |   in
 96 | 
 97 |   let lines = Astring.String.cuts ~sep:"\n" s in
 98 | 
 99 |   let least_amount_of_whitespace =
100 |     List.fold_left (fun least_so_far line ->
101 |       match (count_leading_whitespace line, least_so_far) with
102 |       | (Some _ as n', None) -> n'
103 |       | (Some n as n', Some least) when n < least -> n'
104 |       | _ -> least_so_far)
105 |   in
106 | 
107 |   let first_line_max_drop, least_amount_of_whitespace =
108 |     match lines with
109 |     | [] -> 0, None
110 |     | first_line :: tl ->
111 |       begin match count_leading_whitespace first_line with
112 |         | Some n ->
113 |           n, least_amount_of_whitespace (Some (first_line_offset + n)) tl
114 |         | None ->
115 |           0, least_amount_of_whitespace None tl
116 |       end
117 |   in
118 | 
119 |   match least_amount_of_whitespace with
120 |   | None ->
121 |     s
122 |   | Some least_amount_of_whitespace ->
123 |     let drop n line =
124 |       (* Since blank lines were ignored when calculating
125 |          [least_amount_of_whitespace], their length might be less than the
126 |          amount. *)
127 |       if String.length line < n then line
128 |       else String.sub line n (String.length line - n)
129 |     in
130 |     let lines =
131 |       match lines with
132 |       | [] -> []
133 |       | first_line :: tl ->
134 |         drop (min first_line_max_drop least_amount_of_whitespace) first_line
135 |         :: List.map (drop least_amount_of_whitespace) tl
136 |     in
137 |     String.concat "\n" lines
138 | 
139 | type input = {
140 |   file : string;
141 |   offset_to_location : int -> Loc.point;
142 |   warnings : Warning.t list ref;
143 |   lexbuf : Lexing.lexbuf;
144 | }
145 | 
146 | let with_location_adjustments
147 |     k input ?start_offset ?adjust_start_by ?end_offset ?adjust_end_by value =
148 | 
149 |   let start =
150 |     match start_offset with
151 |     | None -> Lexing.lexeme_start input.lexbuf
152 |     | Some s -> s
153 |   in
154 |   let start =
155 |     match adjust_start_by with
156 |     | None -> start
157 |     | Some s -> start + String.length s
158 |   in
159 |   let end_ =
160 |     match end_offset with
161 |     | None -> Lexing.lexeme_end input.lexbuf
162 |     | Some e -> e
163 |   in
164 |   let end_ =
165 |     match adjust_end_by with
166 |     | None -> end_
167 |     | Some s -> end_ - String.length s
168 |   in
169 |   let location = {
170 |     Loc.file = input.file;
171 |     start = input.offset_to_location start;
172 |     end_ = input.offset_to_location end_;
173 |   }
174 |   in
175 |   k input location value
176 | 
177 | let emit =
178 |   with_location_adjustments (fun _ -> Loc.at)
179 | 
180 | let warning =
181 |   with_location_adjustments (fun input location error ->
182 |     input.warnings := (error location) :: !(input.warnings))
183 | 
184 | let reference_token start target =
185 |   match start with
186 |   | "{!" -> `Simple_reference target
187 |   | "{{!" -> `Begin_reference_with_replacement_text target
188 |   | "{:" -> `Simple_link target
189 |   | "{{:" -> `Begin_link_with_replacement_text target
190 |   | _ -> assert false
191 | 
192 | let trim_leading_space_or_accept_whitespace input start_offset text =
193 |   match text.[0] with
194 |   | ' ' -> String.sub text 1 (String.length text - 1)
195 |   | '\t' | '\r' | '\n' -> text
196 |   | exception Invalid_argument _ -> ""
197 |   | _ ->
198 |     warning
199 |       input
200 |       ~start_offset
201 |       ~end_offset:(start_offset + 2)
202 |       Parse_error.no_leading_whitespace_in_verbatim;
203 |     text
204 | 
205 | let trim_trailing_space_or_accept_whitespace text =
206 |   match text.[String.length text - 1] with
207 |   | ' ' -> String.sub text 0 (String.length text - 1)
208 |   | '\t' | '\r' | '\n' -> text
209 |   | _ -> text
210 |   | exception Invalid_argument _ -> text
211 | 
212 | let emit_verbatim input start_offset buffer =
213 |   let t = Buffer.contents buffer in
214 |   let t = trim_trailing_space_or_accept_whitespace t in
215 |   let t = trim_leading_space_or_accept_whitespace input start_offset t in
216 |   let t = trim_leading_blank_lines t in
217 |   let t = trim_trailing_blank_lines t in
218 |   emit input (`Verbatim t) ~start_offset
219 | 
220 | (* The locations have to be treated carefully in this function. We need to ensure that
221 |    the []`Code_block] location matches the entirety of the block including the terminator,
222 |    and the content location is precicely the location of the text of the code itself.
223 |    Note that the location reflects the content _without_ stripping of whitespace, whereas
224 |    the value of the content in the tree has whitespace stripped from the beginning,
225 |    and trailing empty lines removed. *)
226 | let emit_code_block ~start_offset content_offset input metadata delim terminator c has_results =
227 |   let c = Buffer.contents c |> trim_trailing_blank_lines in
228 |   let content_location = input.offset_to_location content_offset in
229 |   let c =
230 |     with_location_adjustments
231 |       (fun _ _location c ->
232 |          let first_line_offset = content_location.column in
233 |          trim_leading_whitespace ~first_line_offset c)
234 |       input c
235 |   in
236 |   let c = trim_leading_blank_lines c in
237 |   let c = with_location_adjustments ~adjust_end_by:terminator ~start_offset:content_offset (fun _ -> Loc.at) input c in
238 |   emit ~start_offset input (`Code_block (metadata, delim, c, has_results))
239 | 
240 | let heading_level input level =
241 |   if String.length level >= 2 && level.[0] = '0' then begin
242 |     warning
243 |       input ~start_offset:1 (Parse_error.leading_zero_in_heading_level level)
244 |   end;
245 |   int_of_string level
246 | 
247 | let buffer_add_lexeme buffer lexbuf =
248 |   Buffer.add_string buffer (Lexing.lexeme lexbuf)
249 | 
250 | }
251 | 
252 | let markup_char =
253 |   ['{' '}' '[' ']' '@' '|']
254 | let space_char =
255 |   [' ' '\t' '\n' '\r']
256 | let bullet_char =
257 |   ['-' '+']
258 | 
259 | let word_char =
260 |   (_ # markup_char # space_char # bullet_char) | ('\\' markup_char)
261 | 
262 | let horizontal_space =
263 |   [' ' '\t']
264 | let newline =
265 |   '\n' | "\r\n"
266 | 
267 | let reference_start =
268 |   "{!" | "{{!" | "{:" | "{{:"
269 | 
270 | let raw_markup =
271 |   ([^ '%'] | '%'+ [^ '%' '}'])* '%'*
272 | 
273 | let raw_markup_target =
274 |   ([^ ':' '%'] | '%'+ [^ ':' '%' '}'])* '%'*
275 | 
276 | let language_tag_char =
277 |   ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' ]
278 | 
279 | let delim_char =
280 |   ['a'-'z' 'A'-'Z' '0'-'9' '_' ]
281 | 
282 | rule reference_paren_content input start ref_offset start_offset depth_paren
283 |   buffer =
284 |   parse
285 |   | '('
286 |     {
287 |       buffer_add_lexeme buffer lexbuf ;
288 |       reference_paren_content input start ref_offset start_offset
289 |         (depth_paren + 1) buffer lexbuf }
290 |   | ')'
291 |     {
292 |       buffer_add_lexeme buffer lexbuf ;
293 |       if depth_paren = 0 then
294 |         reference_content input start ref_offset buffer lexbuf
295 |       else
296 |         reference_paren_content input start ref_offset start_offset
297 |           (depth_paren - 1) buffer lexbuf }
298 |   | eof
299 |     { warning
300 |         input
301 |         ~start_offset
302 |         (Parse_error.unclosed_bracket ~bracket:"(") ;
303 |       Buffer.contents buffer }
304 |   | _
305 |     {
306 |       buffer_add_lexeme buffer lexbuf ;
307 |       reference_paren_content input start ref_offset start_offset depth_paren
308 |         buffer lexbuf }
309 | 
310 | and reference_content input start start_offset buffer = parse
311 |   | '}'
312 |     {
313 |       Buffer.contents buffer
314 |     }
315 |   | '('
316 |     {
317 |       buffer_add_lexeme buffer lexbuf ;
318 |       reference_paren_content input start start_offset
319 |         (Lexing.lexeme_start lexbuf) 0 buffer lexbuf
320 |     }
321 |   | '"' [^ '"']* '"'
322 |     {
323 |       buffer_add_lexeme buffer lexbuf ;
324 |       reference_content input start start_offset buffer lexbuf
325 |     }
326 |   | eof
327 |     { warning
328 |         input
329 |         ~start_offset
330 |         (Parse_error.unclosed_bracket ~bracket:start) ;
331 |       Buffer.contents buffer }
332 |   | _
333 |     {
334 |       buffer_add_lexeme buffer lexbuf ;
335 |       reference_content input start start_offset buffer lexbuf }
336 | 
337 | and token input = parse
338 |   | horizontal_space* eof
339 |     { emit input `End }
340 | 
341 |   | ((horizontal_space* newline as prefix)
342 |     horizontal_space* ((newline horizontal_space*)+ as suffix) as ws)
343 |     { emit input (`Blank_line ws) ~adjust_start_by:prefix ~adjust_end_by:suffix }
344 | 
345 |   | (horizontal_space* newline horizontal_space* as ws)
346 |     { emit input (`Single_newline ws) }
347 | 
348 |   | (horizontal_space+ as ws)
349 |     { emit input (`Space ws) }
350 | 
351 |   | (horizontal_space* (newline horizontal_space*)? as p) '}'
352 |     { emit input `Right_brace ~adjust_start_by:p }
353 | 
354 |   | '|'
355 |     { emit input `Bar }
356 | 
357 |   | word_char (word_char | bullet_char | '@')*
358 |   | bullet_char (word_char | bullet_char | '@')+ as w
359 |     { emit input (`Word (unescape_word w)) }
360 | 
361 |   | '['
362 |     { code_span
363 |         (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf }
364 | 
365 |   | '-'
366 |     { emit input `Minus }
367 | 
368 |   | '+'
369 |     { emit input `Plus }
370 | 
371 |   | "{b"
372 |     { emit input (`Begin_style `Bold) }
373 | 
374 |   | "{i"
375 |     { emit input (`Begin_style `Italic) }
376 | 
377 |   | "{e"
378 |     { emit input (`Begin_style `Emphasis) }
379 | 
380 |   | "{L"
381 |     { emit input (`Begin_paragraph_style `Left) }
382 | 
383 |   | "{C"
384 |     { emit input (`Begin_paragraph_style  `Center) }
385 | 
386 |   | "{R"
387 |     { emit input (`Begin_paragraph_style  `Right) }
388 | 
389 |   | "{^"
390 |     { emit input (`Begin_style `Superscript) }
391 | 
392 |   | "{_"
393 |     { emit input (`Begin_style `Subscript) }
394 | 
395 |   | "{math" space_char
396 |     { math Block (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf }
397 | 
398 |   | "{m" horizontal_space
399 |     { math Inline (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf }
400 | 
401 | 
402 |   | "{!modules:" ([^ '}']* as modules) '}'
403 |     { emit input (`Modules modules) }
404 | 
405 |   | (reference_start as start)
406 |     {
407 |       let start_offset = Lexing.lexeme_start lexbuf in
408 |       let target =
409 |         reference_content input start start_offset (Buffer.create 16) lexbuf
410 |       in
411 |       let token = (reference_token start target) in
412 |       emit ~start_offset input token }
413 | 
414 |   | "{["
415 |     { code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf }
416 | 
417 |   | (("{" (delim_char* as delim) "@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_)
418 |     {
419 |       let start_offset = Lexing.lexeme_start lexbuf in
420 |       let lang_tag =
421 |         with_location_adjustments ~adjust_start_by:prefix (fun _ -> Loc.at) input lang_tag_
422 |       in
423 |       let emit_truncated_code_block () =
424 |         let empty_content = with_location_adjustments (fun _ -> Loc.at) input "" in
425 |         emit ~start_offset input (`Code_block (Some (lang_tag, None), delim, empty_content, false))
426 |       in
427 |       match code_block_metadata_tail input lexbuf with
428 |       | `Ok metadata -> code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, metadata)) (Buffer.create 256) delim input lexbuf
429 |       | `Eof ->
430 |           warning input ~start_offset Parse_error.truncated_code_block_meta;
431 |           emit_truncated_code_block ()
432 |       | `Invalid_char c ->
433 |           warning input ~start_offset
434 |             (Parse_error.language_tag_invalid_char lang_tag_ c);
435 |           code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, None)) (Buffer.create 256) delim input lexbuf
436 |     }
437 | 
438 |   | "{@" horizontal_space* '['
439 |     {
440 |       warning input Parse_error.no_language_tag_in_meta;
441 |       code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf
442 |     }
443 | 
444 |   | "{v"
445 |     { verbatim
446 |         (Buffer.create 1024) None (Lexing.lexeme_start lexbuf) input lexbuf }
447 | 
448 |   | "{%" ((raw_markup_target as target) ':')? (raw_markup as s)
449 |     ("%}" | eof as e)
450 |     { let token = `Raw_markup (target, s) in
451 |       if e <> "%}" then
452 |         warning
453 |           input
454 |           ~start_offset:(Lexing.lexeme_end lexbuf)
455 |           (Parse_error.not_allowed
456 |             ~what:(Token.describe `End)
457 |             ~in_what:(Token.describe token));
458 |       emit input token }
459 | 
460 |   | "{ul"
461 |     { emit input (`Begin_list `Unordered) }
462 | 
463 |   | "{ol"
464 |     { emit input (`Begin_list `Ordered) }
465 | 
466 |   | "{li"
467 |     { emit input (`Begin_list_item `Li) }
468 | 
469 |   | "{-"
470 |     { emit input (`Begin_list_item `Dash) }
471 | 
472 |   | "{table"
473 |     { emit input (`Begin_table_heavy) }
474 | 
475 |   | "{t"
476 |     { emit input (`Begin_table_light) }
477 | 
478 |   | "{tr"
479 |     { emit input `Begin_table_row }
480 | 
481 |   | "{th"
482 |     { emit input (`Begin_table_cell `Header) }
483 | 
484 |   | "{td"
485 |     { emit input (`Begin_table_cell `Data) }
486 | 
487 |   | '{' (['0'-'9']+ as level) ':' (([^ '}'] # space_char)* as label)
488 |     { emit
489 |         input (`Begin_section_heading (heading_level input level, Some label)) }
490 | 
491 |   | '{' (['0'-'9']+ as level)
492 |     { emit input (`Begin_section_heading (heading_level input level, None)) }
493 | 
494 |   | "@author" ((horizontal_space+ [^ '\r' '\n']*)? as author)
495 |     { emit input (`Tag (`Author author)) }
496 | 
497 |   | "@deprecated"
498 |     { emit input (`Tag `Deprecated) }
499 | 
500 |   | "@param" horizontal_space+ ((_ # space_char)+ as name)
501 |     { emit input (`Tag (`Param name)) }
502 | 
503 |   | ("@raise" | "@raises") horizontal_space+ ((_ # space_char)+ as name)
504 |     { emit input (`Tag (`Raise name)) }
505 | 
506 |   | ("@return" | "@returns")
507 |     { emit input (`Tag `Return) }
508 | 
509 |   | "@see" horizontal_space* '<' ([^ '>']* as url) '>'
510 |     { emit input (`Tag (`See (`Url, url))) }
511 | 
512 |   | "@see" horizontal_space* '\'' ([^ '\'']* as filename) '\''
513 |     { emit input (`Tag (`See (`File, filename))) }
514 | 
515 |   | "@see" horizontal_space* '"' ([^ '"']* as name) '"'
516 |     { emit input (`Tag (`See (`Document, name))) }
517 | 
518 |   | "@since" ((horizontal_space+ [^ '\r' '\n']*)? as version)
519 |     { emit input (`Tag (`Since version)) }
520 | 
521 |   | "@before" horizontal_space+ ((_ # space_char)+ as version)
522 |     { emit input (`Tag (`Before version)) }
523 | 
524 |   | "@version" ((horizontal_space+ [^ '\r' '\n']*)? as version)
525 |     { emit input (`Tag (`Version version)) }
526 | 
527 |   | "@canonical" ((horizontal_space+ [^ '\r' '\n']*)? as identifier)
528 |     { emit input (`Tag (`Canonical identifier)) }
529 | 
530 |   | "@inline"
531 |     { emit input (`Tag `Inline) }
532 | 
533 |   | "@open"
534 |     { emit input (`Tag `Open) }
535 | 
536 |   | "@closed"
537 |     { emit input (`Tag `Closed) }
538 | 
539 |   | "@hidden"
540 |     { emit input (`Tag `Hidden) }
541 | 
542 |   | "]}"
543 |     { emit input `Right_code_delimiter}
544 | 
545 |   | '{'
546 |     { try bad_markup_recovery (Lexing.lexeme_start lexbuf) input lexbuf
547 |       with Failure _ ->
548 |         warning
549 |           input
550 |           (Parse_error.bad_markup
551 |             "{" ~suggestion:"escape the brace with '\\{'.");
552 |         emit input (`Word "{") }
553 | 
554 |   | ']'
555 |     { warning input Parse_error.unpaired_right_bracket;
556 |       emit input (`Word "]") }
557 | 
558 |   | "@param"
559 |     { warning input Parse_error.truncated_param;
560 |       emit input (`Tag (`Param "")) }
561 | 
562 |   | ("@raise" | "@raises") as tag
563 |     { warning input (Parse_error.truncated_raise tag);
564 |       emit input (`Tag (`Raise "")) }
565 | 
566 |   | "@before"
567 |     { warning input Parse_error.truncated_before;
568 |       emit input (`Tag (`Before "")) }
569 | 
570 |   | "@see"
571 |     { warning input Parse_error.truncated_see;
572 |       emit input (`Word "@see") }
573 | 
574 |   | '@' ['a'-'z' 'A'-'Z']+ as tag
575 |     { warning input (Parse_error.unknown_tag tag);
576 |       emit input (`Word tag) }
577 | 
578 |   | '@'
579 |     { warning input Parse_error.stray_at;
580 |       emit input (`Word "@") }
581 | 
582 |   | '\r'
583 |     { warning input Parse_error.stray_cr;
584 |       token input lexbuf }
585 | 
586 |   | "{!modules:" ([^ '}']* as modules) eof
587 |     { warning
588 |         input
589 |         ~start_offset:(Lexing.lexeme_end lexbuf)
590 |         (Parse_error.not_allowed
591 |           ~what:(Token.describe `End)
592 |           ~in_what:(Token.describe (`Modules "")));
593 |       emit input (`Modules modules) }
594 | 
595 | and code_span buffer nesting_level start_offset input = parse
596 |   | ']'
597 |     { if nesting_level = 0 then
598 |         emit input (`Code_span (Buffer.contents buffer)) ~start_offset
599 |       else begin
600 |         Buffer.add_char buffer ']';
601 |         code_span buffer (nesting_level - 1) start_offset input lexbuf
602 |       end }
603 | 
604 |   | '['
605 |     { Buffer.add_char buffer '[';
606 |       code_span buffer (nesting_level + 1) start_offset input lexbuf }
607 | 
608 |   | '\\' ('[' | ']' as c)
609 |     { Buffer.add_char buffer c;
610 |       code_span buffer nesting_level start_offset input lexbuf }
611 | 
612 |   | newline newline
613 |     { warning
614 |         input
615 |         (Parse_error.not_allowed
616 |           ~what:(Token.describe (`Blank_line "\n\n"))
617 |           ~in_what:(Token.describe (`Code_span "")));
618 |       Buffer.add_char buffer '\n';
619 |       code_span buffer nesting_level start_offset input lexbuf }
620 | 
621 |   | eof
622 |     { warning
623 |         input
624 |         (Parse_error.not_allowed
625 |           ~what:(Token.describe `End)
626 |           ~in_what:(Token.describe (`Code_span "")));
627 |       emit input (`Code_span (Buffer.contents buffer)) ~start_offset }
628 | 
629 |   | _ as c
630 |     { Buffer.add_char buffer c;
631 |       code_span buffer nesting_level start_offset input lexbuf }
632 | 
633 | and math kind buffer nesting_level start_offset input = parse
634 |   | '}'
635 |     { if nesting_level == 0 then
636 |         emit input (math_constr kind (Buffer.contents buffer)) ~start_offset
637 |       else begin
638 |         Buffer.add_char buffer '}';
639 |         math kind buffer (nesting_level - 1) start_offset input lexbuf
640 |       end
641 |       }
642 |   | '{'
643 |     { Buffer.add_char buffer '{';
644 |       math kind buffer (nesting_level + 1) start_offset input lexbuf }
645 |   | ("\\{" | "\\}") as s
646 |     { Buffer.add_string buffer s;
647 |       math kind buffer nesting_level start_offset input lexbuf }
648 |   | (newline) as s
649 |     {
650 |       match kind with
651 |       | Inline ->
652 |         warning
653 |           input
654 |           (Parse_error.not_allowed
655 |             ~what:(Token.describe (`Blank_line "\n"))
656 |             ~in_what:(Token.describe (math_constr kind "")));
657 |         Buffer.add_char buffer '\n';
658 |         math kind buffer nesting_level start_offset input lexbuf
659 |       | Block ->
660 |         Buffer.add_string buffer s;
661 |         math kind buffer nesting_level start_offset input lexbuf
662 |     }
663 |   | eof
664 |     { warning
665 |         input
666 |         (Parse_error.not_allowed
667 |           ~what:(Token.describe `End)
668 |           ~in_what:(Token.describe (math_constr kind "")));
669 |       emit input (math_constr kind (Buffer.contents buffer)) ~start_offset }
670 |   | _ as c
671 |     { Buffer.add_char buffer c;
672 |       math kind buffer nesting_level start_offset input lexbuf }
673 | 
674 | and verbatim buffer last_false_terminator start_offset input = parse
675 |   | (space_char as c) "v}"
676 |     { Buffer.add_char buffer c;
677 |       emit_verbatim input start_offset buffer }
678 | 
679 |   | "v}"
680 |     { Buffer.add_string buffer "v}";
681 |       verbatim
682 |         buffer (Some (Lexing.lexeme_start lexbuf)) start_offset input lexbuf }
683 | 
684 |   | eof
685 |     { begin match last_false_terminator with
686 |       | None ->
687 |         warning
688 |           input
689 |           (Parse_error.not_allowed
690 |             ~what:(Token.describe `End)
691 |             ~in_what:(Token.describe (`Verbatim "")))
692 |       | Some location ->
693 |         warning
694 |           input
695 |           ~start_offset:location
696 |           ~end_offset:(location + 2)
697 |           Parse_error.no_trailing_whitespace_in_verbatim
698 |       end;
699 |       emit_verbatim input start_offset buffer }
700 | 
701 |   | _ as c
702 |     { Buffer.add_char buffer c;
703 |       verbatim buffer last_false_terminator start_offset input lexbuf }
704 | 
705 | 
706 | 
707 | and bad_markup_recovery start_offset input = parse
708 |   | [^ '}']+ as text '}' as rest
709 |     { let suggestion =
710 |         Printf.sprintf "did you mean '{!%s}' or '[%s]'?" text text in
711 |       warning
712 |         input
713 |         ~start_offset
714 |         (Parse_error.bad_markup ("{" ^ rest) ~suggestion);
715 |       emit input (`Code_span text) ~start_offset}
716 | 
717 | (* The second field of the metadata.
718 |    This rule keeps whitespaces and newlines in the 'metadata' field except the
719 |    ones just before the '['. *)
720 | and code_block_metadata_tail input = parse
721 |  | (space_char+ as prefix)
722 |    ((space_char* (_ # space_char # ['['])+)+ as meta)
723 |    ((space_char* '[') as suffix)
724 |     {
725 |       let meta =
726 |         with_location_adjustments ~adjust_start_by:prefix ~adjust_end_by:suffix (fun _ -> Loc.at) input meta
727 |       in
728 |       `Ok (Some meta)
729 |     }
730 |   | (newline | horizontal_space)* '['
731 |     { `Ok None }
732 |   | _ as c
733 |     { `Invalid_char c }
734 |   | eof
735 |     { `Eof }
736 | 
737 | and code_block start_offset content_offset metadata prefix delim input = parse
738 |   | ("]" (delim_char* as delim') "[") as terminator
739 |     { if delim = delim'
740 |       then emit_code_block ~start_offset content_offset input metadata delim terminator prefix true
741 |       else
742 |         (Buffer.add_string prefix terminator;
743 |         code_block start_offset content_offset metadata prefix delim input lexbuf) }
744 |   | ("]" (delim_char* as delim') "}") as terminator
745 |     { 
746 |       if delim = delim'
747 |       then emit_code_block ~start_offset content_offset input metadata delim terminator prefix false
748 |       else (
749 |         Buffer.add_string prefix terminator;
750 |         code_block start_offset content_offset metadata prefix delim input lexbuf
751 |       )
752 |     }
753 |   | eof
754 |     {
755 |       warning input ~start_offset Parse_error.truncated_code_block;
756 |       emit_code_block ~start_offset content_offset input metadata delim "" prefix false
757 |     }
758 |   | (_ as c)
759 |     {
760 |       Buffer.add_char prefix c;
761 |       code_block start_offset content_offset metadata prefix delim input lexbuf
762 |     }
763 | 


--------------------------------------------------------------------------------
/src/loc.ml:
--------------------------------------------------------------------------------
 1 | type point = { line : int; column : int }
 2 | type span = { file : string; start : point; end_ : point }
 3 | type +'a with_location = { location : span; value : 'a }
 4 | 
 5 | let at location value = { location; value }
 6 | let location { location; _ } = location
 7 | let value { value; _ } = value
 8 | let map f annotated = { annotated with value = f annotated.value }
 9 | let same annotated value = { annotated with value }
10 | 
11 | let span spans =
12 |   match spans with
13 |   | [] ->
14 |       {
15 |         file = "_none_";
16 |         start = { line = 1; column = 0 };
17 |         end_ = { line = 1; column = 0 };
18 |       }
19 |   | first :: spans ->
20 |       let last = List.fold_left (fun _ span -> span) first spans in
21 |       { file = first.file; start = first.start; end_ = last.end_ }
22 | 
23 | let nudge_start offset span =
24 |   { span with start = { span.start with column = span.start.column + offset } }
25 | 
26 | let spans_multiple_lines = function
27 |   | {
28 |       location =
29 |         { start = { line = start_line; _ }; end_ = { line = end_line; _ }; _ };
30 |       _;
31 |     } ->
32 |       end_line > start_line
33 | 


--------------------------------------------------------------------------------
/src/loc.mli:
--------------------------------------------------------------------------------
 1 | (** Locations in files. *)
 2 | 
 3 | (** This module concerns locations in source files, both points indicating a specific
 4 |     character and spans between two points. *)
 5 | 
 6 | (** {2 Basic types} *)
 7 | 
 8 | type point = { line : int; column : int }
 9 | (** A specific character *)
10 | 
11 | type span = { file : string; start : point; end_ : point }
12 | (** A range of characters between [start] and [end_] in a particular file *)
13 | 
14 | val span : span list -> span
15 | (** [span spans] takes a list of spans and returns a single {!type-span} starting
16 |     at the start of the first span and ending at the end of the final span *)
17 | 
18 | val nudge_start : int -> span -> span
19 | (** This adjusts only the column number, implicitly assuming that the offset does
20 |    not move the location across a newline character. *)
21 | 
22 | (** {2 Located values} *)
23 | 
24 | type +'a with_location = { location : span; value : 'a }
25 | (** Describes values located at a particular span *)
26 | 
27 | val at : span -> 'a -> 'a with_location
28 | (** Constructor for {!with_location} *)
29 | 
30 | val location : 'a with_location -> span
31 | (** Returns the location of a located value *)
32 | 
33 | val value : 'a with_location -> 'a
34 | (** Returns the value of a located value *)
35 | 
36 | val map : ('a -> 'b) -> 'a with_location -> 'b with_location
37 | (** Map over a located value without changing its location *)
38 | 
39 | val same : _ with_location -> 'b -> 'b with_location
40 | (** [same x y] retuns the value y wrapped in a {!with_location} whose
41 |     location is that of [x] *)
42 | 
43 | val spans_multiple_lines : _ with_location -> bool
44 | (** [spans_multiple_lines x] checks to see whether [x] is located
45 |     on a single line or whether it covers more than one. *)
46 | 


--------------------------------------------------------------------------------
/src/odoc_parser.ml:
--------------------------------------------------------------------------------
  1 | module Ast = Ast
  2 | module Loc = Loc
  3 | module Warning = Warning
  4 | 
  5 | type t = {
  6 |   ast : Ast.t;
  7 |   warnings : Warning.t list;
  8 |   reversed_newlines : (int * int) list;
  9 |   original_pos : Lexing.position;
 10 | }
 11 | 
 12 | (* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard
 13 |    [Lexing] module.
 14 | 
 15 |    As the [Lexing] module reads the input, it keeps track of only the byte
 16 |    offset into the input. It is normally the job of each particular lexer
 17 |    implementation to decide which character sequences count as newlines, and
 18 |    keep track of line/column locations. This is usually done by writing several
 19 |    extra regular expressions, and calling [Lexing.new_line] at the right time.
 20 | 
 21 |    Keeping track of newlines like this makes the odoc lexer somewhat too
 22 |    diffiult to read, however. To factor the aspect of keeping track of newlines
 23 |    fully out of the odoc lexer, instead of having it keep track of newlines as
 24 |    it's scanning the input, the input is pre-scanned before feeding it into the
 25 |    lexer. A table of all the newlines is assembled, and used to convert offsets
 26 |    into line/column pairs after the lexer emits tokens.
 27 | 
 28 |    [reversed_newlines ~input ~comment_location offset] returns a list of pairs
 29 |    of (line number * offset), allowing the easy conversion from the byte
 30 |    [offset], relative to the beginning of a comment, into a location, relative
 31 |    to the beginning of the file containing the comment. This can then be used
 32 |    to convert from byte offset to line number / column number - a Loc.point,
 33 |    and additionally for converting back from a Loc.point to a Lexing.position.
 34 | *)
 35 | 
 36 | let reversed_newlines : input:string -> (int * int) list =
 37 |  fun ~input ->
 38 |   let rec find_newlines line_number input_index newlines_accumulator =
 39 |     if input_index >= String.length input then newlines_accumulator
 40 |     else if
 41 |       (* This is good enough to detect CR-LF also. *)
 42 |       input.[input_index] = '\n'
 43 |     then
 44 |       find_newlines (line_number + 1) (input_index + 1)
 45 |         ((line_number + 1, input_index + 1) :: newlines_accumulator)
 46 |     else find_newlines line_number (input_index + 1) newlines_accumulator
 47 |   in
 48 |   find_newlines 1 0 [ (1, 0) ]
 49 | 
 50 | (* [offset_to_location] converts from an offset within the comment text, where
 51 |    [reversed_newlines] is the result of the above function and [comment_location]
 52 |    is the location of the comment within its file. The function is meant to be
 53 |    partially applied to its first two arguments, at which point it is passed to
 54 |    the lexer, so it can apply the table to its emitted tokens. *)
 55 | 
 56 | let offset_to_location :
 57 |     reversed_newlines:(int * int) list ->
 58 |     comment_location:Lexing.position ->
 59 |     int ->
 60 |     Loc.point =
 61 |  fun ~reversed_newlines ~comment_location byte_offset ->
 62 |   let rec scan_to_last_newline reversed_newlines_prefix =
 63 |     match reversed_newlines_prefix with
 64 |     | [] -> assert false
 65 |     | (line_in_comment, line_start_offset) :: prefix ->
 66 |         if line_start_offset > byte_offset then scan_to_last_newline prefix
 67 |         else
 68 |           let column_in_comment = byte_offset - line_start_offset in
 69 |           let line_in_file =
 70 |             line_in_comment + comment_location.Lexing.pos_lnum - 1
 71 |           in
 72 |           let column_in_file =
 73 |             if line_in_comment = 1 then
 74 |               column_in_comment + comment_location.Lexing.pos_cnum
 75 |               - comment_location.Lexing.pos_bol
 76 |             else column_in_comment
 77 |           in
 78 |           { Loc.line = line_in_file; column = column_in_file }
 79 |   in
 80 |   scan_to_last_newline reversed_newlines
 81 | 
 82 | (* Given a Loc.point and the result of [parse_comment], this function returns
 83 |    a valid Lexing.position *)
 84 | let position_of_point : t -> Loc.point -> Lexing.position =
 85 |  fun v point ->
 86 |   let { reversed_newlines; original_pos; _ } = v in
 87 |   let line_in_comment = point.Loc.line - original_pos.pos_lnum + 1 in
 88 |   let rec find_pos_bol reversed_newlines_prefix =
 89 |     match reversed_newlines_prefix with
 90 |     | [] -> assert false
 91 |     | [ _ ] -> original_pos.pos_bol
 92 |     | (line_number, line_start_offset) :: prefix ->
 93 |         if line_number > line_in_comment then find_pos_bol prefix
 94 |         else line_start_offset + original_pos.pos_cnum
 95 |   in
 96 |   let pos_bol = find_pos_bol reversed_newlines in
 97 |   let pos_lnum = point.Loc.line in
 98 |   let pos_cnum = point.column + pos_bol in
 99 |   let pos_fname = original_pos.pos_fname in
100 |   { Lexing.pos_bol; pos_lnum; pos_cnum; pos_fname }
101 | 
102 | (* The main entry point for this module *)
103 | let parse_comment ~location ~text =
104 |   let warnings = ref [] in
105 |   let reversed_newlines = reversed_newlines ~input:text in
106 |   let token_stream =
107 |     let lexbuf = Lexing.from_string text in
108 |     let offset_to_location =
109 |       offset_to_location ~reversed_newlines ~comment_location:location
110 |     in
111 |     let input : Lexer.input =
112 |       { file = location.Lexing.pos_fname; offset_to_location; warnings; lexbuf }
113 |     in
114 |     Stream.from (fun _token_index -> Some (Lexer.token input lexbuf))
115 |   in
116 |   let ast, warnings = Syntax.parse warnings token_stream in
117 |   { ast; warnings; reversed_newlines; original_pos = location }
118 | 
119 | (* Accessor functions, as [t] is opaque *)
120 | let warnings t = t.warnings
121 | let ast t = t.ast
122 | 


--------------------------------------------------------------------------------
/src/odoc_parser.mli:
--------------------------------------------------------------------------------
 1 | (** Parser for ocamldoc formatted comments. *)
 2 | 
 3 | type t
 4 | (** [type t] is the result of parsing. *)
 5 | 
 6 | val parse_comment : location:Lexing.position -> text:string -> t
 7 | (** [parse_comment ~location ~text] parses [text] as an ocamldoc formatted
 8 |     string. The parser will try to recover from any invalid syntax encountered,
 9 |     and therefore this will always produce a result without raising exceptions
10 |     with zero or more warnings. The location passed in should represent the
11 |     start of the {i content} of the documentation comment - so for a line such
12 |     as
13 |     {[
14 |       (** A comment starting in the first column (0) *)
15 |     ]}
16 |     the location should represent the space immediately before the [A], so the
17 |     in the 4th column (e.g. [{... pos_bol=0; pos_cnum=3 }]) *)
18 | 
19 | module Ast = Ast
20 | module Loc = Loc
21 | 
22 | (** Warnings produced during parsing. *)
23 | module Warning : sig
24 |   type t = Warning.t = { location : Loc.span; message : string }
25 |   (** Warnings are represented as record containing the human-readable text 
26 |       of the warning alongside the location of the offending text in the source *)
27 | 
28 |   val pp : Format.formatter -> t -> unit
29 |   (** Pretty printer for {!t} *)
30 | 
31 |   val to_string : t -> string
32 |   (** [to_string] will format the location and warning as text to be
33 |       printed. *)
34 | end
35 | 
36 | val warnings : t -> Warning.t list
37 | (** Extract any warnings from the parser result. *)
38 | 
39 | val ast : t -> Ast.t
40 | (** Extract the {!Ast.t} from the parser result. *)
41 | 
42 | val position_of_point : t -> Loc.point -> Lexing.position
43 | (** Helper function to turn the internal representation of positions back into
44 |     the usual representation in the Lexing module. Note that this relies on
45 |     the information passed in {!parse_comment}, and hence requires the result
46 |     of that call in addition to the {!Loc.point} being converted. *)
47 | 


--------------------------------------------------------------------------------
/src/parse_error.ml:
--------------------------------------------------------------------------------
 1 | let capitalize_ascii = Astring.String.Ascii.capitalize
 2 | 
 3 | let bad_markup : ?suggestion:string -> string -> Loc.span -> Warning.t =
 4 |  fun ?suggestion -> Warning.make ?suggestion "'%s': bad markup."
 5 | 
 6 | let leading_zero_in_heading_level : string -> Loc.span -> Warning.t =
 7 |   Warning.make "'%s': leading zero in heading level."
 8 | 
 9 | let should_not_be_empty : what:string -> Loc.span -> Warning.t =
10 |  fun ~what -> Warning.make "%s should not be empty." (capitalize_ascii what)
11 | 
12 | let markup_should_not_be_used : what:string -> Loc.span -> Warning.t =
13 |  fun ~what ->
14 |   Warning.make "%s should not be used because it has no effect."
15 |     (capitalize_ascii what)
16 | 
17 | let should_begin_on_its_own_line : what:string -> Loc.span -> Warning.t =
18 |  fun ~what ->
19 |   Warning.make "%s should begin on its own line." (capitalize_ascii what)
20 | 
21 | let should_be_followed_by_whitespace : what:string -> Loc.span -> Warning.t =
22 |  fun ~what ->
23 |   Warning.make "%s should be followed by space, a tab, or a new line."
24 |     (capitalize_ascii what)
25 | 
26 | let not_allowed :
27 |     ?suggestion:string -> what:string -> in_what:string -> Loc.span -> Warning.t
28 |     =
29 |  fun ?suggestion ~what ~in_what ->
30 |   Warning.make ?suggestion "%s is not allowed in %s." (capitalize_ascii what)
31 |     in_what
32 | 
33 | let unclosed_bracket :
34 |     ?suggestion:string -> bracket:string -> Loc.span -> Warning.t =
35 |  fun ?suggestion ~bracket ->
36 |   Warning.make ?suggestion "Open bracket '%s' is never closed." bracket
37 | 
38 | let no_leading_whitespace_in_verbatim : Loc.span -> Warning.t =
39 |   Warning.make "'{v' should be followed by whitespace."
40 | 
41 | let no_trailing_whitespace_in_verbatim : Loc.span -> Warning.t =
42 |   Warning.make "'v}' should be preceded by whitespace."
43 | 
44 | let stray_at : Loc.span -> Warning.t = Warning.make "Stray '@'."
45 | 
46 | let stray_cr : Loc.span -> Warning.t =
47 |   Warning.make "Stray '\\r' (carriage return character)."
48 | 
49 | let truncated_before : Loc.span -> Warning.t =
50 |   Warning.make "'@before' expects version number on the same line."
51 | 
52 | let truncated_param : Loc.span -> Warning.t =
53 |   Warning.make "'@param' expects parameter name on the same line."
54 | 
55 | let truncated_raise : string -> Loc.span -> Warning.t =
56 |   Warning.make "'%s' expects exception constructor on the same line."
57 | 
58 | let truncated_see : Loc.span -> Warning.t =
59 |   Warning.make
60 |     "'@see' should be followed by <url>, 'file', or \"document title\"."
61 | 
62 | let unknown_tag : string -> Loc.span -> Warning.t =
63 |   Warning.make "Unknown tag '%s'."
64 | 
65 | let unpaired_right_brace : Loc.span -> Warning.t =
66 |   Warning.make ~suggestion:"try '\\}'." "Unpaired '}' (end of markup)."
67 | 
68 | let unpaired_right_bracket : Loc.span -> Warning.t =
69 |   Warning.make ~suggestion:"try '\\]'." "Unpaired ']' (end of code)."
70 | 
71 | let no_language_tag_in_meta : Loc.span -> Warning.t =
72 |   Warning.make ~suggestion:"try '{[ ... ]}' or '{@ocaml[ ... ]}'."
73 |     "'{@' should be followed by a language tag."
74 | 
75 | let language_tag_invalid_char lang_tag : char -> Loc.span -> Warning.t =
76 |   let suggestion = "try '{@" ^ lang_tag ^ "[ ... ]}'." in
77 |   Warning.make ~suggestion "Invalid character '%c' in language tag."
78 | 
79 | let truncated_code_block_meta : Loc.span -> Warning.t =
80 |   Warning.make ~suggestion:"try '{@ocaml[ ... ]}'." "Missing end of code block."
81 | 
82 | let truncated_code_block : Loc.span -> Warning.t =
83 |   Warning.make ~suggestion:"add ']}'." "Missing end of code block."
84 | 


--------------------------------------------------------------------------------
/src/syntax.ml:
--------------------------------------------------------------------------------
   1 | (* This module is a recursive descent parser for the ocamldoc syntax. The parser
   2 |    consumes a token stream of type [Token.t Stream.t], provided by the lexer,
   3 |    and produces a comment AST of the type defined in [Parser_.Ast].
   4 | 
   5 |    The AST has two main levels: inline elements, which can appear inside
   6 |    paragraphs, and are spaced horizontally when presented, and block elements,
   7 |    such as paragraphs and lists, which are spaced vertically when presented.
   8 |    Block elements contain inline elements, but not vice versa.
   9 | 
  10 |    Corresponding to this, the parser has three "main" functions:
  11 | 
  12 |    - [delimited_inline_element_list] parses a run of inline elements that is
  13 |      delimited by curly brace markup ([{...}]).
  14 |    - [paragraph] parses a run of inline elements that make up a paragraph, and
  15 |      is not explicitly delimited with curly braces.
  16 |    - [block_element_list] parses a sequence of block elements. A comment is a
  17 |      sequence of block elements, so [block_element_list] is the top-level
  18 |      parser. It is also used for list item and tag content. *)
  19 | 
  20 | open! Compat
  21 | 
  22 | type 'a with_location = 'a Loc.with_location
  23 | 
  24 | (* {2 Input} *)
  25 | 
  26 | type input = {
  27 |   tokens : Token.t Loc.with_location Stream.t;
  28 |   warnings : Warning.t list ref;
  29 | }
  30 | 
  31 | (* {2 Output} *)
  32 | 
  33 | let add_warning input warning = input.warnings := warning :: !(input.warnings)
  34 | let junk input = Stream.junk input.tokens
  35 | 
  36 | let peek input =
  37 |   match Stream.peek input.tokens with
  38 |   | Some token -> token
  39 |   | None -> assert false
  40 | 
  41 | module Table = struct
  42 |   module Light_syntax = struct
  43 |     let valid_align = function
  44 |       | [ { Loc.value = `Word w; _ } ] -> (
  45 |           match String.length w with
  46 |           | 0 -> `Valid None
  47 |           | 1 -> (
  48 |               match w with
  49 |               | "-" -> `Valid None
  50 |               | ":" -> `Valid (Some `Center)
  51 |               | _ -> `Invalid)
  52 |           | len ->
  53 |               if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then
  54 |                 match (String.get w 0, String.get w (len - 1)) with
  55 |                 | ':', ':' -> `Valid (Some `Center)
  56 |                 | ':', '-' -> `Valid (Some `Left)
  57 |                 | '-', ':' -> `Valid (Some `Right)
  58 |                 | '-', '-' -> `Valid None
  59 |                 | _ -> `Invalid
  60 |               else `Invalid)
  61 |       | _ -> `Invalid
  62 | 
  63 |     let valid_align_row lx =
  64 |       let rec loop acc = function
  65 |         | [] -> Some (List.rev acc)
  66 |         | x :: q -> (
  67 |             match valid_align x with
  68 |             | `Invalid -> None
  69 |             | `Valid alignment -> loop (alignment :: acc) q)
  70 |       in
  71 |       loop [] lx
  72 | 
  73 |     let create ~grid ~align : Ast.table =
  74 |       let cell_to_block (x, k) =
  75 |         let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in
  76 |         match x with
  77 |         | [] -> ([], k)
  78 |         | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k)
  79 |       in
  80 |       let row_to_block = List.map cell_to_block in
  81 |       let grid_to_block = List.map row_to_block in
  82 |       ((grid_to_block grid, align), `Light)
  83 | 
  84 |     let with_kind kind : 'a with_location list list -> 'a Ast.row =
  85 |       List.map (fun c -> (c, kind))
  86 | 
  87 |     let from_raw_data grid : Ast.table =
  88 |       match grid with
  89 |       | [] -> create ~grid:[] ~align:None
  90 |       | row1 :: rows2_N -> (
  91 |           match valid_align_row row1 with
  92 |           (* If the first line is the align row, everything else is data. *)
  93 |           | Some _ as align ->
  94 |               create ~grid:(List.map (with_kind `Data) rows2_N) ~align
  95 |           | None -> (
  96 |               match rows2_N with
  97 |               (* Only 1 line, if this is not the align row this is data. *)
  98 |               | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None
  99 |               | row2 :: rows3_N -> (
 100 |                   match valid_align_row row2 with
 101 |                   (* If the second line is the align row, the first one is the
 102 |                      header and the rest is data. *)
 103 |                   | Some _ as align ->
 104 |                       let header = with_kind `Header row1 in
 105 |                       let data = List.map (with_kind `Data) rows3_N in
 106 |                       create ~grid:(header :: data) ~align
 107 |                   (* No align row in the first 2 lines, everything is considered
 108 |                      data. *)
 109 |                   | None ->
 110 |                       create ~grid:(List.map (with_kind `Data) grid) ~align:None
 111 |                   )))
 112 |   end
 113 | 
 114 |   module Heavy_syntax = struct
 115 |     let create ~grid : Ast.table = ((grid, None), `Heavy)
 116 |     let from_grid grid : Ast.table = create ~grid
 117 |   end
 118 | end
 119 | 
 120 | module Reader = struct
 121 |   let until_rbrace input acc =
 122 |     let rec consume () =
 123 |       let next_token = peek input in
 124 |       match next_token.value with
 125 |       | `Right_brace ->
 126 |           junk input;
 127 |           `End (acc, next_token.location)
 128 |       | `Space _ | `Single_newline _ | `Blank_line _ ->
 129 |           junk input;
 130 |           consume ()
 131 |       | _ -> `Token next_token
 132 |     in
 133 |     consume ()
 134 | 
 135 |   module Infix = struct
 136 |     let ( >>> ) consume if_token =
 137 |       match consume with
 138 |       | `End (ret, loc) -> (ret, loc)
 139 |       | `Token t -> if_token t
 140 |   end
 141 | end
 142 | 
 143 | open Reader.Infix
 144 | 
 145 | (* The last token in the stream is always [`End], and it is never consumed by
 146 |    the parser, so the [None] case is impossible. *)
 147 | 
 148 | let npeek n input = Stream.npeek n input.tokens
 149 | 
 150 | (* {2 Non-link inline elements} *)
 151 | type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ]
 152 | 
 153 | (* Convenient abbreviation for use in patterns. *)
 154 | type token_that_always_begins_an_inline_element =
 155 |   [ `Word of string
 156 |   | `Code_span of string
 157 |   | `Raw_markup of string option * string
 158 |   | `Begin_style of style
 159 |   | `Simple_reference of string
 160 |   | `Begin_reference_with_replacement_text of string
 161 |   | `Simple_link of string
 162 |   | `Begin_link_with_replacement_text of string
 163 |   | `Math_span of string ]
 164 | 
 165 | (* Check that the token constructors above actually are all in [Token.t]. *)
 166 | let _check_subset : token_that_always_begins_an_inline_element -> Token.t =
 167 |  fun t -> (t :> Token.t)
 168 | 
 169 | (* Consumes tokens that make up a single non-link inline element:
 170 | 
 171 |    - a horizontal space ([`Space], significant in inline elements),
 172 |    - a word (see [word]),
 173 |    - a code span ([...], [`Code_span _]), or
 174 |    - styled text ({e ...}).
 175 | 
 176 |    The latter requires a recursive call to [delimited_inline_element_list],
 177 |    defined below.
 178 | 
 179 |    This should be part of [delimited_inline_element_list]; however, it is also
 180 |    called by function [paragraph]. As a result, it is factored out, and made
 181 |    mutually-recursive with [delimited_inline_element_list].
 182 | 
 183 |    This is called only when it is known that the first token in the list is the
 184 |    beginning of an inline element. In the case of [`Minus] and [`Plus], that
 185 |    means the caller has determined that they are not a list bullet (i.e., not
 186 |    the first non-whitespace tokens on their line).
 187 | 
 188 |    This function consumes exactly the tokens that make up the element. *)
 189 | let rec inline_element :
 190 |     input -> Loc.span -> _ -> Ast.inline_element with_location =
 191 |  fun input location next_token ->
 192 |   match next_token with
 193 |   | `Space _ as token ->
 194 |       junk input;
 195 |       Loc.at location token
 196 |   | `Word _ as token ->
 197 |       junk input;
 198 |       Loc.at location token
 199 |       (* This is actually the same memory representation as the token, complete
 200 |          with location, and is probably the most common case. Perhaps the token
 201 |          can be reused somehow. The same is true of [`Space], [`Code_span]. *)
 202 |   | `Minus ->
 203 |       junk input;
 204 |       Loc.at location (`Word "-")
 205 |   | `Plus ->
 206 |       junk input;
 207 |       Loc.at location (`Word "+")
 208 |   | `Bar ->
 209 |       junk input;
 210 |       Loc.at location (`Word "|")
 211 |   | (`Code_span _ | `Math_span _ | `Raw_markup _) as token ->
 212 |       junk input;
 213 |       Loc.at location token
 214 |   | `Begin_style s as parent_markup ->
 215 |       junk input;
 216 | 
 217 |       let requires_leading_whitespace =
 218 |         match s with
 219 |         | `Bold | `Italic | `Emphasis -> true
 220 |         | `Superscript | `Subscript -> false
 221 |       in
 222 |       let content, brace_location =
 223 |         delimited_inline_element_list ~parent_markup
 224 |           ~parent_markup_location:location ~requires_leading_whitespace input
 225 |       in
 226 | 
 227 |       let location = Loc.span [ location; brace_location ] in
 228 | 
 229 |       if content = [] then
 230 |         Parse_error.should_not_be_empty
 231 |           ~what:(Token.describe parent_markup)
 232 |           location
 233 |         |> add_warning input;
 234 | 
 235 |       Loc.at location (`Styled (s, content))
 236 |   | `Simple_reference r ->
 237 |       junk input;
 238 | 
 239 |       let r_location = Loc.nudge_start (String.length "{!") location in
 240 |       let r = Loc.at r_location r in
 241 | 
 242 |       Loc.at location (`Reference (`Simple, r, []))
 243 |   | `Begin_reference_with_replacement_text r as parent_markup ->
 244 |       junk input;
 245 | 
 246 |       let r_location = Loc.nudge_start (String.length "{{!") location in
 247 |       let r = Loc.at r_location r in
 248 | 
 249 |       let content, brace_location =
 250 |         delimited_inline_element_list ~parent_markup
 251 |           ~parent_markup_location:location ~requires_leading_whitespace:false
 252 |           input
 253 |       in
 254 | 
 255 |       let location = Loc.span [ location; brace_location ] in
 256 | 
 257 |       if content = [] then
 258 |         Parse_error.should_not_be_empty
 259 |           ~what:(Token.describe parent_markup)
 260 |           location
 261 |         |> add_warning input;
 262 | 
 263 |       Loc.at location (`Reference (`With_text, r, content))
 264 |   | `Simple_link u ->
 265 |       junk input;
 266 | 
 267 |       let u = String.trim u in
 268 | 
 269 |       if u = "" then
 270 |         Parse_error.should_not_be_empty
 271 |           ~what:(Token.describe next_token)
 272 |           location
 273 |         |> add_warning input;
 274 | 
 275 |       Loc.at location (`Link (u, []))
 276 |   | `Begin_link_with_replacement_text u as parent_markup ->
 277 |       junk input;
 278 | 
 279 |       let u = String.trim u in
 280 | 
 281 |       if u = "" then
 282 |         Parse_error.should_not_be_empty
 283 |           ~what:(Token.describe parent_markup)
 284 |           location
 285 |         |> add_warning input;
 286 | 
 287 |       let content, brace_location =
 288 |         delimited_inline_element_list ~parent_markup
 289 |           ~parent_markup_location:location ~requires_leading_whitespace:false
 290 |           input
 291 |       in
 292 | 
 293 |       `Link (u, content) |> Loc.at (Loc.span [ location; brace_location ])
 294 | 
 295 | (* Consumes tokens that make up a sequence of inline elements that is ended by
 296 |    a '}', a [`Right_brace] token. The brace token is also consumed.
 297 | 
 298 |    The sequences are also preceded by some markup like '{b'. Some of these
 299 |    markup tokens require whitespace immediately after the token, and others not.
 300 |    The caller indicates which way that is through the
 301 |    [~requires_leading_whitespace] argument.
 302 | 
 303 |    Whitespace is significant in inline element lists. In particular, "foo [bar]"
 304 |    is represented as [`Word "foo"; `Space; `Code_span "bar"], while "foo[bar]"
 305 |    is [`Word "foo"; `Code_span "bar"]. It doesn't matter how much whitespace is
 306 |    there, just whether it is present or not. Single newlines and horizontal
 307 |    space in any amount are allowed. Blank lines are not, as these are separators
 308 |    for {e block} elements.
 309 | 
 310 |    In correct input, the first and last elements emitted will not be [`Space],
 311 |    i.e. [`Space] appears only between other non-link inline elements. In
 312 |    incorrect input, there might be [`Space] followed immediately by something
 313 |    like an @author tag.
 314 | 
 315 |    The [~parent_markup] and [~parent_markup_location] arguments are used for
 316 |    generating error messages. *)
 317 | and delimited_inline_element_list :
 318 |     parent_markup:[< Token.t ] ->
 319 |     parent_markup_location:Loc.span ->
 320 |     requires_leading_whitespace:bool ->
 321 |     input ->
 322 |     Ast.inline_element with_location list * Loc.span =
 323 |  fun ~parent_markup ~parent_markup_location ~requires_leading_whitespace input ->
 324 |   (* [~at_start_of_line] is used to interpret [`Minus] and [`Plus]. These are
 325 |      word tokens if not the first non-whitespace tokens on their line. Then,
 326 |      they are allowed in a non-link element list. *)
 327 |   let rec consume_elements :
 328 |       at_start_of_line:bool ->
 329 |       Ast.inline_element with_location list ->
 330 |       Ast.inline_element with_location list * Loc.span =
 331 |    fun ~at_start_of_line acc ->
 332 |     let next_token = peek input in
 333 |     match next_token.value with
 334 |     | `Right_brace ->
 335 |         junk input;
 336 |         (List.rev acc, next_token.location)
 337 |     (* The [`Space] token is not space at the beginning or end of line, because
 338 |        that is combined into [`Single_newline] or [`Blank_line] tokens. It is
 339 |        also not at the beginning of markup (after e.g. '{b'), because that is
 340 |        handled separately before calling
 341 |        [consume_non_link_inline_elements], and not immediately before '}',
 342 |        because that is combined into the [`Right_brace] token by the lexer. So,
 343 |        it is an internal space, and we want to add it to the non-link inline
 344 |        element list. *)
 345 |     | (`Space _ | #token_that_always_begins_an_inline_element) as token ->
 346 |         let acc = inline_element input next_token.location token :: acc in
 347 |         consume_elements ~at_start_of_line:false acc
 348 |     | `Single_newline ws ->
 349 |         junk input;
 350 |         let element = Loc.same next_token (`Space ws) in
 351 |         consume_elements ~at_start_of_line:true (element :: acc)
 352 |     | `Blank_line ws as blank ->
 353 |         Parse_error.not_allowed ~what:(Token.describe blank)
 354 |           ~in_what:(Token.describe parent_markup)
 355 |           next_token.location
 356 |         |> add_warning input;
 357 | 
 358 |         junk input;
 359 |         let element = Loc.same next_token (`Space ws) in
 360 |         consume_elements ~at_start_of_line:true (element :: acc)
 361 |     | `Bar as token ->
 362 |         let acc = inline_element input next_token.location token :: acc in
 363 |         consume_elements ~at_start_of_line:false acc
 364 |     | (`Minus | `Plus) as bullet ->
 365 |         (if at_start_of_line then
 366 |          let suggestion =
 367 |            Printf.sprintf "move %s so it isn't the first thing on the line."
 368 |              (Token.print bullet)
 369 |          in
 370 |          Parse_error.not_allowed ~what:(Token.describe bullet)
 371 |            ~in_what:(Token.describe parent_markup)
 372 |            ~suggestion next_token.location
 373 |          |> add_warning input);
 374 | 
 375 |         let acc = inline_element input next_token.location bullet :: acc in
 376 |         consume_elements ~at_start_of_line:false acc
 377 |     | other_token ->
 378 |         Parse_error.not_allowed
 379 |           ~what:(Token.describe other_token)
 380 |           ~in_what:(Token.describe parent_markup)
 381 |           next_token.location
 382 |         |> add_warning input;
 383 | 
 384 |         let last_location =
 385 |           match acc with
 386 |           | last_token :: _ -> last_token.location
 387 |           | [] -> parent_markup_location
 388 |         in
 389 | 
 390 |         (List.rev acc, last_location)
 391 |   in
 392 | 
 393 |   let first_token = peek input in
 394 |   match first_token.value with
 395 |   | `Space _ ->
 396 |       junk input;
 397 |       consume_elements ~at_start_of_line:false []
 398 |       (* [~at_start_of_line] is [false] here because the preceding token was some
 399 |          some markup like '{b', and we didn't move to the next line, so the next
 400 |          token will not be the first non-whitespace token on its line. *)
 401 |   | `Single_newline _ ->
 402 |       junk input;
 403 |       consume_elements ~at_start_of_line:true []
 404 |   | `Blank_line _ as blank ->
 405 |       (* In case the markup is immediately followed by a blank line, the error
 406 |          message printed by the catch-all case below can be confusing, as it will
 407 |          suggest that the markup must be followed by a newline (which it is). It
 408 |          just must not be followed by two newlines. To explain that clearly,
 409 |          handle that case specifically. *)
 410 |       Parse_error.not_allowed ~what:(Token.describe blank)
 411 |         ~in_what:(Token.describe parent_markup)
 412 |         first_token.location
 413 |       |> add_warning input;
 414 | 
 415 |       junk input;
 416 |       consume_elements ~at_start_of_line:true []
 417 |   | `Right_brace ->
 418 |       junk input;
 419 |       ([], first_token.location)
 420 |   | _ ->
 421 |       if requires_leading_whitespace then
 422 |         Parse_error.should_be_followed_by_whitespace
 423 |           ~what:(Token.print parent_markup)
 424 |           parent_markup_location
 425 |         |> add_warning input;
 426 |       consume_elements ~at_start_of_line:false []
 427 | 
 428 | (* {2 Paragraphs} *)
 429 | 
 430 | (* Consumes tokens that make up a paragraph.
 431 | 
 432 |    A paragraph is a sequence of inline elements that ends on a blank line, or
 433 |    explicit block markup such as a verbatim block on a new line.
 434 | 
 435 |    Because of the significance of newlines, paragraphs are parsed line-by-line.
 436 |    The function [paragraph] is called only when the current token is the first
 437 |    non-whitespace token on its line, and begins an inline element. [paragraph]
 438 |    then parses a line of inline elements. Afterwards, it looks ahead to the next
 439 |    line. If that line also begins with an inline element, it parses that line,
 440 |    and so on. *)
 441 | let paragraph : input -> Ast.nestable_block_element with_location =
 442 |  fun input ->
 443 |   (* Parses a single line of a paragraph, consisting of inline elements. The
 444 |      only valid ways to end a paragraph line are with [`End], [`Single_newline],
 445 |      [`Blank_line], and [`Right_brace]. Everything else either belongs in the
 446 |      paragraph, or signifies an attempt to begin a block element inside a
 447 |      paragraph line, which is an error. These errors are caught elsewhere; the
 448 |      paragraph parser just stops. *)
 449 |   let rec paragraph_line :
 450 |       Ast.inline_element with_location list ->
 451 |       Ast.inline_element with_location list =
 452 |    fun acc ->
 453 |     let next_token = peek input in
 454 |     match next_token.value with
 455 |     | ( `Space _ | `Minus | `Plus | `Bar
 456 |       | #token_that_always_begins_an_inline_element ) as token ->
 457 |         let element = inline_element input next_token.location token in
 458 |         paragraph_line (element :: acc)
 459 |     | _ -> acc
 460 |   in
 461 | 
 462 |   (* After each line is parsed, decides whether to parse more lines. *)
 463 |   let rec additional_lines :
 464 |       Ast.inline_element with_location list ->
 465 |       Ast.inline_element with_location list =
 466 |    fun acc ->
 467 |     match npeek 2 input with
 468 |     | { value = `Single_newline ws; location }
 469 |       :: { value = #token_that_always_begins_an_inline_element | `Bar; _ }
 470 |       :: _ ->
 471 |         junk input;
 472 |         let acc = Loc.at location (`Space ws) :: acc in
 473 |         let acc = paragraph_line acc in
 474 |         additional_lines acc
 475 |     | _ -> List.rev acc
 476 |   in
 477 | 
 478 |   let elements = paragraph_line [] |> additional_lines in
 479 |   `Paragraph elements |> Loc.at (Loc.span (List.map Loc.location elements))
 480 | 
 481 | (* {2 Block elements} *)
 482 | 
 483 | (* {3 Helper types} *)
 484 | 
 485 | (* The interpretation of tokens in the block parser depends on where on a line
 486 |     each token appears. The six possible "locations" are:
 487 | 
 488 |     - [`At_start_of_line], when only whitespace has been read on the current
 489 |       line.
 490 |     - [`After_tag], when a valid tag token, such as [@deprecated], has been read,
 491 |       and only whitespace has been read since.
 492 |     - [`After_shorthand_bullet], when a valid shorthand list item bullet, such as
 493 |       [-], has been read, and only whitespace has been read since.
 494 |     - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li],
 495 |       has been read, and only whitespace has been read since.
 496 |     - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read.
 497 |     - [`After_text], when any other valid non-whitespace token has already been
 498 |       read on the current line.
 499 | 
 500 |     Here are some examples of how this affects the interpretation of tokens:
 501 | 
 502 |     - A paragraph can start anywhere except [`After_text] (two paragraphs cannot
 503 |       be on the same line, but paragraphs can be nested in just about anything).
 504 |     - [`Minus] is interpreted as a list item bullet [`At_start_of_line],
 505 |       [`After_tag], and [`After_explicit_list_bullet].
 506 |     - Tags are only allowed [`At_start_of_line].
 507 | 
 508 |    To track the location accurately, the functions that make up the block parser
 509 |    pass explicit [where_in_line] values around and return them.
 510 | 
 511 |    In a few cases, [where_in_line] can be inferred from what helper was called.
 512 |    For example, the [paragraph] parser always stops on the same line as the last
 513 |    significant token that is in the paragraph it consumed, so the location must
 514 |    be [`After_text]. *)
 515 | type where_in_line =
 516 |   [ `At_start_of_line
 517 |   | `After_tag
 518 |   | `After_shorthand_bullet
 519 |   | `After_explicit_list_bullet
 520 |   | `After_table_cell
 521 |   | `After_text ]
 522 | 
 523 | (* The block parsing loop, function [block_element_list], stops when it
 524 |    encounters certain tokens.
 525 | 
 526 |    When it is called for the whole comment, or for in explicit list item
 527 |    ([{li foo}]), it can only stop on end of input or a right brace.
 528 | 
 529 |    When it is called inside a shorthand list item ([- foo]), it stops on end of
 530 |    input, right brace, a blank line (indicating end of shorthand list), plus or
 531 |    minus (indicating the start of the next liste item), or a section heading or
 532 |    tag, which cannot be nested in list markup.
 533 | 
 534 |    The block parser [block_element_list] explicitly returns the token that
 535 |    stopped it, with a type more precise than [Token.t stream_head]: if it was
 536 |    called for the whole comment or an explicit list item, the stop token will
 537 |    have type [stops_at_delimiters stream_head], and if it was called for a
 538 |    shorthand list item, the stop token will have type
 539 |    [implicit_stop stream_head]. This allows the calling parsers to write precise
 540 |    cases for exactly the tokens that might be at the front of the stream after
 541 |    the block parser returns. *)
 542 | type stops_at_delimiters = [ `End | `Right_brace ]
 543 | type code_stop = [ `End | `Right_code_delimiter ]
 544 | 
 545 | type stopped_implicitly =
 546 |   [ `End
 547 |   | `Blank_line of string
 548 |   | `Right_brace
 549 |   | `Minus
 550 |   | `Plus
 551 |   | Token.section_heading
 552 |   | Token.tag ]
 553 | 
 554 | (* Ensure that the above two types are really subsets of [Token.t]. *)
 555 | let _check_subset : stops_at_delimiters -> Token.t = fun t -> (t :> Token.t)
 556 | let _check_subset : stopped_implicitly -> Token.t = fun t -> (t :> Token.t)
 557 | 
 558 | (* The different contexts in which the block parser [block_element_list] can be
 559 |    called. The block parser's behavior depends somewhat on the context. For
 560 |    example, while paragraphs are allowed anywhere, shorthand lists are not
 561 |    allowed immediately inside other shorthand lists, while tags are not allowed
 562 |    anywhere except at the comment top level.
 563 | 
 564 |    Besides telling the block parser how to behave, each context also carries two
 565 |    types, which determine the return type of the block parser:
 566 | 
 567 |    - The type of blocks the parser returns. Note that [nestable_block_element]
 568 |      is included in [block_element]. However, the extra block kinds in
 569 |      [block_element] are only allowed at the comment top level.
 570 |    - The type of token that the block parser stops at. See discussion above. *)
 571 | type ('block, 'stops_at_which_tokens) context =
 572 |   | Top_level : (Ast.block_element, stops_at_delimiters) context
 573 |   | In_shorthand_list : (Ast.nestable_block_element, stopped_implicitly) context
 574 |   | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context
 575 |   | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context
 576 |   | In_code_results : (Ast.nestable_block_element, code_stop) context
 577 |   | In_tag : (Ast.nestable_block_element, Token.t) context
 578 | 
 579 | (* This is a no-op. It is needed to prove to the type system that nestable block
 580 |    elements are acceptable block elements in all contexts. *)
 581 | let accepted_in_all_contexts :
 582 |     type block stops_at_which_tokens.
 583 |     (block, stops_at_which_tokens) context ->
 584 |     Ast.nestable_block_element ->
 585 |     block =
 586 |  fun context block ->
 587 |   match context with
 588 |   | Top_level -> (block :> Ast.block_element)
 589 |   | In_shorthand_list -> block
 590 |   | In_explicit_list -> block
 591 |   | In_table_cell -> block
 592 |   | In_code_results -> block
 593 |   | In_tag -> block
 594 | 
 595 | (* Converts a tag to a series of words. This is used in error recovery, when a
 596 |    tag cannot be generated. *)
 597 | let tag_to_words = function
 598 |   | `Author s -> [ `Word "@author"; `Space " "; `Word s ]
 599 |   | `Before s -> [ `Word "@before"; `Space " "; `Word s ]
 600 |   | `Canonical s -> [ `Word "@canonical"; `Space " "; `Word s ]
 601 |   | `Deprecated -> [ `Word "@deprecated" ]
 602 |   | `Inline -> [ `Word "@inline" ]
 603 |   | `Open -> [ `Word "@open" ]
 604 |   | `Closed -> [ `Word "@closed" ]
 605 |   | `Hidden -> [ `Word "@hidden" ]
 606 |   | `Param s -> [ `Word "@param"; `Space " "; `Word s ]
 607 |   | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ]
 608 |   | `Return -> [ `Word "@return" ]
 609 |   | `See (`Document, s) -> [ `Word "@see"; `Space " "; `Word ("\"" ^ s ^ "\"") ]
 610 |   | `See (`File, s) -> [ `Word "@see"; `Space " "; `Word ("'" ^ s ^ "'") ]
 611 |   | `See (`Url, s) -> [ `Word "@see"; `Space " "; `Word ("<" ^ s ^ ">") ]
 612 |   | `Since s -> [ `Word "@since"; `Space " "; `Word s ]
 613 |   | `Version s -> [ `Word "@version"; `Space " "; `Word s ]
 614 | 
 615 | (* {3 Block element lists} *)
 616 | 
 617 | (* Consumes tokens making up a sequence of block elements. These are:
 618 | 
 619 |    - paragraphs,
 620 |    - code blocks,
 621 |    - verbatim text blocks,
 622 |    - tables,
 623 |    - lists, and
 624 |    - section headings. *)
 625 | let rec block_element_list :
 626 |     type block stops_at_which_tokens.
 627 |     (block, stops_at_which_tokens) context ->
 628 |     parent_markup:[< Token.t | `Comment ] ->
 629 |     input ->
 630 |     block with_location list
 631 |     * stops_at_which_tokens with_location
 632 |     * where_in_line =
 633 |  fun context ~parent_markup input ->
 634 |   let rec consume_block_elements :
 635 |       parsed_a_tag:bool ->
 636 |       where_in_line ->
 637 |       block with_location list ->
 638 |       block with_location list
 639 |       * stops_at_which_tokens with_location
 640 |       * where_in_line =
 641 |    fun ~parsed_a_tag where_in_line acc ->
 642 |     let describe token =
 643 |       match token with
 644 |       | #token_that_always_begins_an_inline_element -> "paragraph"
 645 |       | _ -> Token.describe token
 646 |     in
 647 | 
 648 |     let warn_if_after_text { Loc.location; value = token } =
 649 |       if where_in_line = `After_text then
 650 |         Parse_error.should_begin_on_its_own_line ~what:(describe token) location
 651 |         |> add_warning input
 652 |     in
 653 | 
 654 |     let warn_if_after_tags { Loc.location; value = token } =
 655 |       if parsed_a_tag then
 656 |         let suggestion =
 657 |           Printf.sprintf "move %s before any tags." (Token.describe token)
 658 |         in
 659 |         Parse_error.not_allowed ~what:(describe token)
 660 |           ~in_what:"the tags section" ~suggestion location
 661 |         |> add_warning input
 662 |     in
 663 | 
 664 |     let warn_because_not_at_top_level { Loc.location; value = token } =
 665 |       let suggestion =
 666 |         Printf.sprintf "move %s outside of any other markup."
 667 |           (Token.print token)
 668 |       in
 669 |       Parse_error.not_allowed ~what:(Token.describe token)
 670 |         ~in_what:(Token.describe parent_markup)
 671 |         ~suggestion location
 672 |       |> add_warning input
 673 |     in
 674 | 
 675 |     match peek input with
 676 |     (* Terminators: the two tokens that terminate anything. *)
 677 |     | { value = `End; _ } as next_token -> (
 678 |         match context with
 679 |         | Top_level -> (List.rev acc, next_token, where_in_line)
 680 |         | In_shorthand_list -> (List.rev acc, next_token, where_in_line)
 681 |         | In_explicit_list -> (List.rev acc, next_token, where_in_line)
 682 |         | In_tag -> (List.rev acc, next_token, where_in_line)
 683 |         | In_table_cell -> (List.rev acc, next_token, where_in_line)
 684 |         | In_code_results -> (List.rev acc, next_token, where_in_line))
 685 |     | { value = `Right_brace; _ } as next_token -> (
 686 |         (* This little absurdity is needed to satisfy the type system. Without it,
 687 |            OCaml is unable to prove that [stream_head] has the right type for all
 688 |            possible values of [context]. *)
 689 |         match context with
 690 |         | Top_level -> (List.rev acc, next_token, where_in_line)
 691 |         | In_shorthand_list -> (List.rev acc, next_token, where_in_line)
 692 |         | In_explicit_list -> (List.rev acc, next_token, where_in_line)
 693 |         | In_table_cell -> (List.rev acc, next_token, where_in_line)
 694 |         | In_tag -> (List.rev acc, next_token, where_in_line)
 695 |         | In_code_results ->
 696 |             junk input;
 697 |             consume_block_elements ~parsed_a_tag where_in_line acc)
 698 |     | { value = `Right_code_delimiter; _ } as next_token -> (
 699 |         match context with
 700 |         | In_code_results -> (List.rev acc, next_token, where_in_line)
 701 |         | _ ->
 702 |             junk input;
 703 |             consume_block_elements ~parsed_a_tag where_in_line acc)
 704 |     (* Whitespace. This can terminate some kinds of block elements. It is also
 705 |        necessary to track it to interpret [`Minus] and [`Plus] correctly, as
 706 |        well as to ensure that all block elements begin on their own line. *)
 707 |     | { value = `Space _; _ } ->
 708 |         junk input;
 709 |         consume_block_elements ~parsed_a_tag where_in_line acc
 710 |     | { value = `Single_newline _; _ } ->
 711 |         junk input;
 712 |         consume_block_elements ~parsed_a_tag `At_start_of_line acc
 713 |     | { value = `Blank_line _; _ } as next_token -> (
 714 |         match context with
 715 |         (* Blank lines terminate shorthand lists ([- foo]). They also terminate
 716 |            paragraphs, but the paragraph parser is aware of that internally. *)
 717 |         | In_shorthand_list -> (List.rev acc, next_token, where_in_line)
 718 |         (* Otherwise, blank lines are pretty much like single newlines. *)
 719 |         | _ ->
 720 |             junk input;
 721 |             consume_block_elements ~parsed_a_tag `At_start_of_line acc)
 722 |     (* Explicit list items ([{li ...}] and [{- ...}]) can never appear directly
 723 |        in block content. They can only appear inside [{ul ...}] and [{ol ...}].
 724 |        So, catch those. *)
 725 |     | { value = `Begin_list_item _ as token; location } ->
 726 |         let suggestion =
 727 |           Printf.sprintf "move %s into %s, or use %s." (Token.print token)
 728 |             (Token.describe (`Begin_list `Unordered))
 729 |             (Token.describe `Minus)
 730 |         in
 731 |         Parse_error.not_allowed ~what:(Token.describe token)
 732 |           ~in_what:(Token.describe parent_markup)
 733 |           ~suggestion location
 734 |         |> add_warning input;
 735 | 
 736 |         junk input;
 737 |         consume_block_elements ~parsed_a_tag where_in_line acc
 738 |     (* Table rows ([{tr ...}]) can never appear directly
 739 |        in block content. They can only appear inside [{table ...}]. *)
 740 |     | { value = `Begin_table_row as token; location } ->
 741 |         let suggestion =
 742 |           Printf.sprintf "move %s into %s." (Token.print token)
 743 |             (Token.describe `Begin_table_heavy)
 744 |         in
 745 |         Parse_error.not_allowed ~what:(Token.describe token)
 746 |           ~in_what:(Token.describe parent_markup)
 747 |           ~suggestion location
 748 |         |> add_warning input;
 749 |         junk input;
 750 |         consume_block_elements ~parsed_a_tag where_in_line acc
 751 |     (* Table cells ([{th ...}] and [{td ...}]) can never appear directly
 752 |        in block content. They can only appear inside [{tr ...}]. *)
 753 |     | { value = `Begin_table_cell _ as token; location } ->
 754 |         let suggestion =
 755 |           Printf.sprintf "move %s into %s." (Token.print token)
 756 |             (Token.describe `Begin_table_row)
 757 |         in
 758 |         Parse_error.not_allowed ~what:(Token.describe token)
 759 |           ~in_what:(Token.describe parent_markup)
 760 |           ~suggestion location
 761 |         |> add_warning input;
 762 |         junk input;
 763 |         consume_block_elements ~parsed_a_tag where_in_line acc
 764 |     (* Tags. These can appear at the top level only. Also, once one tag is seen,
 765 |        the only top-level elements allowed are more tags. *)
 766 |     | { value = `Tag tag as token; location } as next_token -> (
 767 |         let recover_when_not_at_top_level context =
 768 |           warn_because_not_at_top_level next_token;
 769 |           junk input;
 770 |           let words = List.map (Loc.at location) (tag_to_words tag) in
 771 |           let paragraph =
 772 |             `Paragraph words
 773 |             |> accepted_in_all_contexts context
 774 |             |> Loc.at location
 775 |           in
 776 |           consume_block_elements ~parsed_a_tag `At_start_of_line
 777 |             (paragraph :: acc)
 778 |         in
 779 | 
 780 |         match context with
 781 |         (* Tags cannot make sense in an explicit list ([{ul {li ...}}]). *)
 782 |         | In_explicit_list -> recover_when_not_at_top_level context
 783 |         (* If a tag starts at the beginning of a line, it terminates the preceding
 784 |            tag and/or the current shorthand list. In this case, return to the
 785 |            caller, and let the caller decide how to interpret the tag token. *)
 786 |         | In_shorthand_list ->
 787 |             if where_in_line = `At_start_of_line then
 788 |               (List.rev acc, next_token, where_in_line)
 789 |             else recover_when_not_at_top_level context
 790 |         | In_table_cell -> recover_when_not_at_top_level context
 791 |         | In_tag ->
 792 |             if where_in_line = `At_start_of_line then
 793 |               (List.rev acc, next_token, where_in_line)
 794 |             else recover_when_not_at_top_level context
 795 |         | In_code_results -> recover_when_not_at_top_level context
 796 |         (* If this is the top-level call to [block_element_list], parse the
 797 |            tag. *)
 798 |         | Top_level -> (
 799 |             if where_in_line <> `At_start_of_line then
 800 |               Parse_error.should_begin_on_its_own_line
 801 |                 ~what:(Token.describe token) location
 802 |               |> add_warning input;
 803 | 
 804 |             junk input;
 805 | 
 806 |             match tag with
 807 |             | (`Author s | `Since s | `Version s | `Canonical s) as tag ->
 808 |                 let s = String.trim s in
 809 |                 if s = "" then
 810 |                   Parse_error.should_not_be_empty ~what:(Token.describe token)
 811 |                     location
 812 |                   |> add_warning input;
 813 |                 let tag =
 814 |                   match tag with
 815 |                   | `Author _ -> `Author s
 816 |                   | `Since _ -> `Since s
 817 |                   | `Version _ -> `Version s
 818 |                   | `Canonical _ ->
 819 |                       (* TODO The location is only approximate, as we need lexer
 820 |                          cooperation to get the real location. *)
 821 |                       let r_location =
 822 |                         Loc.nudge_start (String.length "@canonical ") location
 823 |                       in
 824 |                       `Canonical (Loc.at r_location s)
 825 |                 in
 826 | 
 827 |                 let tag = Loc.at location (`Tag tag) in
 828 |                 consume_block_elements ~parsed_a_tag:true `After_text
 829 |                   (tag :: acc)
 830 |             | (`Deprecated | `Return) as tag ->
 831 |                 let content, _stream_head, where_in_line =
 832 |                   block_element_list In_tag ~parent_markup:token input
 833 |                 in
 834 |                 let tag =
 835 |                   match tag with
 836 |                   | `Deprecated -> `Deprecated content
 837 |                   | `Return -> `Return content
 838 |                 in
 839 |                 let location =
 840 |                   location :: List.map Loc.location content |> Loc.span
 841 |                 in
 842 |                 let tag = Loc.at location (`Tag tag) in
 843 |                 consume_block_elements ~parsed_a_tag:true where_in_line
 844 |                   (tag :: acc)
 845 |             | (`Param _ | `Raise _ | `Before _) as tag ->
 846 |                 let content, _stream_head, where_in_line =
 847 |                   block_element_list In_tag ~parent_markup:token input
 848 |                 in
 849 |                 let tag =
 850 |                   match tag with
 851 |                   | `Param s -> `Param (s, content)
 852 |                   | `Raise s -> `Raise (s, content)
 853 |                   | `Before s -> `Before (s, content)
 854 |                 in
 855 |                 let location =
 856 |                   location :: List.map Loc.location content |> Loc.span
 857 |                 in
 858 |                 let tag = Loc.at location (`Tag tag) in
 859 |                 consume_block_elements ~parsed_a_tag:true where_in_line
 860 |                   (tag :: acc)
 861 |             | `See (kind, target) ->
 862 |                 let content, _next_token, where_in_line =
 863 |                   block_element_list In_tag ~parent_markup:token input
 864 |                 in
 865 |                 let location =
 866 |                   location :: List.map Loc.location content |> Loc.span
 867 |                 in
 868 |                 let tag = `Tag (`See (kind, target, content)) in
 869 |                 let tag = Loc.at location tag in
 870 |                 consume_block_elements ~parsed_a_tag:true where_in_line
 871 |                   (tag :: acc)
 872 |             | (`Inline | `Open | `Closed | `Hidden) as tag ->
 873 |                 let tag = Loc.at location (`Tag tag) in
 874 |                 consume_block_elements ~parsed_a_tag:true `After_text
 875 |                   (tag :: acc)))
 876 |     | ( { value = #token_that_always_begins_an_inline_element; _ }
 877 |       | { value = `Bar; _ } ) as next_token ->
 878 |         warn_if_after_tags next_token;
 879 |         warn_if_after_text next_token;
 880 | 
 881 |         let block = paragraph input in
 882 |         let block = Loc.map (accepted_in_all_contexts context) block in
 883 |         let acc = block :: acc in
 884 |         consume_block_elements ~parsed_a_tag `After_text acc
 885 |     | { value = `Verbatim s as token; location } as next_token ->
 886 |         warn_if_after_tags next_token;
 887 |         warn_if_after_text next_token;
 888 |         if s = "" then
 889 |           Parse_error.should_not_be_empty ~what:(Token.describe token) location
 890 |           |> add_warning input;
 891 | 
 892 |         junk input;
 893 |         let block = accepted_in_all_contexts context token in
 894 |         let block = Loc.at location block in
 895 |         let acc = block :: acc in
 896 |         consume_block_elements ~parsed_a_tag `After_text acc
 897 |     | { value = `Math_block s as token; location } as next_token ->
 898 |         warn_if_after_tags next_token;
 899 |         warn_if_after_text next_token;
 900 |         if s = "" then
 901 |           Parse_error.should_not_be_empty ~what:(Token.describe token) location
 902 |           |> add_warning input;
 903 | 
 904 |         junk input;
 905 |         let block = accepted_in_all_contexts context token in
 906 |         let block = Loc.at location block in
 907 |         let acc = block :: acc in
 908 |         consume_block_elements ~parsed_a_tag `After_text acc
 909 |     | {
 910 |         value =
 911 |           `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs)
 912 |           as token;
 913 |         location;
 914 |       } as next_token ->
 915 |         warn_if_after_tags next_token;
 916 |         warn_if_after_text next_token;
 917 |         junk input;
 918 |         let delimiter = if delim = "" then None else Some delim in
 919 |         let output, location =
 920 |           if not has_outputs then (None, location)
 921 |           else
 922 |             let content, next_token, _where_in_line =
 923 |               block_element_list In_code_results ~parent_markup:token input
 924 |             in
 925 |             junk input;
 926 |             let locations =
 927 |               location :: List.map (fun content -> content.Loc.location) content
 928 |             in
 929 |             let location = Loc.span locations in
 930 |             let location = { location with end_ = next_token.location.end_ } in
 931 |             (Some content, location)
 932 |         in
 933 | 
 934 |         if s = "" then
 935 |           Parse_error.should_not_be_empty ~what:(Token.describe token) location
 936 |           |> add_warning input;
 937 | 
 938 |         let meta =
 939 |           match meta with
 940 |           | None -> None
 941 |           | Some (language, tags) -> Some { Ast.language; tags }
 942 |         in
 943 |         let block =
 944 |           accepted_in_all_contexts context
 945 |             (`Code_block
 946 |               {
 947 |                 Ast.meta;
 948 |                 delimiter;
 949 |                 content = { value = s; location = v_loc };
 950 |                 output;
 951 |               })
 952 |         in
 953 |         let block = Loc.at location block in
 954 |         let acc = block :: acc in
 955 |         consume_block_elements ~parsed_a_tag `After_text acc
 956 |     | { value = `Modules s as token; location } as next_token ->
 957 |         warn_if_after_tags next_token;
 958 |         warn_if_after_text next_token;
 959 | 
 960 |         junk input;
 961 | 
 962 |         (* TODO Use some library for a splitting function, or move this out into a
 963 |            Util module. *)
 964 |         let split_string delimiters s =
 965 |           let rec scan_delimiters acc index =
 966 |             if index >= String.length s then List.rev acc
 967 |             else if String.contains delimiters s.[index] then
 968 |               scan_delimiters acc (index + 1)
 969 |             else scan_word acc index (index + 1)
 970 |           and scan_word acc start_index index =
 971 |             if index >= String.length s then
 972 |               let word = String.sub s start_index (index - start_index) in
 973 |               List.rev (word :: acc)
 974 |             else if String.contains delimiters s.[index] then
 975 |               let word = String.sub s start_index (index - start_index) in
 976 |               scan_delimiters (word :: acc) (index + 1)
 977 |             else scan_word acc start_index (index + 1)
 978 |           in
 979 | 
 980 |           scan_delimiters [] 0
 981 |         in
 982 | 
 983 |         (* TODO Correct locations await a full implementation of {!modules}
 984 |            parsing. *)
 985 |         let modules =
 986 |           split_string " \t\r\n" s |> List.map (fun r -> Loc.at location r)
 987 |         in
 988 | 
 989 |         if modules = [] then
 990 |           Parse_error.should_not_be_empty ~what:(Token.describe token) location
 991 |           |> add_warning input;
 992 | 
 993 |         let block = accepted_in_all_contexts context (`Modules modules) in
 994 |         let block = Loc.at location block in
 995 |         let acc = block :: acc in
 996 |         consume_block_elements ~parsed_a_tag `After_text acc
 997 |     | { value = `Begin_list kind as token; location } as next_token ->
 998 |         warn_if_after_tags next_token;
 999 |         warn_if_after_text next_token;
1000 | 
1001 |         junk input;
1002 | 
1003 |         let items, brace_location =
1004 |           explicit_list_items ~parent_markup:token input
1005 |         in
1006 |         if items = [] then
1007 |           Parse_error.should_not_be_empty ~what:(Token.describe token) location
1008 |           |> add_warning input;
1009 | 
1010 |         let location = Loc.span [ location; brace_location ] in
1011 |         let block = `List (kind, `Heavy, items) in
1012 |         let block = accepted_in_all_contexts context block in
1013 |         let block = Loc.at location block in
1014 |         let acc = block :: acc in
1015 |         consume_block_elements ~parsed_a_tag `After_text acc
1016 |     | { value = (`Begin_table_light | `Begin_table_heavy) as token; location }
1017 |       as next_token ->
1018 |         warn_if_after_tags next_token;
1019 |         warn_if_after_text next_token;
1020 |         junk input;
1021 |         let block, brace_location =
1022 |           let parent_markup = token in
1023 |           let parent_markup_location = location in
1024 |           match token with
1025 |           | `Begin_table_light ->
1026 |               light_table input ~parent_markup ~parent_markup_location
1027 |           | `Begin_table_heavy ->
1028 |               heavy_table input ~parent_markup ~parent_markup_location
1029 |         in
1030 |         let location = Loc.span [ location; brace_location ] in
1031 |         let block = accepted_in_all_contexts context (`Table block) in
1032 |         let block = Loc.at location block in
1033 |         let acc = block :: acc in
1034 |         consume_block_elements ~parsed_a_tag `After_text acc
1035 |     | { value = (`Minus | `Plus) as token; location } as next_token -> (
1036 |         (match where_in_line with
1037 |         | `After_text | `After_shorthand_bullet ->
1038 |             Parse_error.should_begin_on_its_own_line
1039 |               ~what:(Token.describe token) location
1040 |             |> add_warning input
1041 |         | _ -> ());
1042 | 
1043 |         warn_if_after_tags next_token;
1044 | 
1045 |         match context with
1046 |         | In_shorthand_list -> (List.rev acc, next_token, where_in_line)
1047 |         | _ ->
1048 |             let items, where_in_line =
1049 |               shorthand_list_items next_token where_in_line input
1050 |             in
1051 |             let kind =
1052 |               match token with `Minus -> `Unordered | `Plus -> `Ordered
1053 |             in
1054 |             let location =
1055 |               location :: List.map Loc.location (List.flatten items) |> Loc.span
1056 |             in
1057 |             let block = `List (kind, `Light, items) in
1058 |             let block = accepted_in_all_contexts context block in
1059 |             let block = Loc.at location block in
1060 |             let acc = block :: acc in
1061 |             consume_block_elements ~parsed_a_tag where_in_line acc)
1062 |     | { value = `Begin_section_heading (level, label) as token; location } as
1063 |       next_token -> (
1064 |         warn_if_after_tags next_token;
1065 | 
1066 |         let recover_when_not_at_top_level context =
1067 |           warn_because_not_at_top_level next_token;
1068 |           junk input;
1069 |           let content, brace_location =
1070 |             delimited_inline_element_list ~parent_markup:token
1071 |               ~parent_markup_location:location ~requires_leading_whitespace:true
1072 |               input
1073 |           in
1074 |           let location = Loc.span [ location; brace_location ] in
1075 |           let paragraph =
1076 |             `Paragraph content
1077 |             |> accepted_in_all_contexts context
1078 |             |> Loc.at location
1079 |           in
1080 |           consume_block_elements ~parsed_a_tag `At_start_of_line
1081 |             (paragraph :: acc)
1082 |         in
1083 | 
1084 |         match context with
1085 |         | In_shorthand_list ->
1086 |             if where_in_line = `At_start_of_line then
1087 |               (List.rev acc, next_token, where_in_line)
1088 |             else recover_when_not_at_top_level context
1089 |         | In_explicit_list -> recover_when_not_at_top_level context
1090 |         | In_table_cell -> recover_when_not_at_top_level context
1091 |         | In_tag -> recover_when_not_at_top_level context
1092 |         | In_code_results -> recover_when_not_at_top_level context
1093 |         | Top_level ->
1094 |             if where_in_line <> `At_start_of_line then
1095 |               Parse_error.should_begin_on_its_own_line
1096 |                 ~what:(Token.describe token) location
1097 |               |> add_warning input;
1098 | 
1099 |             let label =
1100 |               match label with
1101 |               | Some "" ->
1102 |                   Parse_error.should_not_be_empty ~what:"heading label" location
1103 |                   |> add_warning input;
1104 |                   None
1105 |               | _ -> label
1106 |             in
1107 | 
1108 |             junk input;
1109 | 
1110 |             let content, brace_location =
1111 |               delimited_inline_element_list ~parent_markup:token
1112 |                 ~parent_markup_location:location
1113 |                 ~requires_leading_whitespace:true input
1114 |             in
1115 |             if content = [] then
1116 |               Parse_error.should_not_be_empty ~what:(Token.describe token)
1117 |                 location
1118 |               |> add_warning input;
1119 | 
1120 |             let location = Loc.span [ location; brace_location ] in
1121 |             let heading = `Heading (level, label, content) in
1122 |             let heading = Loc.at location heading in
1123 |             let acc = heading :: acc in
1124 |             consume_block_elements ~parsed_a_tag `After_text acc)
1125 |     | { value = `Begin_paragraph_style _ as token; location } ->
1126 |         junk input;
1127 |         let content, brace_location =
1128 |           delimited_inline_element_list ~parent_markup:token
1129 |             ~parent_markup_location:location ~requires_leading_whitespace:true
1130 |             input
1131 |         in
1132 |         let location = Loc.span [ location; brace_location ] in
1133 | 
1134 |         Parse_error.markup_should_not_be_used ~what:(Token.describe token)
1135 |           location
1136 |         |> add_warning input;
1137 | 
1138 |         let paragraph =
1139 |           `Paragraph content
1140 |           |> accepted_in_all_contexts context
1141 |           |> Loc.at location
1142 |         in
1143 |         consume_block_elements ~parsed_a_tag `At_start_of_line (paragraph :: acc)
1144 |   in
1145 | 
1146 |   let where_in_line =
1147 |     match context with
1148 |     | Top_level -> `At_start_of_line
1149 |     | In_shorthand_list -> `After_shorthand_bullet
1150 |     | In_explicit_list -> `After_explicit_list_bullet
1151 |     | In_table_cell -> `After_table_cell
1152 |     | In_code_results -> `After_tag
1153 |     | In_tag -> `After_tag
1154 |   in
1155 | 
1156 |   consume_block_elements ~parsed_a_tag:false where_in_line []
1157 | 
1158 | (* {3 Lists} *)
1159 | 
1160 | (* Consumes a sequence of implicit list items. Each one consists of a [`Minus]
1161 |    or [`Plus] token, followed by block elements until:
1162 | 
1163 |    - a blank line, or
1164 |    - a list bullet of the opposite kind (e.g. [`Plus] for a [`Minus] list).
1165 | 
1166 |    This function is called when the next token is known to be [`Minus] or
1167 |    [`Plus]. It consumes that token, and calls the block element parser (see
1168 |    above). That parser returns to [implicit_list_items] only on [`Blank_line],
1169 |    [`End], [`Minus] or [`Plus] at the start of a line, or [`Right_brace]. *)
1170 | and shorthand_list_items :
1171 |     [ `Minus | `Plus ] with_location ->
1172 |     where_in_line ->
1173 |     input ->
1174 |     Ast.nestable_block_element with_location list list * where_in_line =
1175 |  fun first_token where_in_line input ->
1176 |   let bullet_token = first_token.value in
1177 | 
1178 |   let rec consume_list_items :
1179 |       [> ] with_location ->
1180 |       where_in_line ->
1181 |       Ast.nestable_block_element with_location list list ->
1182 |       Ast.nestable_block_element with_location list list * where_in_line =
1183 |    fun next_token where_in_line acc ->
1184 |     match next_token.value with
1185 |     | `End | `Right_brace | `Blank_line _ | `Tag _ | `Begin_section_heading _ ->
1186 |         (List.rev acc, where_in_line)
1187 |     | (`Minus | `Plus) as bullet ->
1188 |         if bullet = bullet_token then (
1189 |           junk input;
1190 | 
1191 |           let content, stream_head, where_in_line =
1192 |             block_element_list In_shorthand_list ~parent_markup:bullet input
1193 |           in
1194 |           if content = [] then
1195 |             Parse_error.should_not_be_empty ~what:(Token.describe bullet)
1196 |               next_token.location
1197 |             |> add_warning input;
1198 | 
1199 |           let acc = content :: acc in
1200 |           consume_list_items stream_head where_in_line acc)
1201 |         else (List.rev acc, where_in_line)
1202 |   in
1203 | 
1204 |   consume_list_items
1205 |     (first_token :> stopped_implicitly with_location)
1206 |     where_in_line []
1207 | 
1208 | (* Consumes a sequence of explicit list items (starting with '{li ...}' and
1209 |    '{-...}', which are represented by [`Begin_list_item _] tokens).
1210 | 
1211 |    This function is called immediately after '{ul' or '{ol' ([`Begin_list _]) is
1212 |    read. The only "valid" way to exit is by reading a [`Right_brace] token,
1213 |    which is consumed.
1214 | 
1215 |    Whitespace inside the list, but outside list items, is not significant – this
1216 |    parsing function consumes all of it. Otherwise, only list item start tokens
1217 |    are accepted. Everything else is an error. *)
1218 | and explicit_list_items :
1219 |     parent_markup:[< Token.t ] ->
1220 |     input ->
1221 |     Ast.nestable_block_element with_location list list * Loc.span =
1222 |  fun ~parent_markup input ->
1223 |   let rec consume_list_items :
1224 |       Ast.nestable_block_element with_location list list ->
1225 |       Ast.nestable_block_element with_location list list * Loc.span =
1226 |    fun acc ->
1227 |     let next_token = peek input in
1228 |     match next_token.value with
1229 |     | `End ->
1230 |         Parse_error.not_allowed next_token.location ~what:(Token.describe `End)
1231 |           ~in_what:(Token.describe parent_markup)
1232 |         |> add_warning input;
1233 |         (List.rev acc, next_token.location)
1234 |     | `Right_brace ->
1235 |         junk input;
1236 |         (List.rev acc, next_token.location)
1237 |     | `Space _ | `Single_newline _ | `Blank_line _ ->
1238 |         junk input;
1239 |         consume_list_items acc
1240 |     | `Begin_list_item kind as token ->
1241 |         junk input;
1242 | 
1243 |         (* '{li', represented by [`Begin_list_item `Li], must be followed by
1244 |            whitespace. *)
1245 |         (if kind = `Li then
1246 |          match (peek input).value with
1247 |          | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace ->
1248 |              ()
1249 |              (* The presence of [`Right_brace] above requires some explanation:
1250 | 
1251 |                 - It is better to be silent about missing whitespace if the next
1252 |                   token is [`Right_brace], because the error about an empty list
1253 |                   item will be generated below, and that error is more important to
1254 |                   the user.
1255 |                 - The [`Right_brace] token also happens to include all whitespace
1256 |                   before it, as a convenience for the rest of the parser. As a
1257 |                   result, not ignoring it could be wrong: there could in fact be
1258 |                   whitespace in the concrete syntax immediately after '{li', just
1259 |                   it is not represented as [`Space], [`Single_newline], or
1260 |                   [`Blank_line]. *)
1261 |          | _ ->
1262 |              Parse_error.should_be_followed_by_whitespace next_token.location
1263 |                ~what:(Token.print token)
1264 |              |> add_warning input);
1265 | 
1266 |         let content, token_after_list_item, _where_in_line =
1267 |           block_element_list In_explicit_list ~parent_markup:token input
1268 |         in
1269 | 
1270 |         if content = [] then
1271 |           Parse_error.should_not_be_empty next_token.location
1272 |             ~what:(Token.describe token)
1273 |           |> add_warning input;
1274 | 
1275 |         (match token_after_list_item.value with
1276 |         | `Right_brace -> junk input
1277 |         | `End ->
1278 |             Parse_error.not_allowed token_after_list_item.location
1279 |               ~what:(Token.describe `End) ~in_what:(Token.describe token)
1280 |             |> add_warning input);
1281 | 
1282 |         let acc = content :: acc in
1283 |         consume_list_items acc
1284 |     | token ->
1285 |         let suggestion =
1286 |           match token with
1287 |           | `Begin_section_heading _ | `Tag _ ->
1288 |               Printf.sprintf "move %s outside the list." (Token.describe token)
1289 |           | _ ->
1290 |               Printf.sprintf "move %s into a list item, %s or %s."
1291 |                 (Token.describe token)
1292 |                 (Token.print (`Begin_list_item `Li))
1293 |                 (Token.print (`Begin_list_item `Dash))
1294 |         in
1295 |         Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1296 |           ~in_what:(Token.describe parent_markup)
1297 |           ~suggestion
1298 |         |> add_warning input;
1299 | 
1300 |         junk input;
1301 |         consume_list_items acc
1302 |   in
1303 | 
1304 |   consume_list_items []
1305 | 
1306 | (* Consumes a sequence of table rows that might start with [`Bar].
1307 | 
1308 |    This function is called immediately after '{t' ([`Begin_table `Light]) is
1309 |    read. The only "valid" way to exit is by reading a [`Right_brace] token,
1310 |    which is consumed. *)
1311 | and light_table ~parent_markup ~parent_markup_location input =
1312 |   let rec consume_rows acc ~last_loc =
1313 |     Reader.until_rbrace input acc >>> fun next_token ->
1314 |     match next_token.Loc.value with
1315 |     | `Bar | #token_that_always_begins_an_inline_element -> (
1316 |         let next, row, last_loc =
1317 |           light_table_row ~parent_markup ~last_loc input
1318 |         in
1319 |         match next with
1320 |         | `Continue -> consume_rows (row :: acc) ~last_loc
1321 |         | `Stop -> (row :: acc, last_loc))
1322 |     | other_token ->
1323 |         Parse_error.not_allowed next_token.location
1324 |           ~what:(Token.describe other_token)
1325 |           ~in_what:(Token.describe parent_markup)
1326 |         |> add_warning input;
1327 |         junk input;
1328 |         consume_rows acc ~last_loc
1329 |   in
1330 |   let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1331 |   let grid = List.rev rows in
1332 |   (Table.Light_syntax.from_raw_data grid, brace_location)
1333 | 
1334 | (* Consumes a table row that might start with [`Bar]. *)
1335 | and light_table_row ~parent_markup ~last_loc input =
1336 |   let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc =
1337 |     let push_cells row cell =
1338 |       match cell with [] -> row | _ -> List.rev cell :: row
1339 |     in
1340 |     let return row cell = List.rev (push_cells row cell) in
1341 |     let next_token = peek input in
1342 |     match next_token.value with
1343 |     | `Right_brace ->
1344 |         junk input;
1345 |         (`Stop, return acc_row acc_cell, next_token.location)
1346 |     | `Space _ as token ->
1347 |         junk input;
1348 |         let i = Loc.at next_token.location token in
1349 |         consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc
1350 |     | `Single_newline _ | `Blank_line _ ->
1351 |         junk input;
1352 |         (`Continue, return acc_row acc_cell, last_loc)
1353 |     | `Bar ->
1354 |         junk input;
1355 |         let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in
1356 |         consume_row acc_row [] [] ~new_line:false ~last_loc
1357 |     | #token_that_always_begins_an_inline_element as token ->
1358 |         let i = inline_element input next_token.location token in
1359 |         if Loc.spans_multiple_lines i then
1360 |           Parse_error.not_allowed
1361 |             ~what:(Token.describe (`Single_newline ""))
1362 |             ~in_what:(Token.describe `Begin_table_light)
1363 |             i.location
1364 |           |> add_warning input;
1365 |         let acc_cell =
1366 |           if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell
1367 |         in
1368 |         consume_row acc_row acc_cell [] ~new_line:false
1369 |           ~last_loc:next_token.location
1370 |     | other_token ->
1371 |         Parse_error.not_allowed next_token.location
1372 |           ~what:(Token.describe other_token)
1373 |           ~in_what:(Token.describe parent_markup)
1374 |         |> add_warning input;
1375 |         junk input;
1376 |         consume_row acc_row acc_cell acc_space ~new_line ~last_loc
1377 |   in
1378 |   consume_row [] [] [] ~new_line:true ~last_loc
1379 | 
1380 | (* Consumes a sequence of table rows (starting with '{tr ...}', which are
1381 |    represented by [`Begin_table_row] tokens).
1382 | 
1383 |    This function is called immediately after '{table' ([`Begin_table `Heavy]) is
1384 |    read. The only "valid" way to exit is by reading a [`Right_brace] token,
1385 |    which is consumed. *)
1386 | and heavy_table ~parent_markup ~parent_markup_location input =
1387 |   let rec consume_rows acc ~last_loc =
1388 |     Reader.until_rbrace input acc >>> fun next_token ->
1389 |     match next_token.Loc.value with
1390 |     | `Begin_table_row as token ->
1391 |         junk input;
1392 |         let items, last_loc = heavy_table_row ~parent_markup:token input in
1393 |         consume_rows (List.rev items :: acc) ~last_loc
1394 |     | token ->
1395 |         Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1396 |           ~in_what:(Token.describe parent_markup)
1397 |           ~suggestion:"Move outside of {table ...}, or inside {tr ...}"
1398 |         |> add_warning input;
1399 |         junk input;
1400 |         consume_rows acc ~last_loc
1401 |   in
1402 |   let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in
1403 |   let grid = List.rev rows in
1404 |   (Table.Heavy_syntax.from_grid grid, brace_location)
1405 | 
1406 | (* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }',
1407 |    which are represented by [`Begin_table_cell] tokens).
1408 | 
1409 |    This function is called immediately after '{tr' ([`Begin_table_row]) is
1410 |    read. The only "valid" way to exit is by reading a [`Right_brace] token,
1411 |    which is consumed. *)
1412 | and heavy_table_row ~parent_markup input =
1413 |   let rec consume_cell_items acc =
1414 |     Reader.until_rbrace input acc >>> fun next_token ->
1415 |     match next_token.Loc.value with
1416 |     | `Begin_table_cell kind as token ->
1417 |         junk input;
1418 |         let content, token_after_list_item, _where_in_line =
1419 |           block_element_list In_table_cell ~parent_markup:token input
1420 |         in
1421 |         (match token_after_list_item.value with
1422 |         | `Right_brace -> junk input
1423 |         | `End ->
1424 |             Parse_error.not_allowed token_after_list_item.location
1425 |               ~what:(Token.describe `End) ~in_what:(Token.describe token)
1426 |             |> add_warning input);
1427 |         consume_cell_items ((content, kind) :: acc)
1428 |     | token ->
1429 |         Parse_error.not_allowed next_token.location ~what:(Token.describe token)
1430 |           ~in_what:(Token.describe parent_markup)
1431 |           ~suggestion:
1432 |             "Move outside of {table ...}, or inside {td ...} or {th ...}"
1433 |         |> add_warning input;
1434 |         junk input;
1435 |         consume_cell_items acc
1436 |   in
1437 |   consume_cell_items []
1438 | 
1439 | (* {2 Entry point} *)
1440 | 
1441 | let parse warnings tokens =
1442 |   let input : input = { tokens; warnings } in
1443 | 
1444 |   let rec parse_block_elements () =
1445 |     let elements, last_token, _where_in_line =
1446 |       block_element_list Top_level ~parent_markup:`Comment input
1447 |     in
1448 | 
1449 |     match last_token.value with
1450 |     | `End -> elements
1451 |     | `Right_brace ->
1452 |         Parse_error.unpaired_right_brace last_token.location
1453 |         |> add_warning input;
1454 | 
1455 |         let block =
1456 |           Loc.same last_token (`Paragraph [ Loc.same last_token (`Word "}") ])
1457 |         in
1458 | 
1459 |         junk input;
1460 |         elements @ (block :: parse_block_elements ())
1461 |   in
1462 |   let ast = parse_block_elements () in
1463 |   (ast, List.rev !(input.warnings))
1464 | 


--------------------------------------------------------------------------------
/src/syntax.mli:
--------------------------------------------------------------------------------
1 | (* Internal module, not exposed *)
2 | 
3 | val parse :
4 |   Warning.t list ref ->
5 |   Token.t Loc.with_location Stream.t ->
6 |   Ast.t * Warning.t list
7 | 


--------------------------------------------------------------------------------
/src/token.ml:
--------------------------------------------------------------------------------
  1 | (* This module contains the token type, emitted by the lexer, and consumed by
  2 |    the comment syntax parser. It also contains two functions that format tokens
  3 |    for error messages. *)
  4 | 
  5 | type section_heading = [ `Begin_section_heading of int * string option ]
  6 | type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ]
  7 | type paragraph_style = [ `Left | `Center | `Right ]
  8 | 
  9 | type tag =
 10 |   [ `Tag of
 11 |     [ `Author of string
 12 |     | `Deprecated
 13 |     | `Param of string
 14 |     | `Raise of string
 15 |     | `Return
 16 |     | `See of [ `Url | `File | `Document ] * string
 17 |     | `Since of string
 18 |     | `Before of string
 19 |     | `Version of string
 20 |     | `Canonical of string
 21 |     | `Inline
 22 |     | `Open
 23 |     | `Closed
 24 |     | `Hidden ] ]
 25 | 
 26 | type t =
 27 |   [ (* End of input. *)
 28 |     `End
 29 |   | (* Runs of whitespace. [Blank_line] is any run of whitespace that contains two
 30 |        or more newline characters. [Single_newline] is any run of whitespace that
 31 |        contains exactly one newline character. [Space] is any run of whitespace
 32 |        that contains no newline characters.
 33 | 
 34 |        It is an important invariant in the parser that no adjacent whitespace
 35 |        tokens are emitted by the lexer. Otherwise, there would be the need for
 36 |        unbounded lookahead, a (co-?)ambiguity between
 37 |        [Single_newline Single_newline] and [Blank_line], and other problems. *)
 38 |     `Space of
 39 |     string
 40 |   | `Single_newline of string
 41 |   | `Blank_line of string
 42 |   | (* A right curly brace ([}]), i.e. end of markup. *)
 43 |     `Right_brace
 44 |   | `Right_code_delimiter
 45 |   | (* Words are anything that is not whitespace or markup. Markup symbols can be
 46 |        be part of words if escaped.
 47 | 
 48 |        Words can contain plus and minus symbols, but those are emitted as [Plus]
 49 |        and [Minus] tokens. The parser combines plus and minus into words, except
 50 |        when they appear first on a line, in which case the tokens are list item
 51 |        bullets. *)
 52 |     `Word of
 53 |     string
 54 |   | `Code_span of string
 55 |   | `Raw_markup of string option * string
 56 |   | `Math_span of string
 57 |   | `Math_block of string
 58 |   | `Begin_style of style
 59 |   | `Begin_paragraph_style of paragraph_style
 60 |   | (* Other inline element markup. *)
 61 |     `Simple_reference of string
 62 |   | `Begin_reference_with_replacement_text of string
 63 |   | `Simple_link of string
 64 |   | `Begin_link_with_replacement_text of string
 65 |   | (* Leaf block element markup. *)
 66 |     `Code_block of
 67 |     (string Loc.with_location * string Loc.with_location option) option
 68 |     * string
 69 |     * string Loc.with_location
 70 |     * bool
 71 |   | `Verbatim of string
 72 |   | `Modules of string
 73 |   | (* List markup. *)
 74 |     `Begin_list of [ `Unordered | `Ordered ]
 75 |   | `Begin_list_item of [ `Li | `Dash ]
 76 |   | (* Table markup. *)
 77 |     `Begin_table_light
 78 |   | `Begin_table_heavy
 79 |   | `Begin_table_row
 80 |   | `Begin_table_cell of [ `Header | `Data ]
 81 |   | `Minus
 82 |   | `Plus
 83 |   | `Bar
 84 |   | section_heading
 85 |   | tag ]
 86 | 
 87 | let print : [< t ] -> string = function
 88 |   | `Begin_paragraph_style `Left -> "'{L'"
 89 |   | `Begin_paragraph_style `Center -> "'{C'"
 90 |   | `Begin_paragraph_style `Right -> "'{R'"
 91 |   | `Begin_style `Bold -> "'{b'"
 92 |   | `Begin_style `Italic -> "'{i'"
 93 |   | `Begin_style `Emphasis -> "'{e'"
 94 |   | `Begin_style `Superscript -> "'{^'"
 95 |   | `Begin_style `Subscript -> "'{_'"
 96 |   | `Begin_reference_with_replacement_text _ -> "'{{!'"
 97 |   | `Begin_link_with_replacement_text _ -> "'{{:'"
 98 |   | `Begin_list_item `Li -> "'{li ...}'"
 99 |   | `Begin_list_item `Dash -> "'{- ...}'"
100 |   | `Begin_table_light -> "{t"
101 |   | `Begin_table_heavy -> "{table"
102 |   | `Begin_table_row -> "'{tr'"
103 |   | `Begin_table_cell `Header -> "'{th'"
104 |   | `Begin_table_cell `Data -> "'{td'"
105 |   | `Minus -> "'-'"
106 |   | `Plus -> "'+'"
107 |   | `Bar -> "'|'"
108 |   | `Begin_section_heading (level, label) ->
109 |       let label = match label with None -> "" | Some label -> ":" ^ label in
110 |       Printf.sprintf "'{%i%s'" level label
111 |   | `Tag (`Author _) -> "'@author'"
112 |   | `Tag `Deprecated -> "'@deprecated'"
113 |   | `Tag (`Param _) -> "'@param'"
114 |   | `Tag (`Raise _) -> "'@raise'"
115 |   | `Tag `Return -> "'@return'"
116 |   | `Tag (`See _) -> "'@see'"
117 |   | `Tag (`Since _) -> "'@since'"
118 |   | `Tag (`Before _) -> "'@before'"
119 |   | `Tag (`Version _) -> "'@version'"
120 |   | `Tag (`Canonical _) -> "'@canonical'"
121 |   | `Tag `Inline -> "'@inline'"
122 |   | `Tag `Open -> "'@open'"
123 |   | `Tag `Closed -> "'@closed'"
124 |   | `Tag `Hidden -> "'@hidden"
125 |   | `Raw_markup (None, _) -> "'{%...%}'"
126 |   | `Raw_markup (Some target, _) -> "'{%" ^ target ^ ":...%}'"
127 | 
128 | (* [`Minus] and [`Plus] are interpreted as if they start list items. Therefore,
129 |    for error messages based on [Token.describe] to be accurate, formatted
130 |    [`Minus] and [`Plus] should always be plausibly list item bullets. *)
131 | let describe : [< t | `Comment ] -> string = function
132 |   | `Word w -> Printf.sprintf "'%s'" w
133 |   | `Code_span _ -> "'[...]' (code)"
134 |   | `Raw_markup _ -> "'{%...%}' (raw markup)"
135 |   | `Begin_paragraph_style `Left -> "'{L ...}' (left alignment)"
136 |   | `Begin_paragraph_style `Center -> "'{C ...}' (center alignment)"
137 |   | `Begin_paragraph_style `Right -> "'{R ...}' (right alignment)"
138 |   | `Begin_style `Bold -> "'{b ...}' (boldface text)"
139 |   | `Begin_style `Italic -> "'{i ...}' (italic text)"
140 |   | `Begin_style `Emphasis -> "'{e ...}' (emphasized text)"
141 |   | `Begin_style `Superscript -> "'{^...}' (superscript)"
142 |   | `Begin_style `Subscript -> "'{_...}' (subscript)"
143 |   | `Math_span _ -> "'{m ...}' (math span)"
144 |   | `Math_block _ -> "'{math ...}' (math block)"
145 |   | `Simple_reference _ -> "'{!...}' (cross-reference)"
146 |   | `Begin_reference_with_replacement_text _ ->
147 |       "'{{!...} ...}' (cross-reference)"
148 |   | `Simple_link _ -> "'{:...} (external link)'"
149 |   | `Begin_link_with_replacement_text _ -> "'{{:...} ...}' (external link)"
150 |   | `End -> "end of text"
151 |   | `Space _ -> "whitespace"
152 |   | `Single_newline _ -> "line break"
153 |   | `Blank_line _ -> "blank line"
154 |   | `Right_brace -> "'}'"
155 |   | `Right_code_delimiter -> "']}'"
156 |   | `Code_block _ -> "'{[...]}' (code block)"
157 |   | `Verbatim _ -> "'{v ... v}' (verbatim text)"
158 |   | `Modules _ -> "'{!modules ...}'"
159 |   | `Begin_list `Unordered -> "'{ul ...}' (bulleted list)"
160 |   | `Begin_list `Ordered -> "'{ol ...}' (numbered list)"
161 |   | `Begin_list_item `Li -> "'{li ...}' (list item)"
162 |   | `Begin_list_item `Dash -> "'{- ...}' (list item)"
163 |   | `Begin_table_light -> "'{t ...}' (table)"
164 |   | `Begin_table_heavy -> "'{table ...}' (table)"
165 |   | `Begin_table_row -> "'{tr ...}' (table row)"
166 |   | `Begin_table_cell `Header -> "'{th ... }' (table header cell)"
167 |   | `Begin_table_cell `Data -> "'{td ... }' (table data cell)"
168 |   | `Minus -> "'-' (bulleted list item)"
169 |   | `Plus -> "'+' (numbered list item)"
170 |   | `Bar -> "'|'"
171 |   | `Begin_section_heading (level, _) ->
172 |       Printf.sprintf "'{%i ...}' (section heading)" level
173 |   | `Tag (`Author _) -> "'@author'"
174 |   | `Tag `Deprecated -> "'@deprecated'"
175 |   | `Tag (`Param _) -> "'@param'"
176 |   | `Tag (`Raise _) -> "'@raise'"
177 |   | `Tag `Return -> "'@return'"
178 |   | `Tag (`See _) -> "'@see'"
179 |   | `Tag (`Since _) -> "'@since'"
180 |   | `Tag (`Before _) -> "'@before'"
181 |   | `Tag (`Version _) -> "'@version'"
182 |   | `Tag (`Canonical _) -> "'@canonical'"
183 |   | `Tag `Inline -> "'@inline'"
184 |   | `Tag `Open -> "'@open'"
185 |   | `Tag `Closed -> "'@closed'"
186 |   | `Tag `Hidden -> "'@hidden"
187 |   | `Comment -> "top-level text"
188 | 
189 | let describe_element = function
190 |   | `Reference (`Simple, _, _) -> describe (`Simple_reference "")
191 |   | `Reference (`With_text, _, _) ->
192 |       describe (`Begin_reference_with_replacement_text "")
193 |   | `Link _ -> describe (`Begin_link_with_replacement_text "")
194 |   | `Heading (level, _, _) -> describe (`Begin_section_heading (level, None))
195 | 


--------------------------------------------------------------------------------
/src/warning.ml:
--------------------------------------------------------------------------------
 1 | type t = { location : Loc.span; message : string }
 2 | 
 3 | let to_string e =
 4 |   let { location; message } = e in
 5 |   let location_string =
 6 |     if location.start.line = location.end_.line then
 7 |       Printf.sprintf "line %i, characters %i-%i" location.start.line
 8 |         location.start.column location.end_.column
 9 |     else
10 |       Printf.sprintf "line %i, character %i to line %i, character %i"
11 |         location.start.line location.start.column location.end_.line
12 |         location.end_.column
13 |   in
14 |   Printf.sprintf "File \"%s\", %s:\n%s" location.file location_string message
15 | 
16 | let pp fmt v = Format.fprintf fmt "%s" (to_string v)
17 | 
18 | let kasprintf k fmt =
19 |   Format.(kfprintf (fun _ -> k (flush_str_formatter ())) str_formatter fmt)
20 | 
21 | let kmake k ?suggestion =
22 |   kasprintf (fun message ->
23 |       match suggestion with
24 |       | None -> k message
25 |       | Some suggestion -> k (message ^ "\nSuggestion: " ^ suggestion))
26 | 
27 | let make ?suggestion format =
28 |   let k message location = { location; message } in
29 |   kmake k ?suggestion format
30 | 


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
1 | (library
2 |  (name odoc_parser_test)
3 |  (inline_tests)
4 |  (enabled_if
5 |   (>= %{ocaml_version} 4.04.1))
6 |  (preprocess
7 |   (pps ppx_expect))
8 |  (libraries sexplib0 odoc-parser))
9 | 


--------------------------------------------------------------------------------
/test/test.mli:
--------------------------------------------------------------------------------
1 | val test : ?location:Odoc_parser.Loc.point -> string -> unit
2 | 


--------------------------------------------------------------------------------
/test/test_tables.ml:
--------------------------------------------------------------------------------
  1 | open Test
  2 | 
  3 | [@@@ocaml.warning "-32"]
  4 | 
  5 | let%expect_test _ =
  6 |   let module Heavy = struct
  7 |     let empty_table_heavy =
  8 |       test "{table }";
  9 |       [%expect
 10 |         {|
 11 |         ((output
 12 |           (((f.ml (1 0) (1 8))
 13 |             (table (syntax heavy) (grid ()) (align "no alignment")))))
 14 |          (warnings ())) |}]
 15 | 
 16 |     let empty_row =
 17 |       test "{table {tr } }";
 18 |       [%expect
 19 |         {|
 20 |         ((output
 21 |           (((f.ml (1 0) (1 14))
 22 |             (table (syntax heavy) (grid ((row ()))) (align "no alignment")))))
 23 |          (warnings ()))|}]
 24 | 
 25 |     let no_header =
 26 |       test "{table {tr {td}}}";
 27 |       [%expect
 28 |         {|
 29 |         ((output
 30 |           (((f.ml (1 0) (1 17))
 31 |             (table (syntax heavy) (grid ((row ((data ()))))) (align "no alignment")))))
 32 |          (warnings ())) |}]
 33 | 
 34 |     let no_data =
 35 |       test "{table {tr {th}}}";
 36 |       [%expect
 37 |         {|
 38 |         ((output
 39 |           (((f.ml (1 0) (1 17))
 40 |             (table (syntax heavy) (grid ((row ((header ())))))
 41 |              (align "no alignment")))))
 42 |          (warnings ())) |}]
 43 | 
 44 |     let bad_data =
 45 |       test "{table absurd content}";
 46 |       [%expect
 47 |         {|
 48 |         ((output
 49 |           (((f.ml (1 0) (1 22))
 50 |             (table (syntax heavy) (grid ()) (align "no alignment")))))
 51 |          (warnings
 52 |           ( "File \"f.ml\", line 1, characters 7-13:\
 53 |            \n'absurd' is not allowed in '{table ...}' (table).\
 54 |            \nSuggestion: Move outside of {table ...}, or inside {tr ...}"
 55 |             "File \"f.ml\", line 1, characters 14-21:\
 56 |            \n'content' is not allowed in '{table ...}' (table).\
 57 |            \nSuggestion: Move outside of {table ...}, or inside {tr ...}"))) |}]
 58 | 
 59 |     let bad_row =
 60 |       test "{table {tr absurd content}}";
 61 |       [%expect
 62 |         {|
 63 |         ((output
 64 |           (((f.ml (1 0) (1 27))
 65 |             (table (syntax heavy) (grid ((row ()))) (align "no alignment")))))
 66 |          (warnings
 67 |           ( "File \"f.ml\", line 1, characters 11-17:\
 68 |            \n'absurd' is not allowed in '{tr ...}' (table row).\
 69 |            \nSuggestion: Move outside of {table ...}, or inside {td ...} or {th ...}"
 70 |             "File \"f.ml\", line 1, characters 18-25:\
 71 |            \n'content' is not allowed in '{tr ...}' (table row).\
 72 |            \nSuggestion: Move outside of {table ...}, or inside {td ...} or {th ...}"))) |}]
 73 | 
 74 |     let multiple_headers =
 75 |       test "{table {tr {th}} {tr {th}} {tr {td}}}";
 76 |       [%expect
 77 |         {|
 78 |         ((output
 79 |           (((f.ml (1 0) (1 37))
 80 |             (table (syntax heavy)
 81 |              (grid ((row ((header ()))) (row ((header ()))) (row ((data ())))))
 82 |              (align "no alignment")))))
 83 |          (warnings ())) |}]
 84 | 
 85 |     let complex_table =
 86 |       test
 87 |         {|
 88 |         {table
 89 |           {tr
 90 |             {th xxx}
 91 |             {th yyy}
 92 |           }
 93 |           {tr
 94 |             {td aaaa bbb ccc {i ddd}
 95 |             }
 96 |             {td
 97 |                {table {tr {td}}}
 98 |             }
 99 |           }
100 |           {tr
101 |             {td
102 |                - aaa
103 |                - bbb
104 |                - ccc
105 |             }
106 |             {td
107 |               {t
108 |                  x | y | z
109 |                  --|---|--
110 |                  1 | 2 | 3
111 |               }
112 |             }
113 |           }
114 |         }
115 |         |};
116 |       [%expect
117 |         {|
118 |         ((output
119 |           (((f.ml (2 8) (28 9))
120 |             (table (syntax heavy)
121 |              (grid
122 |               ((row
123 |                 ((header
124 |                   (((f.ml (4 16) (4 19))
125 |                     (paragraph (((f.ml (4 16) (4 19)) (word xxx)))))))
126 |                  (header
127 |                   (((f.ml (5 16) (5 19))
128 |                     (paragraph (((f.ml (5 16) (5 19)) (word yyy)))))))))
129 |                (row
130 |                 ((data
131 |                   (((f.ml (8 16) (8 36))
132 |                     (paragraph
133 |                      (((f.ml (8 16) (8 20)) (word aaaa)) ((f.ml (8 20) (8 21)) space)
134 |                       ((f.ml (8 21) (8 24)) (word bbb)) ((f.ml (8 24) (8 25)) space)
135 |                       ((f.ml (8 25) (8 28)) (word ccc)) ((f.ml (8 28) (8 29)) space)
136 |                       ((f.ml (8 29) (8 36))
137 |                        (italic (((f.ml (8 32) (8 35)) (word ddd))))))))))
138 |                  (data
139 |                   (((f.ml (11 15) (11 32))
140 |                     (table (syntax heavy) (grid ((row ((data ())))))
141 |                      (align "no alignment")))))))
142 |                (row
143 |                 ((data
144 |                   (((f.ml (16 15) (18 20))
145 |                     (unordered light
146 |                      ((((f.ml (16 17) (16 20))
147 |                         (paragraph (((f.ml (16 17) (16 20)) (word aaa))))))
148 |                       (((f.ml (17 17) (17 20))
149 |                         (paragraph (((f.ml (17 17) (17 20)) (word bbb))))))
150 |                       (((f.ml (18 17) (18 20))
151 |                         (paragraph (((f.ml (18 17) (18 20)) (word ccc)))))))))))
152 |                  (data
153 |                   (((f.ml (21 14) (25 15))
154 |                     (table (syntax light)
155 |                      (grid
156 |                       ((row
157 |                         ((header
158 |                           (((f.ml (22 17) (22 18))
159 |                             (paragraph (((f.ml (22 17) (22 18)) (word x)))))))
160 |                          (header
161 |                           (((f.ml (22 21) (22 22))
162 |                             (paragraph (((f.ml (22 21) (22 22)) (word y)))))))
163 |                          (header
164 |                           (((f.ml (22 25) (22 26))
165 |                             (paragraph (((f.ml (22 25) (22 26)) (word z)))))))))
166 |                        (row
167 |                         ((data
168 |                           (((f.ml (24 17) (24 18))
169 |                             (paragraph (((f.ml (24 17) (24 18)) (word 1)))))))
170 |                          (data
171 |                           (((f.ml (24 21) (24 22))
172 |                             (paragraph (((f.ml (24 21) (24 22)) (word 2)))))))
173 |                          (data
174 |                           (((f.ml (24 25) (24 26))
175 |                             (paragraph (((f.ml (24 25) (24 26)) (word 3)))))))))))
176 |                      (align (default default default))))))))))
177 |              (align "no alignment")))))
178 |          (warnings ())) |}]
179 |   end in
180 |   ()
181 | 
182 | let%expect_test _ =
183 |   let module Light = struct
184 |     let empty_table_light =
185 |       test "{t }";
186 |       [%expect
187 |         {|
188 |         ((output
189 |           (((f.ml (1 0) (1 4))
190 |             (table (syntax light) (grid ()) (align "no alignment")))))
191 |          (warnings ())) |}]
192 | 
193 |     let simple =
194 |       test {|
195 |         {t
196 |           | a |
197 |         }
198 |       |};
199 |       [%expect
200 |         {|
201 |         ((output
202 |           (((f.ml (2 8) (4 9))
203 |             (table (syntax light)
204 |              (grid
205 |               ((row
206 |                 ((data
207 |                   (((f.ml (3 12) (3 13))
208 |                     (paragraph (((f.ml (3 12) (3 13)) (word a)))))))))))
209 |              (align "no alignment")))))
210 |          (warnings ())) |}]
211 | 
212 |     let stars =
213 |       test
214 |         {|
215 |         {t
216 |           |a|   *b*|
217 |           |*c| d* |
218 |         }
219 |       |};
220 |       [%expect
221 |         {|
222 |         ((output
223 |           (((f.ml (2 8) (5 9))
224 |             (table (syntax light)
225 |              (grid
226 |               ((row
227 |                 ((data
228 |                   (((f.ml (3 11) (3 12))
229 |                     (paragraph (((f.ml (3 11) (3 12)) (word a)))))))
230 |                  (data
231 |                   (((f.ml (3 16) (3 19))
232 |                     (paragraph (((f.ml (3 16) (3 19)) (word *b*)))))))))
233 |                (row
234 |                 ((data
235 |                   (((f.ml (4 11) (4 13))
236 |                     (paragraph (((f.ml (4 11) (4 13)) (word *c)))))))
237 |                  (data
238 |                   (((f.ml (4 15) (4 17))
239 |                     (paragraph (((f.ml (4 15) (4 17)) (word d*)))))))))))
240 |              (align "no alignment")))))
241 |          (warnings ())) |}]
242 | 
243 |     let backquotes =
244 |       test {|
245 |       {t
246 |          | `a |`
247 |       }
248 |       |};
249 |       [%expect
250 |         {|
251 |         ((output
252 |           (((f.ml (2 6) (4 7))
253 |             (table (syntax light)
254 |              (grid
255 |               ((row
256 |                 ((data
257 |                   (((f.ml (3 11) (3 13))
258 |                     (paragraph (((f.ml (3 11) (3 13)) (word `a)))))))
259 |                  (data
260 |                   (((f.ml (3 15) (3 16))
261 |                     (paragraph (((f.ml (3 15) (3 16)) (word `)))))))))))
262 |              (align "no alignment")))))
263 |          (warnings ())) |}]
264 | 
265 |     let no_header =
266 |       test {|
267 |       {t
268 |        |---|---|
269 |        | x | y |
270 |       }
271 |       |};
272 |       [%expect
273 |         {|
274 |         ((output
275 |           (((f.ml (2 6) (5 7))
276 |             (table (syntax light)
277 |              (grid
278 |               ((row
279 |                 ((data
280 |                   (((f.ml (4 9) (4 10)) (paragraph (((f.ml (4 9) (4 10)) (word x)))))))
281 |                  (data
282 |                   (((f.ml (4 13) (4 14))
283 |                     (paragraph (((f.ml (4 13) (4 14)) (word y)))))))))))
284 |              (align (default default))))))
285 |          (warnings ())) |}]
286 | 
287 |     let no_align =
288 |       test {|
289 |       {t
290 |        | x | y |
291 |        | x | y |
292 |       }
293 |       |};
294 |       [%expect
295 |         {|
296 |           ((output
297 |             (((f.ml (2 6) (5 7))
298 |               (table (syntax light)
299 |                (grid
300 |                 ((row
301 |                   ((data
302 |                     (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word x)))))))
303 |                    (data
304 |                     (((f.ml (3 13) (3 14))
305 |                       (paragraph (((f.ml (3 13) (3 14)) (word y)))))))))
306 |                  (row
307 |                   ((data
308 |                     (((f.ml (4 9) (4 10)) (paragraph (((f.ml (4 9) (4 10)) (word x)))))))
309 |                    (data
310 |                     (((f.ml (4 13) (4 14))
311 |                       (paragraph (((f.ml (4 13) (4 14)) (word y)))))))))))
312 |                (align "no alignment")))))
313 |            (warnings ())) |}]
314 | 
315 |     let only_align =
316 |       test {|
317 |       {t
318 |         |--|--|
319 |       }
320 |       |};
321 |       [%expect
322 |         {|
323 |         ((output
324 |           (((f.ml (2 6) (4 7))
325 |             (table (syntax light) (grid ()) (align (default default))))))
326 |          (warnings ())) |}]
327 | 
328 |     let no_data =
329 |       test {|
330 |       {t
331 |        | x | y |
332 |        |---|---|
333 |       }
334 |       |};
335 |       [%expect
336 |         {|
337 |           ((output
338 |             (((f.ml (2 6) (5 7))
339 |               (table (syntax light)
340 |                (grid
341 |                 ((row
342 |                   ((header
343 |                     (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word x)))))))
344 |                    (header
345 |                     (((f.ml (3 13) (3 14))
346 |                       (paragraph (((f.ml (3 13) (3 14)) (word y)))))))))))
347 |                (align (default default))))))
348 |            (warnings ())) |}]
349 | 
350 |     let alignment =
351 |       test
352 |         {|
353 |       {t
354 |        | a | b | c | d |
355 |        |---|:--|--:|:-:|
356 |       }
357 |       |};
358 |       [%expect
359 |         {|
360 |         ((output
361 |           (((f.ml (2 6) (5 7))
362 |             (table (syntax light)
363 |              (grid
364 |               ((row
365 |                 ((header
366 |                   (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word a)))))))
367 |                  (header
368 |                   (((f.ml (3 13) (3 14))
369 |                     (paragraph (((f.ml (3 13) (3 14)) (word b)))))))
370 |                  (header
371 |                   (((f.ml (3 17) (3 18))
372 |                     (paragraph (((f.ml (3 17) (3 18)) (word c)))))))
373 |                  (header
374 |                   (((f.ml (3 21) (3 22))
375 |                     (paragraph (((f.ml (3 21) (3 22)) (word d)))))))))))
376 |              (align (default left right center))))))
377 |          (warnings ())) |}]
378 | 
379 |     let no_bars =
380 |       test
381 |         {|
382 |       {t
383 |         a | b | c | d
384 |        ---|:--|--:|:-:
385 |         a | b | c | d
386 |       }
387 |       |};
388 |       [%expect
389 |         {|
390 |           ((output
391 |             (((f.ml (2 6) (6 7))
392 |               (table (syntax light)
393 |                (grid
394 |                 ((row
395 |                   ((header
396 |                     (((f.ml (3 8) (3 9)) (paragraph (((f.ml (3 8) (3 9)) (word a)))))))
397 |                    (header
398 |                     (((f.ml (3 12) (3 13))
399 |                       (paragraph (((f.ml (3 12) (3 13)) (word b)))))))
400 |                    (header
401 |                     (((f.ml (3 16) (3 17))
402 |                       (paragraph (((f.ml (3 16) (3 17)) (word c)))))))
403 |                    (header
404 |                     (((f.ml (3 20) (3 21))
405 |                       (paragraph (((f.ml (3 20) (3 21)) (word d)))))))))
406 |                  (row
407 |                   ((data
408 |                     (((f.ml (5 8) (5 9)) (paragraph (((f.ml (5 8) (5 9)) (word a)))))))
409 |                    (data
410 |                     (((f.ml (5 12) (5 13))
411 |                       (paragraph (((f.ml (5 12) (5 13)) (word b)))))))
412 |                    (data
413 |                     (((f.ml (5 16) (5 17))
414 |                       (paragraph (((f.ml (5 16) (5 17)) (word c)))))))
415 |                    (data
416 |                     (((f.ml (5 20) (5 21))
417 |                       (paragraph (((f.ml (5 20) (5 21)) (word d)))))))))))
418 |                (align (default left right center))))))
419 |            (warnings ())) |}]
420 | 
421 |     let light_table_new_lines =
422 |       test
423 |         {|
424 |       {t
425 | 
426 |        | a | b | c | d |
427 | 
428 |        |---|---|---|---|
429 | 
430 |        | a | b | c | d |
431 | 
432 |       }
433 |       |};
434 |       [%expect
435 |         {|
436 |           ((output
437 |             (((f.ml (2 6) (10 7))
438 |               (table (syntax light)
439 |                (grid
440 |                 ((row
441 |                   ((header
442 |                     (((f.ml (4 9) (4 10)) (paragraph (((f.ml (4 9) (4 10)) (word a)))))))
443 |                    (header
444 |                     (((f.ml (4 13) (4 14))
445 |                       (paragraph (((f.ml (4 13) (4 14)) (word b)))))))
446 |                    (header
447 |                     (((f.ml (4 17) (4 18))
448 |                       (paragraph (((f.ml (4 17) (4 18)) (word c)))))))
449 |                    (header
450 |                     (((f.ml (4 21) (4 22))
451 |                       (paragraph (((f.ml (4 21) (4 22)) (word d)))))))))
452 |                  (row
453 |                   ((data
454 |                     (((f.ml (8 9) (8 10)) (paragraph (((f.ml (8 9) (8 10)) (word a)))))))
455 |                    (data
456 |                     (((f.ml (8 13) (8 14))
457 |                       (paragraph (((f.ml (8 13) (8 14)) (word b)))))))
458 |                    (data
459 |                     (((f.ml (8 17) (8 18))
460 |                       (paragraph (((f.ml (8 17) (8 18)) (word c)))))))
461 |                    (data
462 |                     (((f.ml (8 21) (8 22))
463 |                       (paragraph (((f.ml (8 21) (8 22)) (word d)))))))))))
464 |                (align (default default default default))))))
465 |            (warnings ())) |}]
466 | 
467 |     let light_table_markup =
468 |       test
469 |         {|
470 |       {t
471 |        | {i a} {:google.com} \t | | {m b} {e c} {% xyz %} | {b d} [foo] |
472 |        |---|---|---|---|
473 |       }
474 |       |};
475 |       [%expect
476 |         {|
477 |           ((output
478 |             (((f.ml (2 6) (5 7))
479 |               (table (syntax light)
480 |                (grid
481 |                 ((row
482 |                   ((header
483 |                     (((f.ml (3 9) (3 31))
484 |                       (paragraph
485 |                        (((f.ml (3 9) (3 14))
486 |                          (italic (((f.ml (3 12) (3 13)) (word a)))))
487 |                         ((f.ml (3 14) (3 15)) space)
488 |                         ((f.ml (3 15) (3 28)) (google.com ()))
489 |                         ((f.ml (3 28) (3 29)) space)
490 |                         ((f.ml (3 29) (3 31)) (word "\\t")))))))
491 |                    (header ())
492 |                    (header
493 |                     (((f.ml (3 36) (3 57))
494 |                       (paragraph
495 |                        (((f.ml (3 36) (3 41)) (math_span b))
496 |                         ((f.ml (3 41) (3 42)) space)
497 |                         ((f.ml (3 42) (3 47))
498 |                          (emphasis (((f.ml (3 45) (3 46)) (word c)))))
499 |                         ((f.ml (3 47) (3 48)) space)
500 |                         ((f.ml (3 48) (3 57)) (raw_markup () " xyz ")))))))
501 |                    (header
502 |                     (((f.ml (3 60) (3 71))
503 |                       (paragraph
504 |                        (((f.ml (3 60) (3 65)) (bold (((f.ml (3 63) (3 64)) (word d)))))
505 |                         ((f.ml (3 65) (3 66)) space)
506 |                         ((f.ml (3 66) (3 71)) (code_span foo)))))))))))
507 |                (align (default default default default))))))
508 |            (warnings ())) |}]
509 | 
510 |     let light_table_markup_with_newlines =
511 |       test
512 |         {|
513 |       {t | h1           | h2          |
514 |          |--------------|-------------|
515 |          | {e with
516 |               newlines} | {b d} [foo] |
517 |       }
518 |       |};
519 |       [%expect
520 |         {|
521 |         ((output
522 |           (((f.ml (2 6) (6 7))
523 |             (table (syntax light)
524 |              (grid
525 |               ((row
526 |                 ((header
527 |                   (((f.ml (2 11) (2 13))
528 |                     (paragraph (((f.ml (2 11) (2 13)) (word h1)))))))
529 |                  (header
530 |                   (((f.ml (2 26) (2 28))
531 |                     (paragraph (((f.ml (2 26) (2 28)) (word h2)))))))))
532 |                (row
533 |                 ((data
534 |                   (((f.ml (4 11) (5 23))
535 |                     (paragraph
536 |                      (((f.ml (4 11) (5 23))
537 |                        (emphasis
538 |                         (((f.ml (4 14) (4 18)) (word with))
539 |                          ((f.ml (4 18) (5 14)) space)
540 |                          ((f.ml (5 14) (5 22)) (word newlines))))))))))
541 |                  (data
542 |                   (((f.ml (5 26) (5 37))
543 |                     (paragraph
544 |                      (((f.ml (5 26) (5 31)) (bold (((f.ml (5 29) (5 30)) (word d)))))
545 |                       ((f.ml (5 31) (5 32)) space)
546 |                       ((f.ml (5 32) (5 37)) (code_span foo)))))))))))
547 |              (align (default default))))))
548 |          (warnings
549 |           ( "File \"f.ml\", line 4, character 11 to line 5, character 23:\
550 |            \nLine break is not allowed in '{t ...}' (table)."))) |}]
551 | 
552 |     let no_space =
553 |       test
554 |         {|
555 |        {t
556 |          | a | b |c| d |
557 |          |---|--:|:--|:-:|
558 |        }
559 |       |};
560 |       [%expect
561 |         {|
562 |         ((output
563 |           (((f.ml (2 7) (5 8))
564 |             (table (syntax light)
565 |              (grid
566 |               ((row
567 |                 ((header
568 |                   (((f.ml (3 11) (3 12))
569 |                     (paragraph (((f.ml (3 11) (3 12)) (word a)))))))
570 |                  (header
571 |                   (((f.ml (3 15) (3 16))
572 |                     (paragraph (((f.ml (3 15) (3 16)) (word b)))))))
573 |                  (header
574 |                   (((f.ml (3 18) (3 19))
575 |                     (paragraph (((f.ml (3 18) (3 19)) (word c)))))))
576 |                  (header
577 |                   (((f.ml (3 21) (3 22))
578 |                     (paragraph (((f.ml (3 21) (3 22)) (word d)))))))))))
579 |              (align (default right left center))))))
580 |          (warnings ())) |}]
581 | 
582 |     let multiple_headers =
583 |       test
584 |         {|
585 |       {t
586 |        ||a|b|
587 |        |:-|---:|
588 |        |c|d|
589 |        |cc|dd|
590 |        |-:|:-:|
591 |        |e|f|
592 |        |g|h||
593 |       }
594 |       |};
595 |       [%expect
596 |         {|
597 |         ((output
598 |           (((f.ml (2 6) (10 7))
599 |             (table (syntax light)
600 |              (grid
601 |               ((row
602 |                 ((header ())
603 |                  (header
604 |                   (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word a)))))))
605 |                  (header
606 |                   (((f.ml (3 11) (3 12))
607 |                     (paragraph (((f.ml (3 11) (3 12)) (word b)))))))))
608 |                (row
609 |                 ((data
610 |                   (((f.ml (5 8) (5 9)) (paragraph (((f.ml (5 8) (5 9)) (word c)))))))
611 |                  (data
612 |                   (((f.ml (5 10) (5 11))
613 |                     (paragraph (((f.ml (5 10) (5 11)) (word d)))))))))
614 |                (row
615 |                 ((data
616 |                   (((f.ml (6 8) (6 10))
617 |                     (paragraph (((f.ml (6 8) (6 10)) (word cc)))))))
618 |                  (data
619 |                   (((f.ml (6 11) (6 13))
620 |                     (paragraph (((f.ml (6 11) (6 13)) (word dd)))))))))
621 |                (row
622 |                 ((data
623 |                   (((f.ml (7 8) (7 10))
624 |                     (paragraph (((f.ml (7 8) (7 10)) (word -:)))))))
625 |                  (data
626 |                   (((f.ml (7 11) (7 14))
627 |                     (paragraph (((f.ml (7 11) (7 14)) (word :-:)))))))))
628 |                (row
629 |                 ((data
630 |                   (((f.ml (8 8) (8 9)) (paragraph (((f.ml (8 8) (8 9)) (word e)))))))
631 |                  (data
632 |                   (((f.ml (8 10) (8 11))
633 |                     (paragraph (((f.ml (8 10) (8 11)) (word f)))))))))
634 |                (row
635 |                 ((data
636 |                   (((f.ml (9 8) (9 9)) (paragraph (((f.ml (9 8) (9 9)) (word g)))))))
637 |                  (data
638 |                   (((f.ml (9 10) (9 11))
639 |                     (paragraph (((f.ml (9 10) (9 11)) (word h)))))))
640 |                  (data ())))))
641 |              (align (left right))))))
642 |          (warnings ())) |}]
643 | 
644 |     let block_element_in_cell =
645 |       test
646 |         {|
647 |            {t
648 |            | {[ a ]} | b |
649 |            |---|---|
650 |            }
651 |           |};
652 |       [%expect
653 |         {|
654 |         ((output
655 |           (((f.ml (2 11) (5 12))
656 |             (table (syntax light)
657 |              (grid
658 |               ((row
659 |                 ((header ())
660 |                  (header
661 |                   (((f.ml (3 23) (3 24))
662 |                     (paragraph (((f.ml (3 23) (3 24)) (word b)))))))))))
663 |              (align (default default))))))
664 |          (warnings
665 |           ( "File \"f.ml\", line 3, characters 13-20:\
666 |            \n'{[...]}' (code block) is not allowed in '{t ...}' (table)."))) |}]
667 | 
668 |     let block_element_in_row =
669 |       test
670 |         {|
671 |            {t
672 |            {[ a ]}
673 |            | a | b |
674 |            |---|---|
675 |            }
676 |           |};
677 |       [%expect
678 |         {|
679 |         ((output
680 |           (((f.ml (2 11) (6 12))
681 |             (table (syntax light)
682 |              (grid
683 |               ((row
684 |                 ((header
685 |                   (((f.ml (4 13) (4 14))
686 |                     (paragraph (((f.ml (4 13) (4 14)) (word a)))))))
687 |                  (header
688 |                   (((f.ml (4 17) (4 18))
689 |                     (paragraph (((f.ml (4 17) (4 18)) (word b)))))))))))
690 |              (align (default default))))))
691 |          (warnings
692 |           ( "File \"f.ml\", line 3, characters 11-18:\
693 |            \n'{[...]}' (code block) is not allowed in '{t ...}' (table)."))) |}]
694 | 
695 |     let more_cells_later =
696 |       test
697 |         {|
698 |       {t
699 |        | x | y |
700 |        |---|---|
701 |        | x | y | z |
702 |       }
703 |       |};
704 |       [%expect
705 |         {|
706 |         ((output
707 |           (((f.ml (2 6) (6 7))
708 |             (table (syntax light)
709 |              (grid
710 |               ((row
711 |                 ((header
712 |                   (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word x)))))))
713 |                  (header
714 |                   (((f.ml (3 13) (3 14))
715 |                     (paragraph (((f.ml (3 13) (3 14)) (word y)))))))))
716 |                (row
717 |                 ((data
718 |                   (((f.ml (5 9) (5 10)) (paragraph (((f.ml (5 9) (5 10)) (word x)))))))
719 |                  (data
720 |                   (((f.ml (5 13) (5 14))
721 |                     (paragraph (((f.ml (5 13) (5 14)) (word y)))))))
722 |                  (data
723 |                   (((f.ml (5 17) (5 18))
724 |                     (paragraph (((f.ml (5 17) (5 18)) (word z)))))))))))
725 |              (align (default default))))))
726 |          (warnings ())) |}]
727 | 
728 |     let less_cells_later =
729 |       test
730 |         {|
731 |       {t
732 |        | x | y |
733 |        |---|---|
734 |        x 
735 |       }
736 |       |};
737 |       [%expect
738 |         {|
739 |         ((output
740 |           (((f.ml (2 6) (6 7))
741 |             (table (syntax light)
742 |              (grid
743 |               ((row
744 |                 ((header
745 |                   (((f.ml (3 9) (3 10)) (paragraph (((f.ml (3 9) (3 10)) (word x)))))))
746 |                  (header
747 |                   (((f.ml (3 13) (3 14))
748 |                     (paragraph (((f.ml (3 13) (3 14)) (word y)))))))))
749 |                (row
750 |                 ((data
751 |                   (((f.ml (5 7) (5 8)) (paragraph (((f.ml (5 7) (5 8)) (word x)))))))))))
752 |              (align (default default))))))
753 |          (warnings ())) |}]
754 | 
755 |     let multiple_word =
756 |       test
757 |         {|
758 |       {t
759 |   | Header and other word |
760 |   |-----------------------|
761 |   | cell and other words  |
762 |       }
763 |       |};
764 |       [%expect
765 |         {|
766 |         ((output
767 |           (((f.ml (2 6) (6 7))
768 |             (table (syntax light)
769 |              (grid
770 |               ((row
771 |                 ((header
772 |                   (((f.ml (3 4) (3 25))
773 |                     (paragraph
774 |                      (((f.ml (3 4) (3 10)) (word Header))
775 |                       ((f.ml (3 10) (3 11)) space) ((f.ml (3 11) (3 14)) (word and))
776 |                       ((f.ml (3 14) (3 15)) space)
777 |                       ((f.ml (3 15) (3 20)) (word other))
778 |                       ((f.ml (3 20) (3 21)) space)
779 |                       ((f.ml (3 21) (3 25)) (word word)))))))))
780 |                (row
781 |                 ((data
782 |                   (((f.ml (5 4) (5 24))
783 |                     (paragraph
784 |                      (((f.ml (5 4) (5 8)) (word cell)) ((f.ml (5 8) (5 9)) space)
785 |                       ((f.ml (5 9) (5 12)) (word and)) ((f.ml (5 12) (5 13)) space)
786 |                       ((f.ml (5 13) (5 18)) (word other))
787 |                       ((f.ml (5 18) (5 19)) space)
788 |                       ((f.ml (5 19) (5 24)) (word words)))))))))))
789 |              (align (default))))))
790 |          (warnings ())) |}]
791 | 
792 |     let multiple_word_header =
793 |       test
794 |         {|
795 |       {t
796 |   | Header other word |
797 |   |-------------------|
798 |   | Header other word |
799 |       }
800 |       |};
801 |       [%expect
802 |         {|
803 |         ((output
804 |           (((f.ml (2 6) (6 7))
805 |             (table (syntax light)
806 |              (grid
807 |               ((row
808 |                 ((header
809 |                   (((f.ml (3 4) (3 21))
810 |                     (paragraph
811 |                      (((f.ml (3 4) (3 10)) (word Header))
812 |                       ((f.ml (3 10) (3 11)) space)
813 |                       ((f.ml (3 11) (3 16)) (word other))
814 |                       ((f.ml (3 16) (3 17)) space)
815 |                       ((f.ml (3 17) (3 21)) (word word)))))))))
816 |                (row
817 |                 ((data
818 |                   (((f.ml (5 4) (5 21))
819 |                     (paragraph
820 |                      (((f.ml (5 4) (5 10)) (word Header))
821 |                       ((f.ml (5 10) (5 11)) space)
822 |                       ((f.ml (5 11) (5 16)) (word other))
823 |                       ((f.ml (5 16) (5 17)) space)
824 |                       ((f.ml (5 17) (5 21)) (word word)))))))))))
825 |              (align (default))))))
826 |          (warnings ())) |}]
827 |   end in
828 |   ()
829 | 


--------------------------------------------------------------------------------