├── .gitattributes
├── .github
    └── workflows
    │   ├── deploy-doc.yml
    │   └── workflow.yml
├── .gitignore
├── .ocamlformat
├── .ocp-indent
├── CHANGES.md
├── LICENSE
├── Makefile
├── README.md
├── bin
    ├── dune
    └── main.ml
├── dune-project
├── omd.opam
├── src
    ├── ast.ml
    ├── ast_block.ml
    ├── ast_constructors.ml
    ├── ast_inline.ml
    ├── block_parser.ml
    ├── block_parser.mli
    ├── dune
    ├── html.ml
    ├── html.mli
    ├── omd.ml
    ├── omd.mli
    ├── parser.ml
    ├── sexp.ml
    ├── strSlice.ml
    ├── strSlice.mli
    └── toc.ml
├── tests
    ├── John_MacFarlane_said_peg-markdown_takes_forever_to_process_this--jgm_peg-markdown_issues_28.md
    ├── attributes.md
    ├── blackbox
    │   ├── dune
    │   ├── emphasis.t
    │   ├── heading-id.t
    │   ├── normalize-label.t
    │   └── regression-224.t
    ├── def_list.md
    ├── dune
    ├── dune.inc
    ├── expect_tests.ml
    ├── extra_table_tests.md
    ├── extract_tests.ml
    ├── gfm_table_spec.md
    ├── omd.ml
    └── spec.txt
└── tools
    ├── dune
    ├── entities.json
    └── gen_entities.ml


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ml* text eol=lf
2 | *.md text eol=lf
3 | *.html text eol=lf
4 | dune.inc linguist-generated=true
5 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-doc.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy odoc
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 | 
 8 | jobs:
 9 |   deploy-doc:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout code
13 |         uses: actions/checkout@v3
14 | 
15 |       - name: Use OCaml 4.13.x
16 |         uses: ocaml/setup-ocaml@v2
17 |         with:
18 |           ocaml-compiler: 4.13.x
19 |           dune-cache: true
20 | 
21 |       - name: Deploy odoc to GitHub Pages
22 |         uses: ocaml/setup-ocaml/deploy-doc@v2
23 | 


--------------------------------------------------------------------------------
/.github/workflows/workflow.yml:
--------------------------------------------------------------------------------
 1 | name: Main workflow
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   build:
11 |     strategy:
12 |       fail-fast: false
13 |       matrix:
14 |         os:
15 |           - macos-latest
16 |           - ubuntu-latest
17 |           - windows-latest
18 |         ocaml-compiler:
19 |           # Decision on version matrix informed by https://discuss.ocaml.org/t/which-ocaml-compiler-versions-should-we-run-against-in-ci/7933/2
20 |           - 4.13.x
21 |           - 4.14.x
22 |         include:
23 |           - ocaml-compiler: 5.0.x
24 |             os:
25 |               - ubuntu-latest
26 |           - ocaml-compiler: 5.0.x
27 |             os:
28 |               - macos-latest
29 | 
30 |     runs-on: ${{ matrix.os }}
31 |     steps:
32 |       - name: Checkout code
33 |         uses: actions/checkout@v3
34 | 
35 |       - name: Use OCaml ${{ matrix.ocaml-compiler }}
36 |         uses: ocaml/setup-ocaml@v2
37 |         with:
38 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
39 | 
40 |       - name: Install deps
41 |         run: opam install . --deps-only --with-doc --with-test
42 | 
43 |       - name: Build
44 |         run: opam exec -- make build
45 | 
46 |       - name: Test
47 |         run: opam exec -- make test
48 | 
49 |   lint-doc:
50 |     runs-on: ubuntu-latest
51 |     steps:
52 |       - name: Checkout code
53 |         uses: actions/checkout@v3
54 | 
55 |       - name: Use OCaml 4.14.x
56 |         uses: ocaml/setup-ocaml@v2
57 |         with:
58 |           ocaml-compiler: 4.14.x
59 |           dune-cache: true
60 | 
61 |       - name: Lint doc
62 |         uses: ocaml/setup-ocaml/lint-doc@v2
63 | 
64 |   lint-fmt:
65 |     runs-on: ubuntu-latest
66 |     steps:
67 |       - name: Checkout code
68 |         uses: actions/checkout@v3
69 | 
70 |       - name: Use OCaml 4.14.x
71 |         uses: ocaml/setup-ocaml@v2
72 |         with:
73 |           ocaml-compiler: 4.14.x
74 |           dune-cache: true
75 | 
76 |       - name: Lint fmt
77 |         uses: ocaml/setup-ocaml/lint-fmt@v2
78 | 
79 |   lint-opam:
80 |     runs-on: ubuntu-latest
81 |     steps:
82 |       - name: Checkout code
83 |         uses: actions/checkout@v3
84 | 
85 |       - name: Use OCaml 4.14.x
86 |         uses: ocaml/setup-ocaml@v2
87 |         with:
88 |           ocaml-compiler: 4.14.x
89 |           dune-cache: true
90 | 
91 |       - name: Lint opam
92 |         uses: ocaml/setup-ocaml/lint-opam@v2
93 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | .merlin
3 | *.install
4 | _opam
5 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
 1 | version = 0.21.0
 2 | exp-grouping = preserve
 3 | break-fun-sig = fit-or-vertical
 4 | break-fun-decl = fit-or-vertical
 5 | wrap-fun-args = false
 6 | dock-collection-brackets = false
 7 | break-separators = before
 8 | break-infix = fit-or-vertical
 9 | type-decl = sparse
10 | 


--------------------------------------------------------------------------------
/.ocp-indent:
--------------------------------------------------------------------------------
1 | match_clause=4
2 | strict_with=auto
3 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
  1 | 2.0.0~alpha4
  2 | ------------
  3 | 
  4 | - Add an `empty` inline value (#298 @cuihtlauac)
  5 | 
  6 | - Remove stdcompat dependency (#304 #305 @hannesm, review by @samoht)
  7 | 
  8 | - Minimum supported OCaml version is now 4.13 (#304 @hannesm)
  9 | 
 10 | 2.0.0~alpha3
 11 | ------------
 12 | 
 13 | - Expose the HTML escape function `htmlentities` (#295 @cuihtlauac)
 14 | 
 15 | - Support generation of identifiers in headers (#294, @tatchi)
 16 | 
 17 | - Support GitHub-Flavoured Markdown tables (#292, @bobatkey)
 18 | 
 19 | - Update parser to support CommonMark Spec 0.30 (#266, @SquidDev)
 20 | 
 21 | - Preserve the order of input files in the HTML output to stdout (#258,
 22 |   @patricoferris)
 23 | 
 24 | - Fix all deviations from CommonMark Spec 0.30 (#284, #283, #278, #277, #269,
 25 |   @tatchi)
 26 | 
 27 | 2.0.0~alpha2
 28 | ------------
 29 | 
 30 | - Clean up AST representation (#229, @sonologico)
 31 | 
 32 | - Restore table of contents generation functionality (#240, @sonologico)
 33 | 
 34 | - Fix parsing of <...>-style links and inline code parsing (#233, @SquidDev)
 35 | 
 36 | - Fix handing of the nonbreaking whitespace character, ASCII code 160 (#247,
 37 |   @shonfeder)
 38 | 
 39 | - Downgrade dune requirements to 2.0 (@nobj)
 40 | 
 41 | 
 42 | 2.0.0~alpha1
 43 | ------------
 44 | 
 45 | - Lower OCaml requirement to 4.04.2  (#213, @jfrolich)
 46 | 
 47 | - Big refactoring by @nojb. Changes in interface (simplification of code, might
 48 |   affect performance a little). To be tested!
 49 | 
 50 | 1.3.x
 51 | -----
 52 | 
 53 | - might stop checking validity of HTML tag *names* and accept any XML-parsable
 54 |   tag name.
 55 | 
 56 | 1.2.5
 57 | -----
 58 | 
 59 | - only fixes a single bug (an ordered list could be transformed into an
 60 |   unordered list)
 61 | 
 62 | 1.2.4
 63 | -----
 64 | 
 65 | - only fixes a single bug (some spaces were wrongly handled in the HTML parsing)
 66 | 
 67 | 1.2.2/3
 68 | -------
 69 | 
 70 | - fix a few issues with HTML parsing.
 71 | 
 72 | 1.2.1
 73 | -----
 74 | 
 75 | - mainly fixes issues with HTML parsing.
 76 | 
 77 | 1.2.0
 78 | -----
 79 | 
 80 | - introduces options `-w` and `-W`. Fixes mostly concern subtle uses of `\n`s in
 81 |   HTML and Markdown outputs.
 82 | 
 83 | 1.1.2
 84 | -----
 85 | 
 86 | - fix: some URL-related parsing issues.
 87 | 
 88 | 1.1.0/1.1.1
 89 | -----------
 90 | 
 91 | - fix: some HTML-related issues.
 92 | 
 93 | 1.0.1
 94 | -----
 95 | 
 96 | - fixes some parsing issues, improves output. (2014-10-02)
 97 | 
 98 | 1.0.0
 99 | -----
100 | 
101 | - warning: this release is only partially compatible with previous versions.
102 | 
103 | - accept HTML blocks which directly follow each other
104 | 
105 | - fix: accept all XML-compatible attribute names for HTML
106 |   attributes
107 | 
108 | - fix backslash-escaping for hash-ending ATX-titles + fix Markdown output for
109 |   Html_block
110 | 
111 | - fix (HTML parsing) bugs introduced in 1.0.0.b and 1.0.0.c
112 | 
113 | - rewrite parser of block HTML to use the updated Omd.t
114 | 
115 | - rewrite parser of inline HTML to use the updated Omd.t
116 | 
117 | - upgrade Omd.t for HTML representation
118 | 
119 | There will not be any newer 0.9.x release although new bugs have been
120 | discovered. Thus it's recommended to upgrade to the latest 1.x.y.
121 | 
122 | 0.9.7
123 | -----
124 | 
125 | - introduction of media:end + bug fixes.
126 | 
127 | If you need to have a version that still has `Tag of extension` instead of `Tag
128 | of name * extension` and don't want to upgrade, you may use 0.9.3
129 | 
130 | 0.9.6
131 | -----
132 | 
133 | - fix a bug (concerning extensions) introduced by 0.9.4.
134 | 
135 | 0.9.5
136 | -----
137 | 
138 | - bug fix + `Tag of extension` changed to `Tag of name * extension`
139 | 
140 | 0.9.4
141 | -----
142 | 
143 | - fixes a bug for the new feature
144 | 
145 | 0.9.3
146 | -----
147 | 
148 | - new feature `media:type="text/omd"`. This version is recommended if you do
149 |   not use that new feature and want to use 0.9.x
150 | 
151 | 0.9.2
152 | -----
153 | 
154 | - not released...
155 | 
156 | older versions
157 | --------------
158 | 
159 | - cf. [commit log](https://github.com/ocaml/omd/commits/master)
160 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2013-2018, Philippe Wang <philippe.wang@cl.cam.ac.uk>
 4 | Copyright (c) 2018, Nicolás Ojeda Bär <n.oje.bar@gmail.com>
 5 | 
 6 | Permission to use, copy, modify, and/or distribute this software for any
 7 | purpose with or without fee is hereby granted, provided that the above
 8 | copyright notice and this permission notice appear in all copies.
 9 | 
10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | ##
 2 | # Omd
 3 | #
 4 | # @file
 5 | 
 6 | .PHONY: test build fmt deps
 7 | 
 8 | build: deps
 9 | 	dune build
10 | 
11 | deps:
12 | 	opam install . --deps-only --yes
13 | 
14 | test:
15 | 	dune build @gen --auto-promote
16 | 	dune runtest
17 | 
18 | fmt:
19 | 	dune build @fmt --auto-promote
20 | # end
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | `omd`: Markdown library and tool in OCaml
  2 | =========================================
  3 | 
  4 | Omd is an OCaml library designed to parse, manipulate, and print Markdown into
  5 | different formats. In addition to the library, a command-line tool `omd` is
  6 | included to easily convert markdown into HTML.
  7 | 
  8 | Omd aims for compliance with the [CommonMark](https://commonmark.org/) standard.
  9 | We are currently compliant with [0.30 of the ComonMark
 10 | spec](https://spec.commonmark.org/0.30/).
 11 | 
 12 | Omd is developed on GitHub. If you need to report an issue, please do so at
 13 | https://github.com/ocaml/omd/issues.
 14 | 
 15 | Installation
 16 | ------------
 17 | 
 18 | The recommended way to install `omd` is via the [opam package manager][opam].
 19 | 
 20 | You can install versions published to opam with:
 21 | 
 22 | ```sh
 23 | $ opam install omd
 24 | ```
 25 | 
 26 | You can install the current development version from the GitHub repository with
 27 | 
 28 | ```sh
 29 | $ opam pin git@github.com:ocaml/omd.git
 30 | ```
 31 | 
 32 | Documentation
 33 | -------------
 34 | 
 35 | - View API docs online: https://ocaml.github.io/omd
 36 | - View the API docs locally: `odig doc omd`
 37 | 
 38 | Building from source
 39 | --------------------
 40 | 
 41 | You can also build it manually from source with:
 42 | 
 43 | ```sh
 44 | $ git clone https://github.com/ocaml/omd.git
 45 | $ cd omd
 46 | $ make build
 47 | ```
 48 | 
 49 | You can run the test suite with
 50 | 
 51 | ```sh
 52 | $ make test
 53 | ```
 54 | 
 55 | Dependencies
 56 | ------------
 57 | 
 58 | The minimum version of OCaml required is 4.13.
 59 | 
 60 | Dependencies are tracked in the [dune-project](./dune-project) and can be
 61 | installed by running:
 62 | 
 63 | ```sh
 64 | $ opam install . --deps-only
 65 | # or
 66 | $ make deps
 67 | ```
 68 | 
 69 | History
 70 | -------
 71 | 
 72 | Omd 1 was developed by [Philippe Wang](https://github.com/pw374/) at [OCaml
 73 | Labs](http://ocaml.io/) in [Cambridge](http://www.cl.cam.ac.uk).
 74 | 
 75 | Its development was motivated by at least these facts:
 76 | 
 77 | - We wanted an OCaml implementation of Markdown; some OCaml parsers of Markdown
 78 |   existed before but they were incomplete. It's easier for an OCaml project to
 79 |   depend on an pure-OCaml implementation of Markdown than to depend some
 80 |   interface to a library implemented using another language, and this is ever
 81 |   more important since [Opam](https://opam.ocaml.org) exists.
 82 | 
 83 | - We wanted to provide a way to make the contents of the
 84 |   [OCaml.org](http://ocaml.org/) website be essentially in Markdown instead of
 85 |   HTML. And we wanted to this website to be implemented in OCaml.
 86 | 
 87 | - Having an OCaml implementation of Markdown is virtually mandatory for those
 88 |   who want to use a Markdown parser in a [Mirage](http://www.openmirage.org)
 89 |   application.  Note that OMD has replaced the previous Markdown parser of
 90 |   [COW](https://github.com/mirage/ocaml-cow), which has been developed as part
 91 |   of the Mirage project.
 92 | 
 93 | Omd 2 started development in 2020, beginning [Nicolás Ojeda
 94 | Bär](https://github.com/nojb)'s redesign and rewrite, and is currently ongoing.
 95 | Omd 2 has yet to reach feature parity with Omd 1.
 96 | 
 97 | Thanks
 98 | ------
 99 | 
100 | Special thanks for feedback and contributions to this project goes out to:
101 | 
102 | - [Christophe Troestler](https://github.com/Chris00)
103 | - [Ashish Argawal](https://github.com/agarwal)
104 | - [Sebastien Mondet](https://github.com/smondet)
105 | - [Thomas Gazagnaire](https://github.com/samoht)
106 | - [Daniel Bünzli](https://github.com/dbuenzli)
107 | - [Amir Chaudry](https://github.com/amirmc)
108 | - [Anil Madhavapeddy](https://github.com/avsm/)
109 | - [David Sheets](https://github.com/dsheets/)
110 | - [Jeremy Yallop](https://github.com/yallop/)
111 | - [Nicolás Ojeda Bär](https://github.com/nojb)
112 | - [Raphael Sousa Santos](https://sonologi.co/)
113 | - [Corentin Leruth](https://github.com/tatchi)
114 | - [Bob Atkey](https://bentnib.org/)
115 | - *please insert your name here if you believe you've been forgotten*
116 | 


--------------------------------------------------------------------------------
/bin/dune:
--------------------------------------------------------------------------------
1 | (executable
2 |  (name main)
3 |  (public_name omd)
4 |  (libraries omd dune-build-info))
5 | 


--------------------------------------------------------------------------------
/bin/main.ml:
--------------------------------------------------------------------------------
 1 | let with_open_in fn f =
 2 |   let ic = open_in fn in
 3 |   match f ic with
 4 |   | r ->
 5 |       close_in ic;
 6 |       r
 7 |   | exception e ->
 8 |       close_in_noerr ic;
 9 |       raise e
10 | 
11 | let with_open_out fn f =
12 |   let oc = open_out fn in
13 |   match f oc with
14 |   | r ->
15 |       close_out oc;
16 |       r
17 |   | exception e ->
18 |       close_out_noerr oc;
19 |       raise e
20 | 
21 | let process ?auto_identifiers ic oc =
22 |   let md = Omd.of_channel ic in
23 |   output_string oc (Omd.to_html ?auto_identifiers md)
24 | 
25 | let print_version () =
26 |   let version =
27 |     match Build_info.V1.version () with
28 |     | None -> "n/a"
29 |     | Some v -> Build_info.V1.Version.to_string v
30 |   in
31 |   print_endline version;
32 |   exit 0
33 | 
34 | let input = ref []
35 | let output = ref ""
36 | let auto_identifiers = ref None
37 | 
38 | let spec =
39 |   [ ( "-o"
40 |     , Arg.Set_string output
41 |     , " file.html Specify the output file (default is stdout)." )
42 |   ; ( "--auto-identifiers"
43 |     , Arg.Bool (fun x -> auto_identifiers := Some x)
44 |     , " Should identifiers be automatically assigned to headings." )
45 |   ; ( "--version"
46 |     , Arg.Unit print_version
47 |     , " Display the version of the currently installed omd." )
48 |   ; ( "--"
49 |     , Rest (fun s -> input := s :: !input)
50 |     , " Consider all remaining arguments as input file names." )
51 |   ]
52 | 
53 | let main () =
54 |   Arg.parse
55 |     (Arg.align spec)
56 |     (fun s -> input := s :: !input)
57 |     "omd [options] [inputfile1 .. inputfileN] [options]";
58 |   let with_output f =
59 |     if !output = "" then f stdout else with_open_out !output f
60 |   in
61 |   let auto_identifiers = !auto_identifiers in
62 |   with_output @@ fun oc ->
63 |   if !input = [] then process ?auto_identifiers stdin oc
64 |   else
65 |     let f filename =
66 |       with_open_in filename @@ fun ic -> process ?auto_identifiers ic oc
67 |     in
68 |     List.(iter f (rev !input))
69 | 
70 | let () =
71 |   try main () with
72 |   | Sys_error msg ->
73 |       Printf.eprintf "Error: %s\n" msg;
74 |       exit 1
75 |   | exn ->
76 |       Printf.eprintf "Error: %s\n" (Printexc.to_string exn);
77 |       Printexc.print_backtrace stderr;
78 |       exit 1
79 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 2.7)
 2 | (name omd)
 3 | (version 2.0.0~alpha1)
 4 | 
 5 | (cram enable)
 6 | 
 7 | (generate_opam_files)
 8 | 
 9 | (license ISC)
10 | (authors "Philippe Wang <philippe.wang@gmail.com>"
11 |          "Nicolás Ojeda Bär <n.oje.bar@gmail.com>")
12 | (maintainers "Shon Feder <shon.feder@gmail.com>"
13 |              "Raphael Sousa Santos <@sonologico>")
14 | (source (github ocaml/omd))
15 | 
16 | (package
17 |  (name omd)
18 |  (synopsis "A Markdown frontend in pure OCaml")
19 |  (description
20 |   "This Markdown library is implemented using only pure OCaml (including
21 | I/O operations provided by the standard OCaml compiler distribution).
22 | OMD is meant to be as faithful as possible to the original Markdown.
23 | Additionally, OMD implements a few Github markdown features, an
24 | extension mechanism, and some other features. Note that the opam
25 | package installs both the OMD library and the command line tool `omd`.")
26 |  (tags (org:ocamllabs org:mirage))
27 |  (depends
28 |   (ocaml (>= 4.13))
29 |    uutf
30 |    uucp
31 |    uunf
32 |   (dune-build-info (>= 2.7))
33 |   (ppx_expect :with-test)))
34 | 


--------------------------------------------------------------------------------
/omd.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | version: "2.0.0~alpha1"
 4 | synopsis: "A Markdown frontend in pure OCaml"
 5 | description: """
 6 | This Markdown library is implemented using only pure OCaml (including
 7 | I/O operations provided by the standard OCaml compiler distribution).
 8 | OMD is meant to be as faithful as possible to the original Markdown.
 9 | Additionally, OMD implements a few Github markdown features, an
10 | extension mechanism, and some other features. Note that the opam
11 | package installs both the OMD library and the command line tool `omd`."""
12 | maintainer: [
13 |   "Shon Feder <shon.feder@gmail.com>" "Raphael Sousa Santos <@sonologico>"
14 | ]
15 | authors: [
16 |   "Philippe Wang <philippe.wang@gmail.com>"
17 |   "Nicolás Ojeda Bär <n.oje.bar@gmail.com>"
18 | ]
19 | license: "ISC"
20 | tags: ["org:ocamllabs" "org:mirage"]
21 | homepage: "https://github.com/ocaml/omd"
22 | bug-reports: "https://github.com/ocaml/omd/issues"
23 | depends: [
24 |   "dune" {>= "2.7"}
25 |   "ocaml" {>= "4.13"}
26 |   "uutf"
27 |   "uucp"
28 |   "uunf"
29 |   "dune-build-info" {>= "2.7"}
30 |   "ppx_expect" {with-test}
31 |   "odoc" {with-doc}
32 | ]
33 | build: [
34 |   ["dune" "subst"] {dev}
35 |   [
36 |     "dune"
37 |     "build"
38 |     "-p"
39 |     name
40 |     "-j"
41 |     jobs
42 |     "@install"
43 |     "@runtest" {with-test}
44 |     "@doc" {with-doc}
45 |   ]
46 | ]
47 | dev-repo: "git+https://github.com/ocaml/omd.git"
48 | 


--------------------------------------------------------------------------------
/src/ast.ml:
--------------------------------------------------------------------------------
 1 | module Impl = struct
 2 |   include Ast_inline
 3 |   include Ast_block.List_types
 4 |   include Ast_block.Table_alignments
 5 |   include Ast_block.WithInline
 6 | 
 7 |   type attributes = (string * string) list
 8 |   type doc = attributes block list
 9 | end
10 | 
11 | module type Intf = module type of Impl
12 | 
13 | module Util = struct
14 |   include Impl
15 | 
16 |   let same_block_list_kind k1 k2 =
17 |     match (k1, k2) with
18 |     | Ordered (_, c1), Ordered (_, c2) | Bullet c1, Bullet c2 -> c1 = c2
19 |     | _ -> false
20 | end
21 | 


--------------------------------------------------------------------------------
/src/ast_block.ml:
--------------------------------------------------------------------------------
 1 | module type BlockContent = sig
 2 |   type 'a t
 3 | end
 4 | 
 5 | module StringContent = struct
 6 |   type 'attr t = string
 7 | end
 8 | 
 9 | module InlineContent = struct
10 |   type 'attr t = 'attr Ast_inline.inline
11 | end
12 | 
13 | module List_types = struct
14 |   type list_type =
15 |     | Ordered of int * char
16 |     | Bullet of char
17 | 
18 |   type list_spacing =
19 |     | Loose
20 |     | Tight
21 | end
22 | 
23 | module Table_alignments = struct
24 |   type cell_alignment =
25 |     | Default
26 |     | Left
27 |     | Centre
28 |     | Right
29 | end
30 | 
31 | open List_types
32 | open Table_alignments
33 | 
34 | module Make (C : BlockContent) = struct
35 |   type 'attr def_elt =
36 |     { term : 'attr C.t
37 |     ; defs : 'attr C.t list
38 |     }
39 | 
40 |   (* A value of type 'attr is present in all variants of this type. We use it to associate
41 |      extra information to each node in the AST. Cn the common case, the attributes type defined
42 |      above is used. We might eventually have an alternative function to parse blocks while keeping
43 |      concrete information such as source location and we'll use it for that as well. *)
44 |   type 'attr block =
45 |     | Paragraph of 'attr * 'attr C.t
46 |     | List of 'attr * list_type * list_spacing * 'attr block list list
47 |     | Blockquote of 'attr * 'attr block list
48 |     | Thematic_break of 'attr
49 |     | Heading of 'attr * int * 'attr C.t
50 |     | Code_block of 'attr * string * string
51 |     | Html_block of 'attr * string
52 |     | Definition_list of 'attr * 'attr def_elt list
53 |     | Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list
54 |         (** A table is represented by a header row, which is a list of pairs of
55 |             header cells and alignments, and a list of rows *)
56 | end
57 | 
58 | module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct
59 |   module SrcBlock = Make (Src)
60 |   module DstBlock = Make (Dst)
61 | 
62 |   let rec map (f : 'attr Src.t -> 'attr Dst.t) :
63 |       'attr SrcBlock.block -> 'attr DstBlock.block = function
64 |     | SrcBlock.Paragraph (attr, x) -> DstBlock.Paragraph (attr, f x)
65 |     | List (attr, ty, sp, bl) ->
66 |         List (attr, ty, sp, List.map (List.map (map f)) bl)
67 |     | Blockquote (attr, xs) -> Blockquote (attr, List.map (map f) xs)
68 |     | Thematic_break attr -> Thematic_break attr
69 |     | Heading (attr, level, text) -> Heading (attr, level, f text)
70 |     | Definition_list (attr, l) ->
71 |         let f { SrcBlock.term; defs } =
72 |           { DstBlock.term = f term; defs = List.map f defs }
73 |         in
74 |         Definition_list (attr, List.map f l)
75 |     | Code_block (attr, label, code) -> Code_block (attr, label, code)
76 |     | Html_block (attr, x) -> Html_block (attr, x)
77 |     | Table (attr, headers, rows) ->
78 |         Table
79 |           ( attr
80 |           , List.map (fun (header, alignment) -> (f header, alignment)) headers
81 |           , List.map (List.map f) rows )
82 | end
83 | 
84 | module Mapper = MakeMapper (StringContent) (InlineContent)
85 | module Raw = Make (StringContent)
86 | module WithInline = Make (InlineContent)
87 | 


--------------------------------------------------------------------------------
/src/ast_constructors.ml:
--------------------------------------------------------------------------------
  1 | open Ast.Impl
  2 | 
  3 | module type Intf = sig
  4 |   (** Functions to help constructing the elements of a {!doc}.
  5 | 
  6 |       E.g.,
  7 | 
  8 |       {[
  9 |         let open Omd.Ctor in
 10 |         let para =
 11 |           p ~attrs:[ ("class", "my-para") ] [ txt "Content of"; em "this"; txt "paragraph" ]
 12 |         in
 13 |         [ blockquote [ para; hr; p [ txt "Content of second paragraph" ] ] ]
 14 |       ]}
 15 | 
 16 |       Produces
 17 | 
 18 |       {v
 19 | <blockquote>
 20 | <p class="my-para">Content of<em>this</em>paragraph</p>
 21 | <hr />
 22 | <p>Content of second paragraph</p>
 23 | </blockquote>
 24 |       v}
 25 | 
 26 |       The optional [attrs] argument always defaults to an empty list, and can
 27 |       generally be omitted. *)
 28 | 
 29 |   (** {3 Constructors for inline elements}  *)
 30 | 
 31 |   val empty : attributes inline
 32 |   (** [empty] is an empty inline element. *)
 33 | 
 34 |   val txt : ?attrs:attributes -> string -> attributes inline
 35 |   (** [txt ~attrs s] is {{!Text} [Text (attrs, s)]}. *)
 36 | 
 37 |   val em : ?attrs:attributes -> string -> attributes inline
 38 |   (** [em ~attrs s] is {{!Emph} [Emph (attrs, txt s)]}. See {!txt}. *)
 39 | 
 40 |   val strong : ?attrs:attributes -> string -> attributes inline
 41 |   (** [strong ~attrs s] is {{!Strong} [Strong (attrs, txt s)]}. See {!txt}. *)
 42 | 
 43 |   val code : ?attrs:attributes -> string -> attributes inline
 44 |   (** [code ~attrs s] is {{!Code} [Code (attrs, s)]}. *)
 45 | 
 46 |   val br : attributes inline
 47 |   (** [br] is {{!Hard_break}[Hard_break []]}. *)
 48 | 
 49 |   val nl : attributes inline
 50 |   (** [nl] is {{!Soft_break}[Soft_break []]}. *)
 51 | 
 52 |   val a :
 53 |        ?attrs:attributes
 54 |     -> ?title:string
 55 |     -> url:string
 56 |     -> string
 57 |     -> attributes inline
 58 |   (** [a ~attrs ~title ~url label] is a link around the text of [label],
 59 |       pointing to the [url], with the optional title [title] and additional [attrs].
 60 |       See {!Link}. *)
 61 | 
 62 |   val img :
 63 |        ?attrs:attributes
 64 |     -> ?title:string
 65 |     -> alt:string
 66 |     -> string
 67 |     -> attributes inline
 68 |   (** [img ~attrs ~title ~alt src] is an image from the given [src] that has the
 69 |       [alt] text as a fallback, with the optional title [title] and additional
 70 |       [attrs].  See {!Image}. *)
 71 | 
 72 |   val html : string -> attributes inline
 73 |   (** [html s] is an inline HTML string. See {!Html}. *)
 74 | 
 75 |   (** {3 Constructors for block-level elements} *)
 76 | 
 77 |   val p : ?attrs:attributes -> attributes inline list -> attributes block
 78 |   (** [p ~attrs inlines] is a pragraph block holding the given [inline]
 79 |       elements. See {!Paragraph}. *)
 80 | 
 81 |   val ul :
 82 |        ?attrs:attributes
 83 |     -> ?spacing:list_spacing
 84 |     -> attributes block list list
 85 |     -> attributes block
 86 |   (** [ul ~attrs ~spacing items] is an unordered list with the specified [spacing], listing
 87 |       the given [items]. Each item is a list of block elements.
 88 | 
 89 |       - [spacing] defaults to {!Loose}.
 90 | 
 91 |       E.g.,
 92 | 
 93 |       {[
 94 |         ul ~spacing:Tight
 95 |           [ [ p [ txt "Item 1" ] ]
 96 |           ; [ p [ txt "Item 2" ] ]
 97 |           ; [ p [ txt "Item 3" ] ]
 98 |           ]
 99 |       ]}
100 | 
101 |       See {!List} and {!Bullet}. *)
102 | 
103 |   val ol :
104 |        ?attrs:attributes
105 |     -> ?start:int
106 |     -> ?char:[ `Dot | `Paren ]
107 |     -> ?spacing:list_spacing
108 |     -> attributes block list list
109 |     -> attributes block
110 |   (** [ol ~attrs ~start ~char ~spacing items] is like {!ul}, but constructs an ordered list,
111 |       where [start] is the number to start enumerating from, and [char] indicates the
112 |       character following the number in the enumeration.
113 | 
114 |       - [char] can be either [`Dot] indicating ['.'] or [`Paren] indicating [')'], and
115 |         defaults to [`Dot].
116 |       - [start] defaults to [1].
117 | 
118 |       See {!List} and {!Ordered}. *)
119 | 
120 |   val blockquote :
121 |     ?attrs:attributes -> attributes block list -> attributes block
122 |   (** [blockquote ~attrs blocks] is a blockquote element containing the given
123 |       [blocks]. See {!Blockquote}. *)
124 | 
125 |   val hr : attributes block
126 |   (** [hr] is {{!Thematic_break} [Thematic_break []]}. *)
127 | 
128 |   val h : ?attrs:attributes -> int -> attributes inline list -> attributes block
129 |   (** [h ~attrs level inlines] is a heading of the given [level] comprised of
130 |       the [inlines]. See {!Heading}. *)
131 | 
132 |   val code_bl : ?attrs:attributes -> ?lang:string -> string -> attributes block
133 |   (** [code_bl ~attrs ~lang code] is a code block labeled with language [lang].
134 | 
135 |       - [lang] defaults to being absent.
136 | 
137 |       See {!Code_block} *)
138 | 
139 |   val html_bl : ?attrs:attributes -> string -> attributes block
140 |   (** [html_bl ~attrs html] is a block-level element of raw HTML. See {!Html_block}. *)
141 | 
142 |   type 'attr ctor_def_elt =
143 |     { term : 'attr inline list
144 |     ; defs : 'attr inline list list
145 |     }
146 |   (** Type for the items given to {!dl} definition lists. It is isomorphic to {!def_elt}. *)
147 | 
148 |   val dl : ?attrs:attributes -> attributes ctor_def_elt list -> attributes block
149 |   (** [dl ~attrs elements] is a definition list of the given [elements]. See
150 |       {!Definition_list}.
151 | 
152 |       E.g.,
153 | 
154 |       {[
155 |         dl
156 |           [ { term = [ txt "def term 1" ]
157 |             ; defs =
158 |                 [ [ txt "definition 1.1" ]
159 |                 ; [ txt "definition 1.2" ]
160 |                 ; [ txt "definition 1.3" ]
161 |                 ]
162 |             }
163 |           ; { term = [ txt "def term 2" ]
164 |             ; defs =
165 |                 [ [ txt "definition 2.1" ]
166 |                 ; [ txt "definition 2.2" ]
167 |                 ; [ txt "definition 2.3" ]
168 |                 ]
169 |             }
170 |           ]
171 |       ]} *)
172 | end
173 | 
174 | module Impl : Intf = struct
175 |   let concat elems = Concat ([], elems)
176 |   let empty = concat []
177 |   let txt ?(attrs = []) s = Text (attrs, s)
178 |   let em ?(attrs = []) s = Emph (attrs, txt s)
179 |   let strong ?(attrs = []) s = Strong (attrs, txt s)
180 |   let code ?(attrs = []) s = Code (attrs, s)
181 |   let br = Hard_break []
182 |   let nl = Soft_break []
183 | 
184 |   let a ?(attrs = []) ?title ~url label =
185 |     Link (attrs, { label = txt label; destination = url; title })
186 | 
187 |   let img ?(attrs = []) ?title ~alt src =
188 |     Image (attrs, { label = txt alt; destination = src; title })
189 | 
190 |   (* Note that attributes are not actually supported Html nodes currently. *)
191 |   let html s = Html ([], s)
192 | 
193 |   (* Block constructors *)
194 | 
195 |   let p ?(attrs = []) inlines = Paragraph (attrs, concat inlines)
196 | 
197 |   let ul ?(attrs = []) ?(spacing = Loose) items =
198 |     List (attrs, Bullet '-', spacing, items)
199 | 
200 |   let ol ?(attrs = []) ?(start = 1) ?(char = `Dot) ?(spacing = Loose) items =
201 |     let c = match char with `Dot -> '.' | `Paren -> ')' in
202 |     List (attrs, Ordered (start, c), spacing, items)
203 | 
204 |   let blockquote ?(attrs = []) blocks = Blockquote (attrs, blocks)
205 |   let hr = Thematic_break []
206 |   let h ?(attrs = []) level inlines = Heading (attrs, level, concat inlines)
207 |   let code_bl ?(attrs = []) ?(lang = "") s = Code_block (attrs, lang, s)
208 |   let html_bl ?(attrs = []) s = Html_block (attrs, s)
209 | 
210 |   type 'attr ctor_def_elt =
211 |     { term : 'attr inline list
212 |     ; defs : 'attr inline list list
213 |     }
214 | 
215 |   let dl ?(attrs = []) (items : 'attr ctor_def_elt list) =
216 |     let def_elt_of_pair { term; defs } : 'attr def_elt =
217 |       let term = concat term in
218 |       let defs = List.map concat defs in
219 |       { term; defs }
220 |     in
221 |     let def_elts = List.map def_elt_of_pair items in
222 |     Definition_list (attrs, def_elts)
223 | end
224 | 


--------------------------------------------------------------------------------
/src/ast_inline.ml:
--------------------------------------------------------------------------------
 1 | (* TODO The presence of `attrs` in several of these nodes is leaking an
 2 |         implementation detail: we have no support for attributes in `Concat`
 3 |         `Soft_break` or `Html` nodes. The attributes are just dropped during
 4 |         rendering.  Should we remove this from the UI, or should we include
 5 |         those somehow? Or should we include these in the document model, but
 6 |         but with the caveat that most renderings of the document don't support
 7 |         attributes in these nodes? *)
 8 | type 'attr inline =
 9 |   | Concat of 'attr * 'attr inline list
10 |   | Text of 'attr * string
11 |   | Emph of 'attr * 'attr inline
12 |   | Strong of 'attr * 'attr inline
13 |   | Code of 'attr * string
14 |   | Hard_break of 'attr
15 |   | Soft_break of 'attr
16 |   | Link of 'attr * 'attr link
17 |   | Image of 'attr * 'attr link
18 |   | Html of 'attr * string
19 | 
20 | and 'attr link =
21 |   { label : 'attr inline
22 |   ; destination : string
23 |   ; title : string option
24 |   }
25 | 


--------------------------------------------------------------------------------
/src/block_parser.ml:
--------------------------------------------------------------------------------
  1 | open Ast.Util
  2 | module Raw = Ast_block.Raw
  3 | 
  4 | module Pre = struct
  5 |   type container =
  6 |     | Rblockquote of t
  7 |     | Rlist of
  8 |         list_type
  9 |         * list_spacing
 10 |         * bool
 11 |         * int
 12 |         * attributes Raw.block list list
 13 |         * t
 14 |     | Rparagraph of string list
 15 |     | Rfenced_code of
 16 |         int
 17 |         * int
 18 |         * Parser.code_block_kind
 19 |         * (string * string)
 20 |         * string list
 21 |         * attributes
 22 |     | Rindented_code of string list
 23 |     | Rhtml of Parser.html_kind * string list
 24 |     | Rdef_list of string * string list
 25 |     | Rtable_header of StrSlice.t list * string
 26 |     | Rtable of (string * cell_alignment) list * string list list
 27 |     | Rempty
 28 | 
 29 |   and t =
 30 |     { blocks : attributes Raw.block list
 31 |     ; next : container
 32 |     }
 33 | 
 34 |   let concat l = String.concat "\n" (List.rev l) ^ "\n"
 35 | 
 36 |   let trim_left s =
 37 |     let rec loop i =
 38 |       if i >= String.length s then i
 39 |       else match s.[i] with ' ' | '\t' -> loop (succ i) | _ -> i
 40 |     in
 41 |     let i = loop 0 in
 42 |     if i > 0 then String.sub s i (String.length s - i) else s
 43 | 
 44 |   let link_reference_definitions s =
 45 |     let defs, off = Parser.link_reference_definitions (Parser.P.of_string s) in
 46 |     let s = String.sub s off (String.length s - off) |> String.trim in
 47 |     (defs, s)
 48 | 
 49 |   let rec close link_defs { blocks; next } =
 50 |     let finish = finish link_defs in
 51 |     match next with
 52 |     | Rblockquote state -> Raw.Blockquote ([], finish state) :: blocks
 53 |     | Rlist (ty, sp, _, _, closed_items, state) ->
 54 |         List ([], ty, sp, List.rev (finish state :: closed_items)) :: blocks
 55 |     | Rparagraph l ->
 56 |         let s = concat (List.map trim_left l) in
 57 |         let defs, off =
 58 |           Parser.link_reference_definitions (Parser.P.of_string s)
 59 |         in
 60 |         let s = String.sub s off (String.length s - off) |> String.trim in
 61 |         link_defs := defs @ !link_defs;
 62 |         if s = "" then blocks else Paragraph ([], s) :: blocks
 63 |     | Rfenced_code (_, _, _kind, (label, _other), [], attr) ->
 64 |         Code_block (attr, label, "") :: blocks
 65 |     | Rfenced_code (_, _, _kind, (label, _other), l, attr) ->
 66 |         Code_block (attr, label, concat l) :: blocks
 67 |     | Rdef_list (term, defs) ->
 68 |         let l, blocks =
 69 |           match blocks with
 70 |           | Definition_list (_, l) :: b -> (l, b)
 71 |           | b -> ([], b)
 72 |         in
 73 |         Definition_list ([], l @ [ { term; defs = List.rev defs } ]) :: blocks
 74 |     | Rindented_code l ->
 75 |         (* TODO: trim from the right *)
 76 |         let rec loop = function "" :: l -> loop l | _ as l -> l in
 77 |         Code_block ([], "", concat (loop l)) :: blocks
 78 |     | Rhtml (_, l) -> Html_block ([], concat l) :: blocks
 79 |     | Rtable_header (_header, line) ->
 80 |         (* FIXME: this will only ever get called on the very last
 81 |            line. Should it do the link definitions? *)
 82 |         close link_defs { blocks; next = Rparagraph [ line ] }
 83 |     | Rtable (header, rows) -> Table ([], header, List.rev rows) :: blocks
 84 |     | Rempty -> blocks
 85 | 
 86 |   and finish link_defs state = List.rev (close link_defs state)
 87 | 
 88 |   let empty = { blocks = []; next = Rempty }
 89 |   let classify_line s = Parser.parse s
 90 | 
 91 |   let classify_delimiter s =
 92 |     let left, s =
 93 |       match StrSlice.head s with
 94 |       | Some ':' -> (true, StrSlice.drop 1 s)
 95 |       | _ -> (false, s)
 96 |     in
 97 |     let right, s =
 98 |       match StrSlice.last s with
 99 |       | Some ':' -> (true, StrSlice.drop_last s)
100 |       | _ -> (false, s)
101 |     in
102 |     if StrSlice.exists (fun c -> c <> '-') s then None
103 |     else
104 |       match (left, right) with
105 |       | true, true -> Some Centre
106 |       | true, false -> Some Left
107 |       | false, true -> Some Right
108 |       | false, false -> Some Default
109 | 
110 |   let match_table_headers headers delimiters =
111 |     let rec loop processed = function
112 |       | [], [] -> Some (List.rev processed)
113 |       | header :: headers, line :: delimiters -> (
114 |           match classify_delimiter line with
115 |           | None -> None
116 |           | Some alignment ->
117 |               loop
118 |                 ((StrSlice.to_string header, alignment) :: processed)
119 |                 (headers, delimiters))
120 |       | [], _ :: _ | _ :: _, [] -> None
121 |     in
122 |     loop [] (headers, delimiters)
123 | 
124 |   let rec match_row_length l1 l2 =
125 |     match (l1, l2) with
126 |     | [], _ -> []
127 |     | l1, [] -> List.init (List.length l1) (fun _ -> "")
128 |     | _ :: l1, x :: l2 -> StrSlice.to_string x :: match_row_length l1 l2
129 | 
130 |   let rec process link_defs { blocks; next } s =
131 |     let process = process link_defs in
132 |     let close = close link_defs in
133 |     let finish = finish link_defs in
134 |     match (next, classify_line s) with
135 |     | Rempty, Parser.Lempty -> { blocks; next = Rempty }
136 |     | Rempty, Lblockquote s -> { blocks; next = Rblockquote (process empty s) }
137 |     | Rempty, Lthematic_break ->
138 |         { blocks = Thematic_break [] :: blocks; next = Rempty }
139 |     | Rempty, Lsetext_heading { level = 2; len } when len >= 3 ->
140 |         { blocks = Thematic_break [] :: blocks; next = Rempty }
141 |     | Rempty, Latx_heading (level, text, attr) ->
142 |         { blocks = Heading (attr, level, text) :: blocks; next = Rempty }
143 |     | Rempty, Lfenced_code (ind, num, q, info, a) ->
144 |         { blocks; next = Rfenced_code (ind, num, q, info, [], a) }
145 |     | Rempty, Lhtml (_, kind) -> process { blocks; next = Rhtml (kind, []) } s
146 |     | Rempty, Lindented_code s ->
147 |         { blocks; next = Rindented_code [ StrSlice.to_string s ] }
148 |     | Rempty, Llist_item (kind, indent, s) ->
149 |         { blocks
150 |         ; next = Rlist (kind, Tight, false, indent, [], process empty s)
151 |         }
152 |     | Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _ | Ltable_line []) ->
153 |         { blocks; next = Rparagraph [ StrSlice.to_string s ] }
154 |     | Rempty, Ltable_line items ->
155 |         { blocks; next = Rtable_header (items, StrSlice.to_string s) }
156 |     | Rparagraph [ h ], Ldef_list def ->
157 |         { blocks; next = Rdef_list (h, [ def ]) }
158 |     | Rdef_list (term, defs), Ldef_list def ->
159 |         { blocks; next = Rdef_list (term, def :: defs) }
160 |     | Rparagraph _, Llist_item ((Ordered (1, _) | Bullet _), _, s1)
161 |       when not (Parser.is_empty (Parser.P.of_string (StrSlice.to_string s1))) ->
162 |         process { blocks = close { blocks; next }; next = Rempty } s
163 |     | ( Rparagraph _
164 |       , ( Lempty | Lblockquote _ | Lthematic_break | Latx_heading _
165 |         | Lfenced_code _
166 |         | Lhtml (true, _) ) ) ->
167 |         process { blocks = close { blocks; next }; next = Rempty } s
168 |     | Rparagraph (_ :: _ as lines), Lsetext_heading { level; _ } ->
169 |         let text = concat (List.map trim_left lines) in
170 |         let defs, text = link_reference_definitions text in
171 |         link_defs := defs @ !link_defs;
172 |         if text = "" then
173 |           (* Happens when there's nothing between the [link reference definition] and the [setext heading].
174 | 
175 |                [foo]: /foo-url
176 |                ===
177 |                [foo]
178 | 
179 |              In that case, there's nothing to make as Heading. We can simply add `===` as Rparagraph
180 |           *)
181 |           { blocks; next = Rparagraph [ StrSlice.to_string s ] }
182 |         else { blocks = Heading ([], level, text) :: blocks; next = Rempty }
183 |     | Rparagraph lines, _ ->
184 |         { blocks; next = Rparagraph (StrSlice.to_string s :: lines) }
185 |     | Rfenced_code (_, num, q, _, _, _), Lfenced_code (_, num', q1, ("", _), _)
186 |       when num' >= num && q = q1 ->
187 |         { blocks = close { blocks; next }; next = Rempty }
188 |     | Rfenced_code (ind, num, q, info, lines, a), _ ->
189 |         let s =
190 |           let ind = min (Parser.indent s) ind in
191 |           if ind > 0 then StrSlice.offset ind s else s
192 |         in
193 |         { blocks
194 |         ; next =
195 |             Rfenced_code (ind, num, q, info, StrSlice.to_string s :: lines, a)
196 |         }
197 |     | Rdef_list (term, d :: defs), Lparagraph ->
198 |         { blocks
199 |         ; next = Rdef_list (term, (d ^ "\n" ^ StrSlice.to_string s) :: defs)
200 |         }
201 |     | Rdef_list _, _ ->
202 |         process { blocks = close { blocks; next }; next = Rempty } s
203 |     | Rtable_header (headers, line), Ltable_line items -> (
204 |         match match_table_headers headers items with
205 |         | Some headers ->
206 |             (* Makes sure that there are the same number of delimiters
207 |                as headers. See
208 |                https://github.github.com/gfm/#example-203 *)
209 |             { blocks; next = Rtable (headers, []) }
210 |         | None ->
211 |             (* Reinterpret the previous line as the start of a
212 |                paragraph. *)
213 |             process { blocks; next = Rparagraph [ line ] } s)
214 |     | Rtable_header (_, line), _ ->
215 |         (* If we only have a potential header, and the current line
216 |            doesn't look like a table delimiter, then reinterpret the
217 |            previous line as the start of a paragraph. *)
218 |         process { blocks; next = Rparagraph [ line ] } s
219 |     | Rtable (header, rows), Ltable_line row ->
220 |         (* Make sure the number of items in the row is consistent with
221 |            the headers and the rest of the rows. See
222 |            https://github.github.com/gfm/#example-204 *)
223 |         let row = match_row_length header row in
224 |         { blocks; next = Rtable (header, row :: rows) }
225 |     | Rtable (header, rows), (Lparagraph | Lsetext_heading _) ->
226 |         (* Treat a contiguous line after a table as a row, even if it
227 |            doesn't contain any '|'
228 |            characters. https://github.github.com/gfm/#example-202 *)
229 |         let row = match_row_length header [ s ] in
230 |         { blocks; next = Rtable (header, row :: rows) }
231 |     | Rtable _, _ ->
232 |         process { blocks = close { blocks; next }; next = Rempty } s
233 |     | Rindented_code lines, Lindented_code s ->
234 |         { blocks; next = Rindented_code (StrSlice.to_string s :: lines) }
235 |     | Rindented_code lines, Lempty ->
236 |         let n = min (Parser.indent s) 4 in
237 |         let s = StrSlice.offset n s in
238 |         { blocks; next = Rindented_code (StrSlice.to_string s :: lines) }
239 |     | Rindented_code _, _ ->
240 |         process { blocks = close { blocks; next }; next = Rempty } s
241 |     | Rhtml ((Hcontains l as k), lines), _
242 |       when List.exists (fun t -> StrSlice.contains t s) l ->
243 |         { blocks =
244 |             close { blocks; next = Rhtml (k, StrSlice.to_string s :: lines) }
245 |         ; next = Rempty
246 |         }
247 |     | Rhtml (Hblank, _), Lempty ->
248 |         { blocks = close { blocks; next }; next = Rempty }
249 |     | Rhtml (k, lines), _ ->
250 |         { blocks; next = Rhtml (k, StrSlice.to_string s :: lines) }
251 |     | Rblockquote state, Lblockquote s ->
252 |         { blocks; next = Rblockquote (process state s) }
253 |     | Rlist (kind, style, _, ind, items, state), Lempty ->
254 |         { blocks
255 |         ; next = Rlist (kind, style, true, ind, items, process state s)
256 |         }
257 |     | Rlist (_, _, true, ind, _, { blocks = []; next = Rempty }), _
258 |       when Parser.indent s >= ind ->
259 |         process { blocks = close { blocks; next }; next = Rempty } s
260 |     | Rlist (kind, style, prev_empty, ind, items, state), _
261 |       when Parser.indent s >= ind ->
262 |         let s = StrSlice.offset ind s in
263 |         let state = process state s in
264 |         let style =
265 |           let rec new_block = function
266 |             | Rblockquote { blocks = []; next }
267 |             | Rlist (_, _, _, _, _, { blocks = []; next }) ->
268 |                 new_block next
269 |             | Rparagraph [ _ ]
270 |             | Rfenced_code (_, _, _, _, [], _)
271 |             | Rindented_code [ _ ]
272 |             | Rhtml (_, [ _ ]) ->
273 |                 true
274 |             | _ -> false
275 |           in
276 |           if prev_empty && new_block state.next then Loose else style
277 |         in
278 |         { blocks; next = Rlist (kind, style, false, ind, items, state) }
279 |     | ( Rlist (kind, style, prev_empty, _, items, state)
280 |       , Llist_item (kind', ind, s) )
281 |       when same_block_list_kind kind kind' ->
282 |         let style = if prev_empty then Loose else style in
283 |         { blocks
284 |         ; next =
285 |             Rlist
286 |               (kind, style, false, ind, finish state :: items, process empty s)
287 |         }
288 |     | (Rlist _ | Rblockquote _), _ -> (
289 |         let rec loop = function
290 |           | Rlist (kind, style, prev_empty, ind, items, { blocks; next }) -> (
291 |               match loop next with
292 |               | Some next ->
293 |                   Some
294 |                     (Rlist
295 |                        (kind, style, prev_empty, ind, items, { blocks; next }))
296 |               | None -> None)
297 |           | Rblockquote { blocks; next } -> (
298 |               match loop next with
299 |               | Some next -> Some (Rblockquote { blocks; next })
300 |               | None -> None)
301 |           | Rparagraph (_ :: _ as lines) -> (
302 |               match classify_line s with
303 |               | Parser.Lparagraph | Lindented_code _
304 |               | Lsetext_heading { level = 1; _ }
305 |               | Lhtml (false, _) ->
306 |                   Some (Rparagraph (StrSlice.to_string s :: lines))
307 |               | _ -> None)
308 |           | _ -> None
309 |         in
310 |         match loop next with
311 |         | Some next -> { blocks; next }
312 |         | None -> process { blocks = close { blocks; next }; next = Rempty } s)
313 | 
314 |   let process link_defs state s = process link_defs state (StrSlice.of_string s)
315 | 
316 |   let of_channel ic =
317 |     let link_defs = ref [] in
318 |     let rec loop state =
319 |       match input_line ic with
320 |       | s -> loop (process link_defs state s)
321 |       | exception End_of_file ->
322 |           let blocks = finish link_defs state in
323 |           (blocks, List.rev !link_defs)
324 |     in
325 |     loop empty
326 | 
327 |   let read_line s off =
328 |     let buf = Buffer.create 128 in
329 |     let rec loop cr_read off =
330 |       if off >= String.length s then (Buffer.contents buf, None)
331 |       else
332 |         match s.[off] with
333 |         | '\n' -> (Buffer.contents buf, Some (succ off))
334 |         | '\r' ->
335 |             if cr_read then Buffer.add_char buf '\r';
336 |             loop true (succ off)
337 |         | c ->
338 |             if cr_read then Buffer.add_char buf '\r';
339 |             Buffer.add_char buf c;
340 |             loop false (succ off)
341 |     in
342 |     loop false off
343 | 
344 |   let of_string s =
345 |     let link_defs = ref [] in
346 |     let rec loop state = function
347 |       | None ->
348 |           let blocks = finish link_defs state in
349 |           (blocks, List.rev !link_defs)
350 |       | Some off ->
351 |           let s, off = read_line s off in
352 |           loop (process link_defs state s) off
353 |     in
354 |     loop empty (Some 0)
355 | end
356 | 


--------------------------------------------------------------------------------
/src/block_parser.mli:
--------------------------------------------------------------------------------
 1 | open Ast.Impl
 2 | module Raw = Ast_block.Raw
 3 | 
 4 | module Pre : sig
 5 |   val of_channel :
 6 |     in_channel -> attributes Raw.block list * attributes Parser.link_def list
 7 | 
 8 |   val of_string :
 9 |     string -> attributes Raw.block list * attributes Parser.link_def list
10 | end
11 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (library
 2 |  (name omd)
 3 |  (public_name omd)
 4 |  (libraries uutf uucp uunf))
 5 | 
 6 | (rule
 7 |  (with-stdout-to
 8 |   entities.ml
 9 |   (chdir
10 |    ../tools
11 |    (run ./gen_entities.exe %{dep:../tools/entities.json}))))
12 | 


--------------------------------------------------------------------------------
/src/html.ml:
--------------------------------------------------------------------------------
  1 | open Ast.Impl
  2 | 
  3 | type element_type =
  4 |   | Inline
  5 |   | Block
  6 |   | Table
  7 | 
  8 | type t =
  9 |   | Element of element_type * string * attributes * t option
 10 |   | Text of string
 11 |   | Raw of string
 12 |   | Null
 13 |   | Concat of t * t
 14 | 
 15 | let elt etype name attrs childs = Element (etype, name, attrs, childs)
 16 | let text s = Text s
 17 | let raw s = Raw s
 18 | 
 19 | let concat t1 t2 =
 20 |   match (t1, t2) with Null, t | t, Null -> t | _ -> Concat (t1, t2)
 21 | 
 22 | let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l
 23 | 
 24 | let concat_map2 f l1 l2 =
 25 |   List.fold_left2 (fun accu x y -> concat accu (f x y)) Null l1 l2
 26 | 
 27 | (* only convert when "necessary" *)
 28 | let htmlentities s =
 29 |   let b = Buffer.create (String.length s) in
 30 |   let rec loop i =
 31 |     if i >= String.length s then Buffer.contents b
 32 |     else begin
 33 |       begin
 34 |         match s.[i] with
 35 |         | '"' -> Buffer.add_string b "&quot;"
 36 |         | '&' -> Buffer.add_string b "&amp;"
 37 |         | '<' -> Buffer.add_string b "&lt;"
 38 |         | '>' -> Buffer.add_string b "&gt;"
 39 |         | c -> Buffer.add_char b c
 40 |       end;
 41 |       loop (succ i)
 42 |     end
 43 |   in
 44 |   loop 0
 45 | 
 46 | let add_attrs_to_buffer buf attrs =
 47 |   let f (k, v) = Printf.bprintf buf " %s=\"%s\"" k (htmlentities v) in
 48 |   List.iter f attrs
 49 | 
 50 | let rec add_to_buffer buf = function
 51 |   | Element (eltype, name, attrs, None) ->
 52 |       Printf.bprintf buf "<%s%a />" name add_attrs_to_buffer attrs;
 53 |       if eltype = Block then Buffer.add_char buf '\n'
 54 |   | Element (eltype, name, attrs, Some c) ->
 55 |       Printf.bprintf
 56 |         buf
 57 |         "<%s%a>%s%a</%s>%s"
 58 |         name
 59 |         add_attrs_to_buffer
 60 |         attrs
 61 |         (match eltype with Table -> "\n" | _ -> "")
 62 |         add_to_buffer
 63 |         c
 64 |         name
 65 |         (match eltype with Table | Block -> "\n" | _ -> "")
 66 |   | Text s -> Buffer.add_string buf (htmlentities s)
 67 |   | Raw s -> Buffer.add_string buf s
 68 |   | Null -> ()
 69 |   | Concat (t1, t2) ->
 70 |       add_to_buffer buf t1;
 71 |       add_to_buffer buf t2
 72 | 
 73 | let escape_uri s =
 74 |   let b = Buffer.create (String.length s) in
 75 |   String.iter
 76 |     (function
 77 |       | ( '!' | '*' | '\'' | '(' | ')' | ';' | ':' | '@' | '=' | '+' | '$' | ','
 78 |         | '/' | '?' | '%' | '#'
 79 |         | 'A' .. 'Z'
 80 |         | 'a' .. 'z'
 81 |         | '0' .. '9'
 82 |         | '-' | '_' | '.' | '~' | '&' ) as c ->
 83 |           Buffer.add_char b c
 84 |       | _ as c -> Printf.bprintf b "%%%2X" (Char.code c))
 85 |     s;
 86 |   Buffer.contents b
 87 | 
 88 | let trim_start_while p s =
 89 |   let start = ref true in
 90 |   let b = Buffer.create (String.length s) in
 91 |   Uutf.String.fold_utf_8
 92 |     (fun () _ -> function
 93 |       | `Malformed _ -> Buffer.add_string b s
 94 |       | `Uchar u when p u && !start -> ()
 95 |       | `Uchar u when !start ->
 96 |           start := false;
 97 |           Uutf.Buffer.add_utf_8 b u
 98 |       | `Uchar u -> Uutf.Buffer.add_utf_8 b u)
 99 |     ()
100 |     s;
101 |   Buffer.contents b
102 | 
103 | let underscore = Uchar.of_char '_'
104 | let hyphen = Uchar.of_char '-'
105 | let period = Uchar.of_char '.'
106 | let is_white_space = Uucp.White.is_white_space
107 | let is_alphabetic = Uucp.Alpha.is_alphabetic
108 | let is_hex_digit = Uucp.Num.is_hex_digit
109 | 
110 | module Identifiers : sig
111 |   type t
112 | 
113 |   val empty : t
114 | 
115 |   val touch : string -> t -> int * t
116 |   (** Bump the frequency count for the given string. 
117 |       It returns the previous count (before bumping) *)
118 | end = struct
119 |   module SMap = Map.Make (String)
120 | 
121 |   type t = int SMap.t
122 | 
123 |   let empty = SMap.empty
124 |   let count s t = match SMap.find_opt s t with None -> 0 | Some x -> x
125 |   let incr s t = SMap.add s (count s t + 1) t
126 | 
127 |   let touch s t =
128 |     let count = count s t in
129 |     (count, incr s t)
130 | end
131 | 
132 | (* Based on pandoc algorithm to derive id's.
133 |    See: https://pandoc.org/MANUAL.html#extension-auto_identifiers *)
134 | let slugify s =
135 |   let s = trim_start_while (fun c -> not (is_alphabetic c)) s in
136 |   let length = String.length s in
137 |   let b = Buffer.create length in
138 |   let last_is_ws = ref false in
139 |   let add_to_buffer u =
140 |     if !last_is_ws = true then begin
141 |       Uutf.Buffer.add_utf_8 b (Uchar.of_char '-');
142 |       last_is_ws := false
143 |     end;
144 |     Uutf.Buffer.add_utf_8 b u
145 |   in
146 |   let fold () _ = function
147 |     | `Malformed _ -> add_to_buffer Uutf.u_rep
148 |     | `Uchar u when is_white_space u && not !last_is_ws -> last_is_ws := true
149 |     | `Uchar u when is_white_space u && !last_is_ws -> ()
150 |     | `Uchar u ->
151 |         (if is_alphabetic u || is_hex_digit u then
152 |          match Uucp.Case.Map.to_lower u with
153 |          | `Self -> add_to_buffer u
154 |          | `Uchars us -> List.iter add_to_buffer us);
155 |         if u = underscore || u = hyphen || u = period then add_to_buffer u
156 |   in
157 |   Uutf.String.fold_utf_8 fold () s;
158 |   Buffer.contents b
159 | 
160 | let to_plain_text t =
161 |   let buf = Buffer.create 1024 in
162 |   let rec go : _ inline -> unit = function
163 |     | Concat (_, l) -> List.iter go l
164 |     | Text (_, t) | Code (_, t) -> Buffer.add_string buf t
165 |     | Emph (_, i)
166 |     | Strong (_, i)
167 |     | Link (_, { label = i; _ })
168 |     | Image (_, { label = i; _ }) ->
169 |         go i
170 |     | Hard_break _ | Soft_break _ -> Buffer.add_char buf ' '
171 |     | Html _ -> ()
172 |   in
173 |   go t;
174 |   Buffer.contents buf
175 | 
176 | let nl = Raw "\n"
177 | 
178 | let rec url label destination title attrs =
179 |   let attrs =
180 |     match title with None -> attrs | Some title -> ("title", title) :: attrs
181 |   in
182 |   let attrs = ("href", escape_uri destination) :: attrs in
183 |   elt Inline "a" attrs (Some (inline label))
184 | 
185 | and img label destination title attrs =
186 |   let attrs =
187 |     match title with None -> attrs | Some title -> ("title", title) :: attrs
188 |   in
189 |   let attrs =
190 |     ("src", escape_uri destination) :: ("alt", to_plain_text label) :: attrs
191 |   in
192 |   elt Inline "img" attrs None
193 | 
194 | and inline = function
195 |   | Ast.Impl.Concat (_, l) -> concat_map inline l
196 |   | Text (_, t) -> text t
197 |   | Emph (attr, il) -> elt Inline "em" attr (Some (inline il))
198 |   | Strong (attr, il) -> elt Inline "strong" attr (Some (inline il))
199 |   | Code (attr, s) -> elt Inline "code" attr (Some (text s))
200 |   | Hard_break attr -> concat (elt Inline "br" attr None) nl
201 |   | Soft_break _ -> nl
202 |   | Html (_, body) -> raw body
203 |   | Link (attr, { label; destination; title }) ->
204 |       url label destination title attr
205 |   | Image (attr, { label; destination; title }) ->
206 |       img label destination title attr
207 | 
208 | let alignment_attributes = function
209 |   | Default -> []
210 |   | Left -> [ ("align", "left") ]
211 |   | Right -> [ ("align", "right") ]
212 |   | Centre -> [ ("align", "center") ]
213 | 
214 | let table_header headers =
215 |   elt
216 |     Table
217 |     "thead"
218 |     []
219 |     (Some
220 |        (elt
221 |           Table
222 |           "tr"
223 |           []
224 |           (Some
225 |              (concat_map
226 |                 (fun (header, alignment) ->
227 |                   let attrs = alignment_attributes alignment in
228 |                   elt Block "th" attrs (Some (inline header)))
229 |                 headers))))
230 | 
231 | let table_body headers rows =
232 |   elt
233 |     Table
234 |     "tbody"
235 |     []
236 |     (Some
237 |        (concat_map
238 |           (fun row ->
239 |             elt
240 |               Table
241 |               "tr"
242 |               []
243 |               (Some
244 |                  (concat_map2
245 |                     (fun (_, alignment) cell ->
246 |                       let attrs = alignment_attributes alignment in
247 |                       elt Block "td" attrs (Some (inline cell)))
248 |                     headers
249 |                     row)))
250 |           rows))
251 | 
252 | let rec block ~auto_identifiers = function
253 |   | Blockquote (attr, q) ->
254 |       elt
255 |         Block
256 |         "blockquote"
257 |         attr
258 |         (Some (concat nl (concat_map (block ~auto_identifiers) q)))
259 |   | Paragraph (attr, md) -> elt Block "p" attr (Some (inline md))
260 |   | List (attr, ty, sp, bl) ->
261 |       let name = match ty with Ordered _ -> "ol" | Bullet _ -> "ul" in
262 |       let attr =
263 |         match ty with
264 |         | Ordered (n, _) when n <> 1 -> ("start", string_of_int n) :: attr
265 |         | _ -> attr
266 |       in
267 |       let li t =
268 |         let block' t =
269 |           match (t, sp) with
270 |           | Paragraph (_, t), Tight -> concat (inline t) nl
271 |           | _ -> block ~auto_identifiers t
272 |         in
273 |         let nl = if sp = Tight then Null else nl in
274 |         elt Block "li" [] (Some (concat nl (concat_map block' t)))
275 |       in
276 |       elt Block name attr (Some (concat nl (concat_map li bl)))
277 |   | Code_block (attr, label, code) ->
278 |       let code_attr =
279 |         if String.trim label = "" then []
280 |         else [ ("class", "language-" ^ label) ]
281 |       in
282 |       let c = text code in
283 |       elt Block "pre" attr (Some (elt Inline "code" code_attr (Some c)))
284 |   | Thematic_break attr -> elt Block "hr" attr None
285 |   | Html_block (_, body) -> raw body
286 |   | Heading (attr, level, text) ->
287 |       let name =
288 |         match level with
289 |         | 1 -> "h1"
290 |         | 2 -> "h2"
291 |         | 3 -> "h3"
292 |         | 4 -> "h4"
293 |         | 5 -> "h5"
294 |         | 6 -> "h6"
295 |         | _ -> "p"
296 |       in
297 |       elt Block name attr (Some (inline text))
298 |   | Definition_list (attr, l) ->
299 |       let f { term; defs } =
300 |         concat
301 |           (elt Block "dt" [] (Some (inline term)))
302 |           (concat_map (fun s -> elt Block "dd" [] (Some (inline s))) defs)
303 |       in
304 |       elt Block "dl" attr (Some (concat_map f l))
305 |   | Table (attr, headers, []) ->
306 |       elt Table "table" attr (Some (table_header headers))
307 |   | Table (attr, headers, rows) ->
308 |       elt
309 |         Table
310 |         "table"
311 |         attr
312 |         (Some (concat (table_header headers) (table_body headers rows)))
313 | 
314 | let of_doc ?(auto_identifiers = true) doc =
315 |   let identifiers = Identifiers.empty in
316 |   let f identifiers = function
317 |     | Heading (attr, level, text) ->
318 |         let attr, identifiers =
319 |           if (not auto_identifiers) || List.mem_assoc "id" attr then
320 |             (attr, identifiers)
321 |           else
322 |             let id = slugify (to_plain_text text) in
323 |             (* Default identifier if empty. It matches what pandoc does. *)
324 |             let id = if id = "" then "section" else id in
325 |             let count, identifiers = Identifiers.touch id identifiers in
326 |             let id =
327 |               if count = 0 then id else Printf.sprintf "%s-%i" id count
328 |             in
329 |             (("id", id) :: attr, identifiers)
330 |         in
331 |         (Heading (attr, level, text), identifiers)
332 |     | _ as c -> (c, identifiers)
333 |   in
334 |   let html, _ =
335 |     List.fold_left
336 |       (fun (accu, ids) x ->
337 |         let x', ids = f ids x in
338 |         let el = concat accu (block ~auto_identifiers x') in
339 |         (el, ids))
340 |       (Null, identifiers)
341 |       doc
342 |   in
343 |   html
344 | 
345 | let to_string t =
346 |   let buf = Buffer.create 1024 in
347 |   add_to_buffer buf t;
348 |   Buffer.contents buf
349 | 


--------------------------------------------------------------------------------
/src/html.mli:
--------------------------------------------------------------------------------
 1 | open Ast.Impl
 2 | 
 3 | type element_type =
 4 |   | Inline
 5 |   | Block
 6 |   | Table
 7 | 
 8 | type t =
 9 |   | Element of element_type * string * attributes * t option
10 |   | Text of string
11 |   | Raw of string
12 |   | Null
13 |   | Concat of t * t
14 | 
15 | val htmlentities : string -> string
16 | val of_doc : ?auto_identifiers:bool -> attributes block list -> t
17 | val to_string : t -> string
18 | 


--------------------------------------------------------------------------------
/src/omd.ml:
--------------------------------------------------------------------------------
 1 | (* The document model *)
 2 | 
 3 | include Ast.Impl
 4 | 
 5 | (* Helper functions for construction document AST *)
 6 | 
 7 | module Ctor = Ast_constructors.Impl
 8 | 
 9 | (* Table of contents *)
10 | 
11 | let headers = Toc.headers
12 | let toc = Toc.toc
13 | 
14 | (* Conversion *)
15 | 
16 | let parse_inline defs s = Parser.inline defs (Parser.P.of_string s)
17 | 
18 | let parse_inlines (md, defs) : doc =
19 |   let defs =
20 |     let f (def : attributes Parser.link_def) =
21 |       { def with label = Parser.normalize def.label }
22 |     in
23 |     List.map f defs
24 |   in
25 |   List.map (Ast_block.Mapper.map (parse_inline defs)) md
26 | 
27 | let escape_html_entities = Html.htmlentities
28 | let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic)
29 | let of_string s = parse_inlines (Block_parser.Pre.of_string s)
30 | 
31 | let to_html ?auto_identifiers doc =
32 |   Html.to_string (Html.of_doc ?auto_identifiers doc)
33 | 
34 | let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast)
35 | 


--------------------------------------------------------------------------------
/src/omd.mli:
--------------------------------------------------------------------------------
 1 | (** {1 A markdown parser in OCaml} *)
 2 | 
 3 | (** {2 The document model}
 4 | 
 5 |     The following types define the AST representing Omd's document model. *)
 6 | 
 7 | include Ast.Intf
 8 | 
 9 | (** {2 Helper functions for constructing the document AST } *)
10 | 
11 | module Ctor : Ast_constructors.Intf
12 | 
13 | (** {2 Generating and constructing tables of contents} *)
14 | 
15 | val headers :
16 |   ?remove_links:bool -> 'attr block list -> ('attr * int * 'attr inline) list
17 | 
18 | val toc : ?start:int list -> ?depth:int -> doc -> doc
19 | 
20 | (** {2 Helper functions} *)
21 | 
22 | val escape_html_entities : string -> string
23 | (** Perform escaping of HTML entities. Turns: ['"'] into ["&quot;"],
24 |     ['&'] into ["&amp;"], ['<'] in ["&lt;"] and ['>'] into ["&gt;"]
25 | *)
26 | 
27 | (** {2 Converting to and from documents} *)
28 | 
29 | val of_channel : in_channel -> doc
30 | val of_string : string -> doc
31 | val to_html : ?auto_identifiers:bool -> doc -> string
32 | val to_sexp : doc -> string
33 | 


--------------------------------------------------------------------------------
/src/parser.ml:
--------------------------------------------------------------------------------
   1 | open Ast.Impl
   2 | 
   3 | type 'attr link_def =
   4 |   { label : string
   5 |   ; destination : string
   6 |   ; title : string option
   7 |   ; attributes : 'attr
   8 |   }
   9 | 
  10 | let is_whitespace = function
  11 |   | ' ' | '\t' | '\010' .. '\013' -> true
  12 |   | _ -> false
  13 | 
  14 | let is_punct = function
  15 |   | '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-'
  16 |   | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^'
  17 |   | '_' | '`' | '{' | '|' | '}' | '~' ->
  18 |       true
  19 |   | _ -> false
  20 | 
  21 | exception Fail
  22 | (** Raised when a parser fails, used for control flow rather than error
  23 |     handling *)
  24 | 
  25 | (** Stateful parser combinators *)
  26 | module P : sig
  27 |   type state
  28 |   type 'a t = state -> 'a
  29 | 
  30 |   val of_string : string -> state
  31 | 
  32 |   val peek : char option t
  33 |   (** [Some c] if [c] is the next character in the input, or [None]
  34 |       if the input is exhausted.
  35 | 
  36 |       NOTE: Does not advance the state. *)
  37 | 
  38 |   val peek_exn : char t
  39 |   (** the next character in the input, or raises [Fail] if the
  40 |       input is exhausted.
  41 | 
  42 |       NOTE: Does not advance the state. *)
  43 | 
  44 |   val peek_before : char -> state -> char
  45 |   (** the previous character in the input, or the next
  46 |       character, if we are at the start of the input.
  47 | 
  48 |       NOTE: Does not advance the state. *)
  49 | 
  50 |   val peek_after : char -> state -> char
  51 |   (** the character after the next in the input, or the next
  52 |       character, if we are at the end of the input.
  53 | 
  54 |       NOTE: Does not advance the state. *)
  55 | 
  56 |   val pos : state -> int
  57 |   val range : state -> int -> int -> string
  58 |   val set_pos : state -> int -> unit
  59 | 
  60 |   val junk : unit t
  61 |   (** ignores the next character in the input *)
  62 | 
  63 |   val char : char -> unit t
  64 |   (** accepts a [c] *)
  65 | 
  66 |   val next : char t
  67 | 
  68 |   val ws : unit t
  69 |   (** accepts 0 or more white space characters *)
  70 | 
  71 |   val sp : unit t
  72 |   (** accepts 0 or more spaces or tabs *)
  73 | 
  74 |   val ws1 : unit t
  75 |   (** accepts 1 or more spaces or tabs, fails if none is found *)
  76 | 
  77 |   val ( ||| ) : 'a t -> 'a t -> 'a t
  78 |   (** [p ||| q] tries to accept [p], but in case [p] fails, it accepts [q]
  79 |       (which can fail). *)
  80 | 
  81 |   val ( >>> ) : unit t -> 'a t -> 'a t
  82 |   (** [p >>> q] accepts [p] followed by [q], returning whatever [q] does *)
  83 | 
  84 |   val ( <<< ) : 'a t -> unit t -> 'a t
  85 |   (** [p >>> q] accepts [p] followed by [q], returning whatever [q] does *)
  86 | 
  87 |   val protect : 'a t -> 'a t
  88 |   (** run the given parser, resetting the state back to it's initial condition
  89 |       if the parser fails *)
  90 | 
  91 |   val pair : 'a t -> 'b t -> ('a * 'b) t
  92 | 
  93 |   val on_sub : (StrSlice.t -> 'a * StrSlice.t) -> 'a t
  94 |   (** Given a function [f] that takes a prefix of a string slice to a value [x]
  95 |       of type ['a] and some remainder of the slice, [on_sub f] produces [x] from
  96 |       the state, and advances the input the length of the slice that was
  97 |       consumed by [f]. *)
  98 | end = struct
  99 |   type state =
 100 |     { str : string
 101 |     ; mutable pos : int
 102 |     }
 103 | 
 104 |   let of_string str = { str; pos = 0 }
 105 | 
 106 |   type 'a t = state -> 'a
 107 | 
 108 |   let char c st =
 109 |     if st.pos >= String.length st.str then raise Fail
 110 |     else if st.str.[st.pos] <> c then raise Fail
 111 |     else st.pos <- st.pos + 1
 112 | 
 113 |   let next st =
 114 |     if st.pos >= String.length st.str then raise Fail
 115 |     else
 116 |       let c = st.str.[st.pos] in
 117 |       st.pos <- st.pos + 1;
 118 |       c
 119 | 
 120 |   let peek st =
 121 |     if st.pos >= String.length st.str then None else Some st.str.[st.pos]
 122 | 
 123 |   let peek_exn st = match peek st with Some c -> c | None -> raise Fail
 124 |   let peek_before c st = if st.pos = 0 then c else st.str.[st.pos - 1]
 125 | 
 126 |   let peek_after c st =
 127 |     if st.pos + 1 >= String.length st.str then c else st.str.[st.pos + 1]
 128 | 
 129 |   let pos st = st.pos
 130 |   let range st pos n = String.sub st.str pos n
 131 |   let set_pos st pos = st.pos <- pos
 132 |   let junk st = if st.pos < String.length st.str then st.pos <- st.pos + 1
 133 | 
 134 |   let protect p st =
 135 |     let off = pos st in
 136 |     try p st
 137 |     with e ->
 138 |       set_pos st off;
 139 |       raise e
 140 | 
 141 |   let ( ||| ) p1 p2 st = try protect p1 st with Fail -> p2 st
 142 | 
 143 |   let ws st =
 144 |     let rec loop () =
 145 |       if is_whitespace (peek_exn st) then (
 146 |         junk st;
 147 |         loop ())
 148 |     in
 149 |     try loop () with Fail -> ()
 150 | 
 151 |   let sp st =
 152 |     let rec loop () =
 153 |       match peek_exn st with
 154 |       | ' ' | '\t' ->
 155 |           junk st;
 156 |           loop ()
 157 |       | _ -> ()
 158 |     in
 159 |     try loop () with Fail -> ()
 160 | 
 161 |   let ws1 st =
 162 |     if is_whitespace (peek_exn st) then (
 163 |       junk st;
 164 |       ws st)
 165 |     else raise Fail
 166 | 
 167 |   let ( >>> ) p q st =
 168 |     p st;
 169 |     q st
 170 | 
 171 |   let ( <<< ) p q st =
 172 |     let x = p st in
 173 |     q st;
 174 |     x
 175 | 
 176 |   let pair p q st =
 177 |     let x = p st in
 178 |     let y = q st in
 179 |     (x, y)
 180 | 
 181 |   let on_sub fn st =
 182 |     let result, s = fn (StrSlice.of_string ~off:st.pos st.str) in
 183 |     st.pos <- StrSlice.get_offset s;
 184 |     result
 185 | end
 186 | 
 187 | type html_kind =
 188 |   | Hcontains of string list
 189 |   | Hblank
 190 | 
 191 | type code_block_kind =
 192 |   | Tilde
 193 |   | Backtick
 194 | 
 195 | type t =
 196 |   | Lempty
 197 |   | Lblockquote of StrSlice.t
 198 |   | Lthematic_break
 199 |   | Latx_heading of int * string * attributes
 200 |   | Lsetext_heading of
 201 |       { level : int
 202 |       ; len : int
 203 |       }  (** the level of the heading and how long the underline marker is *)
 204 |   | Lfenced_code of int * int * code_block_kind * (string * string) * attributes
 205 |   | Lindented_code of StrSlice.t
 206 |   | Lhtml of bool * html_kind
 207 |   | Llist_item of list_type * int * StrSlice.t
 208 |   | Lparagraph
 209 |   | Ldef_list of string
 210 |   | Ltable_line of StrSlice.t list
 211 | 
 212 | (* drop up to 3 spaces, returning the number of spaces dropped and the remainder of the string *)
 213 | let sp3 s =
 214 |   match StrSlice.take 3 s with
 215 |   | [ ' '; ' '; ' ' ] -> (3, StrSlice.drop 3 s)
 216 |   | ' ' :: ' ' :: _ -> (2, StrSlice.drop 2 s)
 217 |   | ' ' :: _ -> (1, StrSlice.drop 1 s)
 218 |   | _ -> (0, s)
 219 | 
 220 | (** TODO Why is this here? Doesn't it almost exactly repeat the one in [P], only with slices?
 221 |     Why is this kind of repetition needed? *)
 222 | let ( ||| ) p1 p2 s = try p1 s with Fail -> p2 s
 223 | 
 224 | let trim_leading_ws s = StrSlice.drop_while is_whitespace s
 225 | let trim_trailing_ws s = StrSlice.drop_last_while is_whitespace s
 226 | let trim_ws s = trim_leading_ws s |> trim_trailing_ws
 227 | let is_empty s = StrSlice.is_empty (trim_leading_ws s)
 228 | 
 229 | (* See https://spec.commonmark.org/0.30/#thematic-breaks *)
 230 | let thematic_break =
 231 |   (* Accepts thematic break chars or fail, counting how many chars we find *)
 232 |   let f symb c count =
 233 |     if Char.equal symb c then succ count
 234 |     else if is_whitespace c then
 235 |       (* Thematic break chars can be separated by spaces *)
 236 |       count
 237 |     else raise Fail
 238 |   in
 239 |   fun s ->
 240 |     match StrSlice.head s with
 241 |     | Some (('*' | '_' | '-') as symb) ->
 242 |         if StrSlice.fold_left (f symb) 0 s >= 3 then
 243 |           (* Three or more of the same thematic break chars found *)
 244 |           Lthematic_break
 245 |         else raise Fail
 246 |     | Some _ | None -> raise Fail
 247 | 
 248 | (* See https://spec.commonmark.org/0.30/#setext-heading *)
 249 | let setext_heading s =
 250 |   (* The first char determines if possible setext and the level of the heading *)
 251 |   let level, symb =
 252 |     match StrSlice.head s with
 253 |     | Some '=' -> (1, '=')
 254 |     | Some '-' -> (2, '-')
 255 |     | _ -> raise Fail
 256 |   in
 257 |   let heading_chars, rest =
 258 |     StrSlice.split_at (fun c -> not (Char.equal c symb)) s
 259 |   in
 260 |   let len = StrSlice.length heading_chars in
 261 |   if Char.equal symb '-' && len = 1 then
 262 |     (* can be interpreted as an empty list item *)
 263 |     raise Fail
 264 |   else if not (StrSlice.for_all is_whitespace rest) then
 265 |     (* if anything except whitespace is left, it can't be a setext heading underline *)
 266 |     raise Fail
 267 |   else Lsetext_heading { level; len }
 268 | 
 269 | (* Parses a string slice in pandoc-style into an association list
 270 | 
 271 |    See https://pandoc.org/MANUAL.html#extension-header_attributes *)
 272 | let parse_attributes s =
 273 |   let attributes = String.split_on_char ' ' s in
 274 |   let f (id, classes, acc) s =
 275 |     if s = "" then (id, classes, acc)
 276 |     else
 277 |       match s.[0] with
 278 |       | '#' -> (Some (String.sub s 1 (String.length s - 1)), classes, acc)
 279 |       | '.' -> (id, String.sub s 1 (String.length s - 1) :: classes, acc)
 280 |       | _ -> (
 281 |           let attr = String.split_on_char '=' s in
 282 |           match attr with
 283 |           | [] -> (id, classes, acc)
 284 |           | h :: t -> (id, classes, (h, String.concat "=" t) :: acc))
 285 |   in
 286 |   let id, classes, acc = List.fold_left f (None, [], []) attributes in
 287 |   let acc = List.rev acc in
 288 |   let acc =
 289 |     match classes with
 290 |     | [] -> acc
 291 |     | _ :: _ ->
 292 |         let classes = String.concat " " (List.rev classes) in
 293 |         ("class", classes) :: acc
 294 |   in
 295 |   match id with Some id -> ("id", id) :: acc | None -> acc
 296 | 
 297 | (* Parses a string slice into an attribute list (possibly empty) and the non-attribute part of the string
 298 | 
 299 |    These are pandoc style attributes https://pandoc.org/MANUAL.html#extension-attributes *)
 300 | let attribute_string s =
 301 |   let buf = Buffer.create 64 in
 302 |   let rec loop s =
 303 |     match StrSlice.head s with
 304 |     | None -> (StrSlice.of_string (Buffer.contents buf), None)
 305 |     | Some ('\\' as c) -> (
 306 |         let s = StrSlice.tail s in
 307 |         match StrSlice.head s with
 308 |         | Some c when is_punct c ->
 309 |             Buffer.add_char buf c;
 310 |             loop (StrSlice.tail s)
 311 |         | Some _ | None ->
 312 |             Buffer.add_char buf c;
 313 |             loop s)
 314 |     | Some '{' ->
 315 |         let buf' = Buffer.create 64 in
 316 |         let rec loop' s =
 317 |           match StrSlice.head s with
 318 |           | Some '}' -> (
 319 |               (* Found a closing bracket not at the end of the line *)
 320 |               let s = StrSlice.tail s in
 321 |               match StrSlice.head s with
 322 |               | None ->
 323 |                   (* At end of line, so we've finished parsing the attributes *)
 324 |                   ( StrSlice.of_string (Buffer.contents buf)
 325 |                   , Some (Buffer.contents buf') )
 326 |               | Some _ ->
 327 |                   (* Not at end of line, so this can't be a set of attributes *)
 328 |                   Buffer.add_char buf '{';
 329 |                   Buffer.add_buffer buf buf';
 330 |                   Buffer.add_char buf '}';
 331 |                   loop s)
 332 |           | None ->
 333 |               Buffer.add_char buf '{';
 334 |               Buffer.add_buffer buf buf';
 335 |               (StrSlice.of_string (Buffer.contents buf), None)
 336 |           | Some '{' ->
 337 |               Buffer.add_char buf '{';
 338 |               Buffer.add_buffer buf buf';
 339 |               Buffer.reset buf';
 340 |               loop' (StrSlice.tail s)
 341 |           | Some c ->
 342 |               Buffer.add_char buf' c;
 343 |               loop' (StrSlice.tail s)
 344 |         in
 345 |         loop' (StrSlice.tail s)
 346 |     | Some c ->
 347 |         Buffer.add_char buf c;
 348 |         loop (StrSlice.tail s)
 349 |   in
 350 |   let s', a = loop (trim_leading_ws s) in
 351 |   let attrs = Option.map parse_attributes a |> Option.value ~default:[] in
 352 |   (s', attrs)
 353 | 
 354 | let atx_heading s =
 355 |   let rec loop n s =
 356 |     if n > 6 then raise Fail;
 357 |     match StrSlice.head s with
 358 |     | Some '#' -> loop (succ n) (StrSlice.tail s)
 359 |     | Some w when is_whitespace w ->
 360 |         let s, a =
 361 |           match StrSlice.last s with
 362 |           | Some '}' -> attribute_string s
 363 |           | _ -> (s, [])
 364 |         in
 365 |         let s = trim_ws s in
 366 |         let rec loop t =
 367 |           match StrSlice.last t with
 368 |           | Some '#' -> loop (StrSlice.drop_last t)
 369 |           | Some w when is_whitespace w -> trim_trailing_ws t
 370 |           | None -> trim_trailing_ws t
 371 |           | Some _ -> s
 372 |         in
 373 |         Latx_heading (n, StrSlice.to_string (trim_leading_ws (loop s)), a)
 374 |     | Some _ -> raise Fail
 375 |     | None -> Latx_heading (n, StrSlice.to_string s, [])
 376 |   in
 377 |   loop 0 s
 378 | 
 379 | let entity s =
 380 |   match StrSlice.take 2 s with
 381 |   | '#' :: ('x' | 'X') :: _ ->
 382 |       let rec loop m n s =
 383 |         if m > 6 then raise Fail;
 384 |         match StrSlice.head s with
 385 |         | Some ('a' .. 'f' as c) ->
 386 |             loop
 387 |               (succ m)
 388 |               ((n * 16) + Char.code c - Char.code 'a' + 10)
 389 |               (StrSlice.tail s)
 390 |         | Some ('A' .. 'F' as c) ->
 391 |             loop
 392 |               (succ m)
 393 |               ((n * 16) + Char.code c - Char.code 'A' + 10)
 394 |               (StrSlice.tail s)
 395 |         | Some ('0' .. '9' as c) ->
 396 |             loop
 397 |               (succ m)
 398 |               ((n * 16) + Char.code c - Char.code '0')
 399 |               (StrSlice.tail s)
 400 |         | Some ';' ->
 401 |             if m = 0 then raise Fail;
 402 |             let u =
 403 |               if n = 0 || not (Uchar.is_valid n) then Uchar.rep
 404 |               else Uchar.of_int n
 405 |             in
 406 |             ([ u ], StrSlice.tail s)
 407 |         | Some _ | None -> raise Fail
 408 |       in
 409 |       loop 0 0 (StrSlice.drop 2 s)
 410 |   | '#' :: _ ->
 411 |       let rec loop m n s =
 412 |         if m > 7 then raise Fail;
 413 |         match StrSlice.head s with
 414 |         | Some ('0' .. '9' as c) ->
 415 |             loop
 416 |               (succ m)
 417 |               ((n * 10) + Char.code c - Char.code '0')
 418 |               (StrSlice.tail s)
 419 |         | Some ';' ->
 420 |             if m = 0 then raise Fail;
 421 |             let u =
 422 |               if n = 0 || not (Uchar.is_valid n) then Uchar.rep
 423 |               else Uchar.of_int n
 424 |             in
 425 |             ([ u ], StrSlice.tail s)
 426 |         | Some _ | None -> raise Fail
 427 |       in
 428 |       loop 0 0 (StrSlice.tail s)
 429 |   | ('a' .. 'z' | 'A' .. 'Z') :: _ ->
 430 |       let rec loop len t =
 431 |         match StrSlice.head t with
 432 |         | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9') ->
 433 |             loop (succ len) (StrSlice.tail t)
 434 |         | Some ';' -> (
 435 |             let name = StrSlice.to_string (StrSlice.sub ~len s) in
 436 |             match Entities.f name with
 437 |             | [] -> raise Fail
 438 |             | cps -> (cps, StrSlice.tail t))
 439 |         | Some _ | None -> raise Fail
 440 |       in
 441 |       loop 1 (StrSlice.tail s)
 442 |   | _ -> raise Fail
 443 | 
 444 | let info_string c s =
 445 |   let buf = Buffer.create 17 in
 446 |   let s, a =
 447 |     match StrSlice.last s with Some '}' -> attribute_string s | _ -> (s, [])
 448 |   in
 449 |   let s = trim_ws s in
 450 |   let rec loop s =
 451 |     match StrSlice.head s with
 452 |     (* TODO use is_whitespace *)
 453 |     | Some (' ' | '\t' | '\010' .. '\013') | None ->
 454 |         if c = '`' && StrSlice.exists (function '`' -> true | _ -> false) s
 455 |         then raise Fail;
 456 |         ((Buffer.contents buf, StrSlice.to_string (trim_leading_ws s)), a)
 457 |     | Some '`' when c = '`' -> raise Fail
 458 |     | Some ('\\' as c) -> (
 459 |         let s = StrSlice.tail s in
 460 |         match StrSlice.head s with
 461 |         | Some c when is_punct c ->
 462 |             Buffer.add_char buf c;
 463 |             loop (StrSlice.tail s)
 464 |         | Some _ | None ->
 465 |             Buffer.add_char buf c;
 466 |             loop s)
 467 |     | Some ('&' as c) -> (
 468 |         let s = StrSlice.tail s in
 469 |         match entity s with
 470 |         | ul, s ->
 471 |             List.iter (Uutf.Buffer.add_utf_8 buf) ul;
 472 |             loop s
 473 |         | exception Fail ->
 474 |             Buffer.add_char buf c;
 475 |             loop s)
 476 |     | Some c ->
 477 |         Buffer.add_char buf c;
 478 |         loop (StrSlice.tail s)
 479 |   in
 480 |   loop (trim_leading_ws s)
 481 | 
 482 | let fenced_code ind s =
 483 |   match StrSlice.head s with
 484 |   | Some (('`' | '~') as c) ->
 485 |       let rec loop n s =
 486 |         match StrSlice.head s with
 487 |         | Some c1 when c = c1 -> loop (succ n) (StrSlice.tail s)
 488 |         | Some _ | None ->
 489 |             if n < 3 then raise Fail;
 490 |             let s, a = info_string c s in
 491 |             let c = if c = '`' then Backtick else Tilde in
 492 |             Lfenced_code (ind, n, c, s, a)
 493 |       in
 494 |       loop 1 (StrSlice.tail s)
 495 |   | Some _ | None -> raise Fail
 496 | 
 497 | let indent s =
 498 |   let rec loop n s =
 499 |     match StrSlice.head s with
 500 |     | Some ' ' -> loop (n + 1) (StrSlice.tail s)
 501 |     | Some '\t' -> loop (n + 4) (StrSlice.tail s)
 502 |     | Some _ | None -> n
 503 |   in
 504 |   loop 0 s
 505 | 
 506 | let unordered_list_item ind s =
 507 |   match StrSlice.head s with
 508 |   | Some (('+' | '-' | '*') as c) ->
 509 |       let s = StrSlice.tail s in
 510 |       if is_empty s then Llist_item (Bullet c, 2 + ind, s)
 511 |       else
 512 |         let n = indent s in
 513 |         if n = 0 then raise Fail;
 514 |         let n = if n <= 4 then n else 1 in
 515 |         Llist_item (Bullet c, n + 1 + ind, StrSlice.offset n s)
 516 |   | Some _ | None -> raise Fail
 517 | 
 518 | let ordered_list_item ind s =
 519 |   let rec loop n m s =
 520 |     match StrSlice.head s with
 521 |     | Some ('0' .. '9' as c) ->
 522 |         if n >= 9 then raise Fail;
 523 |         loop (succ n) ((m * 10) + Char.code c - Char.code '0') (StrSlice.tail s)
 524 |     | Some (('.' | ')') as c) ->
 525 |         let s = StrSlice.tail s in
 526 |         if is_empty s then Llist_item (Ordered (m, c), n + 1 + ind, s)
 527 |         else
 528 |           let ind' = indent s in
 529 |           if ind' = 0 then raise Fail;
 530 |           let ind' = if ind' <= 4 then ind' else 1 in
 531 |           Llist_item (Ordered (m, c), n + ind + ind' + 1, StrSlice.offset ind' s)
 532 |     | Some _ | None -> raise Fail
 533 |   in
 534 |   loop 0 0 s
 535 | 
 536 | let tag_name s0 =
 537 |   match StrSlice.head s0 with
 538 |   | Some ('a' .. 'z' | 'A' .. 'Z') ->
 539 |       let rec loop len s =
 540 |         match StrSlice.head s with
 541 |         | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') ->
 542 |             loop (succ len) (StrSlice.tail s)
 543 |         | Some _ | None -> (StrSlice.to_string (StrSlice.sub s0 ~len), s)
 544 |       in
 545 |       loop 1 (StrSlice.tail s0)
 546 |   | Some _ | None -> raise Fail
 547 | 
 548 | let known_tags =
 549 |   [ "address"
 550 |   ; "aside"
 551 |   ; "base"
 552 |   ; "basefont"
 553 |   ; "blockquote"
 554 |   ; "body"
 555 |   ; "caption"
 556 |   ; "center"
 557 |   ; "col"
 558 |   ; "colgroup"
 559 |   ; "dd"
 560 |   ; "details"
 561 |   ; "dialog"
 562 |   ; "dir"
 563 |   ; "div"
 564 |   ; "dl"
 565 |   ; "dt"
 566 |   ; "fieldset"
 567 |   ; "figcaption"
 568 |   ; "figure"
 569 |   ; "footer"
 570 |   ; "form"
 571 |   ; "frame"
 572 |   ; "frameset"
 573 |   ; "h1"
 574 |   ; "h2"
 575 |   ; "h3"
 576 |   ; "h4"
 577 |   ; "h5"
 578 |   ; "h6"
 579 |   ; "head"
 580 |   ; "header"
 581 |   ; "hr"
 582 |   ; "html"
 583 |   ; "iframe"
 584 |   ; "legend"
 585 |   ; "li"
 586 |   ; "link"
 587 |   ; "main"
 588 |   ; "menu"
 589 |   ; "menuitem"
 590 |   ; "meta"
 591 |   ; "nav"
 592 |   ; "noframes"
 593 |   ; "ol"
 594 |   ; "optgroup"
 595 |   ; "option"
 596 |   ; "p"
 597 |   ; "param"
 598 |   ; "section"
 599 |   ; "source"
 600 |   ; "summary"
 601 |   ; "table"
 602 |   ; "tbody"
 603 |   ; "td"
 604 |   ; "tfoot"
 605 |   ; "th"
 606 |   ; "thead"
 607 |   ; "title"
 608 |   ; "tr"
 609 |   ; "track"
 610 |   ; "ul"
 611 |   ]
 612 | 
 613 | let special_tags = [ "pre"; "script"; "style"; "textarea" ]
 614 | 
 615 | let known_tag s =
 616 |   let s = String.lowercase_ascii s in
 617 |   List.mem s known_tags
 618 | 
 619 | let special_tag s =
 620 |   let s = String.lowercase_ascii s in
 621 |   List.mem s special_tags
 622 | 
 623 | let closing_tag s =
 624 |   let s = trim_leading_ws s in
 625 |   match StrSlice.head s with
 626 |   | Some '>' ->
 627 |       if not (is_empty (StrSlice.tail s)) then raise Fail;
 628 |       Lhtml (false, Hblank)
 629 |   | Some _ | None -> raise Fail
 630 | 
 631 | let special_tag tag s =
 632 |   if not (special_tag tag) then raise Fail;
 633 |   match StrSlice.head s with
 634 |   | Some (' ' | '\t' | '\010' .. '\013' | '>') | None ->
 635 |       Lhtml (true, Hcontains [ "</script>"; "</pre>"; "</style>" ])
 636 |   | Some _ -> raise Fail
 637 | 
 638 | let known_tag tag s =
 639 |   if not (known_tag tag) then raise Fail;
 640 |   match StrSlice.take 2 s with
 641 |   | (' ' | '\t' | '\010' .. '\013') :: _ | [] | '>' :: _ | '/' :: '>' :: _ ->
 642 |       Lhtml (true, Hblank)
 643 |   | _ -> raise Fail
 644 | 
 645 | (** TODO Why these repeated functions that look just like thos in [P]? *)
 646 | let ws1 s =
 647 |   match StrSlice.head s with
 648 |   | Some w when is_whitespace w -> trim_leading_ws s
 649 |   | Some _ | None -> raise Fail
 650 | 
 651 | let attribute_name s =
 652 |   match StrSlice.head s with
 653 |   | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | ':') ->
 654 |       let rec loop s =
 655 |         match StrSlice.head s with
 656 |         | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | '.' | ':' | '0' .. '9') ->
 657 |             loop (StrSlice.tail s)
 658 |         | Some _ | None -> s
 659 |       in
 660 |       loop s
 661 |   | Some _ | None -> raise Fail
 662 | 
 663 | let attribute_value s =
 664 |   match StrSlice.head s with
 665 |   | Some (('\'' | '"') as c) ->
 666 |       let rec loop s =
 667 |         match StrSlice.head s with
 668 |         | Some c1 when c = c1 -> StrSlice.tail s
 669 |         | Some _ -> loop (StrSlice.tail s)
 670 |         | None -> raise Fail
 671 |       in
 672 |       loop (StrSlice.tail s)
 673 |   | Some _ ->
 674 |       let rec loop first s =
 675 |         match StrSlice.head s with
 676 |         | Some
 677 |             (' ' | '\t' | '\010' .. '\013' | '"' | '\'' | '=' | '<' | '>' | '`')
 678 |         | None ->
 679 |             if first then raise Fail;
 680 |             s
 681 |         | Some _ -> loop false (StrSlice.tail s)
 682 |       in
 683 |       loop true s
 684 |   | None -> raise Fail
 685 | 
 686 | let attribute s =
 687 |   let s = ws1 s in
 688 |   let s = attribute_name s in
 689 |   let s = trim_leading_ws s in
 690 |   match StrSlice.head s with
 691 |   | Some '=' ->
 692 |       let s = trim_leading_ws (StrSlice.tail s) in
 693 |       attribute_value s
 694 |   | Some _ | None -> s
 695 | 
 696 | let attributes s =
 697 |   let rec loop s = match attribute s with s -> loop s | exception Fail -> s in
 698 |   loop s
 699 | 
 700 | let open_tag s =
 701 |   let s = attributes s in
 702 |   let s = trim_leading_ws s in
 703 |   let n =
 704 |     match StrSlice.take 2 s with
 705 |     | '/' :: '>' :: _ -> 2
 706 |     | '>' :: _ -> 1
 707 |     | _ -> raise Fail
 708 |   in
 709 |   if not (is_empty (StrSlice.drop n s)) then raise Fail;
 710 |   Lhtml (false, Hblank)
 711 | 
 712 | let raw_html s =
 713 |   match StrSlice.take 10 s with
 714 |   | '<' :: '?' :: _ -> Lhtml (true, Hcontains [ "?>" ])
 715 |   | '<' :: '!' :: '-' :: '-' :: _ -> Lhtml (true, Hcontains [ "-->" ])
 716 |   | '<' :: '!' :: '[' :: 'C' :: 'D' :: 'A' :: 'T' :: 'A' :: '[' :: _ ->
 717 |       Lhtml (true, Hcontains [ "]]>" ])
 718 |   | '<' :: '!' :: _ -> Lhtml (true, Hcontains [ ">" ])
 719 |   | '<' :: '/' :: _ ->
 720 |       let tag, s = tag_name (StrSlice.drop 2 s) in
 721 |       (known_tag tag ||| closing_tag) s
 722 |   | '<' :: _ ->
 723 |       let tag, s = tag_name (StrSlice.drop 1 s) in
 724 |       (special_tag tag ||| known_tag tag ||| open_tag) s
 725 |   | _ -> raise Fail
 726 | 
 727 | let blank s =
 728 |   if not (is_empty s) then raise Fail;
 729 |   Lempty
 730 | 
 731 | let tag_string s =
 732 |   let buf = Buffer.create 17 in
 733 |   let s, a =
 734 |     match StrSlice.last s with Some '}' -> attribute_string s | _ -> (s, [])
 735 |   in
 736 |   let s = trim_ws s in
 737 |   let rec loop s =
 738 |     match StrSlice.head s with
 739 |     (* TODO use is_whitespace *)
 740 |     | Some (' ' | '\t' | '\010' .. '\013') | None -> (Buffer.contents buf, a)
 741 |     | Some c ->
 742 |         Buffer.add_char buf c;
 743 |         loop (StrSlice.tail s)
 744 |   in
 745 |   loop (trim_leading_ws s)
 746 | 
 747 | let def_list s =
 748 |   let s = StrSlice.tail s in
 749 |   match StrSlice.head s with
 750 |   | Some w when is_whitespace w ->
 751 |       Ldef_list (String.trim (StrSlice.to_string s))
 752 |   | _ -> raise Fail
 753 | 
 754 | let indented_code ind s =
 755 |   if indent s + ind < 4 then raise Fail;
 756 |   Lindented_code (StrSlice.offset (4 - ind) s)
 757 | 
 758 | (* A sequence of cell contents separated by unescaped '|'
 759 |    characters. *)
 760 | let table_row ~pipe_prefix s =
 761 |   let rec loop items seen_pipe s =
 762 |     match StrSlice.index_unescaped '|' s with
 763 |     | None ->
 764 |         if StrSlice.for_all is_whitespace s then (items, seen_pipe)
 765 |         else (s :: items, false)
 766 |     | Some i ->
 767 |         let item = StrSlice.take_prefix i s in
 768 |         loop (item :: items) true (StrSlice.drop (i + 1) s)
 769 |   in
 770 |   let items, terminating_pipe = loop [] pipe_prefix s in
 771 |   match (pipe_prefix, items, terminating_pipe) with
 772 |   | true, _, _ | _, _ :: _, true | _, _ :: _ :: _, _ ->
 773 |       Ltable_line (List.rev_map StrSlice.trim items)
 774 |   | _ -> raise Fail
 775 | 
 776 | let parse s0 =
 777 |   let ind, s = sp3 s0 in
 778 |   match StrSlice.head s with
 779 |   | Some '>' ->
 780 |       let s = StrSlice.offset 1 s in
 781 |       let s = if indent s > 0 then StrSlice.offset 1 s else s in
 782 |       Lblockquote s
 783 |   | Some '=' -> (setext_heading ||| table_row ~pipe_prefix:false) s
 784 |   | Some '-' ->
 785 |       (setext_heading
 786 |       ||| thematic_break
 787 |       ||| unordered_list_item ind
 788 |       ||| table_row ~pipe_prefix:false)
 789 |         s
 790 |   | Some '_' -> thematic_break s
 791 |   | Some '#' -> atx_heading s
 792 |   | Some ('~' | '`') -> fenced_code ind s
 793 |   | Some '<' -> raw_html s
 794 |   | Some '*' -> (thematic_break ||| unordered_list_item ind) s
 795 |   | Some '+' -> unordered_list_item ind s
 796 |   | Some '0' .. '9' ->
 797 |       (ordered_list_item ind ||| table_row ~pipe_prefix:false) s
 798 |   | Some ':' -> (def_list ||| table_row ~pipe_prefix:false) s
 799 |   | Some '|' -> table_row ~pipe_prefix:true (StrSlice.tail s)
 800 |   | Some _ -> (blank ||| indented_code ind ||| table_row ~pipe_prefix:false) s
 801 |   | None -> Lempty
 802 | 
 803 | let parse s = try parse s with Fail -> Lparagraph
 804 | 
 805 | open P
 806 | 
 807 | let is_empty st =
 808 |   let off = pos st in
 809 |   try
 810 |     let rec loop () =
 811 |       match next st with
 812 |       | c when is_whitespace c -> loop ()
 813 |       | _ ->
 814 |           set_pos st off;
 815 |           false
 816 |     in
 817 |     loop ()
 818 |   with Fail ->
 819 |     set_pos st off;
 820 |     true
 821 | 
 822 | let inline_attribute_string s =
 823 |   let ppos = pos s in
 824 |   ws s;
 825 |   let a =
 826 |     match peek s with
 827 |     | Some '{' ->
 828 |         let buf = Buffer.create 64 in
 829 |         let rec loop s pos =
 830 |           match peek s with
 831 |           | Some '}' ->
 832 |               junk s;
 833 |               Some (Buffer.contents buf)
 834 |           | None | Some '{' ->
 835 |               set_pos s pos;
 836 |               None
 837 |           | Some c ->
 838 |               Buffer.add_char buf c;
 839 |               junk s;
 840 |               loop s pos
 841 |         in
 842 |         junk s;
 843 |         loop s (pos s)
 844 |     | _ -> None
 845 |   in
 846 |   let attr = Option.map parse_attributes a |> Option.value ~default:[] in
 847 |   if attr = [] then set_pos s ppos;
 848 |   attr
 849 | 
 850 | let entity buf st =
 851 |   junk st;
 852 |   match on_sub entity st with
 853 |   | cs -> List.iter (Uutf.Buffer.add_utf_8 buf) cs
 854 |   | exception Fail -> Buffer.add_char buf '&'
 855 | 
 856 | module Pre = struct
 857 |   type delim =
 858 |     | Ws
 859 |     | Punct
 860 |     | Other
 861 | 
 862 |   type emph_style =
 863 |     | Star
 864 |     | Underscore
 865 | 
 866 |   type link_kind =
 867 |     | Img
 868 |     | Url
 869 | 
 870 |   type t =
 871 |     | Bang_left_bracket
 872 |     | Left_bracket of link_kind
 873 |     | Emph of delim * delim * emph_style * int
 874 |     | R of attributes inline
 875 | 
 876 |   let concat = function [ x ] -> x | l -> Concat ([], l)
 877 | 
 878 |   let left_flanking = function
 879 |     | Emph (_, Other, _, _) | Emph ((Ws | Punct), Punct, _, _) -> true
 880 |     | _ -> false
 881 | 
 882 |   let right_flanking = function
 883 |     | Emph (Other, _, _, _) | Emph (Punct, (Ws | Punct), _, _) -> true
 884 |     | _ -> false
 885 | 
 886 |   let is_opener = function
 887 |     | Emph (pre, _, Underscore, _) as x ->
 888 |         left_flanking x && ((not (right_flanking x)) || pre = Punct)
 889 |     | Emph (_, _, Star, _) as x -> left_flanking x
 890 |     | _ -> false
 891 | 
 892 |   let is_closer = function
 893 |     | Emph (_, post, Underscore, _) as x ->
 894 |         right_flanking x && ((not (left_flanking x)) || post = Punct)
 895 |     | Emph (_, _, Star, _) as x -> right_flanking x
 896 |     | _ -> false
 897 | 
 898 |   let classify_delim = function
 899 |     | '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ','
 900 |     | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\'
 901 |     | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' ->
 902 |         Punct
 903 |     | ' ' | '\t' | '\010' .. '\013' | '\160' -> Ws
 904 |     | _ -> Other
 905 | 
 906 |   let to_r = function
 907 |     | Bang_left_bracket -> Text ([], "![")
 908 |     | Left_bracket Img -> Text ([], "![")
 909 |     | Left_bracket Url -> Text ([], "[")
 910 |     | Emph (_, _, Star, n) -> Text ([], String.make n '*')
 911 |     | Emph (_, _, Underscore, n) -> Text ([], String.make n '_')
 912 |     | R x -> x
 913 | 
 914 |   let rec find_next_emph = function
 915 |     | Emph (pre, post, style, n) :: _ -> Some (pre, post, style, n)
 916 |     | _ :: xs -> find_next_emph xs
 917 |     | [] -> None
 918 | 
 919 |   let rec find_next_closer_emph = function
 920 |     | (Emph (pre, post, style, n) as e) :: _ when is_closer e ->
 921 |         Some (pre, post, style, n)
 922 |     | _ :: xs -> find_next_closer_emph xs
 923 |     | [] -> None
 924 | 
 925 |   (* Checks the lengths of two different emphasis delimiters to see if there can be a match.
 926 | 
 927 |      From the spec: "If one of the delimiters can both open and close emphasis, then the sum of the lengths
 928 |        of the delimiter runs containing the opening and closing delimiters must not be
 929 |        a multiple of 3 unless both lengths are multiples of 3" *)
 930 |   let is_emph_match n1 n2 =
 931 |     (*
 932 |       - *foo**bar**baz*
 933 | 
 934 |             *foo** -> the second delimiter ** is both an opening and closing delimiter.
 935 |                       The sum of the length of both delimiters is 3, so they can't be matched.
 936 | 
 937 |             **bar** -> they are both opening and closing delemiters.
 938 |                        Their sum is 4 which is not a multiple of 3 so they can be matched to produce <strong>bar</strong>
 939 | 
 940 |             The end result is: <em>foo<strong>bar</strong>baz</em>
 941 | 
 942 |       - *foo***bar**baz*
 943 | 
 944 |             *foo*** -> *** is both an opening and closing delimiter.
 945 |                        Their sum is 4 so they can be matched to produce: <em>foo</em>**
 946 | 
 947 |             **bar** -> they are both opening and closing delemiters.
 948 |                        Their sum is 4 which is not a multiple of 3 so they can be matched to produce <strong>bar</strong>
 949 | 
 950 |             The end result is: <em>foo</em><strong>bar</strong>baz*
 951 | 
 952 |       - ***foo***bar**baz*
 953 | 
 954 |             ***foo*** -> the second delimiter *** is both an opening and closing delimiter.
 955 |                          Their sum is 6 which is a multiple of 3. However, both lengths are multiples of 3
 956 |                          so they can be matched to produce: <em><strong>foo</strong></em>
 957 | 
 958 |             bar**baz* -> ** is both an opening and closing delimiter.
 959 |                          Their sum is 3 so they can't be matched
 960 | 
 961 |             The end result is: <em><strong>foo</strong></em>bar**baz*
 962 |       *)
 963 |     if (n1 + n2) mod 3 = 0 && n1 mod 3 != 0 && n2 mod 3 != 0 then false
 964 |     else true
 965 | 
 966 |   let rec parse_emph = function
 967 |     | (Emph (pre, _, q1, n1) as x1) :: xs when is_opener x1 ->
 968 |         let rec loop acc = function
 969 |           | (Emph (_, post, q2, n2) as x2) :: xs1 as xs
 970 |             when is_closer x2 && q1 = q2 ->
 971 |               (* At this point we have an openener followed by a closer. Both are of the same style (either * or _) *)
 972 |               if (is_opener x2 || is_closer x1) && not (is_emph_match n1 n2)
 973 |               then
 974 |                 (*
 975 |                  The second delimiter (the closer) is also an opener, and both delimiters don't match together,
 976 |                  according to the "mod 3" rule. In that case, we check if the next delimiter can match.
 977 | 
 978 |                    *foo**bar**baz*  The second delimiter that's both an opener/closer ( ** before bar)
 979 |                                     matches with the next delimiter ( ** after bar). They'll become
 980 |                                     <strong>bar</strong>. The end result will be: <em>foo<strong>bar</strong>baz</em>
 981 | 
 982 | 
 983 |                    *foo**bar*baz*  The second delimiter that's both an opener/closer ( ** before bar)
 984 |                                    doesn't match with the next delimiter ( * after bar). **bar will be
 985 |                                    considered as regular text. The end result will be: <em>foo**bar</em>baz*
 986 |                  *)
 987 |                 match find_next_emph xs1 with
 988 |                 | Some (_, _, _, n3) when is_emph_match n3 n2 ->
 989 |                     let xs' = parse_emph xs in
 990 |                     loop acc xs'
 991 |                 | _ -> loop (x2 :: acc) xs1
 992 |               else
 993 |                 let xs =
 994 |                   if n1 >= 2 && n2 >= 2 then
 995 |                     if n2 > 2 then Emph (Other, post, q2, n2 - 2) :: xs1
 996 |                     else xs1
 997 |                   else if n2 > 1 then Emph (Punct, post, q2, n2 - 1) :: xs1
 998 |                   else xs1
 999 |                 in
1000 |                 let r =
1001 |                   let il = concat (List.map to_r (List.rev acc)) in
1002 |                   if n1 >= 2 && n2 >= 2 then R (Strong ([], il)) :: xs
1003 |                   else R (Emph ([], il)) :: xs
1004 |                 in
1005 |                 let r =
1006 |                   if n1 >= 2 && n2 >= 2 then
1007 |                     if n1 > 2 then Emph (pre, Other, q1, n1 - 2) :: r else r
1008 |                   else if n1 > 1 then Emph (pre, Punct, q1, n1 - 1) :: r
1009 |                   else r
1010 |                 in
1011 |                 parse_emph r
1012 |           | (Emph (_, _, q2, _) as x2) :: xs1 as xs when is_opener x2 ->
1013 |               (*
1014 |                This case happens when we encounter a second opener delimiter. We look ahead for the next closer,
1015 |                and if the next closer is of the same style, we can match them together.
1016 | 
1017 |                  *foo _bar_ baz_  The second opener (_ before `bar`) is of the same style as the next closer
1018 |                                   (_ after `bar`). We can match them to produce <em>bar</em>
1019 |                                   The end result will be: *foo <em>bar</em> baz_
1020 | 
1021 | 
1022 |                  *foo _bar* baz_  The second opener (_ before `bar`) is not of the same style as the next closer
1023 |                                   ( * after `bar`). They can't be matched so we'll consider _bar as regular text.
1024 |                                   The end result will be: <em>foo _bar</em> baz_
1025 |                *)
1026 |               let is_next_closer_same =
1027 |                 match find_next_closer_emph xs1 with
1028 |                 | None -> false
1029 |                 | Some (_, _, q3, _) -> q2 = q3
1030 |               in
1031 |               if not is_next_closer_same then loop (x2 :: acc) xs1
1032 |               else loop acc (parse_emph xs)
1033 |           | x :: xs -> loop (x :: acc) xs
1034 |           | [] -> x1 :: List.rev acc
1035 |         in
1036 |         loop [] xs
1037 |     | x :: xs -> x :: parse_emph xs
1038 |     | [] -> []
1039 | 
1040 |   let parse_emph xs = concat (List.map to_r (parse_emph xs))
1041 | end
1042 | 
1043 | let escape buf st =
1044 |   if next st <> '\\' then raise Fail;
1045 |   match peek st with
1046 |   | Some c when is_punct c ->
1047 |       junk st;
1048 |       Buffer.add_char buf c
1049 |   | Some _ | None -> Buffer.add_char buf '\\'
1050 | 
1051 | let link_label allow_balanced_brackets st =
1052 |   if peek_exn st <> '[' then raise Fail;
1053 |   junk st;
1054 |   let buf = Buffer.create 17 in
1055 |   let rec loop n nonempty =
1056 |     match peek_exn st with
1057 |     | ']' when n = 0 ->
1058 |         junk st;
1059 |         if not nonempty then raise Fail;
1060 |         Buffer.contents buf
1061 |     | ']' as c ->
1062 |         assert (n > 0);
1063 |         junk st;
1064 |         Buffer.add_char buf c;
1065 |         loop (pred n) true
1066 |     | '\\' as c ->
1067 |         junk st;
1068 |         Buffer.add_char buf c;
1069 |         begin
1070 |           match peek_exn st with
1071 |           | c when is_punct c ->
1072 |               junk st;
1073 |               Buffer.add_char buf c
1074 |           | _ -> ()
1075 |         end;
1076 |         loop n true
1077 |     | '[' when not allow_balanced_brackets -> raise Fail
1078 |     | '[' as c ->
1079 |         junk st;
1080 |         Buffer.add_char buf c;
1081 |         loop (succ n) true
1082 |     | w when is_whitespace w ->
1083 |         junk st;
1084 |         Buffer.add_char buf w;
1085 |         loop n nonempty
1086 |     | c ->
1087 |         junk st;
1088 |         Buffer.add_char buf c;
1089 |         loop n true
1090 |   in
1091 |   loop 0 false
1092 | 
1093 | type add_uchar_result =
1094 |   { start : bool
1095 |   ; seen_ws : bool
1096 |   }
1097 | 
1098 | (* based on https://erratique.ch/software/uucp/doc/Uucp/Case/index.html#caselesseq *)
1099 | let normalize s =
1100 |   let canonical_caseless_key s =
1101 |     let b = Buffer.create (String.length s * 2) in
1102 |     let to_nfd_and_utf_8 =
1103 |       let n = Uunf.create `NFD in
1104 |       let rec add v =
1105 |         match Uunf.add n v with
1106 |         | `Await | `End -> ()
1107 |         | `Uchar u ->
1108 |             Uutf.Buffer.add_utf_8 b u;
1109 |             add `Await
1110 |       in
1111 |       add
1112 |     in
1113 |     let add_nfd =
1114 |       let n = Uunf.create `NFD in
1115 |       let rec add v =
1116 |         match Uunf.add n v with
1117 |         | `Await | `End -> ()
1118 |         | `Uchar u ->
1119 |             (match Uucp.Case.Fold.fold u with
1120 |             | `Self -> to_nfd_and_utf_8 (`Uchar u)
1121 |             | `Uchars us -> List.iter (fun u -> to_nfd_and_utf_8 (`Uchar u)) us);
1122 |             add `Await
1123 |       in
1124 |       add
1125 |     in
1126 |     let uspace = `Uchar (Uchar.of_char ' ') in
1127 |     let add_uchar { start; seen_ws } _ = function
1128 |       | `Malformed _ ->
1129 |           add_nfd (`Uchar Uutf.u_rep);
1130 |           { start = false; seen_ws = false }
1131 |       | `Uchar u as uchar ->
1132 |           if Uucp.White.is_white_space u then { start; seen_ws = true }
1133 |           else (
1134 |             if (not start) && seen_ws then add_nfd uspace;
1135 |             add_nfd uchar;
1136 |             { start = false; seen_ws = false })
1137 |     in
1138 |     let (_ : add_uchar_result) =
1139 |       Uutf.String.fold_utf_8 add_uchar { start = true; seen_ws = false } s
1140 |     in
1141 |     add_nfd `End;
1142 |     to_nfd_and_utf_8 `End;
1143 |     Buffer.contents b
1144 |   in
1145 |   canonical_caseless_key s
1146 | 
1147 | let tag_name st =
1148 |   match peek_exn st with
1149 |   | 'a' .. 'z' | 'A' .. 'Z' ->
1150 |       junk st;
1151 |       let rec loop () =
1152 |         match peek st with
1153 |         | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') ->
1154 |             junk st;
1155 |             loop ()
1156 |         | Some _ | None -> ()
1157 |       in
1158 |       loop ()
1159 |   | _ -> raise Fail
1160 | 
1161 | let ws_buf buf st =
1162 |   let rec loop () =
1163 |     match peek st with
1164 |     | Some w when is_whitespace w ->
1165 |         Buffer.add_char buf w;
1166 |         junk st;
1167 |         loop ()
1168 |     | Some _ | None -> ()
1169 |   in
1170 |   loop ()
1171 | 
1172 | let closing_tag st =
1173 |   let start = pos st in
1174 |   if next st <> '<' then raise Fail;
1175 |   if next st <> '/' then raise Fail;
1176 |   tag_name st;
1177 |   ws st;
1178 |   if next st <> '>' then raise Fail;
1179 |   range st start (pos st - start)
1180 | 
1181 | let list p st =
1182 |   let rec loop () =
1183 |     match protect p st with () -> loop () | exception Fail -> ()
1184 |   in
1185 |   loop ()
1186 | 
1187 | let single_quoted_attribute st =
1188 |   if next st <> '\'' then raise Fail;
1189 |   let rec loop () =
1190 |     match peek_exn st with
1191 |     | '\'' -> junk st
1192 |     (* | '&' -> *)
1193 |     (*     entity buf st; loop () *)
1194 |     | _ ->
1195 |         junk st;
1196 |         loop ()
1197 |   in
1198 |   loop ()
1199 | 
1200 | let double_quoted_attribute st =
1201 |   if next st <> '"' then raise Fail;
1202 |   let rec loop () =
1203 |     match peek_exn st with
1204 |     | '"' -> junk st
1205 |     (* | '&' -> *)
1206 |     (*     entity buf st; loop () *)
1207 |     | _ ->
1208 |         junk st;
1209 |         loop ()
1210 |   in
1211 |   loop ()
1212 | 
1213 | let unquoted_attribute st =
1214 |   let rec loop n =
1215 |     match peek_exn st with
1216 |     | ' ' | '\t' | '\010' .. '\013' | '"' | '\'' | '=' | '<' | '>' | '`' ->
1217 |         if n = 0 then raise Fail
1218 |     (* | '&' -> *)
1219 |     (*     entity buf st; loop () *)
1220 |     | _ ->
1221 |         junk st;
1222 |         loop (succ n)
1223 |   in
1224 |   loop 0
1225 | 
1226 | let attribute_value st =
1227 |   match peek_exn st with
1228 |   | '\'' -> single_quoted_attribute st
1229 |   | '"' -> double_quoted_attribute st
1230 |   | _ -> unquoted_attribute st
1231 | 
1232 | let attribute_name st =
1233 |   match peek_exn st with
1234 |   | 'a' .. 'z' | 'A' .. 'Z' | '_' | ':' ->
1235 |       junk st;
1236 |       let rec loop () =
1237 |         match peek st with
1238 |         | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '.' | ':' | '-') ->
1239 |             junk st;
1240 |             loop ()
1241 |         | Some _ | None -> ()
1242 |       in
1243 |       loop ()
1244 |   | _ -> raise Fail
1245 | 
1246 | let option d p st = match protect p st with r -> r | exception Fail -> d
1247 | let some p st = Some (p st)
1248 | let attribute_value_specification = ws >>> char '=' >>> ws >>> attribute_value
1249 | 
1250 | let ws1_buf buf st =
1251 |   match peek st with
1252 |   | Some w when is_whitespace w -> ws_buf buf st
1253 |   | Some _ | None -> raise Fail
1254 | 
1255 | let attribute st =
1256 |   ws1 st;
1257 |   attribute_name st;
1258 |   option () attribute_value_specification st
1259 | 
1260 | let open_tag st =
1261 |   let start = pos st in
1262 |   if next st <> '<' then raise Fail;
1263 |   tag_name st;
1264 |   list attribute st;
1265 |   ws st;
1266 |   begin
1267 |     match peek_exn st with
1268 |     | '/' -> junk st
1269 |     | _ -> ()
1270 |   end;
1271 |   if next st <> '>' then raise Fail;
1272 |   range st start (pos st - start)
1273 | 
1274 | let html_comment st =
1275 |   let buf = Buffer.create 17 in
1276 |   if next st <> '<' then raise Fail;
1277 |   if next st <> '!' then raise Fail;
1278 |   if next st <> '-' then raise Fail;
1279 |   if next st <> '-' then raise Fail;
1280 |   Buffer.add_string buf "<!--";
1281 |   let rec loop start =
1282 |     match peek_exn st with
1283 |     | '-' as c -> (
1284 |         junk st;
1285 |         match peek_exn st with
1286 |         | '-' ->
1287 |             junk st;
1288 |             if next st <> '>' then raise Fail;
1289 |             Buffer.add_string buf "-->";
1290 |             Buffer.contents buf
1291 |         | '>' when start -> raise Fail
1292 |         | _ ->
1293 |             Buffer.add_char buf c;
1294 |             loop false)
1295 |     | '>' when start -> raise Fail
1296 |     | '&' ->
1297 |         entity buf st;
1298 |         loop false
1299 |     | _ as c ->
1300 |         junk st;
1301 |         Buffer.add_char buf c;
1302 |         loop false
1303 |   in
1304 |   loop true
1305 | 
1306 | let processing_instruction st =
1307 |   let buf = Buffer.create 17 in
1308 |   if next st <> '<' then raise Fail;
1309 |   if next st <> '?' then raise Fail;
1310 |   Buffer.add_string buf "<?";
1311 |   let rec loop () =
1312 |     match peek_exn st with
1313 |     | '?' as c -> (
1314 |         junk st;
1315 |         match peek_exn st with
1316 |         | '>' ->
1317 |             junk st;
1318 |             Buffer.add_string buf "?>";
1319 |             Buffer.contents buf
1320 |         | _ ->
1321 |             Buffer.add_char buf c;
1322 |             loop ())
1323 |     | '&' ->
1324 |         entity buf st;
1325 |         loop ()
1326 |     | _ as c ->
1327 |         junk st;
1328 |         Buffer.add_char buf c;
1329 |         loop ()
1330 |   in
1331 |   loop ()
1332 | 
1333 | let cdata_section st =
1334 |   let buf = Buffer.create 17 in
1335 |   if next st <> '<' then raise Fail;
1336 |   if next st <> '!' then raise Fail;
1337 |   if next st <> '[' then raise Fail;
1338 |   if next st <> 'C' then raise Fail;
1339 |   if next st <> 'D' then raise Fail;
1340 |   if next st <> 'A' then raise Fail;
1341 |   if next st <> 'T' then raise Fail;
1342 |   if next st <> 'A' then raise Fail;
1343 |   if next st <> '[' then raise Fail;
1344 |   Buffer.add_string buf "<![CDATA[";
1345 |   let rec loop () =
1346 |     match peek_exn st with
1347 |     | ']' as c -> (
1348 |         junk st;
1349 |         match peek_exn st with
1350 |         | ']' as c1 -> (
1351 |             junk st;
1352 |             match peek_exn st with
1353 |             | '>' ->
1354 |                 junk st;
1355 |                 Buffer.add_string buf "]]>";
1356 |                 Buffer.contents buf
1357 |             | _ ->
1358 |                 Buffer.add_char buf c;
1359 |                 Buffer.add_char buf c1;
1360 |                 loop ())
1361 |         | _ ->
1362 |             Buffer.add_char buf c;
1363 |             loop ())
1364 |     | '&' ->
1365 |         entity buf st;
1366 |         loop ()
1367 |     | _ as c ->
1368 |         junk st;
1369 |         Buffer.add_char buf c;
1370 |         loop ()
1371 |   in
1372 |   loop ()
1373 | 
1374 | let declaration st =
1375 |   let buf = Buffer.create 17 in
1376 |   if next st <> '<' then raise Fail;
1377 |   if next st <> '!' then raise Fail;
1378 |   Buffer.add_string buf "<!";
1379 |   match peek_exn st with
1380 |   | 'A' .. 'Z' ->
1381 |       let rec loop () =
1382 |         match peek_exn st with
1383 |         | 'A' .. 'Z' as c ->
1384 |             junk st;
1385 |             Buffer.add_char buf c;
1386 |             loop ()
1387 |         | w when is_whitespace w ->
1388 |             ws1_buf buf st;
1389 |             let rec loop () =
1390 |               match peek_exn st with
1391 |               | '>' as c ->
1392 |                   junk st;
1393 |                   Buffer.add_char buf c;
1394 |                   Buffer.contents buf
1395 |               | '&' ->
1396 |                   entity buf st;
1397 |                   loop ()
1398 |               | _ as c ->
1399 |                   junk st;
1400 |                   Buffer.add_char buf c;
1401 |                   loop ()
1402 |             in
1403 |             loop ()
1404 |         | _ -> raise Fail
1405 |       in
1406 |       loop ()
1407 |   | _ -> raise Fail
1408 | 
1409 | let link_destination st =
1410 |   let buf = Buffer.create 17 in
1411 |   match peek_exn st with
1412 |   | '<' ->
1413 |       junk st;
1414 |       let rec loop () =
1415 |         match peek_exn st with
1416 |         | '>' ->
1417 |             junk st;
1418 |             Buffer.contents buf
1419 |         | '\010' .. '\013' | '<' -> raise Fail
1420 |         | '\\' ->
1421 |             escape buf st;
1422 |             loop ()
1423 |         | '&' ->
1424 |             entity buf st;
1425 |             loop ()
1426 |         | _ as c ->
1427 |             junk st;
1428 |             Buffer.add_char buf c;
1429 |             loop ()
1430 |       in
1431 |       loop ()
1432 |   | _ ->
1433 |       let rec loop n =
1434 |         match peek st with
1435 |         | Some ('(' as c) ->
1436 |             junk st;
1437 |             Buffer.add_char buf c;
1438 |             loop (succ n)
1439 |         | Some ')' when n = 0 ->
1440 |             if Buffer.length buf = 0 then raise Fail;
1441 |             Buffer.contents buf
1442 |         | Some (')' as c) ->
1443 |             junk st;
1444 |             Buffer.add_char buf c;
1445 |             loop (pred n)
1446 |         | Some '\\' ->
1447 |             escape buf st;
1448 |             loop n
1449 |         | Some '&' ->
1450 |             entity buf st;
1451 |             loop n
1452 |         | Some (' ' | '\t' | '\x00' .. '\x1F' | '\x7F') | None ->
1453 |             if n > 0 || Buffer.length buf = 0 then raise Fail;
1454 |             Buffer.contents buf
1455 |         | Some c ->
1456 |             junk st;
1457 |             Buffer.add_char buf c;
1458 |             loop n
1459 |       in
1460 |       loop 0
1461 | 
1462 | let eol st =
1463 |   match peek st with Some '\n' -> junk st | Some _ -> raise Fail | None -> ()
1464 | 
1465 | let link_title st =
1466 |   let buf = Buffer.create 17 in
1467 |   match peek_exn st with
1468 |   | ('\'' | '"') as c ->
1469 |       junk st;
1470 |       let rec loop () =
1471 |         match peek_exn st with
1472 |         | '\\' ->
1473 |             escape buf st;
1474 |             loop ()
1475 |         | '&' ->
1476 |             entity buf st;
1477 |             loop ()
1478 |         | _ as c1 when c = c1 ->
1479 |             junk st;
1480 |             Buffer.contents buf
1481 |         | _ as c1 ->
1482 |             junk st;
1483 |             Buffer.add_char buf c1;
1484 |             loop ()
1485 |       in
1486 |       loop ()
1487 |   | '(' ->
1488 |       junk st;
1489 |       let rec loop () =
1490 |         match peek_exn st with
1491 |         | '\\' ->
1492 |             escape buf st;
1493 |             loop ()
1494 |         | '&' ->
1495 |             entity buf st;
1496 |             loop ()
1497 |         | ')' ->
1498 |             junk st;
1499 |             Buffer.contents buf
1500 |         | _ as c ->
1501 |             junk st;
1502 |             Buffer.add_char buf c;
1503 |             loop ()
1504 |       in
1505 |       loop ()
1506 |   | _ -> raise Fail
1507 | 
1508 | let space st = match peek_exn st with ' ' -> junk st | _ -> raise Fail
1509 | 
1510 | let many p st =
1511 |   try
1512 |     while true do
1513 |       p st
1514 |     done
1515 |   with Fail -> ()
1516 | 
1517 | let scheme st =
1518 |   match peek_exn st with
1519 |   | 'a' .. 'z' | 'A' .. 'Z' ->
1520 |       let rec loop n =
1521 |         if n < 32 then
1522 |           match peek st with
1523 |           | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '.' | '-') ->
1524 |               junk st;
1525 |               loop (succ n)
1526 |           | Some _ | None -> n
1527 |         else n
1528 |       in
1529 |       let n = loop 0 in
1530 |       if n < 2 then raise Fail
1531 |   | _ -> raise Fail
1532 | 
1533 | let absolute_uri st =
1534 |   let p = pos st in
1535 |   scheme st;
1536 |   if next st <> ':' then raise Fail;
1537 |   let rec loop () =
1538 |     match peek st with
1539 |     | Some
1540 |         ( ' ' | '\t'
1541 |         | '\010' .. '\013'
1542 |         | '\x00' .. '\x1F'
1543 |         | '\x7F' .. '\x9F'
1544 |         | '<' | '>' )
1545 |     | None ->
1546 |         let txt = range st p (pos st - p) in
1547 |         (txt, txt)
1548 |     | Some _ ->
1549 |         junk st;
1550 |         loop ()
1551 |   in
1552 |   loop ()
1553 | 
1554 | let email_address st =
1555 |   let p = pos st in
1556 |   let rec loop n =
1557 |     match peek_exn st with
1558 |     | 'a' .. 'z'
1559 |     | 'A' .. 'Z'
1560 |     | '0' .. '9'
1561 |     | '.' | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '/' | '=' | '?'
1562 |     | '^' | '_' | '`' | '{' | '|' | '}' | '~' | '-' ->
1563 |         junk st;
1564 |         loop (succ n)
1565 |     | '@' ->
1566 |         junk st;
1567 |         let label st =
1568 |           let let_dig st =
1569 |             match peek_exn st with
1570 |             | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' ->
1571 |                 junk st;
1572 |                 false
1573 |             | '-' ->
1574 |                 junk st;
1575 |                 true
1576 |             | _ -> raise Fail
1577 |           in
1578 |           if let_dig st then raise Fail;
1579 |           let rec loop last =
1580 |             match let_dig st with
1581 |             | r -> loop r
1582 |             | exception Fail -> if last then raise Fail
1583 |           in
1584 |           loop false
1585 |         in
1586 |         label st;
1587 |         list (char '.' >>> label) st;
1588 |         let txt = range st p (pos st - p) in
1589 |         (txt, "mailto:" ^ txt)
1590 |     | _ -> raise Fail
1591 |   in
1592 |   loop 0
1593 | 
1594 | let autolink st =
1595 |   match peek_exn st with
1596 |   | '<' ->
1597 |       junk st;
1598 |       let label, destination = (absolute_uri ||| email_address) st in
1599 |       if next st <> '>' then raise Fail;
1600 |       { Ast.Impl.label = Text ([], label); destination; title = None }
1601 |   | _ -> raise Fail
1602 | 
1603 | let inline_link =
1604 |   char '('
1605 |   >>> ws
1606 |   >>> option
1607 |         ("", None)
1608 |         (pair link_destination (option None (ws1 >>> some link_title)))
1609 |   <<< ws
1610 |   <<< char ')'
1611 | 
1612 | let get_buf buf =
1613 |   let s = Buffer.contents buf in
1614 |   Buffer.clear buf;
1615 |   s
1616 | 
1617 | let text buf acc =
1618 |   if Buffer.length buf = 0 then acc else Pre.R (Text ([], get_buf buf)) :: acc
1619 | 
1620 | let inline_pre buf acc st =
1621 |   let pos = pos st in
1622 |   let rec gobble_open_backtick n =
1623 |     match peek st with
1624 |     | Some '`' ->
1625 |         junk st;
1626 |         gobble_open_backtick (succ n)
1627 |     | Some _ ->
1628 |         let acc = text buf acc in
1629 |         let bufcode = Buffer.create 17 in
1630 |         let finish () =
1631 |           let content = Buffer.contents bufcode in
1632 |           let content =
1633 |             if String.for_all (fun c -> c = ' ') content then content
1634 |             else if
1635 |               String.length content >= 2
1636 |               && content.[0] = ' '
1637 |               && content.[String.length content - 1] = ' '
1638 |             then String.sub content 1 (String.length content - 2)
1639 |             else content
1640 |           in
1641 |           let attr = inline_attribute_string st in
1642 |           Pre.R (Code (attr, content)) :: acc
1643 |         in
1644 |         let rec gobble_body start m =
1645 |           match peek st with
1646 |           | Some '`' ->
1647 |               junk st;
1648 |               gobble_body start (succ m)
1649 |           | _ when m = n -> finish ()
1650 |           | Some c when is_whitespace c ->
1651 |               if m > 0 then Buffer.add_string bufcode (String.make m '`');
1652 |               Buffer.add_char bufcode (if c = '\010' then ' ' else c);
1653 |               junk st;
1654 |               gobble_body (start && m = 0) 0
1655 |           | Some c ->
1656 |               junk st;
1657 |               (* if seen_ws then Buffer.add_char bufcode ' '; *)
1658 |               if m > 0 then Buffer.add_string bufcode (String.make m '`');
1659 |               Buffer.add_char bufcode c;
1660 |               gobble_body false 0
1661 |           | None ->
1662 |               Buffer.add_string buf (range st pos n);
1663 |               set_pos st (pos + n);
1664 |               acc
1665 |         in
1666 |         gobble_body true 0
1667 |     | None ->
1668 |         Buffer.add_string buf (String.make n '`');
1669 |         acc
1670 |   in
1671 |   gobble_open_backtick 0
1672 | 
1673 | let rec inline defs st =
1674 |   let buf = Buffer.create 0 in
1675 |   let text acc = text buf acc in
1676 |   let rec reference_link kind acc st =
1677 |     let off0 = pos st in
1678 |     match protect (link_label true) st with
1679 |     | lab -> (
1680 |         let reflink lab =
1681 |           let s = normalize lab in
1682 |           match
1683 |             List.find_opt
1684 |               (fun ({ label; _ } : attributes link_def) -> label = s)
1685 |               defs
1686 |           with
1687 |           | Some { label = _; destination; title; attributes = attr } ->
1688 |               let lab1 = inline defs (of_string lab) in
1689 |               let r =
1690 |                 let def = { label = lab1; destination; title } in
1691 |                 match kind with
1692 |                 | Pre.Img -> Image (attr, def)
1693 |                 | Url -> Link (attr, def)
1694 |               in
1695 |               loop (Pre.R r :: text acc) st
1696 |           | None ->
1697 |               if kind = Img then Buffer.add_char buf '!';
1698 |               Buffer.add_char buf '[';
1699 |               let acc = text acc in
1700 |               set_pos st (succ off0);
1701 |               loop acc st
1702 |         in
1703 |         match peek st with
1704 |         | Some '[' -> (
1705 |             if peek_after '\000' st = ']' then (
1706 |               junk st;
1707 |               junk st;
1708 |               reflink lab)
1709 |             else
1710 |               match protect (link_label false) st with
1711 |               | _ ->
1712 |                   set_pos st off0;
1713 |                   junk st;
1714 |                   loop (Left_bracket kind :: text acc) st
1715 |               | exception Fail -> reflink lab)
1716 |         | Some '(' -> (
1717 |             match protect inline_link st with
1718 |             | _ ->
1719 |                 set_pos st off0;
1720 |                 junk st;
1721 |                 loop (Left_bracket kind :: text acc) st
1722 |             | exception Fail -> reflink lab)
1723 |         | Some _ | None -> reflink lab)
1724 |     | exception Fail ->
1725 |         junk st;
1726 |         loop (Left_bracket kind :: text acc) st
1727 |   and loop ~seen_link acc st =
1728 |     match peek_exn st with
1729 |     | '<' as c -> (
1730 |         match protect autolink st with
1731 |         | def ->
1732 |             let attr = inline_attribute_string st in
1733 |             loop ~seen_link (Pre.R (Link (attr, def)) :: text acc) st
1734 |         | exception Fail -> (
1735 |             match
1736 |               protect
1737 |                 (closing_tag
1738 |                 ||| open_tag
1739 |                 ||| html_comment
1740 |                 ||| declaration
1741 |                 ||| cdata_section
1742 |                 ||| processing_instruction)
1743 |                 st
1744 |             with
1745 |             | tag -> loop ~seen_link (Pre.R (Html ([], tag)) :: text acc) st
1746 |             | exception Fail ->
1747 |                 junk st;
1748 |                 Buffer.add_char buf c;
1749 |                 loop ~seen_link acc st))
1750 |     | '\n' ->
1751 |         junk st;
1752 |         sp st;
1753 |         loop ~seen_link (Pre.R (Soft_break []) :: text acc) st
1754 |     | ' ' as c -> (
1755 |         junk st;
1756 |         match peek st with
1757 |         | Some ' ' -> (
1758 |             match protect (many space >>> char '\n' >>> many space) st with
1759 |             | () -> loop ~seen_link (Pre.R (Hard_break []) :: text acc) st
1760 |             | exception Fail ->
1761 |                 junk st;
1762 |                 Buffer.add_string buf "  ";
1763 |                 loop ~seen_link acc st)
1764 |         | Some '\n' -> loop ~seen_link acc st
1765 |         | Some _ | None ->
1766 |             Buffer.add_char buf c;
1767 |             loop ~seen_link acc st)
1768 |     | '`' -> loop ~seen_link (inline_pre buf acc st) st
1769 |     | '\\' as c -> (
1770 |         junk st;
1771 |         match peek st with
1772 |         | Some '\n' ->
1773 |             junk st;
1774 |             loop ~seen_link (Pre.R (Hard_break []) :: text acc) st
1775 |         | Some c when is_punct c ->
1776 |             junk st;
1777 |             Buffer.add_char buf c;
1778 |             loop ~seen_link acc st
1779 |         | Some _ | None ->
1780 |             Buffer.add_char buf c;
1781 |             loop ~seen_link acc st)
1782 |     | '!' as c -> (
1783 |         junk st;
1784 |         match peek st with
1785 |         | Some '[' -> reference_link ~seen_link Img (text acc) st
1786 |         | Some _ | None ->
1787 |             Buffer.add_char buf c;
1788 |             loop ~seen_link acc st)
1789 |     | '&' ->
1790 |         entity buf st;
1791 |         loop ~seen_link acc st
1792 |     | ']' ->
1793 |         junk st;
1794 |         let acc = text acc in
1795 |         let rec aux ~seen_link xs = function
1796 |           | Pre.Left_bracket Url :: acc' when seen_link ->
1797 |               Buffer.add_char buf ']';
1798 |               let acc' = List.rev_append (Pre.R (Text ([], "[")) :: xs) acc' in
1799 |               loop ~seen_link acc' st
1800 |           | Left_bracket k :: acc' -> (
1801 |               match peek st with
1802 |               | Some '(' -> (
1803 |                   match protect inline_link st with
1804 |                   | destination, title ->
1805 |                       let attr = inline_attribute_string st in
1806 |                       let r =
1807 |                         let label = Pre.parse_emph xs in
1808 |                         let def = { label; destination; title } in
1809 |                         match k with
1810 |                         | Img -> Image (attr, def)
1811 |                         | Url -> Link (attr, def)
1812 |                       in
1813 |                       loop ~seen_link (Pre.R r :: acc') st
1814 |                   | exception Fail ->
1815 |                       Buffer.add_char buf ']';
1816 |                       loop ~seen_link acc st)
1817 |               | Some '[' -> (
1818 |                   let label = Pre.parse_emph xs in
1819 |                   let off1 = pos st in
1820 |                   match link_label false st with
1821 |                   | lab -> (
1822 |                       let s = normalize lab in
1823 |                       match
1824 |                         List.find_opt
1825 |                           (fun ({ label; _ } : attributes link_def) ->
1826 |                             label = s)
1827 |                           defs
1828 |                       with
1829 |                       | Some
1830 |                           { label = _; destination; title; attributes = attr }
1831 |                         ->
1832 |                           let def = { label; destination; title } in
1833 |                           let r =
1834 |                             match k with
1835 |                             | Img -> Image (attr, def)
1836 |                             | Url -> Link (attr, def)
1837 |                           in
1838 |                           loop ~seen_link (Pre.R r :: acc') st
1839 |                       | None ->
1840 |                           if k = Img then Buffer.add_char buf '!';
1841 |                           Buffer.add_char buf '[';
1842 |                           let acc = Pre.R label :: text acc' in
1843 |                           Buffer.add_char buf ']';
1844 |                           set_pos st off1;
1845 |                           loop ~seen_link acc st)
1846 |                   | exception Fail ->
1847 |                       if k = Img then Buffer.add_char buf '!';
1848 |                       Buffer.add_char buf '[';
1849 |                       let acc = Pre.R label :: text acc in
1850 |                       Buffer.add_char buf ']';
1851 |                       set_pos st off1;
1852 |                       loop ~seen_link acc st)
1853 |               | Some _ | None ->
1854 |                   Buffer.add_char buf ']';
1855 |                   loop ~seen_link acc st)
1856 |           | (Pre.R (Link _) as x) :: acc' -> aux ~seen_link:true (x :: xs) acc'
1857 |           | x :: acc' -> aux ~seen_link (x :: xs) acc'
1858 |           | [] ->
1859 |               Buffer.add_char buf ']';
1860 |               loop ~seen_link acc st
1861 |         in
1862 |         aux ~seen_link [] acc
1863 |     | '[' -> reference_link ~seen_link Url acc st
1864 |     | ('*' | '_') as c ->
1865 |         let pre = peek_before ' ' st in
1866 |         let f post n st =
1867 |           let pre = pre |> Pre.classify_delim in
1868 |           let post = post |> Pre.classify_delim in
1869 |           let e = if c = '*' then Pre.Star else Pre.Underscore in
1870 |           loop ~seen_link (Pre.Emph (pre, post, e, n) :: text acc) st
1871 |         in
1872 |         let rec aux n =
1873 |           match peek st with
1874 |           | Some c1 when c1 = c ->
1875 |               junk st;
1876 |               aux (succ n)
1877 |           | Some c1 -> f c1 n st
1878 |           | None -> f ' ' n st
1879 |         in
1880 |         aux 0
1881 |     | _ as c ->
1882 |         junk st;
1883 |         Buffer.add_char buf c;
1884 |         loop ~seen_link acc st
1885 |     | exception Fail -> Pre.parse_emph (List.rev (text acc))
1886 |   in
1887 |   loop ~seen_link:false [] st
1888 | 
1889 | let sp3 st =
1890 |   match peek_exn st with
1891 |   | ' ' -> (
1892 |       junk st;
1893 |       match peek_exn st with
1894 |       | ' ' -> (
1895 |           junk st;
1896 |           match peek_exn st with
1897 |           | ' ' ->
1898 |               junk st;
1899 |               3
1900 |           | _ -> 2
1901 |           | exception Fail -> 2)
1902 |       | _ -> 1
1903 |       | exception Fail -> 1)
1904 |   | _ -> 0
1905 |   | exception Fail -> 0
1906 | 
1907 | let link_reference_definition st : attributes link_def =
1908 |   (* TODO remove duplicated ws/ws1 functions? *)
1909 |   let ws st =
1910 |     let rec loop seen_nl =
1911 |       match peek st with
1912 |       | Some w when is_whitespace w ->
1913 |           junk st;
1914 |           loop seen_nl
1915 |       | Some '\n' when not seen_nl ->
1916 |           junk st;
1917 |           loop true
1918 |       | Some _ | None -> ()
1919 |     in
1920 |     loop false
1921 |   in
1922 |   let ws1 st =
1923 |     match next st with w when is_whitespace w -> ws st | _ -> raise Fail
1924 |   in
1925 |   ignore (sp3 st);
1926 |   let label = link_label false st in
1927 |   if next st <> ':' then raise Fail;
1928 |   ws st;
1929 |   let destination = link_destination st in
1930 |   let attributes = inline_attribute_string st in
1931 |   match protect (ws1 >>> link_title <<< sp <<< eol) st with
1932 |   | title -> { label; destination; title = Some title; attributes }
1933 |   | exception Fail ->
1934 |       (sp >>> eol) st;
1935 |       { label; destination; title = None; attributes }
1936 | 
1937 | let link_reference_definitions st =
1938 |   let rec loop acc =
1939 |     match protect link_reference_definition st with
1940 |     | def -> loop (def :: acc)
1941 |     | exception Fail -> (acc, pos st)
1942 |   in
1943 |   loop []
1944 | 


--------------------------------------------------------------------------------
/src/sexp.ml:
--------------------------------------------------------------------------------
 1 | open Ast.Impl
 2 | 
 3 | type t =
 4 |   | Atom of string
 5 |   | List of t list
 6 | 
 7 | let atom s = Atom s
 8 | 
 9 | let rec link { label; destination; title; _ } =
10 |   let title = match title with Some title -> [ Atom title ] | None -> [] in
11 |   List (Atom "link" :: inline label :: Atom destination :: title)
12 | 
13 | and inline = function
14 |   | Concat (_, xs) -> List (Atom "concat" :: List.map inline xs)
15 |   | Text (_, s) -> Atom s
16 |   | Emph (_, il) -> List [ Atom "emph"; inline il ]
17 |   | Strong (_, il) -> List [ Atom "strong"; inline il ]
18 |   | Code _ -> Atom "code" (* FIXME: this seems broken? *)
19 |   | Hard_break _ -> Atom "hard-break"
20 |   | Soft_break _ -> Atom "soft-break"
21 |   | Link (_, def) -> List [ Atom "url"; link def ]
22 |   | Html (_, s) -> List [ Atom "html"; Atom s ]
23 |   | Image _ -> Atom "img"
24 | 
25 | let table_header (header, alignment) =
26 |   List
27 |     [ inline header
28 |     ; (match alignment with
29 |       | Default -> Atom "default"
30 |       | Left -> Atom "left"
31 |       | Centre -> Atom "centre"
32 |       | Right -> Atom "right")
33 |     ]
34 | 
35 | let rec block = function
36 |   | Paragraph (_, x) -> List [ Atom "paragraph"; inline x ]
37 |   | List (_, _, _, bls) ->
38 |       List
39 |         (Atom "list"
40 |         :: List.map (fun xs -> List (Atom "list-item" :: List.map block xs)) bls
41 |         )
42 |   | Blockquote (_, xs) -> List (Atom "blockquote" :: List.map block xs)
43 |   | Thematic_break _ -> Atom "thematic-break"
44 |   | Heading (_, level, text) ->
45 |       List [ Atom "heading"; Atom (string_of_int level); inline text ]
46 |   | Code_block (_, info, _) -> List [ Atom "code-block"; Atom info ]
47 |   | Html_block (_, s) -> List [ Atom "html"; Atom s ]
48 |   | Definition_list (_, l) ->
49 |       List
50 |         [ Atom "def-list"
51 |         ; List
52 |             (List.map
53 |                (fun elt ->
54 |                  List [ inline elt.term; List (List.map inline elt.defs) ])
55 |                l)
56 |         ]
57 |   | Table (_, headers, rows) ->
58 |       List
59 |         [ Atom "table"
60 |         ; List (List.map table_header headers)
61 |         ; List (List.map (fun row -> List (List.map inline row)) rows)
62 |         ]
63 | 
64 | let create ast = List (List.map block ast)
65 | 
66 | let needs_quotes s =
67 |   let rec loop i =
68 |     if i >= String.length s then false
69 |     else
70 |       match s.[i] with
71 |       | ' ' | '\t' | '\x00' .. '\x1F' | '\x7F' .. '\x9F' -> true
72 |       | _ -> loop (succ i)
73 |   in
74 |   loop 0
75 | 
76 | let rec print ppf = function
77 |   | Atom s when needs_quotes s -> Format.fprintf ppf "%S" s
78 |   | Atom s -> Format.pp_print_string ppf s
79 |   | List l ->
80 |       Format.fprintf
81 |         ppf
82 |         "@[<1>(%a)@]"
83 |         (Format.pp_print_list ~pp_sep:Format.pp_print_space print)
84 |         l
85 | 


--------------------------------------------------------------------------------
/src/strSlice.ml:
--------------------------------------------------------------------------------
  1 | type t =
  2 |   { base : string
  3 |   ; off : int
  4 |   ; len : int
  5 |   }
  6 | 
  7 | let of_string ?(off = 0) base = { base; off; len = String.length base - off }
  8 | let to_string { base; off; len } = String.sub base off len
  9 | let print ppf s = Format.fprintf ppf "%S" (to_string s)
 10 | let get_offset { off; _ } = off
 11 | let length { len; _ } = len
 12 | let is_empty s = length s = 0
 13 | 
 14 | let offset n { base; off; len } =
 15 |   if n < 0 then invalid_arg "offset";
 16 |   let rec loop n base off len =
 17 |     if n = 0 || len = 0 then { base; off; len }
 18 |     else
 19 |       match base.[off] with
 20 |       | '\t' ->
 21 |           let ts = ((off + 4) / 4 * 4) - off in
 22 |           let b = Buffer.create len in
 23 |           Buffer.add_substring b base 0 off;
 24 |           for _ = 1 to ts do
 25 |             Buffer.add_char b ' '
 26 |           done;
 27 |           Buffer.add_substring b base (off + 1) (len - 1);
 28 |           loop n (Buffer.contents b) off (len + ts - 1)
 29 |       | _ -> loop (n - 1) base (off + 1) (len - 1)
 30 |   in
 31 |   loop n base off len
 32 | 
 33 | let lexbuf s = Lexing.from_string (to_string s)
 34 | 
 35 | let contains s1 { base; off; len } =
 36 |   let rec loop off =
 37 |     if off + String.length s1 > len then false
 38 |     else s1 = String.sub base off (String.length s1) || loop (off + 1)
 39 |   in
 40 |   loop off
 41 | 
 42 | let head = function
 43 |   | { len = 0; _ } -> None
 44 |   | { base; off; _ } -> Some base.[off]
 45 | 
 46 | let last = function
 47 |   | { len = 0; _ } -> None
 48 |   | { base; off; len } -> Some base.[off + len - 1]
 49 | 
 50 | let tail = function
 51 |   | { len = 0; _ } as s -> s
 52 |   | { base; off; len } -> { base; off = succ off; len = pred len }
 53 | 
 54 | let uncons s = head s |> Option.map (fun hd -> (hd, tail s))
 55 | 
 56 | let take n s =
 57 |   if n < 0 then invalid_arg "take";
 58 |   let rec loop n s =
 59 |     if n = 0 || length s = 0 then []
 60 |     else match head s with Some c -> c :: loop (pred n) (tail s) | None -> []
 61 |   in
 62 |   loop n s
 63 | 
 64 | let take_prefix n s =
 65 |   if n < 0 then invalid_arg "take_prefix";
 66 |   let len = min n s.len in
 67 |   { s with len }
 68 | 
 69 | let drop n s =
 70 |   if n < 0 then invalid_arg "drop";
 71 |   (* len should not be reduced below 0, as strings cannot have a negative length *)
 72 |   let len = max (s.len - n) 0 in
 73 |   (* off should not exceed the length of the base string *)
 74 |   let off = min (s.off + n) (String.length s.base) in
 75 |   { s with off; len }
 76 | 
 77 | let drop_last = function
 78 |   | { len = 0; _ } as s -> s
 79 |   | { base; off; len } -> { base; off; len = pred len }
 80 | 
 81 | let rec drop_while f s =
 82 |   match uncons s with Some (x, s') when f x -> drop_while f s' | _ -> s
 83 | 
 84 | let rec drop_last_while f s =
 85 |   match last s with
 86 |   | Some l when f l -> drop_last_while f (drop_last s)
 87 |   | _ -> s
 88 | 
 89 | let index f s =
 90 |   let len = length s in
 91 |   let rest = drop_while (fun c -> not (f c)) s in
 92 |   let idx = len - length rest in
 93 |   if idx = len then None else Some idx
 94 | 
 95 | (* Uncomment to test *)
 96 | (* TODO: rig up method to unit test our utilities *)
 97 | (* let () = *)
 98 | (*   let index c = index (Char.equal c) in *)
 99 | (*   let s = of_string "abcd" in *)
100 | (*   assert (index 'a' s = Some 0); *)
101 | (*   assert (index 'b' s = Some 1); *)
102 | (*   assert (index 'c' s = Some 2); *)
103 | (*   assert (index 'z' s = None) *)
104 | 
105 | let split_at f s =
106 |   match index f s with
107 |   | None -> (s, offset (length s) s)
108 |   | Some idx -> ({ s with len = idx }, offset idx s)
109 | 
110 | (* Uncomment to test *)
111 | (* TODO: rig up method to unit test our utilities *)
112 | (* let () = *)
113 | (*   let f x = x = 'c' in *)
114 | (*   let before, rest = split_at f (of_string "abcdef") in *)
115 | (*   assert ("ab" = to_string before); *)
116 | (*   assert ("cdef" = to_string rest); *)
117 | (*   let before, rest = split_at f (of_string "cab") in *)
118 | (*   assert ("" = to_string before); *)
119 | (*   assert ("cab" = to_string rest); *)
120 | (*   let before, rest = split_at f (of_string "aaa") in *)
121 | (*   assert ("aaa" = to_string before); *)
122 | (*   assert ("" = to_string rest) *)
123 | 
124 | let index_unescaped sep s =
125 |   let rec loop idx state =
126 |     if idx = s.off + s.len then None
127 |       (* If we get here and we're inside a verbatim span, what to do? *)
128 |     else
129 |       match (state, s.base.[idx]) with
130 |       | `normal, '\\' -> loop (idx + 1) `escape
131 |       | `normal, '`' -> loop (idx + 1) (`verbatim_open 1)
132 |       | `normal, c when c = sep -> Some (idx - s.off)
133 |       | `normal, _ -> loop (idx + 1) `normal
134 |       | `escape, _ -> loop (idx + 1) `normal
135 |       | `verbatim_open n, '`' -> loop (idx + 1) (`verbatim_open (n + 1))
136 |       | `verbatim_open n, _ -> loop (idx + 1) (`within_verbatim n)
137 |       | `within_verbatim 1, '`' -> loop (idx + 1) `normal
138 |       | `within_verbatim n, '`' -> loop (idx + 1) (`verbatim_close (n, n - 1))
139 |       | `within_verbatim n, _ -> loop (idx + 1) (`within_verbatim n)
140 |       | `verbatim_close (_, 1), '`' -> loop (idx + 1) `normal
141 |       | `verbatim_close (n, k), '`' ->
142 |           loop (idx + 1) (`verbatim_close (n, k - 1))
143 |       | `verbatim_close (n, _), _ -> loop (idx + 1) (`within_verbatim n)
144 |   in
145 |   loop s.off `normal
146 | 
147 | let exists f s =
148 |   let rec loop s i =
149 |     if i >= s.len then false
150 |     else if f s.base.[s.off + i] then true
151 |     else loop s (succ i)
152 |   in
153 |   loop s 0
154 | 
155 | let for_all f s = not (exists (fun c -> not (f c)) s)
156 | 
157 | let sub ~len s =
158 |   if len > s.len then invalid_arg "sub";
159 |   { s with len }
160 | 
161 | let fold_left f init s =
162 |   let rec aux acc rest =
163 |     match uncons rest with None -> acc | Some (x, xs) -> aux (f x acc) xs
164 |   in
165 |   aux init s
166 | 
167 | (* let () = *)
168 | (*   let s = of_string "abcde" in *)
169 | (*   assert (fold_left (fun _ n -> n + 1) 0 s = 5); *)
170 | (*   assert (fold_left (fun c s -> String.make 2 c ^ s) "" s = "eeddccbbaa") *)
171 | 
172 | let trim s =
173 |   let is_whitespace = function
174 |     | ' ' | '\t' | '\010' .. '\013' -> true
175 |     | _ -> false
176 |   in
177 |   drop_while is_whitespace (drop_last_while is_whitespace s)
178 | 


--------------------------------------------------------------------------------
/src/strSlice.mli:
--------------------------------------------------------------------------------
 1 | (* Implementation of string slices over a base string via an offset *)
 2 | 
 3 | type t
 4 | 
 5 | val of_string : ?off:int -> string -> t
 6 | val to_string : t -> string
 7 | val offset : int -> t -> t
 8 | val lexbuf : t -> Lexing.lexbuf
 9 | val contains : string -> t -> bool
10 | val length : t -> int
11 | 
12 | val index : (char -> bool) -> t -> int option
13 | (** [index c s] is [Some i] where [i] is the index of the character in [s] for
14 |     which [f] is first true, or [None] if [f] holds for no characters in [s]. *)
15 | 
16 | val index_unescaped : char -> t -> int option
17 | (** [index_unescaped c s] is [Some i] where [i] is index of the first
18 |     occurrence of the character [c] in [s] that is not preceeded by a
19 |     backslash ['\\'] and not within a verbatim inline, or [None] if
20 |     there is no such [c] in [s]. *)
21 | 
22 | val print : Format.formatter -> t -> unit
23 | val head : t -> char option
24 | val tail : t -> t
25 | 
26 | val uncons : t -> (char * t) option
27 | (** [uncons s] is [Some (h, t)] where [h] is [head s] and [t] is [tail s],
28 |     or [None] if [is_empty s] *)
29 | 
30 | val last : t -> char option
31 | (** [last s] is the [Some c] if [c] is the last character of [s], or else [None] if [s] is empty *)
32 | 
33 | val drop_last : t -> t
34 | (** [drop_last s] is the [s] without its last character *)
35 | 
36 | val take : int -> t -> char list
37 | (** [take n s] is a list of the first [n] characters of [s] *)
38 | 
39 | val take_prefix : int -> t -> t
40 | (** [take_prefix n s] is the slice consisting of the first [n]
41 |     characters of [s]. *)
42 | 
43 | val drop : int -> t -> t
44 | (** [drop n s] is [s] with the first [n] characters dropped *)
45 | 
46 | val drop_while : (char -> bool) -> t -> t
47 | (** [drop_while f s] is [s] with the longest prefix for which [f] is true for
48 |     every character dropped *)
49 | 
50 | val drop_last_while : (char -> bool) -> t -> t
51 | (** [drop_last_while f s] is [s] with the longest suffix for which [f] is true for
52 |     every character dropped *)
53 | 
54 | val split_at : (char -> bool) -> t -> t * t
55 | (** [split_at f s] is [(taken, rest)] where [taken] is the prefix of [s] for
56 |     which [f] is [false] and [rest] is remainder, including the first character
57 |     for which [f] is [true].
58 | 
59 |     E.g.,
60 | 
61 |     {[
62 |       let () =
63 |         let f x = x = 'c' in
64 |         let before, rest = split_at f (of_string "abcdef") in
65 |         assert ("ab" = to_string before);
66 |         assert ("cdef" = to_string rest);
67 |         let before, rest = split_at f (of_string "cab") in
68 |         assert ("" = to_string before);
69 |         assert ("cab" = to_string rest);
70 |         let before, rest = split_at f (of_string "aaa") in
71 |         assert ("aaa" = to_string before);
72 |         assert ("" = to_string rest)
73 |     ]}
74 | *)
75 | 
76 | val fold_left : (char -> 'a -> 'a) -> 'a -> t -> 'a
77 | val for_all : (char -> bool) -> t -> bool
78 | val exists : (char -> bool) -> t -> bool
79 | val is_empty : t -> bool
80 | val get_offset : t -> int
81 | val sub : len:int -> t -> t
82 | 
83 | val trim : t -> t
84 | (** [trim s] returns the slice that skips any whitespace at the start
85 |     or the end of [s]. *)
86 | 


--------------------------------------------------------------------------------
/src/toc.ml:
--------------------------------------------------------------------------------
  1 | open Ast.Util
  2 | 
  3 | let rec remove_links inline =
  4 |   match inline with
  5 |   | Concat (attr, inlines) -> Concat (attr, List.map remove_links inlines)
  6 |   | Emph (attr, inline) -> Emph (attr, remove_links inline)
  7 |   | Strong (attr, inline) -> Emph (attr, remove_links inline)
  8 |   | Link (_, link) -> link.label
  9 |   | Image (attr, link) ->
 10 |       Image (attr, { link with label = remove_links link.label })
 11 |   | Hard_break _ | Soft_break _ | Html _ | Code _ | Text _ -> inline
 12 | 
 13 | let headers =
 14 |   let remove_links_f = remove_links in
 15 |   fun ?(remove_links = false) doc ->
 16 |     let headers = ref [] in
 17 |     let rec loop blocks =
 18 |       List.iter
 19 |         (function
 20 |           | Heading (attr, level, inline) ->
 21 |               let inline =
 22 |                 if remove_links then remove_links_f inline else inline
 23 |               in
 24 |               headers := (attr, level, inline) :: !headers
 25 |           | Blockquote (_, blocks) -> loop blocks
 26 |           | List (_, _, _, block_lists) -> List.iter loop block_lists
 27 |           | Paragraph _ | Thematic_break _ | Html_block _ | Definition_list _
 28 |           | Code_block _ | Table _ ->
 29 |               ())
 30 |         blocks
 31 |     in
 32 |     loop doc;
 33 |     List.rev !headers
 34 | 
 35 | (* Given a list of headers — in the order of the document — go to the
 36 |    requested subsection.  We first seek for the [number]th header at
 37 |    [level]. *)
 38 | let rec find_start headers level number subsections =
 39 |   match headers with
 40 |   | (_, header_level, _) :: tl when header_level > level ->
 41 |       (* Skip, right [level]-header not yet reached. *)
 42 |       if number = 0 then
 43 |         (* Assume empty section at [level], do not consume token. *)
 44 |         match subsections with
 45 |         | [] -> headers (* no subsection to find *)
 46 |         | n :: subsections -> find_start headers (level + 1) n subsections
 47 |       else find_start tl level number subsections
 48 |   | (_, header_level, _) :: tl when header_level = level ->
 49 |       (* At proper [level].  Have we reached the [number] one? *)
 50 |       if number <= 1 then
 51 |         match subsections with
 52 |         | [] -> tl (* no subsection to find *)
 53 |         | n :: subsections -> find_start tl (level + 1) n subsections
 54 |       else find_start tl level (number - 1) subsections
 55 |   | _ ->
 56 |       (* Sought [level] has not been found in the current section *)
 57 |       []
 58 | 
 59 | let unordered_list items = List ([], Bullet '*', Tight, items)
 60 | 
 61 | let find_id attributes =
 62 |   List.find_map
 63 |     (function k, v when String.equal "id" k -> Some v | _ -> None)
 64 |     attributes
 65 | 
 66 | let link attributes label =
 67 |   let inline =
 68 |     match find_id attributes with
 69 |     | None -> label
 70 |     | Some id -> Link ([], { label; destination = "#" ^ id; title = None })
 71 |   in
 72 |   Paragraph ([], inline)
 73 | 
 74 | let rec make_toc
 75 |     (headers : ('attr * int * 'a inline) list)
 76 |     ~min_level
 77 |     ~max_level =
 78 |   match headers with
 79 |   | _ when min_level > max_level -> ([], headers)
 80 |   | [] -> ([], [])
 81 |   | (_, level, _) :: _ when level < min_level -> ([], headers)
 82 |   | (_, level, _) :: tl when level > max_level ->
 83 |       make_toc tl ~min_level ~max_level
 84 |   | (attr, level, t) :: tl when level = min_level ->
 85 |       let sub_toc, tl = make_toc tl ~min_level:(min_level + 1) ~max_level in
 86 |       let toc_entry =
 87 |         match sub_toc with
 88 |         | [] -> [ link attr t ]
 89 |         | _ -> [ link attr t; unordered_list sub_toc ]
 90 |       in
 91 |       let toc, tl = make_toc tl ~min_level ~max_level in
 92 |       (toc_entry :: toc, tl)
 93 |   | _ ->
 94 |       let sub_toc, tl =
 95 |         make_toc headers ~min_level:(min_level + 1) ~max_level
 96 |       in
 97 |       let toc, tl = make_toc tl ~min_level ~max_level in
 98 |       ([ unordered_list sub_toc ] :: toc, tl)
 99 | 
100 | let toc ?(start = []) ?(depth = 2) doc =
101 |   if depth < 1 then invalid_arg "Omd.toc: ~depth must be >= 1";
102 |   let headers = headers ~remove_links:true doc in
103 |   let headers =
104 |     match start with
105 |     | [] -> headers
106 |     | number :: _ when number < 0 ->
107 |         invalid_arg "Omd.toc: level 1 start must be >= 0"
108 |     | number :: subsections -> find_start headers 1 number subsections
109 |   in
110 |   let len = List.length start in
111 |   let toc, _ = make_toc headers ~min_level:(len + 1) ~max_level:(len + depth) in
112 |   match toc with [] -> [] | _ -> [ unordered_list toc ]
113 | 


--------------------------------------------------------------------------------
/tests/John_MacFarlane_said_peg-markdown_takes_forever_to_process_this--jgm_peg-markdown_issues_28.md:
--------------------------------------------------------------------------------
1 | ***************************************[[[[[[[[[[[[[[[[[[[[-----------------]]]]]]]]]]]]]]]]]***************************
2 | 
3 | [[[[[[[[[[[[[[[[[[[[-----------------]]]]]]]]]]]]]]]]]: ::
4 | 


--------------------------------------------------------------------------------
/tests/attributes.md:
--------------------------------------------------------------------------------
  1 | ## Tests for attributes
  2 | 
  3 | Headings with classes:
  4 | 
  5 | ```````````````````````````````` example
  6 | Heading classes attributes testing
  7 | 
  8 | # Heading 1 {.class1}
  9 | 
 10 | # Heading 2 # {.class2}
 11 | 
 12 | # Heading 3 {.class3a .class3b}
 13 | .
 14 | <p>Heading classes attributes testing</p>
 15 | <h1 class="class1">Heading 1</h1>
 16 | <h1 class="class2">Heading 2</h1>
 17 | <h1 class="class3a class3b">Heading 3</h1>
 18 | ````````````````````````````````
 19 | 
 20 | Headings with attributes:
 21 | 
 22 | ```````````````````````````````` example
 23 | Heading data attributes testing
 24 | 
 25 | # Heading 1 {data1=value1}
 26 | 
 27 | # Heading 2 # {data2=value2}
 28 | 
 29 | # Heading 3 {data1=value3a data2=value3b}
 30 | .
 31 | <p>Heading data attributes testing</p>
 32 | <h1 data1="value1">Heading 1</h1>
 33 | <h1 data2="value2">Heading 2</h1>
 34 | <h1 data1="value3a" data2="value3b">Heading 3</h1>
 35 | ````````````````````````````````
 36 | 
 37 | Headings with ids:
 38 | 
 39 | ```````````````````````````````` example
 40 | Heading id attributes testing
 41 | 
 42 | # Heading 1 {#id1}
 43 | 
 44 | # Heading 2 # {#id2}
 45 | 
 46 | # Heading 3 {#id3a #id3b}
 47 | .
 48 | <p>Heading id attributes testing</p>
 49 | <h1 id="id1">Heading 1</h1>
 50 | <h1 id="id2">Heading 2</h1>
 51 | <h1 id="id3b">Heading 3</h1>
 52 | ````````````````````````````````
 53 | 
 54 | Inline code with classes:
 55 | 
 56 | ```````````````````````````````` example
 57 | Code blocks class attributes testing
 58 | 
 59 | `Code block 1`{.class1}
 60 | 
 61 | `Code block 2`{.class2a .class2b}
 62 | .
 63 | <p>Code blocks class attributes testing</p>
 64 | <p><code class="class1">Code block 1</code></p>
 65 | <p><code class="class2a class2b">Code block 2</code></p>
 66 | ````````````````````````````````
 67 | 
 68 | Inline code with attributes:
 69 | 
 70 | ```````````````````````````````` example
 71 | Code blocks data attributes testing
 72 | 
 73 | `Code block 1`{data1=value1}
 74 | 
 75 | `Code block 2`{data1=value2a data2=value2b}
 76 | .
 77 | <p>Code blocks data attributes testing</p>
 78 | <p><code data1="value1">Code block 1</code></p>
 79 | <p><code data1="value2a" data2="value2b">Code block 2</code></p>
 80 | ````````````````````````````````
 81 | 
 82 | Inline code with ids:
 83 | 
 84 | ```````````````````````````````` example
 85 | Code blocks id attributes testing
 86 | 
 87 | `Code block 1`{#id1}
 88 | 
 89 | `Code block 2`{#id2a #id2b}
 90 | .
 91 | <p>Code blocks id attributes testing</p>
 92 | <p><code id="id1">Code block 1</code></p>
 93 | <p><code id="id2b">Code block 2</code></p>
 94 | ````````````````````````````````
 95 | 
 96 | Code blocks with classes:
 97 | 
 98 | ```````````````````````````````` example
 99 | Code blocks class attributes testing
100 | 
101 | ``` {.class1}
102 | Code block 1
103 | ```
104 | 
105 | ``` {.class2.a .class2.b}
106 | Code block 2
107 | ```
108 | .
109 | <p>Code blocks class attributes testing</p>
110 | <pre class="class1"><code>Code block 1
111 | </code></pre>
112 | <pre class="class2.a class2.b"><code>Code block 2
113 | </code></pre>
114 | ````````````````````````````````
115 | 
116 | Code blocks with attributes:
117 | 
118 | ```````````````````````````````` example
119 | Code blocks data attributes testing
120 | 
121 | ``` {data1=value1}
122 | Code block 1
123 | ```
124 | 
125 | ``` {data1=value2.a data2=value2.b}
126 | Code block 2
127 | ```
128 | .
129 | <p>Code blocks data attributes testing</p>
130 | <pre data1="value1"><code>Code block 1
131 | </code></pre>
132 | <pre data1="value2.a" data2="value2.b"><code>Code block 2
133 | </code></pre>
134 | ````````````````````````````````
135 | 
136 | Code blocks with ids:
137 | 
138 | ```````````````````````````````` example
139 | Code blocks id attributes testing
140 | 
141 | ``` {#id1}
142 | Code block 1
143 | ```
144 | 
145 | ``` {#id2.a #id2.b}
146 | Code block 2
147 | ```
148 | .
149 | <p>Code blocks id attributes testing</p>
150 | <pre id="id1"><code>Code block 1
151 | </code></pre>
152 | <pre id="id2.b"><code>Code block 2
153 | </code></pre>
154 | ````````````````````````````````
155 | 
156 | Links with classes:
157 | 
158 | ```````````````````````````````` example
159 | Link class attributes testing
160 | 
161 | [Link 1](url_1){.class1}
162 | 
163 | [Link 2](url_2){.class2a .class2b}
164 | 
165 | ![Link 1](url_1){.class1}
166 | 
167 | ![Link 2](url_2){.class2a .class2b}
168 | .
169 | <p>Link class attributes testing</p>
170 | <p><a href="url_1" class="class1">Link 1</a></p>
171 | <p><a href="url_2" class="class2a class2b">Link 2</a></p>
172 | <p><img src="url_1" alt="Link 1" class="class1" /></p>
173 | <p><img src="url_2" alt="Link 2" class="class2a class2b" /></p>
174 | ````````````````````````````````
175 | 
176 | Links with attributes:
177 | 
178 | ```````````````````````````````` example
179 | Link data attributes testing
180 | 
181 | [Link 1](url_1){data1=value1}
182 | 
183 | [Link 2](url_2){data1=value2a data2=value2b}
184 | 
185 | ![Link 1](url_1){data1=value1}
186 | 
187 | ![Link 2](url_2){data1=value2a data2=value2b}
188 | .
189 | <p>Link data attributes testing</p>
190 | <p><a href="url_1" data1="value1">Link 1</a></p>
191 | <p><a href="url_2" data1="value2a" data2="value2b">Link 2</a></p>
192 | <p><img src="url_1" alt="Link 1" data1="value1" /></p>
193 | <p><img src="url_2" alt="Link 2" data1="value2a" data2="value2b" /></p>
194 | ````````````````````````````````
195 | 
196 | Links with ids:
197 | 
198 | ```````````````````````````````` example
199 | Link id attributes testing
200 | 
201 | [Link 1](url_1){#id1}
202 | 
203 | [Link 2](url_2){#id2a #id2b}
204 | 
205 | ![Link 1](url_1){#id1}
206 | 
207 | ![Link 2](url_2){#id2a #id2b}
208 | .
209 | <p>Link id attributes testing</p>
210 | <p><a href="url_1" id="id1">Link 1</a></p>
211 | <p><a href="url_2" id="id2b">Link 2</a></p>
212 | <p><img src="url_1" alt="Link 1" id="id1" /></p>
213 | <p><img src="url_2" alt="Link 2" id="id2b" /></p>
214 | ````````````````````````````````
215 | 
216 | Refs with classes:
217 | 
218 | ```````````````````````````````` example
219 | Ref class attributes testing
220 | 
221 | [Ref 1][ref_1]
222 | 
223 | [Ref 2][ref_2]
224 | 
225 | ![Ref 3][ref_3]
226 | 
227 | ![Ref 4][ref_4]
228 | 
229 | [ref_1]: ref_1 {.class1}
230 | [ref_2]: ref_2 {.class2a .class2b}
231 | [ref_3]: ref_3 {.class1}
232 | [ref_4]: ref_4 {.class2a .class2b}
233 | .
234 | <p>Ref class attributes testing</p>
235 | <p><a href="ref_1" class="class1">Ref 1</a></p>
236 | <p><a href="ref_2" class="class2a class2b">Ref 2</a></p>
237 | <p><img src="ref_3" alt="Ref 3" class="class1" /></p>
238 | <p><img src="ref_4" alt="Ref 4" class="class2a class2b" /></p>
239 | ````````````````````````````````
240 | 
241 | Refs with attributes:
242 | 
243 | ```````````````````````````````` example
244 | Ref data attributes testing
245 | 
246 | [Ref 1][ref_1]
247 | 
248 | [Ref 2][ref_2]
249 | 
250 | ![Ref 3][ref_3]
251 | 
252 | ![Ref 4][ref_4]
253 | 
254 | [ref_1]: ref_1 {data1=value1}
255 | [ref_2]: ref_2 {data1=value2a data2=value2b}
256 | [ref_3]: ref_3 {data1=value1}
257 | [ref_4]: ref_4 {data1=value2a data2=value2b}
258 | .
259 | <p>Ref data attributes testing</p>
260 | <p><a href="ref_1" data1="value1">Ref 1</a></p>
261 | <p><a href="ref_2" data1="value2a" data2="value2b">Ref 2</a></p>
262 | <p><img src="ref_3" alt="Ref 3" data1="value1" /></p>
263 | <p><img src="ref_4" alt="Ref 4" data1="value2a" data2="value2b" /></p>
264 | ````````````````````````````````
265 | 
266 | Refs with ids:
267 | 
268 | ```````````````````````````````` example
269 | Ref id attributes testing
270 | 
271 | [Ref 1][ref_1]
272 | 
273 | [Ref 2][ref_2]
274 | 
275 | ![Ref 3][ref_3]
276 | 
277 | ![Ref 4][ref_4]
278 | 
279 | [ref_1]: ref_1 {#id1}
280 | [ref_2]: ref_2 {#id2a #id2b}
281 | [ref_3]: ref_3 {#id1}
282 | [ref_4]: ref_4 {#id2a #id2b}
283 | .
284 | <p>Ref id attributes testing</p>
285 | <p><a href="ref_1" id="id1">Ref 1</a></p>
286 | <p><a href="ref_2" id="id2b">Ref 2</a></p>
287 | <p><img src="ref_3" alt="Ref 3" id="id1" /></p>
288 | <p><img src="ref_4" alt="Ref 4" id="id2b" /></p>
289 | ````````````````````````````````
290 | 


--------------------------------------------------------------------------------
/tests/blackbox/dune:
--------------------------------------------------------------------------------
1 | (cram
2 |  (deps %{bin:omd}))
3 | 


--------------------------------------------------------------------------------
/tests/blackbox/emphasis.t:
--------------------------------------------------------------------------------
1 | Extra test not covered by the conformance tests
2 | 
3 |   $ omd << "MD"
4 |   > foo**bar*
5 |   > MD
6 |   <p>foo**bar*</p>
7 | 


--------------------------------------------------------------------------------
/tests/blackbox/heading-id.t:
--------------------------------------------------------------------------------
 1 |   $ omd << "MD"
 2 |   > ### This is an Header Without Id
 3 |   > ### 1 2 Header that starts with 2 numbers
 4 |   > ### Header with an id {#header-id}
 5 |   > ### Maître d'hÔtel 😬
 6 |   > ### 👋👋    ÔHey!       👋👋
 7 |   > ### 👋👋 ÔHey! 👋👋
 8 |   > ### *Dogs*?--in *my* house?
 9 |   > ### [HTML], [S5], or [RTF]?
10 |   > ### 3. Applications
11 |   > ### hello.world
12 |   > ### -hello-
13 |   > ###    with    multiple   spaces   
14 |   > ### with&nbsp;&nbsp;&nbsp;spaces
15 |   > ### 33
16 |   > ### 
17 |   > MD
18 |   <h3 id="this-is-an-header-without-id">This is an Header Without Id</h3>
19 |   <h3 id="header-that-starts-with-2-numbers">1 2 Header that starts with 2 numbers</h3>
20 |   <h3 id="header-id">Header with an id</h3>
21 |   <h3 id="maître-dhôtel">Maître d'hÔtel 😬</h3>
22 |   <h3 id="ôhey">👋👋    ÔHey!       👋👋</h3>
23 |   <h3 id="ôhey-1">👋👋 ÔHey! 👋👋</h3>
24 |   <h3 id="dogs--in-my-house"><em>Dogs</em>?--in <em>my</em> house?</h3>
25 |   <h3 id="html-s5-or-rtf">[HTML], [S5], or [RTF]?</h3>
26 |   <h3 id="applications">3. Applications</h3>
27 |   <h3 id="hello.world">hello.world</h3>
28 |   <h3 id="hello-">-hello-</h3>
29 |   <h3 id="with-multiple-spaces">with    multiple   spaces</h3>
30 |   <h3 id="with-spaces">with   spaces</h3>
31 |   <h3 id="section">33</h3>
32 |   <h3 id="section-1"></h3>
33 | 


--------------------------------------------------------------------------------
/tests/blackbox/normalize-label.t:
--------------------------------------------------------------------------------
 1 | Case insensitive comparison
 2 |   $ omd << "MD"
 3 |   > [ΑΓΩ]: /url
 4 |   > 
 5 |   > [αγω]
 6 |   > MD
 7 |   <p><a href="/url">αγω</a></p>
 8 | 
 9 | Collapse consecutive internal spaces, tabs
10 |   $ omd << "MD"
11 |   > [ΑΓ  	Ω]: /url
12 |   > 
13 |   > [αγ ω]
14 |   > MD
15 |   <p><a href="/url">αγ ω</a></p>
16 | 
17 | Strip leading and trailing spaces, tabs
18 |   $ omd << "MD"
19 |   > [ 	ΑΓΩ  ]: /url
20 |   > 
21 |   > [αγω]
22 |   > MD
23 |   <p><a href="/url">αγω</a></p>
24 | 
25 | Doesn't match due to the internal space
26 |   $ omd << "MD"
27 |   > [ΑΓΩ]: /url
28 |   > 
29 |   > [α γω]
30 |   > MD
31 |   <p>[α γω]</p>
32 | 


--------------------------------------------------------------------------------
/tests/blackbox/regression-224.t:
--------------------------------------------------------------------------------
 1 | Regression test for https://github.com/ocaml/omd/issues/224
 2 | 
 3 |   $ omd << "MD"
 4 |   > hello_world:
 5 |   > ```
 6 |   > is_this_code_
 7 |   > ```
 8 |   > MD
 9 |   <p>hello_world:</p>
10 |   <pre><code>is_this_code_
11 |   </code></pre>
12 | 


--------------------------------------------------------------------------------
/tests/def_list.md:
--------------------------------------------------------------------------------
 1 | ## Definition lists
 2 | 
 3 | ```````````````````````````````` example
 4 | First Term
 5 | : This is the definition of the first term.
 6 | 
 7 | Second Term
 8 | : This is one definition of the second term.
 9 | : This is another definition of the second term.
10 | which is multiline
11 | 
12 | : This is not a correct definition list
13 | .
14 | <dl><dt>First Term</dt>
15 | <dd>This is the definition of the first term.</dd>
16 | <dt>Second Term</dt>
17 | <dd>This is one definition of the second term.</dd>
18 | <dd>This is another definition of the second term.
19 | which is multiline</dd>
20 | </dl>
21 | <p>: This is not a correct definition list</p>
22 | ````````````````````````````````
23 | 


--------------------------------------------------------------------------------
/tests/dune:
--------------------------------------------------------------------------------
 1 | (library
 2 |  (name expect_tests)
 3 |  (modules expect_tests)
 4 |  (inline_tests)
 5 |  (preprocess
 6 |   (pps ppx_expect))
 7 |  (libraries omd))
 8 | 
 9 | (executable
10 |  (name extract_tests)
11 |  (libraries str)
12 |  (modules extract_tests))
13 | 
14 | (rule
15 |  (with-stdout-to
16 |   dune.inc.new
17 |   (run ./extract_tests.exe -write-dune-file %{dep:spec.txt}
18 |     %{dep:gfm_table_spec.md} %{dep:extra_table_tests.md} %{dep:attributes.md}
19 |     %{dep:def_list.md})))
20 | 
21 | (include dune.inc)
22 | 
23 | (executable
24 |  (name omd)
25 |  (libraries str omd)
26 |  (modules omd))
27 | 
28 | (rule
29 |  (alias gen)
30 |  (action
31 |   (diff dune.inc dune.inc.new)))
32 | 


--------------------------------------------------------------------------------
/tests/expect_tests.ml:
--------------------------------------------------------------------------------
  1 | let show x = Omd.to_html x |> print_string
  2 | 
  3 | let%expect_test "construct inline elements" =
  4 |   show
  5 |     Omd.Ctor.
  6 |       [ p
  7 |           [ em "emphasized"
  8 |           ; br
  9 |           ; strong ~attrs:[ ("class", "my-class") ] "strong"
 10 |           ; nl
 11 |           ; code "some code"
 12 |           ; nl
 13 |           ; a "label" ~url:"my/page/url"
 14 |           ; nl
 15 |           ; a "other label" ~url:"my/other/page" ~title:"title text"
 16 |           ; nl
 17 |           ; img "my/img/src" ~alt:"Some alt text"
 18 |           ; nl
 19 |           ; img "my/img/src" ~alt:"Some alt text" ~title:"some title"
 20 |           ; nl
 21 |           ; html "<em>inline html <!-- with a comment! --> here</em>"
 22 |           ]
 23 |       ];
 24 |   [%expect
 25 |     {|
 26 |     <p><em>emphasized</em><br />
 27 |     <strong class="my-class">strong</strong>
 28 |     <code>some code</code>
 29 |     <a href="my/page/url">label</a>
 30 |     <a href="my/other/page" title="title text">other label</a>
 31 |     <img src="my/img/src" alt="Some alt text" />
 32 |     <img src="my/img/src" alt="Some alt text" title="some title" />
 33 |     <em>inline html <!-- with a comment! --> here</em></p> |}]
 34 | 
 35 | let%expect_test "construct headings" =
 36 |   show Omd.Ctor.[ h 1 ~attrs:[ ("class", "my-class") ] [ txt "Heading 1" ] ];
 37 |   [%expect {| <h1 id="heading-1" class="my-class">Heading 1</h1> |}];
 38 |   show Omd.Ctor.[ h 6 [ txt "Heading 6"; em "with emphasis!" ] ];
 39 |   [%expect
 40 |     {| <h6 id="heading-6with-emphasis">Heading 6<em>with emphasis!</em></h6> |}]
 41 | 
 42 | let%expect_test "construct lists" =
 43 |   show
 44 |     Omd.Ctor.
 45 |       [ ul
 46 |           ~spacing:Tight
 47 |           [ [ p [ txt "Item 1" ] ]
 48 |           ; [ p [ txt "Item 2" ] ]
 49 |           ; [ p [ txt "Item 3"; strong "with strength!" ] ]
 50 |           ]
 51 |       ];
 52 |   [%expect
 53 |     {|
 54 |     <ul>
 55 |     <li>Item 1
 56 |     </li>
 57 |     <li>Item 2
 58 |     </li>
 59 |     <li>Item 3<strong>with strength!</strong>
 60 |     </li>
 61 |     </ul> |}];
 62 |   show
 63 |     Omd.Ctor.
 64 |       [ ol
 65 |           [ [ p [ txt "Item 1" ] ]
 66 |           ; [ p [ txt "Item 2" ] ]
 67 |           ; [ p [ txt "Item 3" ] ]
 68 |           ]
 69 |       ];
 70 |   [%expect
 71 |     {|
 72 |     <ol>
 73 |     <li>
 74 |     <p>Item 1</p>
 75 |     </li>
 76 |     <li>
 77 |     <p>Item 2</p>
 78 |     </li>
 79 |     <li>
 80 |     <p>Item 3</p>
 81 |     </li>
 82 |     </ol> |}]
 83 | 
 84 | let%expect_test "construct paragraphs and blockquotes with hrs" =
 85 |   let para =
 86 |     Omd.Ctor.(
 87 |       p
 88 |         ~attrs:[ ("class", "my-para") ]
 89 |         [ txt "Contet of"; em "this"; txt "paragraph" ])
 90 |   in
 91 |   show
 92 |     Omd.Ctor.
 93 |       [ blockquote [ para; hr; p [ txt "Content of second paragraph" ] ] ];
 94 |   [%expect
 95 |     {|
 96 |     <blockquote>
 97 |     <p class="my-para">Contet of<em>this</em>paragraph</p>
 98 |     <hr />
 99 |     <p>Content of second paragraph</p>
100 |     </blockquote> |}]
101 | 
102 | let%expect_test "construct code blocks" =
103 |   show
104 |     Omd.Ctor.
105 |       [ code_bl
106 |           ~attrs:[ ("class", "my-code") ]
107 |           ~lang:"ocaml"
108 |           "let foo = bar + bing"
109 |       ];
110 |   [%expect
111 |     {| <pre class="my-code"><code class="language-ocaml">let foo = bar + bing</code></pre> |}]
112 | 
113 | let%expect_test "construct html blocks" =
114 |   show
115 |     Omd.Ctor.
116 |       [ html_bl "<p><em>Some</em> inline HTML <!-- With a comment --> here</p>"
117 |       ];
118 |   [%expect {| <p><em>Some</em> inline HTML <!-- With a comment --> here</p> |}]
119 | 
120 | let%expect_test "construct definition list" =
121 |   show
122 |     Omd.Ctor.
123 |       [ dl
124 |           [ { term = [ txt "def term 1" ]
125 |             ; defs =
126 |                 [ [ txt "definition 1.1" ]
127 |                 ; [ txt "definition 1.2" ]
128 |                 ; [ txt "definition 1.3" ]
129 |                 ]
130 |             }
131 |           ; { term = [ txt "def term 2" ]
132 |             ; defs =
133 |                 [ [ txt "definition 2.1" ]
134 |                 ; [ txt "definition 2.2" ]
135 |                 ; [ txt "definition 2.3" ]
136 |                 ]
137 |             }
138 |           ]
139 |       ];
140 |   [%expect
141 |     {|
142 |     <dl><dt>def term 1</dt>
143 |     <dd>definition 1.1</dd>
144 |     <dd>definition 1.2</dd>
145 |     <dd>definition 1.3</dd>
146 |     <dt>def term 2</dt>
147 |     <dd>definition 2.1</dd>
148 |     <dd>definition 2.2</dd>
149 |     <dd>definition 2.3</dd>
150 |     </dl> |}]
151 | 


--------------------------------------------------------------------------------
/tests/extra_table_tests.md:
--------------------------------------------------------------------------------
  1 | ## Additional Table Tests
  2 | 
  3 | Complete table
  4 | 
  5 | ```````````````````````````````` example
  6 | | abc | def   | **ghi**   |
  7 | |:----|:-----:|----------:|
  8 | | 1   | 2     | [link][0] |
  9 | | 3   | 4     | `code`    |
 10 | | 5   | `6`   | \| `|`    |
 11 | 
 12 | [0]: https://example.com
 13 | .
 14 | <table>
 15 | <thead>
 16 | <tr>
 17 | <th align="left">abc</th>
 18 | <th align="center">def</th>
 19 | <th align="right"><strong>ghi</strong></th>
 20 | </tr>
 21 | </thead>
 22 | <tbody>
 23 | <tr>
 24 | <td align="left">1</td>
 25 | <td align="center">2</td>
 26 | <td align="right"><a href="https://example.com">link</a></td>
 27 | </tr>
 28 | <tr>
 29 | <td align="left">3</td>
 30 | <td align="center">4</td>
 31 | <td align="right"><code>code</code></td>
 32 | </tr>
 33 | <tr>
 34 | <td align="left">5</td>
 35 | <td align="center"><code>6</code></td>
 36 | <td align="right">| <code>|</code></td>
 37 | </tr>
 38 | </tbody>
 39 | </table>
 40 | ````````````````````````````````
 41 | 
 42 | 
 43 | Not a table (no delimiter)
 44 | 
 45 | ```````````````````````````````` example
 46 | | abc |
 47 | | def |
 48 | .
 49 | <p>| abc |
 50 | | def |</p>
 51 | ````````````````````````````````
 52 | 
 53 | Too few columns in a row gets expanded
 54 | 
 55 | ```````````````````````````````` example
 56 | | a | b |
 57 | |---|---|
 58 | | 1 |
 59 | .
 60 | <table>
 61 | <thead>
 62 | <tr>
 63 | <th>a</th>
 64 | <th>b</th>
 65 | </tr>
 66 | </thead>
 67 | <tbody>
 68 | <tr>
 69 | <td>1</td>
 70 | <td></td>
 71 | </tr>
 72 | </tbody>
 73 | </table>
 74 | ````````````````````````````````
 75 | 
 76 | Table with no columns not allowed
 77 | 
 78 | ````````````````````````````````example
 79 | |
 80 | |
 81 | .
 82 | <p>|
 83 | |</p>
 84 | ````````````````````````````````
 85 | 
 86 | Minimal table 1
 87 | 
 88 | ```````````````````````````````` example
 89 | h|
 90 | -|
 91 | .
 92 | <table>
 93 | <thead>
 94 | <tr>
 95 | <th>h</th>
 96 | </tr>
 97 | </thead>
 98 | </table>
 99 | ````````````````````````````````
100 | 
101 | Minimal table 2
102 | 
103 | ```````````````````````````````` example
104 | |h
105 | |-
106 | .
107 | <table>
108 | <thead>
109 | <tr>
110 | <th>h</th>
111 | </tr>
112 | </thead>
113 | </table>
114 | ````````````````````````````````
115 | 
116 | Minimal table 3
117 | 
118 | ```````````````````````````````` example
119 | ||
120 | ||
121 | .
122 | <table>
123 | <thead>
124 | <tr>
125 | <th></th>
126 | </tr>
127 | </thead>
128 | </table>
129 | ````````````````````````````````
130 | 
131 | Escaped `|` characters
132 | 
133 | ```````````````````````````````` example
134 | \||\|
135 | -|-
136 | |
137 | .
138 | <table>
139 | <thead>
140 | <tr>
141 | <th>|</th>
142 | <th>|</th>
143 | </tr>
144 | </thead>
145 | <tbody>
146 | <tr>
147 | <td></td>
148 | <td></td>
149 | </tr>
150 | </tbody>
151 | </table>
152 | ````````````````````````````````
153 | 
154 | `|` characters inside code spans without escaping
155 | 
156 | ```````````````````````````````` example
157 | abc | `|` | def
158 | ----|-----|-------------
159 | ghi |     | `` `| ``jkl
160 | .
161 | <table>
162 | <thead>
163 | <tr>
164 | <th>abc</th>
165 | <th><code>|</code></th>
166 | <th>def</th>
167 | </tr>
168 | </thead>
169 | <tbody>
170 | <tr>
171 | <td>ghi</td>
172 | <td></td>
173 | <td><code>`|</code>jkl</td>
174 | </tr>
175 | </tbody>
176 | </table>
177 | ````````````````````````````````
178 | 
179 | Cells starting with numbers
180 | 
181 | ```````````````````````````````` example
182 | 0 | 1
183 | --|--
184 | 3 | 4
185 | .
186 | <table>
187 | <thead>
188 | <tr>
189 | <th>0</th>
190 | <th>1</th>
191 | </tr>
192 | </thead>
193 | <tbody>
194 | <tr>
195 | <td>3</td>
196 | <td>4</td>
197 | </tr>
198 | </tbody>
199 | </table>
200 | ````````````````````````````````
201 | 
202 | Setext headings or cells? A setext heading marker isn't a start of a
203 | new block, so it gets treated as if it were a single element row.
204 | 
205 | ```````````````````````````````` example
206 | = | b
207 | --|--
208 | =
209 | .
210 | <table>
211 | <thead>
212 | <tr>
213 | <th>=</th>
214 | <th>b</th>
215 | </tr>
216 | </thead>
217 | <tbody>
218 | <tr>
219 | <td>=</td>
220 | <td></td>
221 | </tr>
222 | </tbody>
223 | </table>
224 | ````````````````````````````````
225 | 
226 | Tables in a list
227 | 
228 | ```````````````````````````````` example
229 | 1. abc | def
230 |    ----|----
231 |    1   | 2
232 | 
233 | 2. | abc | def |
234 |    |-----|-----|
235 |    | 1.  | 2.  |
236 | .
237 | <ol>
238 | <li><table>
239 | <thead>
240 | <tr>
241 | <th>abc</th>
242 | <th>def</th>
243 | </tr>
244 | </thead>
245 | <tbody>
246 | <tr>
247 | <td>1</td>
248 | <td>2</td>
249 | </tr>
250 | </tbody>
251 | </table></li>
252 | <li><table>
253 | <thead>
254 | <tr>
255 | <th>abc</th>
256 | <th>def</th>
257 | </tr>
258 | </thead>
259 | <tbody>
260 | <tr>
261 | <td>1.</td>
262 | <td>2.</td>
263 | </tr>
264 | </tbody>
265 | </table></li>
266 | </ol>
267 | ````````````````````````````````
268 | 


--------------------------------------------------------------------------------
/tests/extract_tests.ml:
--------------------------------------------------------------------------------
  1 | (* Extract test cases from Spec *)
  2 | let protect ~finally f =
  3 |   match f () with
  4 |   | exception e ->
  5 |       finally ();
  6 |       raise e
  7 |   | r ->
  8 |       finally ();
  9 |       r
 10 | 
 11 | let disabled = []
 12 | 
 13 | let with_open_in fn f =
 14 |   let ic = open_in fn in
 15 |   protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic)
 16 | 
 17 | let with_open_out fn f =
 18 |   let oc = open_out fn in
 19 |   protect ~finally:(fun () -> close_out_noerr oc) (fun () -> f oc)
 20 | 
 21 | let begins_with s s' =
 22 |   String.length s >= String.length s' && String.sub s 0 (String.length s') = s'
 23 | 
 24 | let test_delim = "````````````````````````````````"
 25 | let tab_re = Str.regexp_string "→"
 26 | let insert_tabs s = Str.global_replace tab_re "\t" s
 27 | 
 28 | type test =
 29 |   { filename : string
 30 |   ; example : int
 31 |   ; markdown : string
 32 |   ; html : string
 33 |   }
 34 | 
 35 | let add_line buf l =
 36 |   Buffer.add_string buf (insert_tabs l);
 37 |   Buffer.add_char buf '\n'
 38 | 
 39 | let parse_test_spec filename =
 40 |   let buf = Buffer.create 256 in
 41 |   with_open_in filename @@ fun ic ->
 42 |   let rec go tests example =
 43 |     match input_line ic with
 44 |     | exception End_of_file -> List.rev tests
 45 |     | line ->
 46 |         if begins_with line test_delim then begin
 47 |           Buffer.clear buf;
 48 |           let rec get_test () =
 49 |             let line = input_line ic in
 50 |             if line = "." then begin
 51 |               let markdown = Buffer.contents buf in
 52 |               Buffer.clear buf;
 53 |               let rec get_html () =
 54 |                 let line = input_line ic in
 55 |                 if begins_with line test_delim then
 56 |                   let html = Buffer.contents buf in
 57 |                   { filename; example; markdown; html }
 58 |                 else begin
 59 |                   add_line buf line;
 60 |                   get_html ()
 61 |                 end
 62 |               in
 63 |               get_html ()
 64 |             end
 65 |             else begin
 66 |               add_line buf line;
 67 |               get_test ()
 68 |             end
 69 |           in
 70 |           go (get_test () :: tests) (succ example)
 71 |         end
 72 |         else go tests example
 73 |   in
 74 |   go [] 1
 75 | 
 76 | let write_dune_file test_specs tests =
 77 |   let pp ppf { filename; example; _ } =
 78 |     let base = Filename.remove_extension filename in
 79 |     Format.fprintf ppf "@ %s-%03d.md %s-%03d.html" base example base example
 80 |   in
 81 |   Format.printf
 82 |     "@[<v1>(rule@ @[<hov1>(deps %s)@]@ @[<v1>(targets%t)@]@ @[<hov1>(action@ \
 83 |      (run ./extract_tests.exe -generate-test-files %%{deps}))@])@]@."
 84 |     (String.concat " " test_specs)
 85 |     (fun ppf -> List.iter (pp ppf) tests);
 86 |   List.iter
 87 |     (fun { filename; example; _ } ->
 88 |       let base = Filename.remove_extension filename in
 89 |       Format.printf
 90 |         "@[<v1>(rule@ @[<hov1>(action@ @[<hov1>(with-stdout-to \
 91 |          %s-%03d.html.new@ @[<hov1>(run@ ./omd.exe@ \
 92 |          %%{dep:%s-%03d.md})@])@])@])@]@."
 93 |         base
 94 |         example
 95 |         base
 96 |         example;
 97 |       Format.printf
 98 |         "@[<v1>(rule@ @[<hov1>(alias %s-%03d)@]@ @[<hov1>(action@ \
 99 |          @[<hov1>(diff@ %s-%03d.html %s-%03d.html.new)@])@])@]@."
100 |         base
101 |         example
102 |         base
103 |         example
104 |         base
105 |         example)
106 |     tests;
107 |   let pp ppf { filename; example; _ } =
108 |     let base = Filename.remove_extension filename in
109 |     if not (List.mem example disabled) then
110 |       Format.fprintf ppf "@ (alias %s-%03d)" base example
111 |   in
112 |   Format.printf
113 |     "@[<v1>(alias@ (name runtest)@ @[<v1>(deps%t)@])@]@."
114 |     (fun ppf -> List.iter (pp ppf) tests)
115 | 
116 | let li_begin_re = Str.regexp_string "<li>\n"
117 | let li_end_re = Str.regexp_string "\n</li>"
118 | 
119 | let normalize_html s =
120 |   Str.global_replace li_end_re "</li>" (Str.global_replace li_begin_re "<li>" s)
121 | 
122 | let generate_test_files tests =
123 |   let f { filename; example; markdown; html } =
124 |     let base = Filename.remove_extension filename in
125 |     with_open_out (Printf.sprintf "%s-%03d.md" base example) (fun oc ->
126 |         output_string oc markdown);
127 |     with_open_out (Printf.sprintf "%s-%03d.html" base example) (fun oc ->
128 |         output_string oc (normalize_html html))
129 |   in
130 |   List.iter f tests
131 | 
132 | type mode =
133 |   | Generate_test_files
134 |   | Write_dune_file
135 | 
136 | let mode = ref None
137 | 
138 | let spec =
139 |   let set x () = mode := Some x in
140 |   [ ( "-generate-test-files"
141 |     , Arg.Unit (set Generate_test_files)
142 |     , " Generate test files" )
143 |   ; ("-write-dune-file", Arg.Unit (set Write_dune_file), " Write dune file")
144 |   ]
145 | 
146 | let test_specs = ref []
147 | let add_to_list l x = l := x :: !l
148 | 
149 | let () =
150 |   Arg.parse (Arg.align spec) (add_to_list test_specs) "";
151 |   let test_specs = List.rev !test_specs in
152 |   let tests = List.flatten (List.map parse_test_spec test_specs) in
153 |   match !mode with
154 |   | None -> ()
155 |   | Some Generate_test_files -> generate_test_files tests
156 |   | Some Write_dune_file -> write_dune_file test_specs tests
157 | 


--------------------------------------------------------------------------------
/tests/gfm_table_spec.md:
--------------------------------------------------------------------------------
  1 | *Note on the Omd implementation:* Table specification excerpted from
  2 | [this commit][0] in the GitHub-Flavored Markdown repository. There is
  3 | one alteration (noted below), where the GFM spec appears to contradict
  4 | the CommonMark spec (and itself) on the treatment of escape characters
  5 | in code spans.
  6 | 
  7 | [0]: https://github.com/github/cmark-gfm/blob/6a6e335709ef68cf2c616eeaf61b09ed4c654669/test/spec.txt
  8 | 
  9 | ## Tables (extension)
 10 | 
 11 | GFM enables the `table` extension, where an additional leaf block type is
 12 | available.
 13 | 
 14 | A [table](@) is an arrangement of data with rows and columns, consisting of a
 15 | single header row, a [delimiter row] separating the header from the data, and
 16 | zero or more data rows.
 17 | 
 18 | Each row consists of cells containing arbitrary text, in which [inlines] are
 19 | parsed, separated by pipes (`|`).  A leading and trailing pipe is also
 20 | recommended for clarity of reading, and if there's otherwise parsing ambiguity.
 21 | Spaces between pipes and cell content are trimmed.  Block-level elements cannot
 22 | be inserted in a table.
 23 | 
 24 | The [delimiter row](@) consists of cells whose only content are hyphens (`-`),
 25 | and optionally, a leading or trailing colon (`:`), or both, to indicate left,
 26 | right, or center alignment respectively.
 27 | 
 28 | ```````````````````````````````` example table
 29 | | foo | bar |
 30 | | --- | --- |
 31 | | baz | bim |
 32 | .
 33 | <table>
 34 | <thead>
 35 | <tr>
 36 | <th>foo</th>
 37 | <th>bar</th>
 38 | </tr>
 39 | </thead>
 40 | <tbody>
 41 | <tr>
 42 | <td>baz</td>
 43 | <td>bim</td>
 44 | </tr>
 45 | </tbody>
 46 | </table>
 47 | ````````````````````````````````
 48 | 
 49 | Cells in one column don't need to match length, though it's easier to read if
 50 | they are. Likewise, use of leading and trailing pipes may be inconsistent:
 51 | 
 52 | ```````````````````````````````` example table
 53 | | abc | defghi |
 54 | :-: | -----------:
 55 | bar | baz
 56 | .
 57 | <table>
 58 | <thead>
 59 | <tr>
 60 | <th align="center">abc</th>
 61 | <th align="right">defghi</th>
 62 | </tr>
 63 | </thead>
 64 | <tbody>
 65 | <tr>
 66 | <td align="center">bar</td>
 67 | <td align="right">baz</td>
 68 | </tr>
 69 | </tbody>
 70 | </table>
 71 | ````````````````````````````````
 72 | 
 73 | Include a pipe in a cell's content by escaping it, including inside other
 74 | inline spans:
 75 | 
 76 | **Omd alteration**: the escape `\` in the code span is preserved here,
 77 | in accordance with the rules for code spans in the CommonMark and GFM
 78 | specs.
 79 | 
 80 | ```````````````````````````````` example table
 81 | | f\|oo  |
 82 | | ------ |
 83 | | b `\|` az |
 84 | | b **\|** im |
 85 | .
 86 | <table>
 87 | <thead>
 88 | <tr>
 89 | <th>f|oo</th>
 90 | </tr>
 91 | </thead>
 92 | <tbody>
 93 | <tr>
 94 | <td>b <code>\|</code> az</td>
 95 | </tr>
 96 | <tr>
 97 | <td>b <strong>|</strong> im</td>
 98 | </tr>
 99 | </tbody>
100 | </table>
101 | ````````````````````````````````
102 | 
103 | The table is broken at the first empty line, or beginning of another
104 | block-level structure:
105 | 
106 | ```````````````````````````````` example table
107 | | abc | def |
108 | | --- | --- |
109 | | bar | baz |
110 | > bar
111 | .
112 | <table>
113 | <thead>
114 | <tr>
115 | <th>abc</th>
116 | <th>def</th>
117 | </tr>
118 | </thead>
119 | <tbody>
120 | <tr>
121 | <td>bar</td>
122 | <td>baz</td>
123 | </tr>
124 | </tbody>
125 | </table>
126 | <blockquote>
127 | <p>bar</p>
128 | </blockquote>
129 | ````````````````````````````````
130 | 
131 | ```````````````````````````````` example table
132 | | abc | def |
133 | | --- | --- |
134 | | bar | baz |
135 | bar
136 | 
137 | bar
138 | .
139 | <table>
140 | <thead>
141 | <tr>
142 | <th>abc</th>
143 | <th>def</th>
144 | </tr>
145 | </thead>
146 | <tbody>
147 | <tr>
148 | <td>bar</td>
149 | <td>baz</td>
150 | </tr>
151 | <tr>
152 | <td>bar</td>
153 | <td></td>
154 | </tr>
155 | </tbody>
156 | </table>
157 | <p>bar</p>
158 | ````````````````````````````````
159 | 
160 | The header row must match the [delimiter row] in the number of cells.  If not,
161 | a table will not be recognized:
162 | 
163 | ```````````````````````````````` example table
164 | | abc | def |
165 | | --- |
166 | | bar |
167 | .
168 | <p>| abc | def |
169 | | --- |
170 | | bar |</p>
171 | ````````````````````````````````
172 | 
173 | The remainder of the table's rows may vary in the number of cells.  If there
174 | are a number of cells fewer than the number of cells in the header row, empty
175 | cells are inserted.  If there are greater, the excess is ignored:
176 | 
177 | ```````````````````````````````` example table
178 | | abc | def |
179 | | --- | --- |
180 | | bar |
181 | | bar | baz | boo |
182 | .
183 | <table>
184 | <thead>
185 | <tr>
186 | <th>abc</th>
187 | <th>def</th>
188 | </tr>
189 | </thead>
190 | <tbody>
191 | <tr>
192 | <td>bar</td>
193 | <td></td>
194 | </tr>
195 | <tr>
196 | <td>bar</td>
197 | <td>baz</td>
198 | </tr>
199 | </tbody>
200 | </table>
201 | ````````````````````````````````
202 | 
203 | If there are no rows in the body, no `<tbody>` is generated in HTML output:
204 | 
205 | ```````````````````````````````` example table
206 | | abc | def |
207 | | --- | --- |
208 | .
209 | <table>
210 | <thead>
211 | <tr>
212 | <th>abc</th>
213 | <th>def</th>
214 | </tr>
215 | </thead>
216 | </table>
217 | ````````````````````````````````
218 | 


--------------------------------------------------------------------------------
/tests/omd.ml:
--------------------------------------------------------------------------------
 1 | let protect ~finally f =
 2 |   match f () with
 3 |   | exception e ->
 4 |       finally ();
 5 |       raise e
 6 |   | r ->
 7 |       finally ();
 8 |       r
 9 | 
10 | let li_begin_re = Str.regexp_string "<li>\n"
11 | let li_end_re = Str.regexp_string "\n</li>"
12 | 
13 | let normalize_html s =
14 |   Str.global_replace li_end_re "</li>" (Str.global_replace li_begin_re "<li>" s)
15 | 
16 | let with_open_in fn f =
17 |   let ic = open_in fn in
18 |   protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic)
19 | 
20 | let () =
21 |   with_open_in Sys.argv.(1) @@ fun ic ->
22 |   print_string
23 |     (normalize_html (Omd.to_html ~auto_identifiers:false (Omd.of_channel ic)))
24 | 


--------------------------------------------------------------------------------
/tools/dune:
--------------------------------------------------------------------------------
1 | (executable
2 |  (name gen_entities))
3 | 


--------------------------------------------------------------------------------
/tools/gen_entities.ml:
--------------------------------------------------------------------------------
 1 | let f s =
 2 |   Scanf.sscanf s " \"&%s@;\": { %_S: [%s@], %_S: \"%_s@\" }" (fun s nums ->
 3 |       ( s
 4 |       , List.map
 5 |           (fun s -> int_of_string (String.trim s))
 6 |           (String.split_on_char ',' nums) ))
 7 | 
 8 | let main ic =
 9 |   let rec loop () =
10 |     match input_line ic with
11 |     | s ->
12 |         begin
13 |           match f s with
14 |           | name, codepoints ->
15 |               Printf.printf "  | %S -> [" name;
16 |               List.iteri
17 |                 (fun i cp ->
18 |                   if i > 0 then print_string "; ";
19 |                   Printf.printf "Uchar.of_int %d" cp)
20 |                 codepoints;
21 |               Printf.printf "]\n"
22 |           | exception _ -> ()
23 |         end;
24 |         loop ()
25 |     | exception End_of_file -> Printf.printf "  | _ -> []\n%!"
26 |   in
27 |   Printf.printf "let f = function\n";
28 |   loop ()
29 | 
30 | let () =
31 |   let ic = open_in Sys.argv.(1) in
32 |   main ic;
33 |   close_in ic
34 | 


--------------------------------------------------------------------------------