├── .gitignore ├── .merlin ├── .ocp-indent ├── B0.ml ├── BRZO ├── CHANGES.md ├── DEVEL.md ├── LICENSE.md ├── README.md ├── _tags ├── doc └── index.mld ├── opam ├── pkg ├── META └── pkg.ml ├── src ├── cmarkit.ml ├── cmarkit.mli ├── cmarkit.mllib ├── cmarkit_base.ml ├── cmarkit_base.mli ├── cmarkit_commonmark.ml ├── cmarkit_commonmark.mli ├── cmarkit_data.ml ├── cmarkit_data.mli ├── cmarkit_data_html.ml ├── cmarkit_data_uchar.ml ├── cmarkit_html.ml ├── cmarkit_html.mli ├── cmarkit_latex.ml ├── cmarkit_latex.mli ├── cmarkit_renderer.ml └── cmarkit_renderer.mli ├── support └── unicode_data.ml ├── test ├── bench.ml ├── examples.ml ├── expect │ ├── basic.exts.html │ ├── basic.exts.latex │ ├── basic.exts.locs │ ├── basic.exts.md │ ├── basic.exts.nolayout.locs │ ├── basic.exts.trip.md │ ├── basic.html │ ├── basic.latex │ ├── basic.locs │ ├── basic.md │ ├── basic.nolayout.locs │ ├── basic.trip.md │ ├── bug-18.html │ ├── bug-18.latex │ ├── bug-18.locs │ ├── bug-18.md │ ├── bug-18.nolayout.locs │ ├── bug-18.trip.md │ ├── bugs.exts.html │ ├── bugs.exts.latex │ ├── bugs.exts.locs │ ├── bugs.exts.md │ ├── bugs.exts.nolayout.locs │ ├── bugs.exts.trip.md │ ├── bugs.html │ ├── bugs.latex │ ├── bugs.locs │ ├── bugs.md │ ├── bugs.nolayout.locs │ ├── bugs.trip.md │ ├── spec.trip │ └── test.expect ├── pathological.ml ├── spec.json ├── spec.ml ├── spec.mli ├── test.ml ├── test_spec.ml └── trip_spec.ml └── tool ├── cmd_commonmark.ml ├── cmd_commonmark.mli ├── cmd_html.ml ├── cmd_html.mli ├── cmd_latex.ml ├── cmd_latex.mli ├── cmd_locs.ml ├── cmd_locs.mli ├── cmd_main.ml ├── cmd_main.mli ├── std.ml └── std.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _b0 2 | _build 3 | tmp 4 | *.install -------------------------------------------------------------------------------- /.merlin: -------------------------------------------------------------------------------- 1 | PKG b0.kit uucp cmdliner 2 | S src 3 | S test 4 | B _b0/** 
-------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | strict_with=always,match_clause=4,strict_else=never -------------------------------------------------------------------------------- /B0.ml: -------------------------------------------------------------------------------- 1 | open B0_kit.V000 2 | open Result.Syntax 3 | 4 | let commonmark_version = 5 | (* If you update this, also update Cmarkit.commonmark_version 6 | and the links in src/*.mli *) 7 | "0.30" 8 | 9 | (* OCaml library names *) 10 | 11 | let cmarkit = B0_ocaml.libname "cmarkit" 12 | let cmdliner = B0_ocaml.libname "cmdliner" 13 | let uucp = B0_ocaml.libname "uucp" 14 | let unix = B0_ocaml.libname "unix" 15 | 16 | let b0_std = B0_ocaml.libname "b0.std" 17 | let b0_file = B0_ocaml.libname "b0.file" 18 | 19 | (* Libraries *) 20 | 21 | let cmarkit_lib = 22 | let srcs = [ `Dir ~/"src" ] in 23 | let requires = [] and name = "cmarkit-lib" in 24 | B0_ocaml.lib cmarkit ~name ~doc:"The cmarkit library" ~srcs ~requires 25 | 26 | (* Tools *) 27 | 28 | let cmarkit_tool = 29 | let srcs = [ `Dir ~/"tool" ] in 30 | let requires = [cmarkit; cmdliner] in 31 | B0_ocaml.exe "cmarkit" ~public:true ~doc:"The cmarkit tool" ~srcs ~requires 32 | 33 | (* Unicode support 34 | 35 | N.B. we could do without both an exe and an action, cf. the Unicode libs. 
*) 36 | 37 | let unicode_data = 38 | let srcs = [ `File ~/"support/unicode_data.ml" ] in 39 | let requires = [uucp; unix] in 40 | let doc = "Generate cmarkit Unicode data" in 41 | B0_ocaml.exe "unicode_data" ~doc ~srcs ~requires 42 | 43 | let update_unicode = 44 | let doc = "Update Unicode character data " in 45 | B0_unit.of_action "update_unicode_data" ~units:[unicode_data] ~doc @@ 46 | fun env _ ~args:_ -> 47 | let* unicode_data = B0_env.unit_exe_file env unicode_data in 48 | let outf = B0_env.in_scope_dir env ~/"src/cmarkit_data_uchar.ml" in 49 | let outf = Os.Cmd.out_file ~force:true ~make_path:false outf in 50 | Os.Cmd.run ~stdout:outf (Cmd.path unicode_data) 51 | 52 | (* Tests *) 53 | 54 | let update_spec_tests = 55 | let doc = "Update the CommonMark spec tests" in 56 | B0_unit.of_action "update_spec_tests" ~doc @@ 57 | fun env _ ~args:_ -> 58 | let tests = 59 | Fmt.str "https://spec.commonmark.org/%s/spec.json" commonmark_version 60 | in 61 | let dest = B0_env.in_scope_dir env ~/"test/spec.json" in 62 | let dest = Os.Cmd.out_file ~force:true ~make_path:false dest in 63 | let* curl = B0_env.get_cmd env Cmd.(arg "curl" % "-L" % tests) in 64 | Os.Cmd.run ~stdout:dest curl 65 | 66 | let spec_srcs = [`File ~/"test/spec.mli"; `File ~/"test/spec.ml"] 67 | 68 | let bench = 69 | let doc = "Simple standard CommonMark to HTML renderer for benchmarking" in 70 | let srcs = [ `File ~/"test/bench.ml" ] in 71 | let requires = [cmarkit] in 72 | let meta = B0_meta.(empty |> tag bench) in 73 | B0_ocaml.exe "bench" ~doc ~meta ~srcs ~requires 74 | 75 | let test = 76 | let doc = "Other expectation tests" in 77 | let srcs = [ `File ~/"test/test.ml" ] in 78 | let requires = [cmarkit] in 79 | let meta = B0_meta.empty |> B0_meta.tag B0_meta.test in 80 | B0_ocaml.exe "test" ~doc ~meta ~srcs ~requires 81 | 82 | let test_spec = 83 | let doc = "Test CommonMark specification conformance tests" in 84 | let srcs = `File ~/"test/test_spec.ml" :: spec_srcs in 85 | let requires = [ b0_std; 
b0_file; cmarkit ] in 86 | let meta = 87 | B0_meta.empty 88 | |> B0_meta.tag B0_meta.test 89 | |> B0_meta.tag B0_meta.run 90 | |> B0_meta.add B0_unit.Action.cwd `Scope_dir 91 | in 92 | B0_ocaml.exe "test_spec" ~doc ~meta ~srcs ~requires 93 | 94 | let trip_spec = 95 | let doc = "Test CommonMark renderer on conformance tests" in 96 | let srcs = `File ~/"test/trip_spec.ml" :: spec_srcs in 97 | let requires = [ b0_std; b0_file; cmarkit ] in 98 | let meta = 99 | B0_meta.empty 100 | |> B0_meta.tag B0_meta.test 101 | |> B0_meta.tag B0_meta.run 102 | |> B0_meta.add B0_unit.Action.cwd `Scope_dir 103 | in 104 | B0_ocaml.exe "trip_spec" ~doc ~meta ~srcs ~requires 105 | 106 | let pathological = 107 | let doc = "Test a CommonMark parser on pathological tests." in 108 | let srcs = [ `File ~/"test/pathological.ml" ] in 109 | let requires = [ b0_std; unix ] in 110 | B0_ocaml.exe "pathological" ~doc ~srcs ~requires 111 | 112 | let examples = 113 | let doc = "Doc sample code" in 114 | let srcs = [ `File ~/"test/examples.ml" ] in 115 | let requires = [ cmarkit ] in 116 | let meta = B0_meta.empty |> B0_meta.(tag test) in 117 | B0_ocaml.exe "examples" ~doc ~meta ~srcs ~requires 118 | 119 | (* Expectation tests *) 120 | 121 | let expect_test ctx = 122 | let test = B0_expect.get_unit_exe_file_cmd ctx test in 123 | let cwd = B0_env.scope_dir (B0_expect.env ctx) in 124 | B0_expect.stdout ctx ~cwd ~stdout:(Fpath.v "test.expect") test 125 | 126 | let expect_trip_spec ctx = 127 | let trip_spec = B0_expect.get_unit_exe_file_cmd ctx trip_spec in 128 | let cwd = B0_env.scope_dir (B0_expect.env ctx) in 129 | B0_expect.stdout ctx ~cwd ~stdout:(Fpath.v "spec.trip") trip_spec 130 | 131 | let expect_cmarkit_renders ctx = 132 | let cmarkit = B0_expect.get_unit_exe_file_cmd ctx cmarkit_tool in 133 | let renderers = (* command, output suffix *) 134 | [ Cmd.(arg "html" % "-c" % "--unsafe"), ".html"; 135 | Cmd.(arg "latex"), ".latex"; 136 | Cmd.(arg "commonmark"), ".trip.md"; 137 | Cmd.(arg "locs"), 
".locs"; 138 | Cmd.(arg "locs" % "--no-layout"), ".nolayout.locs"; ] 139 | in 140 | let test_renderer ctx cmarkit file (cmd, ext) = 141 | let with_exts = Fpath.has_ext ".exts.md" file in 142 | let cmd = Cmd.(cmd %% if' with_exts (arg "--exts") %% path file) in 143 | let cwd = B0_expect.base ctx and stdout = Fpath.(file -+ ext) in 144 | B0_expect.stdout ctx ~cwd ~stdout Cmd.(cmarkit %% cmd) 145 | in 146 | let test_file ctx cmarkit file = 147 | List.iter (test_renderer ctx cmarkit file) renderers 148 | in 149 | let test_files = 150 | let base_files = B0_expect.base_files ctx ~rel:true ~recurse:false in 151 | let input f = Fpath.has_ext ".md" f && not (Fpath.has_ext ".trip.md" f) in 152 | List.filter input base_files 153 | in 154 | List.iter (test_file ctx cmarkit) test_files 155 | 156 | let expect = 157 | let doc = "Test expectations" in 158 | let meta = B0_meta.(empty |> tag test |> tag run) in 159 | let units = [test; trip_spec; cmarkit_tool] in 160 | B0_unit.of_action' "expect" ~meta ~units ~doc @@ 161 | B0_expect.action_func ~base:(Fpath.v "test/expect") @@ fun ctx -> 162 | expect_cmarkit_renders ctx; 163 | expect_trip_spec ctx; 164 | expect_test ctx; 165 | () 166 | 167 | (* Packs *) 168 | 169 | let default = 170 | let meta = 171 | B0_meta.empty 172 | |> B0_meta.(add authors) ["The cmarkit programmers"] 173 | |> B0_meta.(add maintainers) 174 | ["Daniel Bünzli "] 175 | |> B0_meta.(add homepage) "https://erratique.ch/software/cmarkit" 176 | |> B0_meta.(add online_doc) "https://erratique.ch/software/cmarkit/doc" 177 | |> B0_meta.(add licenses) ["ISC"] 178 | |> B0_meta.(add repo) "git+https://erratique.ch/repos/cmarkit.git" 179 | |> B0_meta.(add issues) "https://github.com/dbuenzli/cmarkit/issues" 180 | |> B0_meta.(add description_tags) 181 | ["codec"; "commonmark"; "markdown"; "org:erratique"; ] 182 | |> B0_meta.tag B0_opam.tag 183 | |> B0_meta.add B0_opam.build 184 | {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%" 185 | "--with-cmdliner" 
"%{cmdliner:installed}%"]]|} 186 | |> B0_meta.add B0_opam.depopts ["cmdliner", ""] 187 | |> B0_meta.add B0_opam.conflicts [ "cmdliner", {|< "1.1.0"|}] 188 | |> B0_meta.add B0_opam.depends 189 | [ "ocaml", {|>= "4.14.0"|}; 190 | "ocamlfind", {|build|}; 191 | "ocamlbuild", {|build|}; 192 | "topkg", {|build & >= "1.0.3"|}; 193 | "uucp", {|dev|}; 194 | "b0", {|dev & with-test|}; 195 | ] 196 | in 197 | B0_pack.make "default" ~doc:"cmarkit package" ~meta ~locked:true @@ 198 | B0_unit.list () 199 | -------------------------------------------------------------------------------- /BRZO: -------------------------------------------------------------------------------- 1 | (srcs-x pkg tmp) -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2 | - `Cmarkit_latex`. Add option `?first_heading_level` to the renderer 3 | to set the LaTeX heading level to use for the first CommonMark 4 | heading level. A corresponding option `--first-heading-level` is 5 | added to `cmarkit latex`. Thanks to Léo Andrès for the patch (#16). 6 | 7 | - `Cmarkit.Mapper`, do not drop empty table cells. 8 | Thanks to Hannes Mehnert for the report (#14). 9 | 10 | - Fix crash (out of bounds exception) when lists are terminated by 11 | the end of file. Thanks to Ty Overby for the report (#18). 12 | 13 | - Fix invalid markup generated for cancelled task items. 14 | Thanks to Sebastien Mondet for the report (#15). 15 | 16 | - Updated data for Unicode 16.0.0. 17 | 18 | v0.3.0 2023-12-12 La Forclaz (VS) 19 | --------------------------------- 20 | 21 | - Fix ordered item marker escaping. Thanks to Rafał Gwoździński for 22 | the report (#11). 23 | 24 | - Data updated for Unicode 15.1.0 (no changes except 25 | for the value of `Cmarkit.Doc.unicode_version`). 26 | 27 | - Fix table extension column parsing, toplevel text inlines were being 28 | dropped. 
Thanks to Javier Chávarri for the report (#10). 29 | 30 | - `List_item.make`, change default value of `after_marker` from 0 to 1. 31 | We don't want to generate invalid CommonMark by default. Thanks to 32 | Rafał Gwoździński for the report (#9). 33 | 34 | - Add option `-f/--full-featured`, to `cmarkit html`. A synonym for a 35 | bunch of existing options to generate a publishable document with extensions 36 | and math rendering without hassle. See `cmarkit html --help` for details. 37 | 38 | v0.2.0 2023-05-10 La Forclaz (VS) 39 | --------------------------------- 40 | 41 | - Fix bug in `Block_lines.list_of_string`. Thanks to Rafał Gwoździński 42 | for the report and the fix (#7, #8). 43 | - `Cmarkit.Mapper`. Fix non-sensical default map for `Image` nodes: do 44 | not delete `Image` nodes whose alt text maps to `None`, replace the 45 | alt text by `Inline.empty`. Thanks to Nicolás Ojeda Bär for the 46 | report and the fix (#6). 47 | 48 | v0.1.0 2023-04-06 La Forclaz (VS) 49 | --------------------------------- 50 | 51 | First release. 52 | 53 | Supported by a grant from the OCaml Software Foundation. 54 | -------------------------------------------------------------------------------- /DEVEL.md: -------------------------------------------------------------------------------- 1 | 2 | A few development tips. 3 | 4 | # Benchmark parse to HTML rendering 5 | 6 | ```sh 7 | time cmark --unsafe /file/to/md > /dev/null 8 | time $(b0 --path -- bench --unsafe /file/to/md) > /dev/null 9 | ``` 10 | 11 | # Expectation tests 12 | 13 | To add a new test, add an `.md` test in `test/expect`, run the tests 14 | and add the new generated files to the repo. 
15 | 16 | ```sh 17 | b0 -- expect 18 | b0 -- expect --help 19 | ``` 20 | 21 | # Specification tests 22 | 23 | To run the specification test use: 24 | 25 | ```sh 26 | b0 -- test_spec # All examples 27 | b0 -- test_spec 1-10 34 56 # Specific examples 28 | ``` 29 | 30 | To test the CommonMark renderer on the specification tests use: 31 | 32 | ```sh 33 | b0 -- trip_spec # All examples 34 | b0 -- trip_spec 1-10 32 56 # Specific examples 35 | b0 -- trip_spec --show-diff # Show correct render diffs (if applicable) 36 | ``` 37 | 38 | Given a source, a *correct* render yields the same HTML and it *round 39 | trips* if the source is byte-for-byte equivalent. Using `--show-diff` 40 | on an example that does not round trip shows the reason and the diff. 41 | 42 | The tests are also run on parses without layout preservation to check 43 | they are correct. 44 | 45 | # Pathological tests 46 | 47 | The [pathological tests][p] of `cmark` have been ported to 48 | [`test/pathological.ml`]. You can run them on any executable that 49 | reads CommonMark on standard input and writes HTML rendering on 50 | standard output. 51 | 52 | ```sh 53 | b0 -- pathological -- cmark 54 | b0 -u cmarkit -- pathological -- $(b0 --path -- cmarkit html) 55 | b0 -- pathological --help 56 | b0 -- pathological -d /tmp/ # Dump tests and expectations 57 | ``` 58 | 59 | [p]: https://github.com/commonmark/cmark/blob/master/test/pathological_tests.py 60 | [`test/pathological.ml`]: test/pathological.ml 61 | 62 | # Specification update 63 | 64 | If there's a specification version update, the `commonmark_version` 65 | variable must be updated both in [`B0.ml`] and in [`src/cmarkit.ml`]. 66 | A `s/old_version/new_version/g` should be performed on `.mli` files. 67 | 68 | The repository has the CommonMark specification test file in 69 | [`test/spec.json`]. 
70 | 71 | To update it invoke: 72 | 73 | ```sh 74 | b0 -- update_spec_tests 75 | ``` 76 | 77 | [`test/spec.json`]: test/spec.json 78 | [`src/cmarkit.ml`]: src/cmarkit.ml 79 | [`B0.ml`]: B0.ml 80 | 81 | # Unicode data update 82 | 83 | The library contains Unicode data generated in the file 84 | [`src/cmarkit_data_uchar.ml`]. 85 | 86 | To update it invoke: 87 | 88 | ```sh 89 | opam install uucp 90 | b0 -- update_unicode_data 91 | ``` 92 | 93 | [`src/cmarkit_data_uchar.ml`]: src/cmarkit_data_uchar.ml 94 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 The cmarkit programmers 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cmarkit — CommonMark parser and renderer for OCaml 2 | ================================================== 3 | %%VERSION%% 4 | 5 | Cmarkit parses the [CommonMark specification]. It provides: 6 | 7 | - A CommonMark parser for UTF-8 encoded documents. 
Link label resolution 8 | can be customized and a non-strict parsing mode can be activated to add: 9 | strikethrough, LaTeX math, footnotes, task items and tables. 10 | 11 | - An extensible abstract syntax tree for CommonMark documents with 12 | source location tracking and best-effort source layout preservation. 13 | 14 | - Abstract syntax tree mapper and folder abstractions for quick and 15 | concise tree transformations. 16 | 17 | - Extensible renderers for HTML, LaTeX and CommonMark with source 18 | layout preservation. 19 | 20 | Cmarkit is distributed under the ISC license. It has no dependencies. 21 | 22 | [CommonMark specification]: https://spec.commonmark.org/ 23 | 24 | Homepage: <https://erratique.ch/software/cmarkit> 25 | 26 | ## Installation 27 | 28 | cmarkit can be installed with `opam`: 29 | 30 | opam install cmarkit 31 | opam install cmarkit cmdliner # For the cmarkit tool 32 | 33 | If you don't use `opam` consult the [`opam`](opam) file for build 34 | instructions. 35 | 36 | ## Documentation 37 | 38 | The documentation can be consulted [online] or via `odig doc cmarkit`. 39 | 40 | Questions are welcome but better asked on the [OCaml forum] than on 41 | the issue tracker. 42 | 43 | [online]: https://erratique.ch/software/cmarkit/doc 44 | [OCaml forum]: https://discuss.ocaml.org/ 45 | 46 | ## Sample programs 47 | 48 | The [`cmarkit`] tool parses and renders CommonMark files in various 49 | ways. 50 | 51 | See also [`bench.ml`] and the [doc examples]. 52 | 53 | [`cmarkit`]: tool/cmd_main.ml 54 | [`bench.ml`]: test/bench.ml 55 | [doc examples]: test/examples.ml 56 | 57 | ## Acknowledgements 58 | 59 | A grant from the [OCaml Software Foundation] helped to bring the first 60 | public release of `cmarkit`. 61 | 62 | The `cmarkit` implementation benefited from the work of John 63 | MacFarlane ([spec][CommonMark specification], [`cmark`]) and Martin 64 | Mitáš ([`md4c`]). 
65 | 66 | [`cmark`]: https://github.com/commonmark/cmark 67 | [`md4c`]: https://github.com/mity/md4c 68 | [OCaml Software Foundation]: http://ocaml-sf.org/ 69 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true : bin_annot, safe_string 2 | <_b0> : -traverse 3 | : include 4 | : package(b0.std b0.kit) 5 | : package(b0.std b0.kit) 6 | : package(cmdliner) -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 Cmarkit {%html: %%VERSION%%%}} 2 | 3 | Cmarkit parses the {{:https://spec.commonmark.org/current}CommonMark 4 | specification}. It provides: 5 | 6 | - A CommonMark parser for UTF-8 encoded documents. Link label resolution 7 | can be {{!Cmarkit.Label.resolvers}customized} and a non-strict 8 | parsing mode can be activated to {{!Cmarkit.extensions}add}: strikethrough, 9 | L{^A}T{_E}X math, footnotes, task items and tables. 10 | - An extensible abstract syntax tree for CommonMark documents with source 11 | location tracking and best effort {{!Cmarkit_commonmark.layout}source layout 12 | preservation}. 13 | - Abstract syntax tree {{!Cmarkit.Mapper}mapper} and {{!Cmarkit.Folder}folder} 14 | abstractions for quick and concise tree transformations. 15 | - Extensible renderers for {{!Cmarkit_html}HTML}, 16 | {{!Cmarkit_latex}L{^A}T{_E}X} and {{!Cmarkit_commonmark}CommonMark} with 17 | source layout preservation. 18 | 19 | See the {{!quick}quick start}. 20 | 21 | {1:cmarkit_library Library [cmarkit]} 22 | 23 | {!modules: 24 | Cmarkit 25 | Cmarkit_renderer 26 | Cmarkit_commonmark 27 | Cmarkit_html 28 | Cmarkit_latex 29 | } 30 | 31 | {1:quick Quick start} 32 | 33 | The following functions render CommonMark snippets using the built-in 34 | renderers. 
The parsing bit via {!Cmarkit.Doc.of_string} is always the same 35 | except for CommonMark rendering where we make sure to keep the layout 36 | for {{!Cmarkit_commonmark.layout}source layout preservation}. 37 | 38 | If [strict] is [true] the CommonMark specification is strictly 39 | followed otherwise the built-in {{!Cmarkit.extensions}extensions} are 40 | enabled. 41 | 42 | {[ 43 | let cmark_to_html : strict:bool -> safe:bool -> string -> string = 44 | fun ~strict ~safe md -> 45 | let doc = Cmarkit.Doc.of_string ~strict md in 46 | Cmarkit_html.of_doc ~safe doc 47 | 48 | let cmark_to_latex : strict:bool -> string -> string = 49 | fun ~strict md -> 50 | let doc = Cmarkit.Doc.of_string ~strict md in 51 | Cmarkit_latex.of_doc doc 52 | 53 | let cmark_to_commonmark : strict:bool -> string -> string = 54 | fun ~strict md -> 55 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict md in 56 | Cmarkit_commonmark.of_doc doc 57 | ]} 58 | 59 | If you want to: 60 | 61 | {ul 62 | {- Extend the abstract syntax tree or the renderers, see 63 | {{!Cmarkit_renderer.example}this example}.} 64 | {- Map parts of an abstract syntax, see {!Cmarkit.Mapper}.} 65 | {- Fold over parts of an abstract syntax, see {!Cmarkit.Folder}.} 66 | {- Interfere with link label definition and resolution, see 67 | {{!Cmarkit.Label}labels} and their 68 | {{!Cmarkit.Label.resolvers}resolvers}.}} 69 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "cmarkit" 3 | synopsis: "CommonMark parser and renderer for OCaml" 4 | description: """\ 5 | Cmarkit parses the [CommonMark specification]. It provides: 6 | 7 | - A CommonMark parser for UTF-8 encoded documents. Link label resolution 8 | can be customized and a non-strict parsing mode can be activated to add: 9 | strikethrough, LaTeX math, footnotes, task items and tables. 
10 | 11 | - An extensible abstract syntax tree for CommonMark documents with 12 | source location tracking and best-effort source layout preservation. 13 | 14 | - Abstract syntax tree mapper and folder abstractions for quick and 15 | concise tree transformations. 16 | 17 | - Extensible renderers for HTML, LaTeX and CommonMark with source 18 | layout preservation. 19 | 20 | Cmarkit is distributed under the ISC license. It has no dependencies. 21 | 22 | [CommonMark specification]: https://spec.commonmark.org/ 23 | 24 | Homepage: """ 25 | maintainer: "Daniel Bünzli " 26 | authors: "The cmarkit programmers" 27 | license: "ISC" 28 | tags: ["codec" "commonmark" "markdown" "org:erratique"] 29 | homepage: "https://erratique.ch/software/cmarkit" 30 | doc: "https://erratique.ch/software/cmarkit/doc" 31 | bug-reports: "https://github.com/dbuenzli/cmarkit/issues" 32 | depends: [ 33 | "ocaml" {>= "4.14.0"} 34 | "ocamlfind" {build} 35 | "ocamlbuild" {build} 36 | "topkg" {build & >= "1.0.3"} 37 | "uucp" {dev} 38 | "b0" {dev & with-test} 39 | ] 40 | depopts: ["cmdliner"] 41 | conflicts: [ 42 | "cmdliner" {< "1.1.0"} 43 | ] 44 | build: [ 45 | "ocaml" 46 | "pkg/pkg.ml" 47 | "build" 48 | "--dev-pkg" 49 | "%{dev}%" 50 | "--with-cmdliner" 51 | "%{cmdliner:installed}%" 52 | ] 53 | dev-repo: "git+https://erratique.ch/repos/cmarkit.git" 54 | -------------------------------------------------------------------------------- /pkg/META: -------------------------------------------------------------------------------- 1 | description = "CommonMark parser and renderer for OCaml" 2 | version = "%%VERSION_NUM%%" 3 | requires = "" 4 | archive(byte) = "cmarkit.cma" 5 | archive(native) = "cmarkit.cmxa" 6 | plugin(byte) = "cmarkit.cma" 7 | plugin(native) = "cmarkit.cmxs" 8 | exists_if = "cmarkit.cma cmarkit.cmxa" 9 | -------------------------------------------------------------------------------- /pkg/pkg.ml: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env ocaml 2 | #use "topfind" 3 | #require "topkg" 4 | open Topkg 5 | 6 | let cmdliner = Conf.with_pkg "cmdliner" 7 | 8 | let () = 9 | Pkg.describe "cmarkit" @@ fun c -> 10 | let cmdliner = Conf.value c cmdliner in 11 | let api = ["Cmarkit"; "Cmarkit_renderer"; 12 | "Cmarkit_commonmark"; "Cmarkit_html"; "Cmarkit_latex"] 13 | in 14 | Ok [ Pkg.mllib ~api "src/cmarkit.mllib"; 15 | Pkg.doc "doc/index.mld" ~dst:"odoc-pages/index.mld"; 16 | Pkg.bin ~cond:cmdliner "tool/cmd_main" ~dst:"cmarkit" ] 17 | -------------------------------------------------------------------------------- /src/cmarkit.mllib: -------------------------------------------------------------------------------- 1 | Cmarkit_data 2 | Cmarkit_data_uchar 3 | Cmarkit_data_html 4 | Cmarkit_base 5 | Cmarkit 6 | Cmarkit_renderer 7 | Cmarkit_commonmark 8 | Cmarkit_html 9 | Cmarkit_latex -------------------------------------------------------------------------------- /src/cmarkit_commonmark.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to CommonMark. 7 | 8 | Generates CommonMark. If your document was parsed with 9 | [layout:true], it preserves most of the source layout on output. 10 | This won't be perfect, make sure you understand the 11 | {{!layout}details} before reporting issues. 12 | 13 | See {{!page-index.quick}an example}. 14 | 15 | {b Warning.} Rendering outputs are unstable. They may be tweaked 16 | even between minor versions of the library. *) 17 | 18 | (** {1:rendering Rendering} *) 19 | 20 | val of_doc : Cmarkit.Doc.t -> string 21 | (** [of_doc d] is a CommonMark document for [d]. See {!val-renderer} for 22 | more details. 
*) 23 | 24 | (** {1:renderer Renderer} *) 25 | 26 | val renderer : unit -> Cmarkit_renderer.t 27 | (** [renderer ()] is the default CommonMark renderer. This renders 28 | the strict CommonMark abstract syntax tree and the supported 29 | Cmarkit {{!Cmarkit.extensions}extensions}. 30 | 31 | The inline, block and document renderers always return 32 | [true]. Unknown block and inline values are rendered by an HTML 33 | comment (as permitted by the CommonMark specification). 34 | 35 | See {{!Cmarkit_renderer.example}this example} to extend or 36 | selectively override the renderer. *) 37 | 38 | (** {1:render Render functions} 39 | 40 | Only useful if you extend the renderer. *) 41 | 42 | (** {2:indents Newlines and indentation} *) 43 | 44 | val newline : Cmarkit_renderer.context -> unit 45 | (** [newline c] starts a new line, except on the first call on [c] which is 46 | a nop. *) 47 | 48 | type indent = 49 | [ `I of int (** Indentation by given amount. *) 50 | | `L of int * string * int * Uchar.t option 51 | (** Indent before, list marker, indent after, list item task extension *) 52 | | `Q of int (** Indentation followed by a block quote marker and a space *) 53 | | `Fn of int * Cmarkit.Label.t (** Indent before, label (footnote extension)*)] 54 | (** The type for specifying block indentation. *) 55 | 56 | val push_indent : Cmarkit_renderer.context -> indent -> unit 57 | (** [push_indent c i] pushes [i] on the current indentation of [c]. This 58 | does not render anything. *) 59 | 60 | val pop_indent : Cmarkit_renderer.context -> unit 61 | (** [pop_indent c] pops the last indentation pushed on [c]. This 62 | does not render anything. *) 63 | 64 | val indent : Cmarkit_renderer.context -> unit 65 | (** [indent c] outputs current indentation on [c]. Note that [`L] 66 | and [`Fn] get replaced by an [`I] indent on subsequent lines, that 67 | is the list or footnote marker is output only once. 
*) 68 | 69 | (** {2:bslash Backslash escaping} *) 70 | 71 | module Char_set : Set.S with type elt = char 72 | (** Sets of US-ASCII characters. *) 73 | 74 | val escaped_string : 75 | ?esc_ctrl:bool -> Cmarkit_renderer.context -> Char_set.t -> string -> unit 76 | (** [escaped_string ?esc_ctrl c cs s] renders [s] on [c] with 77 | characters in [cs] backslash escaped. If [esc_ctrl] is [true] 78 | (default) {{:https://spec.commonmark.org/0.30/#ascii-control-character} 79 | ASCII control characters} are escaped to decimal escapes. *) 80 | 81 | val buffer_add_escaped_string : 82 | ?esc_ctrl:bool -> Buffer.t -> Char_set.t -> string -> unit 83 | (** [buffer_add_escaped_string b cs s] is {!escaped_string} but 84 | appends to a buffer value. *) 85 | 86 | val escaped_text : Cmarkit_renderer.context -> string -> unit 87 | (** [escaped_text c s] renders [s] on [c] trying to be smart about escaping 88 | Commonmark structural symbols for {!Cmarkit.Inline.extension-Text} inlines. 89 | We assume text can be anywhere in a sequence of inlines and in particular 90 | that it can start a line. This function also takes into account 91 | the existence of the {{!Cmarkit.extensions}extensions}. 92 | 93 | As such we escape: 94 | 95 | {ul 96 | {- These block markers: [-] [+] [_] [=] only if present at [s.[0]].} 97 | {- Only the first of runs of them: [#] [`]} 98 | {- Only the first of a run longer than 1: [~] 99 | ({{!Cmarkit.ext_strikethrough}strikethrough extension}).} 100 | {- [&] if followed by an US-ASCII letter or [#].} 101 | {- [!] if it is the last character of [s].} 102 | {- [.] 
or [)] only if preceded by at most 9 digits to the start of text.} 103 | {- Everywhere, [*] [_] [\ ] [<] [>] [\[] [\]], 104 | {{:https://spec.commonmark.org/0.30/#ascii-control-character} 105 | ASCII control characters}, [$] ({{!Cmarkit.ext_math_inline}inline math 106 | extension}), [|] ({{!Cmarkit.ext_tables}table extension}) }} *) 107 | 108 | val buffer_add_escaped_text : Buffer.t -> string -> unit 109 | (** [buffer_add_escaped_text b s] is {!escaped_text} but appends to 110 | a buffer value. *) 111 | 112 | (** {1:layout Source layout preservation} 113 | 114 | The abstract syntax tree has a few block cases and data fields to 115 | represent the source document layout. This allows updating 116 | CommonMark documents without normalizing them too much when they 117 | are {{!Cmarkit.Doc.of_string}parsed} with [layout:true]. 118 | 119 | To keep things reasonably simple a few things are {b not} attempted like: 120 | 121 | {ol 122 | {- Preserving entities and character references.} 123 | {- Preserving the exact line by line indentation layout of container 124 | blocks.} 125 | {- Preserving lazy continuation lines.} 126 | {- Keeping track of used newlines except for the first one.} 127 | {- Preserving layout source location information when it can be 128 | reconstructed from the document data source location.}} 129 | 130 | In general we try to keep the following desirable properties 131 | for the abstract syntax tree definition: 132 | 133 | {ol 134 | {- Layout information should not interfere with document data or 135 | be affected by it. Otherwise data updates also need to update 136 | the layout data, which is error prone and inconvenient.} 137 | {- Abstract syntax trees resulting from the source document, from 138 | renders of the source document parsed with or without 139 | [layout:true] should all render to the same HTML.}} 140 | 141 | In practice CommonMark being not context free point 1. is not 142 | always achieved. 
In particular in {!Cmarkit.Inline.extension-Code_span} the 143 | number of delimiting backticks depends on the code content 144 | ({!Cmarkit.Inline.Code_span.of_string}, computes that for you). 145 | 146 | The renderer performs almost no checks on the layout data. You 147 | should be careful if you fill these yourself since you could 148 | generate CommonMark that will be misinterpreted. Layout 149 | data of pristine nodes coming out of {!Cmarkit.Doc.of_string}, created 150 | with the {!Cmarkit.Inline} and {!Cmarkit.Block} constructors 151 | should not need your attention (respect their input constraints 152 | though). *) 153 | 154 | (** {2:rendering_class Classifying renderings} 155 | 156 | We say that a CommonMark render: 157 | {ul 158 | {- is {e correct}, if the result renders the same HTML 159 | as the source document. This can be checked with the 160 | [cmarkit] tool included in the distribution: 161 | {[ 162 | cmarkit commonmark --html-diff mydoc.md 163 | ]} 164 | If a difference shows up, the rendering is said to be {e incorrect}.} 165 | {- {e round trips}, if the result is byte-for-byte equal to the 166 | source document. 
This can be checked with the [cmarkit] tool 167 | included in the distribution: 168 | {[ 169 | cmarkit commonmark --diff mydoc.md 170 | ]} 171 | If a difference shows up, the rendering does not round trip but 172 | it may still be correct.}} *) 173 | 174 | (** {2:known_diffs Known correct differences} 175 | 176 | In general lack of round trip is due to: 177 | 178 | {ul 179 | {- Loss of layout on input (see above).} 180 | {- Eager escaping of CommonMark delimiters (the escape strategy 181 | is {{!escaped_text}here}).} 182 | {- Churn around blank lines which can be part of blocks without 183 | adhering to their structural convention.}} 184 | 185 | Please do not report issues for differences that are due to the 186 | following: 187 | 188 | {ol 189 | {- Source US-ASCII control characters in textual data render as decimal 190 | character references in the output.} 191 | {- Source entity and character references are lost during parsing and 192 | thus replaced by their definition in the output.} 193 | {- Source tab stop advances may be replaced by spaces in the output.} 194 | {- Source escaped characters may end up unescaped in the output.} 195 | {- Source unescaped characters may end up escaped in the output.} 196 | {- Source lazy continuation lines are made part of blocks in the output.} 197 | {- Source indented blank lines following indented code blocks 198 | lose four spaces of indentation (as per specification these are not 199 | part of the block).} 200 | {- Source unindented blank lines in indented code blocks are indented 201 | in the output.} 202 | {- Source fenced code block indentation is retained from the opening 203 | fence and used for the following lines in the output.} 204 | {- Source block quote indentation is retained from the first line 205 | and used for the following lines in the output. The optional space 206 | following the quotation mark ['>'] is made mandatory. 
} 207 | {- Source list item indentation is regularized, in particular blank lines 208 | will indent.} 209 | {- Source list items that start with an empty line get a space after 210 | their marker.} 211 | {- The newline used in the output is the one found in the rendered 212 | {!Cmarkit.Doc.t} value.}} 213 | 214 | {e Simple} and {e implemented} round trip improvements to the 215 | renderer are welcome. 216 | 217 | {2:known_incorrect Known incorrect renderings} 218 | 219 | Please do not report issues for incorrect renderings that are due to the 220 | following (and unlikely to be fixed): 221 | 222 | {ol 223 | {- Use of entities and character references around structural 224 | CommonMark symbols can make things go wrong. These get resolved 225 | after inline parsing because they can't be used to stand for 226 | structural CommonMark symbols, however once they have been resolved they 227 | can interact with parsing. Here is an example: 228 | {[ 229 | *emph&nbsp;* 230 | ]} 231 | It parses as emphasis. But if we render it to CommonMark 232 | non-breaking space renders as is and we get: 233 | {[ 234 | *emph * 235 | ]} 236 | which no longer parses as emphasis. 237 | 238 | Note in this particular case it is possible to do something 239 | about it by being smarter about the context when escaping. However 240 | there's a trade-off between renderer complexity and the (conjectured) 241 | paucity of these cases.} 242 | } 243 | 244 | Otherwise, if you spot an incorrect rendering please report a minimal 245 | reproduction case. 246 | 247 | {e Simple} and {e implemented} round trip improvements to the 248 | renderer are welcome. 249 | *) 250 | -------------------------------------------------------------------------------- /src/cmarkit_data.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved.
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Unicode character data 7 | 8 | XXX. For now we kept that simple and use the Stdlib's Set and 9 | Maps. Bring in Uucp's tmapbool and tmap if that turns out to be too 10 | costly in space or time. *) 11 | 12 | module Uset = struct 13 | include Set.Make (Uchar) 14 | let of_array = 15 | let add acc u = add (Uchar.unsafe_of_int u) acc in 16 | Array.fold_left add empty 17 | end 18 | 19 | module Umap = struct 20 | include Map.Make (Uchar) 21 | let of_array = 22 | let add acc (u, f) = add (Uchar.unsafe_of_int u) f acc in 23 | Array.fold_left add empty 24 | end 25 | 26 | let whitespace_uset = Uset.of_array Cmarkit_data_uchar.whitespace 27 | let punctuation_uset = Uset.of_array Cmarkit_data_uchar.punctuation 28 | let case_fold_umap = Umap.of_array Cmarkit_data_uchar.case_fold 29 | 30 | let unicode_version = Cmarkit_data_uchar.unicode_version 31 | let is_unicode_whitespace u = Uset.mem u whitespace_uset 32 | let is_unicode_punctuation u = Uset.mem u punctuation_uset 33 | let unicode_case_fold u = Umap.find_opt u case_fold_umap 34 | 35 | (* HTML entity data. *) 36 | 37 | module String_map = Map.Make (String) 38 | 39 | let html_entity_smap = 40 | let add acc (entity, rep) = String_map.add entity rep acc in 41 | Array.fold_left add String_map.empty Cmarkit_data_html.entities 42 | 43 | let html_entity e = String_map.find_opt e html_entity_smap 44 | -------------------------------------------------------------------------------- /src/cmarkit_data.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Data needed for CommonMark parsing. 
*) 7 | 8 | (** {1:unicode Unicode data} *) 9 | 10 | val unicode_version : string 11 | (** [unicode_version] is the supported Unicode version. *) 12 | 13 | val is_unicode_whitespace : Uchar.t -> bool 14 | (** [is_unicode_whitespace u] is [true] iff 15 | [u] is a CommonMark 16 | {{:https://spec.commonmark.org/current/#unicode-whitespace-character} 17 | Unicode whitespace character}. *) 18 | 19 | val is_unicode_punctuation : Uchar.t -> bool 20 | (** [is_unicode_punctuation u] is [true] iff 21 | [u] is a CommonMark 22 | {{:https://spec.commonmark.org/current/#unicode-punctuation-character} 23 | Unicode punctuation character}. *) 24 | 25 | val unicode_case_fold : Uchar.t -> string option 26 | (** [unicode_case_fold u] is the UTF-8 encoding of [u]'s Unicode 27 | {{:http://www.unicode.org/reports/tr44/#Case_Folding}case fold} or 28 | [None] if [u] case folds to itself. *) 29 | 30 | (** {1:html HTML data} *) 31 | 32 | val html_entity : string -> string option 33 | (** [html_entity e] is the UTF-8 data of the HTML entity {e name} 34 | (without [&] and [;]) [e]. *) 35 | -------------------------------------------------------------------------------- /src/cmarkit_html.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to HTML. 7 | 8 | Generates HTML fragments, consult the 9 | {{!integration}integration notes} for requirements on the webpage. 10 | 11 | See {{!page-index.quick}a quick example} and 12 | {{!page_frame}another one}. 13 | 14 | {b Warning.} Rendering outputs are unstable, they may be tweaked 15 | even between minor versions of the library.
*) 16 | 17 | (** {1:rendering Rendering} *) 18 | 19 | val of_doc : ?backend_blocks:bool -> safe:bool -> Cmarkit.Doc.t -> string 20 | (** [of_doc ~safe d] is an HTML fragment for [d]. See {!renderer} 21 | for more details and documentation about rendering options. *) 22 | 23 | (** {1:renderers Renderers} *) 24 | 25 | val renderer : ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t 26 | (** [renderer ~safe ()] is the default HTML renderer. This renders the 27 | strict CommonMark abstract syntax tree and the supported Cmarkit 28 | {{!Cmarkit.extensions}extensions}. 29 | 30 | The inline, block and document renderers always return 31 | [true]. Unknown block and inline values are rendered by an HTML 32 | comment. 33 | 34 | The following options are available: 35 | 36 | {ul 37 | {- [safe], if [true] {{!Cmarkit.Block.extension-Html_block}HTML blocks} and 38 | {{!Cmarkit.Inline.extension-Raw_html}raw HTML inlines} are discarded and 39 | replaced by an HTML comment in the output. Besides the URLs of 40 | autolinks, links and images that satisfy 41 | {!Cmarkit.Inline.Link.is_unsafe} are replaced by the empty string. 42 | 43 | Using safe renderings is a good first step at preventing 44 | {{:https://en.wikipedia.org/wiki/Cross-site_scripting}XSS} from 45 | untrusted user inputs but you should rather post-process rendering 46 | outputs with a dedicated HTML sanitizer.} 47 | {- [backend_blocks], if [true], code blocks with language [=html] 48 | are written verbatim in the output (iff [safe] is [false]) and 49 | any other code block whose language starts with [=] is 50 | dropped. Defaults to [false].}} 51 | 52 | See {{!Cmarkit_renderer.example}this example} to extend or 53 | selectively override the renderer. *) 54 | 55 | val xhtml_renderer : 56 | ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t 57 | (** [xhtml_renderer] is like {!val-renderer} but explicitly closes 58 | empty tags to possibly make the output valid XML.
Note that it 59 | still renders HTML blocks and inline raw HTML unless {!safe} is 60 | [true] (which also suppresses some URLs). 61 | 62 | See {{!Cmarkit_renderer.example}this example} to extend or 63 | selectively override the renderer. *) 64 | 65 | (** {1:render Render functions} 66 | 67 | Only useful if you extend the renderer. *) 68 | 69 | val safe : Cmarkit_renderer.context -> bool 70 | (** [safe c] is [true] if a safe rendering is requested. 71 | See {!renderer} for more information. *) 72 | 73 | val html_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit 74 | (** [html_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] 75 | with HTML markup delimiters [<] [>] [&] and ["] escaped 76 | to HTML entities (Single quotes ['] are not escaped use ["] to delimit your 77 | attributes). This also renders U+0000 to {!Uchar.rep}. *) 78 | 79 | val buffer_add_html_escaped_uchar : Buffer.t -> Uchar.t -> unit 80 | (** [buffer_add_html_escaped_uchar] is {!html_escaped_uchar} but appends 81 | to a buffer value. *) 82 | 83 | val html_escaped_string : Cmarkit_renderer.context -> string -> unit 84 | (** [html_escaped_string c s] renders string [s] on [c] with HTML 85 | markup delimiters [<], [>], [&], and ["] escaped to HTML 86 | entities (Single quotes ['] are not escaped, use ["] to delimit your 87 | attributes). *) 88 | 89 | val buffer_add_html_escaped_string : Buffer.t -> string -> unit 90 | (** [buffer_add_html_escaped_string] is {!html_escaped_string} but appends 91 | to a buffer value. 
*) 92 | 93 | val pct_encoded_string : Cmarkit_renderer.context -> string -> unit 94 | (** [pct_encoded_string c s] renders string [s] on [c] with everything 95 | percent encoded except [%] and the 96 | {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.3} 97 | [unreserved]}, 98 | {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} 99 | [sub-delims]} 100 | and the {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} 101 | [gen-delims]} 102 | URI characters except brackets [\[] and [\]] (to match the [cmark] tool). 103 | 104 | In other words only characters [%] [a-z] [A-Z] [0-9] [-] [.] [_] [~] [!] 105 | [$] [&] ['] [(] [)] [*] [+] [,] [;] [=] [:] [/] [?] [#] [@] 106 | are not percent-encoded. 107 | 108 | {b Warning.} The function also replaces both [&] and ['] by their 109 | corresponding HTML entities, so you can't use this in a context 110 | that doesn't allow entities. Besides this assumes [s] may already 111 | have percent encoded bits so it doesn't percent encode [%], as such you 112 | can't use this as a general percent encode function. *) 113 | 114 | val buffer_add_pct_encoded_string : Buffer.t -> string -> unit 115 | (** [buffer_add_pct_encoded_string b s] is {!pct_encoded_string} but 116 | appends to a buffer value. *) 117 | 118 | (** {1:integration HTML integration notes} 119 | 120 | {2:code_blocks Code blocks} 121 | 122 | If a language [lang] can be extracted from the info string of a 123 | code block with 124 | {!Cmarkit.Block.Code_block.language_of_info_string}, a 125 | [language-lang] class is added to the corresponding [code] 126 | element. If you want to highlight the syntax, adding 127 | {{:https://highlightjs.org/}highlight.js} to your page is an 128 | option. 129 | 130 | {2:ids Heading identifiers} 131 | 132 | Headings identifiers and anchors are added to the output whenever 133 | {!Cmarkit.Block.Heading.val-id} holds a value. 
If the identifier 134 | already exists it is made unique by appending ["-"] and the first 135 | number starting from 1 that makes it unique. 136 | 137 | {2:math Maths} 138 | 139 | If your document has {!Cmarkit.Inline.extension-Ext_math_span} 140 | inlines or {!Cmarkit.Block.extension-Ext_math_block} blocks, the 141 | default renderer outputs them in [\(], [\)] and 142 | [\\[], [\\]] delimiters. You should add 143 | {{:https://katex.org/}K{^A}T{_E}X} or 144 | {{:https://www.mathjax.org/}MathJax} in your page to let these 145 | bits be rendered by the typography they deserve. 146 | 147 | {2:page_frame Page frame} 148 | 149 | The default renderers only generate HTML fragments. You may 150 | want to add a page frame. For example: 151 | {[ 152 | let html_doc_of_md ?(lang = "en") ~title ~safe md = 153 | let doc = Cmarkit.Doc.of_string md in 154 | let r = Cmarkit_html.renderer ~safe () in 155 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 156 | let buffer_add_title = Cmarkit_html.buffer_add_html_escaped_string in 157 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 158 | {|<!DOCTYPE html> 159 | <html lang="%s"> 160 | <head> 161 | <meta charset="utf-8"> 162 | <title>%a</title> 163 | </head> 164 | <body> 165 | %a</body> 166 | </html>|} 167 | lang buffer_add_title title buffer_add_doc doc 168 | ]} 169 | *) 170 | -------------------------------------------------------------------------------- /src/cmarkit_latex.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to L{^A}T{_E}X. 7 | 8 | Generates L{^A}T{_E}X fragments, consult the {{!integration} 9 | integration notes} for requirements on the document. 10 | 11 | See {{!page-index.quick}a quick example} and {{!doc_frame}another one}.
12 | 13 | {b Warning.} Rendering outputs are unstable, they may be tweaked even 14 | between minor versions of the library. *) 15 | 16 | (** {1:rendering Rendering} *) 17 | 18 | type heading_level = 19 | | Part 20 | | Chapter 21 | | Section 22 | | Subsection (** *) 23 | (** The type for main L{^A}T{_E}X heading levels. *) 24 | 25 | val of_doc : 26 | ?backend_blocks:bool -> ?first_heading_level:heading_level -> 27 | Cmarkit.Doc.t -> string 28 | (** [of_doc d] is a L{^A}T{_E}X fragment for [d]. See {!val-renderer} 29 | for more details and documentation about rendering options. *) 30 | 31 | (** {1:renderer Renderer} *) 32 | 33 | val renderer : 34 | ?backend_blocks:bool -> ?first_heading_level:heading_level -> unit -> 35 | Cmarkit_renderer.t 36 | (** [renderer] is a default L{^A}T{_E}X renderer. This renders 37 | the strict CommonMark abstract syntax tree and the supported 38 | Cmarkit {{!Cmarkit.extensions}extensions}. 39 | 40 | The inline, block and document renderers always return 41 | [true]. Unknown block and inline values are rendered by a 42 | L{^A}T{_E}X comment. 43 | 44 | The following options are available: 45 | 46 | {ul 47 | {- [backend_blocks], if [true], code blocks with language [=latex] 48 | are written verbatim in the output and any other code block whose 49 | language starts with [=] is dropped. Defaults to [false].} 50 | {- [first_heading_level], the L{^A}T{_E}X heading level to use 51 | for the first CommonMark heading level. Defaults to [Section].}} 52 | 53 | See {{!Cmarkit_renderer.example}this example} to extend or 54 | selectively override the renderer. *) 55 | 56 | (** {1:render Render functions} 57 | 58 | Only useful if you extend the renderer. *) 59 | 60 | val newline : Cmarkit_renderer.context -> unit 61 | (** [newline c] starts a new line, except on the first call on [c] which is 62 | a nop.
*) 63 | 64 | val latex_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit 65 | (** [latex_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] 66 | properly escaped for L{^A}T{_E}X. That is the characters 67 | [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] 68 | are escaped. This also renders U+0000 to {!Uchar.rep}. *) 69 | 70 | val buffer_add_latex_escaped_uchar : Buffer.t -> Uchar.t -> unit 71 | (** [buffer_add_latex_escaped_uchar] is {!latex_escaped_uchar} but appends 72 | to a buffer value. *) 73 | 74 | val latex_escaped_string : Cmarkit_renderer.context -> string -> unit 75 | (** [latex_escaped_string c s] renders string [s] on [c] with 76 | characters [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] escaped. This 77 | also escapes U+0000 to {!Uchar.rep}. *) 78 | 79 | val buffer_add_latex_escaped_string : Buffer.t -> string -> unit 80 | (** [buffer_add_latex_escaped_string] is {!latex_escaped_string} 81 | but acts on a buffer value. *) 82 | 83 | (** {1:integration L{^A}T{_E}X integration notes} 84 | 85 | Along with the built-in [graphicx] package, the following 86 | L{^A}T{_E}X packages are needed to use the outputs of the default 87 | renderer: 88 | {v 89 | tlmgr install enumitem listings hyperref # Required 90 | tlmgr install ulem # Strikethrough extension 91 | tlmgr install bera fontspec # Optional 92 | v} 93 | This means you should have at least the following in your 94 | document preamble: 95 | {v 96 | % Required 97 | \usepackage{graphicx} 98 | \usepackage{enumitem} 99 | \usepackage{listings} 100 | \usepackage{hyperref} 101 | \usepackage[normalem]{ulem} % Strikethrough extension 102 | 103 | % Optional 104 | \usepackage[scaled=0.8]{beramono} % A font for code blocks 105 | \usepackage{fontspec} % Supports more Unicode characters 106 | v} 107 | 108 | See the sections below for more details. 109 | 110 | {2:char_encoding Character encoding} 111 | 112 | The output is UTF-8 encoded.
113 | {{:https://tug.org/TUGboat/tb39-1/tb121ltnews28.pdf}It became} the 114 | default encoding for L{^A}T{_E}X in 2018. But if you are using 115 | an older version a [\usepackage[utf8]{inputenc}] may be needed. 116 | 117 | Using [xelatex] rather than [pdflatex] will not get stuck on missing 118 | glyphs. 119 | 120 | {2:links Autolinks and links} 121 | 122 | The {{:https://www.ctan.org/pkg/hyperref}[hyperref]} package is 123 | used to render links ([\href]) and autolinks ([\url]). Link 124 | destinations starting with a [#] are assumed to refer to 125 | {{!labels}section labels} and are rendered using the [\hyperref] 126 | macro, with the [#] chopped. 127 | 128 | {2:images Images} 129 | 130 | Images are inserted using the 131 | {{:https://ctan.org/pkg/graphicx}graphicx} package. Only 132 | images with relative URLs are supported, those that point 133 | to external resources on the www are turned into links. 134 | 135 | {2:labels Section labels} 136 | 137 | Section labels are added to the output whenever 138 | {!Cmarkit.Block.Heading.val-id} holds a value. If the identifier 139 | already exists it is made unique by appending ["-"] and the first 140 | number starting from 1 that makes it unique. Also the character 141 | [_] seems problematic in labels even when escaped, we map it to [-] 142 | (if you know any better get in touch). 143 | 144 | {2:lists Lists} 145 | 146 | To support the starting point of ordered lists without having to 147 | fiddle with [enumi] counters, the 148 | {{:https://www.ctan.org/pkg/enumitem}[enumitem]} package is used. 149 | 150 | {2:code_blocks Code blocks} 151 | 152 | If a language [lang] can be 153 | {{!Cmarkit.Block.Code_block.language_of_info_string}extracted} 154 | from a code block info string, the 155 | {{:https://www.ctan.org/pkg/listings}[listings]} package is used 156 | with the corresponding language in a [lstlisting] environment. 157 | Otherwise the built-in [verbatim] environment is used.
158 | 159 | Note that the [listings] package has no definition for the [ocaml] 160 | language, the default renderings are a bit subpar and 161 | break on character literals with double quotes. This improves things: 162 | {v 163 | \lstset{ 164 | columns=[c]fixed, 165 | basicstyle=\small\ttfamily, 166 | keywordstyle=\bfseries, 167 | upquote=true, 168 | commentstyle=\slshape, 169 | breaklines=true, 170 | showstringspaces=false} 171 | 172 | \lstdefinelanguage{ocaml}{language=[objective]caml, 173 | % Fixes double quotes in char literals 174 | literate={'"'}{\textquotesingle "\textquotesingle}3 175 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 176 | } 177 | v} 178 | 179 | {2:doc_frame Document frame} 180 | 181 | The default renderer only generates L{^A}T{_E}X fragments. You 182 | may want to add a document frame. For example: 183 | {[ 184 | let latex_doc_of_md ?(title = "") md = 185 | let doc = Cmarkit.Doc.of_string md in 186 | let r = Cmarkit_latex.renderer () in 187 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 188 | let buffer_add_title = Cmarkit_latex.buffer_add_latex_escaped_string in 189 | let maketitle = if title = "" then "" else {|\maketitle|} in 190 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 191 | {|\documentclass{article} 192 | 193 | \usepackage{graphicx} 194 | \usepackage{enumitem} 195 | \usepackage{listings} 196 | \usepackage{hyperref} 197 | \usepackage[normalem]{ulem} 198 | \usepackage[scaled=0.8]{beramono} 199 | \usepackage{fontspec} 200 | 201 | \lstset{ 202 | columns=[c]fixed, 203 | basicstyle=\small\ttfamily, 204 | keywordstyle=\bfseries, 205 | upquote=true, 206 | commentstyle=\slshape, 207 | breaklines=true, 208 | showstringspaces=false} 209 | 210 | \lstdefinelanguage{ocaml}{language=[objective]caml, 211 | literate={'"'}{\textquotesingle "\textquotesingle}3 212 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 213 | } 214 | 215 | \title{%a} 216 | \begin{document} 217 | %s 218 | %a 219 | 
\end{document}|} buffer_add_title title maketitle buffer_add_doc doc 220 | ]} 221 | 222 | Ignore this: ". 223 | *) 224 | -------------------------------------------------------------------------------- /src/cmarkit_renderer.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Renderers *) 7 | 8 | module Dict = Cmarkit_base.Dict 9 | 10 | type t = 11 | { init_context : context -> Cmarkit.Doc.t -> unit; 12 | inline : inline; 13 | block : block; 14 | doc : doc; } 15 | 16 | and context = 17 | { renderer : t; 18 | mutable state : Dict.t; 19 | b : Buffer.t; 20 | mutable doc : Cmarkit.Doc.t } 21 | 22 | and inline = context -> Cmarkit.Inline.t -> bool 23 | and block = context -> Cmarkit.Block.t -> bool 24 | and doc = context -> Cmarkit.Doc.t -> bool 25 | 26 | let nop _ _ = () 27 | let none _ _ = false 28 | 29 | let make 30 | ?(init_context = nop) ?(inline = none) ?(block = none) ?(doc = none) () 31 | = 32 | { init_context; inline; block; doc } 33 | 34 | let compose g f = 35 | let init_context c d = g.init_context c d; f.init_context c d in 36 | let block c b = f.block c b || g.block c b in 37 | let inline c i = f.inline c i || g.inline c i in 38 | let doc c d = f.doc c d || g.doc c d in 39 | { init_context; inline; block; doc } 40 | 41 | let init_context r = r.init_context 42 | let inline r = r.inline 43 | let block r = r.block 44 | let doc r = r.doc 45 | 46 | module Context = struct 47 | type t = context 48 | let make renderer b = 49 | { renderer; b; state = Dict.empty; doc = Cmarkit.Doc.empty } 50 | 51 | let buffer c = c.b 52 | let renderer c = c.renderer 53 | let get_doc (c : context) = c.doc 54 | let get_defs (c : context) = Cmarkit.Doc.defs c.doc 55 | 56 | module 
State = struct 57 | type 'a t = 'a Dict.key 58 | let make = Dict.key 59 | let find c st = Dict.find st c.state 60 | let get c st = Option.get (Dict.find st c.state) 61 | let set c st = function 62 | | None -> c.state <- Dict.remove st c.state 63 | | Some s -> c.state <- Dict.add st s c.state 64 | end 65 | 66 | let init c d = c.renderer.init_context c d 67 | 68 | let invalid_inline _ = invalid_arg "Unknown Cmarkit.Inline.t case" 69 | let invalid_block _ = invalid_arg "Unknown Cmarkit.Block.t case" 70 | let unhandled_doc _ = invalid_arg "Unhandled Cmarkit.Doc.t" 71 | 72 | let byte r c = Buffer.add_char r.b c 73 | let utf_8_uchar r u = Buffer.add_utf_8_uchar r.b u 74 | let string c s = Buffer.add_string c.b s 75 | let inline c i = ignore (c.renderer.inline c i || invalid_inline i) 76 | let block c b = ignore (c.renderer.block c b || invalid_block b) 77 | let doc (c : context) d = 78 | c.doc <- d; init c d; 79 | ignore (c.renderer.doc c d || unhandled_doc d); 80 | c.doc <- Cmarkit.Doc.empty 81 | end 82 | 83 | let doc_to_string r d = 84 | let b = Buffer.create 1024 in 85 | let c = Context.make r b in 86 | Context.doc c d; Buffer.contents b 87 | 88 | let buffer_add_doc r b d = Context.doc (Context.make r b) d 89 | -------------------------------------------------------------------------------- /src/cmarkit_renderer.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Renderer abstraction. 7 | 8 | Stateful renderer abstraction to render documents in {!Stdlib.Buffer.t} 9 | values. 10 | 11 | {b Note.} This is a low-level interface. For quick and standard 12 | renderings see {!Cmarkit_html.of_doc}, {!Cmarkit_latex.of_doc} and 13 | {!Cmarkit_commonmark.of_doc}. 
If you want to extend them, 14 | see {{!example}this example}. *) 15 | 16 | (** {1:rendering Rendering} *) 17 | 18 | type t 19 | (** The type for renderers. *) 20 | 21 | val doc_to_string : t -> Cmarkit.Doc.t -> string 22 | (** [doc_to_string r d] renders document [d] to a string using renderer [r]. *) 23 | 24 | val buffer_add_doc : t -> Buffer.t -> Cmarkit.Doc.t -> unit 25 | (** [buffer_add_doc r b d] renders document [d] on buffer [b] using 26 | renderer [r]. *) 27 | 28 | (** {1:renderers Renderers} *) 29 | 30 | type context 31 | (** The type for rendering contexts, holds a renderer, a 32 | {!Stdlib.Buffer.t} value to act on and rendering state. *) 33 | 34 | type inline = context -> Cmarkit.Inline.t -> bool 35 | (** The type for inline renderers. 36 | 37 | Return [false] if you are not interested in rendering the given 38 | inline. Use {!Context.inline} and {!Context.block} on the given 39 | context if you need to invoke the renderer recursively. *) 40 | 41 | type block = context -> Cmarkit.Block.t -> bool 42 | (** The type for block renderers. 43 | 44 | Return [false] if you are not interested in rendering the given 45 | block. Use {!Context.inline} and {!Context.block} with the given 46 | context if you need to invoke the renderer recursively. *) 47 | 48 | type doc = context -> Cmarkit.Doc.t -> bool 49 | (** The type for document renderers. 50 | 51 | Return [false] if you are not interested in rendering the given 52 | document. Use {!Context.inline}, {!Context.block} and {!Context.doc} 53 | with the given context if you need to invoke the renderer recursively. *) 54 | 55 | val make : 56 | ?init_context:(context -> Cmarkit.Doc.t -> unit) -> 57 | ?inline:inline -> ?block:block -> ?doc:doc -> unit -> t 58 | (** [make ?init_context ?inline ?block ?doc ()] is a renderer using 59 | [inline], [block], [doc] to render documents. They all default to 60 | [(fun _ _ -> false)], which means that by default they defer to 61 | next renderer (see {!compose}). 
62 | 63 | [init_context] is used to initialize the context for the renderer 64 | before a document render. It defaults to [fun _ _ -> ()]. *) 65 | 66 | val compose : t -> t -> t 67 | (** [compose g f] renders first with [f] and if a renderer returns [false], 68 | falls back on its counterpart in [g]. 69 | 70 | The {!init_context} of the result calls [g]'s initialization 71 | context function first, followed by the one of [f]. This means 72 | [f]'s initialization function can assume the context is already 73 | set up for [g]. *) 74 | 75 | (** {2:accessors Accessors} 76 | 77 | Normally you should not need these but you may want to peek 78 | into other renderers. *) 79 | 80 | val init_context : t -> (context -> Cmarkit.Doc.t -> unit) 81 | (** [init_context r] is the context initialization function for [r]. *) 82 | 83 | val inline : t -> inline 84 | (** [inline r] is the inline renderer of [r]. *) 85 | 86 | val block : t -> block 87 | (** [block r] is the block renderer of [r]. *) 88 | 89 | val doc : t -> doc 90 | (** [doc r] is the document renderer of [r]. *) 91 | 92 | (** {1:context Rendering contexts} *) 93 | 94 | (** Rendering contexts. *) 95 | module Context : sig 96 | 97 | (** {1:contexts Contexts} *) 98 | 99 | type renderer := t 100 | 101 | type t = context 102 | (** The type for rendering contexts. *) 103 | 104 | val make : renderer -> Buffer.t -> t 105 | (** [make r b] is a context using renderer [r] to render documents 106 | on buffer [b]. 107 | 108 | The renderer [r] must be able to handle any inline, block and 109 | document values (i.e. its renderers should always return [true]) 110 | otherwise [Invalid_argument] may be raised on renders. 111 | 112 | This means the last renderer you {{!compose}compose with} should 113 | always have catch all cases returning [true]; after possibly 114 | indicating in the output that something was missed.
The built-in 115 | renderers {!Cmarkit_commonmark.val-renderer}, 116 | {!Cmarkit_html.val-renderer} and {!Cmarkit_latex.val-renderer} 117 | do have these catch all cases. *) 118 | 119 | val renderer : t -> renderer 120 | (** [renderer c] is the renderer of [c]. *) 121 | 122 | val buffer : t -> Buffer.t 123 | (** [buffer c] is the buffer of [c]. *) 124 | 125 | val get_doc : t -> Cmarkit.Doc.t 126 | (** [get_doc c] is the document being rendered. *) 127 | 128 | val get_defs : t -> Cmarkit.Label.defs 129 | (** [get_defs c] is [Doc.defs (get_doc c)]. *) 130 | 131 | (** Custom context state. *) 132 | module State : sig 133 | 134 | type 'a t 135 | (** The type for custom state of type ['a]. *) 136 | 137 | val make : unit -> 'a t 138 | (** [make ()] is a new bit of context state. *) 139 | 140 | val find : context -> 'a t -> 'a option 141 | (** [find c state] is the state [state] of context [c], if any. *) 142 | 143 | val get : context -> 'a t -> 'a 144 | (** [get c state] is the state [state] of context [c], raises 145 | [Invalid_argument] if there is no state [state] in [c]. *) 146 | 147 | val set : context -> 'a t -> 'a option -> unit 148 | (** [set c state s] sets the state [state] of [c] to [s]. [state] is 149 | cleared in [c] if [s] is [None]. *) 150 | end 151 | 152 | val init : t -> Cmarkit.Doc.t -> unit 153 | (** [init c d] calls the initialisation function of [c]'s 154 | {!val-renderer} on [c] and [d]. Note, this is done automatically by {!val-doc}. *) 155 | 156 | (** {1:render Rendering functions} 157 | 158 | These functions append data to the {!buffer} of the context. For more 159 | specialized rendering functions, see the corresponding rendering 160 | backends. *) 161 | 162 | val byte : t -> char -> unit 163 | (** [byte c b] renders byte [b] verbatim on [c]. *) 164 | 165 | val utf_8_uchar : t -> Uchar.t -> unit 166 | (** [utf_8_uchar c u] renders the UTF-8 encoding of [u] on [c].
*) 167 | 168 | val string : t -> string -> unit 169 | (** [string c s] renders string [s] verbatim on [c]. *) 170 | 171 | val inline : t -> Cmarkit.Inline.t -> unit 172 | (** [inline c i] renders inline [i] on [c]. This invokes the 173 | {{!compose}composition} of inline renderers of [c]. *) 174 | 175 | val block : t -> Cmarkit.Block.t -> unit 176 | (** [block c b] renders block [b] on [c]. This invokes the 177 | {{!compose}composition} of block renderers of [c]. *) 178 | 179 | val doc : t -> Cmarkit.Doc.t -> unit 180 | (** [doc c d] initializes [c] with {!init} and renders document [d] on [c]. 181 | This invokes the {{!compose}composition} of document renderers of [c]. *) 182 | end 183 | 184 | (** {1:example Extending renderers} 185 | 186 | This example extends the {!Cmarkit_html.val-renderer} but it 187 | applies {e mutatis mutandis} to the other backend document 188 | renderers. 189 | 190 | Let's assume you want to: 191 | 192 | {ul 193 | {- Extend the abstract syntax tree with a [Doc] block which 194 | allows splicing documents into another one (note that 195 | splicing is already built-in via the {!Cmarkit.Block.extension-Blocks} 196 | block case).} 197 | {- Change the rendering of {!Cmarkit.Inline.extension-Image} inlines to 198 | render HTML [video] or [audio] elements depending on the link's 199 | destination suffix.} 200 | {- For the rest use the built-in {!Cmarkit_html.val-renderer} renderer 201 | as it exists.}} 202 | 203 | This boils down to: 204 | 205 | {ol 206 | {- Add a new case to the abstract syntax tree.} 207 | {- Define a [custom_html] renderer which treats 208 | {!Cmarkit.Inline.Image} and the new [Doc] case the way we 209 | see fit and returns [false] otherwise to use the built-in renderer.
} 210 | {- {!compose} [custom_html] with {!Cmarkit_html.val-renderer}}} 211 | 212 | {[ 213 | type Cmarkit.Block.t += Doc of Cmarkit.Doc.t (* 1 *) 214 | 215 | let media_link c l = 216 | let has_ext s ext = String.ends_with ~suffix:ext s in 217 | let is_video s = List.exists (has_ext s) [".mp4"; ".webm"] in 218 | let is_audio s = List.exists (has_ext s) [".mp3"; ".flac"] in 219 | let defs = Cmarkit_renderer.Context.get_defs c in 220 | match Cmarkit.Inline.Link.reference_definition defs l with 221 | | Some Cmarkit.Link_definition.Def (ld, _) -> 222 | let start_tag = match Cmarkit.Link_definition.dest ld with 223 | | Some (src, _) when is_video src -> Some ("<video", src) 224 | | Some (src, _) when is_audio src -> Some ("<audio", src) 225 | | None | Some _ -> None 226 | in 227 | begin match start_tag with 228 | | None -> false (* let the default HTML renderer handle that *) 229 | | Some (start_tag, src) -> 230 | (* More could be done with the reference title and link text *) 231 | Cmarkit_renderer.Context.string c start_tag; 232 | Cmarkit_renderer.Context.string c {| src="|}; 233 | Cmarkit_html.pct_encoded_string c src; 234 | Cmarkit_renderer.Context.string c {|" />|}; 235 | true 236 | end 237 | | None | Some _ -> false (* let the default HTML renderer handle that *) 238 | 239 | let custom_html = 240 | let inline c = function 241 | | Cmarkit.Inline.Image (l, _) -> media_link c l 242 | | _ -> false (* let the default HTML renderer handle that *) 243 | in 244 | let block c = function 245 | | Doc d -> 246 | (* It's important to recurse via Cmarkit_renderer.Context.block *) 247 | Cmarkit_renderer.Context.block c (Cmarkit.Doc.block d); true 248 | | _ -> false (* let the default HTML renderer handle that *) 249 | in 250 | Cmarkit_renderer.make ~inline ~block () (* 2 *) 251 | 252 | let custom_html_of_doc ~safe doc = 253 | let default = Cmarkit_html.renderer ~safe () in 254 | let r = Cmarkit_renderer.compose default custom_html in (* 3 *) 255 | Cmarkit_renderer.doc_to_string r doc 256 | ]} 257 | 258 | The [custom_html_of_doc] function performs your extended 259 | renderings.
*) 260 | -------------------------------------------------------------------------------- /support/unicode_data.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | let pp_uchar ppf u = Format.fprintf ppf "U+%04X" (Uchar.to_int u) 7 | 8 | let fold_uchars f acc = 9 | let rec loop f acc u = 10 | let acc = f acc u in 11 | if Uchar.equal u Uchar.max then acc else loop f acc (Uchar.succ u) 12 | in 13 | loop f acc Uchar.min 14 | 15 | let sat_list p = 16 | let add acc u = if p u then u :: acc else acc in 17 | List.rev (fold_uchars add []) 18 | 19 | (* See https://spec.commonmark.org/current/#unicode-whitespace-character *) 20 | let is_whitespace u = 21 | let is_zs = Uucp.Gc.general_category u = `Zs in 22 | let u = Uchar.to_int u in 23 | is_zs || u = 0x0009 || u = 0x000A || u = 0x000C || u = 0x000d 24 | 25 | (* See https://spec.commonmark.org/current/#ascii-punctuation-character *) 26 | let is_ascii_punctuation u = 27 | let u = Uchar.to_int u in 28 | (0x0021 <= u && u <= 0x002F) || 29 | (0x003A <= u && u <= 0x0040) || 30 | (0x005B <= u && u <= 0x0060) || 31 | (0x007B <= u && u <= 0x007E) 32 | 33 | let is_punctuation u = match Uucp.Gc.general_category u with 34 | | `Pc | `Pd | `Pe | `Pf | `Pi | `Po | `Ps -> true 35 | | _ -> is_ascii_punctuation u 36 | 37 | let whitespace_list = sat_list is_whitespace 38 | let punctuation_list = sat_list is_punctuation 39 | 40 | let case_fold_map = 41 | let uchar_map acc u = match Uucp.Case.Fold.fold u with 42 | | `Self -> acc 43 | | `Uchars f -> 44 | let esc u = Printf.sprintf "\\u{%04X}" (Uchar.to_int u) in 45 | (u, String.concat "" (List.map esc f)) :: acc 46 | in 47 | List.rev (fold_uchars uchar_map []) 48 | 49 | let byte_size v = 50 | 
let words = Obj.reachable_words (Obj.repr v) in 51 | (words / (Sys.word_size / 8)) 52 | 53 | let case_fold_count = 54 | let add acc u = match Uucp.Case.Fold.fold u with 55 | | `Self -> acc | `Uchars _ -> acc + 1 56 | in 57 | fold_uchars add 0 58 | 59 | let test () = 60 | Printf.printf "whitespace: %d characters\n" (List.length whitespace_list); 61 | Printf.printf "punctuation: %d characters\n" (List.length punctuation_list); 62 | Printf.printf "non-id case fold: %d characters\n" case_fold_count; 63 | () 64 | 65 | let year = (Unix.gmtime (Unix.gettimeofday ())).Unix.tm_year + 1900 66 | 67 | let gen () = 68 | let pp_cp ppf u = Format.fprintf ppf "0x%04X" (Uchar.to_int u) in 69 | let pp_binding ppf (u, s) = Format.fprintf ppf "%a, \"%s\"" pp_cp u s in 70 | let pp_sep ppf () = Format.fprintf ppf ";@ " in 71 | let pp_cps ppf us = Format.pp_print_list ~pp_sep pp_cp ppf us in 72 | let pp_map ppf m = Format.pp_print_list ~pp_sep pp_binding ppf m in 73 | Format.printf 74 | {|(*--------------------------------------------------------------------------- 75 | Copyright (c) %d The cmarkit programmers. All rights reserved. 76 | SPDX-License-Identifier: ISC 77 | ---------------------------------------------------------------------------*) 78 | 79 | (* Do not edit. Data generated by support/unicode_data.ml *) 80 | 81 | let unicode_version = "%s" 82 | 83 | let whitespace =@? 84 | @[<1>[|%a|]@] 85 | 86 | let punctuation =@? 87 | @[<1>[|%a|]@] 88 | 89 | let case_fold =@? 90 | @[<1>[|%a|]@] 91 | %!|} year Uucp.unicode_version pp_cps whitespace_list pp_cps punctuation_list 92 | pp_map case_fold_map 93 | 94 | let main () = match Array.to_list Sys.argv with 95 | | _ :: "-t" :: [] -> test () 96 | | _ :: [] -> gen () 97 | | _ -> Printf.printf "Usage: %s [-t]\n%!" 
Sys.argv.(0) 98 | 99 | let () = if !Sys.interactive then () else main () 100 | -------------------------------------------------------------------------------- /test/bench.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Benchmarker for Cmarkit. Just renders to HTML the way `cmark` does. *) 7 | 8 | let ( let* ) = Result.bind 9 | 10 | let read_file file = 11 | try 12 | let ic = if file = "-" then stdin else open_in_bin file in 13 | let finally () = if file = "-" then () else close_in_noerr ic in 14 | Fun.protect ~finally @@ fun () -> Ok (In_channel.input_all ic) 15 | with 16 | | Sys_error err -> Error err 17 | 18 | let to_html file exts locs layout unsafe = 19 | let strict = not exts and safe = not unsafe in 20 | let* content = read_file file in 21 | let doc = Cmarkit.Doc.of_string ~layout ~locs ~file ~strict content in 22 | let r = Cmarkit_html.xhtml_renderer ~safe () in 23 | let html = Cmarkit_renderer.doc_to_string r doc in 24 | Ok (print_string html) 25 | 26 | let main () = 27 | let strf = Printf.sprintf in 28 | let usage = "Usage: bench [OPTION]… [FILE.md]" in 29 | let layout = ref false in 30 | let locs = ref false in 31 | let unsafe = ref false in 32 | let exts = ref false in 33 | let file = ref None in 34 | let args = 35 | [ "--layout", Arg.Set layout, "Keep layout information."; 36 | "--locs", Arg.Set locs, "Keep locations."; 37 | "--exts", Arg.Set exts, "Activate supported extensions"; 38 | "--unsafe", Arg.Set unsafe, "Keep HTML blocks and raw HTML"; ] 39 | in 40 | let pos s = match !file with 41 | | Some _ -> raise (Arg.Bad (strf "Don't know what to do with %S" s)) 42 | | None -> file := Some s 43 | in 44 | Arg.parse args pos usage; 45 | let file = 
Option.value ~default:"-" !file in 46 | match to_html file !exts !locs !layout !unsafe with 47 | | Error e -> Printf.eprintf "bench: %s\n%!" e; 1 48 | | Ok () -> 0 49 | 50 | let () = if !Sys.interactive then () else exit (main ()) 51 | -------------------------------------------------------------------------------- /test/examples.ml: -------------------------------------------------------------------------------- 1 | (* This code is in the public domain *) 2 | 3 | (* index.mld *) 4 | 5 | let cmark_to_html : strict:bool -> safe:bool -> string -> string = 6 | fun ~strict ~safe md -> 7 | let doc = Cmarkit.Doc.of_string ~strict md in 8 | Cmarkit_html.of_doc ~safe doc 9 | 10 | let cmark_to_latex : strict:bool -> string -> string = 11 | fun ~strict md -> 12 | let doc = Cmarkit.Doc.of_string ~strict md in 13 | Cmarkit_latex.of_doc doc 14 | 15 | let cmark_to_commonmark : strict:bool -> string -> string = 16 | fun ~strict md -> 17 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict md in 18 | Cmarkit_commonmark.of_doc doc 19 | 20 | (* Cmarkit_renderer *) 21 | 22 | type Cmarkit.Block.t += Doc of Cmarkit.Doc.t (* 1 *) 23 | 24 | let media_link c l = 25 | let has_ext s ext = String.ends_with ~suffix:ext s in 26 | let is_video s = List.exists (has_ext s) [".mp4"; ".webm"] in 27 | let is_audio s = List.exists (has_ext s) [".mp3"; ".flac"] in 28 | let defs = Cmarkit_renderer.Context.get_defs c in 29 | match Cmarkit.Inline.Link.reference_definition defs l with 30 | | Some Cmarkit.Link_definition.Def (ld, _) -> 31 | let start_tag = match Cmarkit.Link_definition.dest ld with 32 | | Some (src, _) when is_video src -> Some ("<video", src) 33 | | Some (src, _) when is_audio src -> Some ("<audio", src) 34 | | None | Some _ -> None 35 | in 36 | begin match start_tag with 37 | | None -> false (* let the default HTML renderer handle that *) 38 | | Some (start_tag, src) -> 39 | (* More could be done with the reference title and link text *) 40 | Cmarkit_renderer.Context.string c start_tag; 41 | Cmarkit_renderer.Context.string c {| src="|}; 42 | Cmarkit_html.pct_encoded_string c
src; 43 | Cmarkit_renderer.Context.string c {|" />|}; 44 | true 45 | end 46 | | None | Some _ -> false (* let the default HTML renderer handle that *) 47 | 48 | let custom_html = 49 | let inline c = function 50 | | Cmarkit.Inline.Image (l, _) -> media_link c l 51 | | _ -> false (* let the default HTML renderer handle that *) 52 | in 53 | let block c = function 54 | | Doc d -> 55 | (* It's important to recurse via Cmarkit_renderer.Context.block *) 56 | Cmarkit_renderer.Context.block c (Cmarkit.Doc.block d); true 57 | | _ -> false (* let the default HTML renderer handle that *) 58 | in 59 | Cmarkit_renderer.make ~inline ~block () (* 2 *) 60 | 61 | let custom_html_of_doc ~safe doc = 62 | let default = Cmarkit_html.renderer ~safe () in 63 | let r = Cmarkit_renderer.compose default custom_html in (* 3 *) 64 | Cmarkit_renderer.doc_to_string r doc 65 | 66 | (* Cmarkit.Link_reference *) 67 | 68 | let wikilink = Cmarkit.Meta.key () (* A meta key to recognize them *) 69 | 70 | let make_wikilink label = (* Just a placeholder label definition *) 71 | let meta = Cmarkit.Meta.tag wikilink (Cmarkit.Label.meta label) in 72 | Cmarkit.Label.with_meta meta label 73 | 74 | let with_wikilinks = function 75 | | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx 76 | | `Ref (_, _, (Some _ as def)) -> def (* As per doc definition *) 77 | | `Ref (_, ref, None) -> Some (make_wikilink ref) 78 | 79 | (* Cmarkit.Mapper *) 80 | 81 | let set_unknown_code_block_lang ~lang doc = 82 | let open Cmarkit in 83 | let default = lang, Meta.none in 84 | let block m = function 85 | | Block.Code_block (cb, meta) 86 | when Option.is_none (Block.Code_block.info_string cb) -> 87 | let layout = Block.Code_block.layout cb in 88 | let code = Block.Code_block.code cb in 89 | let cb = Block.Code_block.make ~layout ~info_string:default code in 90 | Mapper.ret (Block.Code_block (cb, meta)) 91 | | _ -> 92 | Mapper.default (* let the mapper thread the map *) 93 | in 94 | let mapper = Mapper.make ~block () in 95 |
Mapper.map_doc mapper doc 96 | 97 | (* Cmarkit.Folder *) 98 | 99 | let code_block_langs doc = 100 | let open Cmarkit in 101 | let module String_set = Set.Make (String) in 102 | let block m acc = function 103 | | Block.Code_block (cb, _) -> 104 | let acc = match Block.Code_block.info_string cb with 105 | | None -> acc 106 | | Some (info, _) -> 107 | match Block.Code_block.language_of_info_string info with 108 | | None -> acc 109 | | Some (lang, _) -> String_set.add lang acc 110 | in 111 | Folder.ret acc 112 | | _ -> 113 | Folder.default (* let the folder thread the fold *) 114 | in 115 | let folder = Folder.make ~block () in 116 | let langs = Folder.fold_doc folder String_set.empty doc in 117 | String_set.elements langs 118 | 119 | (* Cmarkit_html *) 120 | 121 | let html_doc_of_md ?(lang = "en") ~title ~safe md = 122 | let doc = Cmarkit.Doc.of_string md in 123 | let r = Cmarkit_html.renderer ~safe () in 124 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 125 | let buffer_add_title = Cmarkit_html.buffer_add_html_escaped_string in 126 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 127 | {|<!DOCTYPE html> 128 | <html lang="%s"> 129 | <head> 130 | <meta charset="utf-8"> 131 | <title>%a</title> 132 | </head> 133 | <body> 134 | %a</body> 135 | </html>|} 136 | lang buffer_add_title title buffer_add_doc doc 137 | 138 | (* Cmarkit_latex *) 139 | 140 | let latex_doc_of_md ?(title = "") md = 141 | let doc = Cmarkit.Doc.of_string md in 142 | let r = Cmarkit_latex.renderer () in 143 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 144 | let buffer_add_title = Cmarkit_latex.buffer_add_latex_escaped_string in 145 | let maketitle = if title = "" then "" else {|\maketitle|} in 146 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 147 | {|\documentclass{article} 148 | 149 | \usepackage{graphicx} 150 | \usepackage{enumitem} 151 | \usepackage{listings} 152 | \usepackage{hyperref} 153 | \usepackage[normalem]{ulem} 154 | \usepackage[scaled=0.8]{beramono} 155 | \usepackage{fontspec} 156 | 157 | \lstset{ 158 | columns=[c]fixed, 159 |
basicstyle=\small\ttfamily, 160 | keywordstyle=\bfseries, 161 | upquote=true, 162 | commentstyle=\slshape, 163 | breaklines=true, 164 | showstringspaces=false} 165 | 166 | \lstdefinelanguage{ocaml}{language=[objective]caml, 167 | literate={'"'}{\textquotesingle "\textquotesingle}3 168 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 169 | } 170 | 171 | \title{%a} 172 | \begin{document} 173 | %s 174 | %a 175 | \end{document}|} buffer_add_title title maketitle buffer_add_doc doc 176 | -------------------------------------------------------------------------------- /test/expect/basic.exts.latex: -------------------------------------------------------------------------------- 1 | \section{Extensions} 2 | 3 | \subsection{Footnotes} 4 | 5 | This is a footnote in history\footnote{\label{fn-1} 6 | And it can have 7 | lazy continuation lines and multiple paragraphs 8 | 9 | If you indent one column after the footnote label start. 10 | 11 | \begin{verbatim} 12 | cb 13 | \end{verbatim} 14 | 15 | \begin{itemize} 16 | \item{} 17 | list item 18 | ablc 19 | \item{} 20 | another item 21 | \end{itemize} 22 | } with mutiple references\textsuperscript{\ref{fn-1}} 23 | and even \textsuperscript{\ref{fn-1}} 24 | 25 | This is no longer the footnote. 26 | 27 | Can we make footnotes in footnotes\footnote{\label{fn-2} 28 | This gets tricky but I guess we could have a footnote\footnote{\label{fn-tricky} 29 | The foot of the footnote. But that's not going to link back\textsuperscript{\ref{fn-2}} 30 | } in 31 | a footnote. Also footnote\textsuperscript{\ref{fn-1}} in footnotes\textsuperscript{\ref{fn-2}} is\footnote{\label{fn-3}} tricky for getting 32 | all back references rendered correctly. 33 | 34 | Second footnote 35 | } ? 36 | 37 | Not the footnote 38 | 39 | Not the footnote but a reference to an empty footnote\textsuperscript{\ref{fn-3}} 40 | 41 | Not a footnote [\textasciicircum{}] 42 | 43 | [\textasciicircum{}]: not a footnote. 
44 | 45 | \subsection{Strikethrough} 46 | 47 | The stroken \sout{\emph{emph}}. 48 | 49 | Nesting the nest \sout{\emph{emph} \sout{stroke} \emph{emph \textbf{emph \sout{strikeagain}}}} 50 | 51 | There must be no blanks after the opener and before the closer. This 52 | is \textasciitilde{}\textasciitilde{} not an opener and \textasciitilde{}\textasciitilde{}this won't open \sout{that does}. 53 | 54 | \begin{itemize} 55 | \item{} 56 | Here we have \sout{stroken \texttt{code}}. 57 | \item{} 58 | Here we have \sout{nested \sout{stroken} ok} 59 | \end{itemize} 60 | 61 | \subsection{Math} 62 | 63 | The inline \(\sqrt{x^2-1}\) equation. 64 | 65 | There must be no blanks after the opener and before the closer. This 66 | makes so you can donate \$5 or \$10 dollars here and there without problem. 67 | 68 | There is no such think as nesting \(\sqrt{x^2-1}\)+3\(+3\). As usual 69 | delimiters can be \$escaped\$ \(\sqrt{16\$}\) 70 | 71 | Amazing, this is \href{https://example.org}{hyperlinked math \(3x^2\)} 72 | 73 | The HTML renderer should be careful with \(a < b\) escapes. 74 | 75 | Display math can be in \texttt{math} code blocks. 76 | \[ 77 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 78 | \] 79 | 80 | But it can also be in \[ \left( \sum_{k=1}^n 81 | a_k b_k \right)^2 < \Phi \] 82 | 83 | \subsection{List task items} 84 | 85 | \begin{itemize} 86 | \item{} \lbrack\phantom{x}\rbrack \enspace 87 | Task open 88 | \item{} \lbrack x\rbrack \enspace 89 | Task done 90 | \item{} \lbrack X\rbrack \enspace 91 | Task done 92 | \item{} \lbrack ✓\rbrack \enspace 93 | Task done (U+2713, CHECK MARK) 94 | \item{} \lbrack ✔\rbrack \enspace 95 | Task done (U+2714, HEAVY CHECK MARK) 96 | Indent 97 | \item{} 98 | Of course this can all be nested 99 | 100 | \begin{itemize} 101 | \item{} \lbrack 𐄂\rbrack \enspace 102 | Task done (U+10102, AEGEAN CHECK MARK) 103 | It will be done for sure. 
104 | 105 | \begin{verbatim} 106 | code block 107 | \end{verbatim} 108 | 109 | Not a code block 110 | \item{} \lbrack x\rbrack \enspace 111 | Task done 112 | \item{} \lbrack ~\rbrack \enspace 113 | Task cancelled 114 | Paragraphy 115 | \item{} \lbrack ~\rbrack \enspace 116 | Task canceled 117 | 118 | \begin{verbatim} 119 | we have a code block here too. 120 | \end{verbatim} 121 | \item{} 122 | [x]Not a task 123 | \item{} 124 | [x] Not a task 125 | \end{itemize} 126 | \item{} \lbrack\phantom{x}\rbrack \enspace\item{} \lbrack\phantom{x}\rbrack \enspace 127 | a 128 | 129 | \begin{verbatim} 130 | Code 131 | \end{verbatim} 132 | 133 | Not code 134 | \item{} \lbrack\phantom{x}\rbrack \enspace 135 | \begin{verbatim} 136 | Code 137 | \end{verbatim} 138 | 139 | Not code 140 | \end{itemize} 141 | 142 | \subsection{Tables} 143 | 144 | A sample table: 145 | 146 | \bigskip 147 | \begin{tabular}{llll} 148 | \multicolumn{1}{c}{\bfseries{}Id} 149 | & 150 | \multicolumn{1}{r}{\bfseries{}Name} 151 | & 152 | \multicolumn{1}{l}{\bfseries{}Description} 153 | & 154 | \multicolumn{1}{r}{\bfseries{}Link} 155 | \\ 156 | \hline 157 | \multicolumn{1}{c}{1} 158 | & 159 | \multicolumn{1}{r}{OCaml} 160 | & 161 | \multicolumn{1}{l}{The OCaml website} 162 | & 163 | \multicolumn{1}{r}{\url{https://ocaml.org}} 164 | \\ 165 | \multicolumn{1}{c}{2} 166 | & 167 | \multicolumn{1}{r}{Haskell} 168 | & 169 | \multicolumn{1}{l}{The Haskell website} 170 | & 171 | \multicolumn{1}{r}{\url{https://haskell.org}} 172 | \\ 173 | \multicolumn{1}{c}{3} 174 | & 175 | \multicolumn{1}{r}{MDN Web docs} 176 | & 177 | \multicolumn{1}{l}{Web dev docs} 178 | & 179 | \multicolumn{1}{r}{\url{https://developer.mozilla.org/}} 180 | \\ 181 | \multicolumn{1}{c}{4} 182 | & 183 | \multicolumn{1}{r}{Wikipedia} 184 | & 185 | \multicolumn{1}{l}{The Free Encyclopedia} 186 | & 187 | \multicolumn{1}{r}{\url{https://wikipedia.org}} 188 | \\ 189 | \hline 190 | \end{tabular} 191 | \bigskip 192 | 193 | Testing these non separator pipes. 
194 | 195 | \bigskip 196 | \begin{tabular}{lll} 197 | {\bfseries{}Fancy} 198 | & 199 | {\bfseries{}maybe} 200 | & 201 | {\bfseries{}hu|glu} 202 | \\ 203 | \hline 204 | {\emph{a | b}} 205 | & 206 | {\texttt{code |}} 207 | & 208 | {\href{https://example.org}{bl|a}} 209 | \\ 210 | {not | two cols} 211 | & 212 | {\(\sqrt(x^2 - 1)\)} 213 | & 214 | \\ 215 | \hline 216 | \end{tabular} 217 | \bigskip 218 | 219 | A table with changing labels and alignement: 220 | 221 | \bigskip 222 | \begin{tabular}{ll} 223 | {\bfseries{}h1} 224 | & 225 | \multicolumn{1}{c}{\bfseries{}h2} 226 | \\ 227 | \hline 228 | {1} 229 | & 230 | \multicolumn{1}{c}{2} 231 | \\ 232 | \multicolumn{1}{l}{\bfseries{}h3} 233 | & 234 | \multicolumn{1}{r}{\bfseries{}h4} 235 | \\ 236 | \hline 237 | \multicolumn{1}{l}{3} 238 | & 239 | \multicolumn{1}{r}{4} 240 | \\ 241 | \hline 242 | \end{tabular} 243 | \bigskip 244 | 245 | A simple header less table with left and right aligned columns 246 | 247 | \bigskip 248 | \begin{tabular}{ll} 249 | \hline 250 | \multicolumn{1}{l}{1} 251 | & 252 | \multicolumn{1}{r}{2} 253 | \\ 254 | \hline 255 | \end{tabular} 256 | \bigskip 257 | 258 | The simplest table: 259 | 260 | \bigskip 261 | \begin{tabular}{ll} 262 | \hline 263 | {1} 264 | & 265 | {2} 266 | \\ 267 | \hline 268 | \end{tabular} 269 | \bigskip 270 | 271 | A header only table: 272 | 273 | \bigskip 274 | \begin{tabular}{ll} 275 | \multicolumn{1}{c}{\bfseries{}h1} 276 | & 277 | \multicolumn{1}{c}{\bfseries{}h2} 278 | \\ 279 | \hline 280 | \hline 281 | \end{tabular} 282 | \bigskip 283 | 284 | Maximal number of columns all rows defines number of colums: 285 | 286 | \bigskip 287 | \begin{tabular}{llll} 288 | \multicolumn{1}{l}{\bfseries{}h1} 289 | & 290 | \multicolumn{1}{c}{\bfseries{}h2} 291 | & 292 | \multicolumn{1}{r}{\bfseries{}h3} 293 | & 294 | \\ 295 | \hline 296 | \multicolumn{1}{l}{left} 297 | & 298 | \multicolumn{1}{c}{center} 299 | & 300 | \multicolumn{1}{r}{right} 301 | & 302 | \\ 303 | \multicolumn{1}{l}{ha!} 304 
| & 305 | \multicolumn{1}{c}{four} 306 | & 307 | \multicolumn{1}{r}{columns} 308 | & 309 | {in fact} 310 | \\ 311 | \multicolumn{1}{l}{} 312 | & 313 | \multicolumn{1}{c}{} 314 | & 315 | \multicolumn{1}{r}{} 316 | & 317 | {} 318 | \\ 319 | \multicolumn{1}{l}{} 320 | & 321 | \multicolumn{1}{c}{} 322 | & 323 | \multicolumn{1}{r}{} 324 | & 325 | {a} 326 | \\ 327 | \hline 328 | \end{tabular} 329 | \bigskip 330 | 331 | Header less table: 332 | 333 | \bigskip 334 | \begin{tabular}{ll} 335 | \hline 336 | {header} 337 | & 338 | {less} 339 | \\ 340 | {this} 341 | & 342 | {is} 343 | \\ 344 | \hline 345 | \end{tabular} 346 | \bigskip 347 | 348 | Another quoted header less table with aligement 349 | 350 | \begin{quote} 351 | \bigskip 352 | \begin{tabular}{ll} 353 | \hline 354 | \multicolumn{1}{r}{header} 355 | & 356 | \multicolumn{1}{r}{less} 357 | \\ 358 | \multicolumn{1}{r}{again} 359 | & 360 | \multicolumn{1}{r}{aligned} 361 | \\ 362 | \hline 363 | \end{tabular} 364 | \bigskip 365 | \end{quote} 366 | 367 | This is an empty table with three columns: 368 | 369 | \bigskip 370 | \begin{tabular}{lll} 371 | \hline 372 | {} 373 | & 374 | {} 375 | & 376 | {} 377 | \\ 378 | \hline 379 | \end{tabular} 380 | \bigskip 381 | -------------------------------------------------------------------------------- /test/expect/basic.exts.md: -------------------------------------------------------------------------------- 1 | # Extensions 2 | 3 | ## Footnotes 4 | 5 | This is a footnote in history[^1] with mutiple references[^1] 6 | and even [text references][^1] 7 | 8 | [^1]: And it can have 9 | lazy continuation lines and multiple paragraphs 10 | 11 | If you indent one column after the footnote label start. 12 | 13 | cb 14 | * list item 15 | ablc 16 | * another item 17 | 18 | 19 | This is no longer the footnote. 20 | 21 | Can we make footnotes in footnotes[^2] ? 22 | 23 | [^2]: This gets tricky but I guess we could have a footnote[^tricky] in 24 | a footnote. 
Also footnote[^1] in footnotes[^2] is[^3] tricky for getting 25 | all back references rendered correctly. 26 | 27 | [^tricky]: The foot of the footnote. But that's not going to link back[^2] 28 | 29 | Second footnote 30 | 31 | Not the footnote 32 | 33 | [^3]: 34 | 35 | Not the footnote but a reference to an empty footnote[^3] 36 | 37 | Not a footnote [^] 38 | 39 | [^]: not a footnote. 40 | 41 | 42 | ## Strikethrough 43 | 44 | The stroken ~~*emph*~~. 45 | 46 | Nesting the nest ~~*emph* ~~stroke~~ *emph **emph ~~strikeagain~~***~~ 47 | 48 | There must be no blanks after the opener and before the closer. This 49 | is ~~ not an opener and ~~this won't open ~~that does~~. 50 | 51 | * Here we have ~~stroken `code`~~. 52 | * Here we have ~~nested ~~stroken~~ ok~~ 53 | 54 | ## Math 55 | 56 | The inline $\sqrt{x^2-1}$ equation. 57 | 58 | There must be no blanks after the opener and before the closer. This 59 | makes so you can donate $5 or $10 dollars here and there without problem. 60 | 61 | There is no such think as nesting $\sqrt{x^2-1}$+3$+3$. As usual 62 | delimiters can be \$escaped\$ $\sqrt{16\$}$ 63 | 64 | Amazing, this is [hyperlinked math $3x^2$](https://example.org) 65 | 66 | The HTML renderer should be careful with $a < b$ escapes. 67 | 68 | Display math can be in `math` code blocks. 69 | 70 | ```math 71 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 72 | ``` 73 | 74 | But it can also be in $$ \left( \sum_{k=1}^n 75 | a_k b_k \right)^2 < \Phi $$ 76 | 77 | 78 | ## List task items 79 | 80 | * [ ] Task open 81 | * [x] Task done 82 | * [X] Task done 83 | * [✓] Task done (U+2713, CHECK MARK) 84 | * [✔] Task done (U+2714, HEAVY CHECK MARK) 85 | Indent 86 | * Of course this can all be nested 87 | * [𐄂] Task done (U+10102, AEGEAN CHECK MARK) 88 | It will be done for sure. 89 | 90 | code block 91 | Not a code block 92 | * [x] Task done 93 | * [~] Task cancelled 94 | Paragraphy 95 | * [~] Task canceled 96 | 97 | we have a code block here too. 
98 | * [x]Not a task 99 | * \[x] Not a task 100 | 101 | * [ ] 102 | * [ ] a 103 | 104 | Code 105 | Not code 106 | 107 | * [ ] 108 | Code 109 | Not code 110 | 111 | 112 | ## Tables 113 | 114 | A sample table: 115 | 116 | | Id | Name | Description | Link | 117 | |:--:|------:|:-----------------------|--------------------:| 118 | | 1 | OCaml | The OCaml website | | 119 | | 2 | Haskell | The Haskell website | | 120 | | 3 | MDN Web docs | Web dev docs | | 121 | | 4 | Wikipedia | The Free Encyclopedia | | 122 | 123 | Testing these non separator pipes. 124 | 125 | | Fancy | maybe | hu\|glu | 126 | |-------|-------|-------| 127 | | *a | b* | `code |` | [bl|a] | 128 | | not \| two cols | $\sqrt(x^2 - 1)$ | 129 | 130 | [bl|a]: https://example.org 131 | 132 | 133 | A table with changing labels and alignement: 134 | 135 | | h1 | h2 | 136 | |-----|:---:| 137 | | 1 | 2 | 138 | | h3 | h4 | 139 | |:----|----:| 140 | | 3 | 4 | 141 | 142 | A simple header less table with left and right aligned columns 143 | 144 | |:--|--:| 145 | | 1 | 2 | 146 | 147 | The simplest table: 148 | 149 | | 1 | 2 | 150 | 151 | A header only table: 152 | 153 | | h1 | h2 | 154 | |:--:|:--:| 155 | 156 | Maximal number of columns all rows defines number of colums: 157 | 158 | | h1 | h2 | h3 | 159 | |:---|:--:|---:| 160 | |left | center | right | 161 | | ha! 
| four | columns | in fact | 162 | ||||| 163 | ||||a| 164 | 165 | Header less table: 166 | 167 | |header|less| 168 | |this | is | 169 | 170 | Another quoted header less table with aligement 171 | 172 | > |----:|----:| 173 | > | header | less | 174 | > | again | aligned | 175 | 176 | This is an empty table with three columns: 177 | 178 | | ||| 179 | 180 | -------------------------------------------------------------------------------- /test/expect/basic.exts.trip.md: -------------------------------------------------------------------------------- 1 | # Extensions 2 | 3 | ## Footnotes 4 | 5 | This is a footnote in history[^1] with mutiple references[^1] 6 | and even [text references][^1] 7 | 8 | [^1]: And it can have 9 | lazy continuation lines and multiple paragraphs 10 | 11 | If you indent one column after the footnote label start. 12 | 13 | cb 14 | * list item 15 | ablc 16 | * another item 17 | 18 | 19 | This is no longer the footnote. 20 | 21 | Can we make footnotes in footnotes[^2] ? 22 | 23 | [^2]: This gets tricky but I guess we could have a footnote[^tricky] in 24 | a footnote. Also footnote[^1] in footnotes[^2] is[^3] tricky for getting 25 | all back references rendered correctly. 26 | 27 | [^tricky]: The foot of the footnote. But that's not going to link back[^2] 28 | 29 | Second footnote 30 | 31 | Not the footnote 32 | 33 | [^3]: 34 | 35 | Not the footnote but a reference to an empty footnote[^3] 36 | 37 | Not a footnote \[^\] 38 | 39 | \[^\]: not a footnote. 40 | 41 | 42 | ## Strikethrough 43 | 44 | The stroken ~~*emph*~~. 45 | 46 | Nesting the nest ~~*emph* ~~stroke~~ *emph **emph ~~strikeagain~~***~~ 47 | 48 | There must be no blanks after the opener and before the closer. This 49 | is \~~ not an opener and \~~this won't open ~~that does~~. 50 | 51 | * Here we have ~~stroken `code`~~. 52 | * Here we have ~~nested ~~stroken~~ ok~~ 53 | 54 | ## Math 55 | 56 | The inline $\sqrt{x^2-1}$ equation. 
57 | 58 | There must be no blanks after the opener and before the closer. This 59 | makes so you can donate \$5 or \$10 dollars here and there without problem. 60 | 61 | There is no such think as nesting $\sqrt{x^2-1}$\+3$+3$. As usual 62 | delimiters can be \$escaped\$ $\sqrt{16\$}$ 63 | 64 | Amazing, this is [hyperlinked math $3x^2$](https://example.org) 65 | 66 | The HTML renderer should be careful with $a < b$ escapes. 67 | 68 | Display math can be in `math` code blocks. 69 | 70 | ```math 71 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 72 | ``` 73 | 74 | But it can also be in $$ \left( \sum_{k=1}^n 75 | a_k b_k \right)^2 < \Phi $$ 76 | 77 | 78 | ## List task items 79 | 80 | * [ ] Task open 81 | * [x] Task done 82 | * [X] Task done 83 | * [✓] Task done (U+2713, CHECK MARK) 84 | * [✔] Task done (U+2714, HEAVY CHECK MARK) 85 | Indent 86 | * Of course this can all be nested 87 | * [𐄂] Task done (U+10102, AEGEAN CHECK MARK) 88 | It will be done for sure. 89 | 90 | code block 91 | Not a code block 92 | * [x] Task done 93 | * [~] Task cancelled 94 | Paragraphy 95 | * [~] Task canceled 96 | 97 | we have a code block here too. 98 | * \[x\]Not a task 99 | * \[x\] Not a task 100 | 101 | * [ ] 102 | * [ ] a 103 | 104 | Code 105 | Not code 106 | 107 | * [ ] 108 | Code 109 | Not code 110 | 111 | 112 | ## Tables 113 | 114 | A sample table: 115 | 116 | | Id | Name | Description | Link | 117 | |:--:|------:|:-----------------------|--------------------:| 118 | | 1 | OCaml | The OCaml website | | 119 | | 2 | Haskell | The Haskell website | | 120 | | 3 | MDN Web docs | Web dev docs | | 121 | | 4 | Wikipedia | The Free Encyclopedia | | 122 | 123 | Testing these non separator pipes. 
124 | 125 | | Fancy | maybe | hu\|glu | 126 | |-------|-------|-------| 127 | | *a \| b* | `code |` | [bl|a] | 128 | | not \| two cols | $\sqrt(x^2 - 1)$ | 129 | 130 | [bl|a]: https://example.org 131 | 132 | 133 | A table with changing labels and alignement: 134 | 135 | | h1 | h2 | 136 | |-----|:---:| 137 | | 1 | 2 | 138 | | h3 | h4 | 139 | |:----|----:| 140 | | 3 | 4 | 141 | 142 | A simple header less table with left and right aligned columns 143 | 144 | |:--|--:| 145 | | 1 | 2 | 146 | 147 | The simplest table: 148 | 149 | | 1 | 2 | 150 | 151 | A header only table: 152 | 153 | | h1 | h2 | 154 | |:--:|:--:| 155 | 156 | Maximal number of columns all rows defines number of colums: 157 | 158 | | h1 | h2 | h3 | 159 | |:---|:--:|---:| 160 | |left | center | right | 161 | | ha\! | four | columns | in fact | 162 | ||||| 163 | ||||a| 164 | 165 | Header less table: 166 | 167 | |header|less| 168 | |this | is | 169 | 170 | Another quoted header less table with aligement 171 | 172 | > |----:|----:| 173 | > | header | less | 174 | > | again | aligned | 175 | 176 | This is an empty table with three columns: 177 | 178 | | ||| 179 | 180 | -------------------------------------------------------------------------------- /test/expect/basic.latex: -------------------------------------------------------------------------------- 1 | \section{Basic tests} 2 | 3 | Basic tests for all CommonMark constructs. 4 | 5 | \subsection{Testing autolinks} 6 | 7 | This is an \url{http://example.org} and another one \url{mailto:you@example.org}. 8 | 9 | \subsection{Testing breaks} 10 | 11 | A line ending (not in a code span or HTML tag) that is preceded by two 12 | or more spaces and does not occur at the end of a block is parsed as a 13 | hard line break. 14 | 15 | So this means we had softbreaks so far and now we get \\ 16 | a hard break\\ 17 | and another one. 18 | 19 | \begin{quote} 20 | So this means we had softbreaks so far and now we get \\ 21 | a hard break\\ 22 | and another one. 
23 | This is very soooft. 24 | \end{quote} 25 | 26 | \subsection{Testing code spans} 27 | 28 | This is a multi-line code\texttt{ code span `` it has backticks in there} 29 | 30 | Sometimes code spans \texttt{`can have really ``` strange layout}. Do you fancy \texttt{`A\_polymorphic\_variant} ? 31 | 32 | \subsection{Testing emphasis} 33 | 34 | There is \emph{more} than \emph{one syntax} for \textbf{emphasis} and \textbf{strong 35 | emphasis}. We should be careful about \textbf{embedded * marker}. This 36 | will be \textbf{tricky * to handle}. This \emph{is not ** what} you want ? 37 | 38 | \subsection{Testing links, images and link reference definitions} 39 | 40 | This is an \protect\includegraphics{/heyho} 41 | 42 | That is totally \href{/hohoho}{colla psed} and 43 | that is \href{/veryshort}{\texttt{short cuted}} 44 | 45 | Shortcuts can be better than \href{/veryshort}{full references} but not 46 | always and we'd like to trip their \href{/veryshort}{label}. 47 | 48 | \begin{quote}\end{quote} 49 | 50 | \subsection{Testing raw HTML} 51 | 52 | Haha % Raw CommonMark HTML omitted 53 | a% Raw CommonMark HTML omitted 54 | % Raw CommonMark HTML omitted 55 | hihi this is not the end yet. 56 | 57 | foo % Raw CommonMark HTML omitted 58 | u% Raw CommonMark HTML omitted 59 | 60 | 61 | \begin{quote} 62 | Haha % Raw CommonMark HTML omitted 63 | a% Raw CommonMark HTML omitted 64 | % Raw CommonMark HTML omitted 65 | hihi this is not the end yet. 66 | \end{quote} 67 | 68 | \subsection{Testing blank lines} 69 | 70 | Impressive isn't it ? 71 | 72 | \subsection{Testing block quotes} 73 | 74 | \begin{quote} 75 | \begin{quote} 76 | How is 77 | Nestyfing going on 78 | These irregularities \textbf{will} normalize 79 | We keep only the first block quote indent 80 | \end{quote} 81 | \end{quote} 82 | 83 | \begin{quote} 84 | \subsection{Further tests} 85 | \end{quote} 86 | 87 | We need a little quote here 88 | 89 | \begin{quote} 90 | It's warranted. 
91 | \end{quote} 92 | 93 | \subsection{Testing code blocks} 94 | 95 | \begin{lstlisting}[language=layout] 96 | \end{lstlisting} 97 | 98 | \begin{lstlisting}[language=ocaml] 99 | 100 | type t = 101 | | A of int 102 | | B of string 103 | 104 | let square x = x *. x 105 | \end{lstlisting} 106 | 107 | The indented code block: 108 | 109 | \begin{verbatim} 110 | a b c d 111 | a b c d 112 | a b c d 113 | 114 | 115 | a 116 | a b c 117 | \end{verbatim} 118 | 119 | \begin{quote} 120 | \begin{lstlisting}[language=ocaml] 121 | 122 | type t = 123 | | A of int 124 | | B of string 125 | 126 | let square x = x *. x 127 | \end{lstlisting} 128 | \end{quote} 129 | 130 | \subsection{Testing headings} 131 | 132 | \section{aaa 133 | aaaa} 134 | 135 | \begin{quote} 136 | \subsection{bbb \texttt{hey} 137 | bbbb} 138 | \end{quote} 139 | 140 | \section{That's one way} 141 | 142 | \subsubsection{It's a long way to the heading} 143 | 144 | \subsection{Testing HTML block} 145 | 146 | % CommonMark HTML block omitted 147 | 148 | \begin{itemize} 149 | \item{} 150 | % CommonMark HTML block omitted 151 | \end{itemize} 152 | 153 | \subsection{Testing lists} 154 | 155 | The \texttt{square} function is the root. There are reasons for this: 156 | 157 | \begin{enumerate} 158 | \item{} 159 | There is no reason. There should be a reason or an \url{http://example.org} 160 | \item{} 161 | Maybe that's the reason. But it may not be the reason. 162 | \item{} 163 | Is reason the only tool ? 164 | \end{enumerate} 165 | 166 | \begin{quote} 167 | Quoted bullets 168 | 169 | \begin{itemize} 170 | \item{} 171 | Is this important ? 172 | \end{itemize} 173 | \end{quote} 174 | 175 | \begin{itemize} 176 | \item{} 177 | \begin{itemize} 178 | \item{} 179 | Well it's in the spec 180 | \end{itemize} 181 | \item{}\end{itemize} 182 | 183 | Empty list item above 184 | 185 | \subsection{Testing paragraphs} 186 | 187 | We really want your paragraph layout preserved. 188 | Really ? 189 | Really. 190 | Really. 191 | Really. 
192 | 193 | \begin{quote} 194 | We really want your paragraph layout preserved. 195 | Really ? 196 | Really. 197 | Really. 198 | Really. 199 | \end{quote} 200 | 201 | \subsection{Testing thematic breaks} 202 | 203 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 204 | 205 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 206 | 207 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 208 | 209 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 210 | 211 | \begin{quote} 212 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 213 | \end{quote} 214 | -------------------------------------------------------------------------------- /test/expect/basic.md: -------------------------------------------------------------------------------- 1 | Basic tests 2 | =========== 3 | 4 | Basic tests for all CommonMark constructs. 5 | 6 | ## Testing autolinks 7 | 8 | This is an and another one . 9 | 10 | 11 | ## Testing breaks 12 | 13 | A line ending (not in a code span or HTML tag) that is preceded by two 14 | or more spaces and does not occur at the end of a block is parsed as a 15 | hard line break. 16 | 17 | So this means we had softbreaks so far and now we get \ 18 | a hard break 19 | and another one. 20 | 21 | > So this means we had softbreaks so far and now we get \ 22 | > a hard break 23 | > and another one. 24 | > This is very soooft. 25 | 26 | ## Testing code spans 27 | 28 | This is a multi-line code` 29 | code span `` it has backticks 30 | in there` 31 | 32 | Sometimes code spans `` `can have 33 | really ``` 34 | strange 35 | layout ``. Do you fancy `` `A_polymorphic_variant `` ? 36 | 37 | 38 | ## Testing emphasis 39 | 40 | There is _more_ than *one syntax* for __emphasis__ and **strong 41 | emphasis**. We should be careful about **embedded * marker**. This 42 | will be **tricky * to handle**. This *is not ** what* you want ? 
43 | 44 | 45 | ## Testing links, images and link reference definitions 46 | 47 | This is an ![inline image]( 48 | /heyho (The 49 | multine title)) 50 | 51 | That is totally [colla psed][] and 52 | that is [`short cuted`] 53 | 54 | Shortcuts can be better than [full references][`short 55 | cuted`] but not 56 | always and we'd like to trip their [label][`short cuted`]. 57 | 58 | > [colla psed]: /hohoho "And again these 59 | > multi 60 | > line titles" 61 | 62 | [`short cuted`]: /veryshort "But very 63 | important" 64 | 65 | 66 | ## Testing raw HTML 67 | 68 | Haha a hihi this is not the end yet. 70 | 71 | foo u 72 | 73 | > Haha a data="foo" > hihi this is not the end yet. 75 | 76 | ## Testing blank lines 77 | 78 | 79 | 80 | Impressive isn't it ? 81 | 82 | ## Testing block quotes 83 | 84 | 85 | > > How is 86 | > > Nestyfing going on 87 | >> These irregularities **will** normalize 88 | > We keep only the first block quote indent 89 | 90 | > ## Further tests ####### 91 | 92 | We need a little quote here 93 | > It's warranted. 94 | 95 | 96 | ## Testing code blocks 97 | 98 | ``` layout after info is not kept 99 | ``` 100 | 101 | ``` ocaml module M 102 | 103 | type t = 104 | | A of int 105 | | B of string 106 | 107 | let square x = x *. x 108 | ```` 109 | 110 | The indented code block: 111 | 112 | a b c d 113 | a b c d 114 | a b c d 115 | 116 | 117 | a 118 | a b c 119 | 120 | 121 | > ``` ocaml module M 122 | > 123 | > type t = 124 | > | A of int 125 | > | B of string 126 | > 127 | > let square x = x *. x 128 | > ```` 129 | 130 | 131 | ## Testing headings 132 | 133 | aaa 134 | aaaa 135 | ======== 136 | 137 | > bbb `hey` 138 | > bbbb 139 | > -------- 140 | 141 | # That's one way 142 | 143 | ### It's a long way to the heading 144 | 145 | ## Testing HTML block 146 | 147 | 150 | 151 | * 154 | 155 | ## Testing lists 156 | 157 | The `square` function is the root. There are reasons for this: 158 | 159 | 1. There is no reason. There should be a reason or an 160 | 2. 
Maybe that's the reason. But it may not be the reason. 161 | 3. Is reason the only tool ? 162 | 163 | > Quoted bullets 164 | > * Is this important ? 165 | * * Well it's in the spec 166 | * 167 | Empty list item above 168 | 169 | ## Testing paragraphs 170 | 171 | We really want your paragraph layout preserved. 172 | Really ? 173 | Really. 174 | Really. 175 | Really. 176 | 177 | 178 | > We really want your paragraph layout preserved. 179 | > Really ? 180 | > Really. 181 | > Really. 182 | > Really. 183 | 184 | 185 | 186 | ## Testing thematic breaks 187 | 188 | *** 189 | --- 190 | ___ 191 | 192 | _ _ _ _ _ 193 | 194 | > ******* 195 | -------------------------------------------------------------------------------- /test/expect/basic.trip.md: -------------------------------------------------------------------------------- 1 | Basic tests 2 | =========== 3 | 4 | Basic tests for all CommonMark constructs. 5 | 6 | ## Testing autolinks 7 | 8 | This is an and another one . 9 | 10 | 11 | ## Testing breaks 12 | 13 | A line ending (not in a code span or HTML tag) that is preceded by two 14 | or more spaces and does not occur at the end of a block is parsed as a 15 | hard line break. 16 | 17 | So this means we had softbreaks so far and now we get \ 18 | a hard break 19 | and another one. 20 | 21 | > So this means we had softbreaks so far and now we get \ 22 | > a hard break 23 | > and another one. 24 | > This is very soooft. 25 | 26 | ## Testing code spans 27 | 28 | This is a multi-line code` 29 | code span `` it has backticks 30 | in there` 31 | 32 | Sometimes code spans `` `can have 33 | really ``` 34 | strange 35 | layout ``. Do you fancy `` `A_polymorphic_variant `` ? 36 | 37 | 38 | ## Testing emphasis 39 | 40 | There is _more_ than *one syntax* for __emphasis__ and **strong 41 | emphasis**. We should be careful about **embedded \* marker**. This 42 | will be **tricky \* to handle**. This *is not \*\* what* you want ? 
43 | 44 | 45 | ## Testing links, images and link reference definitions 46 | 47 | This is an ![inline image]( 48 | /heyho (The 49 | multine title)) 50 | 51 | That is totally [colla psed][] and 52 | that is [`short cuted`] 53 | 54 | Shortcuts can be better than [full references][`short 55 | cuted`] but not 56 | always and we'd like to trip their [label][`short cuted`]. 57 | 58 | > [colla psed]: /hohoho "And again these 59 | > multi 60 | > line titles" 61 | 62 | [`short cuted`]: /veryshort "But very 63 | important" 64 | 65 | 66 | ## Testing raw HTML 67 | 68 | Haha a hihi this is not the end yet. 70 | 71 | foo u 72 | 73 | > Haha a data="foo" > hihi this is not the end yet. 75 | 76 | ## Testing blank lines 77 | 78 | 79 | 80 | Impressive isn't it ? 81 | 82 | ## Testing block quotes 83 | 84 | 85 | > > How is 86 | > > Nestyfing going on 87 | > > These irregularities **will** normalize 88 | > > We keep only the first block quote indent 89 | 90 | > ## Further tests ####### 91 | 92 | We need a little quote here 93 | > It's warranted. 94 | 95 | 96 | ## Testing code blocks 97 | 98 | ``` layout after info is not kept 99 | ``` 100 | 101 | ``` ocaml module M 102 | 103 | type t = 104 | | A of int 105 | | B of string 106 | 107 | let square x = x *. x 108 | ```` 109 | 110 | The indented code block: 111 | 112 | a b c d 113 | a b c d 114 | a b c d 115 | 116 | 117 | a 118 | a b c 119 | 120 | 121 | > ``` ocaml module M 122 | > 123 | > type t = 124 | > | A of int 125 | > | B of string 126 | > 127 | > let square x = x *. x 128 | > ```` 129 | 130 | 131 | ## Testing headings 132 | 133 | aaa 134 | aaaa 135 | ======== 136 | 137 | > bbb `hey` 138 | > bbbb 139 | > -------- 140 | 141 | # That's one way 142 | 143 | ### It's a long way to the heading 144 | 145 | ## Testing HTML block 146 | 147 | 150 | 151 | * 154 | 155 | ## Testing lists 156 | 157 | The `square` function is the root. There are reasons for this: 158 | 159 | 1. There is no reason. There should be a reason or an 160 | 2. 
Maybe that's the reason. But it may not be the reason. 161 | 3. Is reason the only tool ? 162 | 163 | > Quoted bullets 164 | > * Is this important ? 165 | * * Well it's in the spec 166 | * 167 | Empty list item above 168 | 169 | ## Testing paragraphs 170 | 171 | We really want your paragraph layout preserved. 172 | Really ? 173 | Really. 174 | Really. 175 | Really. 176 | 177 | 178 | > We really want your paragraph layout preserved. 179 | > Really ? 180 | > Really. 181 | > Really. 182 | > Really. 183 | 184 | 185 | 186 | ## Testing thematic breaks 187 | 188 | *** 189 | --- 190 | ___ 191 | 192 | _ _ _ _ _ 193 | 194 | > ******* 195 | -------------------------------------------------------------------------------- /test/expect/bug-18.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bug-18 7 | 206 | 207 | 208 |

Issue #18

209 |

When a list marker is followed by end of file, we crash.

210 |
    211 |
  • Item 1
  • 212 |
  • Item 2
  • 213 |
  • 214 |
215 | 216 | -------------------------------------------------------------------------------- /test/expect/bug-18.latex: -------------------------------------------------------------------------------- 1 | \section{Issue \#18} 2 | 3 | When a list marker is followed by end of file, we crash. 4 | 5 | \begin{itemize} 6 | \item{} 7 | Item 1 8 | \item{} 9 | Item 2 10 | \item{}\end{itemize} 11 | -------------------------------------------------------------------------------- /test/expect/bug-18.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bug-18.md", lines 1-7, characters 0-2 3 | Heading, level 1: 4 | File "bug-18.md", line 1, characters 0-11 5 | Text: 6 | File "bug-18.md", line 1, characters 2-11 7 | Blank line: 8 | File "bug-18.md", line 2 9 | Paragraph: 10 | File "bug-18.md", line 3, characters 0-56 11 | Text: 12 | File "bug-18.md", line 3, characters 0-56 13 | Blank line: 14 | File "bug-18.md", line 4 15 | List (tight:true): 16 | File "bug-18.md", lines 5-7, characters 1-2 17 | List item: 18 | File "bug-18.md", line 5, characters 1-9 19 | List marker: 20 | File "bug-18.md", line 5, characters 1-2 21 | Paragraph: 22 | File "bug-18.md", line 5, characters 3-9 23 | Text: 24 | File "bug-18.md", line 5, characters 3-9 25 | List item: 26 | File "bug-18.md", line 6, characters 1-9 27 | List marker: 28 | File "bug-18.md", line 6, characters 1-2 29 | Paragraph: 30 | File "bug-18.md", line 6, characters 3-9 31 | Text: 32 | File "bug-18.md", line 6, characters 3-9 33 | List item: 34 | File "bug-18.md", line 7, characters 1-2 35 | List marker: 36 | File "bug-18.md", line 7, characters 1-2 37 | Blank line: 38 | File "bug-18.md", line 7, characters 2-2 -------------------------------------------------------------------------------- /test/expect/bug-18.md: -------------------------------------------------------------------------------- 1 | # Issue #18 2 | 3 | When a list marker is followed by end of file, we crash. 
4 | 5 | - Item 1 6 | - Item 2 7 | - -------------------------------------------------------------------------------- /test/expect/bug-18.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bug-18.md", lines 1-7, characters 0-2 3 | Heading, level 1: 4 | File "bug-18.md", line 1, characters 0-11 5 | Text: 6 | File "bug-18.md", line 1, characters 2-11 7 | Blank line: 8 | File "bug-18.md", line 2 9 | Paragraph: 10 | File "bug-18.md", line 3, characters 0-56 11 | Text: 12 | File "bug-18.md", line 3, characters 0-56 13 | Blank line: 14 | File "bug-18.md", line 4 15 | List (tight:true): 16 | File "bug-18.md", lines 5-7, characters 1-2 17 | List item: 18 | File "bug-18.md", line 5, characters 1-9 19 | List marker: 20 | File "bug-18.md", line 5, characters 1-2 21 | Paragraph: 22 | File "bug-18.md", line 5, characters 3-9 23 | Text: 24 | File "bug-18.md", line 5, characters 3-9 25 | List item: 26 | File "bug-18.md", line 6, characters 1-9 27 | List marker: 28 | File "bug-18.md", line 6, characters 1-2 29 | Paragraph: 30 | File "bug-18.md", line 6, characters 3-9 31 | Text: 32 | File "bug-18.md", line 6, characters 3-9 33 | List item: 34 | File "bug-18.md", line 7, characters 1-2 35 | List marker: 36 | File "bug-18.md", line 7, characters 1-2 37 | Blank line: 38 | File "bug-18.md", line 7, characters 2-2 -------------------------------------------------------------------------------- /test/expect/bug-18.trip.md: -------------------------------------------------------------------------------- 1 | # Issue \#18 2 | 3 | When a list marker is followed by end of file, we crash. 4 | 5 | - Item 1 6 | - Item 2 7 | - -------------------------------------------------------------------------------- /test/expect/bugs.exts.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bugs.exts 7 | 206 | 207 | 208 |

Bugs

209 |

Add a section for the bug and the CommonMark that triggers it as 210 | follows:

211 |
# Bug #NUM
212 | 
213 | The triggering CommonMark
214 | 
215 |

Bug #10

216 |

In cells toplevel text nodes not at the beginning or end of the cell 217 | get dropped.

218 |
219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 |
Foo
a or b
before a or b after
before a or bafter
beforeaorbafter
aa

foo

Bug #15

241 |

Invalid markup generated for cancelled task.

242 |
    243 |
  • This has been cancelled
  • 244 |
245 | 246 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.latex: -------------------------------------------------------------------------------- 1 | \section{Bugs} 2 | 3 | Add a section for the bug and the CommonMark that triggers it as 4 | follows: 5 | 6 | \begin{verbatim} 7 | # Bug #NUM 8 | 9 | The triggering CommonMark 10 | \end{verbatim} 11 | 12 | \section{Bug \#10} 13 | 14 | In cells toplevel text nodes not at the beginning or end of the cell 15 | get dropped. 16 | 17 | \bigskip 18 | \begin{tabular}{l} 19 | {\bfseries{}Foo} 20 | \\ 21 | \hline 22 | {\texttt{a} or \texttt{b}} 23 | \\ 24 | {before \texttt{a} or \texttt{b} after} 25 | \\ 26 | {before \texttt{a} or \texttt{b}after} 27 | \\ 28 | {before\texttt{a}or\texttt{b}after} 29 | \\ 30 | {\emph{a}\texttt{a}} 31 | \\ 32 | {% Raw CommonMark HTML omitted 33 | foo% Raw CommonMark HTML omitted 34 | } 35 | \\ 36 | \hline 37 | \end{tabular} 38 | \bigskip 39 | 40 | \section{Bug \#15} 41 | 42 | Invalid markup generated for cancelled task. 
43 | 44 | \begin{itemize} 45 | \item{} \lbrack ~\rbrack \enspace 46 | This has been cancelled 47 | \end{itemize} 48 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.exts.md", lines 1-32 3 | Heading, level 1: 4 | File "bugs.exts.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.exts.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.exts.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.exts.md", line 3 11 | Paragraph: 12 | File "bugs.exts.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.exts.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.exts.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.exts.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "bugs.exts.md", line 4, characters 64-65 21 | Layout after: 22 | File "bugs.exts.md", line 5 23 | Text: 24 | File "bugs.exts.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.exts.md", line 6 27 | Code block: 28 | File "bugs.exts.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "bugs.exts.md", line 7, characters 0-3 31 | Code line: 32 | File "bugs.exts.md", line 8, characters 0-10 33 | Code line: 34 | File "bugs.exts.md", line 9 35 | Code line: 36 | File "bugs.exts.md", line 10, characters 0-25 37 | Closing fence: 38 | File "bugs.exts.md", line 11, characters 0-3 39 | Blank line: 40 | File "bugs.exts.md", line 12 41 | Heading, level 1: 42 | File "bugs.exts.md", line 13, characters 0-9 43 | Text: 44 | File "bugs.exts.md", line 13, characters 2-9 45 | Blank line: 46 | File "bugs.exts.md", line 14 47 | Paragraph: 48 | File "bugs.exts.md", lines 15-16, characters 0-12 49 | Inlines: 50 | File "bugs.exts.md", lines 15-16, characters 0-12 51 | Text: 52 | File "bugs.exts.md", line 15, characters 0-68 53 | Soft break: 54 | File "bugs.exts.md", lines 15-16, 
characters 68-0 55 | Layout before: 56 | File "bugs.exts.md", line 15, characters 68-68 57 | Layout after: 58 | File "bugs.exts.md", line 16 59 | Text: 60 | File "bugs.exts.md", line 16, characters 0-12 61 | Blank line: 62 | File "bugs.exts.md", line 17 63 | Table: 64 | File "bugs.exts.md", lines 18-25, characters 0-27 65 | Header row: 66 | File "bugs.exts.md", line 18, characters 0-27 67 | Text: 68 | File "bugs.exts.md", line 18, characters 3-6 69 | Separator line: 70 | File "bugs.exts.md", line 19, characters 0-27 71 | Separator: 72 | File "bugs.exts.md", line 19, characters 1-26 73 | Data row: 74 | File "bugs.exts.md", line 20, characters 0-27 75 | Inlines: 76 | File "bugs.exts.md", line 20, characters 2-12 77 | Code span: 78 | File "bugs.exts.md", line 20, characters 2-5 79 | Code span line: 80 | File "bugs.exts.md", line 20, characters 3-4 81 | Text: 82 | File "bugs.exts.md", line 20, characters 5-9 83 | Code span: 84 | File "bugs.exts.md", line 20, characters 9-12 85 | Code span line: 86 | File "bugs.exts.md", line 20, characters 10-11 87 | Data row: 88 | File "bugs.exts.md", line 21, characters 0-27 89 | Inlines: 90 | File "bugs.exts.md", line 21, characters 2-25 91 | Text: 92 | File "bugs.exts.md", line 21, characters 2-9 93 | Code span: 94 | File "bugs.exts.md", line 21, characters 9-12 95 | Code span line: 96 | File "bugs.exts.md", line 21, characters 10-11 97 | Text: 98 | File "bugs.exts.md", line 21, characters 12-16 99 | Code span: 100 | File "bugs.exts.md", line 21, characters 16-19 101 | Code span line: 102 | File "bugs.exts.md", line 21, characters 17-18 103 | Text: 104 | File "bugs.exts.md", line 21, characters 19-25 105 | Data row: 106 | File "bugs.exts.md", line 22, characters 0-27 107 | Inlines: 108 | File "bugs.exts.md", line 22, characters 2-24 109 | Text: 110 | File "bugs.exts.md", line 22, characters 2-9 111 | Code span: 112 | File "bugs.exts.md", line 22, characters 9-12 113 | Code span line: 114 | File "bugs.exts.md", line 22, characters 
10-11 115 | Text: 116 | File "bugs.exts.md", line 22, characters 12-16 117 | Code span: 118 | File "bugs.exts.md", line 22, characters 16-19 119 | Code span line: 120 | File "bugs.exts.md", line 22, characters 17-18 121 | Text: 122 | File "bugs.exts.md", line 22, characters 19-24 123 | Data row: 124 | File "bugs.exts.md", line 23, characters 0-27 125 | Inlines: 126 | File "bugs.exts.md", line 23, characters 2-21 127 | Text: 128 | File "bugs.exts.md", line 23, characters 2-8 129 | Code span: 130 | File "bugs.exts.md", line 23, characters 8-11 131 | Code span line: 132 | File "bugs.exts.md", line 23, characters 9-10 133 | Text: 134 | File "bugs.exts.md", line 23, characters 11-13 135 | Code span: 136 | File "bugs.exts.md", line 23, characters 13-16 137 | Code span line: 138 | File "bugs.exts.md", line 23, characters 14-15 139 | Text: 140 | File "bugs.exts.md", line 23, characters 16-21 141 | Data row: 142 | File "bugs.exts.md", line 24, characters 0-27 143 | Inlines: 144 | File "bugs.exts.md", line 24, characters 2-8 145 | Emphasis: 146 | File "bugs.exts.md", line 24, characters 2-5 147 | Text: 148 | File "bugs.exts.md", line 24, characters 3-4 149 | Code span: 150 | File "bugs.exts.md", line 24, characters 5-8 151 | Code span line: 152 | File "bugs.exts.md", line 24, characters 6-7 153 | Data row: 154 | File "bugs.exts.md", line 25, characters 0-27 155 | Inlines: 156 | File "bugs.exts.md", line 25, characters 2-12 157 | Raw HTML: 158 | File "bugs.exts.md", line 25, characters 2-5 159 | Raw HTML line: 160 | File "bugs.exts.md", line 25, characters 2-5 161 | Text: 162 | File "bugs.exts.md", line 25, characters 5-8 163 | Raw HTML: 164 | File "bugs.exts.md", line 25, characters 8-12 165 | Raw HTML line: 166 | File "bugs.exts.md", line 25, characters 8-12 167 | Blank line: 168 | File "bugs.exts.md", line 26 169 | Heading, level 1: 170 | File "bugs.exts.md", line 27, characters 0-9 171 | Text: 172 | File "bugs.exts.md", line 27, characters 2-9 173 | Blank line: 174 | File 
"bugs.exts.md", line 28 175 | Paragraph: 176 | File "bugs.exts.md", line 29, characters 0-44 177 | Text: 178 | File "bugs.exts.md", line 29, characters 0-44 179 | Blank line: 180 | File "bugs.exts.md", line 30 181 | List (tight:true): 182 | File "bugs.exts.md", line 31, characters 0-29 183 | List item: 184 | File "bugs.exts.md", line 31, characters 0-29 185 | List marker: 186 | File "bugs.exts.md", line 31, characters 0-1 187 | Task marker: 188 | File "bugs.exts.md", line 31, characters 2-5 189 | Paragraph: 190 | File "bugs.exts.md", line 31, characters 6-29 191 | Text: 192 | File "bugs.exts.md", line 31, characters 6-29 193 | Blank line: 194 | File "bugs.exts.md", line 32 -------------------------------------------------------------------------------- /test/expect/bugs.exts.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Bug #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Bug #10 14 | 15 | In cells toplevel text nodes not at the beginning or end of the cell 16 | get dropped. 17 | 18 | | Foo | 19 | |-------------------------| 20 | | `a` or `b` | 21 | | before `a` or `b` after | 22 | | before `a` or `b`after | 23 | | before`a`or`b`after | 24 | | *a*`a` | 25 | |

foo

| 26 | 27 | # Bug #15 28 | 29 | Invalid markup generated for cancelled task. 30 | 31 | * [~] This has been cancelled 32 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.exts.md", lines 1-32 3 | Heading, level 1: 4 | File "bugs.exts.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.exts.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.exts.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.exts.md", line 3 11 | Paragraph: 12 | File "bugs.exts.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.exts.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.exts.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.exts.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "-" 21 | Layout after: 22 | File "-" 23 | Text: 24 | File "bugs.exts.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.exts.md", line 6 27 | Code block: 28 | File "bugs.exts.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "-" 31 | Code line: 32 | File "bugs.exts.md", line 8, characters 0-10 33 | Code line: 34 | File "bugs.exts.md", line 9 35 | Code line: 36 | File "bugs.exts.md", line 10, characters 0-25 37 | Closing fence: 38 | File "-" 39 | Blank line: 40 | File "bugs.exts.md", line 12 41 | Heading, level 1: 42 | File "bugs.exts.md", line 13, characters 0-9 43 | Text: 44 | File "bugs.exts.md", line 13, characters 2-9 45 | Blank line: 46 | File "bugs.exts.md", line 14 47 | Paragraph: 48 | File "bugs.exts.md", lines 15-16, characters 0-12 49 | Inlines: 50 | File "bugs.exts.md", lines 15-16, characters 0-12 51 | Text: 52 | File "bugs.exts.md", line 15, characters 0-68 53 | Soft break: 54 | File "bugs.exts.md", lines 15-16, characters 68-0 55 | Layout before: 56 | File "-" 57 | Layout after: 58 | File "-" 59 | Text: 60 | File "bugs.exts.md", 
line 16, characters 0-12 61 | Blank line: 62 | File "bugs.exts.md", line 17 63 | Table: 64 | File "bugs.exts.md", lines 18-25, characters 0-27 65 | Header row: 66 | File "bugs.exts.md", line 18, characters 0-27 67 | Text: 68 | File "bugs.exts.md", line 18, characters 3-6 69 | Separator line: 70 | File "bugs.exts.md", line 19, characters 0-27 71 | Separator: 72 | File "bugs.exts.md", line 19, characters 1-26 73 | Data row: 74 | File "bugs.exts.md", line 20, characters 0-27 75 | Inlines: 76 | File "bugs.exts.md", line 20, characters 2-12 77 | Code span: 78 | File "bugs.exts.md", line 20, characters 2-5 79 | Code span line: 80 | File "bugs.exts.md", line 20, characters 3-4 81 | Text: 82 | File "bugs.exts.md", line 20, characters 5-9 83 | Code span: 84 | File "bugs.exts.md", line 20, characters 9-12 85 | Code span line: 86 | File "bugs.exts.md", line 20, characters 10-11 87 | Data row: 88 | File "bugs.exts.md", line 21, characters 0-27 89 | Inlines: 90 | File "bugs.exts.md", line 21, characters 2-25 91 | Text: 92 | File "bugs.exts.md", line 21, characters 2-9 93 | Code span: 94 | File "bugs.exts.md", line 21, characters 9-12 95 | Code span line: 96 | File "bugs.exts.md", line 21, characters 10-11 97 | Text: 98 | File "bugs.exts.md", line 21, characters 12-16 99 | Code span: 100 | File "bugs.exts.md", line 21, characters 16-19 101 | Code span line: 102 | File "bugs.exts.md", line 21, characters 17-18 103 | Text: 104 | File "bugs.exts.md", line 21, characters 19-25 105 | Data row: 106 | File "bugs.exts.md", line 22, characters 0-27 107 | Inlines: 108 | File "bugs.exts.md", line 22, characters 2-24 109 | Text: 110 | File "bugs.exts.md", line 22, characters 2-9 111 | Code span: 112 | File "bugs.exts.md", line 22, characters 9-12 113 | Code span line: 114 | File "bugs.exts.md", line 22, characters 10-11 115 | Text: 116 | File "bugs.exts.md", line 22, characters 12-16 117 | Code span: 118 | File "bugs.exts.md", line 22, characters 16-19 119 | Code span line: 120 | File 
"bugs.exts.md", line 22, characters 17-18 121 | Text: 122 | File "bugs.exts.md", line 22, characters 19-24 123 | Data row: 124 | File "bugs.exts.md", line 23, characters 0-27 125 | Inlines: 126 | File "bugs.exts.md", line 23, characters 2-21 127 | Text: 128 | File "bugs.exts.md", line 23, characters 2-8 129 | Code span: 130 | File "bugs.exts.md", line 23, characters 8-11 131 | Code span line: 132 | File "bugs.exts.md", line 23, characters 9-10 133 | Text: 134 | File "bugs.exts.md", line 23, characters 11-13 135 | Code span: 136 | File "bugs.exts.md", line 23, characters 13-16 137 | Code span line: 138 | File "bugs.exts.md", line 23, characters 14-15 139 | Text: 140 | File "bugs.exts.md", line 23, characters 16-21 141 | Data row: 142 | File "bugs.exts.md", line 24, characters 0-27 143 | Inlines: 144 | File "bugs.exts.md", line 24, characters 2-8 145 | Emphasis: 146 | File "bugs.exts.md", line 24, characters 2-5 147 | Text: 148 | File "bugs.exts.md", line 24, characters 3-4 149 | Code span: 150 | File "bugs.exts.md", line 24, characters 5-8 151 | Code span line: 152 | File "bugs.exts.md", line 24, characters 6-7 153 | Data row: 154 | File "bugs.exts.md", line 25, characters 0-27 155 | Inlines: 156 | File "bugs.exts.md", line 25, characters 2-12 157 | Raw HTML: 158 | File "bugs.exts.md", line 25, characters 2-5 159 | Raw HTML line: 160 | File "bugs.exts.md", line 25, characters 2-5 161 | Text: 162 | File "bugs.exts.md", line 25, characters 5-8 163 | Raw HTML: 164 | File "bugs.exts.md", line 25, characters 8-12 165 | Raw HTML line: 166 | File "bugs.exts.md", line 25, characters 8-12 167 | Blank line: 168 | File "bugs.exts.md", line 26 169 | Heading, level 1: 170 | File "bugs.exts.md", line 27, characters 0-9 171 | Text: 172 | File "bugs.exts.md", line 27, characters 2-9 173 | Blank line: 174 | File "bugs.exts.md", line 28 175 | Paragraph: 176 | File "bugs.exts.md", line 29, characters 0-44 177 | Text: 178 | File "bugs.exts.md", line 29, characters 0-44 179 | Blank 
line: 180 | File "bugs.exts.md", line 30 181 | List (tight:true): 182 | File "bugs.exts.md", line 31, characters 0-29 183 | List item: 184 | File "bugs.exts.md", line 31, characters 0-29 185 | List marker: 186 | File "bugs.exts.md", line 31, characters 0-1 187 | Task marker: 188 | File "bugs.exts.md", line 31, characters 2-5 189 | Paragraph: 190 | File "bugs.exts.md", line 31, characters 6-29 191 | Text: 192 | File "bugs.exts.md", line 31, characters 6-29 193 | Blank line: 194 | File "bugs.exts.md", line 32 -------------------------------------------------------------------------------- /test/expect/bugs.exts.trip.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Bug #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Bug \#10 14 | 15 | In cells toplevel text nodes not at the beginning or end of the cell 16 | get dropped. 17 | 18 | | Foo | 19 | |-------------------------| 20 | | `a` or `b` | 21 | | before `a` or `b` after | 22 | | before `a` or `b`after | 23 | | before`a`or`b`after | 24 | | *a*`a` | 25 | |

foo

| 26 | 27 | # Bug \#15 28 | 29 | Invalid markup generated for cancelled task. 30 | 31 | * [~] This has been cancelled 32 | -------------------------------------------------------------------------------- /test/expect/bugs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bugs 7 | 206 | 207 | 208 |

Bugs

209 |

Add a section for the bug and the CommonMark that triggers it as 210 | follows:

211 |
# Issue #NUM
212 | 
213 | The triggering CommonMark
214 | 
215 |

Issue #11

216 |

Escape ordered item markers at the beginning of paragraphs correctly. 217 | These should be paragraphs when rendered to markdown not list items.

218 |

1.

219 |

2.

220 |

23.

221 |

24)

222 |

1234567890. This is not a list marker no need to escape it.

223 | 224 | -------------------------------------------------------------------------------- /test/expect/bugs.latex: -------------------------------------------------------------------------------- 1 | \section{Bugs} 2 | 3 | Add a section for the bug and the CommonMark that triggers it as 4 | follows: 5 | 6 | \begin{verbatim} 7 | # Issue #NUM 8 | 9 | The triggering CommonMark 10 | \end{verbatim} 11 | 12 | \section{Issue \#11} 13 | 14 | Escape ordered item markers at the beginning of paragraphs correctly. 15 | These should be paragraphs when rendered to markdown not list items. 16 | 17 | 1. 18 | 19 | 2. 20 | 21 | 23. 22 | 23 | 24) 24 | 25 | 1234567890. This is not a list marker no need to escape it. 26 | -------------------------------------------------------------------------------- /test/expect/bugs.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.md", lines 1-33 3 | Heading, level 1: 4 | File "bugs.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.md", line 3 11 | Paragraph: 12 | File "bugs.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "bugs.md", line 4, characters 64-65 21 | Layout after: 22 | File "bugs.md", line 5 23 | Text: 24 | File "bugs.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.md", line 6 27 | Code block: 28 | File "bugs.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "bugs.md", line 7, characters 0-3 31 | Code line: 32 | File "bugs.md", line 8, characters 0-12 33 | Code line: 34 | File "bugs.md", line 9 35 | Code line: 36 | File "bugs.md", line 10, characters 0-25 37 | Closing fence: 38 | File "bugs.md", line 11, characters 0-3 39 | 
Blank line: 40 | File "bugs.md", line 12 41 | Heading, level 1: 42 | File "bugs.md", line 13, characters 0-11 43 | Text: 44 | File "bugs.md", line 13, characters 2-11 45 | Blank line: 46 | File "bugs.md", line 14 47 | Paragraph: 48 | File "bugs.md", lines 15-16, characters 0-68 49 | Inlines: 50 | File "bugs.md", lines 15-16, characters 0-68 51 | Text: 52 | File "bugs.md", line 15, characters 0-69 53 | Soft break: 54 | File "bugs.md", lines 15-16, characters 69-0 55 | Layout before: 56 | File "bugs.md", line 15, characters 69-69 57 | Layout after: 58 | File "bugs.md", line 16 59 | Text: 60 | File "bugs.md", line 16, characters 0-68 61 | Blank line: 62 | File "bugs.md", line 17 63 | Paragraph: 64 | File "bugs.md", line 18, characters 0-3 65 | Text: 66 | File "bugs.md", line 18, characters 0-3 67 | Blank line: 68 | File "bugs.md", line 19 69 | Paragraph: 70 | File "bugs.md", line 20, characters 0-3 71 | Text: 72 | File "bugs.md", line 20, characters 0-3 73 | Blank line: 74 | File "bugs.md", line 21 75 | Paragraph: 76 | File "bugs.md", line 22, characters 0-4 77 | Text: 78 | File "bugs.md", line 22, characters 0-4 79 | Blank line: 80 | File "bugs.md", line 23 81 | Blank line: 82 | File "bugs.md", line 24 83 | Paragraph: 84 | File "bugs.md", line 25, characters 0-4 85 | Text: 86 | File "bugs.md", line 25, characters 0-4 87 | Blank line: 88 | File "bugs.md", line 26 89 | Paragraph: 90 | File "bugs.md", line 27, characters 0-59 91 | Text: 92 | File "bugs.md", line 27, characters 0-59 93 | Blank line: 94 | File "bugs.md", line 28 95 | Blank line: 96 | File "bugs.md", line 29 97 | Blank line: 98 | File "bugs.md", line 30 99 | Blank line: 100 | File "bugs.md", line 31 101 | Blank line: 102 | File "bugs.md", line 32 103 | Blank line: 104 | File "bugs.md", line 33 -------------------------------------------------------------------------------- /test/expect/bugs.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a 
section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Issue #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Issue #11 14 | 15 | Escape ordered item markers at the beginning of paragraphs correctly. 16 | These should be paragraphs when rendered to markdown not list items. 17 | 18 | 1\. 19 | 20 | 2\. 21 | 22 | 23\. 23 | 24 | 25 | 24\) 26 | 27 | 1234567890. This is not a list marker no need to escape it. 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/expect/bugs.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.md", lines 1-33 3 | Heading, level 1: 4 | File "bugs.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.md", line 3 11 | Paragraph: 12 | File "bugs.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "-" 21 | Layout after: 22 | File "-" 23 | Text: 24 | File "bugs.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.md", line 6 27 | Code block: 28 | File "bugs.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "-" 31 | Code line: 32 | File "bugs.md", line 8, characters 0-12 33 | Code line: 34 | File "bugs.md", line 9 35 | Code line: 36 | File "bugs.md", line 10, characters 0-25 37 | Closing fence: 38 | File "-" 39 | Blank line: 40 | File "bugs.md", line 12 41 | Heading, level 1: 42 | File "bugs.md", line 13, characters 0-11 43 | Text: 44 | File "bugs.md", line 13, characters 2-11 45 | Blank line: 46 | File "bugs.md", line 14 47 | Paragraph: 48 | File "bugs.md", lines 15-16, characters 0-68 49 | Inlines: 50 | File "bugs.md", lines 15-16, 
characters 0-68 51 | Text: 52 | File "bugs.md", line 15, characters 0-69 53 | Soft break: 54 | File "bugs.md", lines 15-16, characters 69-0 55 | Layout before: 56 | File "-" 57 | Layout after: 58 | File "-" 59 | Text: 60 | File "bugs.md", line 16, characters 0-68 61 | Blank line: 62 | File "bugs.md", line 17 63 | Paragraph: 64 | File "bugs.md", line 18, characters 0-3 65 | Text: 66 | File "bugs.md", line 18, characters 0-3 67 | Blank line: 68 | File "bugs.md", line 19 69 | Paragraph: 70 | File "bugs.md", line 20, characters 0-3 71 | Text: 72 | File "bugs.md", line 20, characters 0-3 73 | Blank line: 74 | File "bugs.md", line 21 75 | Paragraph: 76 | File "bugs.md", line 22, characters 0-4 77 | Text: 78 | File "bugs.md", line 22, characters 0-4 79 | Blank line: 80 | File "bugs.md", line 23 81 | Blank line: 82 | File "bugs.md", line 24 83 | Paragraph: 84 | File "bugs.md", line 25, characters 0-4 85 | Text: 86 | File "bugs.md", line 25, characters 0-4 87 | Blank line: 88 | File "bugs.md", line 26 89 | Paragraph: 90 | File "bugs.md", line 27, characters 0-59 91 | Text: 92 | File "bugs.md", line 27, characters 0-59 93 | Blank line: 94 | File "bugs.md", line 28 95 | Blank line: 96 | File "bugs.md", line 29 97 | Blank line: 98 | File "bugs.md", line 30 99 | Blank line: 100 | File "bugs.md", line 31 101 | Blank line: 102 | File "bugs.md", line 32 103 | Blank line: 104 | File "bugs.md", line 33 -------------------------------------------------------------------------------- /test/expect/bugs.trip.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Issue #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Issue \#11 14 | 15 | Escape ordered item markers at the beginning of paragraphs correctly. 16 | These should be paragraphs when rendered to markdown not list items. 17 | 18 | 1\. 19 | 20 | 2\. 21 | 22 | 23\. 
23 | 24 | 25 | 24\) 26 | 27 | 1234567890. This is not a list marker no need to escape it. 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/expect/spec.trip: -------------------------------------------------------------------------------- 1 | [ OK ] 274 out of 652 are correct. 2 | [TRIP] 378 out of 652 round trip. 3 | [ OK ] All 652 on parse without layout. 4 | -------------------------------------------------------------------------------- /test/expect/test.expect: -------------------------------------------------------------------------------- 1 | Expectation for mapper table bug #14: 2 | 3 | | a | b | c | 4 | |---|---|---| 5 | | a | b | c | 6 | | | b | c | 7 | | | | c | 8 | 9 | -------------------------------------------------------------------------------- /test/pathological.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open Result.Syntax 8 | 9 | let range ~first ~last = 10 | let rec loop acc k = if k < first then acc else loop (k :: acc) (k-1) in 11 | loop [] last 12 | 13 | (* Pathological tests for CommonMark parsers. 14 | 15 | These tests are from: 16 | 17 | https://github.com/commonmark/cmark/blob/master/test/pathological_tests.py 18 | 19 | The test expectations there use regexps with constant n matches 20 | which Str doesn't support. Instead we make the expectations more 21 | precise and trim and map newlines to spaces the HTML renders to 22 | avoid rendering layout discrepancies. 
*) 23 | 24 | let massage s = String.trim (String.map (function '\n' -> ' ' | c -> c) s) 25 | 26 | type test = { doc : string; i : string; exp : string; } 27 | 28 | let tests = 29 | let n = 30000 (* should be even *) in 30 | let p s = Fmt.str "

%s

" s in 31 | let ( + ) = ( ^ ) and cat = String.concat "" in 32 | let ( * ) s n = cat @@ List.map (Fun.const s) (range ~first:1 ~last:n) in 33 | [ { doc = "Nested strong emphasis"; 34 | i = "*a **a "*n + "b" + " a** a*"*n; 35 | exp = p @@ "a a "*n + "b" + " a a"*n }; 36 | { doc = "Many emphasis closers with no openers"; 37 | i = "a_ "*n; 38 | exp = p @@ "a_ "*(n - 1) + "a_" }; 39 | { doc = "Many emphasis openers with no closers"; 40 | i = "_a "*n; 41 | exp = p @@ "_a "*(n - 1) + "_a" }; 42 | { doc = "Many link closers with no openers"; 43 | i = "a]"*n; 44 | exp = p @@ "a]"*n }; 45 | { doc = "Many link openers with no closers"; 46 | i = "[a"*n; 47 | exp = p @@ "[a"*n; }; 48 | { doc = "Mismatched openers and closers"; 49 | i = "*a_ "*n; 50 | exp = p @@ "*a_ "*(n-1) + "*a_" }; 51 | { doc = "Cmark issue #389"; 52 | i = "*a "*n + "_a*_ "*n; 53 | exp = p @@ "a "*n + "_a_ "*(n - 1) + "_a_" }; 54 | { doc = "Openers and closers multiple of 3"; 55 | i = "a**b" + "c* "*n; 56 | exp = p @@ "a**b" + "c* "*(n - 1) + "c*" }; 57 | { doc = "Link openers and emph closers"; 58 | i = "[ a_"*n; 59 | exp = p @@ "[ a_"*n }; 60 | { doc = "Sequence '[ (](' repeated"; 61 | i = "[ (]("*n; 62 | exp = p @@ "[ (]("*n; }; 63 | { doc = "Sequence '![[]()' repeated"; 64 | i = "![[]()"*n; 65 | exp = p @@ {|![|}*n; }; 66 | { doc = "Hard link/emphasis case"; 67 | i = "**x [a*b**c*](d)"; 68 | exp = p @@ {|**x ab**c|} }; 69 | { doc = "Nested brackets [* a ]*"; 70 | i = "["*n + "a" + "]"*n; 71 | exp = p @@ "["*n + "a" + "]"*n }; 72 | { doc = "Nested block quotes"; 73 | i = "> "*n + "a"; 74 | exp = "
"*n + p "a" + "
"*n }; 75 | { doc = "Deeply nested lists"; 76 | i = cat (List.map (fun n -> " "*n + "* a\n") (range ~first:0 ~last:499)); 77 | exp = "
    "+"
  • a
      "*499+"
    • a
    "+"
"*499 }; 78 | { doc = "U+0000 in input"; 79 | i = "abc\x00de\x00"; 80 | exp = p @@ "abc\u{FFFD}de\u{FFFD}" }; 81 | { doc = "Backticks"; 82 | i = cat (List.map (fun n -> "e" + "`"*n) (range ~first:1 ~last:2500)); 83 | exp = 84 | p @@ cat (List.map (fun n -> "e" + "`"*n) (range ~first:1 ~last:2500))}; 85 | { doc = "Unclosed inline link <>"; 86 | i = "[a]("*(n/2) + "a" + ""* (n/2); }; 97 | { doc = "Many references"; 98 | i = 99 | cat (List.map (fun n -> Fmt.str "[%d]: u\n" n) (range ~first:1 ~last:n)) 100 | + "[0]"*n; 101 | exp = p @@ "[0]"*n; } 102 | ] 103 | 104 | (* Run commands on a deadline. Something like this should be added to B0_kit. *) 105 | 106 | type deadline_exit = [ Os.Cmd.status | `Timeout ] 107 | type deadline_run = Mtime.Span.t * deadline_exit 108 | 109 | let deadline_run ~timeout ?env ?cwd ?stdin ?stdout ?stderr cmd = 110 | let rec wait ~deadline dur pid = 111 | let* st = Os.Cmd.spawn_poll_status pid in 112 | match st with 113 | | Some st -> Ok (Os.Mtime.count dur, (st :> deadline_exit)) 114 | | None -> 115 | if Mtime.Span.compare (Os.Mtime.count dur) deadline < 0 116 | then (ignore (Os.sleep Mtime.Span.ms); wait ~deadline dur pid) else 117 | let* () = Os.Cmd.kill pid Sys.sigkill in 118 | let* _st = Os.Cmd.spawn_wait_status pid in 119 | Ok (Os.Mtime.count dur, `Timeout) 120 | in 121 | let* pid = Os.Cmd.spawn ?env ?cwd ?stdin ?stdout ?stderr cmd in 122 | wait ~deadline:timeout (Os.Mtime.counter ()) pid 123 | 124 | (* Running the tests *) 125 | 126 | type test_exit = [ deadline_exit | `Unexpected of string * string ] 127 | 128 | let pp_ok = Fmt.st [`Fg `Green] 129 | let pp_err = Fmt.st [`Fg `Red] 130 | let pp_test_exit ppf = function 131 | | `Exited 0 -> Fmt.pf ppf "%a" pp_ok "ok" 132 | | `Exited n -> Fmt.pf ppf "%a with %d" pp_err "exited" n 133 | | `Signaled sg -> Fmt.pf ppf "%a with %a" pp_err "signaled" Fmt.sys_signal sg 134 | | `Timeout -> Fmt.pf ppf "%a" pp_err "timed out" 135 | | `Unexpected (exp, res)-> 136 | let pp_data = Fmt.truncated ~max:50 
in 137 | Fmt.pf ppf "@[%a:@,Expect: %a@,Found : %a@,@]" 138 | pp_err "unexpected output" pp_data exp pp_data res 139 | 140 | let pp_tests_params ppf (timeout_s, cmd) = 141 | Fmt.pf ppf "@[Testing: %a@,Timeout: %a@]" 142 | Cmd.pp cmd Mtime.Span.pp timeout_s 143 | 144 | let pp_tests_summary ppf (count, fail, dur) = match fail = 0 with 145 | | true -> 146 | Fmt.pf ppf "[ %a ] All %d tests succeeded in %a" 147 | pp_ok "OK" count Mtime.Span.pp dur 148 | | false -> 149 | Fmt.pf ppf "[%a] %d out of %d tests failed in %a" 150 | pp_err "FAIL" fail count Mtime.Span.pp dur 151 | 152 | let run_test ~timeout t cmd = 153 | Result.join @@ Os.File.with_tmp_fd @@ fun tmpfile fd -> 154 | let stdin = Os.Cmd.in_string t.i in 155 | let stdout = Os.Cmd.out_fd ~close:false fd in 156 | let* dur, exit = deadline_run ~timeout ~stdin ~stdout cmd in 157 | if exit <> `Exited 0 then Ok (dur, (exit :> test_exit)) else 158 | let* res = Os.File.read tmpfile in 159 | let res = massage res in 160 | if String.equal (String.trim t.exp) res 161 | then Ok (dur, (exit :> test_exit)) else Ok (dur, `Unexpected (t.exp, res)) 162 | 163 | let run_tests ~timeout cmd = 164 | let do_test t (dur, fail, i) = 165 | Fmt.pr "%2d. %s: @?" i t.doc; 166 | let* d, exit = run_test ~timeout t cmd in 167 | let fail = match exit with `Exited 0 -> fail | _ -> fail + 1 in 168 | Fmt.pr "%a in %a@]@." 
pp_test_exit exit Mtime.Span.pp d; 169 | Ok (Mtime.Span.add dur d, fail, i + 1) 170 | in 171 | Log.if_error ~use:2 @@ 172 | let* cmd = Os.Cmd.get cmd in 173 | let init = Mtime.Span.zero, 0, 1 in 174 | Log.stdout (fun m -> m "%a" pp_tests_params (timeout, cmd)); 175 | let* dur, fail, i = List.fold_stop_on_error do_test tests init in 176 | Log.stdout (fun m -> m "%a" pp_tests_summary (i - 1, fail, dur)); 177 | Ok (Int.min fail 1) 178 | 179 | let dump_tests dir = 180 | let dump_test dir t i = 181 | let name = Fmt.str "patho-test-%02d" i in 182 | let force = true and make_path = true in 183 | let src = Fpath.(dir / name + ".md") in 184 | let exp = Fpath.(dir / name + ".exp") in 185 | let* () = Os.File.write ~force ~make_path src t.i in 186 | let* () = Os.File.write ~force ~make_path exp t.exp in 187 | Ok (i + 1) 188 | in 189 | Log.if_error ~use:3 @@ 190 | let* dir = Fpath.of_string dir in 191 | List.fold_stop_on_error (dump_test dir) tests 1 192 | 193 | let main () = 194 | let usage = 195 | "Usage: pathological -- TOOL ARG…\n\ 196 | TOOL must read CommonMark on stdin and write HTML on stdout." 197 | in 198 | let dump_dir = ref None and timeout_s = ref 1 and cmd = ref [] in 199 | let set_dump_dir s = dump_dir := Some s in 200 | let add_arg s = cmd := s :: !cmd in 201 | let args = 202 | [ "--timeout-s", Arg.Set_int timeout_s, " Timeout in secs (defaults to 1)"; 203 | "-d", Arg.String set_dump_dir, "DIR Don't test, dump tests to DIR"; 204 | "--", Arg.Rest add_arg, "TOOL ARG… Executable to test."; ] 205 | in 206 | Arg.parse args add_arg usage; 207 | match !dump_dir with 208 | | Some dir -> dump_tests dir 209 | | None -> 210 | let timeout = Mtime.Span.(!timeout_s * s) in 211 | let cmd = Cmd.of_list Fun.id (List.rev !cmd) in 212 | if Cmd.is_empty cmd 213 | then (Log.err (fun m -> m "No tool specified. 
Try '--help'."); exit 2) 214 | else run_tests ~timeout cmd 215 | 216 | let () = if !Sys.interactive then () else exit (main ()) 217 | -------------------------------------------------------------------------------- /test/spec.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open Result.Syntax 8 | open B0_json 9 | 10 | let version = "0.30" 11 | type test = 12 | { markdown : string; 13 | html : string; 14 | example : int; 15 | start_line : int; 16 | end_line : int; 17 | section : string } 18 | 19 | let test markdown html example start_line end_line section = 20 | { markdown; html; example; start_line; end_line; section } 21 | 22 | let testq = 23 | Jsonq.(succeed test $ 24 | mem "markdown" string $ 25 | mem "html" string $ 26 | mem "example" int $ 27 | mem "start_line" int $ 28 | mem "end_line" int $ 29 | mem "section" string) 30 | 31 | let parse_tests file = 32 | let* data = Os.File.read (Fpath.v file) in 33 | let* json = Json.of_string ~file data in 34 | let tests = Jsonq.array testq in 35 | Jsonq.query tests json 36 | 37 | let diff ~spec cmarkit = 38 | let retract_result = function Ok s | Error s -> s in 39 | retract_result @@ 40 | let color = match Fmt.styler () with 41 | | Fmt.Plain -> "--color=never" 42 | | Fmt.Ansi -> "--color=always" 43 | in 44 | let* diff = 45 | Os.Cmd.get Cmd.(arg "git" % "diff" % "--ws-error-highlight=all" % 46 | "--no-index" % "--patience" % color) 47 | in 48 | Result.join @@ Os.Dir.with_tmp @@ fun dir -> 49 | let force = false and make_path = false in 50 | let* () = Os.File.write ~force ~make_path Fpath.(dir / "spec") spec in 51 | let* () = Os.File.write ~force ~make_path Fpath.(dir / "cmarkit") cmarkit in 52 | let env 
= ["GIT_CONFIG_SYSTEM=/dev/null"; "GIT_CONFIG_GLOBAL=/dev/null"; ] in 53 | let trim = false in 54 | Result.map snd @@ 55 | Os.Cmd.run_status_out ~env ~trim ~cwd:dir Cmd.(diff % "spec" % "cmarkit") 56 | 57 | let ok = Fmt.st [`Fg `Green] 58 | let fail = Fmt.st [`Fg `Red] 59 | 60 | let cli ~exe () = 61 | let usage = Fmt.str "Usage %s [--file FILE.json] NUM[-NUM]…" exe in 62 | let show_diff = ref false in 63 | let file = ref "test/spec.json" in 64 | let args = 65 | [ "--file", Arg.Set_string file, Fmt.str "Test file (defaults to %s)" !file; 66 | "--show-diff", Arg.Set show_diff, 67 | "Show diffs of correct CommonMark renders" ] 68 | in 69 | let examples = ref [] in 70 | let pos s = try examples := int_of_string s :: !examples with 71 | | Failure _ -> 72 | try 73 | match String.cut_left ~sep:"-" s with 74 | | None -> failwith "" 75 | | Some (l, r) -> 76 | let l = int_of_string l in 77 | let r = int_of_string r in 78 | let lo, hi = if l < r then l, r else r, l in 79 | for i = hi downto lo do examples := i :: !examples done 80 | with 81 | | Failure _ -> 82 | raise (Arg.Bad 83 | (Fmt.str "Argument %S: not an example number or range" s)) 84 | in 85 | Arg.parse args pos usage; 86 | !show_diff, !file, (List.rev !examples) 87 | -------------------------------------------------------------------------------- /test/spec.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Specification test parser *) 7 | 8 | val version : string 9 | 10 | type test = 11 | { markdown : string; 12 | html : string; 13 | example : int; 14 | start_line : int; 15 | end_line : int; 16 | section : string } 17 | 18 | val parse_tests : string -> (test list, string) result 19 | 20 | val diff : spec:string -> string -> string 21 | 22 | val ok : string B0_std.Fmt.t 23 | val fail : string B0_std.Fmt.t 24 | val cli : exe:string -> unit -> bool * string * int list 25 | -------------------------------------------------------------------------------- /test/test.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | let test_mapper_table_bug_14 () = 7 | let table = 8 | "| a | b | c |\n\ 9 | |---|---|---|\n\ 10 | | a | b | c |\n\ 11 | | | b | c |\n\ 12 | | | | c |\n" 13 | in 14 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict:false table in 15 | let mdoc = Cmarkit.Mapper.map_doc (Cmarkit.Mapper.make ()) doc in 16 | print_endline "Expectation for mapper table bug #14:\n"; 17 | print_endline (Cmarkit_commonmark.of_doc mdoc); 18 | () 19 | 20 | let main () = 21 | test_mapper_table_bug_14 (); 22 | () 23 | 24 | let () = if !Sys.interactive then () else main () 25 | -------------------------------------------------------------------------------- /test/test_spec.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open Result.Syntax 8 | open B0_json 9 | 10 | let status ~pass ex_num = 11 | Log.stdout @@ fun m -> 12 | let pp_ex ppf n = 13 | Fmt.pf ppf "https://spec.commonmark.org/%s/#example-%d" Spec.version n 14 | in 15 | let pp, st = if pass then Spec.ok, "PASS" else Spec.fail, "FAIL" in 16 | m "[%a] %a" pp st Fmt.(code' pp_ex) ex_num 17 | 18 | let renderer = 19 | (* Specification tests render empty elements as XHTML. *) 20 | Cmarkit_html.xhtml_renderer ~safe:false () 21 | 22 | let test (t : Spec.test) = 23 | let doc = Cmarkit.Doc.of_string t.markdown in 24 | let html = Cmarkit_renderer.doc_to_string renderer doc in 25 | if String.equal html t.html then Ok ((* status ~pass:true t.example *)) else 26 | let diff = String.concat "\n" [t.markdown; Spec.diff ~spec:t.html html] in 27 | status ~pass:false t.example; 28 | Log.stdout (fun m -> m "%s" diff); 29 | Error () 30 | 31 | let run_tests test_file examples (* empty is all *) = 32 | let log_ok n = Log.stdout @@ fun m -> 33 | m "[ %a ] All %d tests succeeded." Spec.ok "OK" n 34 | in 35 | let log_fail n f = Log.stdout @@ fun m -> 36 | m "[%a] %d out of %d tests failed." 
Spec.fail "FAIL" f n 37 | in 38 | Log.if_error ~use:1 @@ 39 | let* tests = Spec.parse_tests test_file in 40 | let select (t : Spec.test) = examples = [] || List.mem t.example examples in 41 | let do_test (n, fail as acc) t = 42 | if not (select t) then acc else 43 | match test t with 44 | | Ok () -> (n + 1, fail) 45 | | Error () -> (n + 1, fail + 1) 46 | in 47 | let n, fail = List.fold_left do_test (0, 0) tests in 48 | if fail = 0 then (log_ok n; Ok 0) else (log_fail n fail; Ok 1) 49 | 50 | let main () = 51 | let _, file, examples = Spec.cli ~exe:"test_spec" () in 52 | run_tests file examples 53 | 54 | let () = if !Sys.interactive then () else exit (main ()) 55 | -------------------------------------------------------------------------------- /tool/cmd_commonmark.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Std 7 | open Result.Syntax 8 | 9 | let diff src render = 10 | let env = ["GIT_CONFIG_SYSTEM=/dev/null"; "GIT_CONFIG_GLOBAL=/dev/null"; ] in 11 | let set_env = match Sys.win32 with 12 | | true -> String.concat "" (List.map (fun e -> "set " ^ e ^ " && ") env) 13 | | false -> String.concat " " env 14 | in 15 | let diff = "git diff --ws-error-highlight=all --no-index --patience " in 16 | let src_file = "src" and render_file = "render" in 17 | let cmd = String.concat " " [set_env; diff; src_file; render_file] in 18 | Result.join @@ Result.join @@ Os.with_tmp_dir @@ fun dir -> 19 | Os.with_cwd dir @@ fun () -> 20 | let* () = Os.write_file src_file src in 21 | let* () = Os.write_file render_file render in 22 | Ok (Sys.command cmd) 23 | 24 | let commonmark files strict no_layout dodiff html_diff = 25 | let op = match html_diff, dodiff with 26 | | true, _ -> `Html_diff | false, true -> `Diff | false, false -> `Render 27 | in 28 | let layout = not no_layout in 29 | let commonmark ~file contents = 30 | let doc = Cmarkit.Doc.of_string ~file ~layout ~strict contents in 31 | Cmarkit_commonmark.of_doc doc 32 | in 33 | match op with 34 | | `Render -> 35 | let output_cmark ~file src = print_string (commonmark ~file src) in 36 | Std.process_files output_cmark files 37 | | `Diff -> 38 | let trips = ref [] in 39 | let add ~file src = trips := (src, commonmark ~file src) :: !trips in 40 | let c = Std.process_files add files in 41 | if c <> 0 then c else 42 | let src = String.concat "\n" (List.rev_map fst !trips) in 43 | let outs = String.concat "\n" (List.rev_map snd !trips) in 44 | (match diff src outs with 45 | | Ok exit -> if exit = 0 then 0 else Exit.err_diff 46 | | Error err -> Log.err "%s" err; Cmdliner.Cmd.Exit.some_error) 47 | | `Html_diff -> 48 | let htmls = ref [] in 49 | let add ~file src = 50 | let doc = Cmarkit.Doc.of_string ~file ~layout ~strict 
src in 51 | let doc_html = Cmarkit_html.of_doc ~safe:false doc in 52 | let md = Cmarkit_commonmark.of_doc doc in 53 | let doc' = Cmarkit.Doc.of_string ~layout ~strict md in 54 | let doc_html' = Cmarkit_html.of_doc ~safe:false doc' in 55 | htmls := (doc_html, doc_html') :: !htmls 56 | in 57 | let c = Std.process_files add files in 58 | if c <> 0 then c else 59 | let html = String.concat "\n" (List.rev_map fst !htmls) in 60 | let html' = String.concat "\n" (List.rev_map snd !htmls) in 61 | match diff html html' with 62 | | Ok exit -> if exit = 0 then 0 else Exit.err_diff 63 | | Error err -> Log.err "%s" err; Cmdliner.Cmd.Exit.some_error 64 | 65 | (* Command line interface *) 66 | 67 | open Cmdliner 68 | 69 | let diff = 70 | let doc = "Output difference between the source and its CommonMark \ 71 | rendering (needs $(b,git) in your $(b,PATH)). If there are \ 72 | differences check that the HTML renderings do not differ with \ 73 | option $(b,--html-diff)." 74 | in 75 | Arg.(value & flag & info ["diff"] ~doc) 76 | 77 | let html_diff = 78 | let doc = "Output difference between the source HTML rendering \ 79 | and the HTML rendering of its CommonMark rendering \ 80 | (needs $(b,git) in your $(b,PATH)). If there are no \ 81 | differences the CommonMark rendering is said to be correct." 82 | in 83 | Arg.(value & flag & info ["html-diff"] ~doc) 84 | 85 | let v = 86 | let doc = "Render CommonMark to CommonMark" in 87 | let exits = Exit.exits_with_err_diff in 88 | let man = [ 89 | `S Manpage.s_description; 90 | `P "$(tname) outputs a CommonMark document. Multiple input 91 | files are concatenated and separated by a newline."; 92 | `Pre "$(mname) $(tname) $(b,README.md > README-trip.md)"; `Noblank; 93 | `Pre "$(mname) $(tname) $(b,--diff README.md)"; `Noblank; 94 | `Pre "$(mname) $(tname) $(b,--html-diff README.md)"; 95 | `P "Layout is preserved on a best-effort basis. 
Some things are not \ 96 | attempted like preserving entities and character references, \ 97 | preserving the exact line by line indentation layout of container \ 98 | blocks, preserving lazy continuation lines, preserving the \ 99 | identation of blank lines, keeping track of used newlines \ 100 | except for the first one."; 101 | `P "Consult the documentation of the $(b,cmarkit) OCaml library for \ 102 | more details about the limitations."; 103 | `Blocks Cli.common_man; ] 104 | in 105 | Cmd.v (Cmd.info "commonmark" ~doc ~exits ~man) @@ 106 | Term.(const commonmark $ Cli.files $ Cli.strict $ Cli.no_layout $ 107 | diff $ html_diff) 108 | -------------------------------------------------------------------------------- /tool/cmd_commonmark.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [v] is the command line for [commonmark]. *) 8 | -------------------------------------------------------------------------------- /tool/cmd_html.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [v] is the command line for [html]. 
*)

--------------------------------------------------------------------------------
/tool/cmd_latex.ml:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Std
open Cmarkit

(* Holds the built-in preamble text; it is assigned by the top-level
   [let ()] at the end of this module. *)
let built_in_preamble = ref "" (* See at the end of the module *)

(* Adds preamble text [p] to [b], surrounded by newlines. *)
let buffer_add_inline_preamble b p =
  Buffer.add_char b '\n'; Buffer.add_string b p; Buffer.add_char b '\n'

(* Like [buffer_add_inline_preamble] but does nothing on [None]. *)
let buffer_add_inline_preamble_option b = function
| None -> () | Some p -> buffer_add_inline_preamble b p

(* Adds the trimmed contents of each file of [files] to [b] as a preamble.
   Raises [Failure] if a file cannot be read. *)
let buffer_add_inline_preambles b files =
  let add_file b file =
    let preamble = Os.read_file file |> Result.to_failure in
    buffer_add_inline_preamble b (String.trim preamble)
  in
  List.iter (add_file b) files

(* [text_inline t] is a text inline for [t] without metadata. *)
let text_inline t = Inline.Text (t, Meta.none)

(* Title used when none can be extracted. The identifier keeps its
   historical spelling for the other definitions that refer to it; the text
   itself is spelled like the fallback of [title_of_file]. *)
let untilted_inline = text_inline "Untitled"

(* [lift_headings_map ~extract_title doc] is [(title, doc')] where [doc'] is
   [doc] with headings of level greater than 1 lifted one level up. If
   [extract_title] is [true] the first heading met is deleted from the
   document and its inline becomes [title]; otherwise, or if there is no
   heading, [title] is [untilted_inline]. *)
let lift_headings_map ~extract_title doc =
  let open Cmarkit in
  let title = ref None in
  let block m = function
  | Block.Heading (h, meta) as b ->
      let inline = Block.Heading.inline h in
      if extract_title && Option.is_none !title
      then (title := Some inline; Mapper.delete) else
      let level = Block.Heading.level h in
      (* Level 1 headings cannot be lifted further, keep them as is. *)
      if level = 1 then Mapper.ret b else
      let id = Block.Heading.id h in
      let level = level - 1 in
      let h = Block.Heading.make ?id ~level inline in
      Mapper.ret (Block.Heading (h, meta))
  | _ -> Mapper.default
  in
  let doc = Mapper.map_doc (Mapper.make ~block ()) doc in
  let title = Option.value ~default:untilted_inline !title in
  title, doc

let empty_defs = Cmarkit.Label.Map.empty

(* [buffer_add_docs ?defs ~accumulate_defs parse r b files] reads, parses
   with [parse] and renders with [r] each file of [files] on [b], separating
   renderings by a newline. [defs] are the label definitions given to the
   first parse; if [accumulate_defs] is [true] the definitions of a parsed
   document are given to the parse of the next file, otherwise each following
   parse starts from [empty_defs]. Raises [Failure] on file read errors. *)
let buffer_add_docs ?(defs = empty_defs) ~accumulate_defs parse r b files =
  let rec loop defs = function
  | [] -> ()
  | file :: files ->
      let md = Os.read_file file |> Result.to_failure in
      let _, doc = parse ~extract_title:false ~file ~defs md in
      let defs = if accumulate_defs then Cmarkit.Doc.defs doc else empty_defs in
      Cmarkit_renderer.buffer_add_doc r b doc;
      if files <> [] then Buffer.add_char b '\n';
      loop defs files
  in
  loop defs files

(* Renders the inline [title] on [b] with renderer [r], in a rendering
   context initialized with [doc]. *)
let buffer_add_title r doc b title =
  let ctx = Cmarkit_renderer.Context.make r b in
  let () = Cmarkit_renderer.Context.init ctx doc in
  Cmarkit_renderer.Context.inline ctx title

(* Adds an [\author{...}] line to [b] if an author is given. The author
   text is added as is. *)
let buffer_add_author b = function
| None -> () | Some a ->
    Buffer.add_string b "\n\\author{";
    Buffer.add_string b a; Buffer.add_char b '}'

(* [title_of_file f] is the capitalized basename of [f] without its
   extension, or ["Untitled"] if [f] is ["-"]. *)
let title_of_file f =
  if f = "-" then "Untitled" else
  String.capitalize_ascii (Filename.remove_extension (Filename.basename f))

(* [doc ...] is the text of a complete LaTeX document made of the rendering
   of [files]. The title is, in order of preference, [title] if specified,
   the first heading of the first file if [extract_title] is [true], or a
   title derived from the name of the first file. The built-in preamble is
   included if no inline preamble is given or if [keep_built_in_preambles]
   is [true]. Raises [Failure] on file read errors or if [files] is empty. *)
let doc
    ~accumulate_defs ~extract_title parse r ~author ~title ~inline_preambles
    ~keep_built_in_preambles files
  =
  let built_in_preamble =
    if inline_preambles = [] || keep_built_in_preambles
    then Some (!built_in_preamble) else None
  in
  let file, files = match files with
  | [] -> failwith "No input file to process"
  | file :: files -> file, files
  in
  let md = Os.read_file file |> Result.to_failure in
  let title, doc =
    let defs = empty_defs in
    match title with
    | Some t -> text_inline t, snd (parse ~extract_title:false ~file ~defs md)
    | None ->
        if extract_title then parse ~extract_title:true ~file ~defs md else
        let title = text_inline (title_of_file file) in
        (title, snd (parse ~extract_title:false ~file ~defs md))
  in
  let defs = if accumulate_defs then Cmarkit.Doc.defs doc else empty_defs in
  Printf.kbprintf Buffer.contents (Buffer.create 1024)
    {|\documentclass{article}
%a%a
%a\title{%a}
\begin{document}
\maketitle
%a%a%a
\end{document}
|}
    buffer_add_inline_preamble_option built_in_preamble
    buffer_add_inline_preambles inline_preambles
    buffer_add_author author
    (buffer_add_title r doc) title
    (Cmarkit_renderer.buffer_add_doc r) doc
    Buffer.add_string (if files <> [] then "\n" else "")
    (buffer_add_docs ~defs ~accumulate_defs parse r) files

(* Implementation of the [latex] command. Writes a LaTeX document (if
   [docu] is [true]) or fragment for [files] on standard output and returns
   [0]. On [Failure] the error is logged and [Exit.err_file] is returned. *)
let latex
    files quiet accumulate_defs strict heading_auto_ids backend_blocks
    lift_headings docu title author inline_preambles keep_built_in_preambles
    first_heading_level
  =
  let resolver = Label_resolver.v ~quiet in
  let r = Cmarkit_latex.renderer ~backend_blocks ~first_heading_level () in
  let parse ~extract_title ~file ~defs md =
    let doc =
      Cmarkit.Doc.of_string ~resolver ~defs ~heading_auto_ids ~file ~strict md
    in
    if lift_headings then lift_headings_map ~extract_title doc else
    untilted_inline, doc
  in
  try
    let s = match docu with
    | true ->
        doc ~accumulate_defs ~extract_title:lift_headings parse
          ~author ~title ~inline_preambles ~keep_built_in_preambles r files
    | false ->
        Printf.kbprintf Buffer.contents (Buffer.create 2048) "%a"
          (buffer_add_docs ~accumulate_defs parse r) files
    in
    print_string s; 0
  with
  | Failure err -> Log.err "%s" err; Exit.err_file

(* Command line interface *)

open Cmdliner

let author =
  let doc = "Document author when $(b,--doc) is used. $(docv) is interpreted \
             as raw LaTeX."
  in
  Arg.(value & opt (some string) None & info ["a"; "author"] ~doc ~docv:"NAME")

let backend_blocks =
  let doc = "Code blocks with language $(b,=latex) are included verbatim \
             in the output. Other code blocks with language starting \
             with $(b,=) are dropped. This does not activate math support, \
             use $(b,--exts) for that."
  in
  Cli.backend_blocks ~doc

let inline_preambles =
  let doc = "Add the content of LaTeX file $(docv) to the document preamble \
             when $(b,--doc) is used. If unspecified a built-in preamble is \
             written directly in the document (use $(b,-k) to keep it even \
             when this option is specified). Repeatable."
  in
  Arg.(value & opt_all string [] &
       info ~doc ["inline-preamble"] ~docv:"FILE.latex")

let keep_built_in_preamble =
  let doc = "Keep built-in preamble even if one is specified via \
             $(b,--inline-preamble)."
  in
  Arg.(value & flag & info ["k"; "keep-built-in-preamble"] ~doc)

let lift_headings =
  let doc = "Lift headings one level up and, when $(b,--doc) is used, \
             extract the first heading (of any level) to take it as the \
             title; unless a title is specified via the $(b,--title) option. \
             This is useful for certain CommonMark documents like READMEs \
             for which taking the headings literally results in unnatural \
             sectioning."
  in
  Arg.(value & flag & info ["l"; "lift-headings"] ~doc)

let first_level_heading =
  let level_enum =
    [ "part", Cmarkit_latex.Part; "chapter", Chapter;
      "section", Section; "subsection", Subsection ]
  in
  let doc =
    Printf.sprintf
      "Use LaTeX heading level $(docv) for the first CommonMark heading level. \
       $(docv) must be %s." (Arg.doc_alts_enum level_enum)
  in
  Arg.(value & opt (Arg.enum level_enum) Cmarkit_latex.Section &
       Arg.info ["first-heading-level"] ~doc ~docv:"LEVEL")

(* The [latex] sub-command. Exit codes are declared since [latex] returns
   [Exit.err_file] on file read errors. *)
let v =
  let doc = "Render CommonMark to LaTeX" in
  let exits = Exit.exits in
  let man = [
    `S Manpage.s_description;
    `P "$(tname) outputs a LaTeX fragment or document on standard output.";
    `Pre "$(mname) $(tname) $(b,-e -c -l -h README.md > README.latex)";`Noblank;
    `Pre "$(b,tlmgr install enumitem listings hyperref ulem bera fontspec)";
    `Noblank;
    `Pre "$(b,xelatex README.latex)";
    `Blocks Cli.common_man ]
  in
  Cmd.v (Cmd.info "latex" ~doc ~exits ~man) @@
  Term.(const latex $ Cli.files $ Cli.quiet $ Cli.accumulate_defs $ Cli.strict $
        Cli.heading_auto_ids $ backend_blocks $ lift_headings $
        Cli.docu $ Cli.title $ author $ inline_preambles $
        keep_built_in_preamble $ first_level_heading)

(* Built-in LaTeX preamble, defined that way to avoid source clutter *)

let () = built_in_preamble :=
{|\usepackage{graphicx}
\usepackage{enumitem}
\usepackage{listings}
\usepackage{hyperref}
\usepackage[normalem]{ulem}
\usepackage[scaled=0.8]{beramono}
\usepackage{fontspec}

\lstset{
  columns=[c]fixed,
  basicstyle=\small\ttfamily,
  keywordstyle=\bfseries,
  upquote=true,
  commentstyle=\slshape,
  breaklines=true,
  showstringspaces=false}

\lstdefinelanguage{ocaml}{language=[objective]caml,
   literate={'"'}{\textquotesingle "\textquotesingle}3
            {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4,
}

\renewcommand{\arraystretch}{1.3}
|}

--------------------------------------------------------------------------------
/tool/cmd_latex.mli:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t
(** [v] is the command line for [latex]. *)

--------------------------------------------------------------------------------
/tool/cmd_locs.ml:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Std
open Cmarkit

(* Formatting shortcuts *)

let strf = Printf.sprintf
let pf = Format.fprintf
let cut = Format.pp_print_cut

(* [indent ppf n] prints [n] space break hints on [ppf]. *)
let indent ppf n = for i = 1 to n do Format.pp_print_space ppf () done

(* [loc kind ~indent ppf m] prints [kind] followed by a colon, a cut and
   then the text location of the metadata [m]; both parts are preceded by
   [indent] space break hints.

   NOTE(review): the boxes of this file are all opened with a bare "@[" while
   the layout is driven by "@," cuts and explicit [indent]ation. In a
   non-vertical box a cut only breaks the line when needed; confirm against
   the reference source that no box type annotation (e.g. a vertical box)
   was lost from these format strings. *)
let loc kind ~indent:n ppf m =
  pf ppf "@[@[%a%s:@]@,@[%a%a@]@]"
    indent n kind
    indent n Textloc.pp_ocaml (Meta.textloc m)

(* Printers for lines of blocks: each prints the location found in the
   metadata that comes with a line, the line content itself is ignored. *)

let block_line kind ~indent ppf (_, m) = loc kind ~indent ppf m
let tight_block_line kind ~indent ppf (_, (_, m)) = loc kind ~indent ppf m
let tight_block_lines kind ~indent ppf ls =
  Format.pp_print_list (tight_block_line kind ~indent) ppf ls

(* Label printers: the three functions print the locations of the lines of
   [Label.text l], they only differ by the kind that is reported. *)

let label ~indent ppf l =
  tight_block_lines "Label" ~indent ppf (Label.text l)

let defined_label ~indent ppf l =
  tight_block_lines "Defined label" ~indent ppf (Label.text l)

let label_def ~indent ppf l =
  tight_block_lines "Label definition" ~indent ppf (Label.text l)

(* [link_definition ~indent ppf ld] prints, for those that are present in
   [ld], the locations of the label, defined label, destination and title.
   Each component that is present is preceded by a cut. *)
let link_definition ~indent ppf ld =
  let label ppf = function
  | None -> () | Some l -> cut ppf (); label ~indent ppf l
  in
  let defined_label ppf = function
  | None -> () | Some l -> cut ppf (); defined_label ~indent ppf l
  in
  let dest ppf = function
  | None -> () | Some (_, m) -> cut ppf (); loc "Destination" ~indent ppf m
  in
  let title ppf = function
  | None -> () | Some ls -> cut ppf (); tight_block_lines "Title" ~indent ppf ls
  in
  pf ppf "%a%a%a%a"
    label (Link_definition.label ld)
    defined_label (Link_definition.defined_label ld)
    dest (Link_definition.dest ld)
    title (Link_definition.title ld)

(* [link_reference ~indent ppf r] prints the locations of a link reference:
   for [`Ref] the label and the label definition, for [`Inline] the inline
   location followed by those of its link definition. *)
let link_reference ~indent:n ppf = function
| `Ref (_, l, ref) ->
    label ~indent:n ppf l; cut ppf (); label_def ~indent:n ppf ref
| `Inline (ld, m) ->
    pf ppf "%a%a" (loc "Inline" ~indent:n) m (link_definition ~indent:n) ld

(* Inline printers. [inline ~indent ppf i] prints the location of [i] and,
   with an [indent] increased by 2, the locations of its components. *)

let rec inlines ~indent ppf = function
| [] -> () | is -> cut ppf (); Format.pp_print_list (inline ~indent) ppf is

(* Shared by links and images, [kind] is the name that is reported. *)
and link kind ~indent:n ppf (l, m) =
  pf ppf "@[%a@,%a@,%a@]"
    (loc kind ~indent:n) m
    (inline ~indent:(n + 2)) (Inline.Link.text l)
    (link_reference ~indent:(n + 2)) (Inline.Link.reference l)

and inline ~indent:n ppf = function
| Inline.Autolink (a, m) ->
    let is_email = Inline.Autolink.is_email a in
    let link = Inline.Autolink.link a in
    let autolink = strf "Autolink (email:%b)" is_email in
    pf ppf "@[%a@,%a@]"
      (loc autolink ~indent:n) m (loc "Link" ~indent:(n + 2)) (snd link)
| Inline.Break (b, m) ->
    let label = match Inline.Break.type' b with
    | `Hard -> "Hard break" | `Soft -> "Soft break"
    in
    let layout_before = Inline.Break.layout_before b in
    let layout_after = Inline.Break.layout_after b in
    pf ppf "@[%a@,%a@,%a@]"
      (loc label ~indent:n) m
      (loc "Layout before" ~indent:(n + 2)) (snd layout_before)
      (loc "Layout after" ~indent:(n + 2)) (snd layout_after)
| Inline.Code_span (c, m) ->
    let line = tight_block_line "Code span line" ~indent:(n + 2) in
    pf ppf "@[%a@,%a@]"
      (loc "Code span" ~indent:n) m
      (Format.pp_print_list line) (Inline.Code_span.code_layout c)
| Inline.Emphasis (e, m) ->
    let i = Inline.Emphasis.inline e in
    pf ppf "@[%a@,%a@]"
      (loc "Emphasis" ~indent:n) m (inline ~indent:(n + 2)) i
| Inline.Image i ->
    link "Image" ~indent:n ppf i
| Inline.Inlines (is, m) ->
    pf ppf "@[%a%a@]"
      (loc "Inlines" ~indent:n) m (inlines ~indent:(n + 2)) is
| Inline.Link l ->
    link "Link" ~indent:n ppf l
| Inline.Raw_html (r, m) ->
    let line = tight_block_line "Raw HTML line" ~indent:(n + 2) in
    pf ppf "@[%a@,%a@]"
      (loc "Raw HTML" ~indent:n) m (Format.pp_print_list line) r
| Inline.Strong_emphasis (e, m) ->
    let i = Inline.Emphasis.inline e in
    pf ppf "@[%a@,%a@]"
      (loc "Strong emphasis" ~indent:n) m (inline ~indent:(n + 2)) i
| Inline.Text (t, m) ->
    loc "Text" ~indent:n ppf m
| Inline.Ext_strikethrough (s, m) ->
    let i = Inline.Strikethrough.inline s in
    pf ppf "@[%a@,%a@]"
      (loc "Strikethrough" ~indent:n) m (inline ~indent:(n + 2)) i
| Inline.Ext_math_span (ms, m) ->
    let display = Inline.Math_span.display ms in
    let line = tight_block_line "Math span line" ~indent:(n + 2) in
    pf ppf "@[%a@,%a@]"
      (loc (if display then "Math display span" else "Math span") ~indent:n) m
      (Format.pp_print_list line) (Inline.Math_span.tex_layout ms)
| _ ->
    (* Inline cases not handled above *)
    indent ppf n; Format.pp_print_string ppf "Unknown Cmarkit inline"

(* [code_block ~indent label cb m ppf] prints the location [m] of the code
   block [cb] reported as [label] and then, when present and each preceded
   by a cut, the locations of its opening fence, info string, code lines and
   closing fence. Fences are only looked up for [`Fenced] layouts. Note that
   [ppf] comes last. *)
let code_block ~indent:n label cb m ppf =
  let line ppf (_, m) = loc "Code line" ~indent:(n + 2) ppf m in
  let lines ppf = function
  | [] -> () | ls -> cut ppf (); Format.pp_print_list line ppf ls
  in
  let info_string ppf = function
  | None -> () | Some (_, m) ->
      cut ppf (); loc "Info string" ~indent:(n + 2) ppf m
  in
  let opening_fence ppf cb = match Block.Code_block.layout cb with
  | `Indented -> () | `Fenced f ->
      cut ppf ();
      loc "Opening fence" ~indent:(n + 2) ppf (snd f.opening_fence)
  in
  let closing_fence ppf cb = match Block.Code_block.layout cb with
  | `Indented -> () | `Fenced f ->
      match f.closing_fence with
      | None -> () | Some (_, m) ->
          cut ppf (); loc "Closing fence" ~indent:(n + 2) ppf m
  in
  pf ppf "@[%a%a%a%a%a@]"
    (loc label ~indent:n) m
    opening_fence cb
    info_string (Block.Code_block.info_string cb)
    lines (Block.Code_block.code cb)
    closing_fence cb

(* Block printers. [block ~indent ppf b] prints the location of [b] and,
   with an increased [indent], the locations of its components. *)

let rec blocks ~indent ppf = function
| [] -> () | bs -> cut ppf (); Format.pp_print_list (block ~indent) ppf bs

and block ~indent:n ppf = function
| Block.Blank_line (_, m) ->
    loc "Blank line" ~indent:n ppf m
| Block.Block_quote (bq, m) ->
    let b = Block.Block_quote.block bq in
    pf ppf "@[%a@,%a@]"
      (loc "Block quote" ~indent:n) m (block ~indent:(n + 2)) b
| Block.Blocks (bs, m) ->
    pf ppf "@[%a%a@]"
      (loc "Blocks" ~indent:n) m (blocks ~indent:(n + 2)) bs
| Block.Code_block (cb, m) ->
    code_block ~indent:n "Code block" cb m ppf
| Block.Heading (h, m) ->
    let level = Block.Heading.level h in
    let heading = "Heading, level " ^ Int.to_string level in
    (* Only setext headings have an underline to report. *)
    let setext_underline ppf h = match Block.Heading.layout h with
    | `Atx _ -> () | `Setext st ->
        cut ppf ();
        loc "Setext underline" ~indent:(n + 2) ppf (snd st.underline_count)
    in
    let i = Block.Heading.inline h in
    pf ppf "@[%a@,%a%a@]"
      (loc heading ~indent:n) m (inline ~indent:(n + 2)) i
      setext_underline h
| Block.Html_block (lines, m) ->
    pf ppf "@[%a@,%a@]"
      (loc "HTML block" ~indent:n) m
      (Format.pp_print_list (block_line "HTML line" ~indent:(n + 2))) lines
| Block.Link_reference_definition ((ld : Link_definition.t), m) ->
    pf ppf "@[%a%a@]"
      (loc "Link reference definition" ~indent:n) m
      (link_definition ~indent:(n + 2)) ld
| Block.List (l, m) ->
    (* List items are printed at [n + 2], their components at [n + 4]. *)
    let task_marker ppf i = match Block.List_item.ext_task_marker i with
    | None -> ()
    | Some (_, m) ->
        cut ppf (); (loc ~indent:(n + 4) "Task marker") ppf m
    in
    let list_item ppf (i, m) =
      pf ppf "@[%a@,%a%a@,%a@]"
        (loc ~indent:(n + 2) "List item") m
        (loc ~indent:(n + 4) "List marker") (snd (Block.List_item.marker i))
        task_marker i
        (block ~indent:(n + 4)) (Block.List_item.block i)
    in
    let list = strf "List (tight:%b)" (Block.List'.tight l) in
    let items = Block.List'.items l in
    pf ppf "@[%a@,%a@]"
      (loc list ~indent:n) m (Format.pp_print_list list_item) items
| Block.Paragraph (p, m) ->
    pf ppf "@[%a@,%a@]"
      (loc "Paragraph" ~indent:n) m
      (inline ~indent:(n + 2)) (Block.Paragraph.inline p)
| Block.Thematic_break (_, m) ->
    loc "Thematic break" ~indent:n ppf m
| Block.Ext_math_block (cb, m) ->
    code_block ~indent:n "Math block" cb m ppf
| Block.Ext_table (t, m) ->
    (* Columns are printed via their inline; the value paired with it and
       the value paired with each row are ignored. *)
    let col ~indent:n ppf (i, _) = inline ~indent:n ppf i in
    let row ~indent:n ppf = function
    | (`Header is, m), _ ->
        pf ppf "@[%a@,%a@]"
          (loc "Header row" ~indent:n) m
          (Format.pp_print_list (col ~indent:(n + 2))) is
    | (`Data is, m), _ ->
        pf ppf "@[%a@,%a@]"
          (loc "Data row" ~indent:n) m
          (Format.pp_print_list (col ~indent:(n + 2))) is
    | (`Sep seps, m), _ ->
        pf ppf "@[%a@,%a@]"
          (loc "Separator line" ~indent:n) m
          (Format.pp_print_list (loc "Separator" ~indent:(n + 2)))
          (List.map snd seps)
    in
    pf ppf "@[%a@,%a@]"
      (loc ~indent:n "Table") m
      (Format.pp_print_list (row ~indent:(n + 2))) (Block.Table.rows t)
| Block.Ext_footnote_definition (fn, m) ->
    let b = Block.Footnote.block fn in
    let l = Block.Footnote.label fn in
    pf ppf "@[%a@,%a@,%a@]"
      (loc "Footnote definition" ~indent:n) m
      (label ~indent:(n + 2)) l (block ~indent:(n + 2)) b
| _ ->
    (* Block cases not handled above *)
    indent ppf n; Format.pp_print_string ppf "Unknown Cmarkit block"

(* [doc_locs ppf doc] prints the locations of the block of [doc] on [ppf],
   starting with no indentation. *)
let doc_locs ppf doc = block ~indent:0 ppf (Doc.block doc)

(* Implementation of the [locs] command: each file of [files] is parsed
   with locations enabled, and with layout unless [no_layout] is [true],
   and its locations are printed on [Format.std_formatter]. The result is
   the exit code of [Std.process_files]. *)
let locs files strict no_layout =
  let locs ~file contents =
    let locs = true and layout = not no_layout in
    let doc = Cmarkit.Doc.of_string ~file ~locs ~layout ~strict contents in
    doc_locs Format.std_formatter doc
  in
  Std.process_files locs files

(* Command line interface *)

open Cmdliner

let v =
  let doc = "Show CommonMark parse locations" in
  let exits = Exit.exits in
  let man = [
    `S Manpage.s_description;
    `P "$(tname) outputs CommonMark parse locations.";
    `Blocks Cli.common_man; ]
  in
  Cmd.v (Cmd.info "locs" ~doc ~exits ~man) @@
  Term.(const locs $ Cli.files $ Cli.strict $ Cli.no_layout)

--------------------------------------------------------------------------------
/tool/cmd_locs.mli:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

val v : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t
(** [v] is the command line for [locs]. *)

--------------------------------------------------------------------------------
/tool/cmd_main.ml:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

open Std
open Cmdliner

(* The sub-commands of the tool *)
let cmds = [ Cmd_commonmark.v; Cmd_html.v; Cmd_latex.v; Cmd_locs.v; ]

(* The [cmarkit] command: a group made of [cmds]. *)
let cmarkit =
  let doc = "Process CommonMark files" in
  let exits = Exit.exits_with_err_diff in
  let man = [
    `S Manpage.s_description;
    `P "$(mname) processes CommonMark files";
    `Blocks Cli.common_man; ]
  in
  Cmd.group (Cmd.info "cmarkit" ~version:"%%VERSION%%" ~doc ~exits ~man) @@
  cmds

(* Evaluates the command line and exits with the resulting code. Nothing
   is run when the module is loaded in an interactive toplevel. *)
let main () = exit (Cmd.eval' cmarkit)
let () = if !Sys.interactive then () else main ()

--------------------------------------------------------------------------------
/tool/cmd_main.mli:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

--------------------------------------------------------------------------------
/tool/std.ml:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

type fpath = string

(* The standard [Result] module extended with a conversion to the [Failure]
   exception and a binding operator. *)
module Result = struct
  include Result
  (* [Ok v] is [v], [Error err] raises [Failure err]. *)
  let to_failure = function Ok v -> v | Error err -> failwith err
  module Syntax = struct
    let ( let* ) = Result.bind
  end
end

(* Logging on [Format.err_formatter] *)
module Log = struct
  (* Basename of the running executable, used to prefix error messages. *)
  let exec = Filename.basename Sys.executable_name

  (* Formats an error message prefixed by [exec] and a colon. *)
  let err fmt =
    Format.fprintf Format.err_formatter ("%s: @[" ^^ fmt ^^ "@]@.") exec

  (* Formats a warning message, without prefix. *)
  let warn fmt =
    Format.fprintf Format.err_formatter ("@[" ^^ fmt ^^ "@]@.")

  (* [on_error ~use r f] is [f v] if [r] is [Ok v]. If [r] is [Error e],
     [e] is logged with [err] and the result is [use]. *)
  let on_error ~use r f = match r with
  | Ok v -> f v | Error e -> err "%s" e; use
end

module Label_resolver = struct
  (* A label resolver that warns on redefinitions *)

  (* Warns that the definition [current] of a label is ignored. If the text
     location of [current] is none the message only mentions the label
     text, otherwise it also reports the locations of [current] and of the
     previous definition [prev]. *)
  let warn_label_redefinition ~current ~prev =
    let open Cmarkit in
    let pp_loc = Textloc.pp_ocaml in
    let current_text = Label.text_to_string current in
    let current = Meta.textloc (Label.meta current) in
    let prev = Meta.textloc (Label.meta prev) in
    if Textloc.is_none current then
      Log.warn "Warning: @[Ignoring redefinition of label %S.@,\
                Invoke with option --locs to get file locations.@,@]"
        current_text
    else
    Log.warn "@[%a:@,Warning: \
              @[Ignoring redefinition of label %S. \
              Previous definition:@,%a@]@,@]"
      pp_loc current current_text pp_loc prev

  (* References resolve to the label they were given. A definition is kept
     if the label has no previous definition; otherwise it is dropped (the
     result is [None]) and a warning is logged unless [quiet] is [true]. *)
  let v ~quiet = function
  | `Ref (_, _, ref) -> ref
  | `Def (None, current) -> Some current
  | `Def (Some prev, current) ->
      if not quiet then warn_label_redefinition ~current ~prev; None
end

module Os = struct

  (* Emulate B0_std.Os functionality to eschew the dep *)

  (* Reads the whole of [file] in binary mode, ["-"] stands for [stdin]
     which is left open. [Sys_error]s are turned into [Error]s. *)
  let read_file file =
    try
      let ic = if file = "-" then stdin else open_in_bin file in
      let finally () = if file = "-" then () else close_in_noerr ic in
      Fun.protect ~finally @@ fun () -> Ok (In_channel.input_all ic)
    with
    | Sys_error err -> Error err

  (* Writes [s] to [file] in binary mode, ["-"] stands for [stdout] which
     is left open. [Sys_error]s are turned into [Error]s. *)
  let write_file file s =
    try
      let oc = if file = "-" then stdout else open_out_bin file in
      let finally () = if file = "-" then () else close_out_noerr oc in
      Fun.protect ~finally @@ fun () -> Ok (Out_channel.output_string oc s)
    with
    | Sys_error err -> Error err

  (* Calls [f] with the path of a fresh directory created with permissions
     [0o700]. A removal of the directory is attempted with [Sys.rmdir] once
     [f] is done, errors of that removal are ignored. *)
  let with_tmp_dir f =
    try
      let tmpdir =
        (* A temporary file is created to get a fresh name and is then
           replaced by a directory of the same name. *)
        let file = Filename.temp_file "cmarkit" "dir" in
        (Sys.remove file; Sys.mkdir file 0o700; file)
      in
      let finally () = try Sys.rmdir tmpdir with Sys_error _ -> () in
      Fun.protect ~finally @@ fun () -> Ok (f tmpdir)
    with
    | Sys_error err -> Error ("Making temporary dir: " ^ err)

  (* Calls [f] with the current working directory set to [cwd]. Restoring
     the previous working directory is attempted once [f] is done, errors
     of that restoration are ignored. *)
  let with_cwd cwd f =
    try
      let curr = Sys.getcwd () in
      let () = Sys.chdir cwd in
      let finally () = try Sys.chdir curr with Sys_error _ -> () in
      Fun.protect ~finally @@ fun () -> Ok (f ())
    with
    | Sys_error err -> Error ("With cwd: " ^ err)
end

(* Exit codes and their manual documentation *)
module Exit = struct
  open Cmdliner

  type code = Cmdliner.Cmd.Exit.code
  let err_file = 1
  let err_diff = 2

  let exits =
    Cmd.Exit.info err_file ~doc:"on file read errors." ::
    Cmd.Exit.defaults

  let exits_with_err_diff =
    Cmd.Exit.info err_diff ~doc:"on render differences." :: exits
end

(* [process_files f files] reads each file of [files] in order and calls
   [f ~file content] on it, the result of [f] is dropped. The result is [0]
   if all the files could be read. On the first read error the error is
   logged, the remaining files are not processed and the result is
   [Exit.err_file]. *)
let process_files f files =
  let rec loop = function
  | [] -> 0
  | file :: files ->
      Log.on_error ~use:Exit.err_file (Os.read_file file) @@ fun content ->
      f ~file content; loop files
  in
  loop files

(* Command line arguments shared by the sub-commands *)
module Cli = struct
  open Cmdliner

  let accumulate_defs =
    let doc =
      "Accumulate label definitions from one input file to the other \
       (in left to right command line order). Link reference definitions and \
       footnote definitions of previous files can be used and override \
       those made in subsequent ones."
    in
    Arg.(value & flag & info ["D"; "accumulate-defs"] ~doc)

  (* The documentation is a parameter, commands provide their own. *)
  let backend_blocks ~doc =
    Arg.(value & flag & info ["b"; "backend-blocks"] ~doc)

  let docu =
    let doc = "Output a complete document rather than a fragment." in
    Arg.(value & flag & info ["c"; "doc"] ~doc)

  (* Positional arguments, defaults to [["-"]]. *)
  let files =
    let doc = "$(docv) is the CommonMark file to process (repeatable). Reads \
               from $(b,stdin) if none or $(b,-) is specified." in
    Arg.(value & pos_all string ["-"] & info [] ~doc ~docv:"FILE.md")

  let heading_auto_ids =
    let doc = "Automatically generate heading identifiers." in
    Arg.(value & flag & info ["h"; "heading-auto-ids"] ~doc)

  let lang =
    let doc = "Language (BCP47) of the document when $(b,--doc) is used." in
    let docv = "LANG" in
    Arg.(value & opt string "en" & info ["l"; "lang"] ~doc ~docv)

  let no_layout =
    let doc = "Drop layout information during parsing." in
    Arg.(value & flag & info ["no-layout"] ~doc)

  let quiet =
    let doc = "Be quiet. Do not report label redefinition warnings." in
    Arg.(value & flag & info ["q"; "quiet"] ~doc)

  (* [true] unless the unsafe flag is specified. *)
  let safe =
    let safe =
      let doc = "Drop raw HTML and dangerous URLs (default). If \
                 you are serious about XSS prevention, better pipe \
                 the output to a dedicated HTML sanitizer."
      in
      Arg.info ["safe"] ~doc
    in
    let unsafe =
      let doc = "Keep raw HTML and dangerous URLs. See option $(b,--safe)." in
      Arg.info ["u"; "unsafe"] ~doc
    in
    Arg.(value & vflag true [true, safe; false, unsafe])

  (* The negation of the extensions flag: [true] unless extensions are
     requested. *)
  let strict =
    let extended =
      let doc = "Activate supported extensions: strikethrough ($(b,~~)), \
                 LaTeX math ($(b,\\$), $(b,\\$\\$) and $(b,math) code blocks), \
                 footnotes ($(b,[^id])), task items \
                 ($(b,[ ]), $(b,[x]), $(b,[~])) and pipe tables. \
                 See the library documentation for more information."
      in
      Arg.(value & flag & info ["e"; "exts"] ~doc)
    in
    Term.app (Term.const Bool.not) extended

  let title =
    let doc = "Title of the document when $(b,--doc) is used. Derived from \
               the filename of the first input file if unspecified."
    in
    let docv = "TITLE" in
    Arg.(value & opt (some string) None & info ["t"; "title"] ~doc ~docv)

  (* Manual sections shared by the commands *)
  let common_man =
    [ `S Manpage.s_bugs;
      `P "This program is distributed with the $(b,cmarkit) OCaml library. \
          See $(i,https://erratique.ch/software/cmarkit) for contact \
          information.";
      `S Manpage.s_see_also;
      `P "More information about the renderers can be found in the \
          documentation of the $(b,cmarkit) OCaml library. Consult \
          $(b,odig doc cmarkit) or the online documentation." ]
end

--------------------------------------------------------------------------------
/tool/std.mli:
--------------------------------------------------------------------------------
(*---------------------------------------------------------------------------
   Copyright (c) 2023 The cmarkit programmers. All rights reserved.
   SPDX-License-Identifier: ISC
  ---------------------------------------------------------------------------*)

type fpath = string
(** The type for file paths. *)

(** [Result] extended with a conversion to exceptions and a binding
    operator. *)
module Result : sig
  include module type of Result
  val to_failure : ('a, string) result -> 'a
  (** [to_failure r] is [v] if [r] is [Ok v] and raises [Failure e] if
      [r] is [Error e]. *)

  module Syntax : sig
    val (let*) : ('a, 'e) result -> ('a -> ('b, 'e) result) -> ('b, 'e) result
    (** [let*] is {!Result.bind}. *)
  end
end

(** Logging on the standard error formatter. *)
module Log : sig
  val err : ('a, Format.formatter, unit, unit) format4 -> 'a
  (** [err fmt ...] formats an error message prefixed by the basename of
      the executable. *)

  val warn : ('a, Format.formatter, unit, unit) format4 -> 'a
  (** [warn fmt ...] formats a warning message, without prefix. *)

  val on_error : use:'a -> ('b, string) result -> ('b -> 'a) -> 'a
  (** [on_error ~use r f] is [f v] if [r] is [Ok v]. If [r] is [Error e],
      [e] is logged with {!err} and the result is [use]. *)
end

(** Label resolution. *)
module Label_resolver : sig
  val v : quiet:bool -> Cmarkit.Label.resolver
  (** [v ~quiet] is a resolver that keeps the first definition of a label
      and ignores redefinitions. Ignored redefinitions are reported with
      a warning unless [quiet] is [true]. *)
end

(** File system operations. [Sys_error]s are turned into [Error]s. *)
module Os : sig
  val read_file : fpath -> (string, string) result
  (** [read_file file] is the contents of [file]. Reads from [stdin] if
      [file] is ["-"]. *)

  val write_file : fpath -> string -> (unit, string) result
  (** [write_file file s] writes [s] to [file]. Writes to [stdout] if
      [file] is ["-"]. *)

  val with_tmp_dir : (fpath -> 'a) -> ('a, string) result
  (** [with_tmp_dir f] calls [f] with the path of a freshly created
      directory and attempts to remove the directory once [f] is done. *)

  val with_cwd : fpath -> (unit -> 'a) -> ('a, string) result
  (** [with_cwd dir f] calls [f] with the current working directory set
      to [dir] and attempts to restore the previous one once [f] is done. *)
end

(** Exit codes. *)
module Exit : sig
  type code = Cmdliner.Cmd.Exit.code
  val err_file : code
  (** [err_file] is [1], documented as the code for file read errors. *)

  val err_diff : code
  (** [err_diff] is [2], documented as the code for render differences. *)

  val exits : Cmdliner.Cmd.Exit.info list
  (** [exits] documents {!err_file} and the default Cmdliner exits. *)

  val exits_with_err_diff : Cmdliner.Cmd.Exit.info list
  (** [exits_with_err_diff] is {!exits} with the documentation of
      {!err_diff} added. *)
end

val process_files : (file:fpath -> string -> 'a) -> string list -> Exit.code
(** [process_files f files] reads each file of [files] in order and calls
    [f ~file content] on it, dropping the result. The result is [0] if all
    the files could be read. On the first read error the error is logged,
    the remaining files are not processed and the result is
    {!Exit.err_file}. *)

(** Command line arguments shared by the commands. *)
module Cli : sig
  open Cmdliner

  val accumulate_defs : bool Term.t
  (** Flag [-D], [--accumulate-defs]. *)

  val backend_blocks : doc:string -> bool Term.t
  (** Flag [-b], [--backend-blocks] documented by [doc]. *)

  val docu : bool Term.t
  (** Flag [-c], [--doc]. *)

  val files : string list Term.t
  (** The positional arguments, [["-"]] if there are none. *)

  val heading_auto_ids : bool Term.t
  (** Flag [-h], [--heading-auto-ids]. *)

  val lang : string Term.t
  (** Option [-l], [--lang], defaults to ["en"]. *)

  val no_layout : bool Term.t
  (** Flag [--no-layout]. *)

  val quiet : bool Term.t
  (** Flag [-q], [--quiet]. *)

  val safe : bool Term.t
  (** [true] unless [-u] or [--unsafe] is specified. *)

  val strict : bool Term.t
  (** [true] unless [-e] or [--exts] is specified. *)

  val title : string option Term.t
  (** Option [-t], [--title]. *)

  val common_man : Manpage.block list
  (** Manual sections shared by the commands. *)
end

--------------------------------------------------------------------------------