├── BRZO ├── .gitignore ├── .ocp-indent ├── .merlin ├── _tags ├── test ├── expect │ ├── bug-18.md │ ├── bug-18.trip.md │ ├── test.expect │ ├── bug-18.latex │ ├── bugs.latex │ ├── bugs.md │ ├── bugs.trip.md │ ├── bugs.exts.md │ ├── bugs.exts.trip.md │ ├── bugs.exts.latex │ ├── bug-18.locs │ ├── bug-18.nolayout.locs │ ├── bugs.nolayout.locs │ ├── bugs.locs │ ├── basic.md │ ├── basic.trip.md │ ├── basic.exts.md │ ├── basic.exts.trip.md │ ├── basic.latex │ ├── bugs.exts.nolayout.locs │ ├── bugs.exts.locs │ ├── bug-18.html │ ├── basic.exts.latex │ ├── bugs.html │ └── bugs.exts.html ├── test_spec.ml ├── spec.mli ├── bench.ml ├── spec.ml ├── test_issues.ml ├── examples.ml └── test_pathological.ml ├── src ├── cmarkit.mllib ├── tool │ ├── cmarkit_main.mli │ ├── cmd_html.mli │ ├── cmd_latex.mli │ ├── cmd_locs.mli │ ├── cmd_commonmark.mli │ ├── cmarkit_main.ml │ ├── cmarkit_cli.mli │ ├── cmarkit_std.mli │ ├── cmarkit_std.ml │ ├── cmarkit_cli.ml │ ├── cmd_commonmark.ml │ ├── cmd_latex.ml │ └── cmd_locs.ml ├── cmarkit_data.mli ├── cmarkit_data.ml ├── cmarkit_renderer.ml ├── cmarkit_html.mli ├── cmarkit_latex.mli ├── cmarkit_renderer.mli └── cmarkit_commonmark.mli ├── pkg ├── META └── pkg.ml ├── LICENSE.md ├── opam ├── README.md ├── doc └── index.mld ├── DEVEL.md ├── CHANGES.md ├── support └── generate_data.ml └── B0.ml /BRZO: -------------------------------------------------------------------------------- 1 | (srcs-x pkg tmp) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _b0 2 | _build 3 | tmp 4 | *.install -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | strict_with=always,match_clause=4,strict_else=never -------------------------------------------------------------------------------- /.merlin: 
-------------------------------------------------------------------------------- 1 | PKG b0.kit uucp cmdliner 2 | S src 3 | S test 4 | B _b0/** -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | true : bin_annot, safe_string 2 | <_b0> : -traverse 3 | : include 4 | : package(cmdliner) 5 | -------------------------------------------------------------------------------- /test/expect/bug-18.md: -------------------------------------------------------------------------------- 1 | # Issue #18 2 | 3 | When a list marker is followed by end of file, we crash. 4 | 5 | - Item 1 6 | - Item 2 7 | - -------------------------------------------------------------------------------- /test/expect/bug-18.trip.md: -------------------------------------------------------------------------------- 1 | # Issue #18 2 | 3 | When a list marker is followed by end of file, we crash. 4 | 5 | - Item 1 6 | - Item 2 7 | - -------------------------------------------------------------------------------- /test/expect/test.expect: -------------------------------------------------------------------------------- 1 | Expectation for mapper table bug #14: 2 | 3 | | a | b | c | 4 | |---|---|---| 5 | | a | b | c | 6 | | | b | c | 7 | | | | c | 8 | 9 | -------------------------------------------------------------------------------- /src/cmarkit.mllib: -------------------------------------------------------------------------------- 1 | Cmarkit_data 2 | Cmarkit_data_uchar 3 | Cmarkit_data_html 4 | Cmarkit_base 5 | Cmarkit 6 | Cmarkit_renderer 7 | Cmarkit_commonmark 8 | Cmarkit_html 9 | Cmarkit_latex -------------------------------------------------------------------------------- /test/expect/bug-18.latex: -------------------------------------------------------------------------------- 1 | \section{Issue \#18} 2 | 3 | When a list marker is followed by end of file, we crash. 
4 | 5 | \begin{itemize} 6 | \item{} 7 | Item 1 8 | \item{} 9 | Item 2 10 | \item{}\end{itemize} 11 | -------------------------------------------------------------------------------- /pkg/META: -------------------------------------------------------------------------------- 1 | description = "CommonMark parser and renderer for OCaml" 2 | version = "%%VERSION_NUM%%" 3 | requires = "" 4 | archive(byte) = "cmarkit.cma" 5 | archive(native) = "cmarkit.cmxa" 6 | plugin(byte) = "cmarkit.cma" 7 | plugin(native) = "cmarkit.cmxs" 8 | exists_if = "cmarkit.cma cmarkit.cmxa" 9 | -------------------------------------------------------------------------------- /src/tool/cmarkit_main.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | -------------------------------------------------------------------------------- /src/tool/cmd_html.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val cmd : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [cmd] is the command line for [html]. *) 8 | -------------------------------------------------------------------------------- /src/tool/cmd_latex.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val cmd : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [cmd] is the command line for [latex]. *) 8 | -------------------------------------------------------------------------------- /src/tool/cmd_locs.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val cmd : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [cmd] is the command line for [locs]. *) 8 | -------------------------------------------------------------------------------- /src/tool/cmd_commonmark.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | val cmd : Cmdliner.Cmd.Exit.code Cmdliner.Cmd.t 7 | (** [cmd] is the command line for [commonmark]. *) 8 | -------------------------------------------------------------------------------- /test/expect/bugs.latex: -------------------------------------------------------------------------------- 1 | \section{Bugs} 2 | 3 | Add a section for the bug and the CommonMark that triggers it as 4 | follows: 5 | 6 | \begin{verbatim} 7 | # Issue #NUM 8 | 9 | The triggering CommonMark 10 | \end{verbatim} 11 | 12 | \section{Issue \#11} 13 | 14 | Escape ordered item markers at the beginning of paragraphs correctly. 15 | These should be paragraphs when rendered to markdown not list items. 16 | 17 | 1. 18 | 19 | 2. 20 | 21 | 23. 
22 | 23 | 24) 24 | 25 | 1234567890. This is not a list marker no need to escape it. 26 | -------------------------------------------------------------------------------- /test/expect/bugs.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Issue #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Issue #11 14 | 15 | Escape ordered item markers at the beginning of paragraphs correctly. 16 | These should be paragraphs when rendered to markdown not list items. 17 | 18 | 1\. 19 | 20 | 2\. 21 | 22 | 23\. 23 | 24 | 25 | 24\) 26 | 27 | 1234567890. This is not a list marker no need to escape it. 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/expect/bugs.trip.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Issue #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Issue #11 14 | 15 | Escape ordered item markers at the beginning of paragraphs correctly. 16 | These should be paragraphs when rendered to markdown not list items. 17 | 18 | 1\. 19 | 20 | 2\. 21 | 22 | 23\. 23 | 24 | 25 | 24\) 26 | 27 | 1234567890. This is not a list marker no need to escape it. 
28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /pkg/pkg.ml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ocaml 2 | #use "topfind" 3 | #require "topkg" 4 | open Topkg 5 | 6 | let cmdliner = Conf.with_pkg "cmdliner" 7 | 8 | let () = 9 | Pkg.describe "cmarkit" @@ fun c -> 10 | let cmdliner = Conf.value c cmdliner in 11 | let api = ["Cmarkit"; "Cmarkit_renderer"; 12 | "Cmarkit_commonmark"; "Cmarkit_html"; "Cmarkit_latex"] 13 | in 14 | Ok [ Pkg.mllib ~api "src/cmarkit.mllib"; 15 | Pkg.doc "doc/index.mld" ~dst:"odoc-pages/index.mld"; 16 | Pkg.bin ~cond:cmdliner "src/tool/cmarkit_main" ~dst:"cmarkit" ] 17 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Bug #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Bug #10 14 | 15 | In cells toplevel text nodes not at the beginning or end of the cell 16 | get dropped. 17 | 18 | | Foo | 19 | |-------------------------| 20 | | `a` or `b` | 21 | | before `a` or `b` after | 22 | | before `a` or `b`after | 23 | | before`a`or`b`after | 24 | | *a*`a` | 25 | |

foo

| 26 | 27 | # Bug #15 28 | 29 | Invalid markup generated for cancelled task. 30 | 31 | * [~] This has been cancelled 32 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.trip.md: -------------------------------------------------------------------------------- 1 | Bugs 2 | ==== 3 | 4 | Add a section for the bug and the CommonMark that triggers it as 5 | follows: 6 | 7 | ``` 8 | # Bug #NUM 9 | 10 | The triggering CommonMark 11 | ``` 12 | 13 | # Bug #10 14 | 15 | In cells toplevel text nodes not at the beginning or end of the cell 16 | get dropped. 17 | 18 | | Foo | 19 | |-------------------------| 20 | | `a` or `b` | 21 | | before `a` or `b` after | 22 | | before `a` or `b`after | 23 | | before`a`or`b`after | 24 | | *a*`a` | 25 | |

foo

| 26 | 27 | # Bug #15 28 | 29 | Invalid markup generated for cancelled task. 30 | 31 | * [~] This has been cancelled 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 The cmarkit programmers 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /src/tool/cmarkit_main.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmdliner 7 | 8 | let cmd = 9 | let doc = "Process CommonMark files" in 10 | let exits = Cmarkit_cli.Exit.exits_with_err_diff in 11 | let man = [ 12 | `S Manpage.s_description; 13 | `P "$(cmd) processes CommonMark files"; 14 | `Blocks Cmarkit_cli.common_man; ] 15 | in 16 | Cmd.group (Cmd.info "cmarkit" ~version:"%%VERSION%%" ~doc ~exits ~man) @@ 17 | [ Cmd_commonmark.cmd; Cmd_html.cmd; Cmd_latex.cmd; Cmd_locs.cmd ] 18 | 19 | let main () = exit (Cmd.eval' cmd) 20 | let () = if !Sys.interactive then () else main () 21 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.latex: -------------------------------------------------------------------------------- 1 | \section{Bugs} 2 | 3 | Add a section for the bug and the CommonMark that triggers it as 4 | follows: 5 | 6 | \begin{verbatim} 7 | # Bug #NUM 8 | 9 | The triggering CommonMark 10 | \end{verbatim} 11 | 12 | \section{Bug \#10} 13 | 14 | In cells toplevel text nodes not at the beginning or end of the cell 15 | get dropped. 16 | 17 | \bigskip 18 | \begin{tabular}{l} 19 | {\bfseries{}Foo} 20 | \\ 21 | \hline 22 | {\texttt{a} or \texttt{b}} 23 | \\ 24 | {before \texttt{a} or \texttt{b} after} 25 | \\ 26 | {before \texttt{a} or \texttt{b}after} 27 | \\ 28 | {before\texttt{a}or\texttt{b}after} 29 | \\ 30 | {\emph{a}\texttt{a}} 31 | \\ 32 | {% Raw CommonMark HTML omitted 33 | foo% Raw CommonMark HTML omitted 34 | } 35 | \\ 36 | \hline 37 | \end{tabular} 38 | \bigskip 39 | 40 | \section{Bug \#15} 41 | 42 | Invalid markup generated for cancelled task. 
43 | 44 | \begin{itemize} 45 | \item{} \lbrack ~\rbrack \enspace 46 | This has been cancelled 47 | \end{itemize} 48 | -------------------------------------------------------------------------------- /src/tool/cmarkit_cli.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2025 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmarkit_std 7 | open Cmdliner 8 | 9 | module Exit : sig 10 | type code = Cmdliner.Cmd.Exit.code 11 | val err_file : code 12 | val err_diff : code 13 | val exits : Cmdliner.Cmd.Exit.info list 14 | val exits_with_err_diff : Cmdliner.Cmd.Exit.info list 15 | end 16 | 17 | val process_files : (file:fpath -> string -> 'a) -> string list -> Exit.code 18 | 19 | val accumulate_defs : bool Term.t 20 | val backend_blocks : doc:string -> bool Term.t 21 | val docu : bool Term.t 22 | val files : string list Term.t 23 | val heading_auto_ids : bool Term.t 24 | val lang : string Term.t 25 | val no_layout : bool Term.t 26 | val quiet : bool Term.t 27 | val safe : bool Term.t 28 | val strict : bool Term.t 29 | val title : string option Term.t 30 | 31 | val common_man : Manpage.block list 32 | -------------------------------------------------------------------------------- /src/tool/cmarkit_std.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | type fpath = string 7 | 8 | module Result : sig 9 | include module type of Result 10 | val to_failure : ('a, string) result -> 'a 11 | 12 | module Syntax : sig 13 | val (let*) : ('a, 'e) result -> ('a -> ('b, 'e) result) -> ('b, 'e) result 14 | end 15 | end 16 | 17 | module Log : sig 18 | val err : ('a, Format.formatter, unit, unit) format4 -> 'a 19 | val warn : ('a, Format.formatter, unit, unit) format4 -> 'a 20 | val on_error : use:'a -> ('b, string) result -> ('b -> 'a) -> 'a 21 | end 22 | 23 | module Label_resolver : sig 24 | val v : quiet:bool -> Cmarkit.Label.resolver 25 | end 26 | 27 | module Os : sig 28 | val read_file : fpath -> (string, string) result 29 | val write_file : fpath -> string -> (unit, string) result 30 | val with_tmp_dir : (fpath -> 'a) -> ('a, string) result 31 | val with_cwd : fpath -> (unit -> 'a) -> ('a, string) result 32 | end 33 | -------------------------------------------------------------------------------- /test/expect/bug-18.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bug-18.md", lines 1-7, characters 0-2 3 | Heading, level 1: 4 | File "bug-18.md", line 1, characters 0-11 5 | Text: 6 | File "bug-18.md", line 1, characters 2-11 7 | Blank line: 8 | File "bug-18.md", line 2 9 | Paragraph: 10 | File "bug-18.md", line 3, characters 0-56 11 | Text: 12 | File "bug-18.md", line 3, characters 0-56 13 | Blank line: 14 | File "bug-18.md", line 4 15 | List (tight:true): 16 | File "bug-18.md", lines 5-7, characters 1-2 17 | List item: 18 | File "bug-18.md", line 5, characters 1-9 19 | List marker: 20 | File "bug-18.md", line 5, characters 1-2 21 | Paragraph: 22 | File "bug-18.md", line 5, characters 3-9 23 | Text: 24 | File "bug-18.md", line 5, characters 3-9 25 | List item: 26 | File "bug-18.md", line 6, characters 1-9 27 | List marker: 28 | File 
"bug-18.md", line 6, characters 1-2 29 | Paragraph: 30 | File "bug-18.md", line 6, characters 3-9 31 | Text: 32 | File "bug-18.md", line 6, characters 3-9 33 | List item: 34 | File "bug-18.md", line 7, characters 1-2 35 | List marker: 36 | File "bug-18.md", line 7, characters 1-2 37 | Blank line: 38 | File "bug-18.md", line 7, characters 2-2 -------------------------------------------------------------------------------- /test/expect/bug-18.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bug-18.md", lines 1-7, characters 0-2 3 | Heading, level 1: 4 | File "bug-18.md", line 1, characters 0-11 5 | Text: 6 | File "bug-18.md", line 1, characters 2-11 7 | Blank line: 8 | File "bug-18.md", line 2 9 | Paragraph: 10 | File "bug-18.md", line 3, characters 0-56 11 | Text: 12 | File "bug-18.md", line 3, characters 0-56 13 | Blank line: 14 | File "bug-18.md", line 4 15 | List (tight:true): 16 | File "bug-18.md", lines 5-7, characters 1-2 17 | List item: 18 | File "bug-18.md", line 5, characters 1-9 19 | List marker: 20 | File "bug-18.md", line 5, characters 1-2 21 | Paragraph: 22 | File "bug-18.md", line 5, characters 3-9 23 | Text: 24 | File "bug-18.md", line 5, characters 3-9 25 | List item: 26 | File "bug-18.md", line 6, characters 1-9 27 | List marker: 28 | File "bug-18.md", line 6, characters 1-2 29 | Paragraph: 30 | File "bug-18.md", line 6, characters 3-9 31 | Text: 32 | File "bug-18.md", line 6, characters 3-9 33 | List item: 34 | File "bug-18.md", line 7, characters 1-2 35 | List marker: 36 | File "bug-18.md", line 7, characters 1-2 37 | Blank line: 38 | File "bug-18.md", line 7, characters 2-2 -------------------------------------------------------------------------------- /src/cmarkit_data.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit 
programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Data needed for CommonMark parsing. *) 7 | 8 | (** {1:unicode Unicode data} *) 9 | 10 | val unicode_version : string 11 | (** [unicode_version] is the supported Unicode version. *) 12 | 13 | val is_unicode_whitespace : Uchar.t -> bool 14 | (** [is_unicode_whitespace u] is [true] iff 15 | [u] is a CommonMark 16 | {{:https://spec.commonmark.org/current/#unicode-whitespace-character} 17 | Unicode whitespace character}. *) 18 | 19 | val is_unicode_punctuation : Uchar.t -> bool 20 | (** [is_unicode_punctuation u] is [true] iff 21 | [u] is a CommonMark 22 | {{:https://spec.commonmark.org/current/#unicode-punctuation-character} 23 | Unicode punctuation character}. *) 24 | 25 | val unicode_case_fold : Uchar.t -> string option 26 | (** [unicode_case_fold u] is the UTF-8 encoding of [u]'s Unicode 27 | {{:http://www.unicode.org/reports/tr44/#Case_Folding}case fold} or 28 | [None] if [u] case folds to itself. *) 29 | 30 | (** {1:html HTML data} *) 31 | 32 | val html_entity : string -> string option 33 | (** [html_entity e] is the UTF-8 data of the HTML entity {e name} 34 | (without [&] and [;]) [e]. *) 35 | -------------------------------------------------------------------------------- /test/test_spec.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open B0_testing 8 | 9 | let renderer = (* Specification tests render empty elements as XHTML. 
*) 10 | Cmarkit_html.xhtml_renderer ~safe:false () 11 | 12 | let test_spec_args = Test.Arg.make () 13 | let test_spec = 14 | Test.test' test_spec_args "specification examples" @@ fun (tests, label) -> 15 | Spec.test_examples ~label tests @@ fun t -> 16 | let doc = Cmarkit.Doc.of_string t.Spec.markdown in 17 | let html = Cmarkit_renderer.doc_to_string renderer doc in 18 | if String.equal html t.html then Test.pass () else 19 | begin 20 | Test.fail "%a" Spec.pp_test_url t; 21 | Test.log_raw "@[Source:@,%aRender:@,%a@]@?" 22 | Fmt.lines t.Spec.markdown 23 | (Test.Diff.pp Test.T.lines ~fnd:html ~exp:t.html) () 24 | end 25 | 26 | let main () = 27 | Test.main' @@ 28 | let open Cmdliner.Term.Syntax in 29 | let+ file = Spec.file and+ ids = Spec.ids in 30 | fun () -> 31 | let tests = Spec.parse_tests file |> Test.error_to_failstop in 32 | let select = Spec.select tests ids in 33 | Test.autorun ~args:Test.Arg.[value test_spec_args select] () 34 | 35 | let () = if !Sys.interactive then () else exit (main ()) 36 | -------------------------------------------------------------------------------- /test/spec.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Specification test parser and runner *) 7 | 8 | open B0_std 9 | 10 | val version : string 11 | (** The specification version. *) 12 | 13 | type id = int 14 | (** The type for example identifiers. *) 15 | 16 | type test = 17 | { markdown : string; 18 | html : string; 19 | id : id; 20 | start_line : int; 21 | end_line : int; 22 | section : string } 23 | (** The type for tests. *) 24 | 25 | val pp_test_url : test Fmt.t 26 | (** [pp_test_url] formats an URL that points to the test. 
*) 27 | 28 | val parse_tests : Fpath.t -> (test list, string) result 29 | (** [parse_tests f] parses the specification JSON test file. *) 30 | 31 | val select : test list -> id list -> test list * string 32 | (** [select tests ids] selects the tests with given [ids] (empty 33 | is all) and returns a label to print *) 34 | 35 | val test_examples : label:string -> test list -> (test -> unit) -> unit 36 | (** [test_examples ~label tests f] tests all [tests] with [f] as in a 37 | [B0_testing] block that reports assertion and failure counts as 38 | examples count. [label] is logged before the tests are performed. *) 39 | 40 | (** {1:cli Command line} *) 41 | 42 | val file : Fpath.t Cmdliner.Term.t 43 | (** [file] are options to specify the JSON test file. *) 44 | 45 | val ids : id list Cmdliner.Term.t 46 | (** [ids] are positional arguments that specify the test identifiers. *) 47 | -------------------------------------------------------------------------------- /src/cmarkit_data.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Unicode character data 7 | 8 | XXX. For now we kept that simple and use the Stdlib's Set and 9 | Maps. Bring in Uucp's tmapbool and tmap if that turns out to be too 10 | costly in space or time. 
*) 11 | 12 | module Uset = struct 13 | include Set.Make (Uchar) 14 | let of_array = 15 | let add acc u = add (Uchar.unsafe_of_int u) acc in 16 | Array.fold_left add empty 17 | end 18 | 19 | module Umap = struct 20 | include Map.Make (Uchar) 21 | let of_array = 22 | let add acc (u, f) = add (Uchar.unsafe_of_int u) f acc in 23 | Array.fold_left add empty 24 | end 25 | 26 | let whitespace_uset = Uset.of_array Cmarkit_data_uchar.whitespace 27 | let punctuation_uset = Uset.of_array Cmarkit_data_uchar.punctuation 28 | let case_fold_umap = Umap.of_array Cmarkit_data_uchar.case_fold 29 | 30 | let unicode_version = Cmarkit_data_uchar.unicode_version 31 | let is_unicode_whitespace u = Uset.mem u whitespace_uset 32 | let is_unicode_punctuation u = Uset.mem u punctuation_uset 33 | let unicode_case_fold u = Umap.find_opt u case_fold_umap 34 | 35 | (* HTML entity data. *) 36 | 37 | module String_map = Map.Make (String) 38 | 39 | let html_entity_smap = 40 | let add acc (entity, rep) = String_map.add entity rep acc in 41 | Array.fold_left add String_map.empty Cmarkit_data_html.entities 42 | 43 | let html_entity e = String_map.find_opt e html_entity_smap 44 | -------------------------------------------------------------------------------- /test/bench.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Benchmarker for Cmarkit. Just renders to HTML the way `cmark` does. 
*) 7 | 8 | let ( let* ) = Result.bind 9 | 10 | let read_file file = 11 | try 12 | let ic = if file = "-" then stdin else open_in_bin file in 13 | let finally () = if file = "-" then () else close_in_noerr ic in 14 | Fun.protect ~finally @@ fun () -> Ok (In_channel.input_all ic) 15 | with 16 | | Sys_error err -> Error err 17 | 18 | let to_html file exts locs layout unsafe = 19 | let strict = not exts and safe = not unsafe in 20 | let* content = read_file file in 21 | let doc = Cmarkit.Doc.of_string ~layout ~locs ~file ~strict content in 22 | let r = Cmarkit_html.xhtml_renderer ~safe () in 23 | let html = Cmarkit_renderer.doc_to_string r doc in 24 | Ok (print_string html) 25 | 26 | let main () = 27 | let strf = Printf.sprintf in 28 | let usage = "Usage: bench [OPTION]… [FILE.md]" in 29 | let layout = ref false in 30 | let locs = ref false in 31 | let unsafe = ref false in 32 | let exts = ref false in 33 | let file = ref None in 34 | let args = 35 | [ "--layout", Arg.Set layout, "Keep layout information."; 36 | "--locs", Arg.Set locs, "Keep locations."; 37 | "--exts", Arg.Set exts, "Activate supported extensions"; 38 | "--unsafe", Arg.Set unsafe, "Keep HTML blocks and raw HTML"; ] 39 | in 40 | let pos s = match !file with 41 | | Some _ -> raise (Arg.Bad (strf "Don't know what to do with %S" s)) 42 | | None -> file := Some s 43 | in 44 | Arg.parse args pos usage; 45 | let file = Option.value ~default:"-" !file in 46 | match to_html file !exts !locs !layout !unsafe with 47 | | Error e -> Printf.eprintf "bench: %s\n%!" e; 1 48 | | Ok () -> 0 49 | 50 | let () = if !Sys.interactive then () else exit (main ()) 51 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "cmarkit" 3 | synopsis: "CommonMark parser and renderer for OCaml" 4 | description: """\ 5 | Cmarkit parses the [CommonMark specification]. 
It provides: 6 | 7 | - A CommonMark parser for UTF-8 encoded documents. Link label resolution 8 | can be customized and a non-strict parsing mode can be activated to add: 9 | strikethrough, LaTeX math, footnotes, task items and tables. 10 | 11 | - An extensible abstract syntax tree for CommonMark documents with 12 | source location tracking and best-effort source layout preservation. 13 | 14 | - Abstract syntax tree mapper and folder abstractions for quick and 15 | concise tree transformations. 16 | 17 | - Extensible renderers for HTML, LaTeX and CommonMark with source 18 | layout preservation. 19 | 20 | Cmarkit is distributed under the ISC license. It has no dependencies. 21 | 22 | [CommonMark specification]: https://spec.commonmark.org/ 23 | 24 | Homepage: """ 25 | maintainer: "Daniel Bünzli " 26 | authors: "The cmarkit programmers" 27 | license: "ISC" 28 | tags: ["codec" "commonmark" "markdown" "org:erratique"] 29 | homepage: "https://erratique.ch/software/cmarkit" 30 | doc: "https://erratique.ch/software/cmarkit/doc" 31 | bug-reports: "https://github.com/dbuenzli/cmarkit/issues" 32 | depends: [ 33 | "ocaml" {>= "4.14.0"} 34 | "ocamlfind" {build} 35 | "ocamlbuild" {build} 36 | "topkg" {build & >= "1.1.0"} 37 | "uucp" {dev} 38 | "b0" {dev & with-test} 39 | ] 40 | depopts: ["cmdliner"] 41 | conflicts: [ 42 | "cmdliner" {< "2.0.0"} 43 | ] 44 | build: [ 45 | [ 46 | "ocaml" 47 | "pkg/pkg.ml" 48 | "build" 49 | "--dev-pkg" 50 | "%{dev}%" 51 | "--with-cmdliner" 52 | "%{cmdliner:installed}%" 53 | ] 54 | [ 55 | "cmdliner" 56 | "install" 57 | "tool-support" 58 | "--update-opam-install=%{_:name}%.install" 59 | "_build/src/tool/cmarkit_main.native:cmarkit" {ocaml:native} 60 | "_build/src/tool/cmarkit_main.byte:cmarkit" {!ocaml:native} 61 | "_build/cmdliner-install" 62 | ] {cmdliner:installed} 63 | ] 64 | dev-repo: "git+https://erratique.ch/repos/cmarkit.git" 65 | x-maintenance-intent: ["(latest)"] 66 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cmarkit — CommonMark parser and renderer for OCaml 2 | ================================================== 3 | %%VERSION%% 4 | 5 | Cmarkit parses the [CommonMark specification]. It provides: 6 | 7 | - A CommonMark parser for UTF-8 encoded documents. Link label resolution 8 | can be customized and a non-strict parsing mode can be activated to add: 9 | strikethrough, LaTeX math, footnotes, task items and tables. 10 | 11 | - An extensible abstract syntax tree for CommonMark documents with 12 | source location tracking and best-effort source layout preservation. 13 | 14 | - Abstract syntax tree mapper and folder abstractions for quick and 15 | concise tree transformations. 16 | 17 | - Extensible renderers for HTML, LaTeX and CommonMark with source 18 | layout preservation. 19 | 20 | Cmarkit is distributed under the ISC license. It has no dependencies. 21 | 22 | [CommonMark specification]: https://spec.commonmark.org/ 23 | 24 | Homepage: 25 | 26 | ## Installation 27 | 28 | cmarkit can be installed with `opam`: 29 | 30 | opam install cmarkit 31 | opam install cmarkit cmdliner # For the cmarkit tool 32 | 33 | If you don't use `opam` consult the [`opam`](opam) file for build 34 | instructions. 35 | 36 | ## Documentation 37 | 38 | The documentation can be consulted [online] or via `odig doc cmarkit`. 39 | 40 | Questions are welcome but better asked on the [OCaml forum] than on 41 | the issue tracker. 42 | 43 | [online]: https://erratique.ch/software/cmarkit/doc 44 | [OCaml forum]: https://discuss.ocaml.org/ 45 | 46 | ## Sample programs 47 | 48 | The [`cmarkit`] tool parses and renders CommonMark files in various 49 | ways. 50 | 51 | See also [`bench.ml`] and the [doc examples]. 
52 | 53 | [`cmarkit`]: src/tool/cmarkit_main.ml 54 | [`bench.ml`]: test/bench.ml 55 | [doc examples]: test/examples.ml 56 | 57 | ## Acknowledgements 58 | 59 | A grant from the [OCaml Software Foundation] helped to bring about the first 60 | public release of `cmarkit`. 61 | 62 | The `cmarkit` implementation benefited from the work of John 63 | MacFarlane ([spec][CommonMark specification], [`cmark`]) and Martin 64 | Mitáš ([`md4c`]). 65 | 66 | [`cmark`]: https://github.com/commonmark/cmark 67 | [`md4c`]: https://github.com/mity/md4c 68 | [OCaml Software Foundation]: http://ocaml-sf.org/ 69 | -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 Cmarkit {%html: %%VERSION%%%}} 2 | 3 | Cmarkit parses the {{:https://spec.commonmark.org/current}CommonMark 4 | specification}. It provides: 5 | 6 | - A CommonMark parser for UTF-8 encoded documents. Link label resolution 7 | can be {{!Cmarkit.Label.resolvers}customized} and a non-strict 8 | parsing mode can be activated to {{!Cmarkit.extensions}add}: strikethrough, 9 | L{^A}T{_E}X math, footnotes, task items and tables. 10 | - An extensible abstract syntax tree for CommonMark documents with source 11 | location tracking and best-effort {{!Cmarkit_commonmark.layout}source layout 12 | preservation}. 13 | - Abstract syntax tree {{!Cmarkit.Mapper}mapper} and {{!Cmarkit.Folder}folder} 14 | abstractions for quick and concise tree transformations. 15 | - Extensible renderers for {{!Cmarkit_html}HTML}, 16 | {{!Cmarkit_latex}L{^A}T{_E}X} and {{!Cmarkit_commonmark}CommonMark} with 17 | source layout preservation. 18 | 19 | See the {{!quick}quick start}.
20 | 21 | {1:cmarkit_library Library [cmarkit]} 22 | 23 | {!modules: 24 | Cmarkit 25 | Cmarkit_renderer 26 | Cmarkit_commonmark 27 | Cmarkit_html 28 | Cmarkit_latex 29 | } 30 | 31 | {1:quick Quick start} 32 | 33 | The following functions render CommonMark snippets using the built-in 34 | renderers. The parsing bit via {!Cmarkit.Doc.of_string} is always the same 35 | except for CommonMark rendering where we make sure to keep the layout 36 | for {{!Cmarkit_commonmark.layout}source layout preservation}. 37 | 38 | If [strict] is [true] the CommonMark specification is strictly 39 | followed otherwise the built-in {{!Cmarkit.extensions}extensions} are 40 | enabled. 41 | 42 | {[ 43 | let cmark_to_html : strict:bool -> safe:bool -> string -> string = 44 | fun ~strict ~safe md -> 45 | let doc = Cmarkit.Doc.of_string ~strict md in 46 | Cmarkit_html.of_doc ~safe doc 47 | 48 | let cmark_to_latex : strict:bool -> string -> string = 49 | fun ~strict md -> 50 | let doc = Cmarkit.Doc.of_string ~strict md in 51 | Cmarkit_latex.of_doc doc 52 | 53 | let cmark_to_commonmark : strict:bool -> string -> string = 54 | fun ~strict md -> 55 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict md in 56 | Cmarkit_commonmark.of_doc doc 57 | ]} 58 | 59 | If you want to: 60 | 61 | {ul 62 | {- Extend the abstract syntax tree or the renderers, see 63 | {{!Cmarkit_renderer.example}this example}.} 64 | {- Map parts of an abstract syntax, see {!Cmarkit.Mapper}.} 65 | {- Fold over parts of an abstract syntax, see {!Cmarkit.Folder}.} 66 | {- Interfere with link label definition and resolution, see 67 | {{!Cmarkit.Label}labels} and their 68 | {{!Cmarkit.Label.resolvers}resolvers}.}} 69 | -------------------------------------------------------------------------------- /src/cmarkit_renderer.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. 
All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (* Renderers *) 7 | 8 | module Dict = Cmarkit_base.Dict 9 | (* A renderer bundles a context initializer with inline, block and doc render functions. *) 10 | type t = 11 | { init_context : context -> Cmarkit.Doc.t -> unit; 12 | inline : inline; 13 | block : block; 14 | doc : doc; } 15 | (* Rendering context: the renderer in use, a mutable state dictionary, the output buffer [b] and the document currently being rendered. *) 16 | and context = 17 | { renderer : t; 18 | mutable state : Dict.t; 19 | b : Buffer.t; 20 | mutable doc : Cmarkit.Doc.t } 21 | (* Render functions return a [bool]; [compose] below falls back to the other renderer when [false] is returned. *) 22 | and inline = context -> Cmarkit.Inline.t -> bool 23 | and block = context -> Cmarkit.Block.t -> bool 24 | and doc = context -> Cmarkit.Doc.t -> bool 25 | (* Defaults used by [make]: [nop] ignores its two arguments and [none] always returns [false]. *) 26 | let nop _ _ = () 27 | let none _ _ = false 28 | (* [make] builds a renderer; every omitted component defaults to [nop] or [none]. *) 29 | let make 30 | ?(init_context = nop) ?(inline = none) ?(block = none) ?(doc = none) () 31 | = 32 | { init_context; inline; block; doc } 33 | (* [compose g f]: the context is initialized by [g] then by [f]; for inlines, blocks and docs [f] is tried first and [g] only if [f] returns [false]. *) 34 | let compose g f = 35 | let init_context c d = g.init_context c d; f.init_context c d in 36 | let block c b = f.block c b || g.block c b in 37 | let inline c i = f.inline c i || g.inline c i in 38 | let doc c d = f.doc c d || g.doc c d in 39 | { init_context; inline; block; doc } 40 | (* Field accessors. *) 41 | let init_context r = r.init_context 42 | let inline r = r.inline 43 | let block r = r.block 44 | let doc r = r.doc 45 | (* Operations on rendering contexts. *) 46 | module Context = struct 47 | type t = context 48 | let make renderer b = (* fresh context writing to [b], with empty state and the empty document *) 49 | { renderer; b; state = Dict.empty; doc = Cmarkit.Doc.empty } 50 | (* Accessors; [get_defs] returns the [Cmarkit.Doc.defs] of the context document. *) 51 | let buffer c = c.b 52 | let renderer c = c.renderer 53 | let get_doc (c : context) = c.doc 54 | let get_defs (c : context) = Cmarkit.Doc.defs c.doc 55 | (* Typed values stored in the [state] dictionary of the context. *) 56 | module State = struct 57 | type 'a t = 'a Dict.key 58 | let make = Dict.key 59 | let find c st = Dict.find st c.state 60 | let get c st = Option.get (Dict.find st c.state) (* raises [Invalid_argument] if [st] has no binding *) 61 | let set c st = function 62 | | None -> c.state <- Dict.remove st c.state (* [None] removes the binding *) 63 | | Some s -> c.state <- Dict.add st s c.state 64 | end 65 | (* Runs the [init_context] function of the context renderer. *) 66 | let init c d = c.renderer.init_context c d 67 | (* Fallbacks raising [Invalid_argument]. *) 68 | let invalid_inline _ = invalid_arg "Unknown Cmarkit.Inline.t case" 69 | let
invalid_block _ = invalid_arg "Unknown Cmarkit.Block.t case" 70 | let unhandled_doc _ = invalid_arg "Unhandled Cmarkit.Doc.t" 71 | (* Output functions: append a byte, a UTF-8 encoded Unicode character or a string to the context buffer. *) 72 | let byte r c = Buffer.add_char r.b c 73 | let utf_8_uchar r u = Buffer.add_utf_8_uchar r.b u 74 | let string c s = Buffer.add_string c.b s (* Below: the renderer function is run and, if it returns [false], the matching [invalid_*] or [unhandled_doc] function raises [Invalid_argument]. *) 75 | let inline c i = ignore (c.renderer.inline c i || invalid_inline i) 76 | let block c b = ignore (c.renderer.block c b || invalid_block b) 77 | let doc (c : context) d = (* sets the context document, initializes the context, renders, then resets the document to [Cmarkit.Doc.empty]; NOTE(review): the reset is skipped if rendering raises *) 78 | c.doc <- d; init c d; 79 | ignore (c.renderer.doc c d || unhandled_doc d); 80 | c.doc <- Cmarkit.Doc.empty 81 | end 82 | (* Renders [d] with [r] into a fresh buffer (initial size 1024) and returns the buffer contents. *) 83 | let doc_to_string r d = 84 | let b = Buffer.create 1024 in 85 | let c = Context.make r b in 86 | Context.doc c d; Buffer.contents b 87 | (* Renders [d] with [r], appending the output to [b]. *) 88 | let buffer_add_doc r b d = Context.doc (Context.make r b) d 89 | -------------------------------------------------------------------------------- /test/expect/bugs.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.md", lines 1-33 3 | Heading, level 1: 4 | File "bugs.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.md", line 3 11 | Paragraph: 12 | File "bugs.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "-" 21 | Layout after: 22 | File "-" 23 | Text: 24 | File "bugs.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.md", line 6 27 | Code block: 28 | File "bugs.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "-" 31 | Code line: 32 | File "bugs.md", line 8, characters 0-12 33 | Code line: 34 | File "bugs.md", line 9 35 | Code line: 36 | File "bugs.md", line 10, characters 0-25 37 | Closing fence: 38 | File "-" 39 | Blank line: 40 | File
"bugs.md", line 12 41 | Heading, level 1: 42 | File "bugs.md", line 13, characters 0-11 43 | Text: 44 | File "bugs.md", line 13, characters 2-11 45 | Blank line: 46 | File "bugs.md", line 14 47 | Paragraph: 48 | File "bugs.md", lines 15-16, characters 0-68 49 | Inlines: 50 | File "bugs.md", lines 15-16, characters 0-68 51 | Text: 52 | File "bugs.md", line 15, characters 0-69 53 | Soft break: 54 | File "bugs.md", lines 15-16, characters 69-0 55 | Layout before: 56 | File "-" 57 | Layout after: 58 | File "-" 59 | Text: 60 | File "bugs.md", line 16, characters 0-68 61 | Blank line: 62 | File "bugs.md", line 17 63 | Paragraph: 64 | File "bugs.md", line 18, characters 0-3 65 | Text: 66 | File "bugs.md", line 18, characters 0-3 67 | Blank line: 68 | File "bugs.md", line 19 69 | Paragraph: 70 | File "bugs.md", line 20, characters 0-3 71 | Text: 72 | File "bugs.md", line 20, characters 0-3 73 | Blank line: 74 | File "bugs.md", line 21 75 | Paragraph: 76 | File "bugs.md", line 22, characters 0-4 77 | Text: 78 | File "bugs.md", line 22, characters 0-4 79 | Blank line: 80 | File "bugs.md", line 23 81 | Blank line: 82 | File "bugs.md", line 24 83 | Paragraph: 84 | File "bugs.md", line 25, characters 0-4 85 | Text: 86 | File "bugs.md", line 25, characters 0-4 87 | Blank line: 88 | File "bugs.md", line 26 89 | Paragraph: 90 | File "bugs.md", line 27, characters 0-59 91 | Text: 92 | File "bugs.md", line 27, characters 0-59 93 | Blank line: 94 | File "bugs.md", line 28 95 | Blank line: 96 | File "bugs.md", line 29 97 | Blank line: 98 | File "bugs.md", line 30 99 | Blank line: 100 | File "bugs.md", line 31 101 | Blank line: 102 | File "bugs.md", line 32 103 | Blank line: 104 | File "bugs.md", line 33 -------------------------------------------------------------------------------- /test/expect/bugs.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.md", lines 1-33 3 | Heading, level 1: 4 | File "bugs.md", lines 1-2, 
characters 0-4 5 | Text: 6 | File "bugs.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.md", line 3 11 | Paragraph: 12 | File "bugs.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "bugs.md", line 4, characters 64-65 21 | Layout after: 22 | File "bugs.md", line 5 23 | Text: 24 | File "bugs.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.md", line 6 27 | Code block: 28 | File "bugs.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "bugs.md", line 7, characters 0-3 31 | Code line: 32 | File "bugs.md", line 8, characters 0-12 33 | Code line: 34 | File "bugs.md", line 9 35 | Code line: 36 | File "bugs.md", line 10, characters 0-25 37 | Closing fence: 38 | File "bugs.md", line 11, characters 0-3 39 | Blank line: 40 | File "bugs.md", line 12 41 | Heading, level 1: 42 | File "bugs.md", line 13, characters 0-11 43 | Text: 44 | File "bugs.md", line 13, characters 2-11 45 | Blank line: 46 | File "bugs.md", line 14 47 | Paragraph: 48 | File "bugs.md", lines 15-16, characters 0-68 49 | Inlines: 50 | File "bugs.md", lines 15-16, characters 0-68 51 | Text: 52 | File "bugs.md", line 15, characters 0-69 53 | Soft break: 54 | File "bugs.md", lines 15-16, characters 69-0 55 | Layout before: 56 | File "bugs.md", line 15, characters 69-69 57 | Layout after: 58 | File "bugs.md", line 16 59 | Text: 60 | File "bugs.md", line 16, characters 0-68 61 | Blank line: 62 | File "bugs.md", line 17 63 | Paragraph: 64 | File "bugs.md", line 18, characters 0-3 65 | Text: 66 | File "bugs.md", line 18, characters 0-3 67 | Blank line: 68 | File "bugs.md", line 19 69 | Paragraph: 70 | File "bugs.md", line 20, characters 0-3 71 | Text: 72 | File "bugs.md", line 20, characters 0-3 73 | Blank line: 74 | File 
"bugs.md", line 21 75 | Paragraph: 76 | File "bugs.md", line 22, characters 0-4 77 | Text: 78 | File "bugs.md", line 22, characters 0-4 79 | Blank line: 80 | File "bugs.md", line 23 81 | Blank line: 82 | File "bugs.md", line 24 83 | Paragraph: 84 | File "bugs.md", line 25, characters 0-4 85 | Text: 86 | File "bugs.md", line 25, characters 0-4 87 | Blank line: 88 | File "bugs.md", line 26 89 | Paragraph: 90 | File "bugs.md", line 27, characters 0-59 91 | Text: 92 | File "bugs.md", line 27, characters 0-59 93 | Blank line: 94 | File "bugs.md", line 28 95 | Blank line: 96 | File "bugs.md", line 29 97 | Blank line: 98 | File "bugs.md", line 30 99 | Blank line: 100 | File "bugs.md", line 31 101 | Blank line: 102 | File "bugs.md", line 32 103 | Blank line: 104 | File "bugs.md", line 33 -------------------------------------------------------------------------------- /DEVEL.md: -------------------------------------------------------------------------------- 1 | This project uses (perhaps the development version of) [`b0`] for 2 | development. Consult [b0 occasionally] for quick hints on how to 3 | perform common development tasks. 
4 | 5 | [`b0`]: https://erratique.ch/software/b0 6 | [b0 occasionally]: https://erratique.ch/software/b0/doc/occasionally.html 7 | 8 | # Benchmark parse to HTML rendering 9 | 10 | ```sh 11 | time cmark --unsafe /file/to/md > /dev/null 12 | time $(b0 --path -- bench --unsafe /file/to/md) > /dev/null 13 | ``` 14 | 15 | # Specification tests 16 | 17 | To run the specification tests use: 18 | 19 | ```sh 20 | b0 -- test_spec # All examples 21 | b0 -- test_spec 1-10 34 56 # Specific examples 22 | ``` 23 | 24 | # CommonMark renderer tests 25 | 26 | To test the CommonMark renderer on the specification tests use: 27 | 28 | ```sh 29 | b0 -- test_commonmark_render # All examples 30 | b0 -- test_commonmark_render 1-10 32 56 # Specific examples 31 | b0 -- test_commonmark_render --show-diff # Show correct render diffs 32 | ``` 33 | 34 | Given a source a *correct* render yields the same HTML and it *round 35 | trips* if the source is byte-for-byte equivalent. Using `--show-diff` 36 | on an example that does not round trip shows the reason and the diff. 37 | 38 | A first test is also done on parses without layout preservation to 39 | check they are correct. 40 | 41 | # Pathological tests 42 | 43 | The [pathological tests][p] of `cmark` have been ported to 44 | [`test/test_pathological.ml`]. You can run them on any executable that 45 | reads CommonMark on standard input and writes HTML rendering on 46 | standard output. 
47 | 48 | ```sh 49 | b0 -- test_pathological -- cmark 50 | b0 -- test_pathological --help 51 | b0 -- test_pathological --dump /tmp/ # Dump tests and expectations 52 | ``` 53 | 54 | An action allows running the tests on a build of the `cmarkit` tool: 55 | 56 | ``` 57 | b0 -- test_cmarkit_pathological 58 | ``` 59 | 60 | [p]: https://github.com/commonmark/cmark/blob/master/test/pathological_tests.py 61 | [`test/test_pathological.ml`]: test/test_pathological.ml 62 | 63 | # Expectation tests 64 | 65 | To add a new test, add an `.md` test in `test/expect`, run the tests 66 | and add the new generated files to the repo. 67 | 68 | ```sh 69 | b0 -- expect 70 | b0 -- expect --help 71 | ``` 72 | 73 | # Specification update 74 | 75 | If there's a specification version update, the `commonmark_version` 76 | variable must be updated both in [`B0.ml`] and in [`src/cmarkit.ml`]. 77 | A `s/old_version/new_version/g` should be performed on `.mli` files. 78 | 79 | The repository has the CommonMark specification test file in 80 | [`test/spec.json`]. 81 | 82 | To update it invoke: 83 | 84 | ```sh 85 | b0 -- update_spec_tests 86 | ``` 87 | 88 | Note that the numbers in `test/test_render_md.ml` may need to be updated 89 | so that the examples match. 90 | 91 | [`test/spec.json`]: test/spec.json 92 | [`src/cmarkit.ml`]: src/cmarkit.ml 93 | [`B0.ml`]: B0.ml 94 | 95 | # Unicode data update 96 | 97 | The library contains Unicode data generated in the file 98 | [`src/cmarkit_data_uchar.ml`]. 99 | 100 | To update it invoke: 101 | 102 | ```sh 103 | opam install uucp 104 | b0 -- generate-data 105 | ``` 106 | 107 | [`src/cmarkit_data_uchar.ml`]: src/cmarkit_data_uchar.ml 108 | -------------------------------------------------------------------------------- /src/tool/cmarkit_std.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | (* File paths are plain strings. *) 6 | type fpath = string 7 | (* Standard [Result] extended with [to_failure] and a [let*] binding operator. *) 8 | module Result = struct 9 | include Result 10 | let to_failure = function Ok v -> v | Error err -> failwith err (* [Error err] raises [Failure err] *) 11 | module Syntax = struct 12 | let ( let* ) = Result.bind 13 | end 14 | end 15 | (* Logging on the standard error formatter. *) 16 | module Log = struct 17 | let exec = Filename.basename Sys.executable_name 18 | (* [err] prefixes the message with the executable basename. *) 19 | let err fmt = 20 | Format.fprintf Format.err_formatter ("%s: @[" ^^ fmt ^^ "@]@.") exec 21 | (* [warn] writes the message without the executable prefix. *) 22 | let warn fmt = 23 | Format.fprintf Format.err_formatter ("@[" ^^ fmt ^^ "@]@.") 24 | (* [on_error ~use r f] is [f v] if [r] is [Ok v]; on [Error e] it logs [e] with [err] and returns [use]. *) 25 | let on_error ~use r f = match r with 26 | | Ok v -> f v | Error e -> err "%s" e; use 27 | end 28 | 29 | module Label_resolver = struct 30 | (* A label resolver that warns on redefinitions *) 31 | (* Warns that [current] redefines [prev]. If [current] has no text location only the label text is reported, otherwise both locations are printed. *) 32 | let warn_label_redefinition ~current ~prev = 33 | let open Cmarkit in 34 | let pp_loc = Textloc.pp_ocaml in 35 | let current_text = Label.text_to_string current in 36 | let current = Meta.textloc (Label.meta current) in 37 | let prev = Meta.textloc (Label.meta prev) in 38 | if Textloc.is_none current then 39 | Log.warn "Warning: @[Ignoring redefinition of label %S.@,\ 40 | Invoke with option --locs to get file locations.@,@]" 41 | current_text 42 | else 43 | Log.warn "@[%a:@,Warning: \ 44 | @[Ignoring redefinition of label %S.
\ 45 | Previous definition:@,%a@]@,@]" 46 | pp_loc current current_text pp_loc prev 47 | (* [v ~quiet] is the resolver: a reference resolves to the definition it is given, a first definition is kept, and a redefinition is dropped ([None]) with a warning unless [quiet] is [true]. *) 48 | let v ~quiet = function 49 | | `Ref (_, _, ref) -> ref 50 | | `Def (None, current) -> Some current 51 | | `Def (Some prev, current) -> 52 | if not quiet then warn_label_redefinition ~current ~prev; None 53 | end 54 | 55 | module Os = struct 56 | 57 | (* Emulate B0_std.Os functionality to eschew the dep *) 58 | (* Reads the whole of [file] in binary mode; "-" reads [stdin], which is left open. A [Sys_error] is returned as [Error]. *) 59 | let read_file file = 60 | try 61 | let ic = if file = "-" then stdin else open_in_bin file in 62 | let finally () = if file = "-" then () else close_in_noerr ic in 63 | Fun.protect ~finally @@ fun () -> Ok (In_channel.input_all ic) 64 | with 65 | | Sys_error err -> Error err 66 | (* Writes [s] to [file] in binary mode; "-" writes to [stdout], which is left open. A [Sys_error] is returned as [Error]. *) 67 | let write_file file s = 68 | try 69 | let oc = if file = "-" then stdout else open_out_bin file in 70 | let finally () = if file = "-" then () else close_out_noerr oc in 71 | Fun.protect ~finally @@ fun () -> Ok (Out_channel.output_string oc s) 72 | with 73 | | Sys_error err -> Error err 74 | (* Runs [f] on a fresh directory: a temporary file name is reserved, the file is removed and a directory with mode 0o700 is created under that name. The directory is removed with [Sys.rmdir] afterwards, ignoring failures. NOTE(review): a [Sys_error] raised by [f] itself is also caught here and gets the same error prefix. *) 75 | let with_tmp_dir f = 76 | try 77 | let tmpdir = 78 | let file = Filename.temp_file "cmarkit" "dir" in 79 | (Sys.remove file; Sys.mkdir file 0o700; file) 80 | in 81 | let finally () = try Sys.rmdir tmpdir with Sys_error _ -> () in 82 | Fun.protect ~finally @@ fun () -> Ok (f tmpdir) 83 | with 84 | | Sys_error err -> Error ("Making temporary dir: " ^ err) 85 | (* Runs [f] with the current working directory set to [cwd] and restores the previous directory afterwards, ignoring restore failures. NOTE(review): a [Sys_error] raised by [f] itself is also caught here and gets the same error prefix. *) 86 | let with_cwd cwd f = 87 | try 88 | let curr = Sys.getcwd () in 89 | let () = Sys.chdir cwd in 90 | let finally () = try Sys.chdir curr with Sys_error _ -> () in 91 | Fun.protect ~finally @@ fun () -> Ok (f ()) 92 | with 93 | | Sys_error err -> Error ("With cwd: " ^ err) 94 | end 95 | -------------------------------------------------------------------------------- /test/spec.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open Result.Syntax 8 | open B0_json 9 | (* Specification version, used by [pp_test_url] to build example URLs. *) 10 | let version = "0.31.2" 11 | (* A specification example; [parse_tests] fills [id] from the JSON member named example. *) 12 | type id = int 13 | type test = 14 | { markdown : string; 15 | html : string; 16 | id : id; 17 | start_line : int; 18 | end_line : int; 19 | section : string } 20 | (* Positional constructor; the argument order matches the query built in [parse_tests]. *) 21 | let test markdown html id start_line end_line section = 22 | { markdown; html; id; start_line; end_line; section } 23 | (* Formats the spec.commonmark.org URL of an example from [version] and the test [id]. *) 24 | let pp_test_url = 25 | Fmt.code' @@ fun ppf test -> 26 | Fmt.pf ppf "https://spec.commonmark.org/%s/#example-%d" version test.id 27 | (* Reads [file], parses it as JSON and queries it as an array of tests; errors are propagated through [let*]. *) 28 | let parse_tests file = 29 | let testq = 30 | Jsonq.(succeed test $ 31 | mem "markdown" string $ 32 | mem "html" string $ 33 | mem "example" int $ 34 | mem "start_line" int $ 35 | mem "end_line" int $ 36 | mem "section" string) 37 | in 38 | let* data = Os.File.read file in 39 | let* json = Json.of_string ~file:(Fpath.to_string file) data in 40 | let tests = Jsonq.array testq in 41 | Jsonq.query tests json 42 | (* Returns the tests to run paired with a label describing the selection: an empty id list selects all tests, otherwise only tests whose [id] is listed are kept. *) 43 | let select tests = function 44 | | [] -> tests, "Testing all examples" 45 | | ids -> 46 | List.filter (fun t -> List.mem t.id ids) tests, 47 | let ids = Fmt.str "@[%a@]" Fmt.(list ~sep:comma int) ids in 48 | Fmt.str "@[Testing example %a@]" Fmt.(truncated ~max:60) ids 49 | (* Logs [label] then applies [f] to every test inside a [Test.block] with custom pass and fail reports. *) 50 | let test_examples ~label tests f = 51 | (* We use a block to report number of tested examples *) 52 | let open B0_testing in 53 | let example = Fmt.cardinal ~one:(Fmt.any "example") () in 54 | let pass ?__POS__ count = 55 | Test.log "%a %a %a" Test.Fmt.count count example count Test.Fmt.passed () 56 | in 57 | let fail ?__POS__ count ~assertions = 58 | Test.log "%a %a %a" 59 | Test.Fmt.fail_count_ratio (count, assertions) example assertions 60 | Test.Fmt.failed () 61 | in 62 | Test.log "%s" label; 63 | Test.block ~pass ~fail (fun () -> List.iter f tests) 64 | 65 | (* Command line *) 66 | 67 | let range_conv = 68 | let parser s = match
int_of_string_opt s with 69 | | Some i -> Ok [i] (* a single number *) 70 | | None -> (* otherwise expect two numbers separated by a dash; any parse failure raises [Exit] *) 71 | try 72 | let exit_on_none = function None -> raise Exit | Some s -> s in 73 | let (l, r) = String.split_first ~sep:"-" s |> exit_on_none in 74 | let l = int_of_string_opt l |> exit_on_none in 75 | let r = int_of_string_opt r |> exit_on_none in 76 | let lo, hi = if l < r then l, r else r, l in (* bounds may be given in either order *) 77 | let acc = ref [] in 78 | for i = hi downto lo do acc := i :: !acc done; (* consing while counting down yields lo..hi in ascending order *) 79 | Ok !acc 80 | with 81 | | Exit -> Fmt.error "%S: not a number or range number" s 82 | in 83 | let pp = Fmt.(list ~sep:sp int) in 84 | let docv = "NUM[-NUM]" in 85 | Cmdliner.Arg.Conv.make ~docv ~parser ~pp () 86 | (* All positional arguments, each parsed by [range_conv] into a list of numbers, concatenated into a single list. *) 87 | let ids = 88 | let doc = 89 | "$(docv) are the identifiers of the examples to test (none is all)" 90 | in 91 | let nums = Cmdliner.Arg.(value & pos_all range_conv [] & info [] ~doc) in 92 | Cmdliner.Term.(const List.concat $ nums) 93 | (* Optional argument selecting the test file; it defaults to test/spec.json. *) 94 | let file = 95 | let doc = "$(docv) is the test file." in 96 | let default = Fpath.v "test/spec.json" in 97 | Cmdliner.Arg.(value & opt B0_std_cli.filepath default & info ["file"] ~doc) 98 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - `cmarkit` tool: enable file completion on file arguments. 4 | 5 | v0.4.0 2025-11-01 Zagreb 6 | ------------------------ 7 | 8 | - Support for the CommonMark 0.31.2 specification (#17). 9 | 10 | - Change task items extension semantics: the task marker is no longer 11 | considered part of the list marker. The new semantics can lead to 12 | surprises with item subparagraphs which can show up as indented code 13 | blocks, but it avoids huge indentations for subtasks and is consistent 14 | with what at least GFM and `md4c` do. 15 | Thanks to Thomas Gazagnaire for the report (#24). 16 | 17 | - `Cmarkit_latex`.
Add option `?first_heading_level` to the renderer 18 | to set the LaTeX heading level to use for the first CommonMark 19 | heading level. A corresponding option `--first-heading-level` is 20 | added to `cmarkit latex`. 21 | Thanks to Léo Andrès for the patch (#16). 22 | 23 | - `cmarkit html` command: add option `--body-id` to identify page body 24 | elements. 25 | 26 | - `cmarkit` tool: install manpages and completions. 27 | 28 | - Less eager escaping of `#` characters in CommonMark renderings. 29 | Thanks to Thomas Gazagnaire for the report (#25). 30 | 31 | - Less eager escaping of `.` and `)` characters in CommonMark rendering. 32 | Thanks to Ty Overby for the report (#19). 33 | 34 | - Fix incorrect parsing of code spans if they start with an escaped 35 | backtick (#21). 36 | 37 | - Fix incorrect escaping of backticks in CommonMark renderings (#26). 38 | 39 | - Fix incorrect escaping of tildes for CommonMark rendering interpreted 40 | with extensions (strikethrough becomes code fence). 41 | Thanks to Tianyi Song for the report (#20). 42 | 43 | - Fix `Cmarkit.Mapper`. Do not drop empty table cells. 44 | Thanks to Hannes Mehnert for the report (#14). 45 | 46 | - Fix out of bounds exception when lists are terminated by the end of file. 47 | Thanks to Ty Overby for the report (#18). 48 | 49 | - Fix invalid HTML markup generated for cancelled task items. 50 | Thanks to Sebastien Mondet for the report (#15). 51 | 52 | - Fix misspelling of `--leading_l` variable in `cmarkit html`'s 53 | CSS file. 54 | 55 | - Updated data for Unicode 17.0.0. 56 | 57 | - Require (depopt) `cmdliner` 2.0.0. 58 | 59 | 60 | v0.3.0 2023-12-12 La Forclaz (VS) 61 | --------------------------------- 62 | 63 | - Fix ordered item marker escaping. Thanks to Rafał Gwoździński for 64 | the report (#11). 65 | 66 | - Data updated for Unicode 15.1.0 (no changes except 67 | for the value of `Cmarkit.Doc.unicode_version`). 
68 | 69 | - Fix table extension column parsing, toplevel text inlines were being 70 | dropped. Thanks to Javier Chávarri for the report (#10). 71 | 72 | - `List_item.make`, change default value of `after_marker` from 0 to 1. 73 | We don't want to generate invalid CommonMark by default. Thanks to 74 | Rafał Gwoździński for the report (#9). 75 | 76 | - Add option `-f/--full-featured`, to `cmarkit html`. A synonym for a 77 | bunch of existing options to generate a publishable document with extensions 78 | and math rendering without hassle. See `cmarkit html --help` for details. 79 | 80 | v0.2.0 2023-05-10 La Forclaz (VS) 81 | --------------------------------- 82 | 83 | - Fix bug in `Block_lines.list_of_string`. Thanks to Rafał Gwoździński 84 | for the report and the fix (#7, #8). 85 | - `Cmarkit.Mapper`. Fix non-sensical default map for `Image` nodes: do 86 | not delete `Image` nodes whose alt text maps to `None`, replace the 87 | alt text by `Inline.empty`. Thanks to Nicolás Ojeda Bär for the 88 | report and the fix (#6). 89 | 90 | v0.1.0 2023-04-06 La Forclaz (VS) 91 | --------------------------------- 92 | 93 | First release. 94 | 95 | Supported by a grant from the OCaml Software Foundation. 96 | -------------------------------------------------------------------------------- /test/expect/basic.md: -------------------------------------------------------------------------------- 1 | Basic tests 2 | =========== 3 | 4 | Basic tests for all CommonMark constructs. 5 | 6 | ## Testing autolinks 7 | 8 | This is an and another one . 9 | 10 | 11 | ## Testing breaks 12 | 13 | A line ending (not in a code span or HTML tag) that is preceded by two 14 | or more spaces and does not occur at the end of a block is parsed as a 15 | hard line break. 16 | 17 | So this means we had softbreaks so far and now we get \ 18 | a hard break 19 | and another one. 20 | 21 | > So this means we had softbreaks so far and now we get \ 22 | > a hard break 23 | > and another one. 
24 | > This is very soooft. 25 | 26 | ## Testing code spans 27 | 28 | This is a multi-line code` 29 | code span `` it has backticks 30 | in there` 31 | 32 | Sometimes code spans `` `can have 33 | really ``` 34 | strange 35 | layout ``. Do you fancy `` `A_polymorphic_variant `` ? 36 | 37 | 38 | ## Testing emphasis 39 | 40 | There is _more_ than *one syntax* for __emphasis__ and **strong 41 | emphasis**. We should be careful about **embedded * marker**. This 42 | will be **tricky * to handle**. This *is not ** what* you want ? 43 | 44 | 45 | ## Testing links, images and link reference definitions 46 | 47 | This is an ![inline image]( 48 | /heyho (The 49 | multine title)) 50 | 51 | That is totally [colla psed][] and 52 | that is [`short cuted`] 53 | 54 | Shortcuts can be better than [full references][`short 55 | cuted`] but not 56 | always and we'd like to trip their [label][`short cuted`]. 57 | 58 | > [colla psed]: /hohoho "And again these 59 | > multi 60 | > line titles" 61 | 62 | [`short cuted`]: /veryshort "But very 63 | important" 64 | 65 | 66 | ## Testing raw HTML 67 | 68 | Haha a hihi this is not the end yet. 70 | 71 | foo u 72 | 73 | > Haha a data="foo" > hihi this is not the end yet. 75 | 76 | ## Testing blank lines 77 | 78 | 79 | 80 | Impressive isn't it ? 81 | 82 | ## Testing block quotes 83 | 84 | 85 | > > How is 86 | > > Nestyfing going on 87 | >> These irregularities **will** normalize 88 | > We keep only the first block quote indent 89 | 90 | > ## Further tests ####### 91 | 92 | We need a little quote here 93 | > It's warranted. 94 | 95 | 96 | ## Testing code blocks 97 | 98 | ``` layout after info is not kept 99 | ``` 100 | 101 | ``` ocaml module M 102 | 103 | type t = 104 | | A of int 105 | | B of string 106 | 107 | let square x = x *. 
x 108 | ```` 109 | 110 | The indented code block: 111 | 112 | a b c d 113 | a b c d 114 | a b c d 115 | 116 | 117 | a 118 | a b c 119 | 120 | 121 | > ``` ocaml module M 122 | > 123 | > type t = 124 | > | A of int 125 | > | B of string 126 | > 127 | > let square x = x *. x 128 | > ```` 129 | 130 | 131 | ## Testing headings 132 | 133 | aaa 134 | aaaa 135 | ======== 136 | 137 | > bbb `hey` 138 | > bbbb 139 | > -------- 140 | 141 | # That's one way 142 | 143 | ### It's a long way to the heading 144 | 145 | ## Testing HTML block 146 | 147 | 150 | 151 | * 154 | 155 | ## Testing lists 156 | 157 | The `square` function is the root. There are reasons for this: 158 | 159 | 1. There is no reason. There should be a reason or an 160 | 2. Maybe that's the reason. But it may not be the reason. 161 | 3. Is reason the only tool ? 162 | 163 | > Quoted bullets 164 | > * Is this important ? 165 | * * Well it's in the spec 166 | * 167 | Empty list item above 168 | 169 | ## Testing paragraphs 170 | 171 | We really want your paragraph layout preserved. 172 | Really ? 173 | Really. 174 | Really. 175 | Really. 176 | 177 | 178 | > We really want your paragraph layout preserved. 179 | > Really ? 180 | > Really. 181 | > Really. 182 | > Really. 183 | 184 | 185 | 186 | ## Testing thematic breaks 187 | 188 | *** 189 | --- 190 | ___ 191 | 192 | _ _ _ _ _ 193 | 194 | > ******* 195 | -------------------------------------------------------------------------------- /test/expect/basic.trip.md: -------------------------------------------------------------------------------- 1 | Basic tests 2 | =========== 3 | 4 | Basic tests for all CommonMark constructs. 5 | 6 | ## Testing autolinks 7 | 8 | This is an and another one . 9 | 10 | 11 | ## Testing breaks 12 | 13 | A line ending (not in a code span or HTML tag) that is preceded by two 14 | or more spaces and does not occur at the end of a block is parsed as a 15 | hard line break. 
16 | 17 | So this means we had softbreaks so far and now we get \ 18 | a hard break 19 | and another one. 20 | 21 | > So this means we had softbreaks so far and now we get \ 22 | > a hard break 23 | > and another one. 24 | > This is very soooft. 25 | 26 | ## Testing code spans 27 | 28 | This is a multi-line code` 29 | code span `` it has backticks 30 | in there` 31 | 32 | Sometimes code spans `` `can have 33 | really ``` 34 | strange 35 | layout ``. Do you fancy `` `A_polymorphic_variant `` ? 36 | 37 | 38 | ## Testing emphasis 39 | 40 | There is _more_ than *one syntax* for __emphasis__ and **strong 41 | emphasis**. We should be careful about **embedded \* marker**. This 42 | will be **tricky \* to handle**. This *is not \*\* what* you want ? 43 | 44 | 45 | ## Testing links, images and link reference definitions 46 | 47 | This is an ![inline image]( 48 | /heyho (The 49 | multine title)) 50 | 51 | That is totally [colla psed][] and 52 | that is [`short cuted`] 53 | 54 | Shortcuts can be better than [full references][`short 55 | cuted`] but not 56 | always and we'd like to trip their [label][`short cuted`]. 57 | 58 | > [colla psed]: /hohoho "And again these 59 | > multi 60 | > line titles" 61 | 62 | [`short cuted`]: /veryshort "But very 63 | important" 64 | 65 | 66 | ## Testing raw HTML 67 | 68 | Haha a hihi this is not the end yet. 70 | 71 | foo u 72 | 73 | > Haha a data="foo" > hihi this is not the end yet. 75 | 76 | ## Testing blank lines 77 | 78 | 79 | 80 | Impressive isn't it ? 81 | 82 | ## Testing block quotes 83 | 84 | 85 | > > How is 86 | > > Nestyfing going on 87 | > > These irregularities **will** normalize 88 | > > We keep only the first block quote indent 89 | 90 | > ## Further tests ####### 91 | 92 | We need a little quote here 93 | > It's warranted. 
94 | 95 | 96 | ## Testing code blocks 97 | 98 | ``` layout after info is not kept 99 | ``` 100 | 101 | ``` ocaml module M 102 | 103 | type t = 104 | | A of int 105 | | B of string 106 | 107 | let square x = x *. x 108 | ```` 109 | 110 | The indented code block: 111 | 112 | a b c d 113 | a b c d 114 | a b c d 115 | 116 | 117 | a 118 | a b c 119 | 120 | 121 | > ``` ocaml module M 122 | > 123 | > type t = 124 | > | A of int 125 | > | B of string 126 | > 127 | > let square x = x *. x 128 | > ```` 129 | 130 | 131 | ## Testing headings 132 | 133 | aaa 134 | aaaa 135 | ======== 136 | 137 | > bbb `hey` 138 | > bbbb 139 | > -------- 140 | 141 | # That's one way 142 | 143 | ### It's a long way to the heading 144 | 145 | ## Testing HTML block 146 | 147 | 150 | 151 | * 154 | 155 | ## Testing lists 156 | 157 | The `square` function is the root. There are reasons for this: 158 | 159 | 1. There is no reason. There should be a reason or an 160 | 2. Maybe that's the reason. But it may not be the reason. 161 | 3. Is reason the only tool ? 162 | 163 | > Quoted bullets 164 | > * Is this important ? 165 | * * Well it's in the spec 166 | * 167 | Empty list item above 168 | 169 | ## Testing paragraphs 170 | 171 | We really want your paragraph layout preserved. 172 | Really ? 173 | Really. 174 | Really. 175 | Really. 176 | 177 | 178 | > We really want your paragraph layout preserved. 179 | > Really ? 180 | > Really. 181 | > Really. 182 | > Really. 183 | 184 | 185 | 186 | ## Testing thematic breaks 187 | 188 | *** 189 | --- 190 | ___ 191 | 192 | _ _ _ _ _ 193 | 194 | > ******* 195 | -------------------------------------------------------------------------------- /src/tool/cmarkit_cli.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2025 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmarkit_std 7 | (* Exit codes specific to the tool together with their man page entries. [exits] is the [err_file] entry followed by [Cmd.Exit.defaults]; [exits_with_err_diff] puts the [err_diff] entry in front of [exits]. *) 8 | module Exit = struct 9 | open Cmdliner 10 | 11 | type code = Cmdliner.Cmd.Exit.code 12 | let err_file = 1 13 | let err_diff = 2 14 | 15 | let exits = 16 | Cmd.Exit.info err_file ~doc:"on file read errors." :: 17 | Cmd.Exit.defaults 18 | 19 | let exits_with_err_diff = 20 | Cmd.Exit.info err_diff ~doc:"on render differences." :: exits 21 | end 22 | (* [process_files f files] goes through [files] in order: each file is read with [Os.read_file] and [f ~file content] is applied before moving on to the next one. Evaluates to 0 once the list is exhausted. A read error is handed to [Log.on_error ~use:Exit.err_file]; presumably this logs the error and evaluates to [Exit.err_file] without processing the remaining files -- TODO confirm in Cmarkit_std. *) 23 | let process_files f files = 24 | let rec loop = function 25 | | [] -> 0 26 | | file :: files -> 27 | Log.on_error ~use:Exit.err_file (Os.read_file file) @@ fun content -> 28 | f ~file content; loop files 29 | in 30 | loop files 31 | 32 | open Cmdliner 33 | (* Cmdliner argument definitions follow. [accumulate_defs] is the -D / --accumulate-defs flag. *) 34 | let accumulate_defs = 35 | let doc = 36 | "Accumulate label definitions from one input file to the other \ 37 | (in left to right command line order). Link reference definitions and \ 38 | footnote definitions of previous files can be used and override \ 39 | those made in subsequent ones." 40 | in 41 | Arg.(value & flag & info ["D"; "accumulate-defs"] ~doc) 42 | (* [backend_blocks ~doc] is the -b / --backend-blocks flag; the caller supplies the documentation string. *) 43 | let backend_blocks ~doc = 44 | Arg.(value & flag & info ["b"; "backend-blocks"] ~doc) 45 | (* [docu] is the -c / --doc flag. *) 46 | let docu = 47 | let doc = "Output a complete document rather than a fragment." in 48 | Arg.(value & flag & info ["c"; "doc"] ~doc) 49 | (* [files] collects all positional arguments as file paths; when none is given the value is the list holding only the string - (a single dash). *) 50 | let files = 51 | let doc = "$(docv) is the CommonMark file to process (repeatable). Reads \ 52 | from $(b,stdin) if none or $(b,-) is specified." in 53 | Arg.(value & pos_all filepath ["-"] & info [] ~doc ~docv:"FILE.md") 54 | (* [heading_auto_ids] is the -h / --heading-auto-ids flag. *) 55 | let heading_auto_ids = 56 | let doc = "Automatically generate heading identifiers." in 57 | Arg.(value & flag & info ["h"; "heading-auto-ids"] ~doc) 58 | (* [lang] is the -l / --lang string option; its default value is en. *) 59 | let lang = 60 | let doc = "Language (BCP47) of the document when $(b,--doc) is used." 
in 61 | let docv = "LANG" in 62 | Arg.(value & opt string "en" & info ["l"; "lang"] ~doc ~docv) 63 | (* [no_layout] is the --no-layout flag (no short name). *) 64 | let no_layout = 65 | let doc = "Drop layout information during parsing." in 66 | Arg.(value & flag & info ["no-layout"] ~doc) 67 | (* [quiet] is the -q / --quiet flag. *) 68 | let quiet = 69 | let doc = "Be quiet. Do not report label redefinition warnings." in 70 | Arg.(value & flag & info ["q"; "quiet"] ~doc) 71 | (* [safe] is a boolean vflag: --safe selects true, -u / --unsafe selects false, and the value is true when neither is given. *) 72 | let safe = 73 | let safe = 74 | let doc = "Drop raw HTML and dangerous URLs (default). If \ 75 | you are serious about XSS prevention, better pipe \ 76 | the output to a dedicated HTML sanitizer." 77 | in 78 | Arg.info ["safe"] ~doc 79 | in 80 | let unsafe = 81 | let doc = "Keep raw HTML and dangerous URLs. See option $(b,--safe)." in 82 | Arg.info ["u"; "unsafe"] ~doc 83 | in 84 | Arg.(value & vflag true [true, safe; false, unsafe]) 85 | (* [strict] is the boolean negation of the -e / --exts flag: it is true unless extensions are requested on the command line. *) 86 | let strict = 87 | let extended = 88 | let doc = "Activate supported extensions: strikethrough ($(b,~~)), \ 89 | LaTeX math ($(b,\\$), $(b,\\$\\$) and $(b,math) code blocks), \ 90 | footnotes ($(b,[^id])), task items \ 91 | ($(b,[ ]), $(b,[x]), $(b,[~])) and pipe tables. \ 92 | See the library documentation for more information." 93 | in 94 | Arg.(value & flag & info ["e"; "exts"] ~doc) 95 | in 96 | Term.app (Term.const Bool.not) extended 97 | (* [title] is the optional -t / --title string; it is [None] when the option is absent. *) 98 | let title = 99 | let doc = "Title of the document when $(b,--doc) is used. Derived from \ 100 | the filename of the first input file if unspecified." 101 | in 102 | let docv = "TITLE" in 103 | Arg.(value & opt (some string) None & info ["t"; "title"] ~doc ~docv) 104 | (* [common_man] holds the man page blocks for the bugs and see-also sections; cmd_commonmark.ml splices it into its own man page with `Blocks. *) 105 | let common_man = 106 | [ `S Manpage.s_bugs; 107 | `P "This program is distributed with the $(b,cmarkit) OCaml library. \ 108 | See $(i,https://erratique.ch/software/cmarkit) for contact \ 109 | information."; 110 | `S Manpage.s_see_also; 111 | `P "More information about the renderers can be found in the \ 112 | documentation of the $(b,cmarkit) OCaml library. Consult \ 113 | $(b,odig doc cmarkit) or the online documentation." 
] 114 | -------------------------------------------------------------------------------- /test/expect/basic.exts.md: -------------------------------------------------------------------------------- 1 | # Extensions 2 | 3 | ## Footnotes 4 | 5 | This is a footnote in history[^1] with mutiple references[^1] 6 | and even [text references][^1] 7 | 8 | [^1]: And it can have 9 | lazy continuation lines and multiple paragraphs 10 | 11 | If you indent one column after the footnote label start. 12 | 13 | cb 14 | * list item 15 | ablc 16 | * another item 17 | 18 | 19 | This is no longer the footnote. 20 | 21 | Can we make footnotes in footnotes[^2] ? 22 | 23 | [^2]: This gets tricky but I guess we could have a footnote[^tricky] in 24 | a footnote. Also footnote[^1] in footnotes[^2] is[^3] tricky for getting 25 | all back references rendered correctly. 26 | 27 | [^tricky]: The foot of the footnote. But that's not going to link back[^2] 28 | 29 | Second footnote 30 | 31 | Not the footnote 32 | 33 | [^3]: 34 | 35 | Not the footnote but a reference to an empty footnote[^3] 36 | 37 | Not a footnote [^] 38 | 39 | [^]: not a footnote. 40 | 41 | 42 | ## Strikethrough 43 | 44 | The stroken ~~*emph*~~. 45 | 46 | Nesting the nest ~~*emph* ~~stroke~~ *emph **emph ~~strikeagain~~***~~ 47 | 48 | There must be no blanks after the opener and before the closer. This 49 | is ~~ not an opener and ~~this won't open ~~that does~~. 50 | 51 | * Here we have ~~stroken `code`~~. 52 | * Here we have ~~nested ~~stroken~~ ok~~ 53 | 54 | ## Math 55 | 56 | The inline $\sqrt{x^2-1}$ equation. 57 | 58 | There must be no blanks after the opener and before the closer. This 59 | makes so you can donate $5 or $10 dollars here and there without problem. 60 | 61 | There is no such think as nesting $\sqrt{x^2-1}$+3$+3$. 
As usual 62 | delimiters can be \$escaped\$ $\sqrt{16\$}$ 63 | 64 | Amazing, this is [hyperlinked math $3x^2$](https://example.org) 65 | 66 | The HTML renderer should be careful with $a < b$ escapes. 67 | 68 | Display math can be in `math` code blocks. 69 | 70 | ```math 71 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 72 | ``` 73 | 74 | But it can also be in $$ \left( \sum_{k=1}^n 75 | a_k b_k \right)^2 < \Phi $$ 76 | 77 | 78 | ## List task items 79 | 80 | * [ ] Task open 81 | * [x] Task done 82 | * [X] Task done 83 | * [✓] Task done (U+2713, CHECK MARK) 84 | * [✔] Task done (U+2714, HEAVY CHECK MARK) 85 | Indent 86 | * Of course this can all be nested 87 | * [𐄂] Task done (U+10102, AEGEAN CHECK MARK) 88 | It will be done for sure. 89 | 90 | code block 91 | Not a code block 92 | * [x] Task done 93 | * [~] Task cancelled 94 | Paragraphy 95 | * [~] Task canceled 96 | 97 | we have a code block here too. 98 | * [x]Not a task 99 | * \[x] Not a task 100 | 101 | * [ ] task 102 | * [ ] sub task 103 | 104 | * [ ] 105 | * [ ] a 106 | 107 | Code 108 | Not code 109 | 110 | * [ ] 111 | Code 112 | Not code 113 | 114 | 115 | ## Tables 116 | 117 | A sample table: 118 | 119 | | Id | Name | Description | Link | 120 | |:--:|------:|:-----------------------|--------------------:| 121 | | 1 | OCaml | The OCaml website | | 122 | | 2 | Haskell | The Haskell website | | 123 | | 3 | MDN Web docs | Web dev docs | | 124 | | 4 | Wikipedia | The Free Encyclopedia | | 125 | 126 | Testing these non separator pipes. 
127 | 128 | | Fancy | maybe | hu\|glu | 129 | |-------|-------|-------| 130 | | *a | b* | `code |` | [bl|a] | 131 | | not \| two cols | $\sqrt(x^2 - 1)$ | 132 | 133 | [bl|a]: https://example.org 134 | 135 | 136 | A table with changing labels and alignement: 137 | 138 | | h1 | h2 | 139 | |-----|:---:| 140 | | 1 | 2 | 141 | | h3 | h4 | 142 | |:----|----:| 143 | | 3 | 4 | 144 | 145 | A simple header less table with left and right aligned columns 146 | 147 | |:--|--:| 148 | | 1 | 2 | 149 | 150 | The simplest table: 151 | 152 | | 1 | 2 | 153 | 154 | A header only table: 155 | 156 | | h1 | h2 | 157 | |:--:|:--:| 158 | 159 | Maximal number of columns all rows defines number of colums: 160 | 161 | | h1 | h2 | h3 | 162 | |:---|:--:|---:| 163 | |left | center | right | 164 | | ha! | four | columns | in fact | 165 | ||||| 166 | ||||a| 167 | 168 | Header less table: 169 | 170 | |header|less| 171 | |this | is | 172 | 173 | Another quoted header less table with aligement 174 | 175 | > |----:|----:| 176 | > | header | less | 177 | > | again | aligned | 178 | 179 | This is an empty table with three columns: 180 | 181 | | ||| 182 | 183 | -------------------------------------------------------------------------------- /test/expect/basic.exts.trip.md: -------------------------------------------------------------------------------- 1 | # Extensions 2 | 3 | ## Footnotes 4 | 5 | This is a footnote in history[^1] with mutiple references[^1] 6 | and even [text references][^1] 7 | 8 | [^1]: And it can have 9 | lazy continuation lines and multiple paragraphs 10 | 11 | If you indent one column after the footnote label start. 12 | 13 | cb 14 | * list item 15 | ablc 16 | * another item 17 | 18 | 19 | This is no longer the footnote. 20 | 21 | Can we make footnotes in footnotes[^2] ? 22 | 23 | [^2]: This gets tricky but I guess we could have a footnote[^tricky] in 24 | a footnote. Also footnote[^1] in footnotes[^2] is[^3] tricky for getting 25 | all back references rendered correctly. 
26 | 27 | [^tricky]: The foot of the footnote. But that's not going to link back[^2] 28 | 29 | Second footnote 30 | 31 | Not the footnote 32 | 33 | [^3]: 34 | 35 | Not the footnote but a reference to an empty footnote[^3] 36 | 37 | Not a footnote \[^\] 38 | 39 | \[^\]: not a footnote. 40 | 41 | 42 | ## Strikethrough 43 | 44 | The stroken ~~*emph*~~. 45 | 46 | Nesting the nest ~~*emph* ~~stroke~~ *emph **emph ~~strikeagain~~***~~ 47 | 48 | There must be no blanks after the opener and before the closer. This 49 | is \~~ not an opener and \~~this won't open ~~that does~~. 50 | 51 | * Here we have ~~stroken `code`~~. 52 | * Here we have ~~nested ~~stroken~~ ok~~ 53 | 54 | ## Math 55 | 56 | The inline $\sqrt{x^2-1}$ equation. 57 | 58 | There must be no blanks after the opener and before the closer. This 59 | makes so you can donate \$5 or \$10 dollars here and there without problem. 60 | 61 | There is no such think as nesting $\sqrt{x^2-1}$\+3$+3$. As usual 62 | delimiters can be \$escaped\$ $\sqrt{16\$}$ 63 | 64 | Amazing, this is [hyperlinked math $3x^2$](https://example.org) 65 | 66 | The HTML renderer should be careful with $a < b$ escapes. 67 | 68 | Display math can be in `math` code blocks. 69 | 70 | ```math 71 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 72 | ``` 73 | 74 | But it can also be in $$ \left( \sum_{k=1}^n 75 | a_k b_k \right)^2 < \Phi $$ 76 | 77 | 78 | ## List task items 79 | 80 | * [ ] Task open 81 | * [x] Task done 82 | * [X] Task done 83 | * [✓] Task done (U+2713, CHECK MARK) 84 | * [✔] Task done (U+2714, HEAVY CHECK MARK) 85 | Indent 86 | * Of course this can all be nested 87 | * [𐄂] Task done (U+10102, AEGEAN CHECK MARK) 88 | It will be done for sure. 89 | 90 | code block 91 | Not a code block 92 | * [x] Task done 93 | * [~] Task cancelled 94 | Paragraphy 95 | * [~] Task canceled 96 | 97 | we have a code block here too. 
98 | * \[x\]Not a task 99 | * \[x\] Not a task 100 | 101 | * [ ] task 102 | * [ ] sub task 103 | 104 | * [ ] 105 | * [ ] a 106 | 107 | Code 108 | Not code 109 | 110 | * [ ] 111 | Code 112 | Not code 113 | 114 | 115 | ## Tables 116 | 117 | A sample table: 118 | 119 | | Id | Name | Description | Link | 120 | |:--:|------:|:-----------------------|--------------------:| 121 | | 1 | OCaml | The OCaml website | | 122 | | 2 | Haskell | The Haskell website | | 123 | | 3 | MDN Web docs | Web dev docs | | 124 | | 4 | Wikipedia | The Free Encyclopedia | | 125 | 126 | Testing these non separator pipes. 127 | 128 | | Fancy | maybe | hu\|glu | 129 | |-------|-------|-------| 130 | | *a \| b* | `code |` | [bl|a] | 131 | | not \| two cols | $\sqrt(x^2 - 1)$ | 132 | 133 | [bl|a]: https://example.org 134 | 135 | 136 | A table with changing labels and alignement: 137 | 138 | | h1 | h2 | 139 | |-----|:---:| 140 | | 1 | 2 | 141 | | h3 | h4 | 142 | |:----|----:| 143 | | 3 | 4 | 144 | 145 | A simple header less table with left and right aligned columns 146 | 147 | |:--|--:| 148 | | 1 | 2 | 149 | 150 | The simplest table: 151 | 152 | | 1 | 2 | 153 | 154 | A header only table: 155 | 156 | | h1 | h2 | 157 | |:--:|:--:| 158 | 159 | Maximal number of columns all rows defines number of colums: 160 | 161 | | h1 | h2 | h3 | 162 | |:---|:--:|---:| 163 | |left | center | right | 164 | | ha\! 
| four | columns | in fact | 165 | ||||| 166 | ||||a| 167 | 168 | Header less table: 169 | 170 | |header|less| 171 | |this | is | 172 | 173 | Another quoted header less table with aligement 174 | 175 | > |----:|----:| 176 | > | header | less | 177 | > | again | aligned | 178 | 179 | This is an empty table with three columns: 180 | 181 | | ||| 182 | 183 | -------------------------------------------------------------------------------- /support/generate_data.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | (* [str] is [Format.sprintf]; [exec] is the base name of the running executable (used in the usage message of [main]). *) 6 | let str = Format.sprintf 7 | let exec = Filename.basename Sys.executable_name 8 | (* [pp_uchar] prints a character as U+ followed by at least four uppercase hexadecimal digits. *) 9 | let pp_uchar ppf u = Format.fprintf ppf "U+%04X" (Uchar.to_int u) 10 | (* [fold_uchars f acc] folds [f] over every [Uchar.t] value from [Uchar.min] to [Uchar.max] inclusive, stepping with [Uchar.succ]. *) 11 | let fold_uchars f acc = 12 | let rec loop f acc u = 13 | let acc = f acc u in 14 | if Uchar.equal u Uchar.max then acc else loop f acc (Uchar.succ u) 15 | in 16 | loop f acc Uchar.min 17 | (* [sat_list p] is the list of characters satisfying [p], in increasing order (accumulated in reverse, then reversed). *) 18 | let sat_list p = 19 | let add acc u = if p u then u :: acc else acc in 20 | List.rev (fold_uchars add []) 21 | 22 | (* See https://spec.commonmark.org/current/#unicode-whitespace-character *) 23 | let is_whitespace u = 24 | let is_zs = Uucp.Gc.general_category u = `Zs in 25 | let u = Uchar.to_int u in 26 | is_zs || u = 0x0009 || u = 0x000A || u = 0x000C || u = 0x000d 27 | 28 | (* See https://spec.commonmark.org/current/#ascii-punctuation-character *) 29 | let is_ascii_punctuation u = 30 | let u = Uchar.to_int u in 31 | (0x0021 <= u && u <= 0x002F) || 32 | (0x003A <= u && u <= 0x0040) || 33 | (0x005B <= u && u <= 0x0060) || 34 | (0x007B <= u && u <= 0x007E) 35 | (* [is_punctuation u] holds when the general category of [u] is one of the listed P* or S* categories; any other character is tested with [is_ascii_punctuation]. *) 36 | let is_punctuation u = match Uucp.Gc.general_category u with 37 | | `Pc | `Pd | `Pe | `Pf | `Pi | `Po | `Ps -> true 38 | | `Sm | `Sc | `Sk | `So -> 
true 39 | | _ -> is_ascii_punctuation u 40 | (* Characters satisfying the two predicates, in increasing order. *) 41 | let whitespace_list = sat_list is_whitespace 42 | let punctuation_list = sat_list is_punctuation 43 | (* [case_fold_map] lists, in increasing order, the characters whose case folding is not the character itself, paired with the folding written as a concatenation of backslash-u escapes (one per folded character) ready to be emitted in an OCaml string literal. *) 44 | let case_fold_map = 45 | let uchar_map acc u = match Uucp.Case.Fold.fold u with 46 | | `Self -> acc 47 | | `Uchars f -> 48 | let esc u = Printf.sprintf "\\u{%04X}" (Uchar.to_int u) in 49 | (u, String.concat "" (List.map esc f)) :: acc 50 | in 51 | List.rev (fold_uchars uchar_map []) 52 | (* [byte_size v] is the size in bytes of the heap blocks reachable from [v]. [Obj.reachable_words] counts words and [Sys.word_size] is expressed in bits, so one word is [Sys.word_size / 8] bytes and the byte size is the product of the two (a division here would shrink the result by a factor of 64 on 64-bit systems). *) 53 | let byte_size v = 54 | let words = Obj.reachable_words (Obj.repr v) in 55 | (words * (Sys.word_size / 8)) 56 | (* [case_fold_count] is the number of characters whose case folding is not the character itself. *) 57 | let case_fold_count = 58 | let add acc u = match Uucp.Case.Fold.fold u with 59 | | `Self -> acc | `Uchars _ -> acc + 1 60 | in 61 | fold_uchars add 0 62 | (* [test ()] prints the size of each data set on stdout instead of generating code. *) 63 | let test () = 64 | Printf.printf "whitespace: %d characters\n" (List.length whitespace_list); 65 | Printf.printf "punctuation: %d characters\n" (List.length punctuation_list); 66 | Printf.printf "non-id case fold: %d characters\n" case_fold_count; 67 | () 68 | (* Current UTC year, inserted in the copyright line of the generated file. *) 69 | let year = (Unix.gmtime (Unix.gettimeofday ())).Unix.tm_year + 1900 70 | (* [gen ppf ()] writes the generated OCaml module on [ppf]: a license header, the Unicode version and the whitespace, punctuation and case folding arrays. The header names this script (support/generate_data.ml) as the origin of the data. *) 71 | let gen ppf () = 72 | let pp_cp ppf u = Format.fprintf ppf "0x%04X" (Uchar.to_int u) in 73 | let pp_binding ppf (u, s) = Format.fprintf ppf "%a, \"%s\"" pp_cp u s in 74 | let pp_sep ppf () = Format.fprintf ppf ";@ " in 75 | let pp_cps ppf us = Format.pp_print_list ~pp_sep pp_cp ppf us in 76 | let pp_map ppf m = Format.pp_print_list ~pp_sep pp_binding ppf m in 77 | Format.fprintf ppf 78 | {|(*--------------------------------------------------------------------------- 79 | Copyright (c) %d The cmarkit programmers. All rights reserved. 80 | SPDX-License-Identifier: ISC 81 | ---------------------------------------------------------------------------*) 82 | 83 | (* Do not edit. Data generated by support/generate_data.ml *) 84 | 85 | let unicode_version = "%s" 86 | 87 | let whitespace =@? 88 | @[<1>[|%a|]@] 89 | 90 | let punctuation =@? 91 | @[<1>[|%a|]@] 92 | 93 | let case_fold =@? 
94 | @[<1>[|%a|]@] 95 | %!|} year Uucp.unicode_version pp_cps whitespace_list pp_cps punctuation_list 96 | pp_map case_fold_map 97 | 98 | (* [process do_test outf] runs [test ()] when [do_test] is true. Otherwise it writes the output of [gen] to the file [outf] opened in binary mode, or to stdout when [outf] is a single dash. The channel is closed in both the success and the [Sys_error] path; a [Sys_error] is reported on stderr and the program exits with code 1. *) 99 | let process do_test outf = 100 | if do_test then test () else 101 | let generate pp outf = 102 | try 103 | let oc = if outf = "-" then stdout else open_out_bin outf in 104 | try 105 | let ppf = Format.formatter_of_out_channel oc in 106 | pp ppf (); 107 | Format.pp_print_flush ppf (); 108 | close_out oc 109 | with Sys_error _ as e -> close_out oc; raise e 110 | with Sys_error e -> Printf.eprintf "%s\n%!" e; exit 1 111 | in 112 | generate gen outf 113 | (* [main ()] parses the command line (-o sets the output file, -t switches to test mode, positional arguments are rejected), runs [process] with the output defaulting to src/cmarkit_data_uchar.ml and returns 0. *) 114 | let main () = 115 | let usage = str "Usage: %s [OPTION]…\nOptions:" exec in 116 | let test = ref false in 117 | let outf = ref None in 118 | let options = 119 | [ "-o", Arg.String (fun s -> outf := Some s), 120 | " output file, defaults to src/cmarkit_data_uchar.ml"; (* Arg.align splits a doc string at its first space; the leading space keeps the whole sentence in the description column, as for -o above. *) 121 | "-t", Arg.Set test, 122 | " Do not generate, test data"; 123 | ] 124 | in 125 | let no_pos s = raise (Arg.Bad (str "Don't know what to do with %S" s)) in 126 | Arg.parse (Arg.align options) no_pos usage; 127 | let outf = Option.value ~default:"src/cmarkit_data_uchar.ml" !outf in 128 | process !test outf; 129 | 0 130 | (* Run [main] and exit with its result unless loaded in the interactive toplevel. *) 131 | let () = if !Sys.interactive then () else exit (main ()) 132 | -------------------------------------------------------------------------------- /test/expect/basic.latex: -------------------------------------------------------------------------------- 1 | \section{Basic tests} 2 | 3 | Basic tests for all CommonMark constructs. 4 | 5 | \subsection{Testing autolinks} 6 | 7 | This is an \url{http://example.org} and another one \url{mailto:you@example.org}. 8 | 9 | \subsection{Testing breaks} 10 | 11 | A line ending (not in a code span or HTML tag) that is preceded by two 12 | or more spaces and does not occur at the end of a block is parsed as a 13 | hard line break. 14 | 15 | So this means we had softbreaks so far and now we get \\ 16 | a hard break\\ 17 | and another one. 
18 | 19 | \begin{quote} 20 | So this means we had softbreaks so far and now we get \\ 21 | a hard break\\ 22 | and another one. 23 | This is very soooft. 24 | \end{quote} 25 | 26 | \subsection{Testing code spans} 27 | 28 | This is a multi-line code\texttt{ code span `` it has backticks in there} 29 | 30 | Sometimes code spans \texttt{`can have really ``` strange layout}. Do you fancy \texttt{`A\_polymorphic\_variant} ? 31 | 32 | \subsection{Testing emphasis} 33 | 34 | There is \emph{more} than \emph{one syntax} for \textbf{emphasis} and \textbf{strong 35 | emphasis}. We should be careful about \textbf{embedded * marker}. This 36 | will be \textbf{tricky * to handle}. This \emph{is not ** what} you want ? 37 | 38 | \subsection{Testing links, images and link reference definitions} 39 | 40 | This is an \protect\includegraphics{/heyho} 41 | 42 | That is totally \href{/hohoho}{colla psed} and 43 | that is \href{/veryshort}{\texttt{short cuted}} 44 | 45 | Shortcuts can be better than \href{/veryshort}{full references} but not 46 | always and we'd like to trip their \href{/veryshort}{label}. 47 | 48 | \begin{quote}\end{quote} 49 | 50 | \subsection{Testing raw HTML} 51 | 52 | Haha % Raw CommonMark HTML omitted 53 | a% Raw CommonMark HTML omitted 54 | % Raw CommonMark HTML omitted 55 | hihi this is not the end yet. 56 | 57 | foo % Raw CommonMark HTML omitted 58 | u% Raw CommonMark HTML omitted 59 | 60 | 61 | \begin{quote} 62 | Haha % Raw CommonMark HTML omitted 63 | a% Raw CommonMark HTML omitted 64 | % Raw CommonMark HTML omitted 65 | hihi this is not the end yet. 66 | \end{quote} 67 | 68 | \subsection{Testing blank lines} 69 | 70 | Impressive isn't it ? 
71 | 72 | \subsection{Testing block quotes} 73 | 74 | \begin{quote} 75 | \begin{quote} 76 | How is 77 | Nestyfing going on 78 | These irregularities \textbf{will} normalize 79 | We keep only the first block quote indent 80 | \end{quote} 81 | \end{quote} 82 | 83 | \begin{quote} 84 | \subsection{Further tests} 85 | \end{quote} 86 | 87 | We need a little quote here 88 | 89 | \begin{quote} 90 | It's warranted. 91 | \end{quote} 92 | 93 | \subsection{Testing code blocks} 94 | 95 | \begin{lstlisting}[language=layout] 96 | \end{lstlisting} 97 | 98 | \begin{lstlisting}[language=ocaml] 99 | 100 | type t = 101 | | A of int 102 | | B of string 103 | 104 | let square x = x *. x 105 | \end{lstlisting} 106 | 107 | The indented code block: 108 | 109 | \begin{verbatim} 110 | a b c d 111 | a b c d 112 | a b c d 113 | 114 | 115 | a 116 | a b c 117 | \end{verbatim} 118 | 119 | \begin{quote} 120 | \begin{lstlisting}[language=ocaml] 121 | 122 | type t = 123 | | A of int 124 | | B of string 125 | 126 | let square x = x *. x 127 | \end{lstlisting} 128 | \end{quote} 129 | 130 | \subsection{Testing headings} 131 | 132 | \section{aaa 133 | aaaa} 134 | 135 | \begin{quote} 136 | \subsection{bbb \texttt{hey} 137 | bbbb} 138 | \end{quote} 139 | 140 | \section{That's one way} 141 | 142 | \subsubsection{It's a long way to the heading} 143 | 144 | \subsection{Testing HTML block} 145 | 146 | % CommonMark HTML block omitted 147 | 148 | \begin{itemize} 149 | \item{} 150 | % CommonMark HTML block omitted 151 | \end{itemize} 152 | 153 | \subsection{Testing lists} 154 | 155 | The \texttt{square} function is the root. There are reasons for this: 156 | 157 | \begin{enumerate} 158 | \item{} 159 | There is no reason. There should be a reason or an \url{http://example.org} 160 | \item{} 161 | Maybe that's the reason. But it may not be the reason. 162 | \item{} 163 | Is reason the only tool ? 
164 | \end{enumerate} 165 | 166 | \begin{quote} 167 | Quoted bullets 168 | 169 | \begin{itemize} 170 | \item{} 171 | Is this important ? 172 | \end{itemize} 173 | \end{quote} 174 | 175 | \begin{itemize} 176 | \item{} 177 | \begin{itemize} 178 | \item{} 179 | Well it's in the spec 180 | \end{itemize} 181 | \item{}\end{itemize} 182 | 183 | Empty list item above 184 | 185 | \subsection{Testing paragraphs} 186 | 187 | We really want your paragraph layout preserved. 188 | Really ? 189 | Really. 190 | Really. 191 | Really. 192 | 193 | \begin{quote} 194 | We really want your paragraph layout preserved. 195 | Really ? 196 | Really. 197 | Really. 198 | Really. 199 | \end{quote} 200 | 201 | \subsection{Testing thematic breaks} 202 | 203 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 204 | 205 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 206 | 207 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 208 | 209 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 210 | 211 | \begin{quote} 212 | \begin{center}\rule{0.5\linewidth}{.25pt}\end{center} 213 | \end{quote} 214 | -------------------------------------------------------------------------------- /src/tool/cmd_commonmark.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmarkit_std 7 | open Result.Syntax 8 | (* [diff src render] writes [src] and [render] to the files src and render in a temporary directory made current, then runs git diff --no-index on them through [Sys.command] with the system and global git configuration pointed at /dev/null. Evaluates to [Ok code] with the exit code of the command, or to an [Error] coming from the [Os] helpers. On Windows each variable is set with a quoted assignment: with an unquoted one, cmd.exe would store the space that precedes the && operator as part of the value. *) 9 | let diff src render = 10 | let env = ["GIT_CONFIG_SYSTEM=/dev/null"; "GIT_CONFIG_GLOBAL=/dev/null"; ] in 11 | let set_env = match Sys.win32 with 12 | | true -> String.concat "" (List.map (fun e -> "set \"" ^ e ^ "\" && ") env) 13 | | false -> String.concat " " env 14 | in 15 | let diff = "git diff --ws-error-highlight=all --no-index --patience " in 16 | let src_file = "src" and render_file = "render" in 17 | let cmd = String.concat " " [set_env; diff; src_file; render_file] in 18 | Result.join @@ Result.join @@ Os.with_tmp_dir @@ fun dir -> 19 | Os.with_cwd dir @@ fun () -> 20 | let* () = Os.write_file src_file src in 21 | let* () = Os.write_file render_file render in 22 | Ok (Sys.command cmd) 23 | (* [commonmark ~files ~strict ~no_layout ~diff ~html_diff] implements the command and evaluates to its exit code. [html_diff] takes precedence over [diff]; without either the CommonMark rendering of each file is printed on stdout. In both diff modes the per-file texts are joined with a newline before being compared; a difference yields [Cmarkit_cli.Exit.err_diff] and a failure to run the comparison is logged and yields [Cmdliner.Cmd.Exit.some_error]. *) 24 | let commonmark ~files ~strict ~no_layout ~diff:dodiff ~html_diff = 25 | let op = match html_diff, dodiff with 26 | | true, _ -> `Html_diff | false, true -> `Diff | false, false -> `Render 27 | in 28 | let layout = not no_layout in 29 | let commonmark ~file contents = 30 | let doc = Cmarkit.Doc.of_string ~file ~layout ~strict contents in 31 | Cmarkit_commonmark.of_doc doc 32 | in 33 | match op with 34 | | `Render -> 35 | (* NOTE(review): renders are printed back to back here, no newline is added between files although the man page says files are separated by a newline -- confirm which is intended. *) let output_cmark ~file src = print_string (commonmark ~file src) in 36 | Cmarkit_cli.process_files output_cmark files 37 | | `Diff -> 38 | let trips = ref [] in 39 | let add ~file src = trips := (src, commonmark ~file src) :: !trips in 40 | let c = Cmarkit_cli.process_files add files in 41 | if c <> 0 then c else 42 | let src = String.concat "\n" (List.rev_map fst !trips) in 43 | let outs = String.concat "\n" (List.rev_map snd !trips) in 44 | (match diff src outs with 45 | | Ok exit -> if exit = 0 then 0 else Cmarkit_cli.Exit.err_diff 46 | | Error err -> Log.err "%s" err; Cmdliner.Cmd.Exit.some_error) 47 | | `Html_diff -> 48 | let htmls = ref [] in 49 | let add ~file src = 50 | let doc 
= Cmarkit.Doc.of_string ~file ~layout ~strict src in 51 | let doc_html = Cmarkit_html.of_doc ~safe:false doc in 52 | let md = Cmarkit_commonmark.of_doc doc in 53 | let doc' = Cmarkit.Doc.of_string ~layout ~strict md in 54 | let doc_html' = Cmarkit_html.of_doc ~safe:false doc' in 55 | htmls := (doc_html, doc_html') :: !htmls 56 | in 57 | let c = Cmarkit_cli.process_files add files in 58 | if c <> 0 then c else 59 | let html = String.concat "\n" (List.rev_map fst !htmls) in 60 | let html' = String.concat "\n" (List.rev_map snd !htmls) in 61 | match diff html html' with 62 | | Ok exit -> if exit = 0 then 0 else Cmarkit_cli.Exit.err_diff 63 | | Error err -> Log.err "%s" err; Cmdliner.Cmd.Exit.some_error 64 | 65 | (* Command line interface *) 66 | 67 | open Cmdliner 68 | open Cmdliner.Term.Syntax 69 | (* [diff] is the --diff flag. From here on it shadows the [diff] function defined earlier in this file. *) 70 | let diff = 71 | let doc = 72 | "Output difference between the source and its CommonMark rendering \ 73 | (needs $(b,git) in your $(b,PATH)). If there are differences check \ 74 | that the HTML renderings do not differ with option $(b,--html-diff)." 75 | in 76 | Arg.(value & flag & info ["diff"] ~doc) 77 | (* [html_diff] is the --html-diff flag. *) 78 | let html_diff = 79 | let doc = 80 | "Output difference between the source HTML rendering and the HTML \ 81 | rendering of its CommonMark rendering (needs $(b,git) in your \ 82 | $(b,PATH)). If there are no differences the CommonMark rendering \ 83 | is said to be correct." 84 | in 85 | Arg.(value & flag & info ["html-diff"] ~doc) 86 | (* [cmd] is the Cmdliner command named commonmark: it documents the exit codes of [Cmarkit_cli.Exit.exits_with_err_diff], builds the man page and evaluates [commonmark] on the parsed arguments. *) 87 | let cmd = 88 | let doc = "Render CommonMark to CommonMark" in 89 | let exits = Cmarkit_cli.Exit.exits_with_err_diff in 90 | let man = [ 91 | `S Manpage.s_description; 92 | `P "$(cmd) outputs a CommonMark document. Multiple input files are \ 93 | concatenated and separated by a newline."; 94 | `Pre "$(cmd) $(b,README.md > README-trip.md)"; `Noblank; 95 | `Pre "$(cmd) $(b,--diff README.md)"; `Noblank; 96 | `Pre "$(cmd) $(b,--html-diff README.md)"; 97 | `P "Layout is preserved on a best-effort basis. 
Some things are not \ 98 | attempted like preserving entities and character references, \ 99 | preserving the exact line by line indentation layout of container \ 100 | blocks, preserving lazy continuation lines, preserving the \ 101 | indentation of blank lines, keeping track of used newlines \ 102 | except for the first one."; 103 | `P "Consult the documentation of the $(b,cmarkit) OCaml library for \ 104 | more details about the limitations."; 105 | `Blocks Cmarkit_cli.common_man; ] 106 | in (* The command term: the shared [files], [strict] and [no_layout] arguments of [Cmarkit_cli] plus the [diff] and [html_diff] flags of this file are passed to [commonmark]. *) 107 | Cmd.make (Cmd.info "commonmark" ~doc ~exits ~man) @@ 108 | let+ files = Cmarkit_cli.files and+ strict = Cmarkit_cli.strict 109 | and+ no_layout = Cmarkit_cli.no_layout and+ diff and+ html_diff in 110 | commonmark ~files ~strict ~no_layout ~diff ~html_diff 111 | -------------------------------------------------------------------------------- /test/test_issues.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open B0_testing 8 | (* [html ?safe ~strict md] is the HTML rendering of the CommonMark text [md]; [safe] defaults to true. *) 9 | let html ?(safe = true) ~strict md = 10 | Cmarkit_html.of_doc ~safe (Cmarkit.Doc.of_string ~strict md) 11 | (* [commonmark ?layout ~strict md] is the CommonMark rendering of [md]; [layout] defaults to true. *) 12 | let commonmark ?(layout = true) ~strict md = 13 | Cmarkit_commonmark.of_doc (Cmarkit.Doc.of_string ~layout ~strict md) 14 | (* [correct_commonmark_render ?layout ~strict ~fnd ~exp ()] parses both the found render [fnd] and the expected source [exp], renders each to HTML with [~safe:false] and passes the test when the two HTML strings are equal. Otherwise the test fails and the source, the Markdown diff and the HTML diff are logged. *) 15 | let correct_commonmark_render ?(layout = true) ~strict ~fnd ~exp () = 16 | let fnd_doc = Cmarkit.Doc.of_string ~layout ~strict fnd in 17 | let exp_doc = Cmarkit.Doc.of_string ~layout ~strict exp in 18 | let fnd_html = Cmarkit_html.of_doc ~safe:false fnd_doc in 19 | let exp_html = Cmarkit_html.of_doc ~safe:false exp_doc in 20 | if String.equal fnd_html exp_html then Test.pass () else 21 | begin 22 | let kind = if strict then "strict" else "extended" in 23 | Test.fail "Incorrect %s CommonMark rendering" kind; 24 | Test.log_raw 25 | "@[Source:@,%a@,Markdown render diff:@,%a\ 26 | HTML render diff:@,%a@]@?" 27 | Fmt.lines exp 28 | (Test.Diff.pp Test.T.lines ~fnd ~exp) () 29 | (Test.Diff.pp Test.T.lines ~fnd:fnd_html ~exp:exp_html) () 30 | end 31 | (* [checked_commonmark ?layout ~strict src] renders [src] to CommonMark, checks with [correct_commonmark_render] that the render has the same HTML as [src] and returns the render. *) 32 | let checked_commonmark ?layout ~strict src = 33 | let fnd = commonmark ?layout ~strict src in 34 | correct_commonmark_render ?layout ~strict ~fnd ~exp:src (); 35 | fnd 36 | 37 | (* Tests *) 38 | (* Issue #19: a number followed by a dot is rendered with an escaped dot only where the source itself escapes it or where it sits at the start of a line; each render is compared with the inline snapshot that follows it. *) 39 | let test_dot_escapes = 40 | Test.test "item marker escapes renders (#19)" @@ fun () -> 41 | let src = {|3.14 is pi|} in 42 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 43 | {|3.14 is pi|}; 44 | let src = {|pi is 3.14|} in 45 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 46 | {|pi is 3.14|}; 47 | let src = {|pi is approximately 3.|} in 48 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 49 | {|pi is approximately 3.|}; 50 | let src = {|3\. is approximately pi|} in 51 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 52 | {|3\. 
is approximately pi|}; 53 | let src = 54 | {|1\. 55 | Above needs an escape below does not (but we don't have the state) 56 | 1. 57 | |} 58 | in 59 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 60 | {|1\. 61 | Above needs an escape below does not (but we don't have the state) 62 | 1\. 63 | |}; 64 | () 65 | 66 | let test_hash_escapes = 67 | Test.test "hash escapes renders (#25)" @@ fun () -> 68 | let src = {|hello #world|} in 69 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 70 | {|hello #world|}; 71 | Snap.lines (checked_commonmark ~strict:false src) @@ __POS_OF__ 72 | {|hello #world|}; 73 | let src = {|## foo #\##|} in 74 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 75 | {|## foo ##\#|}; 76 | Snap.lines (checked_commonmark ~strict:false src) @@ __POS_OF__ 77 | {|## foo ##\#|}; 78 | let src = {|### foo ### |} in 79 | Snap.lines (checked_commonmark ~layout:false ~strict:true src) @@ __POS_OF__ 80 | {|### foo|}; 81 | Snap.lines (checked_commonmark ~strict:false src) @@ __POS_OF__ 82 | {|### foo ### |}; 83 | () 84 | 85 | let test_tilde_escapes = 86 | Test.test "tilde escapes renders (#20)" @@ fun () -> 87 | (* The escaping here differs in strict or non-strict mode because 88 | the AST is different *) 89 | let src = {|\~~~strike me~~|} in 90 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 91 | {|\~~~strike me\~~|}; 92 | Snap.lines (checked_commonmark ~strict:false src) @@ __POS_OF__ 93 | {|\~~~strike me~~|}; 94 | () 95 | 96 | let test_backtick_escapes = 97 | Test.test "backtick escapes renders (#26)" @@ fun () -> 98 | let src = {|```foo``|} (* This is not code *) in 99 | Snap.lines (checked_commonmark ~strict:true src) @@ __POS_OF__ 100 | {|\`\`\`foo\`\`|}; 101 | Snap.lines (checked_commonmark ~strict:false src) @@ __POS_OF__ 102 | {|\`\`\`foo\`\`|}; 103 | () 104 | 105 | let test_code_span_escape = 106 | Test.test "code span escape start (#21)" @@ fun () -> 107 | let this_is_code = 108 | 
{|\```the code``|} 109 | in 110 | Snap.lines (html ~strict:true this_is_code) @@ __POS_OF__ 111 | {|

`the code

112 | |}; 113 | Snap.lines (html ~strict:false this_is_code) @@ __POS_OF__ 114 | {|

`the code

115 | |}; 116 | () 117 | 118 | let test_nested_tasks = 119 | Test.test "nested tasks semantics (#24)" @@ fun () -> 120 | let tasks = (* This should be nested lists both with extensions and without *) 121 | {| 122 | - [ ] hey 123 | - [ ] ho 124 | - [ ] sub 125 | |} 126 | in 127 | Snap.lines (html ~strict:true tasks) @@ __POS_OF__ 128 | {|
    129 |
  • [ ] hey
  • 130 |
  • [ ] ho 131 |
      132 |
    • [ ] sub
    • 133 |
    134 |
  • 135 |
136 | |}; 137 | Snap.lines (html ~strict:false tasks) @@ __POS_OF__ 138 | {|
    139 |
  • hey
  • 140 |
  • ho 141 |
      142 |
    • sub
    • 143 |
    144 |
  • 145 |
146 | |}; 147 | let indentation_woes = (* Shows suboptimal indentation behaviour *) 148 | {| 149 | - [ ] task 150 | 151 | description 152 | |} 153 | in 154 | Snap.lines (html ~strict:false indentation_woes) @@ __POS_OF__ 155 | {|
    156 |
  • 157 |

    task

    158 |
    description
    159 | 
    160 |
  • 161 |
162 | |}; 163 | () 164 | 165 | let test_mapper_table_bug_14 = 166 | Test.test "mapper table bug (#14)" @@ fun () -> 167 | let table = 168 | "| a | b | c |\n\ 169 | |---|---|---|\n\ 170 | | a | b | c |\n\ 171 | | | b | c |\n\ 172 | | | | c |\n" 173 | in 174 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict:false table in 175 | let mdoc = Cmarkit.Mapper.map_doc (Cmarkit.Mapper.make ()) doc in 176 | let mdoc = Cmarkit_commonmark.of_doc mdoc in 177 | Snap.lines mdoc @@ __POS_OF__ 178 | "| a | b | c |\n\ 179 | |---|---|---|\n\ 180 | | a | b | c |\n\ 181 | | | b | c |\n\ 182 | | | | c |\n"; 183 | () 184 | 185 | let main () = Test.main @@ fun () -> Test.autorun () 186 | let () = if !Sys.interactive then () else exit (main ()) 187 | -------------------------------------------------------------------------------- /test/examples.ml: -------------------------------------------------------------------------------- 1 | (* This code is in the public domain *) 2 | 3 | (* index.mld *) 4 | 5 | let cmark_to_html : strict:bool -> safe:bool -> string -> string = 6 | fun ~strict ~safe md -> 7 | let doc = Cmarkit.Doc.of_string ~strict md in 8 | Cmarkit_html.of_doc ~safe doc 9 | 10 | let cmark_to_latex : strict:bool -> string -> string = 11 | fun ~strict md -> 12 | let doc = Cmarkit.Doc.of_string ~strict md in 13 | Cmarkit_latex.of_doc doc 14 | 15 | let cmark_to_commonmark : strict:bool -> string -> string = 16 | fun ~strict md -> 17 | let doc = Cmarkit.Doc.of_string ~layout:true ~strict md in 18 | Cmarkit_commonmark.of_doc doc 19 | 20 | (* Cmarkit_renderer *) 21 | 22 | type Cmarkit.Block.t += Doc of Cmarkit.Doc.t (* 1 *) 23 | 24 | let media_link c l = 25 | let has_ext s ext = String.ends_with ~suffix:ext s in 26 | let is_video s = List.exists (has_ext s) [".mp4"; ".webm"] in 27 | let is_audio s = List.exists (has_ext s) [".mp3"; ".flac"] in 28 | let defs = Cmarkit_renderer.Context.get_defs c in 29 | match Cmarkit.Inline.Link.reference_definition defs l with 30 | | Some 
Cmarkit.Link_definition.Def (ld, _) -> 31 | let start_tag = match Cmarkit.Link_definition.dest ld with 32 | | Some (src, _) when is_video src -> Some (" Some (" None 35 | in 36 | begin match start_tag with 37 | | None -> false (* let the default HTML renderer handle that *) 38 | | Some (start_tag, src) -> 39 | (* More could be done with the reference title and link text *) 40 | Cmarkit_renderer.Context.string c start_tag; 41 | Cmarkit_renderer.Context.string c {| src="|}; 42 | Cmarkit_html.pct_encoded_string c src; 43 | Cmarkit_renderer.Context.string c {|" />|}; 44 | true 45 | end 46 | | None | Some _ -> false (* let the default HTML renderer that *) 47 | 48 | let custom_html = 49 | let inline c = function 50 | | Cmarkit.Inline.Image (l, _) -> media_link c l 51 | | _ -> false (* let the default HTML renderer handle that *) 52 | in 53 | let block c = function 54 | | Doc d -> 55 | (* It's important to recurse via Cmarkit_renderer.Context.block *) 56 | Cmarkit_renderer.Context.block c (Cmarkit.Doc.block d); true 57 | | _ -> false (* let the default HTML renderer handle that *) 58 | in 59 | Cmarkit_renderer.make ~inline ~block () (* 2 *) 60 | 61 | let custom_html_of_doc ~safe doc = 62 | let default = Cmarkit_html.renderer ~safe () in 63 | let r = Cmarkit_renderer.compose default custom_html in (* 3 *) 64 | Cmarkit_renderer.doc_to_string r doc 65 | 66 | (* Cmarkit.Link_reference *) 67 | 68 | let wikilink = Cmarkit.Meta.key () (* A meta key to recognize them *) 69 | 70 | let make_wikilink label = (* Just a placeholder label definition *) 71 | let meta = Cmarkit.Meta.tag wikilink (Cmarkit.Label.meta label) in 72 | Cmarkit.Label.with_meta meta label 73 | 74 | let with_wikilinks = function 75 | | `Def _ as ctx -> Cmarkit.Label.default_resolver ctx 76 | | `Ref (_, _, (Some _ as def)) -> def (* As per doc definition *) 77 | | `Ref (_, ref, None) -> Some (make_wikilink ref) 78 | 79 | (* Cmarkit.Mapper *) 80 | 81 | let set_unknown_code_block_lang ~lang doc = 82 | let open 
Cmarkit in 83 | let default = lang, Meta.none in 84 | let block m = function 85 | | Block.Code_block (cb, meta) 86 | when Option.is_none (Block.Code_block.info_string cb) -> 87 | let layout = Block.Code_block.layout cb in 88 | let code = Block.Code_block.code cb in 89 | let cb = Block.Code_block.make ~layout ~info_string:default code in 90 | Mapper.ret (Block.Code_block (cb, meta)) 91 | | _ -> 92 | Mapper.default (* let the mapper thread the map *) 93 | in 94 | let mapper = Mapper.make ~block () in 95 | Mapper.map_doc mapper doc 96 | 97 | (* Cmarkit.Folder *) 98 | 99 | let code_block_langs doc = 100 | let open Cmarkit in 101 | let module String_set = Set.Make (String) in 102 | let block m acc = function 103 | | Block.Code_block (cb, _) -> 104 | let acc = match Block.Code_block.info_string cb with 105 | | None -> acc 106 | | Some (info, _) -> 107 | match Block.Code_block.language_of_info_string info with 108 | | None -> acc 109 | | Some (lang, _) -> String_set.add lang acc 110 | in 111 | Folder.ret acc 112 | | _ -> 113 | Folder.default (* let the folder thread the fold *) 114 | in 115 | let folder = Folder.make ~block () in 116 | let langs = Folder.fold_doc folder String_set.empty doc in 117 | String_set.elements langs 118 | 119 | (* Cmarkit_html *) 120 | 121 | let html_doc_of_md ?(lang = "en") ~title ~safe md = 122 | let doc = Cmarkit.Doc.of_string md in 123 | let r = Cmarkit_html.renderer ~safe () in 124 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 125 | let buffer_add_title = Cmarkit_html.buffer_add_html_escaped_string in 126 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 127 | {| 128 | 129 | 130 | 131 | %a 132 | 133 | 134 | %a 135 | |} 136 | lang buffer_add_title title buffer_add_doc doc 137 | 138 | (* Cmarkit_latex *) 139 | 140 | let latex_doc_of_md ?(title = "") md = 141 | let doc = Cmarkit.Doc.of_string md in 142 | let r = Cmarkit_latex.renderer () in 143 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 144 | let 
buffer_add_title = Cmarkit_latex.buffer_add_latex_escaped_string in 145 | let maketitle = if title = "" then "" else {|\maketitle|} in 146 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 147 | {|\documentclass{article} 148 | 149 | \usepackage{graphicx} 150 | \usepackage{enumitem} 151 | \usepackage{listings} 152 | \usepackage{hyperref} 153 | \usepackage[normalem]{ulem} 154 | \usepackage[scaled=0.8]{beramono} 155 | \usepackage{fontspec} 156 | 157 | \lstset{ 158 | columns=[c]fixed, 159 | basicstyle=\small\ttfamily, 160 | keywordstyle=\bfseries, 161 | upquote=true, 162 | commentstyle=\slshape, 163 | breaklines=true, 164 | showstringspaces=false} 165 | 166 | \lstdefinelanguage{ocaml}{language=[objective]caml, 167 | literate={'"'}{\textquotesingle "\textquotesingle}3 168 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 169 | } 170 | 171 | \title{%a} 172 | \begin{document} 173 | %s 174 | %a 175 | \end{document}|} buffer_add_title title maketitle buffer_add_doc doc 176 | -------------------------------------------------------------------------------- /B0.ml: -------------------------------------------------------------------------------- 1 | open B0_kit.V000 2 | open Result.Syntax 3 | 4 | let commonmark_version = 5 | (* If you update this, also update Cmarkit.commonmark_version 6 | and the links in src/*.mli *) 7 | "0.31.2" 8 | 9 | (* OCaml library names *) 10 | 11 | let cmarkit = B0_ocaml.libname "cmarkit" 12 | let cmdliner = B0_ocaml.libname "cmdliner" 13 | let uucp = B0_ocaml.libname "uucp" 14 | let unix = B0_ocaml.libname "unix" 15 | 16 | let b0_std = B0_ocaml.libname "b0.std" 17 | let b0_file = B0_ocaml.libname "b0.file" 18 | 19 | (* Libraries *) 20 | 21 | let cmarkit_lib = 22 | let srcs = [ `Dir ~/"src" ] in 23 | B0_ocaml.lib cmarkit ~name:"cmarkit-lib" ~doc:"The cmarkit library" ~srcs 24 | 25 | (* Tools *) 26 | 27 | let cmarkit_tool = 28 | let srcs = [ `Dir ~/"src/tool" ] in 29 | let requires = [cmarkit; cmdliner] in 30 | B0_ocaml.exe "cmarkit" 
~public:true ~doc:"The cmarkit tool" ~srcs ~requires 31 | 32 | (* Unicode data support *) 33 | 34 | let generate_data = 35 | let doc = "Generate cmarkit Unicode data" in 36 | let srcs = [ `File ~/"support/generate_data.ml" ] in 37 | let requires = [uucp; unix] in 38 | let meta = B0_meta.(empty |> tag build |> ~~ B0_unit.Action.cwd `Scope_dir) in 39 | B0_ocaml.exe "generate-data" ~doc ~srcs ~meta ~requires 40 | 41 | (* Tests *) 42 | 43 | let test ?(requires = []) = 44 | B0_ocaml.test ~requires:(cmdliner :: b0_std :: cmarkit :: requires) 45 | 46 | let update_spec_tests = 47 | let doc = "Update the CommonMark spec tests" in 48 | B0_unit.of_action "update_spec_tests" ~doc @@ 49 | fun env _ ~args:_ -> 50 | let tests = 51 | Fmt.str "https://spec.commonmark.org/%s/spec.json" commonmark_version 52 | in 53 | let dst = B0_env.in_scope_dir env ~/"test/spec.json" in 54 | let force = true and make_path = false in 55 | B0_action_kit.download_url env ~force ~make_path tests ~dst 56 | 57 | let spec_srcs = [`File ~/"test/spec.mli"; `File ~/"test/spec.ml"] 58 | 59 | let test_spec = 60 | let doc = "Test CommonMark specification conformance tests" in 61 | test ~/"test/test_spec.ml" ~doc ~srcs:spec_srcs 62 | 63 | let test_commonmark_render = 64 | let doc = "Test CommonMark renderer (notably on conformance tests)" in 65 | test ~/"test/test_commonmark_render.ml" ~doc ~srcs:spec_srcs 66 | 67 | let test_bugs = 68 | let doc = "Tests for reported issues" in 69 | let requires = [cmdliner; b0_std; cmarkit] in 70 | B0_ocaml.test ~/"test/test_issues.ml" ~doc ~requires 71 | 72 | let test_pathological = 73 | let doc = "Test a CommonMark parser on pathological tests" in 74 | test ~/"test/test_pathological.ml" ~doc ~requires:[unix] ~run:false 75 | 76 | let test_cmarkit_pathological = 77 | let doc = "Run pathological tests on the cmarkit tool" in 78 | let units = [cmarkit_tool; test_pathological] in 79 | let do_run = false (* switch to true when we pass *) in 80 | let meta = B0_meta.(empty |> tag 
test |> ~~ run do_run) in 81 | B0_unit.of_action' "test_cmarkit_pathological" ~doc ~units ~meta @@ 82 | fun env _ ~args -> 83 | let* cmarkit = B0_env.unit_exe_file env cmarkit_tool in 84 | let* test_pathological = B0_env.unit_exe_file_cmd env test_pathological in 85 | let cmd = Cmd.(test_pathological %% args % "--" %% path cmarkit % "html") in 86 | match Os.Cmd.run_status cmd with 87 | | Ok (`Exited c) -> Ok (Os.Exit.Code c) 88 | | Ok (`Signaled _ as st) -> Fmt.error "%a" Os.Cmd.pp_cmd_status (cmd, st) 89 | | Error _ as e -> e 90 | 91 | let examples = 92 | let doc = "Doc sample code" in 93 | B0_ocaml.test ~/"test/examples.ml" ~doc ~run:false ~requires:[cmarkit] 94 | 95 | let bench = 96 | let doc = "Simple standard CommonMark to HTML renderer for benchmarking" in 97 | let srcs = [ `File ~/"test/bench.ml" ] in 98 | let requires = [cmarkit] in 99 | let meta = B0_meta.(empty |> tag bench) in 100 | B0_ocaml.exe "bench" ~doc ~meta ~srcs ~requires 101 | 102 | (* Expectation tests 103 | 104 | FIXME eventually get rid of B0_expect. We need to meld it 105 | into B0_testing. 
*) 106 | 107 | let expect_cmarkit_renders ctx = 108 | let cmarkit = B0_expect.get_unit_exe_file_cmd ctx cmarkit_tool in 109 | let renderers = (* command, output suffix *) 110 | [ Cmd.(arg "html" % "-c" % "--unsafe"), ".html"; 111 | Cmd.(arg "latex"), ".latex"; 112 | Cmd.(arg "commonmark"), ".trip.md"; 113 | Cmd.(arg "locs"), ".locs"; 114 | Cmd.(arg "locs" % "--no-layout"), ".nolayout.locs"; ] 115 | in 116 | let test_renderer ctx cmarkit file (cmd, ext) = 117 | let with_exts = Fpath.has_ext ".exts.md" file in 118 | let cmd = Cmd.(cmd %% if' with_exts (arg "--exts") %% path file) in 119 | let cwd = B0_expect.base ctx and stdout = Fpath.(file -+ ext) in 120 | B0_expect.stdout ctx ~cwd ~stdout Cmd.(cmarkit %% cmd) 121 | in 122 | let test_file ctx cmarkit file = 123 | List.iter (test_renderer ctx cmarkit file) renderers 124 | in 125 | let test_files = 126 | let base_files = B0_expect.base_files ctx ~rel:true ~recurse:false in 127 | let input f = Fpath.has_ext ".md" f && not (Fpath.has_ext ".trip.md" f) in 128 | List.filter input base_files 129 | in 130 | List.iter (test_file ctx cmarkit) test_files 131 | 132 | let expect = 133 | let doc = "Test expectations" in 134 | let meta = B0_meta.(empty |> tag test |> tag run) in 135 | let units = [cmarkit_tool] in 136 | B0_unit.of_action' "expect" ~meta ~units ~doc @@ 137 | B0_expect.action_func ~base:(Fpath.v "test/expect") @@ fun ctx -> 138 | expect_cmarkit_renders ctx; 139 | () 140 | 141 | (* Packs *) 142 | 143 | let default = 144 | let meta = 145 | B0_meta.empty 146 | |> ~~ B0_meta.authors ["The cmarkit programmers"] 147 | |> ~~ B0_meta.maintainers ["Daniel Bünzli "] 148 | |> ~~ B0_meta.homepage "https://erratique.ch/software/cmarkit" 149 | |> ~~ B0_meta.online_doc "https://erratique.ch/software/cmarkit/doc" 150 | |> ~~ B0_meta.licenses ["ISC"] 151 | |> ~~ B0_meta.repo "git+https://erratique.ch/repos/cmarkit.git" 152 | |> ~~ B0_meta.issues "https://github.com/dbuenzli/cmarkit/issues" 153 | |> ~~ B0_meta.description_tags 154 | 
["codec"; "commonmark"; "markdown"; "org:erratique"; ] 155 | |> B0_meta.tag B0_opam.tag 156 | |> ~~ B0_opam.depopts ["cmdliner", ""] 157 | |> ~~ B0_opam.conflicts [ "cmdliner", {|< "2.0.0"|}] 158 | |> ~~ B0_opam.depends 159 | [ "ocaml", {|>= "4.14.0"|}; 160 | "ocamlfind", {|build|}; 161 | "ocamlbuild", {|build|}; 162 | "topkg", {|build & >= "1.1.0"|}; 163 | "uucp", {|dev|}; 164 | "b0", {|dev & with-test|}; 165 | ] 166 | |> ~~ B0_opam.build 167 | {|[["ocaml" "pkg/pkg.ml" "build" "--dev-pkg" "%{dev}%" 168 | "--with-cmdliner" "%{cmdliner:installed}%"] 169 | ["cmdliner" "install" "tool-support" 170 | "--update-opam-install=%{_:name}%.install" 171 | "_build/src/tool/cmarkit_main.native:cmarkit" {ocaml:native} 172 | "_build/src/tool/cmarkit_main.byte:cmarkit" {!ocaml:native} 173 | "_build/cmdliner-install"] {cmdliner:installed} ]|} 174 | in 175 | B0_pack.make "default" ~doc:"cmarkit package" ~meta ~locked:true @@ 176 | B0_unit.list () 177 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.nolayout.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.exts.md", lines 1-32 3 | Heading, level 1: 4 | File "bugs.exts.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.exts.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.exts.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.exts.md", line 3 11 | Paragraph: 12 | File "bugs.exts.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.exts.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.exts.md", line 4, characters 0-64 17 | Soft break: 18 | File "bugs.exts.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "-" 21 | Layout after: 22 | File "-" 23 | Text: 24 | File "bugs.exts.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.exts.md", line 6 27 | Code block: 28 | File "bugs.exts.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "-" 31 | 
Code line: 32 | File "bugs.exts.md", line 8, characters 0-10 33 | Code line: 34 | File "bugs.exts.md", line 9 35 | Code line: 36 | File "bugs.exts.md", line 10, characters 0-25 37 | Closing fence: 38 | File "-" 39 | Blank line: 40 | File "bugs.exts.md", line 12 41 | Heading, level 1: 42 | File "bugs.exts.md", line 13, characters 0-9 43 | Text: 44 | File "bugs.exts.md", line 13, characters 2-9 45 | Blank line: 46 | File "bugs.exts.md", line 14 47 | Paragraph: 48 | File "bugs.exts.md", lines 15-16, characters 0-12 49 | Inlines: 50 | File "bugs.exts.md", lines 15-16, characters 0-12 51 | Text: 52 | File "bugs.exts.md", line 15, characters 0-68 53 | Soft break: 54 | File "bugs.exts.md", lines 15-16, characters 68-0 55 | Layout before: 56 | File "-" 57 | Layout after: 58 | File "-" 59 | Text: 60 | File "bugs.exts.md", line 16, characters 0-12 61 | Blank line: 62 | File "bugs.exts.md", line 17 63 | Table: 64 | File "bugs.exts.md", lines 18-25, characters 0-27 65 | Header row: 66 | File "bugs.exts.md", line 18, characters 0-27 67 | Text: 68 | File "bugs.exts.md", line 18, characters 3-6 69 | Separator line: 70 | File "bugs.exts.md", line 19, characters 0-27 71 | Separator: 72 | File "bugs.exts.md", line 19, characters 1-26 73 | Data row: 74 | File "bugs.exts.md", line 20, characters 0-27 75 | Inlines: 76 | File "bugs.exts.md", line 20, characters 2-12 77 | Code span: 78 | File "bugs.exts.md", line 20, characters 2-5 79 | Code span line: 80 | File "bugs.exts.md", line 20, characters 3-4 81 | Text: 82 | File "bugs.exts.md", line 20, characters 5-9 83 | Code span: 84 | File "bugs.exts.md", line 20, characters 9-12 85 | Code span line: 86 | File "bugs.exts.md", line 20, characters 10-11 87 | Data row: 88 | File "bugs.exts.md", line 21, characters 0-27 89 | Inlines: 90 | File "bugs.exts.md", line 21, characters 2-25 91 | Text: 92 | File "bugs.exts.md", line 21, characters 2-9 93 | Code span: 94 | File "bugs.exts.md", line 21, characters 9-12 95 | Code span line: 96 | File 
"bugs.exts.md", line 21, characters 10-11 97 | Text: 98 | File "bugs.exts.md", line 21, characters 12-16 99 | Code span: 100 | File "bugs.exts.md", line 21, characters 16-19 101 | Code span line: 102 | File "bugs.exts.md", line 21, characters 17-18 103 | Text: 104 | File "bugs.exts.md", line 21, characters 19-25 105 | Data row: 106 | File "bugs.exts.md", line 22, characters 0-27 107 | Inlines: 108 | File "bugs.exts.md", line 22, characters 2-24 109 | Text: 110 | File "bugs.exts.md", line 22, characters 2-9 111 | Code span: 112 | File "bugs.exts.md", line 22, characters 9-12 113 | Code span line: 114 | File "bugs.exts.md", line 22, characters 10-11 115 | Text: 116 | File "bugs.exts.md", line 22, characters 12-16 117 | Code span: 118 | File "bugs.exts.md", line 22, characters 16-19 119 | Code span line: 120 | File "bugs.exts.md", line 22, characters 17-18 121 | Text: 122 | File "bugs.exts.md", line 22, characters 19-24 123 | Data row: 124 | File "bugs.exts.md", line 23, characters 0-27 125 | Inlines: 126 | File "bugs.exts.md", line 23, characters 2-21 127 | Text: 128 | File "bugs.exts.md", line 23, characters 2-8 129 | Code span: 130 | File "bugs.exts.md", line 23, characters 8-11 131 | Code span line: 132 | File "bugs.exts.md", line 23, characters 9-10 133 | Text: 134 | File "bugs.exts.md", line 23, characters 11-13 135 | Code span: 136 | File "bugs.exts.md", line 23, characters 13-16 137 | Code span line: 138 | File "bugs.exts.md", line 23, characters 14-15 139 | Text: 140 | File "bugs.exts.md", line 23, characters 16-21 141 | Data row: 142 | File "bugs.exts.md", line 24, characters 0-27 143 | Inlines: 144 | File "bugs.exts.md", line 24, characters 2-8 145 | Emphasis: 146 | File "bugs.exts.md", line 24, characters 2-5 147 | Text: 148 | File "bugs.exts.md", line 24, characters 3-4 149 | Code span: 150 | File "bugs.exts.md", line 24, characters 5-8 151 | Code span line: 152 | File "bugs.exts.md", line 24, characters 6-7 153 | Data row: 154 | File "bugs.exts.md", line 
25, characters 0-27 155 | Inlines: 156 | File "bugs.exts.md", line 25, characters 2-12 157 | Raw HTML: 158 | File "bugs.exts.md", line 25, characters 2-5 159 | Raw HTML line: 160 | File "bugs.exts.md", line 25, characters 2-5 161 | Text: 162 | File "bugs.exts.md", line 25, characters 5-8 163 | Raw HTML: 164 | File "bugs.exts.md", line 25, characters 8-12 165 | Raw HTML line: 166 | File "bugs.exts.md", line 25, characters 8-12 167 | Blank line: 168 | File "bugs.exts.md", line 26 169 | Heading, level 1: 170 | File "bugs.exts.md", line 27, characters 0-9 171 | Text: 172 | File "bugs.exts.md", line 27, characters 2-9 173 | Blank line: 174 | File "bugs.exts.md", line 28 175 | Paragraph: 176 | File "bugs.exts.md", line 29, characters 0-44 177 | Text: 178 | File "bugs.exts.md", line 29, characters 0-44 179 | Blank line: 180 | File "bugs.exts.md", line 30 181 | List (tight:true): 182 | File "bugs.exts.md", line 31, characters 0-29 183 | List item: 184 | File "bugs.exts.md", line 31, characters 0-29 185 | List marker: 186 | File "bugs.exts.md", line 31, characters 0-1 187 | Task marker: 188 | File "bugs.exts.md", line 31, characters 2-5 189 | Paragraph: 190 | File "bugs.exts.md", line 31, characters 6-29 191 | Text: 192 | File "bugs.exts.md", line 31, characters 6-29 193 | Blank line: 194 | File "bugs.exts.md", line 32 -------------------------------------------------------------------------------- /test/expect/bugs.exts.locs: -------------------------------------------------------------------------------- 1 | Blocks: 2 | File "bugs.exts.md", lines 1-32 3 | Heading, level 1: 4 | File "bugs.exts.md", lines 1-2, characters 0-4 5 | Text: 6 | File "bugs.exts.md", line 1, characters 0-4 7 | Setext underline: 8 | File "bugs.exts.md", line 2, characters 0-4 9 | Blank line: 10 | File "bugs.exts.md", line 3 11 | Paragraph: 12 | File "bugs.exts.md", lines 4-5, characters 0-8 13 | Inlines: 14 | File "bugs.exts.md", lines 4-5, characters 0-8 15 | Text: 16 | File "bugs.exts.md", line 4, 
characters 0-64 17 | Soft break: 18 | File "bugs.exts.md", lines 4-5, characters 64-0 19 | Layout before: 20 | File "bugs.exts.md", line 4, characters 64-65 21 | Layout after: 22 | File "bugs.exts.md", line 5 23 | Text: 24 | File "bugs.exts.md", line 5, characters 0-8 25 | Blank line: 26 | File "bugs.exts.md", line 6 27 | Code block: 28 | File "bugs.exts.md", lines 7-11, characters 0-3 29 | Opening fence: 30 | File "bugs.exts.md", line 7, characters 0-3 31 | Code line: 32 | File "bugs.exts.md", line 8, characters 0-10 33 | Code line: 34 | File "bugs.exts.md", line 9 35 | Code line: 36 | File "bugs.exts.md", line 10, characters 0-25 37 | Closing fence: 38 | File "bugs.exts.md", line 11, characters 0-3 39 | Blank line: 40 | File "bugs.exts.md", line 12 41 | Heading, level 1: 42 | File "bugs.exts.md", line 13, characters 0-9 43 | Text: 44 | File "bugs.exts.md", line 13, characters 2-9 45 | Blank line: 46 | File "bugs.exts.md", line 14 47 | Paragraph: 48 | File "bugs.exts.md", lines 15-16, characters 0-12 49 | Inlines: 50 | File "bugs.exts.md", lines 15-16, characters 0-12 51 | Text: 52 | File "bugs.exts.md", line 15, characters 0-68 53 | Soft break: 54 | File "bugs.exts.md", lines 15-16, characters 68-0 55 | Layout before: 56 | File "bugs.exts.md", line 15, characters 68-68 57 | Layout after: 58 | File "bugs.exts.md", line 16 59 | Text: 60 | File "bugs.exts.md", line 16, characters 0-12 61 | Blank line: 62 | File "bugs.exts.md", line 17 63 | Table: 64 | File "bugs.exts.md", lines 18-25, characters 0-27 65 | Header row: 66 | File "bugs.exts.md", line 18, characters 0-27 67 | Text: 68 | File "bugs.exts.md", line 18, characters 3-6 69 | Separator line: 70 | File "bugs.exts.md", line 19, characters 0-27 71 | Separator: 72 | File "bugs.exts.md", line 19, characters 1-26 73 | Data row: 74 | File "bugs.exts.md", line 20, characters 0-27 75 | Inlines: 76 | File "bugs.exts.md", line 20, characters 2-12 77 | Code span: 78 | File "bugs.exts.md", line 20, characters 2-5 79 | Code 
span line: 80 | File "bugs.exts.md", line 20, characters 3-4 81 | Text: 82 | File "bugs.exts.md", line 20, characters 5-9 83 | Code span: 84 | File "bugs.exts.md", line 20, characters 9-12 85 | Code span line: 86 | File "bugs.exts.md", line 20, characters 10-11 87 | Data row: 88 | File "bugs.exts.md", line 21, characters 0-27 89 | Inlines: 90 | File "bugs.exts.md", line 21, characters 2-25 91 | Text: 92 | File "bugs.exts.md", line 21, characters 2-9 93 | Code span: 94 | File "bugs.exts.md", line 21, characters 9-12 95 | Code span line: 96 | File "bugs.exts.md", line 21, characters 10-11 97 | Text: 98 | File "bugs.exts.md", line 21, characters 12-16 99 | Code span: 100 | File "bugs.exts.md", line 21, characters 16-19 101 | Code span line: 102 | File "bugs.exts.md", line 21, characters 17-18 103 | Text: 104 | File "bugs.exts.md", line 21, characters 19-25 105 | Data row: 106 | File "bugs.exts.md", line 22, characters 0-27 107 | Inlines: 108 | File "bugs.exts.md", line 22, characters 2-24 109 | Text: 110 | File "bugs.exts.md", line 22, characters 2-9 111 | Code span: 112 | File "bugs.exts.md", line 22, characters 9-12 113 | Code span line: 114 | File "bugs.exts.md", line 22, characters 10-11 115 | Text: 116 | File "bugs.exts.md", line 22, characters 12-16 117 | Code span: 118 | File "bugs.exts.md", line 22, characters 16-19 119 | Code span line: 120 | File "bugs.exts.md", line 22, characters 17-18 121 | Text: 122 | File "bugs.exts.md", line 22, characters 19-24 123 | Data row: 124 | File "bugs.exts.md", line 23, characters 0-27 125 | Inlines: 126 | File "bugs.exts.md", line 23, characters 2-21 127 | Text: 128 | File "bugs.exts.md", line 23, characters 2-8 129 | Code span: 130 | File "bugs.exts.md", line 23, characters 8-11 131 | Code span line: 132 | File "bugs.exts.md", line 23, characters 9-10 133 | Text: 134 | File "bugs.exts.md", line 23, characters 11-13 135 | Code span: 136 | File "bugs.exts.md", line 23, characters 13-16 137 | Code span line: 138 | File 
"bugs.exts.md", line 23, characters 14-15 139 | Text: 140 | File "bugs.exts.md", line 23, characters 16-21 141 | Data row: 142 | File "bugs.exts.md", line 24, characters 0-27 143 | Inlines: 144 | File "bugs.exts.md", line 24, characters 2-8 145 | Emphasis: 146 | File "bugs.exts.md", line 24, characters 2-5 147 | Text: 148 | File "bugs.exts.md", line 24, characters 3-4 149 | Code span: 150 | File "bugs.exts.md", line 24, characters 5-8 151 | Code span line: 152 | File "bugs.exts.md", line 24, characters 6-7 153 | Data row: 154 | File "bugs.exts.md", line 25, characters 0-27 155 | Inlines: 156 | File "bugs.exts.md", line 25, characters 2-12 157 | Raw HTML: 158 | File "bugs.exts.md", line 25, characters 2-5 159 | Raw HTML line: 160 | File "bugs.exts.md", line 25, characters 2-5 161 | Text: 162 | File "bugs.exts.md", line 25, characters 5-8 163 | Raw HTML: 164 | File "bugs.exts.md", line 25, characters 8-12 165 | Raw HTML line: 166 | File "bugs.exts.md", line 25, characters 8-12 167 | Blank line: 168 | File "bugs.exts.md", line 26 169 | Heading, level 1: 170 | File "bugs.exts.md", line 27, characters 0-9 171 | Text: 172 | File "bugs.exts.md", line 27, characters 2-9 173 | Blank line: 174 | File "bugs.exts.md", line 28 175 | Paragraph: 176 | File "bugs.exts.md", line 29, characters 0-44 177 | Text: 178 | File "bugs.exts.md", line 29, characters 0-44 179 | Blank line: 180 | File "bugs.exts.md", line 30 181 | List (tight:true): 182 | File "bugs.exts.md", line 31, characters 0-29 183 | List item: 184 | File "bugs.exts.md", line 31, characters 0-29 185 | List marker: 186 | File "bugs.exts.md", line 31, characters 0-1 187 | Task marker: 188 | File "bugs.exts.md", line 31, characters 2-5 189 | Paragraph: 190 | File "bugs.exts.md", line 31, characters 6-29 191 | Text: 192 | File "bugs.exts.md", line 31, characters 6-29 193 | Blank line: 194 | File "bugs.exts.md", line 32 -------------------------------------------------------------------------------- /src/cmarkit_html.mli: 
-------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to HTML. 7 | 8 | Generates HTML fragments, consult the 9 | {{!integration}integration notes} for requirements on the webpage. 10 | 11 | See {{!page-index.quick}a quick example} and 12 | {{!page_frame}another one}. 13 | 14 | {b Warning.} Rendering outputs are unstable, they may be tweaked 15 | even between minor versions of the library. *) 16 | 17 | (** {1:rendering Rendering} *) 18 | 19 | val of_doc : ?backend_blocks:bool -> safe:bool -> Cmarkit.Doc.t -> string 20 | (** [of_doc ~safe d] is an HTML fragment for [d]. See {!renderer} 21 | for more details and documentation about rendering options. *) 22 | 23 | (** {1:renderers Renderers} *) 24 | 25 | val renderer : ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t 26 | (** [renderer ~safe ()] is the default HTML renderer. This renders the 27 | strict CommonMark abstract syntax tree and the supported Cmarkit 28 | {{!Cmarkit.extensions}extensions}. 29 | 30 | The inline, block and document renderers always return 31 | [true]. Unknown block and inline values are rendered by an HTML 32 | comment. 33 | 34 | The following options are available: 35 | 36 | {ul 37 | {- [safe], if [true] {{!Cmarkit.Block.extension-Html_block}HTML blocks} and 38 | {{!Cmarkit.Inline.extension-Raw_html}raw HTML inlines} are discarded and 39 | replaced by an HTML comment in the output. Besides the URLs of 40 | autolinks, links and images that satisfy 41 | {!Cmarkit.Inline.Link.is_unsafe} are replaced by the empty string. 
42 | 43 | Using safe renderings is a good first step at preventing 44 | {{:https://en.wikipedia.org/wiki/Cross-site_scripting}XSS} from 45 | untrusted user inputs but you should rather post-process rendering 46 | outputs with a dedicated HTML sanitizer.} 47 | {- [backend_blocks], if [true], code blocks with language [=html] 48 | are written verbatim in the output (iff [safe] is [false]) and 49 | any other code block whose language starts with [=] is 50 | dropped. Defaults to [false].}} 51 | 52 | See {{!Cmarkit_renderer.example}this example} to extend or 53 | selectively override the renderer. *) 54 | 55 | val xhtml_renderer : 56 | ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t 57 | (** [xhtml_renderer] is like {!val-renderer} but explicitly closes 58 | empty tags to possibly make the output valid XML. Note that it 59 | still renders HTML blocks and inline raw HTML unless {!safe} is 60 | [true] (which also suppresses some URLs). 61 | 62 | See {{!Cmarkit_renderer.example}this example} to extend or 63 | selectively override the renderer. *) 64 | 65 | (** {1:render Render functions} 66 | 67 | Only useful if you extend the renderer. *) 68 | 69 | val safe : Cmarkit_renderer.context -> bool 70 | (** [safe c] is [true] if a safe rendering is requested. 71 | See {!renderer} for more information. *) 72 | 73 | val html_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit 74 | (** [html_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] 75 | with HTML markup delimiters [<] [>] [&] and ["] escaped 76 | to HTML entities (Single quotes ['] are not escaped, use ["] to delimit your 77 | attributes). This also renders U+0000 to {!Uchar.rep}. *) 78 | 79 | val buffer_add_html_escaped_uchar : Buffer.t -> Uchar.t -> unit 80 | (** [buffer_add_html_escaped_uchar] is {!html_escaped_uchar} but appends 81 | to a buffer value.
*) 82 | 83 | val html_escaped_string : Cmarkit_renderer.context -> string -> unit 84 | (** [html_escaped_string c s] renders string [s] on [c] with HTML 85 | markup delimiters [<], [>], [&], and ["] escaped to HTML 86 | entities (Single quotes ['] are not escaped, use ["] to delimit your 87 | attributes). *) 88 | 89 | val buffer_add_html_escaped_string : Buffer.t -> string -> unit 90 | (** [buffer_add_html_escaped_string] is {!html_escaped_string} but appends 91 | to a buffer value. *) 92 | 93 | val pct_encoded_string : Cmarkit_renderer.context -> string -> unit 94 | (** [pct_encoded_string c s] renders string [s] on [c] with everything 95 | percent encoded except [%] and the 96 | {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.3} 97 | [unreserved]}, 98 | {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} 99 | [sub-delims]} 100 | and the {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} 101 | [gen-delims]} 102 | URI characters except brackets [\[] and [\]] (to match the [cmark] tool). 103 | 104 | In other words only characters [%] [a-z] [A-Z] [0-9] [-] [.] [_] [~] [!] 105 | [$] [&] ['] [(] [)] [*] [+] [,] [;] [=] [:] [/] [?] [#] [@] 106 | are not percent-encoded. 107 | 108 | {b Warning.} The function also replaces both [&] and ['] by their 109 | corresponding HTML entities, so you can't use this in a context 110 | that doesn't allow entities. Besides this assumes [s] may already 111 | have percent encoded bits so it doesn't percent encode [%], as such you 112 | can't use this as a general percent encode function. *) 113 | 114 | val buffer_add_pct_encoded_string : Buffer.t -> string -> unit 115 | (** [buffer_add_pct_encoded_string b s] is {!pct_encoded_string} but 116 | appends to a buffer value. 
*) 117 | 118 | (** {1:integration HTML integration notes} 119 | 120 | {2:code_blocks Code blocks} 121 | 122 | If a language [lang] can be extracted from the info string of a 123 | code block with 124 | {!Cmarkit.Block.Code_block.language_of_info_string}, a 125 | [language-lang] class is added to the corresponding [code] 126 | element. If you want to highlight the syntax, adding 127 | {{:https://highlightjs.org/}highlight.js} to your page is an 128 | option. 129 | 130 | {2:ids Heading identifiers} 131 | 132 | Heading identifiers and anchors are added to the output whenever 133 | {!Cmarkit.Block.Heading.val-id} holds a value. If the identifier 134 | already exists it is made unique by appending ["-"] and the first 135 | number starting from 1 that makes it unique. 136 | 137 | {2:math Maths} 138 | 139 | If your document has {!Cmarkit.Inline.extension-Ext_math_span} 140 | inlines or {!Cmarkit.Block.extension-Ext_math_block} blocks, the 141 | default renderer outputs them in [\(], [\)] and 142 | [\\[], [\\]] delimiters. You should add 143 | {{:https://katex.org/}K{^A}T{_E}X} or 144 | {{:https://www.mathjax.org/}MathJax} in your page to let these 145 | bits be rendered with the typography they deserve. 146 | 147 | {2:page_frame Page frame} 148 | 149 | The default renderers only generate HTML fragments. You may 150 | want to add a page frame.
For example: 151 | {[ 152 | let html_doc_of_md ?(lang = "en") ~title ~safe md = 153 | let doc = Cmarkit.Doc.of_string md in 154 | let r = Cmarkit_html.renderer ~safe () in 155 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 156 | let buffer_add_title = Cmarkit_html.buffer_add_html_escaped_string in 157 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 158 | {| 159 | 160 | 161 | 162 | %a 163 | 164 | 165 | %a 166 | |} 167 | lang buffer_add_title title buffer_add_doc doc 168 | ]} 169 | *) 170 | -------------------------------------------------------------------------------- /test/expect/bug-18.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bug-18 7 | 206 | 207 | 208 |

Issue #18

209 |

When a list marker is followed by end of file, we crash.

210 |
    211 |
  • Item 1
  • 212 |
  • Item 2
  • 213 |
  • 214 |
215 | 216 | -------------------------------------------------------------------------------- /test/expect/basic.exts.latex: -------------------------------------------------------------------------------- 1 | \section{Extensions} 2 | 3 | \subsection{Footnotes} 4 | 5 | This is a footnote in history\footnote{\label{fn-1} 6 | And it can have 7 | lazy continuation lines and multiple paragraphs 8 | 9 | If you indent one column after the footnote label start. 10 | 11 | \begin{verbatim} 12 | cb 13 | \end{verbatim} 14 | 15 | \begin{itemize} 16 | \item{} 17 | list item 18 | ablc 19 | \item{} 20 | another item 21 | \end{itemize} 22 | } with mutiple references\textsuperscript{\ref{fn-1}} 23 | and even \textsuperscript{\ref{fn-1}} 24 | 25 | This is no longer the footnote. 26 | 27 | Can we make footnotes in footnotes\footnote{\label{fn-2} 28 | This gets tricky but I guess we could have a footnote\footnote{\label{fn-tricky} 29 | The foot of the footnote. But that's not going to link back\textsuperscript{\ref{fn-2}} 30 | } in 31 | a footnote. Also footnote\textsuperscript{\ref{fn-1}} in footnotes\textsuperscript{\ref{fn-2}} is\footnote{\label{fn-3}} tricky for getting 32 | all back references rendered correctly. 33 | 34 | Second footnote 35 | } ? 36 | 37 | Not the footnote 38 | 39 | Not the footnote but a reference to an empty footnote\textsuperscript{\ref{fn-3}} 40 | 41 | Not a footnote [\textasciicircum{}] 42 | 43 | [\textasciicircum{}]: not a footnote. 44 | 45 | \subsection{Strikethrough} 46 | 47 | The stroken \sout{\emph{emph}}. 48 | 49 | Nesting the nest \sout{\emph{emph} \sout{stroke} \emph{emph \textbf{emph \sout{strikeagain}}}} 50 | 51 | There must be no blanks after the opener and before the closer. This 52 | is \textasciitilde{}\textasciitilde{} not an opener and \textasciitilde{}\textasciitilde{}this won't open \sout{that does}. 53 | 54 | \begin{itemize} 55 | \item{} 56 | Here we have \sout{stroken \texttt{code}}. 
57 | \item{} 58 | Here we have \sout{nested \sout{stroken} ok} 59 | \end{itemize} 60 | 61 | \subsection{Math} 62 | 63 | The inline \(\sqrt{x^2-1}\) equation. 64 | 65 | There must be no blanks after the opener and before the closer. This 66 | makes so you can donate \$5 or \$10 dollars here and there without problem. 67 | 68 | There is no such think as nesting \(\sqrt{x^2-1}\)+3\(+3\). As usual 69 | delimiters can be \$escaped\$ \(\sqrt{16\$}\) 70 | 71 | Amazing, this is \href{https://example.org}{hyperlinked math \(3x^2\)} 72 | 73 | The HTML renderer should be careful with \(a < b\) escapes. 74 | 75 | Display math can be in \texttt{math} code blocks. 76 | \[ 77 | \left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi 78 | \] 79 | 80 | But it can also be in \[ \left( \sum_{k=1}^n 81 | a_k b_k \right)^2 < \Phi \] 82 | 83 | \subsection{List task items} 84 | 85 | \begin{itemize} 86 | \item{} \lbrack\phantom{x}\rbrack \enspace 87 | Task open 88 | \item{} \lbrack x\rbrack \enspace 89 | Task done 90 | \item{} \lbrack X\rbrack \enspace 91 | Task done 92 | \item{} \lbrack ✓\rbrack \enspace 93 | Task done (U+2713, CHECK MARK) 94 | \item{} \lbrack ✔\rbrack \enspace 95 | Task done (U+2714, HEAVY CHECK MARK) 96 | Indent 97 | \item{} 98 | Of course this can all be nested 99 | 100 | \begin{itemize} 101 | \item{} \lbrack 𐄂\rbrack \enspace 102 | Task done (U+10102, AEGEAN CHECK MARK) 103 | It will be done for sure. 104 | 105 | \begin{verbatim} 106 | code block 107 | \end{verbatim} 108 | 109 | Not a code block 110 | \item{} \lbrack x\rbrack \enspace 111 | Task done 112 | \item{} \lbrack ~\rbrack \enspace 113 | Task cancelled 114 | Paragraphy 115 | \item{} \lbrack ~\rbrack \enspace 116 | Task canceled 117 | 118 | \begin{verbatim} 119 | we have a code block here too. 
120 | \end{verbatim} 121 | \item{} 122 | [x]Not a task 123 | \item{} 124 | [x] Not a task 125 | \end{itemize} 126 | \item{} \lbrack\phantom{x}\rbrack \enspace 127 | task 128 | 129 | \begin{itemize} 130 | \item{} \lbrack\phantom{x}\rbrack \enspace 131 | sub task 132 | \end{itemize} 133 | \item{} \lbrack\phantom{x}\rbrack \enspace\item{} \lbrack\phantom{x}\rbrack \enspace 134 | a 135 | 136 | \begin{verbatim} 137 | Code 138 | \end{verbatim} 139 | 140 | Not code 141 | \item{} \lbrack\phantom{x}\rbrack \enspace 142 | \begin{verbatim} 143 | Code 144 | \end{verbatim} 145 | 146 | Not code 147 | \end{itemize} 148 | 149 | \subsection{Tables} 150 | 151 | A sample table: 152 | 153 | \bigskip 154 | \begin{tabular}{llll} 155 | \multicolumn{1}{c}{\bfseries{}Id} 156 | & 157 | \multicolumn{1}{r}{\bfseries{}Name} 158 | & 159 | \multicolumn{1}{l}{\bfseries{}Description} 160 | & 161 | \multicolumn{1}{r}{\bfseries{}Link} 162 | \\ 163 | \hline 164 | \multicolumn{1}{c}{1} 165 | & 166 | \multicolumn{1}{r}{OCaml} 167 | & 168 | \multicolumn{1}{l}{The OCaml website} 169 | & 170 | \multicolumn{1}{r}{\url{https://ocaml.org}} 171 | \\ 172 | \multicolumn{1}{c}{2} 173 | & 174 | \multicolumn{1}{r}{Haskell} 175 | & 176 | \multicolumn{1}{l}{The Haskell website} 177 | & 178 | \multicolumn{1}{r}{\url{https://haskell.org}} 179 | \\ 180 | \multicolumn{1}{c}{3} 181 | & 182 | \multicolumn{1}{r}{MDN Web docs} 183 | & 184 | \multicolumn{1}{l}{Web dev docs} 185 | & 186 | \multicolumn{1}{r}{\url{https://developer.mozilla.org/}} 187 | \\ 188 | \multicolumn{1}{c}{4} 189 | & 190 | \multicolumn{1}{r}{Wikipedia} 191 | & 192 | \multicolumn{1}{l}{The Free Encyclopedia} 193 | & 194 | \multicolumn{1}{r}{\url{https://wikipedia.org}} 195 | \\ 196 | \hline 197 | \end{tabular} 198 | \bigskip 199 | 200 | Testing these non separator pipes. 
201 | 202 | \bigskip 203 | \begin{tabular}{lll} 204 | {\bfseries{}Fancy} 205 | & 206 | {\bfseries{}maybe} 207 | & 208 | {\bfseries{}hu|glu} 209 | \\ 210 | \hline 211 | {\emph{a | b}} 212 | & 213 | {\texttt{code |}} 214 | & 215 | {\href{https://example.org}{bl|a}} 216 | \\ 217 | {not | two cols} 218 | & 219 | {\(\sqrt(x^2 - 1)\)} 220 | & 221 | \\ 222 | \hline 223 | \end{tabular} 224 | \bigskip 225 | 226 | A table with changing labels and alignement: 227 | 228 | \bigskip 229 | \begin{tabular}{ll} 230 | {\bfseries{}h1} 231 | & 232 | \multicolumn{1}{c}{\bfseries{}h2} 233 | \\ 234 | \hline 235 | {1} 236 | & 237 | \multicolumn{1}{c}{2} 238 | \\ 239 | \multicolumn{1}{l}{\bfseries{}h3} 240 | & 241 | \multicolumn{1}{r}{\bfseries{}h4} 242 | \\ 243 | \hline 244 | \multicolumn{1}{l}{3} 245 | & 246 | \multicolumn{1}{r}{4} 247 | \\ 248 | \hline 249 | \end{tabular} 250 | \bigskip 251 | 252 | A simple header less table with left and right aligned columns 253 | 254 | \bigskip 255 | \begin{tabular}{ll} 256 | \hline 257 | \multicolumn{1}{l}{1} 258 | & 259 | \multicolumn{1}{r}{2} 260 | \\ 261 | \hline 262 | \end{tabular} 263 | \bigskip 264 | 265 | The simplest table: 266 | 267 | \bigskip 268 | \begin{tabular}{ll} 269 | \hline 270 | {1} 271 | & 272 | {2} 273 | \\ 274 | \hline 275 | \end{tabular} 276 | \bigskip 277 | 278 | A header only table: 279 | 280 | \bigskip 281 | \begin{tabular}{ll} 282 | \multicolumn{1}{c}{\bfseries{}h1} 283 | & 284 | \multicolumn{1}{c}{\bfseries{}h2} 285 | \\ 286 | \hline 287 | \hline 288 | \end{tabular} 289 | \bigskip 290 | 291 | Maximal number of columns all rows defines number of colums: 292 | 293 | \bigskip 294 | \begin{tabular}{llll} 295 | \multicolumn{1}{l}{\bfseries{}h1} 296 | & 297 | \multicolumn{1}{c}{\bfseries{}h2} 298 | & 299 | \multicolumn{1}{r}{\bfseries{}h3} 300 | & 301 | \\ 302 | \hline 303 | \multicolumn{1}{l}{left} 304 | & 305 | \multicolumn{1}{c}{center} 306 | & 307 | \multicolumn{1}{r}{right} 308 | & 309 | \\ 310 | \multicolumn{1}{l}{ha!} 311 
| & 312 | \multicolumn{1}{c}{four} 313 | & 314 | \multicolumn{1}{r}{columns} 315 | & 316 | {in fact} 317 | \\ 318 | \multicolumn{1}{l}{} 319 | & 320 | \multicolumn{1}{c}{} 321 | & 322 | \multicolumn{1}{r}{} 323 | & 324 | {} 325 | \\ 326 | \multicolumn{1}{l}{} 327 | & 328 | \multicolumn{1}{c}{} 329 | & 330 | \multicolumn{1}{r}{} 331 | & 332 | {a} 333 | \\ 334 | \hline 335 | \end{tabular} 336 | \bigskip 337 | 338 | Header less table: 339 | 340 | \bigskip 341 | \begin{tabular}{ll} 342 | \hline 343 | {header} 344 | & 345 | {less} 346 | \\ 347 | {this} 348 | & 349 | {is} 350 | \\ 351 | \hline 352 | \end{tabular} 353 | \bigskip 354 | 355 | Another quoted header less table with aligement 356 | 357 | \begin{quote} 358 | \bigskip 359 | \begin{tabular}{ll} 360 | \hline 361 | \multicolumn{1}{r}{header} 362 | & 363 | \multicolumn{1}{r}{less} 364 | \\ 365 | \multicolumn{1}{r}{again} 366 | & 367 | \multicolumn{1}{r}{aligned} 368 | \\ 369 | \hline 370 | \end{tabular} 371 | \bigskip 372 | \end{quote} 373 | 374 | This is an empty table with three columns: 375 | 376 | \bigskip 377 | \begin{tabular}{lll} 378 | \hline 379 | {} 380 | & 381 | {} 382 | & 383 | {} 384 | \\ 385 | \hline 386 | \end{tabular} 387 | \bigskip 388 | -------------------------------------------------------------------------------- /test/expect/bugs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bugs 7 | 206 | 207 | 208 |

Bugs

209 |

Add a section for the bug and the CommonMark that triggers it as 210 | follows:

211 |
# Issue #NUM
212 | 
213 | The triggering CommonMark
214 | 
215 |

Issue #11

216 |

Escape ordered item markers at the beginning of paragraphs correctly. 217 | These should be paragraphs when rendered to markdown not list items.

218 |

1.

219 |

2.

220 |

23.

221 |

24)

222 |

1234567890. This is not a list marker no need to escape it.

223 | 224 | -------------------------------------------------------------------------------- /src/cmarkit_latex.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to L{^A}T{_E}X. 7 | 8 | Generates L{^A}T{_E}X fragments, consult the {{!integration} 9 | integration notes} for requirements on the document. 10 | 11 | See {{!page-index.quick}a quick example} and {{!doc_frame}another one}. 12 | 13 | {b Warning.} Rendering outputs are unstable, they may be tweaked even 14 | between minor versions of the library. *) 15 | 16 | (** {1:rendering Rendering} *) 17 | 18 | type heading_level = 19 | | Part 20 | | Chapter 21 | | Section 22 | | Subsection (** *) 23 | (** The type for main L{^A}T{_E}X headings levels. *) 24 | 25 | val of_doc : 26 | ?backend_blocks:bool -> ?first_heading_level:heading_level -> 27 | Cmarkit.Doc.t -> string 28 | (** [of_doc d] is a L{^A}T{_E}X fragment for [d]. See {!val-renderer} 29 | for more details and documentation about rendering options. *) 30 | 31 | (** {1:renderer Renderer} *) 32 | 33 | val renderer : 34 | ?backend_blocks:bool -> ?first_heading_level:heading_level -> unit -> 35 | Cmarkit_renderer.t 36 | (** [renderer] is a default L{^A}T{_E}X renderer. This renders 37 | the strict CommonMark abstract syntax tree and the supported 38 | Cmarkit {{!Cmarkit.extensions}extensions}. 39 | 40 | The inline, block and document renderers always return 41 | [true]. Unknown block and inline values are rendered by a 42 | L{^A}T{_E}X comment. 
43 | 44 | The following options are available: 45 | 46 | {ul 47 | {- [backend_blocks], if [true], code blocks with language [=latex] 48 | are written verbatim in the output and any other code block whose 49 | language starts with [=] is dropped. Defaults to [false].} 50 | {- [first_heading_level], the L{^A}T{_E}X heading level to use 51 | for the first CommonMark heading level. Defaults to [Section].}} 52 | 53 | See {{!Cmarkit_renderer.example}this example} to extend or 54 | selectively override the renderer. *) 55 | 56 | (** {1:render Render functions} 57 | 58 | Only useful if you extend the renderer. *) 59 | 60 | val newline : Cmarkit_renderer.context -> unit 61 | (** [newline c] starts a new line, except on the first call on [c] which is 62 | a nop. *) 63 | 64 | val latex_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit 65 | (** [latex_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] 66 | properly escaped for L{^A}T{_E}X. That is the characters 67 | [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] 68 | are escaped. This also renders U+0000 to {!Uchar.rep}. *) 69 | 70 | val buffer_add_latex_escaped_uchar : Buffer.t -> Uchar.t -> unit 71 | (** [buffer_add_latex_escaped_uchar] is {!latex_escaped_uchar} but appends 72 | to a buffer value. *) 73 | 74 | val latex_escaped_string : Cmarkit_renderer.context -> string -> unit 75 | (** [latex_escaped_string c s] renders string [s] on [c] with 76 | characters [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] escaped. This 77 | also escapes U+0000 to {!Uchar.rep}. *) 78 | 79 | val buffer_add_latex_escaped_string : Buffer.t -> string -> unit 80 | (** [buffer_add_latex_escaped_string] is {!latex_escaped_string} 81 | but acts on a buffer value.
*) 82 | 83 | (** {1:integration L{^A}T{_E}X integration notes} 84 | 85 | Along with the built-in [graphicx] package, the following 86 | L{^A}T{_E}X packages are needed to use the outputs of the default 87 | renderer: 88 | {v 89 | tlmgr install enumitem listings hyperref # Required 90 | tlmgr install ulem # Strikethrough extension 91 | tlmgr install bera fontspec # Optional 92 | v} 93 | This means you should have at least the following in your 94 | document preamble: 95 | {v 96 | % Required 97 | \usepackage{graphicx} 98 | \usepackage{enumitem} 99 | \usepackage{listings} 100 | \usepackage{hyperref} 101 | \usepackage[normalem]{ulem} % Strikethrough extension 102 | 103 | % Optional 104 | \usepackage[scaled=0.8]{beramono} % A font for code blocks 105 | \usepackage{fontspec} % Supports more Unicode characters 106 | v} 107 | 108 | See the sections below for more details. 109 | 110 | {2:char_encoding Character encoding} 111 | 112 | The output is UTF-8 encoded. 113 | {{:https://tug.org/TUGboat/tb39-1/tb121ltnews28.pdf}It became} the 114 | default encoding for L{^A}T{_E}X in 2018. But if you are using 115 | an older version a [\usepackage[utf8]{inputenc}] may be needed. 116 | 117 | Using [xelatex] rather than [pdflatex] will not get stuck on missing 118 | glyphs. 119 | 120 | {2:links Autolinks and links} 121 | 122 | The {{:https://www.ctan.org/pkg/hyperref}[hyperref]} package is 123 | used to render links ([\href]) and autolinks ([\url]). Link 124 | destinations starting with a [#] are assumed to refer to 125 | {{!labels}section labels} and are rendered using the [\hyperref] 126 | macro, with the [#] chopped. 127 | 128 | {2:images Images} 129 | 130 | Images are inserted using the 131 | {{:https://ctan.org/pkg/graphicx}graphicx} package. Only 132 | images with relative URLs are supported, those that point 133 | to external resources on the www are turned into links.
134 | 135 | {2:labels Section labels} 136 | 137 | Section labels are added to the output whenever 138 | {!Cmarkit.Block.Heading.val-id} holds a value. If the identifier 139 | already exists it is made unique by appending ["-"] and the first 140 | number starting from 1 that makes it unique. Also the character 141 | [_] seems problematic in labels even when escaped, we map it to [-] 142 | (if you know any better get in touch). 143 | 144 | {2:lists Lists} 145 | 146 | To support the starting point of ordered lists without having to 147 | fiddle with [enumi] counters, the 148 | {{:https://www.ctan.org/pkg/enumitem}[enumitem]} package is used. 149 | 150 | {2:code_blocks Code blocks} 151 | 152 | If a language [lang] can be 153 | {{!Cmarkit.Block.Code_block.language_of_info_string}extracted} 154 | from a code block info string, the 155 | {{:https://www.ctan.org/pkg/listings}[listings]} package is used 156 | with the corresponding language in a [lstlisting] environment. 157 | Otherwise the built-in [verbatim] environment is used. 158 | 159 | Note that the [listings] package has no definition for the [ocaml] 160 | language, the default renderings are a bit subpar and 161 | break on character literals with double quotes. This improves things: 162 | {v 163 | \lstset{ 164 | columns=[c]fixed, 165 | basicstyle=\small\ttfamily, 166 | keywordstyle=\bfseries, 167 | upquote=true, 168 | commentstyle=\slshape, 169 | breaklines=true, 170 | showstringspaces=false} 171 | 172 | \lstdefinelanguage{ocaml}{language=[objective]caml, 173 | % Fixes double quotes in char literals 174 | literate={'"'}{\textquotesingle "\textquotesingle}3 175 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 176 | } 177 | v} 178 | 179 | {2:doc_frame Document frame} 180 | 181 | The default renderer only generates L{^A}T{_E}X fragments. You 182 | may want to add a document frame.
For example: 183 | {[ 184 | let latex_doc_of_md ?(title = "") md = 185 | let doc = Cmarkit.Doc.of_string md in 186 | let r = Cmarkit_latex.renderer () in 187 | let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in 188 | let buffer_add_title = Cmarkit_latex.buffer_add_latex_escaped_string in 189 | let maketitle = if title = "" then "" else {|\maketitle|} in 190 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 191 | {|\documentclass{article} 192 | 193 | \usepackage{graphicx} 194 | \usepackage{enumitem} 195 | \usepackage{listings} 196 | \usepackage{hyperref} 197 | \usepackage[normalem]{ulem} 198 | \usepackage[scaled=0.8]{beramono} 199 | \usepackage{fontspec} 200 | 201 | \lstset{ 202 | columns=[c]fixed, 203 | basicstyle=\small\ttfamily, 204 | keywordstyle=\bfseries, 205 | upquote=true, 206 | commentstyle=\slshape, 207 | breaklines=true, 208 | showstringspaces=false} 209 | 210 | \lstdefinelanguage{ocaml}{language=[objective]caml, 211 | literate={'"'}{\textquotesingle "\textquotesingle}3 212 | {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, 213 | } 214 | 215 | \title{%a} 216 | \begin{document} 217 | %s 218 | %a 219 | \end{document}|} buffer_add_title title maketitle buffer_add_doc doc 220 | ]} 221 | 222 | Ignore this: ". 223 | *) 224 | -------------------------------------------------------------------------------- /test/test_pathological.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open B0_std 7 | open Result.Syntax 8 | open B0_testing 9 | 10 | let range ~first ~last = 11 | let rec loop acc k = if k < first then acc else loop (k :: acc) (k-1) in 12 | loop [] last 13 | 14 | (* Run commands on a deadline. 
Something like this should be added to B0_kit. *) 15 | 16 | type deadline_exit = [ Os.Cmd.status | `Timeout ] 17 | type deadline_run = Mtime.Span.t * deadline_exit 18 | 19 | let deadline_run ~timeout ?env ?cwd ?stdin ?stdout ?stderr cmd = 20 | let rec wait ~deadline dur pid = 21 | let* st = Os.Cmd.spawn_poll_status pid in 22 | match st with 23 | | Some st -> Ok (Os.Mtime.count dur, (st :> deadline_exit)) 24 | | None -> 25 | if Mtime.Span.compare (Os.Mtime.count dur) deadline < 0 26 | then (ignore (Os.sleep Mtime.Span.ms); wait ~deadline dur pid) else 27 | let* () = Os.Cmd.kill pid Sys.sigkill in 28 | let* _st = Os.Cmd.spawn_wait_status pid in 29 | Ok (Os.Mtime.count dur, `Timeout) 30 | in 31 | let* pid = Os.Cmd.spawn ?env ?cwd ?stdin ?stdout ?stderr cmd in 32 | wait ~deadline:timeout (Os.Mtime.counter ()) pid 33 | 34 | (* Pathological tests for CommonMark parsers. 35 | 36 | These tests are from: 37 | 38 | https://github.com/commonmark/cmark/blob/master/test/pathological_tests.py 39 | 40 | The test expectations there use regexps with constant n matches 41 | which Str doesn't support. Instead we make the expectations more 42 | precise and trim and map newlines to spaces the HTML renders to 43 | avoid rendering layout discrepancies. *) 44 | 45 | let massage s = String.trim (String.map (function '\n' -> ' ' | c -> c) s) 46 | 47 | type test = { doc : string; i : string; exp : string; } 48 | 49 | let tests = 50 | let n = 30000 (* should be pair *) in 51 | let p s = Fmt.str "

%s

" s in 52 | let ( + ) = ( ^ ) and cat = String.concat "" in 53 | let ( * ) s n = cat @@ List.map (Fun.const s) (range ~first:1 ~last:n) in 54 | [ { doc = "nested strong emphasis"; 55 | i = "*a **a "*n + "b" + " a** a*"*n; 56 | exp = p @@ "a a "*n + "b" + " a a"*n }; 57 | { doc = "many emphasis closers with no openers"; 58 | i = "a_ "*n; 59 | exp = p @@ "a_ "*(n - 1) + "a_" }; 60 | { doc = "many emphasis openers with no closers"; 61 | i = "_a "*n; 62 | exp = p @@ "_a "*(n - 1) + "_a" }; 63 | { doc = "many link closers with no openers"; 64 | i = "a]"*n; 65 | exp = p @@ "a]"*n }; 66 | { doc = "many link openers with no closers"; 67 | i = "[a"*n; 68 | exp = p @@ "[a"*n; }; 69 | { doc = "mismatched openers and closers"; 70 | i = "*a_ "*n; 71 | exp = p @@ "*a_ "*(n-1) + "*a_" }; 72 | { doc = "cmark issue #389"; 73 | i = "*a "*n + "_a*_ "*n; 74 | exp = p @@ "a "*n + "_a_ "*(n - 1) + "_a_" }; 75 | { doc = "openers and closers multiple of 3"; 76 | i = "a**b" + "c* "*n; 77 | exp = p @@ "a**b" + "c* "*(n - 1) + "c*" }; 78 | { doc = "link openers and emph closers"; 79 | i = "[ a_"*n; 80 | exp = p @@ "[ a_"*n }; 81 | { doc = "sequence '[ (](' repeated"; 82 | i = "[ (]("*n; 83 | exp = p @@ "[ (]("*n; }; 84 | { doc = "sequence '![[]()' repeated"; 85 | i = "![[]()"*n; 86 | exp = p @@ {|![|}*n; }; 87 | { doc = "Hard link/emphasis case"; 88 | i = "**x [a*b**c*](d)"; 89 | exp = p @@ {|**x ab**c|} }; 90 | { doc = "nested brackets [* a ]*"; 91 | i = "["*n + "a" + "]"*n; 92 | exp = p @@ "["*n + "a" + "]"*n }; 93 | { doc = "nested block quotes"; 94 | i = "> "*n + "a"; 95 | exp = "
"*n + p "a" + "
"*n }; 96 | { doc = "deeply nested lists"; 97 | i = cat (List.map (fun n -> " "*n + "* a\n") (range ~first:0 ~last:499)); 98 | exp = "
    "+"
  • a
      "*499+"
    • a
    "+"
"*499 }; 99 | { doc = "U+0000 in input"; 100 | i = "abc\x00de\x00"; 101 | exp = p @@ "abc\u{FFFD}de\u{FFFD}" }; 102 | { doc = "backticks"; 103 | i = cat (List.map (fun n -> "e" + "`"*n) (range ~first:1 ~last:2500)); 104 | exp = 105 | p @@ cat (List.map (fun n -> "e" + "`"*n) (range ~first:1 ~last:2500))}; 106 | { doc = "unclosed inline link <>"; 107 | i = "[a]("*(n/2) + "a" + ""* (n/2); }; 118 | { doc = "many references"; 119 | i = 120 | cat (List.map (fun n -> Fmt.str "[%d]: u\n" n) (range ~first:1 ~last:n)) 121 | + "[0]"*n; 122 | exp = p @@ "[0]"*n; } 123 | ] 124 | 125 | (* Dump the tests *) 126 | 127 | let dump_tests dir = 128 | let dump_test dir t i = 129 | let name = Fmt.str "patho-test-%02d" i in 130 | let force = true and make_path = true in 131 | let src = Fpath.(dir / name + ".md") in 132 | let exp = Fpath.(dir / name + ".exp") in 133 | let* () = Os.File.write ~force ~make_path src t.i in 134 | let* () = Os.File.write ~force ~make_path exp t.exp in 135 | Ok (i + 1) 136 | in 137 | match List.fold_stop_on_error (dump_test dir) tests 1 with 138 | | Error _ as e -> Test.error_to_failstop e | Ok _ -> () 139 | 140 | (* Run the tests *) 141 | 142 | let run_test t (timeout, cmd) = 143 | let pp_err = Fmt.st [`Fg `Red] in 144 | Test.error_to_failstop @@ 145 | Result.join @@ Os.File.with_tmp_fd @@ fun tmpfile fd -> 146 | let stdin = Os.Cmd.in_string t.i in 147 | let stdout = Os.Cmd.out_fd ~close:false fd in 148 | let* dur, exit = deadline_run ~timeout ~stdin ~stdout cmd in 149 | match exit with 150 | | `Exited 0 -> 151 | let* fnd = Os.File.read tmpfile in 152 | let fnd = massage fnd in 153 | if String.equal (String.trim t.exp) fnd then begin 154 | Test.pass (); 155 | Test.log " %a in %a" Test.Fmt.passed () Mtime.Span.pp dur; 156 | Ok () 157 | end else begin 158 | let pp_data = Fmt.truncated ~max:50 in 159 | Test.failstop " @[@[%a in %a@]@,Expected: %a@,Found : %a@]" 160 | pp_err "unexpected output" Mtime.Span.pp dur 161 | pp_data t.exp pp_data fnd 162 | end 163 | | 
`Exited n -> 164 | Test.failstop " %a with %d in %a" pp_err "exited" n Mtime.Span.pp dur 165 | | `Signaled sg -> 166 | Test.failstop " %a with %d in %a" pp_err "signaled" sg Mtime.Span.pp dur 167 | | `Timeout -> 168 | Test.failstop " %a in %a" pp_err "timed out" Mtime.Span.pp dur 169 | 170 | let params = Test.Arg.make () 171 | let mk_test t = Test.test' params t.doc (run_test t) 172 | 173 | let test_pathological ~timeout_s ~dump ~tool ~tool_args () = match dump with 174 | | Some dir -> dump_tests dir 175 | | None -> 176 | match tool with 177 | | None -> 178 | Test.failstop "No tool to test specified. See %a" Fmt.code "--help" 179 | | Some t -> 180 | let timeout = Mtime.Span.(timeout_s * s) in 181 | let cmd = Cmd.(tool t %% list tool_args) in 182 | List.iter (fun t -> let t = mk_test t in ignore t) tests; 183 | Test.log "Testing tool %s" t; 184 | Test.log "Timeout after: %a" Mtime.Span.pp timeout; 185 | let args = Test.Arg.[value params (timeout, cmd)] in 186 | Test.autorun ~args () 187 | 188 | (* Command line interface *) 189 | 190 | open Cmdliner 191 | open Cmdliner.Term.Syntax 192 | 193 | let timeout_s = 194 | let doc = "$(docv) is the timeout in seconds." in 195 | Arg.(value & opt int 1 & info ["timeout-s"] ~doc) 196 | 197 | let dump = 198 | let doc = "Do not test, dump the tests to directory $(docv)" in 199 | Arg.(value & opt (some B0_std_cli.dirpath) None & info ["dump"] ~doc) 200 | 201 | let cli_arg ~docv = 202 | let completion = Arg.Completion.complete_restart in 203 | Arg.Conv.of_conv ~docv Arg.string ~completion 204 | 205 | let tool = 206 | let doc = 207 | "The tool to test. Must read CommonMark on stdin and write HTML on stdout." 208 | in 209 | Arg.(value & pos 0 (some (cli_arg ~docv:"TOOL")) None & info [] ~doc) 210 | 211 | let tool_args = 212 | let doc = 213 | "Argument for the tool. Start with a $(b,--) token \ 214 | otherwise options get interpreted by $(tool)." 
215 | in 216 | Arg.(value & pos_right 0 (cli_arg ~docv:"ARG") [] & info [] ~doc) 217 | 218 | let main () = 219 | Test.main' @@ 220 | let+ timeout_s and+ dump and+ tool and+ tool_args in 221 | test_pathological ~timeout_s ~dump ~tool ~tool_args 222 | 223 | let () = if !Sys.interactive then () else exit (main ()) 224 | -------------------------------------------------------------------------------- /test/expect/bugs.exts.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Bugs.exts 7 | 206 | 207 | 208 |

Bugs

209 |

Add a section for the bug and the CommonMark that triggers it as 210 | follows:

211 |
# Bug #NUM
212 | 
213 | The triggering CommonMark
214 | 
215 |

Bug #10

216 |

In cells toplevel text nodes not at the beginning or end of the cell 217 | get dropped.

218 |
219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 |
Foo
a or b
before a or b after
before a or bafter
beforeaorbafter
aa

foo

Bug #15

241 |

Invalid markup generated for cancelled task.

242 |
    243 |
  • This has been cancelled
  • 244 |
245 | 246 | -------------------------------------------------------------------------------- /src/tool/cmd_latex.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmarkit_std 7 | open Cmarkit 8 | 9 | let built_in_preamble = ref "" (* See at the end of the module *) 10 | 11 | let buffer_add_inline_preamble b p = 12 | Buffer.add_char b '\n'; Buffer.add_string b p; Buffer.add_char b '\n' 13 | 14 | let buffer_add_inline_preamble_option b = function 15 | | None -> () | Some p -> buffer_add_inline_preamble b p 16 | 17 | let buffer_add_inline_preambles b files = 18 | let add_file b file = 19 | let preamble = Os.read_file file |> Result.to_failure in 20 | buffer_add_inline_preamble b (String.trim preamble) 21 | in 22 | List.iter (add_file b) files 23 | 24 | let text_inline t = Inline.Text (t, Meta.none) 25 | let untilted_inline = text_inline "Untitled" (* Default title used when no heading is extracted *) 26 | 27 | let lift_headings_map ~extract_title doc = 28 | let open Cmarkit in 29 | let title = ref None in 30 | let block m = function 31 | | Block.Heading (h, meta) as b -> 32 | let inline = Block.Heading.inline h in 33 | if extract_title && Option.is_none !title 34 | then (title := Some inline; Mapper.delete) else 35 | let level = Block.Heading.level h in 36 | if level = 1 then Mapper.ret b else 37 | let id = Block.Heading.id h in 38 | let level = level - 1 in 39 | let h = Block.Heading.make ?id ~level inline in 40 | Mapper.ret (Block.Heading (h, meta)) 41 | | _ -> Mapper.default 42 | in 43 | let doc = Mapper.map_doc (Mapper.make ~block ()) doc in 44 | let title = Option.value ~default:untilted_inline !title in 45 | title, doc 46 | 47 | let empty_defs = Cmarkit.Label.Map.empty 48 | let buffer_add_docs ?(defs =
empty_defs) ~accumulate_defs parse r b files = 49 | let rec loop defs = function 50 | | [] -> () 51 | | file :: files -> 52 | let md = Os.read_file file |> Result.to_failure in 53 | let _, doc = parse ~extract_title:false ~file ~defs md in 54 | let defs = if accumulate_defs then Cmarkit.Doc.defs doc else empty_defs in 55 | Cmarkit_renderer.buffer_add_doc r b doc; 56 | if files <> [] then Buffer.add_char b '\n'; 57 | loop defs files 58 | in 59 | loop defs files 60 | 61 | let buffer_add_title r doc b title = 62 | let ctx = Cmarkit_renderer.Context.make r b in 63 | let () = Cmarkit_renderer.Context.init ctx doc in 64 | Cmarkit_renderer.Context.inline ctx title 65 | 66 | let buffer_add_author b = function 67 | | None -> () | Some a -> 68 | Buffer.add_string b "\n\\author{"; 69 | Buffer.add_string b a; Buffer.add_char b '}' 70 | 71 | let title_of_file f = 72 | if f = "-" then "Untitled" else 73 | String.capitalize_ascii (Filename.remove_extension (Filename.basename f)) 74 | 75 | let doc 76 | ~accumulate_defs ~extract_title parse r ~author ~title ~inline_preambles 77 | ~keep_built_in_preamble files 78 | = 79 | let built_in_preamble = 80 | if inline_preambles = [] || keep_built_in_preamble 81 | then Some (!built_in_preamble) else None 82 | in 83 | let file, files = List.hd files, List.tl files in 84 | let md = Os.read_file file |> Result.to_failure in 85 | let title, doc = 86 | let defs = empty_defs in 87 | match title with 88 | | Some t -> text_inline t, snd (parse ~extract_title:false ~file ~defs md) 89 | | None -> 90 | if extract_title then parse ~extract_title:true ~file ~defs md else 91 | let title = text_inline (title_of_file file) in 92 | (title, snd (parse ~extract_title:false ~file ~defs md)) 93 | in 94 | let defs = if accumulate_defs then Cmarkit.Doc.defs doc else empty_defs in 95 | Printf.kbprintf Buffer.contents (Buffer.create 1024) 96 | {|\documentclass{article} 97 | %a%a 98 | %a\title{%a} 99 | \begin{document} 100 | \maketitle 101 | %a%a%a 102 | 
\end{document} 103 | |} 104 | buffer_add_inline_preamble_option built_in_preamble 105 | buffer_add_inline_preambles inline_preambles 106 | buffer_add_author author 107 | (buffer_add_title r doc) title 108 | (Cmarkit_renderer.buffer_add_doc r) doc 109 | Buffer.add_string (if files <> [] then "\n" else "") 110 | (buffer_add_docs ~defs ~accumulate_defs parse r) files 111 | 112 | let latex 113 | ~files ~quiet ~accumulate_defs ~strict ~heading_auto_ids ~backend_blocks 114 | ~lift_headings ~docu ~title ~author ~inline_preambles 115 | ~keep_built_in_preamble ~first_heading_level 116 | = 117 | let resolver = Label_resolver.v ~quiet in 118 | let r = Cmarkit_latex.renderer ~backend_blocks ~first_heading_level () in 119 | let parse ~extract_title ~file ~defs md = 120 | let doc = 121 | Cmarkit.Doc.of_string ~resolver ~defs ~heading_auto_ids ~file ~strict md 122 | in 123 | if lift_headings then lift_headings_map ~extract_title doc else 124 | untilted_inline, doc 125 | in 126 | try 127 | let s = match docu with 128 | | true -> 129 | doc ~accumulate_defs ~extract_title:lift_headings parse 130 | ~author ~title ~inline_preambles ~keep_built_in_preamble r files 131 | | false -> 132 | Printf.kbprintf Buffer.contents (Buffer.create 2048) "%a" 133 | (buffer_add_docs ~accumulate_defs parse r) files; 134 | in 135 | print_string s; 0 136 | with 137 | | Failure err -> Log.err "%s" err; Cmarkit_cli.Exit.err_file 138 | 139 | (* Command line interface *) 140 | 141 | open Cmdliner 142 | open Cmdliner.Term.Syntax 143 | 144 | let author = 145 | let doc = 146 | "Document author when $(b,--doc) is used. $(docv) is interpreted as \ 147 | raw LaTeX." 148 | in 149 | Arg.(value & opt (some string) None & info ["a"; "author"] ~doc ~docv:"NAME") 150 | 151 | let backend_blocks = 152 | let doc = 153 | "Code blocks with language $(b,=latex) are included verbatim in the \ 154 | output. Other code blocks with language starting with $(b,=) are \ 155 | dropped. 
This does not activate math support, use $(b,--exts) for that." 156 | in 157 | Cmarkit_cli.backend_blocks ~doc 158 | 159 | let inline_preambles = 160 | let doc = 161 | "Add the content of LaTeX file $(docv) to the document preamble when \ 162 | $(b,--doc) is used. If unspecified a built-in preamble is written \ 163 | directly in the document (use $(b,-k) to keep it even when this option \ 164 | is specified). Repeatable." 165 | in 166 | Arg.(value & opt_all filepath [] & 167 | info ~doc ["inline-preamble"] ~docv:"FILE.latex") 168 | 169 | let keep_built_in_preamble = 170 | let doc = 171 | "Keep built-in preamble even if one is specified via \ 172 | $(b,--inline-preamble)." 173 | in 174 | Arg.(value & flag & info ["k"; "keep-built-in-preamble"] ~doc) 175 | 176 | let lift_headings = 177 | let doc = 178 | "Lift headings one level up and, when $(b,--doc) is used, extract the \ 179 | first heading (of any level) to take it as the title; unless a title \ 180 | is specified via the $(b,--title) option. This is useful for certain \ 181 | CommonMark documents like READMEs for which taking the headings \ 182 | literally results in unnatural sectioning." 183 | in 184 | Arg.(value & flag & info ["l"; "lift-headings"] ~doc) 185 | 186 | let first_heading_level = 187 | let level_enum = 188 | [ "part", Cmarkit_latex.Part; "chapter", Chapter; 189 | "section", Section; "subsection", Subsection ] 190 | in 191 | let doc = 192 | Printf.sprintf 193 | "Use LaTeX heading level $(docv) for the first CommonMark heading level. \ 194 | $(docv) must be %s." 
(Arg.doc_alts_enum level_enum) 195 | in 196 | Arg.(value & opt (Arg.enum level_enum) Cmarkit_latex.Section & 197 | Arg.info ["first-heading-level"] ~doc ~docv:"LEVEL") 198 | 199 | let cmd = 200 | let doc = "Render CommonMark to LaTeX" in 201 | let man = [ 202 | `S Manpage.s_description; 203 | `P "$(cmd) outputs a LaTeX fragment or document on standard output."; 204 | `Pre "$(cmd) $(b,-e -c -l -h README.md > README.latex)";`Noblank; 205 | `Pre "$(b,tlmgr install enumitem listings hyperref ulem bera fontspec)"; 206 | `Noblank; 207 | `Pre "$(b,xelatex README.latex)"; 208 | `Blocks Cmarkit_cli.common_man ] 209 | in 210 | Cmd.make (Cmd.info "latex" ~doc ~man) @@ 211 | let+ files = Cmarkit_cli.files and+ quiet = Cmarkit_cli.quiet 212 | and+ accumulate_defs = Cmarkit_cli.accumulate_defs 213 | and+ strict = Cmarkit_cli.strict 214 | and+ heading_auto_ids = Cmarkit_cli.heading_auto_ids and+ backend_blocks 215 | and+ lift_headings and+ docu = Cmarkit_cli.docu and+ title = Cmarkit_cli.title 216 | and+ author and+ inline_preambles and+ keep_built_in_preamble 217 | and+ first_heading_level in 218 | latex ~files ~quiet ~accumulate_defs ~strict ~heading_auto_ids ~backend_blocks 219 | ~lift_headings ~docu ~title ~author ~inline_preambles 220 | ~keep_built_in_preamble ~first_heading_level 221 | 222 | (* Built-in LaTeX preamble, defined that way to avoid source clutter *) 223 | 224 | let () = built_in_preamble := 225 | {|\usepackage{graphicx} 226 | \usepackage{enumitem} 227 | \usepackage{listings} 228 | \usepackage{hyperref} 229 | \usepackage[normalem]{ulem} 230 | \usepackage[scaled=0.8]{beramono} 231 | \usepackage{fontspec} 232 | 233 | \lstset{ 234 | columns=[c]fixed, 235 | basicstyle=\small\ttfamily, 236 | keywordstyle=\bfseries, 237 | upquote=true, 238 | commentstyle=\slshape, 239 | breaklines=true, 240 | showstringspaces=false} 241 | 242 | \lstdefinelanguage{ocaml}{language=[objective]caml, 243 | literate={'"'}{\textquotesingle "\textquotesingle}3 244 | {'\\"'}{\textquotesingle
\textbackslash"\textquotesingle}4, 245 | } 246 | 247 | \renewcommand{\arraystretch}{1.3} 248 | |} 249 | -------------------------------------------------------------------------------- /src/cmarkit_renderer.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2021 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Renderer abstraction. 7 | 8 | Stateful renderer abstraction to render documents in {!Stdlib.Buffer.t} 9 | values. 10 | 11 | {b Note.} This is a low-level interface. For quick and standard 12 | renderings see {!Cmarkit_html.of_doc}, {!Cmarkit_latex.of_doc} and 13 | {!Cmarkit_commonmark.of_doc}. If you want to extend them, 14 | see {{!example}this example}. *) 15 | 16 | (** {1:rendering Rendering} *) 17 | 18 | type t 19 | (** The type for renderers. *) 20 | 21 | val doc_to_string : t -> Cmarkit.Doc.t -> string 22 | (** [doc_to_string r d] renders document [d] to a string using renderer [r]. *) 23 | 24 | val buffer_add_doc : t -> Buffer.t -> Cmarkit.Doc.t -> unit 25 | (** [buffer_add_doc r b d] renders document [d] on buffer [b] using 26 | renderer [r]. *) 27 | 28 | (** {1:renderers Renderers} *) 29 | 30 | type context 31 | (** The type for rendering contexts, holds a renderer, a 32 | {!Stdlib.Buffer.t} value to act on and rendering state. *) 33 | 34 | type inline = context -> Cmarkit.Inline.t -> bool 35 | (** The type for inline renderers. 36 | 37 | Return [false] if you are not interested in rendering the given 38 | inline. Use {!Context.inline} and {!Context.block} on the given 39 | context if you need to invoke the renderer recursively. *) 40 | 41 | type block = context -> Cmarkit.Block.t -> bool 42 | (** The type for block renderers. 
43 | 44 | Return [false] if you are not interested in rendering the given 45 | block. Use {!Context.inline} and {!Context.block} with the given 46 | context if you need to invoke the renderer recursively. *) 47 | 48 | type doc = context -> Cmarkit.Doc.t -> bool 49 | (** The type for document renderers. 50 | 51 | Return [false] if you are not interested in rendering the given 52 | document. Use {!Context.inline}, {!Context.block} and {!Context.doc} 53 | with the given context if you need to invoke the renderer recursively. *) 54 | 55 | val make : 56 | ?init_context:(context -> Cmarkit.Doc.t -> unit) -> 57 | ?inline:inline -> ?block:block -> ?doc:doc -> unit -> t 58 | (** [make ?init_context ?inline ?block ?doc ()] is a renderer using 59 | [inline], [block], [doc] to render documents. They all default to 60 | [(fun _ _ -> false)], which means that by default they defer to 61 | next renderer (see {!compose}). 62 | 63 | [init_context] is used to initialize the context for the renderer 64 | before a document render. It defaults to [fun _ _ -> ()]. *) 65 | 66 | val compose : t -> t -> t 67 | (** [compose g f] renders first with [f] and if a renderer returns [false], 68 | falls back on its counterpart in [g]. 69 | 70 | The {!init_context} of the result calls [g]'s initialization 71 | context function first, followed by the one of [f]. This means 72 | [f]'s initialization function can assume the context is already 73 | setup for [g]. *) 74 | 75 | (** {2:accessors Accessors} 76 | 77 | Normally you should not need these but you may want to peek 78 | into other renderers. *) 79 | 80 | val init_context : t -> (context -> Cmarkit.Doc.t -> unit) 81 | (** [init_context r] is the context initialization function for [r]. *) 82 | 83 | val inline : t -> inline 84 | (** [inline r] is the inline renderer of [r]. *) 85 | 86 | val block : t -> block 87 | (** [block r] is the block renderer of [r].
*) 88 | 89 | val doc : t -> doc 90 | (** [doc r] is the document renderer of [r]. *) 91 | 92 | (** {1:context Rendering contexts} *) 93 | 94 | (** Rendering contexts. *) 95 | module Context : sig 96 | 97 | (** {1:contexts Contexts} *) 98 | 99 | type renderer := t 100 | 101 | type t = context 102 | (** The type for rendering contexts. *) 103 | 104 | val make : renderer -> Buffer.t -> t 105 | (** [make r b] is a context using renderer [r] to render documents 106 | on buffer [b]. 107 | 108 | The renderer [r] must be able to handle any inline, block and 109 | document values (i.e. its renderers should always return [true]) 110 | otherwise [Invalid_argument] may be raised on renders. 111 | 112 | This means the last renderer you {{!compose}compose with} should 113 | always have catch all cases returning [true]; after possibly 114 | indicating in the output that something was missed. The built-in 115 | renderers {!Cmarkit_commonmark.val-renderer}, 116 | {!Cmarkit_html.val-renderer} and {!Cmarkit_latex.val-renderer} 117 | do have these catch all cases. *) 118 | 119 | val renderer : t -> renderer 120 | (** [renderer c] is the renderer of [c]. *) 121 | 122 | val buffer : t -> Buffer.t 123 | (** [buffer c] is the buffer of [c]. *) 124 | 125 | val get_doc : t -> Cmarkit.Doc.t 126 | (** [get_doc c] is the document being rendered. *) 127 | 128 | val get_defs : t -> Cmarkit.Label.defs 129 | (** [get_defs c] is [Doc.defs (get_doc c)]. *) 130 | 131 | (** Custom context state. *) 132 | module State : sig 133 | 134 | type 'a t 135 | (** The type for custom state of type ['a]. *) 136 | 137 | val make : unit -> 'a t 138 | (** [make ()] is a new bit of context state. *) 139 | 140 | val find : context -> 'a t -> 'a option 141 | (** [find c state] is the state [state] of context [c], if any. *) 142 | 143 | val get : context -> 'a t -> 'a 144 | (** [get c state] is the state [state] of context [c], raises 145 | [Invalid_argument] if there is no state [state] in [c].
*) 146 | 147 | val set : context -> 'a t -> 'a option -> unit 148 | (** [set c state s] sets the state [state] of [c] to [s]. [state] is 149 | cleared in [c] if [s] is [None]. *) 150 | end 151 | 152 | val init : t -> Cmarkit.Doc.t -> unit 153 | (** [init c d] calls the initialisation function of [c]'s 154 | {!val-renderer}. Note, this is done automatically by {!val-doc}. *) 155 | 156 | (** {1:render Rendering functions} 157 | 158 | These functions append data to the {!buffer} of the context. For more 159 | specialized rendering functions, see the corresponding rendering 160 | backends. *) 161 | 162 | val byte : t -> char -> unit 163 | (** [byte c b] renders byte [b] verbatim on [c]. *) 164 | 165 | val utf_8_uchar : t -> Uchar.t -> unit 166 | (** [utf_8_uchar c u] renders the UTF-8 encoding of [u] on [c]. *) 167 | 168 | val string : t -> string -> unit 169 | (** [string c s] renders string [s] verbatim on [c]. *) 170 | 171 | val inline : t -> Cmarkit.Inline.t -> unit 172 | (** [inline c i] renders inline [i] on [c]. This invokes the 173 | {{!compose}composition} of inline renderers of [c]. *) 174 | 175 | val block : t -> Cmarkit.Block.t -> unit 176 | (** [block c b] renders block [b] on [c]. This invokes the 177 | {{!compose}composition} of block renderers of [c]. *) 178 | 179 | val doc : t -> Cmarkit.Doc.t -> unit 180 | (** [doc c d] initializes [c] with {!init} and renders document [d] on [c]. 181 | This invokes the {{!compose}composition} of document renderers of [c]. *) 182 | end 183 | 184 | (** {1:example Extending renderers} 185 | 186 | This example extends the {!Cmarkit_html.val-renderer} but it 187 | applies {e mutatis mutandis} to the other backend document 188 | renderers.
189 | 190 | Let's assume you want to: 191 | 192 | {ul 193 | {- Extend the abstract syntax tree with a [Doc] block which 194 | allows to splice documents in another one (note that 195 | splicing is already built-in via the {!Cmarkit.Block.extension-Blocks} 196 | block case).} 197 | {- Change the rendering of {!Cmarkit.Inline.extension-Image} inlines to 198 | render HTML [video] or [audio] elements depending on the link's 199 | destination suffix.} 200 | {- For the rest use the built-in {!Cmarkit_html.renderer} renderer 201 | as it exists.}} 202 | 203 | This boils down to: 204 | 205 | {ol 206 | {- Add a new case to the abstract syntax tree.} 207 | {- Define a [custom_html] renderer which treats 208 | {!Cmarkit.Inline.Image} and the new [Doc] case the way we 209 | see fit and returns [false] otherwise to use the built-in renderer. } 210 | {- {!compose} [custom_html] with {!Cmarkit_html.val-renderer}}} 211 | 212 | {[ 213 | type Cmarkit.Block.t += Doc of Cmarkit.Doc.t (* 1 *) 214 | 215 | let media_link c l = 216 | let has_ext s ext = String.ends_with ~suffix:ext s in 217 | let is_video s = List.exists (has_ext s) [".mp4"; ".webm"] in 218 | let is_audio s = List.exists (has_ext s) [".mp3"; ".flac"] in 219 | let defs = Cmarkit_renderer.Context.get_defs c in 220 | match Cmarkit.Inline.Link.reference_definition defs l with 221 | | Some Cmarkit.Link_definition.Def (ld, _) -> 222 | let start_tag = match Cmarkit.Link_definition.dest ld with 223 | | Some (src, _) when is_video src -> Some ("<video", src) 224 | | Some (src, _) when is_audio src -> Some ("<audio", src) 225 | | None | Some _ -> None 226 | in 227 | begin match start_tag with 228 | | None -> false (* let the default HTML renderer handle that *) 229 | | Some (start_tag, src) -> 230 | (* More could be done with the reference title and link text *) 231 | Cmarkit_renderer.Context.string c start_tag; 232 | Cmarkit_renderer.Context.string c {| src="|}; 233 | Cmarkit_html.pct_encoded_string c src; 234 | Cmarkit_renderer.Context.string c {|" />|}; 235 | true 236 | end 237 | | None | Some _ -> false (* let the
default HTML renderer handle that *) 238 | 239 | let custom_html = 240 | let inline c = function 241 | | Cmarkit.Inline.Image (l, _) -> media_link c l 242 | | _ -> false (* let the default HTML renderer handle that *) 243 | in 244 | let block c = function 245 | | Doc d -> 246 | (* It's important to recurse via Cmarkit_renderer.Context.block *) 247 | Cmarkit_renderer.Context.block c (Cmarkit.Doc.block d); true 248 | | _ -> false (* let the default HTML renderer handle that *) 249 | in 250 | Cmarkit_renderer.make ~inline ~block () (* 2 *) 251 | 252 | let custom_html_of_doc ~safe doc = 253 | let default = Cmarkit_html.renderer ~safe () in 254 | let r = Cmarkit_renderer.compose default custom_html in (* 3 *) 255 | Cmarkit_renderer.doc_to_string r doc 256 | ]} 257 | 258 | The [custom_html_of_doc] function performs your extended 259 | renderings. *) 260 | -------------------------------------------------------------------------------- /src/tool/cmd_locs.ml: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved.
3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | open Cmarkit_std 7 | open Cmarkit 8 | 9 | let strf = Printf.sprintf 10 | let pf = Format.fprintf 11 | let cut = Format.pp_print_cut 12 | let indent ppf n = for i = 1 to n do Format.pp_print_space ppf () done 13 | let loc kind ~indent:n ppf m = 14 | pf ppf "@[@[%a%s:@]@,@[%a%a@]@]" 15 | indent n kind 16 | indent n Textloc.pp_ocaml (Meta.textloc m) 17 | 18 | let block_line kind ~indent ppf (_, m) = loc kind ~indent ppf m 19 | let tight_block_line kind ~indent ppf (_, (_, m)) = loc kind ~indent ppf m 20 | let tight_block_lines kind ~indent ppf ls = 21 | Format.pp_print_list (tight_block_line kind ~indent) ppf ls 22 | 23 | let label ~indent ppf l = 24 | tight_block_lines "Label" ~indent ppf (Label.text l) 25 | 26 | let defined_label ~indent ppf l = 27 | tight_block_lines "Defined label" ~indent ppf (Label.text l) 28 | 29 | let label_def ~indent ppf l = 30 | tight_block_lines "Label definition" ~indent ppf (Label.text l) 31 | 32 | let link_definition ~indent ppf ld = 33 | let label ppf = function 34 | | None -> () | Some l -> cut ppf (); label ~indent ppf l 35 | in 36 | let defined_label ppf = function 37 | | None -> () | Some l -> cut ppf (); defined_label ~indent ppf l 38 | in 39 | let dest ppf = function 40 | | None -> () | Some (_, m) -> cut ppf (); loc "Destination" ~indent ppf m 41 | in 42 | let title ppf = function 43 | | None -> () | Some ls -> cut ppf (); tight_block_lines "Title" ~indent ppf ls 44 | in 45 | pf ppf "%a%a%a%a" 46 | label (Link_definition.label ld) 47 | defined_label (Link_definition.defined_label ld) 48 | dest (Link_definition.dest ld) 49 | title (Link_definition.title ld) 50 | 51 | let link_reference ~indent:n ppf = function 52 | | `Ref (_, l, ref) -> 53 | label ~indent:n ppf l; cut ppf (); label_def ~indent:n ppf ref 54 | | `Inline (ld, m) -> 55 | pf ppf "%a%a" (loc "Inline" ~indent:n) m (link_definition ~indent:n) ld 56 | 
57 | let rec inlines ~indent ppf = function 58 | | [] -> () | is -> cut ppf (); Format.pp_print_list (inline ~indent) ppf is 59 | 60 | and link kind ~indent:n ppf (l, m) = 61 | pf ppf "@[%a@,%a@,%a@]" 62 | (loc kind ~indent:n) m 63 | (inline ~indent:(n + 2)) (Inline.Link.text l) 64 | (link_reference ~indent:(n + 2)) (Inline.Link.reference l) 65 | 66 | and inline ~indent:n ppf = function 67 | | Inline.Autolink (a, m) -> 68 | let is_email = Inline.Autolink.is_email a in 69 | let link = Inline.Autolink.link a in 70 | let autolink = strf "Autolink (email:%b)" is_email in 71 | pf ppf "@[%a@,%a@]" 72 | (loc autolink ~indent:n) m (loc "Link" ~indent:(n + 2)) (snd link) 73 | | Inline.Break (b, m) -> 74 | let label = match Inline.Break.type' b with 75 | | `Hard -> "Hard break" | `Soft -> "Soft break" 76 | in 77 | let layout_before = Inline.Break.layout_before b in 78 | let layout_after = Inline.Break.layout_after b in 79 | pf ppf "@[%a@,%a@,%a@]" 80 | (loc label ~indent:n) m 81 | (loc "Layout before" ~indent:(n + 2)) (snd layout_before) 82 | (loc "Layout after" ~indent:(n + 2)) (snd layout_after) 83 | | Inline.Code_span (c, m) -> 84 | let line = tight_block_line "Code span line" ~indent:(n + 2) in 85 | pf ppf "@[%a@,%a@]" 86 | (loc "Code span" ~indent:n) m 87 | (Format.pp_print_list line) (Inline.Code_span.code_layout c) 88 | | Inline.Emphasis (e, m) -> 89 | let i = Inline.Emphasis.inline e in 90 | pf ppf "@[%a@,%a@]" 91 | (loc "Emphasis" ~indent:n) m (inline ~indent:(n + 2)) i 92 | | Inline.Image i -> 93 | link "Image" ~indent:n ppf i 94 | | Inline.Inlines (is, m) -> 95 | pf ppf "@[%a%a@]" 96 | (loc "Inlines" ~indent:n) m (inlines ~indent:(n + 2)) is 97 | | Inline.Link l -> 98 | link "Link" ~indent:n ppf l 99 | | Inline.Raw_html (r, m) -> 100 | let line = tight_block_line "Raw HTML line" ~indent:(n + 2) in 101 | pf ppf "@[%a@,%a@]" 102 | (loc "Raw HTML" ~indent:n) m (Format.pp_print_list line) r 103 | | Inline.Strong_emphasis (e, m) -> 104 | let i = Inline.Emphasis.inline 
e in 105 | pf ppf "@[%a@,%a@]" 106 | (loc "Strong emphasis" ~indent:n) m (inline ~indent:(n + 2)) i 107 | | Inline.Text (t, m) -> 108 | loc "Text" ~indent:n ppf m 109 | | Inline.Ext_strikethrough (s, m) -> 110 | let i = Inline.Strikethrough.inline s in 111 | pf ppf "@[%a@,%a@]" 112 | (loc "Strikethrough" ~indent:n) m (inline ~indent:(n + 2)) i 113 | | Inline.Ext_math_span (ms, m) -> 114 | let display = Inline.Math_span.display ms in 115 | let line = tight_block_line "Math span line" ~indent:(n + 2) in 116 | pf ppf "@[%a@,%a@]" 117 | (loc (if display then "Math display span" else "Math span") ~indent:n) m 118 | (Format.pp_print_list line) (Inline.Math_span.tex_layout ms) 119 | | _ -> 120 | indent ppf n; Format.pp_print_string ppf "Unknown Cmarkit inline" 121 | 122 | let code_block ~indent:n label cb m ppf = 123 | let line ppf (_, m) = loc "Code line" ~indent:(n + 2) ppf m in 124 | let lines ppf = function 125 | | [] -> () | ls -> cut ppf (); Format.pp_print_list line ppf ls 126 | in 127 | let info_string ppf = function 128 | | None -> () | Some (_, m) -> 129 | cut ppf (); loc "Info string" ~indent:(n + 2) ppf m 130 | in 131 | let opening_fence ppf cb = match Block.Code_block.layout cb with 132 | | `Indented -> () | `Fenced f -> 133 | cut ppf (); 134 | loc "Opening fence" ~indent:(n + 2) ppf (snd f.opening_fence) 135 | in 136 | let closing_fence ppf cb = match Block.Code_block.layout cb with 137 | | `Indented -> () | `Fenced f -> 138 | match f.closing_fence with 139 | | None -> () | Some (_, m) -> 140 | cut ppf (); loc "Closing fence" ~indent:(n + 2) ppf m 141 | in 142 | pf ppf "@[%a%a%a%a%a@]" 143 | (loc label ~indent:n) m 144 | opening_fence cb 145 | info_string (Block.Code_block.info_string cb) 146 | lines (Block.Code_block.code cb) 147 | closing_fence cb 148 | 149 | let rec blocks ~indent ppf = function 150 | | [] -> () | bs -> cut ppf (); Format.pp_print_list (block ~indent) ppf bs 151 | 152 | and block ~indent:n ppf = function 153 | | Block.Blank_line (_, m) -> 
154 | loc "Blank line" ~indent:n ppf m 155 | | Block.Block_quote (bq, m) -> 156 | let b = Block.Block_quote.block bq in 157 | pf ppf "@[%a@,%a@]" 158 | (loc "Block quote" ~indent:n) m (block ~indent:(n + 2)) b 159 | | Block.Blocks (bs, m) -> 160 | pf ppf "@[%a%a@]" 161 | (loc "Blocks" ~indent:n) m (blocks ~indent:(n + 2)) bs 162 | | Block.Code_block (cb, m) -> 163 | code_block ~indent:n "Code block" cb m ppf 164 | | Block.Heading (h, m) -> 165 | let level = Block.Heading.level h in 166 | let heading = "Heading, level " ^ Int.to_string level in 167 | let setext_underline ppf h = match Block.Heading.layout h with 168 | | `Atx _ -> () | `Setext st -> 169 | cut ppf (); 170 | loc "Setext underline" ~indent:(n + 2) ppf (snd st.underline_count) 171 | in 172 | let i = Block.Heading.inline h in 173 | pf ppf "@[%a@,%a%a@]" 174 | (loc heading ~indent:n) m (inline ~indent:(n + 2)) i 175 | setext_underline h 176 | | Block.Html_block (lines, m) -> 177 | pf ppf "@[%a@,%a@]" 178 | (loc "HTML block" ~indent:n) m 179 | (Format.pp_print_list (block_line "HTML line" ~indent:(n + 2))) lines 180 | | Block.Link_reference_definition ((ld : Link_definition.t), m) -> 181 | pf ppf "@[%a%a@]" 182 | (loc "Link reference definition" ~indent:n) m 183 | (link_definition ~indent:(n + 2)) ld 184 | | Block.List (l, m) -> 185 | let task_marker ppf i = match Block.List_item.ext_task_marker i with 186 | | None -> () 187 | | Some (_, m) -> 188 | cut ppf (); (loc ~indent:(n + 4) "Task marker") ppf m 189 | in 190 | let list_item ppf (i, m) = 191 | pf ppf "@[%a@,%a%a@,%a@]" 192 | (loc ~indent:(n + 2) "List item") m 193 | (loc ~indent:(n + 4) "List marker") (snd (Block.List_item.marker i)) 194 | task_marker i 195 | (block ~indent:(n + 4)) (Block.List_item.block i) 196 | in 197 | let list = strf "List (tight:%b)" (Block.List'.tight l) in 198 | let items = Block.List'.items l in 199 | pf ppf "@[%a@,%a@]" 200 | (loc list ~indent:n) m (Format.pp_print_list list_item) items 201 | | Block.Paragraph (p, m) -> 202 
| pf ppf "@[%a@,%a@]" 203 | (loc "Paragraph" ~indent:n) m 204 | (inline ~indent:(n + 2)) (Block.Paragraph.inline p) 205 | | Block.Thematic_break (_, m) -> 206 | loc "Thematic break" ~indent:n ppf m 207 | | Block.Ext_math_block (cb, m) -> 208 | code_block ~indent:n "Math block" cb m ppf 209 | | Block.Ext_table (t, m) -> 210 | let col ~indent:n ppf (i, _) = inline ~indent:n ppf i in 211 | let row ~indent:n ppf = function 212 | | (`Header is, m), _ -> 213 | pf ppf "@[%a@,%a@]" 214 | (loc "Header row" ~indent:n) m 215 | (Format.pp_print_list (col ~indent:(n + 2))) is 216 | | (`Data is, m), _ -> 217 | pf ppf "@[%a@,%a@]" 218 | (loc "Data row" ~indent:n) m 219 | (Format.pp_print_list (col ~indent:(n + 2))) is 220 | | (`Sep seps, m), _ -> 221 | pf ppf "@[%a@,%a@]" 222 | (loc "Separator line" ~indent:n) m 223 | (Format.pp_print_list (loc "Separator" ~indent:(n + 2))) 224 | (List.map snd seps) 225 | in 226 | pf ppf "@[%a@,%a@]" 227 | (loc ~indent:n "Table") m 228 | (Format.pp_print_list (row ~indent:(n + 2))) (Block.Table.rows t) 229 | | Block.Ext_footnote_definition (fn, m) -> 230 | let b = Block.Footnote.block fn in 231 | let l = Block.Footnote.label fn in 232 | pf ppf "@[%a@,%a@,%a@]" 233 | (loc "Footnote definition" ~indent:n) m 234 | (label ~indent:(n + 2)) l (block ~indent:(n + 2)) b 235 | | _ -> 236 | indent ppf n; Format.pp_print_string ppf "Unknown Cmarkit block" 237 | 238 | let doc_locs ppf doc = block ~indent:0 ppf (Doc.block doc) 239 | 240 | let locs ~files ~strict ~no_layout = 241 | let locs ~file contents = 242 | let locs = true and layout = not no_layout in 243 | let doc = Cmarkit.Doc.of_string ~file ~locs ~layout ~strict contents in 244 | doc_locs Format.std_formatter doc 245 | in 246 | Cmarkit_cli.process_files locs files 247 | 248 | (* Command line interface *) 249 | 250 | open Cmdliner 251 | open Cmdliner.Term.Syntax 252 | 253 | let cmd = 254 | let doc = "Show CommonMark parse locations" in 255 | let exits = Cmarkit_cli.Exit.exits in 256 | let man = [ 257 
| `S Manpage.s_description; 258 | `P "$(tname) outputs CommonMark parse locations."; 259 | `Blocks Cmarkit_cli.common_man; ] 260 | in 261 | Cmd.make (Cmd.info "locs" ~doc ~exits ~man) @@ 262 | let+ files = Cmarkit_cli.files and+ strict = Cmarkit_cli.strict 263 | and+ no_layout = Cmarkit_cli.no_layout in 264 | locs ~files ~strict ~no_layout 265 | -------------------------------------------------------------------------------- /src/cmarkit_commonmark.mli: -------------------------------------------------------------------------------- 1 | (*--------------------------------------------------------------------------- 2 | Copyright (c) 2023 The cmarkit programmers. All rights reserved. 3 | SPDX-License-Identifier: ISC 4 | ---------------------------------------------------------------------------*) 5 | 6 | (** Rendering CommonMark to CommonMark. 7 | 8 | Generates CommonMark. If your document was parsed with 9 | [layout:true], it preserves most of the source layout on output. 10 | This won't be perfect, make sure you understand the 11 | {{!layout}details} before reporting issues. 12 | 13 | See {{!page-index.quick}an example}. 14 | 15 | {b Warning.} Rendering outputs are unstable. They may be tweaked 16 | even between minor versions of the library. *) 17 | 18 | (** {1:rendering Rendering} *) 19 | 20 | val of_doc : Cmarkit.Doc.t -> string 21 | (** [of_doc d] is a CommonMark document for [d]. See {!val-renderer} for 22 | more details. *) 23 | 24 | (** {1:renderer Renderer} *) 25 | 26 | val renderer : unit -> Cmarkit_renderer.t 27 | (** [renderer ()] is the default CommonMark renderer. This renders 28 | the strict CommonMark abstract syntax tree and the supported 29 | Cmarkit {{!Cmarkit.extensions}extensions}. 30 | 31 | The inline, block and document renderers always return 32 | [true]. Unknown block and inline values are rendered by an HTML 33 | comment (as permitted by the CommonMark specification). 
34 | 35 | See {{!Cmarkit_renderer.example}this example} to extend or 36 | selectively override the renderer. *) 37 | 38 | (** {1:render Render functions} 39 | 40 | Only useful if you extend the renderer. *) 41 | 42 | (** {2:indents Newlines and indentation} *) 43 | 44 | val newline : Cmarkit_renderer.context -> unit 45 | (** [newline c] starts a new line, except on the first call on [c] which is 46 | a nop. *) 47 | 48 | type indent = 49 | [ `I of int (** Indentation by given amount. *) 50 | | `L of int * string * int * Uchar.t option 51 | (** Indent before, list marker, indent after, list item task extension *) 52 | | `Q of int (** Indentation followed by a block quote marker and a space *) 53 | | `Fn of int * Cmarkit.Label.t (** Indent before, label (footnote extension)*)] 54 | (** The type for specifying block indentation. *) 55 | 56 | val push_indent : Cmarkit_renderer.context -> indent -> unit 57 | (** [push_indent c i] pushes [i] on the current indentation of [c]. This 58 | does not render anything. *) 59 | 60 | val pop_indent : Cmarkit_renderer.context -> unit 61 | (** [pop_indent c] pops the last indentation pushed on [c]. This 62 | does not render anything. *) 63 | 64 | val indent : Cmarkit_renderer.context -> unit 65 | (** [indent c] outputs current indentation on [c]. Note that [`L] 66 | and [`Fn] get replaced by an [`I] indent on subsequent lines, that 67 | is the list or footnote marker is output only once. *) 68 | 69 | (** {2:bslash Backslash escaping} *) 70 | 71 | module Char_set : Set.S with type elt = char 72 | (** Sets of US-ASCII characters. *) 73 | 74 | val escaped_string : 75 | ?esc_ctrl:bool -> Cmarkit_renderer.context -> Char_set.t -> string -> unit 76 | (** [escaped_string ?esc_ctrl c cs s] renders [s] on [c] with 77 | characters in [cs] backslash escaped. If [esc_ctrl] is [true] 78 | (default) {{:https://spec.commonmark.org/0.31.2/#ascii-control-character} 79 | ASCII control characters} are escaped to decimal escapes.
*) 80 | 81 | val buffer_add_escaped_string : 82 | ?esc_ctrl:bool -> Buffer.t -> Char_set.t -> string -> unit 83 | (** [buffer_add_escaped_string b cs s] is {!escaped_string} but 84 | appends to a buffer value. *) 85 | 86 | val escaped_text : Cmarkit_renderer.context -> string -> unit 87 | (** [escaped_text c s] renders [s] on [c] trying to be smart about escaping 88 | CommonMark structural symbols for {!Cmarkit.Inline.extension-Text} inlines. 89 | We assume text can be anywhere in a sequence of inlines and in particular 90 | that it can start a line. This function also takes into account 91 | the existence of the {{!Cmarkit.extensions}extensions}. 92 | 93 | As such we escape: 94 | 95 | {ul 96 | {- These block markers: [-] [+] [_] [=] only if present at [s.[0]].} 97 | {- Only if at the end of the string or if followed by a space or a tab: [#]} 98 | {- Only the first of a run longer than 1: [~] 99 | (avoid creating a {{!Cmarkit.ext_strikethrough}strikethrough extension} 100 | or a code fence).} 101 | {- [&] if followed by a US-ASCII letter or [#].} 102 | {- [!] or [~] if it is the last character of [s] (could respectively 103 | create an unwanted image link if followed by a link inline or 104 | a code fence if followed by a {{!Cmarkit.ext_strikethrough}strikethrough} 105 | inline).} 106 | {- [.] or [)] only if preceded by at most 9 digits to the start of text 107 | and followed by a space, tab or the end of string.} 108 | {- Everywhere, [`] [*] [_] [\ ] [<] [>] [\[] [\]], 109 | {{:https://spec.commonmark.org/0.31.2/#ascii-control-character} 110 | ASCII control characters}, [$] ({{!Cmarkit.ext_math_inline}inline math 111 | extension}), [|] ({{!Cmarkit.ext_tables}table extension}) }} *) 112 | 113 | val buffer_add_escaped_text : Buffer.t -> string -> unit 114 | (** [buffer_add_escaped_text b s] is {!escaped_text} but appends to 115 | a buffer value.
*) 116 | 117 | (** {1:layout Source layout preservation} 118 | 119 | The abstract syntax tree has a few block cases and data fields to 120 | represent the source document layout. This allows updating 121 | CommonMark documents without normalizing them too much when they 122 | are {{!Cmarkit.Doc.of_string}parsed} with [layout:true]. 123 | 124 | To keep things reasonably simple a few things are {b not} attempted like: 125 | 126 | {ol 127 | {- Preserving entities and character references.} 128 | {- Preserving the exact line by line indentation layout of container 129 | blocks.} 130 | {- Preserving lazy continuation lines.} 131 | {- Keeping track of used newlines except for the first one.} 132 | {- Preserving layout source location information when it can be 133 | reconstructed from the document data source location.}} 134 | 135 | In general we try to keep the following desirable properties 136 | for the abstract syntax tree definition: 137 | 138 | {ol 139 | {- Layout information should not interfere with document data or 140 | be affected by it. Otherwise data updates also need to update 141 | the layout data, which is error prone and inconvenient.} 142 | {- Abstract syntax trees resulting from the source document, from 143 | renders of the source document parsed with or without 144 | [layout:true] should all render to the same HTML.}} 145 | 146 | In practice, since CommonMark is not context free, point 1. is not 147 | always achieved. In particular in {!Cmarkit.Inline.extension-Code_span} the 148 | number of delimiting backticks depends on the code content 149 | ({!Cmarkit.Inline.Code_span.of_string} computes that for you). 150 | 151 | The renderer performs almost no checks on the layout data. You 152 | should be careful if you fill these yourself since you could 153 | generate CommonMark that will be misinterpreted.
Layout 154 | data of pristine nodes coming out of {!Cmarkit.Doc.of_string}, created 155 | with the {!Cmarkit.Inline} and {!Cmarkit.Block} constructors 156 | should not need your attention (respect their input constraints 157 | though). *) 158 | 159 | (** {2:rendering_class Classifying renderings} 160 | 161 | We say that a CommonMark render: 162 | {ul 163 | {- is {e correct}, if the result renders the same HTML 164 | as the source document. This can be checked with the 165 | [cmarkit] tool included in the distribution: 166 | {[ 167 | cmarkit commonmark --html-diff mydoc.md 168 | ]} 169 | If a difference shows up, the rendering is said to be {e incorrect}.} 170 | {- {e round trips}, if the result is byte-for-byte equal to the 171 | source document. This can be checked with the [cmarkit] tool 172 | included in the distribution: 173 | {[ 174 | cmarkit commonmark --diff mydoc.md 175 | ]} 176 | If a difference shows up, the rendering does not round trip but 177 | it may still be correct.}} *) 178 | 179 | (** {2:known_diffs Known correct differences} 180 | 181 | In general lack of round trip is due to: 182 | 183 | {ul 184 | {- Loss of layout on input (see above).} 185 | {- Eager escaping of CommonMark delimiters (the escape strategy 186 | is {{!escaped_text}here}).} 187 | {- Churn around blank lines which can be part of blocks without 188 | adhering to their structural convention.}} 189 | 190 | Please do not report issues for differences that are due to the 191 | following: 192 | 193 | {ol 194 | {- Source US-ASCII control characters in textual data render as decimal 195 | character references in the output.} 196 | {- Source entity and character references are lost during parsing and 197 | thus replaced by their definition in the output.} 198 | {- Source tab stop advances may be replaced by spaces in the output.} 199 | {- Source escaped characters may end up unescaped in the output.} 200 | {- Source unescaped characters may end up escaped in the output.} 201 | {- Source 
lazy continuation lines are made part of blocks in the output.} 202 | {- Source indented blank lines following indented code blocks 203 | lose four spaces of indentation (as per specification these are not 204 | part of the block).} 205 | {- Source unindented blank lines in indented code blocks are indented 206 | in the output.} 207 | {- Source fenced code block indentation is retained from the opening 208 | fence and used for the following lines in the output.} 209 | {- Source block quote indentation is retained from the first line 210 | and used for the following lines in the output. The optional space 211 | following the quotation mark ['>'] is made mandatory. } 212 | {- Source list item indentation is regularized, in particular blank lines 213 | will indent.} 214 | {- Source list items that start with an empty line get a space after 215 | their marker.} 216 | {- The newline used in the output is the one found in the rendered 217 | {!Cmarkit.Doc.t} value.}} 218 | 219 | {e Simple} and {e implemented} round trip improvements to the 220 | renderer are welcome. 221 | 222 | {2:known_incorrect Known incorrect renderings} 223 | 224 | Please do not report issues for incorrect renderings that are due to the 225 | following (and unlikely to be fixed): 226 | 227 | {ol 228 | {- Use of entities and character references around structural 229 | CommonMark symbols can make things go wrong. These get resolved 230 | after inline parsing because they can't be used to stand for 231 | structural CommonMark symbols, however once they have been resolved they 232 | can interact with parsing. Here is an example: 233 | {[ 234 | *emph&nbsp;* 235 | ]} 236 | It parses as emphasis. But if we render it to CommonMark 237 | non-breaking space renders as is and we get: 238 | {[ 239 | *emph * 240 | ]} 241 | which no longer parses as emphasis. 242 | 243 | Note in this particular case it is possible to do something 244 | about it by being smarter about the context when escaping.
However 245 | there's a trade-off between renderer complexity and the (conjectured) 246 | paucity of these cases.} 247 | } 248 | 249 | Otherwise, if you spot an incorrect rendering please report a minimal 250 | reproduction case. 251 | 252 | {e Simple} and {e implemented} round trip improvements to the 253 | renderer are welcome. 254 | *) 255 | --------------------------------------------------------------------------------