├── .gitattributes ├── .github └── workflows │ ├── deploy-doc.yml │ └── workflow.yml ├── .gitignore ├── .ocamlformat ├── .ocp-indent ├── CHANGES.md ├── LICENSE ├── Makefile ├── README.md ├── bin ├── dune └── main.ml ├── dune-project ├── omd.opam ├── src ├── ast.ml ├── ast_block.ml ├── ast_constructors.ml ├── ast_inline.ml ├── block_parser.ml ├── block_parser.mli ├── dune ├── html.ml ├── html.mli ├── omd.ml ├── omd.mli ├── parser.ml ├── sexp.ml ├── strSlice.ml ├── strSlice.mli └── toc.ml ├── tests ├── John_MacFarlane_said_peg-markdown_takes_forever_to_process_this--jgm_peg-markdown_issues_28.md ├── attributes.md ├── blackbox │ ├── dune │ ├── emphasis.t │ ├── heading-id.t │ ├── normalize-label.t │ └── regression-224.t ├── def_list.md ├── dune ├── dune.inc ├── expect_tests.ml ├── extra_table_tests.md ├── extract_tests.ml ├── gfm_table_spec.md ├── omd.ml └── spec.txt └── tools ├── dune ├── entities.json └── gen_entities.ml /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ml* text eol=lf 2 | *.md text eol=lf 3 | *.html text eol=lf 4 | dune.inc linguist-generated=true 5 | -------------------------------------------------------------------------------- /.github/workflows/deploy-doc.yml: -------------------------------------------------------------------------------- 1 | name: Deploy odoc 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | deploy-doc: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v3 14 | 15 | - name: Use OCaml 4.13.x 16 | uses: ocaml/setup-ocaml@v2 17 | with: 18 | ocaml-compiler: 4.13.x 19 | dune-cache: true 20 | 21 | - name: Deploy odoc to GitHub Pages 22 | uses: ocaml/setup-ocaml/deploy-doc@v2 23 | -------------------------------------------------------------------------------- /.github/workflows/workflow.yml: -------------------------------------------------------------------------------- 1 | name: Main workflow 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: 15 | - macos-latest 16 | - ubuntu-latest 17 | - windows-latest 18 | ocaml-compiler: 19 | # Decision on version matrix informed by https://discuss.ocaml.org/t/which-ocaml-compiler-versions-should-we-run-against-in-ci/7933/2 20 | - 4.13.x 21 | - 4.14.x 22 | include: 23 | - ocaml-compiler: 5.0.x 24 | os: 25 | - ubuntu-latest 26 | - ocaml-compiler: 5.0.x 27 | os: 28 | - macos-latest 29 | 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - name: Checkout code 33 | uses: actions/checkout@v3 34 | 35 | - name: Use OCaml ${{ matrix.ocaml-compiler }} 36 | uses: ocaml/setup-ocaml@v2 37 | with: 38 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 39 | 40 | - name: Install deps 41 | run: opam install . --deps-only --with-doc --with-test 42 | 43 | - name: Build 44 | run: opam exec -- make build 45 | 46 | - name: Test 47 | run: opam exec -- make test 48 | 49 | lint-doc: 50 | runs-on: ubuntu-latest 51 | steps: 52 | - name: Checkout code 53 | uses: actions/checkout@v3 54 | 55 | - name: Use OCaml 4.14.x 56 | uses: ocaml/setup-ocaml@v2 57 | with: 58 | ocaml-compiler: 4.14.x 59 | dune-cache: true 60 | 61 | - name: Lint doc 62 | uses: ocaml/setup-ocaml/lint-doc@v2 63 | 64 | lint-fmt: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - name: Checkout code 68 | uses: actions/checkout@v3 69 | 70 | - name: Use OCaml 4.14.x 71 | uses: ocaml/setup-ocaml@v2 72 | with: 73 | ocaml-compiler: 4.14.x 74 | dune-cache: true 75 | 76 | - name: Lint fmt 77 | uses: ocaml/setup-ocaml/lint-fmt@v2 78 | 79 | lint-opam: 80 | runs-on: ubuntu-latest 81 | steps: 82 | - name: Checkout code 83 | uses: actions/checkout@v3 84 | 85 | - name: Use OCaml 4.14.x 86 | uses: ocaml/setup-ocaml@v2 87 | with: 88 | ocaml-compiler: 4.14.x 89 | dune-cache: true 90 | 91 | - name: Lint opam 92 | uses: ocaml/setup-ocaml/lint-opam@v2 93 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | .merlin 3 | *.install 4 | _opam 5 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version = 0.21.0 2 | exp-grouping = preserve 3 | break-fun-sig = fit-or-vertical 4 | break-fun-decl = fit-or-vertical 5 | wrap-fun-args = false 6 | dock-collection-brackets = false 7 | break-separators = before 8 | break-infix = fit-or-vertical 9 | type-decl = sparse 10 | -------------------------------------------------------------------------------- /.ocp-indent: -------------------------------------------------------------------------------- 1 | match_clause=4 2 | strict_with=auto 3 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 2.0.0~alpha4 2 | ------------ 3 | 4 | - Add an `empty` inline value (#298 @cuihtlauac) 5 | 6 | - Remove stdcompat dependency (#304 #305 @hannesm, review by @samoht) 7 | 8 | - Minimum supported OCaml version is now 4.13 (#304 @hannesm) 9 | 10 | 2.0.0~alpha3 11 | ------------ 12 | 13 | - Expose the HTML escape function `htmlentities` (#295 @cuihtlauac) 14 | 15 | - Support generation of identifiers in headers (#294, @tatchi) 16 | 17 | - Support GitHub-Flavoured Markdown tables (#292, @bobatkey) 18 | 19 | - Update parser to support CommonMark Spec 0.30 (#266, @SquidDev) 20 | 21 | - Preserve the order of input files in the HTML output to stdout (#258, 22 | @patricoferris) 23 | 24 | - Fix all deviations from CommonMark Spec 0.30 (#284, #283, #278, #277, #269, 25 | @tatchi) 26 | 27 | 2.0.0~alpha2 28 | ------------ 29 | 30 | - Clean up AST representation (#229, @sonologico) 31 | 32 | - Restore table of contents generation functionality (#240, @sonologico) 33 | 34 | - Fix parsing of <...>-style links and inline code parsing (#233, @SquidDev) 35 | 36 | - Fix handing of the nonbreaking whitespace character, ASCII code 160 (#247, 37 | @shonfeder) 38 | 39 | - Downgrade dune requirements to 2.0 (@nobj) 40 | 41 | 42 | 2.0.0~alpha1 43 | ------------ 44 | 45 | - Lower OCaml requirement to 4.04.2 (#213, @jfrolich) 46 | 47 | - Big refactoring by @nojb. Changes in interface (simplification of code, might 48 | affect performance a little). To be tested! 49 | 50 | 1.3.x 51 | ----- 52 | 53 | - might stop checking validity of HTML tag *names* and accept any XML-parsable 54 | tag name. 55 | 56 | 1.2.5 57 | ----- 58 | 59 | - only fixes a single bug (an ordered list could be transformed into an 60 | unordered list) 61 | 62 | 1.2.4 63 | ----- 64 | 65 | - only fixes a single bug (some spaces were wrongly handled in the HTML parsing) 66 | 67 | 1.2.2/3 68 | ------- 69 | 70 | - fix a few issues with HTML parsing. 71 | 72 | 1.2.1 73 | ----- 74 | 75 | - mainly fixes issues with HTML parsing. 76 | 77 | 1.2.0 78 | ----- 79 | 80 | - introduces options `-w` and `-W`. Fixes mostly concern subtle uses of `\n`s in 81 | HTML and Markdown outputs. 82 | 83 | 1.1.2 84 | ----- 85 | 86 | - fix: some URL-related parsing issues. 87 | 88 | 1.1.0/1.1.1 89 | ----------- 90 | 91 | - fix: some HTML-related issues. 92 | 93 | 1.0.1 94 | ----- 95 | 96 | - fixes some parsing issues, improves output. (2014-10-02) 97 | 98 | 1.0.0 99 | ----- 100 | 101 | - warning: this release is only partially compatible with previous versions. 102 | 103 | - accept HTML blocks which directly follow each other 104 | 105 | - fix: accept all XML-compatible attribute names for HTML 106 | attributes 107 | 108 | - fix backslash-escaping for hash-ending ATX-titles + fix Markdown output for 109 | Html_block 110 | 111 | - fix (HTML parsing) bugs introduced in 1.0.0.b and 1.0.0.c 112 | 113 | - rewrite parser of block HTML to use the updated Omd.t 114 | 115 | - rewrite parser of inline HTML to use the updated Omd.t 116 | 117 | - upgrade Omd.t for HTML representation 118 | 119 | There will not be any newer 0.9.x release although new bugs have been 120 | discovered. Thus it's recommended to upgrade to the latest 1.x.y. 121 | 122 | 0.9.7 123 | ----- 124 | 125 | - introduction of media:end + bug fixes. 126 | 127 | If you need to have a version that still has `Tag of extension` instead of `Tag 128 | of name * extension` and don't want to upgrade, you may use 0.9.3 129 | 130 | 0.9.6 131 | ----- 132 | 133 | - fix a bug (concerning extensions) introduced by 0.9.4. 134 | 135 | 0.9.5 136 | ----- 137 | 138 | - bug fix + `Tag of extension` changed to `Tag of name * extension` 139 | 140 | 0.9.4 141 | ----- 142 | 143 | - fixes a bug for the new feature 144 | 145 | 0.9.3 146 | ----- 147 | 148 | - new feature `media:type="text/omd"`. This version is recommended if you do 149 | not use that new feature and want to use 0.9.x 150 | 151 | 0.9.2 152 | ----- 153 | 154 | - not released... 155 | 156 | older versions 157 | -------------- 158 | 159 | - cf. [commit log](https://github.com/ocaml/omd/commits/master) 160 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2013-2018, Philippe Wang 4 | Copyright (c) 2018, Nicolás Ojeda Bär 5 | 6 | Permission to use, copy, modify, and/or distribute this software for any 7 | purpose with or without fee is hereby granted, provided that the above 8 | copyright notice and this permission notice appear in all copies. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ## 2 | # Omd 3 | # 4 | # @file 5 | 6 | .PHONY: test build fmt deps 7 | 8 | build: deps 9 | dune build 10 | 11 | deps: 12 | opam install . --deps-only --yes 13 | 14 | test: 15 | dune build @gen --auto-promote 16 | dune runtest 17 | 18 | fmt: 19 | dune build @fmt --auto-promote 20 | # end 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `omd`: Markdown library and tool in OCaml 2 | ========================================= 3 | 4 | Omd is an OCaml library designed to parse, manipulate, and print Markdown into 5 | different formats. In addition to the library, a command-line tool `omd` is 6 | included to easily convert markdown into HTML. 7 | 8 | Omd aims for compliance with the [CommonMark](https://commonmark.org/) standard. 9 | We are currently compliant with [0.30 of the ComonMark 10 | spec](https://spec.commonmark.org/0.30/). 11 | 12 | Omd is developed on GitHub. If you need to report an issue, please do so at 13 | https://github.com/ocaml/omd/issues. 14 | 15 | Installation 16 | ------------ 17 | 18 | The recommended way to install `omd` is via the [opam package manager][opam]. 19 | 20 | You can install versions published to opam with: 21 | 22 | ```sh 23 | $ opam install omd 24 | ``` 25 | 26 | You can install the current development version from the GitHub repository with 27 | 28 | ```sh 29 | $ opam pin git@github.com:ocaml/omd.git 30 | ``` 31 | 32 | Documentation 33 | ------------- 34 | 35 | - View API docs online: https://ocaml.github.io/omd 36 | - View the API docs locally: `odig doc omd` 37 | 38 | Building from source 39 | -------------------- 40 | 41 | You can also build it manually from source with: 42 | 43 | ```sh 44 | $ git clone https://github.com/ocaml/omd.git 45 | $ cd omd 46 | $ make build 47 | ``` 48 | 49 | You can run the test suite with 50 | 51 | ```sh 52 | $ make test 53 | ``` 54 | 55 | Dependencies 56 | ------------ 57 | 58 | The minimum version of OCaml required is 4.13. 59 | 60 | Dependencies are tracked in the [dune-project](./dune-project) and can be 61 | installed by running: 62 | 63 | ```sh 64 | $ opam install . --deps-only 65 | # or 66 | $ make deps 67 | ``` 68 | 69 | History 70 | ------- 71 | 72 | Omd 1 was developed by [Philippe Wang](https://github.com/pw374/) at [OCaml 73 | Labs](http://ocaml.io/) in [Cambridge](http://www.cl.cam.ac.uk). 74 | 75 | Its development was motivated by at least these facts: 76 | 77 | - We wanted an OCaml implementation of Markdown; some OCaml parsers of Markdown 78 | existed before but they were incomplete. It's easier for an OCaml project to 79 | depend on an pure-OCaml implementation of Markdown than to depend some 80 | interface to a library implemented using another language, and this is ever 81 | more important since [Opam](https://opam.ocaml.org) exists. 82 | 83 | - We wanted to provide a way to make the contents of the 84 | [OCaml.org](http://ocaml.org/) website be essentially in Markdown instead of 85 | HTML. And we wanted to this website to be implemented in OCaml. 86 | 87 | - Having an OCaml implementation of Markdown is virtually mandatory for those 88 | who want to use a Markdown parser in a [Mirage](http://www.openmirage.org) 89 | application. Note that OMD has replaced the previous Markdown parser of 90 | [COW](https://github.com/mirage/ocaml-cow), which has been developed as part 91 | of the Mirage project. 92 | 93 | Omd 2 started development in 2020, beginning [Nicolás Ojeda 94 | Bär](https://github.com/nojb)'s redesign and rewrite, and is currently ongoing. 95 | Omd 2 has yet to reach feature parity with Omd 1. 96 | 97 | Thanks 98 | ------ 99 | 100 | Special thanks for feedback and contributions to this project goes out to: 101 | 102 | - [Christophe Troestler](https://github.com/Chris00) 103 | - [Ashish Argawal](https://github.com/agarwal) 104 | - [Sebastien Mondet](https://github.com/smondet) 105 | - [Thomas Gazagnaire](https://github.com/samoht) 106 | - [Daniel Bünzli](https://github.com/dbuenzli) 107 | - [Amir Chaudry](https://github.com/amirmc) 108 | - [Anil Madhavapeddy](https://github.com/avsm/) 109 | - [David Sheets](https://github.com/dsheets/) 110 | - [Jeremy Yallop](https://github.com/yallop/) 111 | - [Nicolás Ojeda Bär](https://github.com/nojb) 112 | - [Raphael Sousa Santos](https://sonologi.co/) 113 | - [Corentin Leruth](https://github.com/tatchi) 114 | - [Bob Atkey](https://bentnib.org/) 115 | - *please insert your name here if you believe you've been forgotten* 116 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name main) 3 | (public_name omd) 4 | (libraries omd dune-build-info)) 5 | -------------------------------------------------------------------------------- /bin/main.ml: -------------------------------------------------------------------------------- 1 | let with_open_in fn f = 2 | let ic = open_in fn in 3 | match f ic with 4 | | r -> 5 | close_in ic; 6 | r 7 | | exception e -> 8 | close_in_noerr ic; 9 | raise e 10 | 11 | let with_open_out fn f = 12 | let oc = open_out fn in 13 | match f oc with 14 | | r -> 15 | close_out oc; 16 | r 17 | | exception e -> 18 | close_out_noerr oc; 19 | raise e 20 | 21 | let process ?auto_identifiers ic oc = 22 | let md = Omd.of_channel ic in 23 | output_string oc (Omd.to_html ?auto_identifiers md) 24 | 25 | let print_version () = 26 | let version = 27 | match Build_info.V1.version () with 28 | | None -> "n/a" 29 | | Some v -> Build_info.V1.Version.to_string v 30 | in 31 | print_endline version; 32 | exit 0 33 | 34 | let input = ref [] 35 | let output = ref "" 36 | let auto_identifiers = ref None 37 | 38 | let spec = 39 | [ ( "-o" 40 | , Arg.Set_string output 41 | , " file.html Specify the output file (default is stdout)." ) 42 | ; ( "--auto-identifiers" 43 | , Arg.Bool (fun x -> auto_identifiers := Some x) 44 | , " Should identifiers be automatically assigned to headings." ) 45 | ; ( "--version" 46 | , Arg.Unit print_version 47 | , " Display the version of the currently installed omd." ) 48 | ; ( "--" 49 | , Rest (fun s -> input := s :: !input) 50 | , " Consider all remaining arguments as input file names." ) 51 | ] 52 | 53 | let main () = 54 | Arg.parse 55 | (Arg.align spec) 56 | (fun s -> input := s :: !input) 57 | "omd [options] [inputfile1 .. inputfileN] [options]"; 58 | let with_output f = 59 | if !output = "" then f stdout else with_open_out !output f 60 | in 61 | let auto_identifiers = !auto_identifiers in 62 | with_output @@ fun oc -> 63 | if !input = [] then process ?auto_identifiers stdin oc 64 | else 65 | let f filename = 66 | with_open_in filename @@ fun ic -> process ?auto_identifiers ic oc 67 | in 68 | List.(iter f (rev !input)) 69 | 70 | let () = 71 | try main () with 72 | | Sys_error msg -> 73 | Printf.eprintf "Error: %s\n" msg; 74 | exit 1 75 | | exn -> 76 | Printf.eprintf "Error: %s\n" (Printexc.to_string exn); 77 | Printexc.print_backtrace stderr; 78 | exit 1 79 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.7) 2 | (name omd) 3 | (version 2.0.0~alpha1) 4 | 5 | (cram enable) 6 | 7 | (generate_opam_files) 8 | 9 | (license ISC) 10 | (authors "Philippe Wang " 11 | "Nicolás Ojeda Bär ") 12 | (maintainers "Shon Feder " 13 | "Raphael Sousa Santos <@sonologico>") 14 | (source (github ocaml/omd)) 15 | 16 | (package 17 | (name omd) 18 | (synopsis "A Markdown frontend in pure OCaml") 19 | (description 20 | "This Markdown library is implemented using only pure OCaml (including 21 | I/O operations provided by the standard OCaml compiler distribution). 22 | OMD is meant to be as faithful as possible to the original Markdown. 23 | Additionally, OMD implements a few Github markdown features, an 24 | extension mechanism, and some other features. Note that the opam 25 | package installs both the OMD library and the command line tool `omd`.") 26 | (tags (org:ocamllabs org:mirage)) 27 | (depends 28 | (ocaml (>= 4.13)) 29 | uutf 30 | uucp 31 | uunf 32 | (dune-build-info (>= 2.7)) 33 | (ppx_expect :with-test))) 34 | -------------------------------------------------------------------------------- /omd.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "2.0.0~alpha1" 4 | synopsis: "A Markdown frontend in pure OCaml" 5 | description: """ 6 | This Markdown library is implemented using only pure OCaml (including 7 | I/O operations provided by the standard OCaml compiler distribution). 8 | OMD is meant to be as faithful as possible to the original Markdown. 9 | Additionally, OMD implements a few Github markdown features, an 10 | extension mechanism, and some other features. Note that the opam 11 | package installs both the OMD library and the command line tool `omd`.""" 12 | maintainer: [ 13 | "Shon Feder " "Raphael Sousa Santos <@sonologico>" 14 | ] 15 | authors: [ 16 | "Philippe Wang " 17 | "Nicolás Ojeda Bär " 18 | ] 19 | license: "ISC" 20 | tags: ["org:ocamllabs" "org:mirage"] 21 | homepage: "https://github.com/ocaml/omd" 22 | bug-reports: "https://github.com/ocaml/omd/issues" 23 | depends: [ 24 | "dune" {>= "2.7"} 25 | "ocaml" {>= "4.13"} 26 | "uutf" 27 | "uucp" 28 | "uunf" 29 | "dune-build-info" {>= "2.7"} 30 | "ppx_expect" {with-test} 31 | "odoc" {with-doc} 32 | ] 33 | build: [ 34 | ["dune" "subst"] {dev} 35 | [ 36 | "dune" 37 | "build" 38 | "-p" 39 | name 40 | "-j" 41 | jobs 42 | "@install" 43 | "@runtest" {with-test} 44 | "@doc" {with-doc} 45 | ] 46 | ] 47 | dev-repo: "git+https://github.com/ocaml/omd.git" 48 | -------------------------------------------------------------------------------- /src/ast.ml: -------------------------------------------------------------------------------- 1 | module Impl = struct 2 | include Ast_inline 3 | include Ast_block.List_types 4 | include Ast_block.Table_alignments 5 | include Ast_block.WithInline 6 | 7 | type attributes = (string * string) list 8 | type doc = attributes block list 9 | end 10 | 11 | module type Intf = module type of Impl 12 | 13 | module Util = struct 14 | include Impl 15 | 16 | let same_block_list_kind k1 k2 = 17 | match (k1, k2) with 18 | | Ordered (_, c1), Ordered (_, c2) | Bullet c1, Bullet c2 -> c1 = c2 19 | | _ -> false 20 | end 21 | -------------------------------------------------------------------------------- /src/ast_block.ml: -------------------------------------------------------------------------------- 1 | module type BlockContent = sig 2 | type 'a t 3 | end 4 | 5 | module StringContent = struct 6 | type 'attr t = string 7 | end 8 | 9 | module InlineContent = struct 10 | type 'attr t = 'attr Ast_inline.inline 11 | end 12 | 13 | module List_types = struct 14 | type list_type = 15 | | Ordered of int * char 16 | | Bullet of char 17 | 18 | type list_spacing = 19 | | Loose 20 | | Tight 21 | end 22 | 23 | module Table_alignments = struct 24 | type cell_alignment = 25 | | Default 26 | | Left 27 | | Centre 28 | | Right 29 | end 30 | 31 | open List_types 32 | open Table_alignments 33 | 34 | module Make (C : BlockContent) = struct 35 | type 'attr def_elt = 36 | { term : 'attr C.t 37 | ; defs : 'attr C.t list 38 | } 39 | 40 | (* A value of type 'attr is present in all variants of this type. We use it to associate 41 | extra information to each node in the AST. Cn the common case, the attributes type defined 42 | above is used. We might eventually have an alternative function to parse blocks while keeping 43 | concrete information such as source location and we'll use it for that as well. *) 44 | type 'attr block = 45 | | Paragraph of 'attr * 'attr C.t 46 | | List of 'attr * list_type * list_spacing * 'attr block list list 47 | | Blockquote of 'attr * 'attr block list 48 | | Thematic_break of 'attr 49 | | Heading of 'attr * int * 'attr C.t 50 | | Code_block of 'attr * string * string 51 | | Html_block of 'attr * string 52 | | Definition_list of 'attr * 'attr def_elt list 53 | | Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list 54 | (** A table is represented by a header row, which is a list of pairs of 55 | header cells and alignments, and a list of rows *) 56 | end 57 | 58 | module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct 59 | module SrcBlock = Make (Src) 60 | module DstBlock = Make (Dst) 61 | 62 | let rec map (f : 'attr Src.t -> 'attr Dst.t) : 63 | 'attr SrcBlock.block -> 'attr DstBlock.block = function 64 | | SrcBlock.Paragraph (attr, x) -> DstBlock.Paragraph (attr, f x) 65 | | List (attr, ty, sp, bl) -> 66 | List (attr, ty, sp, List.map (List.map (map f)) bl) 67 | | Blockquote (attr, xs) -> Blockquote (attr, List.map (map f) xs) 68 | | Thematic_break attr -> Thematic_break attr 69 | | Heading (attr, level, text) -> Heading (attr, level, f text) 70 | | Definition_list (attr, l) -> 71 | let f { SrcBlock.term; defs } = 72 | { DstBlock.term = f term; defs = List.map f defs } 73 | in 74 | Definition_list (attr, List.map f l) 75 | | Code_block (attr, label, code) -> Code_block (attr, label, code) 76 | | Html_block (attr, x) -> Html_block (attr, x) 77 | | Table (attr, headers, rows) -> 78 | Table 79 | ( attr 80 | , List.map (fun (header, alignment) -> (f header, alignment)) headers 81 | , List.map (List.map f) rows ) 82 | end 83 | 84 | module Mapper = MakeMapper (StringContent) (InlineContent) 85 | module Raw = Make (StringContent) 86 | module WithInline = Make (InlineContent) 87 | -------------------------------------------------------------------------------- /src/ast_constructors.ml: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | 3 | module type Intf = sig 4 | (** Functions to help constructing the elements of a {!doc}. 5 | 6 | E.g., 7 | 8 | {[ 9 | let open Omd.Ctor in 10 | let para = 11 | p ~attrs:[ ("class", "my-para") ] [ txt "Content of"; em "this"; txt "paragraph" ] 12 | in 13 | [ blockquote [ para; hr; p [ txt "Content of second paragraph" ] ] ] 14 | ]} 15 | 16 | Produces 17 | 18 | {v 19 |
20 |

Content ofthisparagraph

21 |
22 |

Content of second paragraph

23 |
24 | v} 25 | 26 | The optional [attrs] argument always defaults to an empty list, and can 27 | generally be omitted. *) 28 | 29 | (** {3 Constructors for inline elements} *) 30 | 31 | val empty : attributes inline 32 | (** [empty] is an empty inline element. *) 33 | 34 | val txt : ?attrs:attributes -> string -> attributes inline 35 | (** [txt ~attrs s] is {{!Text} [Text (attrs, s)]}. *) 36 | 37 | val em : ?attrs:attributes -> string -> attributes inline 38 | (** [em ~attrs s] is {{!Emph} [Emph (attrs, txt s)]}. See {!txt}. *) 39 | 40 | val strong : ?attrs:attributes -> string -> attributes inline 41 | (** [strong ~attrs s] is {{!Strong} [Strong (attrs, txt s)]}. See {!txt}. *) 42 | 43 | val code : ?attrs:attributes -> string -> attributes inline 44 | (** [code ~attrs s] is {{!Code} [Code (attrs, s)]}. *) 45 | 46 | val br : attributes inline 47 | (** [br] is {{!Hard_break}[Hard_break []]}. *) 48 | 49 | val nl : attributes inline 50 | (** [nl] is {{!Soft_break}[Soft_break []]}. *) 51 | 52 | val a : 53 | ?attrs:attributes 54 | -> ?title:string 55 | -> url:string 56 | -> string 57 | -> attributes inline 58 | (** [a ~attrs ~title ~url label] is a link around the text of [label], 59 | pointing to the [url], with the optional title [title] and additional [attrs]. 60 | See {!Link}. *) 61 | 62 | val img : 63 | ?attrs:attributes 64 | -> ?title:string 65 | -> alt:string 66 | -> string 67 | -> attributes inline 68 | (** [img ~attrs ~title ~alt src] is an image from the given [src] that has the 69 | [alt] text as a fallback, with the optional title [title] and additional 70 | [attrs]. See {!Image}. *) 71 | 72 | val html : string -> attributes inline 73 | (** [html s] is an inline HTML string. See {!Html}. *) 74 | 75 | (** {3 Constructors for block-level elements} *) 76 | 77 | val p : ?attrs:attributes -> attributes inline list -> attributes block 78 | (** [p ~attrs inlines] is a pragraph block holding the given [inline] 79 | elements. See {!Paragraph}. *) 80 | 81 | val ul : 82 | ?attrs:attributes 83 | -> ?spacing:list_spacing 84 | -> attributes block list list 85 | -> attributes block 86 | (** [ul ~attrs ~spacing items] is an unordered list with the specified [spacing], listing 87 | the given [items]. Each item is a list of block elements. 88 | 89 | - [spacing] defaults to {!Loose}. 90 | 91 | E.g., 92 | 93 | {[ 94 | ul ~spacing:Tight 95 | [ [ p [ txt "Item 1" ] ] 96 | ; [ p [ txt "Item 2" ] ] 97 | ; [ p [ txt "Item 3" ] ] 98 | ] 99 | ]} 100 | 101 | See {!List} and {!Bullet}. *) 102 | 103 | val ol : 104 | ?attrs:attributes 105 | -> ?start:int 106 | -> ?char:[ `Dot | `Paren ] 107 | -> ?spacing:list_spacing 108 | -> attributes block list list 109 | -> attributes block 110 | (** [ol ~attrs ~start ~char ~spacing items] is like {!ul}, but constructs an ordered list, 111 | where [start] is the number to start enumerating from, and [char] indicates the 112 | character following the number in the enumeration. 113 | 114 | - [char] can be either [`Dot] indicating ['.'] or [`Paren] indicating [')'], and 115 | defaults to [`Dot]. 116 | - [start] defaults to [1]. 117 | 118 | See {!List} and {!Ordered}. *) 119 | 120 | val blockquote : 121 | ?attrs:attributes -> attributes block list -> attributes block 122 | (** [blockquote ~attrs blocks] is a blockquote element containing the given 123 | [blocks]. See {!Blockquote}. *) 124 | 125 | val hr : attributes block 126 | (** [hr] is {{!Thematic_break} [Thematic_break []]}. *) 127 | 128 | val h : ?attrs:attributes -> int -> attributes inline list -> attributes block 129 | (** [h ~attrs level inlines] is a heading of the given [level] comprised of 130 | the [inlines]. See {!Heading}. *) 131 | 132 | val code_bl : ?attrs:attributes -> ?lang:string -> string -> attributes block 133 | (** [code_bl ~attrs ~lang code] is a code block labeled with language [lang]. 134 | 135 | - [lang] defaults to being absent. 136 | 137 | See {!Code_block} *) 138 | 139 | val html_bl : ?attrs:attributes -> string -> attributes block 140 | (** [html_bl ~attrs html] is a block-level element of raw HTML. See {!Html_block}. *) 141 | 142 | type 'attr ctor_def_elt = 143 | { term : 'attr inline list 144 | ; defs : 'attr inline list list 145 | } 146 | (** Type for the items given to {!dl} definition lists. It is isomorphic to {!def_elt}. *) 147 | 148 | val dl : ?attrs:attributes -> attributes ctor_def_elt list -> attributes block 149 | (** [dl ~attrs elements] is a definition list of the given [elements]. See 150 | {!Definition_list}. 151 | 152 | E.g., 153 | 154 | {[ 155 | dl 156 | [ { term = [ txt "def term 1" ] 157 | ; defs = 158 | [ [ txt "definition 1.1" ] 159 | ; [ txt "definition 1.2" ] 160 | ; [ txt "definition 1.3" ] 161 | ] 162 | } 163 | ; { term = [ txt "def term 2" ] 164 | ; defs = 165 | [ [ txt "definition 2.1" ] 166 | ; [ txt "definition 2.2" ] 167 | ; [ txt "definition 2.3" ] 168 | ] 169 | } 170 | ] 171 | ]} *) 172 | end 173 | 174 | module Impl : Intf = struct 175 | let concat elems = Concat ([], elems) 176 | let empty = concat [] 177 | let txt ?(attrs = []) s = Text (attrs, s) 178 | let em ?(attrs = []) s = Emph (attrs, txt s) 179 | let strong ?(attrs = []) s = Strong (attrs, txt s) 180 | let code ?(attrs = []) s = Code (attrs, s) 181 | let br = Hard_break [] 182 | let nl = Soft_break [] 183 | 184 | let a ?(attrs = []) ?title ~url label = 185 | Link (attrs, { label = txt label; destination = url; title }) 186 | 187 | let img ?(attrs = []) ?title ~alt src = 188 | Image (attrs, { label = txt alt; destination = src; title }) 189 | 190 | (* Note that attributes are not actually supported Html nodes currently. *) 191 | let html s = Html ([], s) 192 | 193 | (* Block constructors *) 194 | 195 | let p ?(attrs = []) inlines = Paragraph (attrs, concat inlines) 196 | 197 | let ul ?(attrs = []) ?(spacing = Loose) items = 198 | List (attrs, Bullet '-', spacing, items) 199 | 200 | let ol ?(attrs = []) ?(start = 1) ?(char = `Dot) ?(spacing = Loose) items = 201 | let c = match char with `Dot -> '.' | `Paren -> ')' in 202 | List (attrs, Ordered (start, c), spacing, items) 203 | 204 | let blockquote ?(attrs = []) blocks = Blockquote (attrs, blocks) 205 | let hr = Thematic_break [] 206 | let h ?(attrs = []) level inlines = Heading (attrs, level, concat inlines) 207 | let code_bl ?(attrs = []) ?(lang = "") s = Code_block (attrs, lang, s) 208 | let html_bl ?(attrs = []) s = Html_block (attrs, s) 209 | 210 | type 'attr ctor_def_elt = 211 | { term : 'attr inline list 212 | ; defs : 'attr inline list list 213 | } 214 | 215 | let dl ?(attrs = []) (items : 'attr ctor_def_elt list) = 216 | let def_elt_of_pair { term; defs } : 'attr def_elt = 217 | let term = concat term in 218 | let defs = List.map concat defs in 219 | { term; defs } 220 | in 221 | let def_elts = List.map def_elt_of_pair items in 222 | Definition_list (attrs, def_elts) 223 | end 224 | -------------------------------------------------------------------------------- /src/ast_inline.ml: -------------------------------------------------------------------------------- 1 | (* TODO The presence of `attrs` in several of these nodes is leaking an 2 | implementation detail: we have no support for attributes in `Concat` 3 | `Soft_break` or `Html` nodes. The attributes are just dropped during 4 | rendering. Should we remove this from the UI, or should we include 5 | those somehow? Or should we include these in the document model, but 6 | but with the caveat that most renderings of the document don't support 7 | attributes in these nodes? *) 8 | type 'attr inline = 9 | | Concat of 'attr * 'attr inline list 10 | | Text of 'attr * string 11 | | Emph of 'attr * 'attr inline 12 | | Strong of 'attr * 'attr inline 13 | | Code of 'attr * string 14 | | Hard_break of 'attr 15 | | Soft_break of 'attr 16 | | Link of 'attr * 'attr link 17 | | Image of 'attr * 'attr link 18 | | Html of 'attr * string 19 | 20 | and 'attr link = 21 | { label : 'attr inline 22 | ; destination : string 23 | ; title : string option 24 | } 25 | -------------------------------------------------------------------------------- /src/block_parser.ml: -------------------------------------------------------------------------------- 1 | open Ast.Util 2 | module Raw = Ast_block.Raw 3 | 4 | module Pre = struct 5 | type container = 6 | | Rblockquote of t 7 | | Rlist of 8 | list_type 9 | * list_spacing 10 | * bool 11 | * int 12 | * attributes Raw.block list list 13 | * t 14 | | Rparagraph of string list 15 | | Rfenced_code of 16 | int 17 | * int 18 | * Parser.code_block_kind 19 | * (string * string) 20 | * string list 21 | * attributes 22 | | Rindented_code of string list 23 | | Rhtml of Parser.html_kind * string list 24 | | Rdef_list of string * string list 25 | | Rtable_header of StrSlice.t list * string 26 | | Rtable of (string * cell_alignment) list * string list list 27 | | Rempty 28 | 29 | and t = 30 | { blocks : attributes Raw.block list 31 | ; next : container 32 | } 33 | 34 | let concat l = String.concat "\n" (List.rev l) ^ "\n" 35 | 36 | let trim_left s = 37 | let rec loop i = 38 | if i >= String.length s then i 39 | else match s.[i] with ' ' | '\t' -> loop (succ i) | _ -> i 40 | in 41 | let i = loop 0 in 42 | if i > 0 then String.sub s i (String.length s - i) else s 43 | 44 | let link_reference_definitions s = 45 | let defs, off = Parser.link_reference_definitions (Parser.P.of_string s) in 46 | let s = String.sub s off (String.length s - off) |> String.trim in 47 | (defs, s) 48 | 49 | let rec close link_defs { blocks; next } = 50 | let finish = finish link_defs in 51 | match next with 52 | | Rblockquote state -> Raw.Blockquote ([], finish state) :: blocks 53 | | Rlist (ty, sp, _, _, closed_items, state) -> 54 | List ([], ty, sp, List.rev (finish state :: closed_items)) :: blocks 55 | | Rparagraph l -> 56 | let s = concat (List.map trim_left l) in 57 | let defs, off = 58 | Parser.link_reference_definitions (Parser.P.of_string s) 59 | in 60 | let s = String.sub s off (String.length s - off) |> String.trim in 61 | link_defs := defs @ !link_defs; 62 | if s = "" then blocks else Paragraph ([], s) :: blocks 63 | | Rfenced_code (_, _, _kind, (label, _other), [], attr) -> 64 | Code_block (attr, label, "") :: blocks 65 | | Rfenced_code (_, _, _kind, (label, _other), l, attr) -> 66 | Code_block (attr, label, concat l) :: blocks 67 | | Rdef_list (term, defs) -> 68 | let l, blocks = 69 | match blocks with 70 | | Definition_list (_, l) :: b -> (l, b) 71 | | b -> ([], b) 72 | in 73 | Definition_list ([], l @ [ { term; defs = List.rev defs } ]) :: blocks 74 | | Rindented_code l -> 75 | (* TODO: trim from the right *) 76 | let rec loop = function "" :: l -> loop l | _ as l -> l in 77 | Code_block ([], "", concat (loop l)) :: blocks 78 | | Rhtml (_, l) -> Html_block ([], concat l) :: blocks 79 | | Rtable_header (_header, line) -> 80 | (* FIXME: this will only ever get called on the very last 81 | line. Should it do the link definitions? *) 82 | close link_defs { blocks; next = Rparagraph [ line ] } 83 | | Rtable (header, rows) -> Table ([], header, List.rev rows) :: blocks 84 | | Rempty -> blocks 85 | 86 | and finish link_defs state = List.rev (close link_defs state) 87 | 88 | let empty = { blocks = []; next = Rempty } 89 | let classify_line s = Parser.parse s 90 | 91 | let classify_delimiter s = 92 | let left, s = 93 | match StrSlice.head s with 94 | | Some ':' -> (true, StrSlice.drop 1 s) 95 | | _ -> (false, s) 96 | in 97 | let right, s = 98 | match StrSlice.last s with 99 | | Some ':' -> (true, StrSlice.drop_last s) 100 | | _ -> (false, s) 101 | in 102 | if StrSlice.exists (fun c -> c <> '-') s then None 103 | else 104 | match (left, right) with 105 | | true, true -> Some Centre 106 | | true, false -> Some Left 107 | | false, true -> Some Right 108 | | false, false -> Some Default 109 | 110 | let match_table_headers headers delimiters = 111 | let rec loop processed = function 112 | | [], [] -> Some (List.rev processed) 113 | | header :: headers, line :: delimiters -> ( 114 | match classify_delimiter line with 115 | | None -> None 116 | | Some alignment -> 117 | loop 118 | ((StrSlice.to_string header, alignment) :: processed) 119 | (headers, delimiters)) 120 | | [], _ :: _ | _ :: _, [] -> None 121 | in 122 | loop [] (headers, delimiters) 123 | 124 | let rec match_row_length l1 l2 = 125 | match (l1, l2) with 126 | | [], _ -> [] 127 | | l1, [] -> List.init (List.length l1) (fun _ -> "") 128 | | _ :: l1, x :: l2 -> StrSlice.to_string x :: match_row_length l1 l2 129 | 130 | let rec process link_defs { blocks; next } s = 131 | let process = process link_defs in 132 | let close = close link_defs in 133 | let finish = finish link_defs in 134 | match (next, classify_line s) with 135 | | Rempty, Parser.Lempty -> { blocks; next = Rempty } 136 | | Rempty, Lblockquote s -> { blocks; next = Rblockquote (process empty s) } 137 | | Rempty, Lthematic_break -> 138 | { blocks = Thematic_break [] :: blocks; next = Rempty } 139 | | Rempty, Lsetext_heading { level = 2; len } when len >= 3 -> 140 | { blocks = Thematic_break [] :: blocks; next = Rempty } 141 | | Rempty, Latx_heading (level, text, attr) -> 142 | { blocks = Heading (attr, level, text) :: blocks; next = Rempty } 143 | | Rempty, Lfenced_code (ind, num, q, info, a) -> 144 | { blocks; next = Rfenced_code (ind, num, q, info, [], a) } 145 | | Rempty, Lhtml (_, kind) -> process { blocks; next = Rhtml (kind, []) } s 146 | | Rempty, Lindented_code s -> 147 | { blocks; next = Rindented_code [ StrSlice.to_string s ] } 148 | | Rempty, Llist_item (kind, indent, s) -> 149 | { blocks 150 | ; next = Rlist (kind, Tight, false, indent, [], process empty s) 151 | } 152 | | Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _ | Ltable_line []) -> 153 | { blocks; next = Rparagraph [ StrSlice.to_string s ] } 154 | | Rempty, Ltable_line items -> 155 | { blocks; next = Rtable_header (items, StrSlice.to_string s) } 156 | | Rparagraph [ h ], Ldef_list def -> 157 | { blocks; next = Rdef_list (h, [ def ]) } 158 | | Rdef_list (term, defs), Ldef_list def -> 159 | { blocks; next = Rdef_list (term, def :: defs) } 160 | | Rparagraph _, Llist_item ((Ordered (1, _) | Bullet _), _, s1) 161 | when not (Parser.is_empty (Parser.P.of_string (StrSlice.to_string s1))) -> 162 | process { blocks = close { blocks; next }; next = Rempty } s 163 | | ( Rparagraph _ 164 | , ( Lempty | Lblockquote _ | Lthematic_break | Latx_heading _ 165 | | Lfenced_code _ 166 | | Lhtml (true, _) ) ) -> 167 | process { blocks = close { blocks; next }; next = Rempty } s 168 | | Rparagraph (_ :: _ as lines), Lsetext_heading { level; _ } -> 169 | let text = concat (List.map trim_left lines) in 170 | let defs, text = link_reference_definitions text in 171 | link_defs := defs @ !link_defs; 172 | if text = "" then 173 | (* Happens when there's nothing between the [link reference definition] and the [setext heading]. 174 | 175 | [foo]: /foo-url 176 | === 177 | [foo] 178 | 179 | In that case, there's nothing to make as Heading. We can simply add `===` as Rparagraph 180 | *) 181 | { blocks; next = Rparagraph [ StrSlice.to_string s ] } 182 | else { blocks = Heading ([], level, text) :: blocks; next = Rempty } 183 | | Rparagraph lines, _ -> 184 | { blocks; next = Rparagraph (StrSlice.to_string s :: lines) } 185 | | Rfenced_code (_, num, q, _, _, _), Lfenced_code (_, num', q1, ("", _), _) 186 | when num' >= num && q = q1 -> 187 | { blocks = close { blocks; next }; next = Rempty } 188 | | Rfenced_code (ind, num, q, info, lines, a), _ -> 189 | let s = 190 | let ind = min (Parser.indent s) ind in 191 | if ind > 0 then StrSlice.offset ind s else s 192 | in 193 | { blocks 194 | ; next = 195 | Rfenced_code (ind, num, q, info, StrSlice.to_string s :: lines, a) 196 | } 197 | | Rdef_list (term, d :: defs), Lparagraph -> 198 | { blocks 199 | ; next = Rdef_list (term, (d ^ "\n" ^ StrSlice.to_string s) :: defs) 200 | } 201 | | Rdef_list _, _ -> 202 | process { blocks = close { blocks; next }; next = Rempty } s 203 | | Rtable_header (headers, line), Ltable_line items -> ( 204 | match match_table_headers headers items with 205 | | Some headers -> 206 | (* Makes sure that there are the same number of delimiters 207 | as headers. See 208 | https://github.github.com/gfm/#example-203 *) 209 | { blocks; next = Rtable (headers, []) } 210 | | None -> 211 | (* Reinterpret the previous line as the start of a 212 | paragraph. *) 213 | process { blocks; next = Rparagraph [ line ] } s) 214 | | Rtable_header (_, line), _ -> 215 | (* If we only have a potential header, and the current line 216 | doesn't look like a table delimiter, then reinterpret the 217 | previous line as the start of a paragraph. *) 218 | process { blocks; next = Rparagraph [ line ] } s 219 | | Rtable (header, rows), Ltable_line row -> 220 | (* Make sure the number of items in the row is consistent with 221 | the headers and the rest of the rows. See 222 | https://github.github.com/gfm/#example-204 *) 223 | let row = match_row_length header row in 224 | { blocks; next = Rtable (header, row :: rows) } 225 | | Rtable (header, rows), (Lparagraph | Lsetext_heading _) -> 226 | (* Treat a contiguous line after a table as a row, even if it 227 | doesn't contain any '|' 228 | characters. https://github.github.com/gfm/#example-202 *) 229 | let row = match_row_length header [ s ] in 230 | { blocks; next = Rtable (header, row :: rows) } 231 | | Rtable _, _ -> 232 | process { blocks = close { blocks; next }; next = Rempty } s 233 | | Rindented_code lines, Lindented_code s -> 234 | { blocks; next = Rindented_code (StrSlice.to_string s :: lines) } 235 | | Rindented_code lines, Lempty -> 236 | let n = min (Parser.indent s) 4 in 237 | let s = StrSlice.offset n s in 238 | { blocks; next = Rindented_code (StrSlice.to_string s :: lines) } 239 | | Rindented_code _, _ -> 240 | process { blocks = close { blocks; next }; next = Rempty } s 241 | | Rhtml ((Hcontains l as k), lines), _ 242 | when List.exists (fun t -> StrSlice.contains t s) l -> 243 | { blocks = 244 | close { blocks; next = Rhtml (k, StrSlice.to_string s :: lines) } 245 | ; next = Rempty 246 | } 247 | | Rhtml (Hblank, _), Lempty -> 248 | { blocks = close { blocks; next }; next = Rempty } 249 | | Rhtml (k, lines), _ -> 250 | { blocks; next = Rhtml (k, StrSlice.to_string s :: lines) } 251 | | Rblockquote state, Lblockquote s -> 252 | { blocks; next = Rblockquote (process state s) } 253 | | Rlist (kind, style, _, ind, items, state), Lempty -> 254 | { blocks 255 | ; next = Rlist (kind, style, true, ind, items, process state s) 256 | } 257 | | Rlist (_, _, true, ind, _, { blocks = []; next = Rempty }), _ 258 | when Parser.indent s >= ind -> 259 | process { blocks = close { blocks; next }; next = Rempty } s 260 | | Rlist (kind, style, prev_empty, ind, items, state), _ 261 | when Parser.indent s >= ind -> 262 | let s = StrSlice.offset ind s in 263 | let state = process state s in 264 | let style = 265 | let rec new_block = function 266 | | Rblockquote { blocks = []; next } 267 | | Rlist (_, _, _, _, _, { blocks = []; next }) -> 268 | new_block next 269 | | Rparagraph [ _ ] 270 | | Rfenced_code (_, _, _, _, [], _) 271 | | Rindented_code [ _ ] 272 | | Rhtml (_, [ _ ]) -> 273 | true 274 | | _ -> false 275 | in 276 | if prev_empty && new_block state.next then Loose else style 277 | in 278 | { blocks; next = Rlist (kind, style, false, ind, items, state) } 279 | | ( Rlist (kind, style, prev_empty, _, items, state) 280 | , Llist_item (kind', ind, s) ) 281 | when same_block_list_kind kind kind' -> 282 | let style = if prev_empty then Loose else style in 283 | { blocks 284 | ; next = 285 | Rlist 286 | (kind, style, false, ind, finish state :: items, process empty s) 287 | } 288 | | (Rlist _ | Rblockquote _), _ -> ( 289 | let rec loop = function 290 | | Rlist (kind, style, prev_empty, ind, items, { blocks; next }) -> ( 291 | match loop next with 292 | | Some next -> 293 | Some 294 | (Rlist 295 | (kind, style, prev_empty, ind, items, { blocks; next })) 296 | | None -> None) 297 | | Rblockquote { blocks; next } -> ( 298 | match loop next with 299 | | Some next -> Some (Rblockquote { blocks; next }) 300 | | None -> None) 301 | | Rparagraph (_ :: _ as lines) -> ( 302 | match classify_line s with 303 | | Parser.Lparagraph | Lindented_code _ 304 | | Lsetext_heading { level = 1; _ } 305 | | Lhtml (false, _) -> 306 | Some (Rparagraph (StrSlice.to_string s :: lines)) 307 | | _ -> None) 308 | | _ -> None 309 | in 310 | match loop next with 311 | | Some next -> { blocks; next } 312 | | None -> process { blocks = close { blocks; next }; next = Rempty } s) 313 | 314 | let process link_defs state s = process link_defs state (StrSlice.of_string s) 315 | 316 | let of_channel ic = 317 | let link_defs = ref [] in 318 | let rec loop state = 319 | match input_line ic with 320 | | s -> loop (process link_defs state s) 321 | | exception End_of_file -> 322 | let blocks = finish link_defs state in 323 | (blocks, List.rev !link_defs) 324 | in 325 | loop empty 326 | 327 | let read_line s off = 328 | let buf = Buffer.create 128 in 329 | let rec loop cr_read off = 330 | if off >= String.length s then (Buffer.contents buf, None) 331 | else 332 | match s.[off] with 333 | | '\n' -> (Buffer.contents buf, Some (succ off)) 334 | | '\r' -> 335 | if cr_read then Buffer.add_char buf '\r'; 336 | loop true (succ off) 337 | | c -> 338 | if cr_read then Buffer.add_char buf '\r'; 339 | Buffer.add_char buf c; 340 | loop false (succ off) 341 | in 342 | loop false off 343 | 344 | let of_string s = 345 | let link_defs = ref [] in 346 | let rec loop state = function 347 | | None -> 348 | let blocks = finish link_defs state in 349 | (blocks, List.rev !link_defs) 350 | | Some off -> 351 | let s, off = read_line s off in 352 | loop (process link_defs state s) off 353 | in 354 | loop empty (Some 0) 355 | end 356 | -------------------------------------------------------------------------------- /src/block_parser.mli: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | module Raw = Ast_block.Raw 3 | 4 | module Pre : sig 5 | val of_channel : 6 | in_channel -> attributes Raw.block list * attributes Parser.link_def list 7 | 8 | val of_string : 9 | string -> attributes Raw.block list * attributes Parser.link_def list 10 | end 11 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name omd) 3 | (public_name omd) 4 | (libraries uutf uucp uunf)) 5 | 6 | (rule 7 | (with-stdout-to 8 | entities.ml 9 | (chdir 10 | ../tools 11 | (run ./gen_entities.exe %{dep:../tools/entities.json})))) 12 | -------------------------------------------------------------------------------- /src/html.ml: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | 3 | type element_type = 4 | | Inline 5 | | Block 6 | | Table 7 | 8 | type t = 9 | | Element of element_type * string * attributes * t option 10 | | Text of string 11 | | Raw of string 12 | | Null 13 | | Concat of t * t 14 | 15 | let elt etype name attrs childs = Element (etype, name, attrs, childs) 16 | let text s = Text s 17 | let raw s = Raw s 18 | 19 | let concat t1 t2 = 20 | match (t1, t2) with Null, t | t, Null -> t | _ -> Concat (t1, t2) 21 | 22 | let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l 23 | 24 | let concat_map2 f l1 l2 = 25 | List.fold_left2 (fun accu x y -> concat accu (f x y)) Null l1 l2 26 | 27 | (* only convert when "necessary" *) 28 | let htmlentities s = 29 | let b = Buffer.create (String.length s) in 30 | let rec loop i = 31 | if i >= String.length s then Buffer.contents b 32 | else begin 33 | begin 34 | match s.[i] with 35 | | '"' -> Buffer.add_string b """ 36 | | '&' -> Buffer.add_string b "&" 37 | | '<' -> Buffer.add_string b "<" 38 | | '>' -> Buffer.add_string b ">" 39 | | c -> Buffer.add_char b c 40 | end; 41 | loop (succ i) 42 | end 43 | in 44 | loop 0 45 | 46 | let add_attrs_to_buffer buf attrs = 47 | let f (k, v) = Printf.bprintf buf " %s=\"%s\"" k (htmlentities v) in 48 | List.iter f attrs 49 | 50 | let rec add_to_buffer buf = function 51 | | Element (eltype, name, attrs, None) -> 52 | Printf.bprintf buf "<%s%a />" name add_attrs_to_buffer attrs; 53 | if eltype = Block then Buffer.add_char buf '\n' 54 | | Element (eltype, name, attrs, Some c) -> 55 | Printf.bprintf 56 | buf 57 | "<%s%a>%s%a%s" 58 | name 59 | add_attrs_to_buffer 60 | attrs 61 | (match eltype with Table -> "\n" | _ -> "") 62 | add_to_buffer 63 | c 64 | name 65 | (match eltype with Table | Block -> "\n" | _ -> "") 66 | | Text s -> Buffer.add_string buf (htmlentities s) 67 | | Raw s -> Buffer.add_string buf s 68 | | Null -> () 69 | | Concat (t1, t2) -> 70 | add_to_buffer buf t1; 71 | add_to_buffer buf t2 72 | 73 | let escape_uri s = 74 | let b = Buffer.create (String.length s) in 75 | String.iter 76 | (function 77 | | ( '!' | '*' | '\'' | '(' | ')' | ';' | ':' | '@' | '=' | '+' | '$' | ',' 78 | | '/' | '?' | '%' | '#' 79 | | 'A' .. 'Z' 80 | | 'a' .. 'z' 81 | | '0' .. '9' 82 | | '-' | '_' | '.' | '~' | '&' ) as c -> 83 | Buffer.add_char b c 84 | | _ as c -> Printf.bprintf b "%%%2X" (Char.code c)) 85 | s; 86 | Buffer.contents b 87 | 88 | let trim_start_while p s = 89 | let start = ref true in 90 | let b = Buffer.create (String.length s) in 91 | Uutf.String.fold_utf_8 92 | (fun () _ -> function 93 | | `Malformed _ -> Buffer.add_string b s 94 | | `Uchar u when p u && !start -> () 95 | | `Uchar u when !start -> 96 | start := false; 97 | Uutf.Buffer.add_utf_8 b u 98 | | `Uchar u -> Uutf.Buffer.add_utf_8 b u) 99 | () 100 | s; 101 | Buffer.contents b 102 | 103 | let underscore = Uchar.of_char '_' 104 | let hyphen = Uchar.of_char '-' 105 | let period = Uchar.of_char '.' 106 | let is_white_space = Uucp.White.is_white_space 107 | let is_alphabetic = Uucp.Alpha.is_alphabetic 108 | let is_hex_digit = Uucp.Num.is_hex_digit 109 | 110 | module Identifiers : sig 111 | type t 112 | 113 | val empty : t 114 | 115 | val touch : string -> t -> int * t 116 | (** Bump the frequency count for the given string. 117 | It returns the previous count (before bumping) *) 118 | end = struct 119 | module SMap = Map.Make (String) 120 | 121 | type t = int SMap.t 122 | 123 | let empty = SMap.empty 124 | let count s t = match SMap.find_opt s t with None -> 0 | Some x -> x 125 | let incr s t = SMap.add s (count s t + 1) t 126 | 127 | let touch s t = 128 | let count = count s t in 129 | (count, incr s t) 130 | end 131 | 132 | (* Based on pandoc algorithm to derive id's. 133 | See: https://pandoc.org/MANUAL.html#extension-auto_identifiers *) 134 | let slugify s = 135 | let s = trim_start_while (fun c -> not (is_alphabetic c)) s in 136 | let length = String.length s in 137 | let b = Buffer.create length in 138 | let last_is_ws = ref false in 139 | let add_to_buffer u = 140 | if !last_is_ws = true then begin 141 | Uutf.Buffer.add_utf_8 b (Uchar.of_char '-'); 142 | last_is_ws := false 143 | end; 144 | Uutf.Buffer.add_utf_8 b u 145 | in 146 | let fold () _ = function 147 | | `Malformed _ -> add_to_buffer Uutf.u_rep 148 | | `Uchar u when is_white_space u && not !last_is_ws -> last_is_ws := true 149 | | `Uchar u when is_white_space u && !last_is_ws -> () 150 | | `Uchar u -> 151 | (if is_alphabetic u || is_hex_digit u then 152 | match Uucp.Case.Map.to_lower u with 153 | | `Self -> add_to_buffer u 154 | | `Uchars us -> List.iter add_to_buffer us); 155 | if u = underscore || u = hyphen || u = period then add_to_buffer u 156 | in 157 | Uutf.String.fold_utf_8 fold () s; 158 | Buffer.contents b 159 | 160 | let to_plain_text t = 161 | let buf = Buffer.create 1024 in 162 | let rec go : _ inline -> unit = function 163 | | Concat (_, l) -> List.iter go l 164 | | Text (_, t) | Code (_, t) -> Buffer.add_string buf t 165 | | Emph (_, i) 166 | | Strong (_, i) 167 | | Link (_, { label = i; _ }) 168 | | Image (_, { label = i; _ }) -> 169 | go i 170 | | Hard_break _ | Soft_break _ -> Buffer.add_char buf ' ' 171 | | Html _ -> () 172 | in 173 | go t; 174 | Buffer.contents buf 175 | 176 | let nl = Raw "\n" 177 | 178 | let rec url label destination title attrs = 179 | let attrs = 180 | match title with None -> attrs | Some title -> ("title", title) :: attrs 181 | in 182 | let attrs = ("href", escape_uri destination) :: attrs in 183 | elt Inline "a" attrs (Some (inline label)) 184 | 185 | and img label destination title attrs = 186 | let attrs = 187 | match title with None -> attrs | Some title -> ("title", title) :: attrs 188 | in 189 | let attrs = 190 | ("src", escape_uri destination) :: ("alt", to_plain_text label) :: attrs 191 | in 192 | elt Inline "img" attrs None 193 | 194 | and inline = function 195 | | Ast.Impl.Concat (_, l) -> concat_map inline l 196 | | Text (_, t) -> text t 197 | | Emph (attr, il) -> elt Inline "em" attr (Some (inline il)) 198 | | Strong (attr, il) -> elt Inline "strong" attr (Some (inline il)) 199 | | Code (attr, s) -> elt Inline "code" attr (Some (text s)) 200 | | Hard_break attr -> concat (elt Inline "br" attr None) nl 201 | | Soft_break _ -> nl 202 | | Html (_, body) -> raw body 203 | | Link (attr, { label; destination; title }) -> 204 | url label destination title attr 205 | | Image (attr, { label; destination; title }) -> 206 | img label destination title attr 207 | 208 | let alignment_attributes = function 209 | | Default -> [] 210 | | Left -> [ ("align", "left") ] 211 | | Right -> [ ("align", "right") ] 212 | | Centre -> [ ("align", "center") ] 213 | 214 | let table_header headers = 215 | elt 216 | Table 217 | "thead" 218 | [] 219 | (Some 220 | (elt 221 | Table 222 | "tr" 223 | [] 224 | (Some 225 | (concat_map 226 | (fun (header, alignment) -> 227 | let attrs = alignment_attributes alignment in 228 | elt Block "th" attrs (Some (inline header))) 229 | headers)))) 230 | 231 | let table_body headers rows = 232 | elt 233 | Table 234 | "tbody" 235 | [] 236 | (Some 237 | (concat_map 238 | (fun row -> 239 | elt 240 | Table 241 | "tr" 242 | [] 243 | (Some 244 | (concat_map2 245 | (fun (_, alignment) cell -> 246 | let attrs = alignment_attributes alignment in 247 | elt Block "td" attrs (Some (inline cell))) 248 | headers 249 | row))) 250 | rows)) 251 | 252 | let rec block ~auto_identifiers = function 253 | | Blockquote (attr, q) -> 254 | elt 255 | Block 256 | "blockquote" 257 | attr 258 | (Some (concat nl (concat_map (block ~auto_identifiers) q))) 259 | | Paragraph (attr, md) -> elt Block "p" attr (Some (inline md)) 260 | | List (attr, ty, sp, bl) -> 261 | let name = match ty with Ordered _ -> "ol" | Bullet _ -> "ul" in 262 | let attr = 263 | match ty with 264 | | Ordered (n, _) when n <> 1 -> ("start", string_of_int n) :: attr 265 | | _ -> attr 266 | in 267 | let li t = 268 | let block' t = 269 | match (t, sp) with 270 | | Paragraph (_, t), Tight -> concat (inline t) nl 271 | | _ -> block ~auto_identifiers t 272 | in 273 | let nl = if sp = Tight then Null else nl in 274 | elt Block "li" [] (Some (concat nl (concat_map block' t))) 275 | in 276 | elt Block name attr (Some (concat nl (concat_map li bl))) 277 | | Code_block (attr, label, code) -> 278 | let code_attr = 279 | if String.trim label = "" then [] 280 | else [ ("class", "language-" ^ label) ] 281 | in 282 | let c = text code in 283 | elt Block "pre" attr (Some (elt Inline "code" code_attr (Some c))) 284 | | Thematic_break attr -> elt Block "hr" attr None 285 | | Html_block (_, body) -> raw body 286 | | Heading (attr, level, text) -> 287 | let name = 288 | match level with 289 | | 1 -> "h1" 290 | | 2 -> "h2" 291 | | 3 -> "h3" 292 | | 4 -> "h4" 293 | | 5 -> "h5" 294 | | 6 -> "h6" 295 | | _ -> "p" 296 | in 297 | elt Block name attr (Some (inline text)) 298 | | Definition_list (attr, l) -> 299 | let f { term; defs } = 300 | concat 301 | (elt Block "dt" [] (Some (inline term))) 302 | (concat_map (fun s -> elt Block "dd" [] (Some (inline s))) defs) 303 | in 304 | elt Block "dl" attr (Some (concat_map f l)) 305 | | Table (attr, headers, []) -> 306 | elt Table "table" attr (Some (table_header headers)) 307 | | Table (attr, headers, rows) -> 308 | elt 309 | Table 310 | "table" 311 | attr 312 | (Some (concat (table_header headers) (table_body headers rows))) 313 | 314 | let of_doc ?(auto_identifiers = true) doc = 315 | let identifiers = Identifiers.empty in 316 | let f identifiers = function 317 | | Heading (attr, level, text) -> 318 | let attr, identifiers = 319 | if (not auto_identifiers) || List.mem_assoc "id" attr then 320 | (attr, identifiers) 321 | else 322 | let id = slugify (to_plain_text text) in 323 | (* Default identifier if empty. It matches what pandoc does. *) 324 | let id = if id = "" then "section" else id in 325 | let count, identifiers = Identifiers.touch id identifiers in 326 | let id = 327 | if count = 0 then id else Printf.sprintf "%s-%i" id count 328 | in 329 | (("id", id) :: attr, identifiers) 330 | in 331 | (Heading (attr, level, text), identifiers) 332 | | _ as c -> (c, identifiers) 333 | in 334 | let html, _ = 335 | List.fold_left 336 | (fun (accu, ids) x -> 337 | let x', ids = f ids x in 338 | let el = concat accu (block ~auto_identifiers x') in 339 | (el, ids)) 340 | (Null, identifiers) 341 | doc 342 | in 343 | html 344 | 345 | let to_string t = 346 | let buf = Buffer.create 1024 in 347 | add_to_buffer buf t; 348 | Buffer.contents buf 349 | -------------------------------------------------------------------------------- /src/html.mli: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | 3 | type element_type = 4 | | Inline 5 | | Block 6 | | Table 7 | 8 | type t = 9 | | Element of element_type * string * attributes * t option 10 | | Text of string 11 | | Raw of string 12 | | Null 13 | | Concat of t * t 14 | 15 | val htmlentities : string -> string 16 | val of_doc : ?auto_identifiers:bool -> attributes block list -> t 17 | val to_string : t -> string 18 | -------------------------------------------------------------------------------- /src/omd.ml: -------------------------------------------------------------------------------- 1 | (* The document model *) 2 | 3 | include Ast.Impl 4 | 5 | (* Helper functions for construction document AST *) 6 | 7 | module Ctor = Ast_constructors.Impl 8 | 9 | (* Table of contents *) 10 | 11 | let headers = Toc.headers 12 | let toc = Toc.toc 13 | 14 | (* Conversion *) 15 | 16 | let parse_inline defs s = Parser.inline defs (Parser.P.of_string s) 17 | 18 | let parse_inlines (md, defs) : doc = 19 | let defs = 20 | let f (def : attributes Parser.link_def) = 21 | { def with label = Parser.normalize def.label } 22 | in 23 | List.map f defs 24 | in 25 | List.map (Ast_block.Mapper.map (parse_inline defs)) md 26 | 27 | let escape_html_entities = Html.htmlentities 28 | let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic) 29 | let of_string s = parse_inlines (Block_parser.Pre.of_string s) 30 | 31 | let to_html ?auto_identifiers doc = 32 | Html.to_string (Html.of_doc ?auto_identifiers doc) 33 | 34 | let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast) 35 | -------------------------------------------------------------------------------- /src/omd.mli: -------------------------------------------------------------------------------- 1 | (** {1 A markdown parser in OCaml} *) 2 | 3 | (** {2 The document model} 4 | 5 | The following types define the AST representing Omd's document model. *) 6 | 7 | include Ast.Intf 8 | 9 | (** {2 Helper functions for constructing the document AST } *) 10 | 11 | module Ctor : Ast_constructors.Intf 12 | 13 | (** {2 Generating and constructing tables of contents} *) 14 | 15 | val headers : 16 | ?remove_links:bool -> 'attr block list -> ('attr * int * 'attr inline) list 17 | 18 | val toc : ?start:int list -> ?depth:int -> doc -> doc 19 | 20 | (** {2 Helper functions} *) 21 | 22 | val escape_html_entities : string -> string 23 | (** Perform escaping of HTML entities. Turns: ['"'] into ["""], 24 | ['&'] into ["&"], ['<'] in ["<"] and ['>'] into [">"] 25 | *) 26 | 27 | (** {2 Converting to and from documents} *) 28 | 29 | val of_channel : in_channel -> doc 30 | val of_string : string -> doc 31 | val to_html : ?auto_identifiers:bool -> doc -> string 32 | val to_sexp : doc -> string 33 | -------------------------------------------------------------------------------- /src/parser.ml: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | 3 | type 'attr link_def = 4 | { label : string 5 | ; destination : string 6 | ; title : string option 7 | ; attributes : 'attr 8 | } 9 | 10 | let is_whitespace = function 11 | | ' ' | '\t' | '\010' .. '\013' -> true 12 | | _ -> false 13 | 14 | let is_punct = function 15 | | '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' 16 | | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' 17 | | '_' | '`' | '{' | '|' | '}' | '~' -> 18 | true 19 | | _ -> false 20 | 21 | exception Fail 22 | (** Raised when a parser fails, used for control flow rather than error 23 | handling *) 24 | 25 | (** Stateful parser combinators *) 26 | module P : sig 27 | type state 28 | type 'a t = state -> 'a 29 | 30 | val of_string : string -> state 31 | 32 | val peek : char option t 33 | (** [Some c] if [c] is the next character in the input, or [None] 34 | if the input is exhausted. 35 | 36 | NOTE: Does not advance the state. *) 37 | 38 | val peek_exn : char t 39 | (** the next character in the input, or raises [Fail] if the 40 | input is exhausted. 41 | 42 | NOTE: Does not advance the state. *) 43 | 44 | val peek_before : char -> state -> char 45 | (** the previous character in the input, or the next 46 | character, if we are at the start of the input. 47 | 48 | NOTE: Does not advance the state. *) 49 | 50 | val peek_after : char -> state -> char 51 | (** the character after the next in the input, or the next 52 | character, if we are at the end of the input. 53 | 54 | NOTE: Does not advance the state. *) 55 | 56 | val pos : state -> int 57 | val range : state -> int -> int -> string 58 | val set_pos : state -> int -> unit 59 | 60 | val junk : unit t 61 | (** ignores the next character in the input *) 62 | 63 | val char : char -> unit t 64 | (** accepts a [c] *) 65 | 66 | val next : char t 67 | 68 | val ws : unit t 69 | (** accepts 0 or more white space characters *) 70 | 71 | val sp : unit t 72 | (** accepts 0 or more spaces or tabs *) 73 | 74 | val ws1 : unit t 75 | (** accepts 1 or more spaces or tabs, fails if none is found *) 76 | 77 | val ( ||| ) : 'a t -> 'a t -> 'a t 78 | (** [p ||| q] tries to accept [p], but in case [p] fails, it accepts [q] 79 | (which can fail). *) 80 | 81 | val ( >>> ) : unit t -> 'a t -> 'a t 82 | (** [p >>> q] accepts [p] followed by [q], returning whatever [q] does *) 83 | 84 | val ( <<< ) : 'a t -> unit t -> 'a t 85 | (** [p >>> q] accepts [p] followed by [q], returning whatever [q] does *) 86 | 87 | val protect : 'a t -> 'a t 88 | (** run the given parser, resetting the state back to it's initial condition 89 | if the parser fails *) 90 | 91 | val pair : 'a t -> 'b t -> ('a * 'b) t 92 | 93 | val on_sub : (StrSlice.t -> 'a * StrSlice.t) -> 'a t 94 | (** Given a function [f] that takes a prefix of a string slice to a value [x] 95 | of type ['a] and some remainder of the slice, [on_sub f] produces [x] from 96 | the state, and advances the input the length of the slice that was 97 | consumed by [f]. *) 98 | end = struct 99 | type state = 100 | { str : string 101 | ; mutable pos : int 102 | } 103 | 104 | let of_string str = { str; pos = 0 } 105 | 106 | type 'a t = state -> 'a 107 | 108 | let char c st = 109 | if st.pos >= String.length st.str then raise Fail 110 | else if st.str.[st.pos] <> c then raise Fail 111 | else st.pos <- st.pos + 1 112 | 113 | let next st = 114 | if st.pos >= String.length st.str then raise Fail 115 | else 116 | let c = st.str.[st.pos] in 117 | st.pos <- st.pos + 1; 118 | c 119 | 120 | let peek st = 121 | if st.pos >= String.length st.str then None else Some st.str.[st.pos] 122 | 123 | let peek_exn st = match peek st with Some c -> c | None -> raise Fail 124 | let peek_before c st = if st.pos = 0 then c else st.str.[st.pos - 1] 125 | 126 | let peek_after c st = 127 | if st.pos + 1 >= String.length st.str then c else st.str.[st.pos + 1] 128 | 129 | let pos st = st.pos 130 | let range st pos n = String.sub st.str pos n 131 | let set_pos st pos = st.pos <- pos 132 | let junk st = if st.pos < String.length st.str then st.pos <- st.pos + 1 133 | 134 | let protect p st = 135 | let off = pos st in 136 | try p st 137 | with e -> 138 | set_pos st off; 139 | raise e 140 | 141 | let ( ||| ) p1 p2 st = try protect p1 st with Fail -> p2 st 142 | 143 | let ws st = 144 | let rec loop () = 145 | if is_whitespace (peek_exn st) then ( 146 | junk st; 147 | loop ()) 148 | in 149 | try loop () with Fail -> () 150 | 151 | let sp st = 152 | let rec loop () = 153 | match peek_exn st with 154 | | ' ' | '\t' -> 155 | junk st; 156 | loop () 157 | | _ -> () 158 | in 159 | try loop () with Fail -> () 160 | 161 | let ws1 st = 162 | if is_whitespace (peek_exn st) then ( 163 | junk st; 164 | ws st) 165 | else raise Fail 166 | 167 | let ( >>> ) p q st = 168 | p st; 169 | q st 170 | 171 | let ( <<< ) p q st = 172 | let x = p st in 173 | q st; 174 | x 175 | 176 | let pair p q st = 177 | let x = p st in 178 | let y = q st in 179 | (x, y) 180 | 181 | let on_sub fn st = 182 | let result, s = fn (StrSlice.of_string ~off:st.pos st.str) in 183 | st.pos <- StrSlice.get_offset s; 184 | result 185 | end 186 | 187 | type html_kind = 188 | | Hcontains of string list 189 | | Hblank 190 | 191 | type code_block_kind = 192 | | Tilde 193 | | Backtick 194 | 195 | type t = 196 | | Lempty 197 | | Lblockquote of StrSlice.t 198 | | Lthematic_break 199 | | Latx_heading of int * string * attributes 200 | | Lsetext_heading of 201 | { level : int 202 | ; len : int 203 | } (** the level of the heading and how long the underline marker is *) 204 | | Lfenced_code of int * int * code_block_kind * (string * string) * attributes 205 | | Lindented_code of StrSlice.t 206 | | Lhtml of bool * html_kind 207 | | Llist_item of list_type * int * StrSlice.t 208 | | Lparagraph 209 | | Ldef_list of string 210 | | Ltable_line of StrSlice.t list 211 | 212 | (* drop up to 3 spaces, returning the number of spaces dropped and the remainder of the string *) 213 | let sp3 s = 214 | match StrSlice.take 3 s with 215 | | [ ' '; ' '; ' ' ] -> (3, StrSlice.drop 3 s) 216 | | ' ' :: ' ' :: _ -> (2, StrSlice.drop 2 s) 217 | | ' ' :: _ -> (1, StrSlice.drop 1 s) 218 | | _ -> (0, s) 219 | 220 | (** TODO Why is this here? Doesn't it almost exactly repeat the one in [P], only with slices? 221 | Why is this kind of repetition needed? *) 222 | let ( ||| ) p1 p2 s = try p1 s with Fail -> p2 s 223 | 224 | let trim_leading_ws s = StrSlice.drop_while is_whitespace s 225 | let trim_trailing_ws s = StrSlice.drop_last_while is_whitespace s 226 | let trim_ws s = trim_leading_ws s |> trim_trailing_ws 227 | let is_empty s = StrSlice.is_empty (trim_leading_ws s) 228 | 229 | (* See https://spec.commonmark.org/0.30/#thematic-breaks *) 230 | let thematic_break = 231 | (* Accepts thematic break chars or fail, counting how many chars we find *) 232 | let f symb c count = 233 | if Char.equal symb c then succ count 234 | else if is_whitespace c then 235 | (* Thematic break chars can be separated by spaces *) 236 | count 237 | else raise Fail 238 | in 239 | fun s -> 240 | match StrSlice.head s with 241 | | Some (('*' | '_' | '-') as symb) -> 242 | if StrSlice.fold_left (f symb) 0 s >= 3 then 243 | (* Three or more of the same thematic break chars found *) 244 | Lthematic_break 245 | else raise Fail 246 | | Some _ | None -> raise Fail 247 | 248 | (* See https://spec.commonmark.org/0.30/#setext-heading *) 249 | let setext_heading s = 250 | (* The first char determines if possible setext and the level of the heading *) 251 | let level, symb = 252 | match StrSlice.head s with 253 | | Some '=' -> (1, '=') 254 | | Some '-' -> (2, '-') 255 | | _ -> raise Fail 256 | in 257 | let heading_chars, rest = 258 | StrSlice.split_at (fun c -> not (Char.equal c symb)) s 259 | in 260 | let len = StrSlice.length heading_chars in 261 | if Char.equal symb '-' && len = 1 then 262 | (* can be interpreted as an empty list item *) 263 | raise Fail 264 | else if not (StrSlice.for_all is_whitespace rest) then 265 | (* if anything except whitespace is left, it can't be a setext heading underline *) 266 | raise Fail 267 | else Lsetext_heading { level; len } 268 | 269 | (* Parses a string slice in pandoc-style into an association list 270 | 271 | See https://pandoc.org/MANUAL.html#extension-header_attributes *) 272 | let parse_attributes s = 273 | let attributes = String.split_on_char ' ' s in 274 | let f (id, classes, acc) s = 275 | if s = "" then (id, classes, acc) 276 | else 277 | match s.[0] with 278 | | '#' -> (Some (String.sub s 1 (String.length s - 1)), classes, acc) 279 | | '.' -> (id, String.sub s 1 (String.length s - 1) :: classes, acc) 280 | | _ -> ( 281 | let attr = String.split_on_char '=' s in 282 | match attr with 283 | | [] -> (id, classes, acc) 284 | | h :: t -> (id, classes, (h, String.concat "=" t) :: acc)) 285 | in 286 | let id, classes, acc = List.fold_left f (None, [], []) attributes in 287 | let acc = List.rev acc in 288 | let acc = 289 | match classes with 290 | | [] -> acc 291 | | _ :: _ -> 292 | let classes = String.concat " " (List.rev classes) in 293 | ("class", classes) :: acc 294 | in 295 | match id with Some id -> ("id", id) :: acc | None -> acc 296 | 297 | (* Parses a string slice into an attribute list (possibly empty) and the non-attribute part of the string 298 | 299 | These are pandoc style attributes https://pandoc.org/MANUAL.html#extension-attributes *) 300 | let attribute_string s = 301 | let buf = Buffer.create 64 in 302 | let rec loop s = 303 | match StrSlice.head s with 304 | | None -> (StrSlice.of_string (Buffer.contents buf), None) 305 | | Some ('\\' as c) -> ( 306 | let s = StrSlice.tail s in 307 | match StrSlice.head s with 308 | | Some c when is_punct c -> 309 | Buffer.add_char buf c; 310 | loop (StrSlice.tail s) 311 | | Some _ | None -> 312 | Buffer.add_char buf c; 313 | loop s) 314 | | Some '{' -> 315 | let buf' = Buffer.create 64 in 316 | let rec loop' s = 317 | match StrSlice.head s with 318 | | Some '}' -> ( 319 | (* Found a closing bracket not at the end of the line *) 320 | let s = StrSlice.tail s in 321 | match StrSlice.head s with 322 | | None -> 323 | (* At end of line, so we've finished parsing the attributes *) 324 | ( StrSlice.of_string (Buffer.contents buf) 325 | , Some (Buffer.contents buf') ) 326 | | Some _ -> 327 | (* Not at end of line, so this can't be a set of attributes *) 328 | Buffer.add_char buf '{'; 329 | Buffer.add_buffer buf buf'; 330 | Buffer.add_char buf '}'; 331 | loop s) 332 | | None -> 333 | Buffer.add_char buf '{'; 334 | Buffer.add_buffer buf buf'; 335 | (StrSlice.of_string (Buffer.contents buf), None) 336 | | Some '{' -> 337 | Buffer.add_char buf '{'; 338 | Buffer.add_buffer buf buf'; 339 | Buffer.reset buf'; 340 | loop' (StrSlice.tail s) 341 | | Some c -> 342 | Buffer.add_char buf' c; 343 | loop' (StrSlice.tail s) 344 | in 345 | loop' (StrSlice.tail s) 346 | | Some c -> 347 | Buffer.add_char buf c; 348 | loop (StrSlice.tail s) 349 | in 350 | let s', a = loop (trim_leading_ws s) in 351 | let attrs = Option.map parse_attributes a |> Option.value ~default:[] in 352 | (s', attrs) 353 | 354 | let atx_heading s = 355 | let rec loop n s = 356 | if n > 6 then raise Fail; 357 | match StrSlice.head s with 358 | | Some '#' -> loop (succ n) (StrSlice.tail s) 359 | | Some w when is_whitespace w -> 360 | let s, a = 361 | match StrSlice.last s with 362 | | Some '}' -> attribute_string s 363 | | _ -> (s, []) 364 | in 365 | let s = trim_ws s in 366 | let rec loop t = 367 | match StrSlice.last t with 368 | | Some '#' -> loop (StrSlice.drop_last t) 369 | | Some w when is_whitespace w -> trim_trailing_ws t 370 | | None -> trim_trailing_ws t 371 | | Some _ -> s 372 | in 373 | Latx_heading (n, StrSlice.to_string (trim_leading_ws (loop s)), a) 374 | | Some _ -> raise Fail 375 | | None -> Latx_heading (n, StrSlice.to_string s, []) 376 | in 377 | loop 0 s 378 | 379 | let entity s = 380 | match StrSlice.take 2 s with 381 | | '#' :: ('x' | 'X') :: _ -> 382 | let rec loop m n s = 383 | if m > 6 then raise Fail; 384 | match StrSlice.head s with 385 | | Some ('a' .. 'f' as c) -> 386 | loop 387 | (succ m) 388 | ((n * 16) + Char.code c - Char.code 'a' + 10) 389 | (StrSlice.tail s) 390 | | Some ('A' .. 'F' as c) -> 391 | loop 392 | (succ m) 393 | ((n * 16) + Char.code c - Char.code 'A' + 10) 394 | (StrSlice.tail s) 395 | | Some ('0' .. '9' as c) -> 396 | loop 397 | (succ m) 398 | ((n * 16) + Char.code c - Char.code '0') 399 | (StrSlice.tail s) 400 | | Some ';' -> 401 | if m = 0 then raise Fail; 402 | let u = 403 | if n = 0 || not (Uchar.is_valid n) then Uchar.rep 404 | else Uchar.of_int n 405 | in 406 | ([ u ], StrSlice.tail s) 407 | | Some _ | None -> raise Fail 408 | in 409 | loop 0 0 (StrSlice.drop 2 s) 410 | | '#' :: _ -> 411 | let rec loop m n s = 412 | if m > 7 then raise Fail; 413 | match StrSlice.head s with 414 | | Some ('0' .. '9' as c) -> 415 | loop 416 | (succ m) 417 | ((n * 10) + Char.code c - Char.code '0') 418 | (StrSlice.tail s) 419 | | Some ';' -> 420 | if m = 0 then raise Fail; 421 | let u = 422 | if n = 0 || not (Uchar.is_valid n) then Uchar.rep 423 | else Uchar.of_int n 424 | in 425 | ([ u ], StrSlice.tail s) 426 | | Some _ | None -> raise Fail 427 | in 428 | loop 0 0 (StrSlice.tail s) 429 | | ('a' .. 'z' | 'A' .. 'Z') :: _ -> 430 | let rec loop len t = 431 | match StrSlice.head t with 432 | | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9') -> 433 | loop (succ len) (StrSlice.tail t) 434 | | Some ';' -> ( 435 | let name = StrSlice.to_string (StrSlice.sub ~len s) in 436 | match Entities.f name with 437 | | [] -> raise Fail 438 | | cps -> (cps, StrSlice.tail t)) 439 | | Some _ | None -> raise Fail 440 | in 441 | loop 1 (StrSlice.tail s) 442 | | _ -> raise Fail 443 | 444 | let info_string c s = 445 | let buf = Buffer.create 17 in 446 | let s, a = 447 | match StrSlice.last s with Some '}' -> attribute_string s | _ -> (s, []) 448 | in 449 | let s = trim_ws s in 450 | let rec loop s = 451 | match StrSlice.head s with 452 | (* TODO use is_whitespace *) 453 | | Some (' ' | '\t' | '\010' .. '\013') | None -> 454 | if c = '`' && StrSlice.exists (function '`' -> true | _ -> false) s 455 | then raise Fail; 456 | ((Buffer.contents buf, StrSlice.to_string (trim_leading_ws s)), a) 457 | | Some '`' when c = '`' -> raise Fail 458 | | Some ('\\' as c) -> ( 459 | let s = StrSlice.tail s in 460 | match StrSlice.head s with 461 | | Some c when is_punct c -> 462 | Buffer.add_char buf c; 463 | loop (StrSlice.tail s) 464 | | Some _ | None -> 465 | Buffer.add_char buf c; 466 | loop s) 467 | | Some ('&' as c) -> ( 468 | let s = StrSlice.tail s in 469 | match entity s with 470 | | ul, s -> 471 | List.iter (Uutf.Buffer.add_utf_8 buf) ul; 472 | loop s 473 | | exception Fail -> 474 | Buffer.add_char buf c; 475 | loop s) 476 | | Some c -> 477 | Buffer.add_char buf c; 478 | loop (StrSlice.tail s) 479 | in 480 | loop (trim_leading_ws s) 481 | 482 | let fenced_code ind s = 483 | match StrSlice.head s with 484 | | Some (('`' | '~') as c) -> 485 | let rec loop n s = 486 | match StrSlice.head s with 487 | | Some c1 when c = c1 -> loop (succ n) (StrSlice.tail s) 488 | | Some _ | None -> 489 | if n < 3 then raise Fail; 490 | let s, a = info_string c s in 491 | let c = if c = '`' then Backtick else Tilde in 492 | Lfenced_code (ind, n, c, s, a) 493 | in 494 | loop 1 (StrSlice.tail s) 495 | | Some _ | None -> raise Fail 496 | 497 | let indent s = 498 | let rec loop n s = 499 | match StrSlice.head s with 500 | | Some ' ' -> loop (n + 1) (StrSlice.tail s) 501 | | Some '\t' -> loop (n + 4) (StrSlice.tail s) 502 | | Some _ | None -> n 503 | in 504 | loop 0 s 505 | 506 | let unordered_list_item ind s = 507 | match StrSlice.head s with 508 | | Some (('+' | '-' | '*') as c) -> 509 | let s = StrSlice.tail s in 510 | if is_empty s then Llist_item (Bullet c, 2 + ind, s) 511 | else 512 | let n = indent s in 513 | if n = 0 then raise Fail; 514 | let n = if n <= 4 then n else 1 in 515 | Llist_item (Bullet c, n + 1 + ind, StrSlice.offset n s) 516 | | Some _ | None -> raise Fail 517 | 518 | let ordered_list_item ind s = 519 | let rec loop n m s = 520 | match StrSlice.head s with 521 | | Some ('0' .. '9' as c) -> 522 | if n >= 9 then raise Fail; 523 | loop (succ n) ((m * 10) + Char.code c - Char.code '0') (StrSlice.tail s) 524 | | Some (('.' | ')') as c) -> 525 | let s = StrSlice.tail s in 526 | if is_empty s then Llist_item (Ordered (m, c), n + 1 + ind, s) 527 | else 528 | let ind' = indent s in 529 | if ind' = 0 then raise Fail; 530 | let ind' = if ind' <= 4 then ind' else 1 in 531 | Llist_item (Ordered (m, c), n + ind + ind' + 1, StrSlice.offset ind' s) 532 | | Some _ | None -> raise Fail 533 | in 534 | loop 0 0 s 535 | 536 | let tag_name s0 = 537 | match StrSlice.head s0 with 538 | | Some ('a' .. 'z' | 'A' .. 'Z') -> 539 | let rec loop len s = 540 | match StrSlice.head s with 541 | | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') -> 542 | loop (succ len) (StrSlice.tail s) 543 | | Some _ | None -> (StrSlice.to_string (StrSlice.sub s0 ~len), s) 544 | in 545 | loop 1 (StrSlice.tail s0) 546 | | Some _ | None -> raise Fail 547 | 548 | let known_tags = 549 | [ "address" 550 | ; "aside" 551 | ; "base" 552 | ; "basefont" 553 | ; "blockquote" 554 | ; "body" 555 | ; "caption" 556 | ; "center" 557 | ; "col" 558 | ; "colgroup" 559 | ; "dd" 560 | ; "details" 561 | ; "dialog" 562 | ; "dir" 563 | ; "div" 564 | ; "dl" 565 | ; "dt" 566 | ; "fieldset" 567 | ; "figcaption" 568 | ; "figure" 569 | ; "footer" 570 | ; "form" 571 | ; "frame" 572 | ; "frameset" 573 | ; "h1" 574 | ; "h2" 575 | ; "h3" 576 | ; "h4" 577 | ; "h5" 578 | ; "h6" 579 | ; "head" 580 | ; "header" 581 | ; "hr" 582 | ; "html" 583 | ; "iframe" 584 | ; "legend" 585 | ; "li" 586 | ; "link" 587 | ; "main" 588 | ; "menu" 589 | ; "menuitem" 590 | ; "meta" 591 | ; "nav" 592 | ; "noframes" 593 | ; "ol" 594 | ; "optgroup" 595 | ; "option" 596 | ; "p" 597 | ; "param" 598 | ; "section" 599 | ; "source" 600 | ; "summary" 601 | ; "table" 602 | ; "tbody" 603 | ; "td" 604 | ; "tfoot" 605 | ; "th" 606 | ; "thead" 607 | ; "title" 608 | ; "tr" 609 | ; "track" 610 | ; "ul" 611 | ] 612 | 613 | let special_tags = [ "pre"; "script"; "style"; "textarea" ] 614 | 615 | let known_tag s = 616 | let s = String.lowercase_ascii s in 617 | List.mem s known_tags 618 | 619 | let special_tag s = 620 | let s = String.lowercase_ascii s in 621 | List.mem s special_tags 622 | 623 | let closing_tag s = 624 | let s = trim_leading_ws s in 625 | match StrSlice.head s with 626 | | Some '>' -> 627 | if not (is_empty (StrSlice.tail s)) then raise Fail; 628 | Lhtml (false, Hblank) 629 | | Some _ | None -> raise Fail 630 | 631 | let special_tag tag s = 632 | if not (special_tag tag) then raise Fail; 633 | match StrSlice.head s with 634 | | Some (' ' | '\t' | '\010' .. '\013' | '>') | None -> 635 | Lhtml (true, Hcontains [ ""; ""; "" ]) 636 | | Some _ -> raise Fail 637 | 638 | let known_tag tag s = 639 | if not (known_tag tag) then raise Fail; 640 | match StrSlice.take 2 s with 641 | | (' ' | '\t' | '\010' .. '\013') :: _ | [] | '>' :: _ | '/' :: '>' :: _ -> 642 | Lhtml (true, Hblank) 643 | | _ -> raise Fail 644 | 645 | (** TODO Why these repeated functions that look just like thos in [P]? *) 646 | let ws1 s = 647 | match StrSlice.head s with 648 | | Some w when is_whitespace w -> trim_leading_ws s 649 | | Some _ | None -> raise Fail 650 | 651 | let attribute_name s = 652 | match StrSlice.head s with 653 | | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | ':') -> 654 | let rec loop s = 655 | match StrSlice.head s with 656 | | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | '.' | ':' | '0' .. '9') -> 657 | loop (StrSlice.tail s) 658 | | Some _ | None -> s 659 | in 660 | loop s 661 | | Some _ | None -> raise Fail 662 | 663 | let attribute_value s = 664 | match StrSlice.head s with 665 | | Some (('\'' | '"') as c) -> 666 | let rec loop s = 667 | match StrSlice.head s with 668 | | Some c1 when c = c1 -> StrSlice.tail s 669 | | Some _ -> loop (StrSlice.tail s) 670 | | None -> raise Fail 671 | in 672 | loop (StrSlice.tail s) 673 | | Some _ -> 674 | let rec loop first s = 675 | match StrSlice.head s with 676 | | Some 677 | (' ' | '\t' | '\010' .. '\013' | '"' | '\'' | '=' | '<' | '>' | '`') 678 | | None -> 679 | if first then raise Fail; 680 | s 681 | | Some _ -> loop false (StrSlice.tail s) 682 | in 683 | loop true s 684 | | None -> raise Fail 685 | 686 | let attribute s = 687 | let s = ws1 s in 688 | let s = attribute_name s in 689 | let s = trim_leading_ws s in 690 | match StrSlice.head s with 691 | | Some '=' -> 692 | let s = trim_leading_ws (StrSlice.tail s) in 693 | attribute_value s 694 | | Some _ | None -> s 695 | 696 | let attributes s = 697 | let rec loop s = match attribute s with s -> loop s | exception Fail -> s in 698 | loop s 699 | 700 | let open_tag s = 701 | let s = attributes s in 702 | let s = trim_leading_ws s in 703 | let n = 704 | match StrSlice.take 2 s with 705 | | '/' :: '>' :: _ -> 2 706 | | '>' :: _ -> 1 707 | | _ -> raise Fail 708 | in 709 | if not (is_empty (StrSlice.drop n s)) then raise Fail; 710 | Lhtml (false, Hblank) 711 | 712 | let raw_html s = 713 | match StrSlice.take 10 s with 714 | | '<' :: '?' :: _ -> Lhtml (true, Hcontains [ "?>" ]) 715 | | '<' :: '!' :: '-' :: '-' :: _ -> Lhtml (true, Hcontains [ "-->" ]) 716 | | '<' :: '!' :: '[' :: 'C' :: 'D' :: 'A' :: 'T' :: 'A' :: '[' :: _ -> 717 | Lhtml (true, Hcontains [ "]]>" ]) 718 | | '<' :: '!' :: _ -> Lhtml (true, Hcontains [ ">" ]) 719 | | '<' :: '/' :: _ -> 720 | let tag, s = tag_name (StrSlice.drop 2 s) in 721 | (known_tag tag ||| closing_tag) s 722 | | '<' :: _ -> 723 | let tag, s = tag_name (StrSlice.drop 1 s) in 724 | (special_tag tag ||| known_tag tag ||| open_tag) s 725 | | _ -> raise Fail 726 | 727 | let blank s = 728 | if not (is_empty s) then raise Fail; 729 | Lempty 730 | 731 | let tag_string s = 732 | let buf = Buffer.create 17 in 733 | let s, a = 734 | match StrSlice.last s with Some '}' -> attribute_string s | _ -> (s, []) 735 | in 736 | let s = trim_ws s in 737 | let rec loop s = 738 | match StrSlice.head s with 739 | (* TODO use is_whitespace *) 740 | | Some (' ' | '\t' | '\010' .. '\013') | None -> (Buffer.contents buf, a) 741 | | Some c -> 742 | Buffer.add_char buf c; 743 | loop (StrSlice.tail s) 744 | in 745 | loop (trim_leading_ws s) 746 | 747 | let def_list s = 748 | let s = StrSlice.tail s in 749 | match StrSlice.head s with 750 | | Some w when is_whitespace w -> 751 | Ldef_list (String.trim (StrSlice.to_string s)) 752 | | _ -> raise Fail 753 | 754 | let indented_code ind s = 755 | if indent s + ind < 4 then raise Fail; 756 | Lindented_code (StrSlice.offset (4 - ind) s) 757 | 758 | (* A sequence of cell contents separated by unescaped '|' 759 | characters. *) 760 | let table_row ~pipe_prefix s = 761 | let rec loop items seen_pipe s = 762 | match StrSlice.index_unescaped '|' s with 763 | | None -> 764 | if StrSlice.for_all is_whitespace s then (items, seen_pipe) 765 | else (s :: items, false) 766 | | Some i -> 767 | let item = StrSlice.take_prefix i s in 768 | loop (item :: items) true (StrSlice.drop (i + 1) s) 769 | in 770 | let items, terminating_pipe = loop [] pipe_prefix s in 771 | match (pipe_prefix, items, terminating_pipe) with 772 | | true, _, _ | _, _ :: _, true | _, _ :: _ :: _, _ -> 773 | Ltable_line (List.rev_map StrSlice.trim items) 774 | | _ -> raise Fail 775 | 776 | let parse s0 = 777 | let ind, s = sp3 s0 in 778 | match StrSlice.head s with 779 | | Some '>' -> 780 | let s = StrSlice.offset 1 s in 781 | let s = if indent s > 0 then StrSlice.offset 1 s else s in 782 | Lblockquote s 783 | | Some '=' -> (setext_heading ||| table_row ~pipe_prefix:false) s 784 | | Some '-' -> 785 | (setext_heading 786 | ||| thematic_break 787 | ||| unordered_list_item ind 788 | ||| table_row ~pipe_prefix:false) 789 | s 790 | | Some '_' -> thematic_break s 791 | | Some '#' -> atx_heading s 792 | | Some ('~' | '`') -> fenced_code ind s 793 | | Some '<' -> raw_html s 794 | | Some '*' -> (thematic_break ||| unordered_list_item ind) s 795 | | Some '+' -> unordered_list_item ind s 796 | | Some '0' .. '9' -> 797 | (ordered_list_item ind ||| table_row ~pipe_prefix:false) s 798 | | Some ':' -> (def_list ||| table_row ~pipe_prefix:false) s 799 | | Some '|' -> table_row ~pipe_prefix:true (StrSlice.tail s) 800 | | Some _ -> (blank ||| indented_code ind ||| table_row ~pipe_prefix:false) s 801 | | None -> Lempty 802 | 803 | let parse s = try parse s with Fail -> Lparagraph 804 | 805 | open P 806 | 807 | let is_empty st = 808 | let off = pos st in 809 | try 810 | let rec loop () = 811 | match next st with 812 | | c when is_whitespace c -> loop () 813 | | _ -> 814 | set_pos st off; 815 | false 816 | in 817 | loop () 818 | with Fail -> 819 | set_pos st off; 820 | true 821 | 822 | let inline_attribute_string s = 823 | let ppos = pos s in 824 | ws s; 825 | let a = 826 | match peek s with 827 | | Some '{' -> 828 | let buf = Buffer.create 64 in 829 | let rec loop s pos = 830 | match peek s with 831 | | Some '}' -> 832 | junk s; 833 | Some (Buffer.contents buf) 834 | | None | Some '{' -> 835 | set_pos s pos; 836 | None 837 | | Some c -> 838 | Buffer.add_char buf c; 839 | junk s; 840 | loop s pos 841 | in 842 | junk s; 843 | loop s (pos s) 844 | | _ -> None 845 | in 846 | let attr = Option.map parse_attributes a |> Option.value ~default:[] in 847 | if attr = [] then set_pos s ppos; 848 | attr 849 | 850 | let entity buf st = 851 | junk st; 852 | match on_sub entity st with 853 | | cs -> List.iter (Uutf.Buffer.add_utf_8 buf) cs 854 | | exception Fail -> Buffer.add_char buf '&' 855 | 856 | module Pre = struct 857 | type delim = 858 | | Ws 859 | | Punct 860 | | Other 861 | 862 | type emph_style = 863 | | Star 864 | | Underscore 865 | 866 | type link_kind = 867 | | Img 868 | | Url 869 | 870 | type t = 871 | | Bang_left_bracket 872 | | Left_bracket of link_kind 873 | | Emph of delim * delim * emph_style * int 874 | | R of attributes inline 875 | 876 | let concat = function [ x ] -> x | l -> Concat ([], l) 877 | 878 | let left_flanking = function 879 | | Emph (_, Other, _, _) | Emph ((Ws | Punct), Punct, _, _) -> true 880 | | _ -> false 881 | 882 | let right_flanking = function 883 | | Emph (Other, _, _, _) | Emph (Punct, (Ws | Punct), _, _) -> true 884 | | _ -> false 885 | 886 | let is_opener = function 887 | | Emph (pre, _, Underscore, _) as x -> 888 | left_flanking x && ((not (right_flanking x)) || pre = Punct) 889 | | Emph (_, _, Star, _) as x -> left_flanking x 890 | | _ -> false 891 | 892 | let is_closer = function 893 | | Emph (_, post, Underscore, _) as x -> 894 | right_flanking x && ((not (left_flanking x)) || post = Punct) 895 | | Emph (_, _, Star, _) as x -> right_flanking x 896 | | _ -> false 897 | 898 | let classify_delim = function 899 | | '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' 900 | | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' 901 | | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' -> 902 | Punct 903 | | ' ' | '\t' | '\010' .. '\013' | '\160' -> Ws 904 | | _ -> Other 905 | 906 | let to_r = function 907 | | Bang_left_bracket -> Text ([], "![") 908 | | Left_bracket Img -> Text ([], "![") 909 | | Left_bracket Url -> Text ([], "[") 910 | | Emph (_, _, Star, n) -> Text ([], String.make n '*') 911 | | Emph (_, _, Underscore, n) -> Text ([], String.make n '_') 912 | | R x -> x 913 | 914 | let rec find_next_emph = function 915 | | Emph (pre, post, style, n) :: _ -> Some (pre, post, style, n) 916 | | _ :: xs -> find_next_emph xs 917 | | [] -> None 918 | 919 | let rec find_next_closer_emph = function 920 | | (Emph (pre, post, style, n) as e) :: _ when is_closer e -> 921 | Some (pre, post, style, n) 922 | | _ :: xs -> find_next_closer_emph xs 923 | | [] -> None 924 | 925 | (* Checks the lengths of two different emphasis delimiters to see if there can be a match. 926 | 927 | From the spec: "If one of the delimiters can both open and close emphasis, then the sum of the lengths 928 | of the delimiter runs containing the opening and closing delimiters must not be 929 | a multiple of 3 unless both lengths are multiples of 3" *) 930 | let is_emph_match n1 n2 = 931 | (* 932 | - *foo**bar**baz* 933 | 934 | *foo** -> the second delimiter ** is both an opening and closing delimiter. 935 | The sum of the length of both delimiters is 3, so they can't be matched. 936 | 937 | **bar** -> they are both opening and closing delemiters. 938 | Their sum is 4 which is not a multiple of 3 so they can be matched to produce bar 939 | 940 | The end result is: foobarbaz 941 | 942 | - *foo***bar**baz* 943 | 944 | *foo*** -> *** is both an opening and closing delimiter. 945 | Their sum is 4 so they can be matched to produce: foo** 946 | 947 | **bar** -> they are both opening and closing delemiters. 948 | Their sum is 4 which is not a multiple of 3 so they can be matched to produce bar 949 | 950 | The end result is: foobarbaz* 951 | 952 | - ***foo***bar**baz* 953 | 954 | ***foo*** -> the second delimiter *** is both an opening and closing delimiter. 955 | Their sum is 6 which is a multiple of 3. However, both lengths are multiples of 3 956 | so they can be matched to produce: foo 957 | 958 | bar**baz* -> ** is both an opening and closing delimiter. 959 | Their sum is 3 so they can't be matched 960 | 961 | The end result is: foobar**baz* 962 | *) 963 | if (n1 + n2) mod 3 = 0 && n1 mod 3 != 0 && n2 mod 3 != 0 then false 964 | else true 965 | 966 | let rec parse_emph = function 967 | | (Emph (pre, _, q1, n1) as x1) :: xs when is_opener x1 -> 968 | let rec loop acc = function 969 | | (Emph (_, post, q2, n2) as x2) :: xs1 as xs 970 | when is_closer x2 && q1 = q2 -> 971 | (* At this point we have an openener followed by a closer. Both are of the same style (either * or _) *) 972 | if (is_opener x2 || is_closer x1) && not (is_emph_match n1 n2) 973 | then 974 | (* 975 | The second delimiter (the closer) is also an opener, and both delimiters don't match together, 976 | according to the "mod 3" rule. In that case, we check if the next delimiter can match. 977 | 978 | *foo**bar**baz* The second delimiter that's both an opener/closer ( ** before bar) 979 | matches with the next delimiter ( ** after bar). They'll become 980 | bar. The end result will be: foobarbaz 981 | 982 | 983 | *foo**bar*baz* The second delimiter that's both an opener/closer ( ** before bar) 984 | doesn't match with the next delimiter ( * after bar). **bar will be 985 | considered as regular text. The end result will be: foo**barbaz* 986 | *) 987 | match find_next_emph xs1 with 988 | | Some (_, _, _, n3) when is_emph_match n3 n2 -> 989 | let xs' = parse_emph xs in 990 | loop acc xs' 991 | | _ -> loop (x2 :: acc) xs1 992 | else 993 | let xs = 994 | if n1 >= 2 && n2 >= 2 then 995 | if n2 > 2 then Emph (Other, post, q2, n2 - 2) :: xs1 996 | else xs1 997 | else if n2 > 1 then Emph (Punct, post, q2, n2 - 1) :: xs1 998 | else xs1 999 | in 1000 | let r = 1001 | let il = concat (List.map to_r (List.rev acc)) in 1002 | if n1 >= 2 && n2 >= 2 then R (Strong ([], il)) :: xs 1003 | else R (Emph ([], il)) :: xs 1004 | in 1005 | let r = 1006 | if n1 >= 2 && n2 >= 2 then 1007 | if n1 > 2 then Emph (pre, Other, q1, n1 - 2) :: r else r 1008 | else if n1 > 1 then Emph (pre, Punct, q1, n1 - 1) :: r 1009 | else r 1010 | in 1011 | parse_emph r 1012 | | (Emph (_, _, q2, _) as x2) :: xs1 as xs when is_opener x2 -> 1013 | (* 1014 | This case happens when we encounter a second opener delimiter. We look ahead for the next closer, 1015 | and if the next closer is of the same style, we can match them together. 1016 | 1017 | *foo _bar_ baz_ The second opener (_ before `bar`) is of the same style as the next closer 1018 | (_ after `bar`). We can match them to produce bar 1019 | The end result will be: *foo bar baz_ 1020 | 1021 | 1022 | *foo _bar* baz_ The second opener (_ before `bar`) is not of the same style as the next closer 1023 | ( * after `bar`). They can't be matched so we'll consider _bar as regular text. 1024 | The end result will be: foo _bar baz_ 1025 | *) 1026 | let is_next_closer_same = 1027 | match find_next_closer_emph xs1 with 1028 | | None -> false 1029 | | Some (_, _, q3, _) -> q2 = q3 1030 | in 1031 | if not is_next_closer_same then loop (x2 :: acc) xs1 1032 | else loop acc (parse_emph xs) 1033 | | x :: xs -> loop (x :: acc) xs 1034 | | [] -> x1 :: List.rev acc 1035 | in 1036 | loop [] xs 1037 | | x :: xs -> x :: parse_emph xs 1038 | | [] -> [] 1039 | 1040 | let parse_emph xs = concat (List.map to_r (parse_emph xs)) 1041 | end 1042 | 1043 | let escape buf st = 1044 | if next st <> '\\' then raise Fail; 1045 | match peek st with 1046 | | Some c when is_punct c -> 1047 | junk st; 1048 | Buffer.add_char buf c 1049 | | Some _ | None -> Buffer.add_char buf '\\' 1050 | 1051 | let link_label allow_balanced_brackets st = 1052 | if peek_exn st <> '[' then raise Fail; 1053 | junk st; 1054 | let buf = Buffer.create 17 in 1055 | let rec loop n nonempty = 1056 | match peek_exn st with 1057 | | ']' when n = 0 -> 1058 | junk st; 1059 | if not nonempty then raise Fail; 1060 | Buffer.contents buf 1061 | | ']' as c -> 1062 | assert (n > 0); 1063 | junk st; 1064 | Buffer.add_char buf c; 1065 | loop (pred n) true 1066 | | '\\' as c -> 1067 | junk st; 1068 | Buffer.add_char buf c; 1069 | begin 1070 | match peek_exn st with 1071 | | c when is_punct c -> 1072 | junk st; 1073 | Buffer.add_char buf c 1074 | | _ -> () 1075 | end; 1076 | loop n true 1077 | | '[' when not allow_balanced_brackets -> raise Fail 1078 | | '[' as c -> 1079 | junk st; 1080 | Buffer.add_char buf c; 1081 | loop (succ n) true 1082 | | w when is_whitespace w -> 1083 | junk st; 1084 | Buffer.add_char buf w; 1085 | loop n nonempty 1086 | | c -> 1087 | junk st; 1088 | Buffer.add_char buf c; 1089 | loop n true 1090 | in 1091 | loop 0 false 1092 | 1093 | type add_uchar_result = 1094 | { start : bool 1095 | ; seen_ws : bool 1096 | } 1097 | 1098 | (* based on https://erratique.ch/software/uucp/doc/Uucp/Case/index.html#caselesseq *) 1099 | let normalize s = 1100 | let canonical_caseless_key s = 1101 | let b = Buffer.create (String.length s * 2) in 1102 | let to_nfd_and_utf_8 = 1103 | let n = Uunf.create `NFD in 1104 | let rec add v = 1105 | match Uunf.add n v with 1106 | | `Await | `End -> () 1107 | | `Uchar u -> 1108 | Uutf.Buffer.add_utf_8 b u; 1109 | add `Await 1110 | in 1111 | add 1112 | in 1113 | let add_nfd = 1114 | let n = Uunf.create `NFD in 1115 | let rec add v = 1116 | match Uunf.add n v with 1117 | | `Await | `End -> () 1118 | | `Uchar u -> 1119 | (match Uucp.Case.Fold.fold u with 1120 | | `Self -> to_nfd_and_utf_8 (`Uchar u) 1121 | | `Uchars us -> List.iter (fun u -> to_nfd_and_utf_8 (`Uchar u)) us); 1122 | add `Await 1123 | in 1124 | add 1125 | in 1126 | let uspace = `Uchar (Uchar.of_char ' ') in 1127 | let add_uchar { start; seen_ws } _ = function 1128 | | `Malformed _ -> 1129 | add_nfd (`Uchar Uutf.u_rep); 1130 | { start = false; seen_ws = false } 1131 | | `Uchar u as uchar -> 1132 | if Uucp.White.is_white_space u then { start; seen_ws = true } 1133 | else ( 1134 | if (not start) && seen_ws then add_nfd uspace; 1135 | add_nfd uchar; 1136 | { start = false; seen_ws = false }) 1137 | in 1138 | let (_ : add_uchar_result) = 1139 | Uutf.String.fold_utf_8 add_uchar { start = true; seen_ws = false } s 1140 | in 1141 | add_nfd `End; 1142 | to_nfd_and_utf_8 `End; 1143 | Buffer.contents b 1144 | in 1145 | canonical_caseless_key s 1146 | 1147 | let tag_name st = 1148 | match peek_exn st with 1149 | | 'a' .. 'z' | 'A' .. 'Z' -> 1150 | junk st; 1151 | let rec loop () = 1152 | match peek st with 1153 | | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') -> 1154 | junk st; 1155 | loop () 1156 | | Some _ | None -> () 1157 | in 1158 | loop () 1159 | | _ -> raise Fail 1160 | 1161 | let ws_buf buf st = 1162 | let rec loop () = 1163 | match peek st with 1164 | | Some w when is_whitespace w -> 1165 | Buffer.add_char buf w; 1166 | junk st; 1167 | loop () 1168 | | Some _ | None -> () 1169 | in 1170 | loop () 1171 | 1172 | let closing_tag st = 1173 | let start = pos st in 1174 | if next st <> '<' then raise Fail; 1175 | if next st <> '/' then raise Fail; 1176 | tag_name st; 1177 | ws st; 1178 | if next st <> '>' then raise Fail; 1179 | range st start (pos st - start) 1180 | 1181 | let list p st = 1182 | let rec loop () = 1183 | match protect p st with () -> loop () | exception Fail -> () 1184 | in 1185 | loop () 1186 | 1187 | let single_quoted_attribute st = 1188 | if next st <> '\'' then raise Fail; 1189 | let rec loop () = 1190 | match peek_exn st with 1191 | | '\'' -> junk st 1192 | (* | '&' -> *) 1193 | (* entity buf st; loop () *) 1194 | | _ -> 1195 | junk st; 1196 | loop () 1197 | in 1198 | loop () 1199 | 1200 | let double_quoted_attribute st = 1201 | if next st <> '"' then raise Fail; 1202 | let rec loop () = 1203 | match peek_exn st with 1204 | | '"' -> junk st 1205 | (* | '&' -> *) 1206 | (* entity buf st; loop () *) 1207 | | _ -> 1208 | junk st; 1209 | loop () 1210 | in 1211 | loop () 1212 | 1213 | let unquoted_attribute st = 1214 | let rec loop n = 1215 | match peek_exn st with 1216 | | ' ' | '\t' | '\010' .. '\013' | '"' | '\'' | '=' | '<' | '>' | '`' -> 1217 | if n = 0 then raise Fail 1218 | (* | '&' -> *) 1219 | (* entity buf st; loop () *) 1220 | | _ -> 1221 | junk st; 1222 | loop (succ n) 1223 | in 1224 | loop 0 1225 | 1226 | let attribute_value st = 1227 | match peek_exn st with 1228 | | '\'' -> single_quoted_attribute st 1229 | | '"' -> double_quoted_attribute st 1230 | | _ -> unquoted_attribute st 1231 | 1232 | let attribute_name st = 1233 | match peek_exn st with 1234 | | 'a' .. 'z' | 'A' .. 'Z' | '_' | ':' -> 1235 | junk st; 1236 | let rec loop () = 1237 | match peek st with 1238 | | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' | '.' | ':' | '-') -> 1239 | junk st; 1240 | loop () 1241 | | Some _ | None -> () 1242 | in 1243 | loop () 1244 | | _ -> raise Fail 1245 | 1246 | let option d p st = match protect p st with r -> r | exception Fail -> d 1247 | let some p st = Some (p st) 1248 | let attribute_value_specification = ws >>> char '=' >>> ws >>> attribute_value 1249 | 1250 | let ws1_buf buf st = 1251 | match peek st with 1252 | | Some w when is_whitespace w -> ws_buf buf st 1253 | | Some _ | None -> raise Fail 1254 | 1255 | let attribute st = 1256 | ws1 st; 1257 | attribute_name st; 1258 | option () attribute_value_specification st 1259 | 1260 | let open_tag st = 1261 | let start = pos st in 1262 | if next st <> '<' then raise Fail; 1263 | tag_name st; 1264 | list attribute st; 1265 | ws st; 1266 | begin 1267 | match peek_exn st with 1268 | | '/' -> junk st 1269 | | _ -> () 1270 | end; 1271 | if next st <> '>' then raise Fail; 1272 | range st start (pos st - start) 1273 | 1274 | let html_comment st = 1275 | let buf = Buffer.create 17 in 1276 | if next st <> '<' then raise Fail; 1277 | if next st <> '!' then raise Fail; 1278 | if next st <> '-' then raise Fail; 1279 | if next st <> '-' then raise Fail; 1280 | Buffer.add_string buf ""; 1290 | Buffer.contents buf 1291 | | '>' when start -> raise Fail 1292 | | _ -> 1293 | Buffer.add_char buf c; 1294 | loop false) 1295 | | '>' when start -> raise Fail 1296 | | '&' -> 1297 | entity buf st; 1298 | loop false 1299 | | _ as c -> 1300 | junk st; 1301 | Buffer.add_char buf c; 1302 | loop false 1303 | in 1304 | loop true 1305 | 1306 | let processing_instruction st = 1307 | let buf = Buffer.create 17 in 1308 | if next st <> '<' then raise Fail; 1309 | if next st <> '?' then raise Fail; 1310 | Buffer.add_string buf " ( 1314 | junk st; 1315 | match peek_exn st with 1316 | | '>' -> 1317 | junk st; 1318 | Buffer.add_string buf "?>"; 1319 | Buffer.contents buf 1320 | | _ -> 1321 | Buffer.add_char buf c; 1322 | loop ()) 1323 | | '&' -> 1324 | entity buf st; 1325 | loop () 1326 | | _ as c -> 1327 | junk st; 1328 | Buffer.add_char buf c; 1329 | loop () 1330 | in 1331 | loop () 1332 | 1333 | let cdata_section st = 1334 | let buf = Buffer.create 17 in 1335 | if next st <> '<' then raise Fail; 1336 | if next st <> '!' then raise Fail; 1337 | if next st <> '[' then raise Fail; 1338 | if next st <> 'C' then raise Fail; 1339 | if next st <> 'D' then raise Fail; 1340 | if next st <> 'A' then raise Fail; 1341 | if next st <> 'T' then raise Fail; 1342 | if next st <> 'A' then raise Fail; 1343 | if next st <> '[' then raise Fail; 1344 | Buffer.add_string buf " ( 1348 | junk st; 1349 | match peek_exn st with 1350 | | ']' as c1 -> ( 1351 | junk st; 1352 | match peek_exn st with 1353 | | '>' -> 1354 | junk st; 1355 | Buffer.add_string buf "]]>"; 1356 | Buffer.contents buf 1357 | | _ -> 1358 | Buffer.add_char buf c; 1359 | Buffer.add_char buf c1; 1360 | loop ()) 1361 | | _ -> 1362 | Buffer.add_char buf c; 1363 | loop ()) 1364 | | '&' -> 1365 | entity buf st; 1366 | loop () 1367 | | _ as c -> 1368 | junk st; 1369 | Buffer.add_char buf c; 1370 | loop () 1371 | in 1372 | loop () 1373 | 1374 | let declaration st = 1375 | let buf = Buffer.create 17 in 1376 | if next st <> '<' then raise Fail; 1377 | if next st <> '!' then raise Fail; 1378 | Buffer.add_string buf " 1381 | let rec loop () = 1382 | match peek_exn st with 1383 | | 'A' .. 'Z' as c -> 1384 | junk st; 1385 | Buffer.add_char buf c; 1386 | loop () 1387 | | w when is_whitespace w -> 1388 | ws1_buf buf st; 1389 | let rec loop () = 1390 | match peek_exn st with 1391 | | '>' as c -> 1392 | junk st; 1393 | Buffer.add_char buf c; 1394 | Buffer.contents buf 1395 | | '&' -> 1396 | entity buf st; 1397 | loop () 1398 | | _ as c -> 1399 | junk st; 1400 | Buffer.add_char buf c; 1401 | loop () 1402 | in 1403 | loop () 1404 | | _ -> raise Fail 1405 | in 1406 | loop () 1407 | | _ -> raise Fail 1408 | 1409 | let link_destination st = 1410 | let buf = Buffer.create 17 in 1411 | match peek_exn st with 1412 | | '<' -> 1413 | junk st; 1414 | let rec loop () = 1415 | match peek_exn st with 1416 | | '>' -> 1417 | junk st; 1418 | Buffer.contents buf 1419 | | '\010' .. '\013' | '<' -> raise Fail 1420 | | '\\' -> 1421 | escape buf st; 1422 | loop () 1423 | | '&' -> 1424 | entity buf st; 1425 | loop () 1426 | | _ as c -> 1427 | junk st; 1428 | Buffer.add_char buf c; 1429 | loop () 1430 | in 1431 | loop () 1432 | | _ -> 1433 | let rec loop n = 1434 | match peek st with 1435 | | Some ('(' as c) -> 1436 | junk st; 1437 | Buffer.add_char buf c; 1438 | loop (succ n) 1439 | | Some ')' when n = 0 -> 1440 | if Buffer.length buf = 0 then raise Fail; 1441 | Buffer.contents buf 1442 | | Some (')' as c) -> 1443 | junk st; 1444 | Buffer.add_char buf c; 1445 | loop (pred n) 1446 | | Some '\\' -> 1447 | escape buf st; 1448 | loop n 1449 | | Some '&' -> 1450 | entity buf st; 1451 | loop n 1452 | | Some (' ' | '\t' | '\x00' .. '\x1F' | '\x7F') | None -> 1453 | if n > 0 || Buffer.length buf = 0 then raise Fail; 1454 | Buffer.contents buf 1455 | | Some c -> 1456 | junk st; 1457 | Buffer.add_char buf c; 1458 | loop n 1459 | in 1460 | loop 0 1461 | 1462 | let eol st = 1463 | match peek st with Some '\n' -> junk st | Some _ -> raise Fail | None -> () 1464 | 1465 | let link_title st = 1466 | let buf = Buffer.create 17 in 1467 | match peek_exn st with 1468 | | ('\'' | '"') as c -> 1469 | junk st; 1470 | let rec loop () = 1471 | match peek_exn st with 1472 | | '\\' -> 1473 | escape buf st; 1474 | loop () 1475 | | '&' -> 1476 | entity buf st; 1477 | loop () 1478 | | _ as c1 when c = c1 -> 1479 | junk st; 1480 | Buffer.contents buf 1481 | | _ as c1 -> 1482 | junk st; 1483 | Buffer.add_char buf c1; 1484 | loop () 1485 | in 1486 | loop () 1487 | | '(' -> 1488 | junk st; 1489 | let rec loop () = 1490 | match peek_exn st with 1491 | | '\\' -> 1492 | escape buf st; 1493 | loop () 1494 | | '&' -> 1495 | entity buf st; 1496 | loop () 1497 | | ')' -> 1498 | junk st; 1499 | Buffer.contents buf 1500 | | _ as c -> 1501 | junk st; 1502 | Buffer.add_char buf c; 1503 | loop () 1504 | in 1505 | loop () 1506 | | _ -> raise Fail 1507 | 1508 | let space st = match peek_exn st with ' ' -> junk st | _ -> raise Fail 1509 | 1510 | let many p st = 1511 | try 1512 | while true do 1513 | p st 1514 | done 1515 | with Fail -> () 1516 | 1517 | let scheme st = 1518 | match peek_exn st with 1519 | | 'a' .. 'z' | 'A' .. 'Z' -> 1520 | let rec loop n = 1521 | if n < 32 then 1522 | match peek st with 1523 | | Some ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '.' | '-') -> 1524 | junk st; 1525 | loop (succ n) 1526 | | Some _ | None -> n 1527 | else n 1528 | in 1529 | let n = loop 0 in 1530 | if n < 2 then raise Fail 1531 | | _ -> raise Fail 1532 | 1533 | let absolute_uri st = 1534 | let p = pos st in 1535 | scheme st; 1536 | if next st <> ':' then raise Fail; 1537 | let rec loop () = 1538 | match peek st with 1539 | | Some 1540 | ( ' ' | '\t' 1541 | | '\010' .. '\013' 1542 | | '\x00' .. '\x1F' 1543 | | '\x7F' .. '\x9F' 1544 | | '<' | '>' ) 1545 | | None -> 1546 | let txt = range st p (pos st - p) in 1547 | (txt, txt) 1548 | | Some _ -> 1549 | junk st; 1550 | loop () 1551 | in 1552 | loop () 1553 | 1554 | let email_address st = 1555 | let p = pos st in 1556 | let rec loop n = 1557 | match peek_exn st with 1558 | | 'a' .. 'z' 1559 | | 'A' .. 'Z' 1560 | | '0' .. '9' 1561 | | '.' | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '/' | '=' | '?' 1562 | | '^' | '_' | '`' | '{' | '|' | '}' | '~' | '-' -> 1563 | junk st; 1564 | loop (succ n) 1565 | | '@' -> 1566 | junk st; 1567 | let label st = 1568 | let let_dig st = 1569 | match peek_exn st with 1570 | | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> 1571 | junk st; 1572 | false 1573 | | '-' -> 1574 | junk st; 1575 | true 1576 | | _ -> raise Fail 1577 | in 1578 | if let_dig st then raise Fail; 1579 | let rec loop last = 1580 | match let_dig st with 1581 | | r -> loop r 1582 | | exception Fail -> if last then raise Fail 1583 | in 1584 | loop false 1585 | in 1586 | label st; 1587 | list (char '.' >>> label) st; 1588 | let txt = range st p (pos st - p) in 1589 | (txt, "mailto:" ^ txt) 1590 | | _ -> raise Fail 1591 | in 1592 | loop 0 1593 | 1594 | let autolink st = 1595 | match peek_exn st with 1596 | | '<' -> 1597 | junk st; 1598 | let label, destination = (absolute_uri ||| email_address) st in 1599 | if next st <> '>' then raise Fail; 1600 | { Ast.Impl.label = Text ([], label); destination; title = None } 1601 | | _ -> raise Fail 1602 | 1603 | let inline_link = 1604 | char '(' 1605 | >>> ws 1606 | >>> option 1607 | ("", None) 1608 | (pair link_destination (option None (ws1 >>> some link_title))) 1609 | <<< ws 1610 | <<< char ')' 1611 | 1612 | let get_buf buf = 1613 | let s = Buffer.contents buf in 1614 | Buffer.clear buf; 1615 | s 1616 | 1617 | let text buf acc = 1618 | if Buffer.length buf = 0 then acc else Pre.R (Text ([], get_buf buf)) :: acc 1619 | 1620 | let inline_pre buf acc st = 1621 | let pos = pos st in 1622 | let rec gobble_open_backtick n = 1623 | match peek st with 1624 | | Some '`' -> 1625 | junk st; 1626 | gobble_open_backtick (succ n) 1627 | | Some _ -> 1628 | let acc = text buf acc in 1629 | let bufcode = Buffer.create 17 in 1630 | let finish () = 1631 | let content = Buffer.contents bufcode in 1632 | let content = 1633 | if String.for_all (fun c -> c = ' ') content then content 1634 | else if 1635 | String.length content >= 2 1636 | && content.[0] = ' ' 1637 | && content.[String.length content - 1] = ' ' 1638 | then String.sub content 1 (String.length content - 2) 1639 | else content 1640 | in 1641 | let attr = inline_attribute_string st in 1642 | Pre.R (Code (attr, content)) :: acc 1643 | in 1644 | let rec gobble_body start m = 1645 | match peek st with 1646 | | Some '`' -> 1647 | junk st; 1648 | gobble_body start (succ m) 1649 | | _ when m = n -> finish () 1650 | | Some c when is_whitespace c -> 1651 | if m > 0 then Buffer.add_string bufcode (String.make m '`'); 1652 | Buffer.add_char bufcode (if c = '\010' then ' ' else c); 1653 | junk st; 1654 | gobble_body (start && m = 0) 0 1655 | | Some c -> 1656 | junk st; 1657 | (* if seen_ws then Buffer.add_char bufcode ' '; *) 1658 | if m > 0 then Buffer.add_string bufcode (String.make m '`'); 1659 | Buffer.add_char bufcode c; 1660 | gobble_body false 0 1661 | | None -> 1662 | Buffer.add_string buf (range st pos n); 1663 | set_pos st (pos + n); 1664 | acc 1665 | in 1666 | gobble_body true 0 1667 | | None -> 1668 | Buffer.add_string buf (String.make n '`'); 1669 | acc 1670 | in 1671 | gobble_open_backtick 0 1672 | 1673 | let rec inline defs st = 1674 | let buf = Buffer.create 0 in 1675 | let text acc = text buf acc in 1676 | let rec reference_link kind acc st = 1677 | let off0 = pos st in 1678 | match protect (link_label true) st with 1679 | | lab -> ( 1680 | let reflink lab = 1681 | let s = normalize lab in 1682 | match 1683 | List.find_opt 1684 | (fun ({ label; _ } : attributes link_def) -> label = s) 1685 | defs 1686 | with 1687 | | Some { label = _; destination; title; attributes = attr } -> 1688 | let lab1 = inline defs (of_string lab) in 1689 | let r = 1690 | let def = { label = lab1; destination; title } in 1691 | match kind with 1692 | | Pre.Img -> Image (attr, def) 1693 | | Url -> Link (attr, def) 1694 | in 1695 | loop (Pre.R r :: text acc) st 1696 | | None -> 1697 | if kind = Img then Buffer.add_char buf '!'; 1698 | Buffer.add_char buf '['; 1699 | let acc = text acc in 1700 | set_pos st (succ off0); 1701 | loop acc st 1702 | in 1703 | match peek st with 1704 | | Some '[' -> ( 1705 | if peek_after '\000' st = ']' then ( 1706 | junk st; 1707 | junk st; 1708 | reflink lab) 1709 | else 1710 | match protect (link_label false) st with 1711 | | _ -> 1712 | set_pos st off0; 1713 | junk st; 1714 | loop (Left_bracket kind :: text acc) st 1715 | | exception Fail -> reflink lab) 1716 | | Some '(' -> ( 1717 | match protect inline_link st with 1718 | | _ -> 1719 | set_pos st off0; 1720 | junk st; 1721 | loop (Left_bracket kind :: text acc) st 1722 | | exception Fail -> reflink lab) 1723 | | Some _ | None -> reflink lab) 1724 | | exception Fail -> 1725 | junk st; 1726 | loop (Left_bracket kind :: text acc) st 1727 | and loop ~seen_link acc st = 1728 | match peek_exn st with 1729 | | '<' as c -> ( 1730 | match protect autolink st with 1731 | | def -> 1732 | let attr = inline_attribute_string st in 1733 | loop ~seen_link (Pre.R (Link (attr, def)) :: text acc) st 1734 | | exception Fail -> ( 1735 | match 1736 | protect 1737 | (closing_tag 1738 | ||| open_tag 1739 | ||| html_comment 1740 | ||| declaration 1741 | ||| cdata_section 1742 | ||| processing_instruction) 1743 | st 1744 | with 1745 | | tag -> loop ~seen_link (Pre.R (Html ([], tag)) :: text acc) st 1746 | | exception Fail -> 1747 | junk st; 1748 | Buffer.add_char buf c; 1749 | loop ~seen_link acc st)) 1750 | | '\n' -> 1751 | junk st; 1752 | sp st; 1753 | loop ~seen_link (Pre.R (Soft_break []) :: text acc) st 1754 | | ' ' as c -> ( 1755 | junk st; 1756 | match peek st with 1757 | | Some ' ' -> ( 1758 | match protect (many space >>> char '\n' >>> many space) st with 1759 | | () -> loop ~seen_link (Pre.R (Hard_break []) :: text acc) st 1760 | | exception Fail -> 1761 | junk st; 1762 | Buffer.add_string buf " "; 1763 | loop ~seen_link acc st) 1764 | | Some '\n' -> loop ~seen_link acc st 1765 | | Some _ | None -> 1766 | Buffer.add_char buf c; 1767 | loop ~seen_link acc st) 1768 | | '`' -> loop ~seen_link (inline_pre buf acc st) st 1769 | | '\\' as c -> ( 1770 | junk st; 1771 | match peek st with 1772 | | Some '\n' -> 1773 | junk st; 1774 | loop ~seen_link (Pre.R (Hard_break []) :: text acc) st 1775 | | Some c when is_punct c -> 1776 | junk st; 1777 | Buffer.add_char buf c; 1778 | loop ~seen_link acc st 1779 | | Some _ | None -> 1780 | Buffer.add_char buf c; 1781 | loop ~seen_link acc st) 1782 | | '!' as c -> ( 1783 | junk st; 1784 | match peek st with 1785 | | Some '[' -> reference_link ~seen_link Img (text acc) st 1786 | | Some _ | None -> 1787 | Buffer.add_char buf c; 1788 | loop ~seen_link acc st) 1789 | | '&' -> 1790 | entity buf st; 1791 | loop ~seen_link acc st 1792 | | ']' -> 1793 | junk st; 1794 | let acc = text acc in 1795 | let rec aux ~seen_link xs = function 1796 | | Pre.Left_bracket Url :: acc' when seen_link -> 1797 | Buffer.add_char buf ']'; 1798 | let acc' = List.rev_append (Pre.R (Text ([], "[")) :: xs) acc' in 1799 | loop ~seen_link acc' st 1800 | | Left_bracket k :: acc' -> ( 1801 | match peek st with 1802 | | Some '(' -> ( 1803 | match protect inline_link st with 1804 | | destination, title -> 1805 | let attr = inline_attribute_string st in 1806 | let r = 1807 | let label = Pre.parse_emph xs in 1808 | let def = { label; destination; title } in 1809 | match k with 1810 | | Img -> Image (attr, def) 1811 | | Url -> Link (attr, def) 1812 | in 1813 | loop ~seen_link (Pre.R r :: acc') st 1814 | | exception Fail -> 1815 | Buffer.add_char buf ']'; 1816 | loop ~seen_link acc st) 1817 | | Some '[' -> ( 1818 | let label = Pre.parse_emph xs in 1819 | let off1 = pos st in 1820 | match link_label false st with 1821 | | lab -> ( 1822 | let s = normalize lab in 1823 | match 1824 | List.find_opt 1825 | (fun ({ label; _ } : attributes link_def) -> 1826 | label = s) 1827 | defs 1828 | with 1829 | | Some 1830 | { label = _; destination; title; attributes = attr } 1831 | -> 1832 | let def = { label; destination; title } in 1833 | let r = 1834 | match k with 1835 | | Img -> Image (attr, def) 1836 | | Url -> Link (attr, def) 1837 | in 1838 | loop ~seen_link (Pre.R r :: acc') st 1839 | | None -> 1840 | if k = Img then Buffer.add_char buf '!'; 1841 | Buffer.add_char buf '['; 1842 | let acc = Pre.R label :: text acc' in 1843 | Buffer.add_char buf ']'; 1844 | set_pos st off1; 1845 | loop ~seen_link acc st) 1846 | | exception Fail -> 1847 | if k = Img then Buffer.add_char buf '!'; 1848 | Buffer.add_char buf '['; 1849 | let acc = Pre.R label :: text acc in 1850 | Buffer.add_char buf ']'; 1851 | set_pos st off1; 1852 | loop ~seen_link acc st) 1853 | | Some _ | None -> 1854 | Buffer.add_char buf ']'; 1855 | loop ~seen_link acc st) 1856 | | (Pre.R (Link _) as x) :: acc' -> aux ~seen_link:true (x :: xs) acc' 1857 | | x :: acc' -> aux ~seen_link (x :: xs) acc' 1858 | | [] -> 1859 | Buffer.add_char buf ']'; 1860 | loop ~seen_link acc st 1861 | in 1862 | aux ~seen_link [] acc 1863 | | '[' -> reference_link ~seen_link Url acc st 1864 | | ('*' | '_') as c -> 1865 | let pre = peek_before ' ' st in 1866 | let f post n st = 1867 | let pre = pre |> Pre.classify_delim in 1868 | let post = post |> Pre.classify_delim in 1869 | let e = if c = '*' then Pre.Star else Pre.Underscore in 1870 | loop ~seen_link (Pre.Emph (pre, post, e, n) :: text acc) st 1871 | in 1872 | let rec aux n = 1873 | match peek st with 1874 | | Some c1 when c1 = c -> 1875 | junk st; 1876 | aux (succ n) 1877 | | Some c1 -> f c1 n st 1878 | | None -> f ' ' n st 1879 | in 1880 | aux 0 1881 | | _ as c -> 1882 | junk st; 1883 | Buffer.add_char buf c; 1884 | loop ~seen_link acc st 1885 | | exception Fail -> Pre.parse_emph (List.rev (text acc)) 1886 | in 1887 | loop ~seen_link:false [] st 1888 | 1889 | let sp3 st = 1890 | match peek_exn st with 1891 | | ' ' -> ( 1892 | junk st; 1893 | match peek_exn st with 1894 | | ' ' -> ( 1895 | junk st; 1896 | match peek_exn st with 1897 | | ' ' -> 1898 | junk st; 1899 | 3 1900 | | _ -> 2 1901 | | exception Fail -> 2) 1902 | | _ -> 1 1903 | | exception Fail -> 1) 1904 | | _ -> 0 1905 | | exception Fail -> 0 1906 | 1907 | let link_reference_definition st : attributes link_def = 1908 | (* TODO remove duplicated ws/ws1 functions? *) 1909 | let ws st = 1910 | let rec loop seen_nl = 1911 | match peek st with 1912 | | Some w when is_whitespace w -> 1913 | junk st; 1914 | loop seen_nl 1915 | | Some '\n' when not seen_nl -> 1916 | junk st; 1917 | loop true 1918 | | Some _ | None -> () 1919 | in 1920 | loop false 1921 | in 1922 | let ws1 st = 1923 | match next st with w when is_whitespace w -> ws st | _ -> raise Fail 1924 | in 1925 | ignore (sp3 st); 1926 | let label = link_label false st in 1927 | if next st <> ':' then raise Fail; 1928 | ws st; 1929 | let destination = link_destination st in 1930 | let attributes = inline_attribute_string st in 1931 | match protect (ws1 >>> link_title <<< sp <<< eol) st with 1932 | | title -> { label; destination; title = Some title; attributes } 1933 | | exception Fail -> 1934 | (sp >>> eol) st; 1935 | { label; destination; title = None; attributes } 1936 | 1937 | let link_reference_definitions st = 1938 | let rec loop acc = 1939 | match protect link_reference_definition st with 1940 | | def -> loop (def :: acc) 1941 | | exception Fail -> (acc, pos st) 1942 | in 1943 | loop [] 1944 | -------------------------------------------------------------------------------- /src/sexp.ml: -------------------------------------------------------------------------------- 1 | open Ast.Impl 2 | 3 | type t = 4 | | Atom of string 5 | | List of t list 6 | 7 | let atom s = Atom s 8 | 9 | let rec link { label; destination; title; _ } = 10 | let title = match title with Some title -> [ Atom title ] | None -> [] in 11 | List (Atom "link" :: inline label :: Atom destination :: title) 12 | 13 | and inline = function 14 | | Concat (_, xs) -> List (Atom "concat" :: List.map inline xs) 15 | | Text (_, s) -> Atom s 16 | | Emph (_, il) -> List [ Atom "emph"; inline il ] 17 | | Strong (_, il) -> List [ Atom "strong"; inline il ] 18 | | Code _ -> Atom "code" (* FIXME: this seems broken? *) 19 | | Hard_break _ -> Atom "hard-break" 20 | | Soft_break _ -> Atom "soft-break" 21 | | Link (_, def) -> List [ Atom "url"; link def ] 22 | | Html (_, s) -> List [ Atom "html"; Atom s ] 23 | | Image _ -> Atom "img" 24 | 25 | let table_header (header, alignment) = 26 | List 27 | [ inline header 28 | ; (match alignment with 29 | | Default -> Atom "default" 30 | | Left -> Atom "left" 31 | | Centre -> Atom "centre" 32 | | Right -> Atom "right") 33 | ] 34 | 35 | let rec block = function 36 | | Paragraph (_, x) -> List [ Atom "paragraph"; inline x ] 37 | | List (_, _, _, bls) -> 38 | List 39 | (Atom "list" 40 | :: List.map (fun xs -> List (Atom "list-item" :: List.map block xs)) bls 41 | ) 42 | | Blockquote (_, xs) -> List (Atom "blockquote" :: List.map block xs) 43 | | Thematic_break _ -> Atom "thematic-break" 44 | | Heading (_, level, text) -> 45 | List [ Atom "heading"; Atom (string_of_int level); inline text ] 46 | | Code_block (_, info, _) -> List [ Atom "code-block"; Atom info ] 47 | | Html_block (_, s) -> List [ Atom "html"; Atom s ] 48 | | Definition_list (_, l) -> 49 | List 50 | [ Atom "def-list" 51 | ; List 52 | (List.map 53 | (fun elt -> 54 | List [ inline elt.term; List (List.map inline elt.defs) ]) 55 | l) 56 | ] 57 | | Table (_, headers, rows) -> 58 | List 59 | [ Atom "table" 60 | ; List (List.map table_header headers) 61 | ; List (List.map (fun row -> List (List.map inline row)) rows) 62 | ] 63 | 64 | let create ast = List (List.map block ast) 65 | 66 | let needs_quotes s = 67 | let rec loop i = 68 | if i >= String.length s then false 69 | else 70 | match s.[i] with 71 | | ' ' | '\t' | '\x00' .. '\x1F' | '\x7F' .. '\x9F' -> true 72 | | _ -> loop (succ i) 73 | in 74 | loop 0 75 | 76 | let rec print ppf = function 77 | | Atom s when needs_quotes s -> Format.fprintf ppf "%S" s 78 | | Atom s -> Format.pp_print_string ppf s 79 | | List l -> 80 | Format.fprintf 81 | ppf 82 | "@[<1>(%a)@]" 83 | (Format.pp_print_list ~pp_sep:Format.pp_print_space print) 84 | l 85 | -------------------------------------------------------------------------------- /src/strSlice.ml: -------------------------------------------------------------------------------- 1 | type t = 2 | { base : string 3 | ; off : int 4 | ; len : int 5 | } 6 | 7 | let of_string ?(off = 0) base = { base; off; len = String.length base - off } 8 | let to_string { base; off; len } = String.sub base off len 9 | let print ppf s = Format.fprintf ppf "%S" (to_string s) 10 | let get_offset { off; _ } = off 11 | let length { len; _ } = len 12 | let is_empty s = length s = 0 13 | 14 | let offset n { base; off; len } = 15 | if n < 0 then invalid_arg "offset"; 16 | let rec loop n base off len = 17 | if n = 0 || len = 0 then { base; off; len } 18 | else 19 | match base.[off] with 20 | | '\t' -> 21 | let ts = ((off + 4) / 4 * 4) - off in 22 | let b = Buffer.create len in 23 | Buffer.add_substring b base 0 off; 24 | for _ = 1 to ts do 25 | Buffer.add_char b ' ' 26 | done; 27 | Buffer.add_substring b base (off + 1) (len - 1); 28 | loop n (Buffer.contents b) off (len + ts - 1) 29 | | _ -> loop (n - 1) base (off + 1) (len - 1) 30 | in 31 | loop n base off len 32 | 33 | let lexbuf s = Lexing.from_string (to_string s) 34 | 35 | let contains s1 { base; off; len } = 36 | let rec loop off = 37 | if off + String.length s1 > len then false 38 | else s1 = String.sub base off (String.length s1) || loop (off + 1) 39 | in 40 | loop off 41 | 42 | let head = function 43 | | { len = 0; _ } -> None 44 | | { base; off; _ } -> Some base.[off] 45 | 46 | let last = function 47 | | { len = 0; _ } -> None 48 | | { base; off; len } -> Some base.[off + len - 1] 49 | 50 | let tail = function 51 | | { len = 0; _ } as s -> s 52 | | { base; off; len } -> { base; off = succ off; len = pred len } 53 | 54 | let uncons s = head s |> Option.map (fun hd -> (hd, tail s)) 55 | 56 | let take n s = 57 | if n < 0 then invalid_arg "take"; 58 | let rec loop n s = 59 | if n = 0 || length s = 0 then [] 60 | else match head s with Some c -> c :: loop (pred n) (tail s) | None -> [] 61 | in 62 | loop n s 63 | 64 | let take_prefix n s = 65 | if n < 0 then invalid_arg "take_prefix"; 66 | let len = min n s.len in 67 | { s with len } 68 | 69 | let drop n s = 70 | if n < 0 then invalid_arg "drop"; 71 | (* len should not be reduced below 0, as strings cannot have a negative length *) 72 | let len = max (s.len - n) 0 in 73 | (* off should not exceed the length of the base string *) 74 | let off = min (s.off + n) (String.length s.base) in 75 | { s with off; len } 76 | 77 | let drop_last = function 78 | | { len = 0; _ } as s -> s 79 | | { base; off; len } -> { base; off; len = pred len } 80 | 81 | let rec drop_while f s = 82 | match uncons s with Some (x, s') when f x -> drop_while f s' | _ -> s 83 | 84 | let rec drop_last_while f s = 85 | match last s with 86 | | Some l when f l -> drop_last_while f (drop_last s) 87 | | _ -> s 88 | 89 | let index f s = 90 | let len = length s in 91 | let rest = drop_while (fun c -> not (f c)) s in 92 | let idx = len - length rest in 93 | if idx = len then None else Some idx 94 | 95 | (* Uncomment to test *) 96 | (* TODO: rig up method to unit test our utilities *) 97 | (* let () = *) 98 | (* let index c = index (Char.equal c) in *) 99 | (* let s = of_string "abcd" in *) 100 | (* assert (index 'a' s = Some 0); *) 101 | (* assert (index 'b' s = Some 1); *) 102 | (* assert (index 'c' s = Some 2); *) 103 | (* assert (index 'z' s = None) *) 104 | 105 | let split_at f s = 106 | match index f s with 107 | | None -> (s, offset (length s) s) 108 | | Some idx -> ({ s with len = idx }, offset idx s) 109 | 110 | (* Uncomment to test *) 111 | (* TODO: rig up method to unit test our utilities *) 112 | (* let () = *) 113 | (* let f x = x = 'c' in *) 114 | (* let before, rest = split_at f (of_string "abcdef") in *) 115 | (* assert ("ab" = to_string before); *) 116 | (* assert ("cdef" = to_string rest); *) 117 | (* let before, rest = split_at f (of_string "cab") in *) 118 | (* assert ("" = to_string before); *) 119 | (* assert ("cab" = to_string rest); *) 120 | (* let before, rest = split_at f (of_string "aaa") in *) 121 | (* assert ("aaa" = to_string before); *) 122 | (* assert ("" = to_string rest) *) 123 | 124 | let index_unescaped sep s = 125 | let rec loop idx state = 126 | if idx = s.off + s.len then None 127 | (* If we get here and we're inside a verbatim span, what to do? *) 128 | else 129 | match (state, s.base.[idx]) with 130 | | `normal, '\\' -> loop (idx + 1) `escape 131 | | `normal, '`' -> loop (idx + 1) (`verbatim_open 1) 132 | | `normal, c when c = sep -> Some (idx - s.off) 133 | | `normal, _ -> loop (idx + 1) `normal 134 | | `escape, _ -> loop (idx + 1) `normal 135 | | `verbatim_open n, '`' -> loop (idx + 1) (`verbatim_open (n + 1)) 136 | | `verbatim_open n, _ -> loop (idx + 1) (`within_verbatim n) 137 | | `within_verbatim 1, '`' -> loop (idx + 1) `normal 138 | | `within_verbatim n, '`' -> loop (idx + 1) (`verbatim_close (n, n - 1)) 139 | | `within_verbatim n, _ -> loop (idx + 1) (`within_verbatim n) 140 | | `verbatim_close (_, 1), '`' -> loop (idx + 1) `normal 141 | | `verbatim_close (n, k), '`' -> 142 | loop (idx + 1) (`verbatim_close (n, k - 1)) 143 | | `verbatim_close (n, _), _ -> loop (idx + 1) (`within_verbatim n) 144 | in 145 | loop s.off `normal 146 | 147 | let exists f s = 148 | let rec loop s i = 149 | if i >= s.len then false 150 | else if f s.base.[s.off + i] then true 151 | else loop s (succ i) 152 | in 153 | loop s 0 154 | 155 | let for_all f s = not (exists (fun c -> not (f c)) s) 156 | 157 | let sub ~len s = 158 | if len > s.len then invalid_arg "sub"; 159 | { s with len } 160 | 161 | let fold_left f init s = 162 | let rec aux acc rest = 163 | match uncons rest with None -> acc | Some (x, xs) -> aux (f x acc) xs 164 | in 165 | aux init s 166 | 167 | (* let () = *) 168 | (* let s = of_string "abcde" in *) 169 | (* assert (fold_left (fun _ n -> n + 1) 0 s = 5); *) 170 | (* assert (fold_left (fun c s -> String.make 2 c ^ s) "" s = "eeddccbbaa") *) 171 | 172 | let trim s = 173 | let is_whitespace = function 174 | | ' ' | '\t' | '\010' .. '\013' -> true 175 | | _ -> false 176 | in 177 | drop_while is_whitespace (drop_last_while is_whitespace s) 178 | -------------------------------------------------------------------------------- /src/strSlice.mli: -------------------------------------------------------------------------------- 1 | (* Implementation of string slices over a base string via an offset *) 2 | 3 | type t 4 | 5 | val of_string : ?off:int -> string -> t 6 | val to_string : t -> string 7 | val offset : int -> t -> t 8 | val lexbuf : t -> Lexing.lexbuf 9 | val contains : string -> t -> bool 10 | val length : t -> int 11 | 12 | val index : (char -> bool) -> t -> int option 13 | (** [index c s] is [Some i] where [i] is the index of the character in [s] for 14 | which [f] is first true, or [None] if [f] holds for no characters in [s]. *) 15 | 16 | val index_unescaped : char -> t -> int option 17 | (** [index_unescaped c s] is [Some i] where [i] is index of the first 18 | occurrence of the character [c] in [s] that is not preceeded by a 19 | backslash ['\\'] and not within a verbatim inline, or [None] if 20 | there is no such [c] in [s]. *) 21 | 22 | val print : Format.formatter -> t -> unit 23 | val head : t -> char option 24 | val tail : t -> t 25 | 26 | val uncons : t -> (char * t) option 27 | (** [uncons s] is [Some (h, t)] where [h] is [head s] and [t] is [tail s], 28 | or [None] if [is_empty s] *) 29 | 30 | val last : t -> char option 31 | (** [last s] is the [Some c] if [c] is the last character of [s], or else [None] if [s] is empty *) 32 | 33 | val drop_last : t -> t 34 | (** [drop_last s] is the [s] without its last character *) 35 | 36 | val take : int -> t -> char list 37 | (** [take n s] is a list of the first [n] characters of [s] *) 38 | 39 | val take_prefix : int -> t -> t 40 | (** [take_prefix n s] is the slice consisting of the first [n] 41 | characters of [s]. *) 42 | 43 | val drop : int -> t -> t 44 | (** [drop n s] is [s] with the first [n] characters dropped *) 45 | 46 | val drop_while : (char -> bool) -> t -> t 47 | (** [drop_while f s] is [s] with the longest prefix for which [f] is true for 48 | every character dropped *) 49 | 50 | val drop_last_while : (char -> bool) -> t -> t 51 | (** [drop_last_while f s] is [s] with the longest suffix for which [f] is true for 52 | every character dropped *) 53 | 54 | val split_at : (char -> bool) -> t -> t * t 55 | (** [split_at f s] is [(taken, rest)] where [taken] is the prefix of [s] for 56 | which [f] is [false] and [rest] is remainder, including the first character 57 | for which [f] is [true]. 58 | 59 | E.g., 60 | 61 | {[ 62 | let () = 63 | let f x = x = 'c' in 64 | let before, rest = split_at f (of_string "abcdef") in 65 | assert ("ab" = to_string before); 66 | assert ("cdef" = to_string rest); 67 | let before, rest = split_at f (of_string "cab") in 68 | assert ("" = to_string before); 69 | assert ("cab" = to_string rest); 70 | let before, rest = split_at f (of_string "aaa") in 71 | assert ("aaa" = to_string before); 72 | assert ("" = to_string rest) 73 | ]} 74 | *) 75 | 76 | val fold_left : (char -> 'a -> 'a) -> 'a -> t -> 'a 77 | val for_all : (char -> bool) -> t -> bool 78 | val exists : (char -> bool) -> t -> bool 79 | val is_empty : t -> bool 80 | val get_offset : t -> int 81 | val sub : len:int -> t -> t 82 | 83 | val trim : t -> t 84 | (** [trim s] returns the slice that skips any whitespace at the start 85 | or the end of [s]. *) 86 | -------------------------------------------------------------------------------- /src/toc.ml: -------------------------------------------------------------------------------- 1 | open Ast.Util 2 | 3 | let rec remove_links inline = 4 | match inline with 5 | | Concat (attr, inlines) -> Concat (attr, List.map remove_links inlines) 6 | | Emph (attr, inline) -> Emph (attr, remove_links inline) 7 | | Strong (attr, inline) -> Emph (attr, remove_links inline) 8 | | Link (_, link) -> link.label 9 | | Image (attr, link) -> 10 | Image (attr, { link with label = remove_links link.label }) 11 | | Hard_break _ | Soft_break _ | Html _ | Code _ | Text _ -> inline 12 | 13 | let headers = 14 | let remove_links_f = remove_links in 15 | fun ?(remove_links = false) doc -> 16 | let headers = ref [] in 17 | let rec loop blocks = 18 | List.iter 19 | (function 20 | | Heading (attr, level, inline) -> 21 | let inline = 22 | if remove_links then remove_links_f inline else inline 23 | in 24 | headers := (attr, level, inline) :: !headers 25 | | Blockquote (_, blocks) -> loop blocks 26 | | List (_, _, _, block_lists) -> List.iter loop block_lists 27 | | Paragraph _ | Thematic_break _ | Html_block _ | Definition_list _ 28 | | Code_block _ | Table _ -> 29 | ()) 30 | blocks 31 | in 32 | loop doc; 33 | List.rev !headers 34 | 35 | (* Given a list of headers — in the order of the document — go to the 36 | requested subsection. We first seek for the [number]th header at 37 | [level]. *) 38 | let rec find_start headers level number subsections = 39 | match headers with 40 | | (_, header_level, _) :: tl when header_level > level -> 41 | (* Skip, right [level]-header not yet reached. *) 42 | if number = 0 then 43 | (* Assume empty section at [level], do not consume token. *) 44 | match subsections with 45 | | [] -> headers (* no subsection to find *) 46 | | n :: subsections -> find_start headers (level + 1) n subsections 47 | else find_start tl level number subsections 48 | | (_, header_level, _) :: tl when header_level = level -> 49 | (* At proper [level]. Have we reached the [number] one? *) 50 | if number <= 1 then 51 | match subsections with 52 | | [] -> tl (* no subsection to find *) 53 | | n :: subsections -> find_start tl (level + 1) n subsections 54 | else find_start tl level (number - 1) subsections 55 | | _ -> 56 | (* Sought [level] has not been found in the current section *) 57 | [] 58 | 59 | let unordered_list items = List ([], Bullet '*', Tight, items) 60 | 61 | let find_id attributes = 62 | List.find_map 63 | (function k, v when String.equal "id" k -> Some v | _ -> None) 64 | attributes 65 | 66 | let link attributes label = 67 | let inline = 68 | match find_id attributes with 69 | | None -> label 70 | | Some id -> Link ([], { label; destination = "#" ^ id; title = None }) 71 | in 72 | Paragraph ([], inline) 73 | 74 | let rec make_toc 75 | (headers : ('attr * int * 'a inline) list) 76 | ~min_level 77 | ~max_level = 78 | match headers with 79 | | _ when min_level > max_level -> ([], headers) 80 | | [] -> ([], []) 81 | | (_, level, _) :: _ when level < min_level -> ([], headers) 82 | | (_, level, _) :: tl when level > max_level -> 83 | make_toc tl ~min_level ~max_level 84 | | (attr, level, t) :: tl when level = min_level -> 85 | let sub_toc, tl = make_toc tl ~min_level:(min_level + 1) ~max_level in 86 | let toc_entry = 87 | match sub_toc with 88 | | [] -> [ link attr t ] 89 | | _ -> [ link attr t; unordered_list sub_toc ] 90 | in 91 | let toc, tl = make_toc tl ~min_level ~max_level in 92 | (toc_entry :: toc, tl) 93 | | _ -> 94 | let sub_toc, tl = 95 | make_toc headers ~min_level:(min_level + 1) ~max_level 96 | in 97 | let toc, tl = make_toc tl ~min_level ~max_level in 98 | ([ unordered_list sub_toc ] :: toc, tl) 99 | 100 | let toc ?(start = []) ?(depth = 2) doc = 101 | if depth < 1 then invalid_arg "Omd.toc: ~depth must be >= 1"; 102 | let headers = headers ~remove_links:true doc in 103 | let headers = 104 | match start with 105 | | [] -> headers 106 | | number :: _ when number < 0 -> 107 | invalid_arg "Omd.toc: level 1 start must be >= 0" 108 | | number :: subsections -> find_start headers 1 number subsections 109 | in 110 | let len = List.length start in 111 | let toc, _ = make_toc headers ~min_level:(len + 1) ~max_level:(len + depth) in 112 | match toc with [] -> [] | _ -> [ unordered_list toc ] 113 | -------------------------------------------------------------------------------- /tests/John_MacFarlane_said_peg-markdown_takes_forever_to_process_this--jgm_peg-markdown_issues_28.md: -------------------------------------------------------------------------------- 1 | ***************************************[[[[[[[[[[[[[[[[[[[[-----------------]]]]]]]]]]]]]]]]]*************************** 2 | 3 | [[[[[[[[[[[[[[[[[[[[-----------------]]]]]]]]]]]]]]]]]: :: 4 | -------------------------------------------------------------------------------- /tests/attributes.md: -------------------------------------------------------------------------------- 1 | ## Tests for attributes 2 | 3 | Headings with classes: 4 | 5 | ```````````````````````````````` example 6 | Heading classes attributes testing 7 | 8 | # Heading 1 {.class1} 9 | 10 | # Heading 2 # {.class2} 11 | 12 | # Heading 3 {.class3a .class3b} 13 | . 14 |

Heading classes attributes testing

15 |

Heading 1

16 |

Heading 2

17 |

Heading 3

18 | ```````````````````````````````` 19 | 20 | Headings with attributes: 21 | 22 | ```````````````````````````````` example 23 | Heading data attributes testing 24 | 25 | # Heading 1 {data1=value1} 26 | 27 | # Heading 2 # {data2=value2} 28 | 29 | # Heading 3 {data1=value3a data2=value3b} 30 | . 31 |

Heading data attributes testing

32 |

Heading 1

33 |

Heading 2

34 |

Heading 3

35 | ```````````````````````````````` 36 | 37 | Headings with ids: 38 | 39 | ```````````````````````````````` example 40 | Heading id attributes testing 41 | 42 | # Heading 1 {#id1} 43 | 44 | # Heading 2 # {#id2} 45 | 46 | # Heading 3 {#id3a #id3b} 47 | . 48 |

Heading id attributes testing

49 |

Heading 1

50 |

Heading 2

51 |

Heading 3

52 | ```````````````````````````````` 53 | 54 | Inline code with classes: 55 | 56 | ```````````````````````````````` example 57 | Code blocks class attributes testing 58 | 59 | `Code block 1`{.class1} 60 | 61 | `Code block 2`{.class2a .class2b} 62 | . 63 |

Code blocks class attributes testing

64 |

Code block 1

65 |

Code block 2

66 | ```````````````````````````````` 67 | 68 | Inline code with attributes: 69 | 70 | ```````````````````````````````` example 71 | Code blocks data attributes testing 72 | 73 | `Code block 1`{data1=value1} 74 | 75 | `Code block 2`{data1=value2a data2=value2b} 76 | . 77 |

Code blocks data attributes testing

78 |

Code block 1

79 |

Code block 2

80 | ```````````````````````````````` 81 | 82 | Inline code with ids: 83 | 84 | ```````````````````````````````` example 85 | Code blocks id attributes testing 86 | 87 | `Code block 1`{#id1} 88 | 89 | `Code block 2`{#id2a #id2b} 90 | . 91 |

Code blocks id attributes testing

92 |

Code block 1

93 |

Code block 2

94 | ```````````````````````````````` 95 | 96 | Code blocks with classes: 97 | 98 | ```````````````````````````````` example 99 | Code blocks class attributes testing 100 | 101 | ``` {.class1} 102 | Code block 1 103 | ``` 104 | 105 | ``` {.class2.a .class2.b} 106 | Code block 2 107 | ``` 108 | . 109 |

Code blocks class attributes testing

110 |
Code block 1
111 | 
112 |
Code block 2
113 | 
114 | ```````````````````````````````` 115 | 116 | Code blocks with attributes: 117 | 118 | ```````````````````````````````` example 119 | Code blocks data attributes testing 120 | 121 | ``` {data1=value1} 122 | Code block 1 123 | ``` 124 | 125 | ``` {data1=value2.a data2=value2.b} 126 | Code block 2 127 | ``` 128 | . 129 |

Code blocks data attributes testing

130 |
Code block 1
131 | 
132 |
Code block 2
133 | 
134 | ```````````````````````````````` 135 | 136 | Code blocks with ids: 137 | 138 | ```````````````````````````````` example 139 | Code blocks id attributes testing 140 | 141 | ``` {#id1} 142 | Code block 1 143 | ``` 144 | 145 | ``` {#id2.a #id2.b} 146 | Code block 2 147 | ``` 148 | . 149 |

Code blocks id attributes testing

150 |
Code block 1
151 | 
152 |
Code block 2
153 | 
154 | ```````````````````````````````` 155 | 156 | Links with classes: 157 | 158 | ```````````````````````````````` example 159 | Link class attributes testing 160 | 161 | [Link 1](url_1){.class1} 162 | 163 | [Link 2](url_2){.class2a .class2b} 164 | 165 | ![Link 1](url_1){.class1} 166 | 167 | ![Link 2](url_2){.class2a .class2b} 168 | . 169 |

Link class attributes testing

170 |

Link 1

171 |

Link 2

172 |

Link 1

173 |

Link 2

174 | ```````````````````````````````` 175 | 176 | Links with attributes: 177 | 178 | ```````````````````````````````` example 179 | Link data attributes testing 180 | 181 | [Link 1](url_1){data1=value1} 182 | 183 | [Link 2](url_2){data1=value2a data2=value2b} 184 | 185 | ![Link 1](url_1){data1=value1} 186 | 187 | ![Link 2](url_2){data1=value2a data2=value2b} 188 | . 189 |

Link data attributes testing

190 |

Link 1

191 |

Link 2

192 |

Link 1

193 |

Link 2

194 | ```````````````````````````````` 195 | 196 | Links with ids: 197 | 198 | ```````````````````````````````` example 199 | Link id attributes testing 200 | 201 | [Link 1](url_1){#id1} 202 | 203 | [Link 2](url_2){#id2a #id2b} 204 | 205 | ![Link 1](url_1){#id1} 206 | 207 | ![Link 2](url_2){#id2a #id2b} 208 | . 209 |

Link id attributes testing

210 |

Link 1

211 |

Link 2

212 |

Link 1

213 |

Link 2

214 | ```````````````````````````````` 215 | 216 | Refs with classes: 217 | 218 | ```````````````````````````````` example 219 | Ref class attributes testing 220 | 221 | [Ref 1][ref_1] 222 | 223 | [Ref 2][ref_2] 224 | 225 | ![Ref 3][ref_3] 226 | 227 | ![Ref 4][ref_4] 228 | 229 | [ref_1]: ref_1 {.class1} 230 | [ref_2]: ref_2 {.class2a .class2b} 231 | [ref_3]: ref_3 {.class1} 232 | [ref_4]: ref_4 {.class2a .class2b} 233 | . 234 |

Ref class attributes testing

235 |

Ref 1

236 |

Ref 2

237 |

Ref 3

238 |

Ref 4

239 | ```````````````````````````````` 240 | 241 | Refs with attributes: 242 | 243 | ```````````````````````````````` example 244 | Ref data attributes testing 245 | 246 | [Ref 1][ref_1] 247 | 248 | [Ref 2][ref_2] 249 | 250 | ![Ref 3][ref_3] 251 | 252 | ![Ref 4][ref_4] 253 | 254 | [ref_1]: ref_1 {data1=value1} 255 | [ref_2]: ref_2 {data1=value2a data2=value2b} 256 | [ref_3]: ref_3 {data1=value1} 257 | [ref_4]: ref_4 {data1=value2a data2=value2b} 258 | . 259 |

Ref data attributes testing

260 |

Ref 1

261 |

Ref 2

262 |

Ref 3

263 |

Ref 4

264 | ```````````````````````````````` 265 | 266 | Refs with ids: 267 | 268 | ```````````````````````````````` example 269 | Ref id attributes testing 270 | 271 | [Ref 1][ref_1] 272 | 273 | [Ref 2][ref_2] 274 | 275 | ![Ref 3][ref_3] 276 | 277 | ![Ref 4][ref_4] 278 | 279 | [ref_1]: ref_1 {#id1} 280 | [ref_2]: ref_2 {#id2a #id2b} 281 | [ref_3]: ref_3 {#id1} 282 | [ref_4]: ref_4 {#id2a #id2b} 283 | . 284 |

Ref id attributes testing

285 |

Ref 1

286 |

Ref 2

287 |

Ref 3

288 |

Ref 4

289 | ```````````````````````````````` 290 | -------------------------------------------------------------------------------- /tests/blackbox/dune: -------------------------------------------------------------------------------- 1 | (cram 2 | (deps %{bin:omd})) 3 | -------------------------------------------------------------------------------- /tests/blackbox/emphasis.t: -------------------------------------------------------------------------------- 1 | Extra test not covered by the conformance tests 2 | 3 | $ omd << "MD" 4 | > foo**bar* 5 | > MD 6 |

foo**bar*

7 | -------------------------------------------------------------------------------- /tests/blackbox/heading-id.t: -------------------------------------------------------------------------------- 1 | $ omd << "MD" 2 | > ### This is an Header Without Id 3 | > ### 1 2 Header that starts with 2 numbers 4 | > ### Header with an id {#header-id} 5 | > ### Maître d'hÔtel 😬 6 | > ### 👋👋 ÔHey! 👋👋 7 | > ### 👋👋 ÔHey! 👋👋 8 | > ### *Dogs*?--in *my* house? 9 | > ### [HTML], [S5], or [RTF]? 10 | > ### 3. Applications 11 | > ### hello.world 12 | > ### -hello- 13 | > ### with multiple spaces 14 | > ### with   spaces 15 | > ### 33 16 | > ### 17 | > MD 18 |

This is an Header Without Id

19 |

1 2 Header that starts with 2 numbers

20 |

Header with an id

21 |

Maître d'hÔtel 😬

22 |

👋👋 ÔHey! 👋👋

23 |

👋👋 ÔHey! 👋👋

24 |

Dogs?--in my house?

25 |

[HTML], [S5], or [RTF]?

26 |

3. Applications

27 |

hello.world

28 |

-hello-

29 |

with multiple spaces

30 |

with   spaces

31 |

33

32 |

33 | -------------------------------------------------------------------------------- /tests/blackbox/normalize-label.t: -------------------------------------------------------------------------------- 1 | Case insensitive comparison 2 | $ omd << "MD" 3 | > [ΑΓΩ]: /url 4 | > 5 | > [αγω] 6 | > MD 7 |

αγω

8 | 9 | Collapse consecutive internal spaces, tabs 10 | $ omd << "MD" 11 | > [ΑΓ Ω]: /url 12 | > 13 | > [αγ ω] 14 | > MD 15 |

αγ ω

16 | 17 | Strip leading and trailing spaces, tabs 18 | $ omd << "MD" 19 | > [ ΑΓΩ ]: /url 20 | > 21 | > [αγω] 22 | > MD 23 |

αγω

24 | 25 | Doesn't match due to the internal space 26 | $ omd << "MD" 27 | > [ΑΓΩ]: /url 28 | > 29 | > [α γω] 30 | > MD 31 |

[α γω]

32 | -------------------------------------------------------------------------------- /tests/blackbox/regression-224.t: -------------------------------------------------------------------------------- 1 | Regression test for https://github.com/ocaml/omd/issues/224 2 | 3 | $ omd << "MD" 4 | > hello_world: 5 | > ``` 6 | > is_this_code_ 7 | > ``` 8 | > MD 9 |

hello_world:

10 |
is_this_code_
11 |   
12 | -------------------------------------------------------------------------------- /tests/def_list.md: -------------------------------------------------------------------------------- 1 | ## Definition lists 2 | 3 | ```````````````````````````````` example 4 | First Term 5 | : This is the definition of the first term. 6 | 7 | Second Term 8 | : This is one definition of the second term. 9 | : This is another definition of the second term. 10 | which is multiline 11 | 12 | : This is not a correct definition list 13 | . 14 |
First Term
15 |
This is the definition of the first term.
16 |
Second Term
17 |
This is one definition of the second term.
18 |
This is another definition of the second term. 19 | which is multiline
20 |
21 |

: This is not a correct definition list

22 | ```````````````````````````````` 23 | -------------------------------------------------------------------------------- /tests/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name expect_tests) 3 | (modules expect_tests) 4 | (inline_tests) 5 | (preprocess 6 | (pps ppx_expect)) 7 | (libraries omd)) 8 | 9 | (executable 10 | (name extract_tests) 11 | (libraries str) 12 | (modules extract_tests)) 13 | 14 | (rule 15 | (with-stdout-to 16 | dune.inc.new 17 | (run ./extract_tests.exe -write-dune-file %{dep:spec.txt} 18 | %{dep:gfm_table_spec.md} %{dep:extra_table_tests.md} %{dep:attributes.md} 19 | %{dep:def_list.md}))) 20 | 21 | (include dune.inc) 22 | 23 | (executable 24 | (name omd) 25 | (libraries str omd) 26 | (modules omd)) 27 | 28 | (rule 29 | (alias gen) 30 | (action 31 | (diff dune.inc dune.inc.new))) 32 | -------------------------------------------------------------------------------- /tests/expect_tests.ml: -------------------------------------------------------------------------------- 1 | let show x = Omd.to_html x |> print_string 2 | 3 | let%expect_test "construct inline elements" = 4 | show 5 | Omd.Ctor. 6 | [ p 7 | [ em "emphasized" 8 | ; br 9 | ; strong ~attrs:[ ("class", "my-class") ] "strong" 10 | ; nl 11 | ; code "some code" 12 | ; nl 13 | ; a "label" ~url:"my/page/url" 14 | ; nl 15 | ; a "other label" ~url:"my/other/page" ~title:"title text" 16 | ; nl 17 | ; img "my/img/src" ~alt:"Some alt text" 18 | ; nl 19 | ; img "my/img/src" ~alt:"Some alt text" ~title:"some title" 20 | ; nl 21 | ; html "inline html here" 22 | ] 23 | ]; 24 | [%expect 25 | {| 26 |

emphasized
27 | strong 28 | some code 29 | label 30 | other label 31 | Some alt text 32 | Some alt text 33 | inline html here

|}] 34 | 35 | let%expect_test "construct headings" = 36 | show Omd.Ctor.[ h 1 ~attrs:[ ("class", "my-class") ] [ txt "Heading 1" ] ]; 37 | [%expect {|

Heading 1

|}]; 38 | show Omd.Ctor.[ h 6 [ txt "Heading 6"; em "with emphasis!" ] ]; 39 | [%expect 40 | {|
Heading 6with emphasis!
|}] 41 | 42 | let%expect_test "construct lists" = 43 | show 44 | Omd.Ctor. 45 | [ ul 46 | ~spacing:Tight 47 | [ [ p [ txt "Item 1" ] ] 48 | ; [ p [ txt "Item 2" ] ] 49 | ; [ p [ txt "Item 3"; strong "with strength!" ] ] 50 | ] 51 | ]; 52 | [%expect 53 | {| 54 |
    55 |
  • Item 1 56 |
  • 57 |
  • Item 2 58 |
  • 59 |
  • Item 3with strength! 60 |
  • 61 |
|}]; 62 | show 63 | Omd.Ctor. 64 | [ ol 65 | [ [ p [ txt "Item 1" ] ] 66 | ; [ p [ txt "Item 2" ] ] 67 | ; [ p [ txt "Item 3" ] ] 68 | ] 69 | ]; 70 | [%expect 71 | {| 72 |
    73 |
  1. 74 |

    Item 1

    75 |
  2. 76 |
  3. 77 |

    Item 2

    78 |
  4. 79 |
  5. 80 |

    Item 3

    81 |
  6. 82 |
|}] 83 | 84 | let%expect_test "construct paragraphs and blockquotes with hrs" = 85 | let para = 86 | Omd.Ctor.( 87 | p 88 | ~attrs:[ ("class", "my-para") ] 89 | [ txt "Contet of"; em "this"; txt "paragraph" ]) 90 | in 91 | show 92 | Omd.Ctor. 93 | [ blockquote [ para; hr; p [ txt "Content of second paragraph" ] ] ]; 94 | [%expect 95 | {| 96 |
97 |

Contet ofthisparagraph

98 |
99 |

Content of second paragraph

100 |
|}] 101 | 102 | let%expect_test "construct code blocks" = 103 | show 104 | Omd.Ctor. 105 | [ code_bl 106 | ~attrs:[ ("class", "my-code") ] 107 | ~lang:"ocaml" 108 | "let foo = bar + bing" 109 | ]; 110 | [%expect 111 | {|
let foo = bar + bing
|}] 112 | 113 | let%expect_test "construct html blocks" = 114 | show 115 | Omd.Ctor. 116 | [ html_bl "

Some inline HTML here

" 117 | ]; 118 | [%expect {|

Some inline HTML here

|}] 119 | 120 | let%expect_test "construct definition list" = 121 | show 122 | Omd.Ctor. 123 | [ dl 124 | [ { term = [ txt "def term 1" ] 125 | ; defs = 126 | [ [ txt "definition 1.1" ] 127 | ; [ txt "definition 1.2" ] 128 | ; [ txt "definition 1.3" ] 129 | ] 130 | } 131 | ; { term = [ txt "def term 2" ] 132 | ; defs = 133 | [ [ txt "definition 2.1" ] 134 | ; [ txt "definition 2.2" ] 135 | ; [ txt "definition 2.3" ] 136 | ] 137 | } 138 | ] 139 | ]; 140 | [%expect 141 | {| 142 |
def term 1
143 |
definition 1.1
144 |
definition 1.2
145 |
definition 1.3
146 |
def term 2
147 |
definition 2.1
148 |
definition 2.2
149 |
definition 2.3
150 |
|}] 151 | -------------------------------------------------------------------------------- /tests/extra_table_tests.md: -------------------------------------------------------------------------------- 1 | ## Additional Table Tests 2 | 3 | Complete table 4 | 5 | ```````````````````````````````` example 6 | | abc | def | **ghi** | 7 | |:----|:-----:|----------:| 8 | | 1 | 2 | [link][0] | 9 | | 3 | 4 | `code` | 10 | | 5 | `6` | \| `|` | 11 | 12 | [0]: https://example.com 13 | . 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
abcdefghi
12link
34code
56| |
40 | ```````````````````````````````` 41 | 42 | 43 | Not a table (no delimiter) 44 | 45 | ```````````````````````````````` example 46 | | abc | 47 | | def | 48 | . 49 |

| abc | 50 | | def |

51 | ```````````````````````````````` 52 | 53 | Too few columns in a row gets expanded 54 | 55 | ```````````````````````````````` example 56 | | a | b | 57 | |---|---| 58 | | 1 | 59 | . 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 |
ab
1
74 | ```````````````````````````````` 75 | 76 | Table with no columns not allowed 77 | 78 | ````````````````````````````````example 79 | | 80 | | 81 | . 82 |

| 83 | |

84 | ```````````````````````````````` 85 | 86 | Minimal table 1 87 | 88 | ```````````````````````````````` example 89 | h| 90 | -| 91 | . 92 | 93 | 94 | 95 | 96 | 97 | 98 |
h
99 | ```````````````````````````````` 100 | 101 | Minimal table 2 102 | 103 | ```````````````````````````````` example 104 | |h 105 | |- 106 | . 107 | 108 | 109 | 110 | 111 | 112 | 113 |
h
114 | ```````````````````````````````` 115 | 116 | Minimal table 3 117 | 118 | ```````````````````````````````` example 119 | || 120 | || 121 | . 122 | 123 | 124 | 125 | 126 | 127 | 128 |
129 | ```````````````````````````````` 130 | 131 | Escaped `|` characters 132 | 133 | ```````````````````````````````` example 134 | \||\| 135 | -|- 136 | | 137 | . 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |
||
152 | ```````````````````````````````` 153 | 154 | `|` characters inside code spans without escaping 155 | 156 | ```````````````````````````````` example 157 | abc | `|` | def 158 | ----|-----|------------- 159 | ghi | | `` `| ``jkl 160 | . 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 |
abc|def
ghi`|jkl
177 | ```````````````````````````````` 178 | 179 | Cells starting with numbers 180 | 181 | ```````````````````````````````` example 182 | 0 | 1 183 | --|-- 184 | 3 | 4 185 | . 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 |
01
34
200 | ```````````````````````````````` 201 | 202 | Setext headings or cells? A setext heading marker isn't a start of a 203 | new block, so it gets treated as if it were a single element row. 204 | 205 | ```````````````````````````````` example 206 | = | b 207 | --|-- 208 | = 209 | . 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 |
=b
=
224 | ```````````````````````````````` 225 | 226 | Tables in a list 227 | 228 | ```````````````````````````````` example 229 | 1. abc | def 230 | ----|---- 231 | 1 | 2 232 | 233 | 2. | abc | def | 234 | |-----|-----| 235 | | 1. | 2. | 236 | . 237 |
    238 |
  1. 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 |
    abcdef
    12
  2. 252 |
  3. 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 |
    abcdef
    1.2.
  4. 266 |
267 | ```````````````````````````````` 268 | -------------------------------------------------------------------------------- /tests/extract_tests.ml: -------------------------------------------------------------------------------- 1 | (* Extract test cases from Spec *) 2 | let protect ~finally f = 3 | match f () with 4 | | exception e -> 5 | finally (); 6 | raise e 7 | | r -> 8 | finally (); 9 | r 10 | 11 | let disabled = [] 12 | 13 | let with_open_in fn f = 14 | let ic = open_in fn in 15 | protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic) 16 | 17 | let with_open_out fn f = 18 | let oc = open_out fn in 19 | protect ~finally:(fun () -> close_out_noerr oc) (fun () -> f oc) 20 | 21 | let begins_with s s' = 22 | String.length s >= String.length s' && String.sub s 0 (String.length s') = s' 23 | 24 | let test_delim = "````````````````````````````````" 25 | let tab_re = Str.regexp_string "→" 26 | let insert_tabs s = Str.global_replace tab_re "\t" s 27 | 28 | type test = 29 | { filename : string 30 | ; example : int 31 | ; markdown : string 32 | ; html : string 33 | } 34 | 35 | let add_line buf l = 36 | Buffer.add_string buf (insert_tabs l); 37 | Buffer.add_char buf '\n' 38 | 39 | let parse_test_spec filename = 40 | let buf = Buffer.create 256 in 41 | with_open_in filename @@ fun ic -> 42 | let rec go tests example = 43 | match input_line ic with 44 | | exception End_of_file -> List.rev tests 45 | | line -> 46 | if begins_with line test_delim then begin 47 | Buffer.clear buf; 48 | let rec get_test () = 49 | let line = input_line ic in 50 | if line = "." then begin 51 | let markdown = Buffer.contents buf in 52 | Buffer.clear buf; 53 | let rec get_html () = 54 | let line = input_line ic in 55 | if begins_with line test_delim then 56 | let html = Buffer.contents buf in 57 | { filename; example; markdown; html } 58 | else begin 59 | add_line buf line; 60 | get_html () 61 | end 62 | in 63 | get_html () 64 | end 65 | else begin 66 | add_line buf line; 67 | get_test () 68 | end 69 | in 70 | go (get_test () :: tests) (succ example) 71 | end 72 | else go tests example 73 | in 74 | go [] 1 75 | 76 | let write_dune_file test_specs tests = 77 | let pp ppf { filename; example; _ } = 78 | let base = Filename.remove_extension filename in 79 | Format.fprintf ppf "@ %s-%03d.md %s-%03d.html" base example base example 80 | in 81 | Format.printf 82 | "@[(rule@ @[(deps %s)@]@ @[(targets%t)@]@ @[(action@ \ 83 | (run ./extract_tests.exe -generate-test-files %%{deps}))@])@]@." 84 | (String.concat " " test_specs) 85 | (fun ppf -> List.iter (pp ppf) tests); 86 | List.iter 87 | (fun { filename; example; _ } -> 88 | let base = Filename.remove_extension filename in 89 | Format.printf 90 | "@[(rule@ @[(action@ @[(with-stdout-to \ 91 | %s-%03d.html.new@ @[(run@ ./omd.exe@ \ 92 | %%{dep:%s-%03d.md})@])@])@])@]@." 93 | base 94 | example 95 | base 96 | example; 97 | Format.printf 98 | "@[(rule@ @[(alias %s-%03d)@]@ @[(action@ \ 99 | @[(diff@ %s-%03d.html %s-%03d.html.new)@])@])@]@." 100 | base 101 | example 102 | base 103 | example 104 | base 105 | example) 106 | tests; 107 | let pp ppf { filename; example; _ } = 108 | let base = Filename.remove_extension filename in 109 | if not (List.mem example disabled) then 110 | Format.fprintf ppf "@ (alias %s-%03d)" base example 111 | in 112 | Format.printf 113 | "@[(alias@ (name runtest)@ @[(deps%t)@])@]@." 114 | (fun ppf -> List.iter (pp ppf) tests) 115 | 116 | let li_begin_re = Str.regexp_string "
  • \n" 117 | let li_end_re = Str.regexp_string "\n
  • " 118 | 119 | let normalize_html s = 120 | Str.global_replace li_end_re "" (Str.global_replace li_begin_re "
  • " s) 121 | 122 | let generate_test_files tests = 123 | let f { filename; example; markdown; html } = 124 | let base = Filename.remove_extension filename in 125 | with_open_out (Printf.sprintf "%s-%03d.md" base example) (fun oc -> 126 | output_string oc markdown); 127 | with_open_out (Printf.sprintf "%s-%03d.html" base example) (fun oc -> 128 | output_string oc (normalize_html html)) 129 | in 130 | List.iter f tests 131 | 132 | type mode = 133 | | Generate_test_files 134 | | Write_dune_file 135 | 136 | let mode = ref None 137 | 138 | let spec = 139 | let set x () = mode := Some x in 140 | [ ( "-generate-test-files" 141 | , Arg.Unit (set Generate_test_files) 142 | , " Generate test files" ) 143 | ; ("-write-dune-file", Arg.Unit (set Write_dune_file), " Write dune file") 144 | ] 145 | 146 | let test_specs = ref [] 147 | let add_to_list l x = l := x :: !l 148 | 149 | let () = 150 | Arg.parse (Arg.align spec) (add_to_list test_specs) ""; 151 | let test_specs = List.rev !test_specs in 152 | let tests = List.flatten (List.map parse_test_spec test_specs) in 153 | match !mode with 154 | | None -> () 155 | | Some Generate_test_files -> generate_test_files tests 156 | | Some Write_dune_file -> write_dune_file test_specs tests 157 | -------------------------------------------------------------------------------- /tests/gfm_table_spec.md: -------------------------------------------------------------------------------- 1 | *Note on the Omd implementation:* Table specification excerpted from 2 | [this commit][0] in the GitHub-Flavored Markdown repository. There is 3 | one alteration (noted below), where the GFM spec appears to contradict 4 | the CommonMark spec (and itself) on the treatment of escape characters 5 | in code spans. 6 | 7 | [0]: https://github.com/github/cmark-gfm/blob/6a6e335709ef68cf2c616eeaf61b09ed4c654669/test/spec.txt 8 | 9 | ## Tables (extension) 10 | 11 | GFM enables the `table` extension, where an additional leaf block type is 12 | available. 13 | 14 | A [table](@) is an arrangement of data with rows and columns, consisting of a 15 | single header row, a [delimiter row] separating the header from the data, and 16 | zero or more data rows. 17 | 18 | Each row consists of cells containing arbitrary text, in which [inlines] are 19 | parsed, separated by pipes (`|`). A leading and trailing pipe is also 20 | recommended for clarity of reading, and if there's otherwise parsing ambiguity. 21 | Spaces between pipes and cell content are trimmed. Block-level elements cannot 22 | be inserted in a table. 23 | 24 | The [delimiter row](@) consists of cells whose only content are hyphens (`-`), 25 | and optionally, a leading or trailing colon (`:`), or both, to indicate left, 26 | right, or center alignment respectively. 27 | 28 | ```````````````````````````````` example table 29 | | foo | bar | 30 | | --- | --- | 31 | | baz | bim | 32 | . 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 |
    foobar
    bazbim
    47 | ```````````````````````````````` 48 | 49 | Cells in one column don't need to match length, though it's easier to read if 50 | they are. Likewise, use of leading and trailing pipes may be inconsistent: 51 | 52 | ```````````````````````````````` example table 53 | | abc | defghi | 54 | :-: | -----------: 55 | bar | baz 56 | . 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |
    abcdefghi
    barbaz
    71 | ```````````````````````````````` 72 | 73 | Include a pipe in a cell's content by escaping it, including inside other 74 | inline spans: 75 | 76 | **Omd alteration**: the escape `\` in the code span is preserved here, 77 | in accordance with the rules for code spans in the CommonMark and GFM 78 | specs. 79 | 80 | ```````````````````````````````` example table 81 | | f\|oo | 82 | | ------ | 83 | | b `\|` az | 84 | | b **\|** im | 85 | . 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 |
    f|oo
    b \| az
    b | im
    101 | ```````````````````````````````` 102 | 103 | The table is broken at the first empty line, or beginning of another 104 | block-level structure: 105 | 106 | ```````````````````````````````` example table 107 | | abc | def | 108 | | --- | --- | 109 | | bar | baz | 110 | > bar 111 | . 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 |
    abcdef
    barbaz
    126 |
    127 |

    bar

    128 |
    129 | ```````````````````````````````` 130 | 131 | ```````````````````````````````` example table 132 | | abc | def | 133 | | --- | --- | 134 | | bar | baz | 135 | bar 136 | 137 | bar 138 | . 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 |
    abcdef
    barbaz
    bar
    157 |

    bar

    158 | ```````````````````````````````` 159 | 160 | The header row must match the [delimiter row] in the number of cells. If not, 161 | a table will not be recognized: 162 | 163 | ```````````````````````````````` example table 164 | | abc | def | 165 | | --- | 166 | | bar | 167 | . 168 |

    | abc | def | 169 | | --- | 170 | | bar |

    171 | ```````````````````````````````` 172 | 173 | The remainder of the table's rows may vary in the number of cells. If there 174 | are a number of cells fewer than the number of cells in the header row, empty 175 | cells are inserted. If there are greater, the excess is ignored: 176 | 177 | ```````````````````````````````` example table 178 | | abc | def | 179 | | --- | --- | 180 | | bar | 181 | | bar | baz | boo | 182 | . 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 |
    abcdef
    bar
    barbaz
    201 | ```````````````````````````````` 202 | 203 | If there are no rows in the body, no `` is generated in HTML output: 204 | 205 | ```````````````````````````````` example table 206 | | abc | def | 207 | | --- | --- | 208 | . 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 |
    abcdef
    217 | ```````````````````````````````` 218 | -------------------------------------------------------------------------------- /tests/omd.ml: -------------------------------------------------------------------------------- 1 | let protect ~finally f = 2 | match f () with 3 | | exception e -> 4 | finally (); 5 | raise e 6 | | r -> 7 | finally (); 8 | r 9 | 10 | let li_begin_re = Str.regexp_string "
  • \n" 11 | let li_end_re = Str.regexp_string "\n
  • " 12 | 13 | let normalize_html s = 14 | Str.global_replace li_end_re "" (Str.global_replace li_begin_re "
  • " s) 15 | 16 | let with_open_in fn f = 17 | let ic = open_in fn in 18 | protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic) 19 | 20 | let () = 21 | with_open_in Sys.argv.(1) @@ fun ic -> 22 | print_string 23 | (normalize_html (Omd.to_html ~auto_identifiers:false (Omd.of_channel ic))) 24 | -------------------------------------------------------------------------------- /tools/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name gen_entities)) 3 | -------------------------------------------------------------------------------- /tools/gen_entities.ml: -------------------------------------------------------------------------------- 1 | let f s = 2 | Scanf.sscanf s " \"&%s@;\": { %_S: [%s@], %_S: \"%_s@\" }" (fun s nums -> 3 | ( s 4 | , List.map 5 | (fun s -> int_of_string (String.trim s)) 6 | (String.split_on_char ',' nums) )) 7 | 8 | let main ic = 9 | let rec loop () = 10 | match input_line ic with 11 | | s -> 12 | begin 13 | match f s with 14 | | name, codepoints -> 15 | Printf.printf " | %S -> [" name; 16 | List.iteri 17 | (fun i cp -> 18 | if i > 0 then print_string "; "; 19 | Printf.printf "Uchar.of_int %d" cp) 20 | codepoints; 21 | Printf.printf "]\n" 22 | | exception _ -> () 23 | end; 24 | loop () 25 | | exception End_of_file -> Printf.printf " | _ -> []\n%!" 26 | in 27 | Printf.printf "let f = function\n"; 28 | loop () 29 | 30 | let () = 31 | let ic = open_in Sys.argv.(1) in 32 | main ic; 33 | close_in ic 34 | --------------------------------------------------------------------------------