├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── cmark.i ├── cmark └── builder.lua ├── cmark_wrap.c ├── ext ├── bench.h ├── blocks.c ├── buffer.c ├── buffer.h ├── case_fold.inc ├── case_fold_switch.inc ├── chunk.h ├── cmark.c ├── cmark.h ├── cmark_ctype.c ├── cmark_ctype.h ├── cmark_export.h ├── cmark_version.h ├── commonmark.c ├── entities.inc ├── houdini.h ├── houdini_href_e.c ├── houdini_html_e.c ├── houdini_html_u.c ├── html.c ├── html_unescape.h ├── inlines.c ├── inlines.h ├── iterator.c ├── iterator.h ├── latex.c ├── man.c ├── node.c ├── node.h ├── parser.h ├── references.c ├── references.h ├── render.c ├── render.h ├── scanners.c ├── scanners.h ├── utf8.c ├── utf8.h └── xml.c ├── rockspec.in ├── spec-tests.lua └── test.t /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | linux: 7 | 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v1 11 | - uses: leafo/gh-actions-lua@v9 12 | - uses: leafo/gh-actions-luarocks@v4 13 | - name: Build and test 14 | run: | 15 | export VERSION=`grep CMARK_VERSION_STRING ext/cmark_version.h | awk '{gsub(/["]/,""); print $3}'` 16 | export REVISION=1 17 | sed -e "s/_VERSION/${VERSION}/g; s/_REVISION/${REVISION}/g" < rockspec.in > cmark-${VERSION}-${REVISION}.rockspec 18 | luarocks --local make cmark-${VERSION}-${REVISION}.rockspec 19 | 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.rockspec 2 | cmark.so 3 | *.o 4 | *.a 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015, John MacFarlane 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CBITS = ext 2 | # Version of the vendored cmark, read from its header with the surrounding double quotes stripped 3 | # (same extraction as the CI workflow); := runs the pipeline once instead of on every expansion. 4 | VERSION := $(shell grep CMARK_VERSION_STRING $(CBITS)/cmark_version.h | awk '{gsub(/["]/,""); print $$3}') 5 | REVISION=1 6 | ROCKSPEC=cmark-$(VERSION)-$(REVISION).rockspec 7 | CFLAGS = -fPIC -O3 -I$(CBITS) -I.
8 | SWIG ?= swig 9 | CMARK_DIR ?= ../cmark 10 | # One object file per vendored C source; patsubst only rewrites a trailing .c. 11 | OBJS := $(patsubst %.c,%.o,$(wildcard $(CBITS)/*.c)) 12 | C_SOURCES := $(wildcard $(CBITS)/*.c $(CBITS)/*.h $(CBITS)/*.inc) 13 | LUASTATIC=lua-5.2.4/src/liblua.a 14 | 15 | # Command targets, not files. .PHONY takes a whitespace-separated list; commas would become part of the names. 16 | .PHONY: all rock upload update check test clean distclean 17 | 18 | # Remove a half-written target (e.g. a truncated rockspec) when its recipe fails. 19 | .DELETE_ON_ERROR: 20 | 21 | all: rock 22 | 23 | # Build the rock and install it into the local luarocks tree. 24 | rock: cmark_wrap.c $(ROCKSPEC) 25 | luarocks --local make $(ROCKSPEC) 26 | 27 | upload: rock 28 | luarocks upload --api-key=${LUAROCKS_API_KEY} $(ROCKSPEC) 29 | 30 | # Instantiate the rockspec template with the current version and revision. 31 | $(ROCKSPEC): rockspec.in 32 | sed -e "s/_VERSION/$(VERSION)/g; s/_REVISION/$(REVISION)/g" $< > $@ 33 | 34 | cmark.so: cmark_wrap.o $(OBJS) 35 | $(CC) -shared -o $@ -I$(CBITS) -llua $^ 36 | 37 | cmark-lua.a: cmark_wrap.o $(OBJS) 38 | ar rcs $@ $^ $(LUASTATIC) 39 | 40 | # Regenerate the SWIG wrapper from the interface file. 41 | cmark_wrap.c: cmark.i $(CBITS)/cmark.h 42 | $(SWIG) -o $@ -lua -I$(CBITS) -DCMARK_EXPORT='' $< 43 | 44 | # Refresh the vendored C sources and the spec tests from $(CMARK_DIR). 45 | update: $(C_SOURCES) spec-tests.lua 46 | 47 | spec-tests.lua: $(CMARK_DIR)/test/spec.txt 48 | python3 $(CMARK_DIR)/test/spec_tests.py -d --spec $(CMARK_DIR)/test/spec.txt | sed -e 's/^\([ \t]*\)"\([^"]*\)":/\1\2 = /' | sed -e 's/^\[/return {/' | sed -e 's/^\]/}/' > $@ 49 | 50 | $(CBITS)/config.h: $(CMARK_DIR)/build/src/config.h 51 | cp $< $@ 52 | 53 | $(CBITS)/cmark_export.h: $(CMARK_DIR)/build/src/cmark_export.h 54 | cp $< $@ 55 | 56 | $(CBITS)/cmark_version.h: $(CMARK_DIR)/build/src/cmark_version.h 57 | cp $< $@ 58 | 59 | $(CBITS)/%: $(CMARK_DIR)/src/% 60 | cp $< $@ 61 | 62 | # check and test require: luarocks install luacheck lua-TestMore 63 | check: 64 | luacheck cmark/builder.lua 65 | 66 | test: check 67 | prove test.t 68 | 69 | clean: 70 | rm -rf *.o $(CBITS)/*.o $(ROCKSPEC) 71 | 72 | # $(RM) is rm -f, so distclean succeeds even when cmark.so was never built. 73 | distclean: clean 74 | $(RM) cmark.so 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cmark-lua 2 | ========= 3 | 4 | Lua wrapper for [libcmark](https://github.com/jgm/cmark), 5 | CommonMark parsing and rendering library 6 | 7 | To install: 
`luarocks install cmark`. 8 | 9 | cmark 10 | ----- 11 | 12 | `cmark` exposes the entire API of libcmark, as documented in 13 | the `cmark(3)` man page. Basic usage: 14 | 15 | ``` lua 16 | local cmark = require("cmark") 17 | 18 | local doc = cmark.parse_document(input, string.len(input), cmark.OPT_DEFAULT) 19 | local html = cmark.render_html(doc, cmark.OPT_DEFAULT) 20 | ``` 21 | 22 | For convenience, constants and functions are renamed so that 23 | an initial `cmark_` or `CMARK_` is omitted: for example, 24 | `CMARK_NODE_PARAGRAPH` is exposed as `cmark.NODE_PARAGRAPH` and 25 | `cmark_parse_document` as `cmark.parse_document`. 26 | 27 | Two additional functions are provided: 28 | 29 | `cmark.parse_string(s, opts)` is like `parse_document`, but 30 | does not require you to specify the length of the input 31 | string. 32 | 33 | `cmark.walk(node)` wraps `cmark`'s iterator interface in a 34 | format that is more lua-esque. Usage example: 35 | 36 | ``` lua 37 | local links = 0 38 | for cur, entering, node_type in cmark.walk(doc) do 39 | if node_type == cmark.NODE_LINK and not entering then 40 | links = links + 1 41 | -- insert " (link #n)" after the link: 42 | local t = cmark.node_new(cmark.NODE_TEXT) 43 | cmark.node_set_literal(t, string.format(" (link #%d)", links)) 44 | cmark.node_insert_after(cur, t) 45 | end 46 | end 47 | ``` 48 | 49 | The memory allocated by libcmark for `node` objects must be 50 | freed by the calling program by calling `cmark.node_free` on the 51 | document node. (This will automatically free all children as 52 | well.) 53 | 54 | In addition, a C function 55 | 56 | ``` C 57 | void push_cmark_node(lua_State *L, cmark_node *node) 58 | ``` 59 | 60 | is exported to make it easier to use these functions 61 | from the C API. 62 | 63 | For a higher-level interface, see 64 | [lcmark](https://github.com/jgm/lcmark). 65 | 66 | cmark.builder 67 | ------------- 68 | 69 | A special module, `cmark.builder`, is provided to make it easier 70 | to construct cmark nodes. 
71 | 72 | Usage examples: 73 | 74 | ```lua 75 | local b = require 'cmark.builder' 76 | local mydoc = b.document{ 77 | b.paragraph{ 78 | b.text "Hello ", 79 | b.emph{ 80 | b.text "world" }, 81 | b.link{ 82 | url = "http://example.com", 83 | b.text "!" } } } 84 | ``` 85 | 86 | The arguments to builder functions are generally 87 | tables. Key-value pairs are used to set attributes, 88 | and the other values are used as children or literal 89 | string content, as appropriate. 90 | 91 | The library will interpret values as the appropriate 92 | types, when possible. So, you can supply a single 93 | value instead of an array. And you can supply a string 94 | instead of an inline node, or a node instead of a list 95 | item. The following is equivalent to the example above: 96 | 97 | ```lua 98 | local mydoc = b.document{ 99 | b.paragraph{ 100 | "Hello ", b.emph "world", 101 | b.link{ url="http://example.com", "!"} }} 102 | ``` 103 | 104 | The builder functions are 105 | 106 | ```lua 107 | builder.document{block1, block2, ...} 108 | builder.block_quote{block1, block2, ...} 109 | builder.ordered_list{delim = cmark.PAREN_DELIM, item1, item2, ...} 110 | -- attributes: delim, start, tight 111 | builder.bullet_list -- attributes: tight 112 | builder.item 113 | builder.code_block -- attributes: info 114 | builder.html_block 115 | builder.custom_block -- attributes: on_enter, on_exit 116 | builder.thematic_break 117 | builder.heading -- attributes: level 118 | builder.paragraph 119 | builder.text 120 | builder.emph 121 | builder.strong 122 | builder.link -- attributes: title, url 123 | builder.image -- attributes: title, url 124 | builder.linebreak 125 | builder.softbreak 126 | builder.code 127 | builder.html_inline 128 | builder.custom_inline -- attributes: on_enter, on_exit 129 | builder.get_children(node) -- returns children of a node as a table 130 | ``` 131 | 132 | For developers 133 | -------------- 134 | 135 | `make` builds the rock and installs it locally. 
136 | 137 | `make test` runs some tests. These are in `test.t`. 138 | You'll need the `prove` executable and the `lua-TestMore` rock. 139 | 140 | `make update` will update the C sources and spec test from the 141 | `../cmark` directory. 142 | 143 | -------------------------------------------------------------------------------- /cmark.i: -------------------------------------------------------------------------------- 1 | %module cmark 2 | %{ 3 | #include "cmark.h" 4 | %} 5 | 6 | // Renames: 7 | // CMARK_NODE_PARAGRAPH -> NODE_PARAGRAPH 8 | // cmark_parse_document -> parse_document 9 | %rename("%(regex:/^(cmark|CMARK)_(.*)/\\2/)s") ""; 10 | 11 | %include "cmark.h" 12 | 13 | %{ 14 | extern void push_cmark_node(lua_State *L, cmark_node *node) 15 | { 16 | SWIG_NewPointerObj(L,node,SWIGTYPE_p_cmark_node,0); 17 | } 18 | %} 19 | 20 | %luacode { 21 | 22 | function cmark.parse_string(s, opts) 23 | return cmark.parse_document(s, string.len(s), opts) 24 | end 25 | 26 | -- Returns an iterator function for a generic for loop. Each call yields the 27 | -- current node, a boolean that is true for ENTER events, and the node type. 28 | -- The cmark iterator is freed when EVENT_DONE is reached; a loop that is 29 | -- abandoned early never reaches that point and does not free it. 30 | function cmark.walk(node) 31 | local iter = cmark.iter_new(node) 32 | return function() 33 | -- iter is nil once the traversal has finished, so later calls keep returning nil instead of touching the freed iterator 34 | if not iter then return nil end 35 | local et = cmark.iter_next(iter) 36 | if et == cmark.EVENT_DONE then 37 | cmark.iter_free(iter) 38 | iter = nil 39 | return nil 40 | end 41 | local cur = cmark.iter_get_node(iter) 42 | return cur, (et == cmark.EVENT_ENTER), cmark.node_get_type(cur) 43 | end 44 | end 45 | 46 | } 47 | -------------------------------------------------------------------------------- /cmark/builder.lua: -------------------------------------------------------------------------------- 1 | local c = require('cmark') 2 | 3 | local builder = {} 4 | 5 | -- returns 'inline', 'block', 'item', or 'unknown' 6 | local node_get_class = function(node) 7 | local nt = c.node_get_type(node) 8 | if nt == c.NODE_ITEM then 9 | return 'item' 10 | elseif (nt >= c.NODE_FIRST_BLOCK and nt <= c.NODE_LAST_BLOCK) then 11 | return 'block' 12 | elseif (nt >= c.NODE_FIRST_INLINE and nt <= c.NODE_LAST_INLINE) then 13 | return 'inline' 14 | end 15 | return 
'unknown' 16 | end 17 | 18 | local add_children 19 | -- 'add_children(node, {node1, node2})' 20 | -- adds 'node1' and 'node2' as children of 'node'. 21 | -- 'add_children(node, {node1, {node2, node3}})' 22 | -- adds 'node1', 'node2', and 'node3' as children of 'node'. 23 | -- 'add_children(node, "hello")' 24 | -- adds a text node with "hello" as child of 'node' (or, when 'contains.literal' is set, stores "hello" as the literal of 'node' itself). 25 | -- 'add_children(node, node1)' 26 | -- adds 'node1' as a child of 'node'. 27 | -- The parameter 'contains' is a table with boolean fields 'items', 28 | -- 'blocks', 'inlines', and 'literal' that tells you what kind of 29 | -- children 'node' can contain. 30 | -- The function returns 'true' or 'nil, msg'. 31 | add_children = function(node, v, contains) 32 | if type(v) == 'nil' then 33 | return true -- just skip a nil 34 | end 35 | if type(v) == 'table' then 36 | for _,x in ipairs(v) do 37 | local ok, msg = add_children(node, x, contains) 38 | if not ok then 39 | return nil, msg 40 | end 41 | end 42 | return true 43 | elseif type(v) == 'function' then 44 | -- e.g. 
hard_break -- we want hard_break() 45 | local ok, msg = add_children(node, v(), contains) 46 | return ok, msg 47 | end 48 | local child 49 | if type(v) == 'userdata' then 50 | child = v 51 | elseif contains.literal then 52 | if c.node_set_literal(node, tostring(v)) then 53 | return true 54 | else 55 | return nil, "Could not set literal" 56 | end 57 | else 58 | -- if v is not a node, make a text node: 59 | child = c.node_new(c.NODE_TEXT) 60 | if not child then 61 | return nil, "Could not create text node" 62 | end 63 | if not c.node_set_literal(child, tostring(v)) then 64 | return nil, "Could not set literal" 65 | end 66 | end 67 | local child_class = node_get_class(child) 68 | if (child_class == 'item' and contains.items) or 69 | (child_class == 'block' and contains.blocks) or 70 | (child_class == 'inline' and contains.inlines) then 71 | if not c.node_append_child(node, child) then 72 | return nil, "Could not append child" 73 | end 74 | elseif child_class == 'block' and contains.items then 75 | local item = c.node_new(c.NODE_ITEM) 76 | if not item then 77 | return nil, "Could not create item node" 78 | end 79 | if not c.node_append_child(item, child) then 80 | return nil, "Could not append child to item" 81 | end 82 | if not c.node_append_child(node, item) then 83 | return nil, "Could not append item to node" 84 | end 85 | elseif child_class == 'inline' and contains.blocks then 86 | local para = c.node_new(c.NODE_PARAGRAPH) 87 | if not c.node_append_child(para, child) then 88 | return nil, "Could not append child to para" 89 | end 90 | if not c.node_append_child(node, para) then 91 | return nil, "Could not append para to node" 92 | end 93 | elseif child_class == 'inline' and contains.items then 94 | local para = c.node_new(c.NODE_PARAGRAPH) 95 | local item = c.node_new(c.NODE_ITEM) 96 | if not c.node_append_child(para, child) then 97 | return nil, "Could not append child to para" 98 | end 99 | if not c.node_append_child(item, para) then 100 | return nil, "Could not 
append para to item" 101 | end 102 | if not c.node_append_child(node, item) then 103 | return nil, "Could not append item to node" 104 | end 105 | else 106 | return nil, 'Tried to add a node with class ' .. child_class .. 107 | ' to a node with class ' .. node_get_class(node) 108 | end 109 | return true 110 | end 111 | 112 | -- return children as a table 113 | builder.get_children = function(node) 114 | local child = c.node_first_child(node) 115 | local result = {} 116 | while child do 117 | result[#result + 1] = child 118 | child = c.node_next(child) 119 | end 120 | return result 121 | end 122 | 123 | -- Returns a builder function for nodes of type 'node_type'. 'contains' is a table with boolean fields 'literal', 'blocks', 'inlines', 124 | -- 'items' (nil for leaf nodes that take no content); 'fields' maps attribute names to setter functions. The builder returns the node or 'nil, msg'. 125 | local node = function(node_type, contains, fields) 126 | return function(contents) 127 | local node = c.node_new(node_type) 128 | if not node then 129 | return nil, 'Could not create node of type ' .. tostring(node_type) 130 | end 131 | if contents == nil then 132 | return node 133 | end 134 | -- set the attributes if defined 135 | if fields and type(contents) == 'table' then 136 | for field,func in pairs(fields) do 137 | if contents[field] then 138 | local ok, msg = func(node, contents[field]) 139 | if not ok then 140 | return nil, msg 141 | end 142 | end 143 | end 144 | end 145 | -- treat rest as children; a missing 'contains' means no kind of child is allowed, so add_children reports an error instead of indexing nil 146 | local ok, msg = add_children(node, contents, contains or {}) 147 | if not ok then 148 | return nil, msg 149 | end 150 | return node 151 | end 152 | end 153 | 154 | local function set_tight(n, tight) 155 | local t_int = tight and 1 or 0 156 | return c.node_set_list_tight(n, t_int) 157 | end 158 | 159 | local function set_delim(n, delim) 160 | local delimt 161 | if delim == c.PAREN_DELIM or delim == c.PERIOD_DELIM then 162 | delimt = delim 163 | elseif delim == ')' then 164 | delimt = c.PAREN_DELIM 165 | elseif delim == '.' then 166 | delimt = c.PERIOD_DELIM 167 | else 168 | return nil, 'Unknown delimiter ' .. 
tostring(delim) 169 | end 170 | return c.node_set_list_delim(n, delimt) 171 | end 172 | 173 | builder.document = node(c.NODE_DOCUMENT, {blocks = true}) 174 | 175 | builder.block_quote = node(c.NODE_BLOCK_QUOTE, {blocks = true}) 176 | 177 | builder.ordered_list = function(contents) -- attributes: delim, start, tight; returns the list node or 'nil, msg' 178 | local n, msg = node(c.NODE_LIST, {items = true}, 179 | {delim = set_delim, 180 | start = c.node_set_list_start, 181 | tight = set_tight, 182 | })(contents) 183 | if not n then return nil, msg end -- propagate the failure instead of configuring a nil node 184 | c.node_set_list_type(n, c.ORDERED_LIST) 185 | return n 186 | end 187 | builder.bullet_list = function(contents) -- attributes: tight; returns the list node or 'nil, msg' 188 | local n, msg = node(c.NODE_LIST, {items = true}, 189 | {tight = set_tight, 190 | })(contents) 191 | if not n then return nil, msg end -- propagate the failure instead of configuring a nil node 192 | c.node_set_list_type(n, c.BULLET_LIST) 193 | return n 194 | end 195 | builder.item = node(c.NODE_ITEM, {blocks = true}) 196 | 197 | builder.code_block = node(c.NODE_CODE_BLOCK, {literal = true}, 198 | { info = c.node_set_fence_info }) 199 | 200 | builder.html_block = node(c.NODE_HTML_BLOCK, {literal = true}) 201 | 202 | builder.custom_block = node(c.NODE_CUSTOM_BLOCK, 203 | {inlines = true, blocks = true, items = true}, 204 | { on_enter = c.node_set_on_enter, on_exit = c.node_set_on_exit }) 205 | 206 | builder.thematic_break = node(c.NODE_THEMATIC_BREAK) 207 | 208 | builder.heading = node(c.NODE_HEADING, {inlines = true}, 209 | { level = c.node_set_heading_level }) 210 | 211 | builder.paragraph = node(c.NODE_PARAGRAPH, {inlines = true}) 212 | 213 | builder.text = node(c.NODE_TEXT, {literal = true}) 214 | 215 | builder.emph = node(c.NODE_EMPH, {inlines = true}) 216 | 217 | builder.strong = node(c.NODE_STRONG, {inlines = true}) 218 | 219 | builder.link = node(c.NODE_LINK, {inlines = true}, 220 | {title = c.node_set_title, url = c.node_set_url}) 221 | 222 | builder.image = node(c.NODE_IMAGE, {inlines = true}, 223 | {title = c.node_set_title, url = c.node_set_url}) 224 | 225 | builder.linebreak = node(c.NODE_LINEBREAK) 226 | 227 | builder.softbreak = node(c.NODE_SOFTBREAK) 228 | 229 | builder.code = node(c.NODE_CODE, 
{literal = true}) 230 | 231 | builder.html_inline = node(c.NODE_HTML_INLINE, {literal = true}) 232 | 233 | builder.custom_inline = node(c.NODE_CUSTOM_INLINE, {inlines = true}, 234 | { on_enter = c.node_set_on_enter, on_exit = c.node_set_on_exit }) 235 | 236 | return builder 237 | -------------------------------------------------------------------------------- /ext/bench.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_BENCH_H 2 | #define CMARK_BENCH_H 3 | 4 | #include 5 | #include 6 | 7 | #ifdef TIMER 8 | float _cmark_start_time; 9 | float _cmark_end_time; 10 | float _cmark_save_time; 11 | 12 | #define start_timer() \ 13 | _cmark_save_time = _cmark_start_time; \ 14 | _cmark_start_time = (float)clock() / CLOCKS_PER_SEC 15 | 16 | #define end_timer(M) \ 17 | _cmark_end_time = (float)clock() / CLOCKS_PER_SEC; \ 18 | fprintf(stderr, "[TIME] (%s:%d) %4.f ns " M "\n", __FILE__, __LINE__, \ 19 | (_cmark_end_time - _cmark_start_time) * 1000000); \ 20 | _cmark_start_time = _cmark_save_time; 21 | 22 | #else 23 | #define start_timer() 24 | #define end_timer(M) 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /ext/buffer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cmark_ctype.h" 11 | #include "buffer.h" 12 | 13 | /* Used as default value for cmark_strbuf->ptr so that people can always 14 | * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. 15 | */ 16 | unsigned char cmark_strbuf__initbuf[1]; 17 | 18 | #ifndef MIN 19 | #define MIN(x, y) ((x < y) ? 
x : y) 20 | #endif 21 | 22 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 23 | bufsize_t initial_size) { 24 | buf->mem = mem; 25 | buf->asize = 0; 26 | buf->size = 0; 27 | buf->ptr = cmark_strbuf__initbuf; 28 | 29 | if (initial_size > 0) 30 | cmark_strbuf_grow(buf, initial_size); 31 | } 32 | 33 | static inline void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { 34 | cmark_strbuf_grow(buf, buf->size + add); 35 | } 36 | 37 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { 38 | assert(target_size > 0); 39 | 40 | if (target_size < buf->asize) 41 | return; 42 | 43 | if (target_size > (bufsize_t)(INT32_MAX / 2)) { 44 | fprintf(stderr, 45 | "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n", 46 | (INT32_MAX / 2)); 47 | abort(); 48 | } 49 | 50 | /* Oversize the buffer by 50% to guarantee amortized linear time 51 | * complexity on append operations. */ 52 | bufsize_t new_size = target_size + target_size / 2; 53 | new_size += 1; 54 | new_size = (new_size + 7) & ~7; 55 | 56 | buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? 
buf->ptr : NULL, 57 | new_size); 58 | buf->asize = new_size; 59 | } 60 | 61 | void cmark_strbuf_free(cmark_strbuf *buf) { 62 | if (!buf) 63 | return; 64 | 65 | if (buf->ptr != cmark_strbuf__initbuf) 66 | buf->mem->free(buf->ptr); 67 | 68 | cmark_strbuf_init(buf->mem, buf, 0); 69 | } 70 | 71 | void cmark_strbuf_clear(cmark_strbuf *buf) { 72 | buf->size = 0; 73 | 74 | if (buf->asize > 0) 75 | buf->ptr[0] = '\0'; 76 | } 77 | 78 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 79 | bufsize_t len) { 80 | if (len <= 0 || data == NULL) { 81 | cmark_strbuf_clear(buf); 82 | } else { 83 | if (data != buf->ptr) { 84 | if (len >= buf->asize) 85 | cmark_strbuf_grow(buf, len); 86 | memmove(buf->ptr, data, len); 87 | } 88 | buf->size = len; 89 | buf->ptr[buf->size] = '\0'; 90 | } 91 | } 92 | 93 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) { 94 | S_strbuf_grow_by(buf, 1); 95 | buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); 96 | buf->ptr[buf->size] = '\0'; 97 | } 98 | 99 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 100 | bufsize_t len) { 101 | if (len <= 0) 102 | return; 103 | 104 | S_strbuf_grow_by(buf, len); 105 | memmove(buf->ptr + buf->size, data, len); 106 | buf->size += len; 107 | buf->ptr[buf->size] = '\0'; 108 | } 109 | 110 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { 111 | cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string)); 112 | } 113 | 114 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { 115 | unsigned char *data = buf->ptr; 116 | 117 | if (buf->asize == 0) { 118 | /* return an empty string */ 119 | return (unsigned char *)buf->mem->calloc(1, 1); 120 | } 121 | 122 | cmark_strbuf_init(buf->mem, buf, 0); 123 | return data; 124 | } 125 | 126 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { 127 | if (len < 0) 128 | len = 0; 129 | 130 | if (len < buf->size) { 131 | buf->size = len; 132 | buf->ptr[buf->size] = '\0'; 133 | } 134 | } 135 | 136 | void 
cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { 137 | if (n > 0) { 138 | if (n > buf->size) 139 | n = buf->size; 140 | buf->size = buf->size - n; 141 | if (buf->size) 142 | memmove(buf->ptr, buf->ptr + n, buf->size); 143 | 144 | buf->ptr[buf->size] = '\0'; 145 | } 146 | } 147 | 148 | void cmark_strbuf_rtrim(cmark_strbuf *buf) { 149 | if (!buf->size) 150 | return; 151 | 152 | while (buf->size > 0) { 153 | if (!cmark_isspace(buf->ptr[buf->size - 1])) 154 | break; 155 | 156 | buf->size--; 157 | } 158 | 159 | buf->ptr[buf->size] = '\0'; 160 | } 161 | 162 | void cmark_strbuf_trim(cmark_strbuf *buf) { 163 | bufsize_t i = 0; 164 | 165 | if (!buf->size) 166 | return; 167 | 168 | while (i < buf->size && cmark_isspace(buf->ptr[i])) 169 | i++; 170 | 171 | cmark_strbuf_drop(buf, i); 172 | 173 | cmark_strbuf_rtrim(buf); 174 | } 175 | 176 | // Destructively modify string, collapsing consecutive 177 | // space and newline characters into a single space. 178 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { 179 | bool last_char_was_space = false; 180 | bufsize_t r, w; 181 | 182 | for (r = 0, w = 0; r < s->size; ++r) { 183 | if (cmark_isspace(s->ptr[r])) { 184 | if (!last_char_was_space) { 185 | s->ptr[w++] = ' '; 186 | last_char_was_space = true; 187 | } 188 | } else { 189 | s->ptr[w++] = s->ptr[r]; 190 | last_char_was_space = false; 191 | } 192 | } 193 | 194 | cmark_strbuf_truncate(s, w); 195 | } 196 | 197 | // Destructively unescape a string: remove backslashes before punctuation chars. 
198 | void cmark_strbuf_unescape(cmark_strbuf *buf) { 199 | bufsize_t r, w; 200 | 201 | for (r = 0, w = 0; r < buf->size; ++r) { 202 | if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) 203 | r++; 204 | 205 | buf->ptr[w++] = buf->ptr[r]; 206 | } 207 | 208 | cmark_strbuf_truncate(buf, w); 209 | } 210 | -------------------------------------------------------------------------------- /ext/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_BUFFER_H 2 | #define CMARK_BUFFER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cmark.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | typedef int32_t bufsize_t; 17 | 18 | typedef struct { 19 | cmark_mem *mem; 20 | unsigned char *ptr; 21 | bufsize_t asize, size; 22 | } cmark_strbuf; 23 | 24 | extern unsigned char cmark_strbuf__initbuf[]; 25 | 26 | #define CMARK_BUF_INIT(mem) \ 27 | { mem, cmark_strbuf__initbuf, 0, 0 } 28 | 29 | /** 30 | * Initialize a cmark_strbuf structure. 31 | * 32 | * For the cases where CMARK_BUF_INIT cannot be used to do static 33 | * initialization. 34 | */ 35 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 36 | bufsize_t initial_size); 37 | 38 | /** 39 | * Grow the buffer to hold at least `target_size` bytes. 
40 | */ 41 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); 42 | 43 | void cmark_strbuf_free(cmark_strbuf *buf); 44 | 45 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); 46 | 47 | /* 48 | static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { 49 | return (char *)buf->ptr; 50 | } 51 | */ 52 | 53 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) 54 | 55 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 56 | bufsize_t len); 57 | void cmark_strbuf_putc(cmark_strbuf *buf, int c); 58 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 59 | bufsize_t len); 60 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); 61 | void cmark_strbuf_clear(cmark_strbuf *buf); 62 | 63 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); 64 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); 65 | void cmark_strbuf_rtrim(cmark_strbuf *buf); 66 | void cmark_strbuf_trim(cmark_strbuf *buf); 67 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); 68 | void cmark_strbuf_unescape(cmark_strbuf *s); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /ext/chunk.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CHUNK_H 2 | #define CMARK_CHUNK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cmark.h" 8 | #include "buffer.h" 9 | #include "cmark_ctype.h" 10 | 11 | #define CMARK_CHUNK_EMPTY \ 12 | { NULL, 0 } 13 | 14 | typedef struct { 15 | const unsigned char *data; 16 | bufsize_t len; 17 | } cmark_chunk; 18 | 19 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 20 | static inline void cmark_chunk_free(cmark_chunk *c) { 21 | c->data = NULL; 22 | c->len = 0; 23 | } 24 | 25 | static inline void cmark_chunk_ltrim(cmark_chunk *c) { 26 | while (c->len && cmark_isspace(c->data[0])) { 27 | c->data++; 28 | c->len--; 29 | } 30 | } 31 | 
32 | static inline void cmark_chunk_rtrim(cmark_chunk *c) { 33 | while (c->len > 0) { 34 | if (!cmark_isspace(c->data[c->len - 1])) 35 | break; 36 | 37 | c->len--; 38 | } 39 | } 40 | 41 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 42 | static inline void cmark_chunk_trim(cmark_chunk *c) { 43 | cmark_chunk_ltrim(c); 44 | cmark_chunk_rtrim(c); 45 | } 46 | 47 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 48 | static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, 49 | bufsize_t offset) { 50 | const unsigned char *p = 51 | (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); 52 | return p ? (bufsize_t)(p - ch->data) : ch->len; 53 | } 54 | 55 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 56 | static inline cmark_chunk cmark_chunk_literal(const char *data) { 57 | bufsize_t len = data ? (bufsize_t)strlen(data) : 0; 58 | cmark_chunk c = {(unsigned char *)data, len}; 59 | return c; 60 | } 61 | 62 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 63 | static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, 64 | bufsize_t len) { 65 | cmark_chunk c = {ch->data + pos, len}; 66 | return c; 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /ext/cmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "node.h" 5 | #include "houdini.h" 6 | #include "cmark.h" 7 | #include "buffer.h" 8 | 9 | int cmark_version(void) { return CMARK_VERSION; } 10 | 11 | const char *cmark_version_string(void) { return CMARK_VERSION_STRING; } 12 | 13 | static void *xcalloc(size_t nmem, size_t size) { 14 | void *ptr = calloc(nmem, size); 15 | if (!ptr) { 16 | fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n"); 17 | abort(); 18 | } 19 | return ptr; 20 | } 21 | 22 | static void *xrealloc(void *ptr, size_t size) { 23 | void *new_ptr = realloc(ptr, size); 24 | if (!new_ptr) { 
25 | fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n"); 26 | abort(); 27 | } 28 | return new_ptr; 29 | } 30 | 31 | cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; 32 | 33 | cmark_mem *cmark_get_default_mem_allocator(void) { 34 | return &DEFAULT_MEM_ALLOCATOR; 35 | } 36 | 37 | 38 | char *cmark_markdown_to_html(const char *text, size_t len, int options) { 39 | cmark_node *doc; 40 | char *result; 41 | 42 | doc = cmark_parse_document(text, len, options); 43 | 44 | result = cmark_render_html(doc, options); 45 | cmark_node_free(doc); 46 | 47 | return result; 48 | } 49 | -------------------------------------------------------------------------------- /ext/cmark.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_H 2 | #define CMARK_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | /** # NAME 13 | * 14 | * **cmark** - CommonMark parsing, manipulating, and rendering 15 | */ 16 | 17 | /** # DESCRIPTION 18 | * 19 | * ## Simple Interface 20 | */ 21 | 22 | /** Convert 'text' (assumed to be a UTF-8 encoded string with length 23 | * 'len') from CommonMark Markdown to HTML, returning a null-terminated, 24 | * UTF-8-encoded string. It is the caller's responsibility 25 | * to free the returned buffer. 
26 | */ 27 | CMARK_EXPORT 28 | char *cmark_markdown_to_html(const char *text, size_t len, int options); 29 | 30 | /** ## Node Structure 31 | */ 32 | 33 | typedef enum { 34 | /* Error status */ 35 | CMARK_NODE_NONE, 36 | 37 | /* Block */ 38 | CMARK_NODE_DOCUMENT, 39 | CMARK_NODE_BLOCK_QUOTE, 40 | CMARK_NODE_LIST, 41 | CMARK_NODE_ITEM, 42 | CMARK_NODE_CODE_BLOCK, 43 | CMARK_NODE_HTML_BLOCK, 44 | CMARK_NODE_CUSTOM_BLOCK, 45 | CMARK_NODE_PARAGRAPH, 46 | CMARK_NODE_HEADING, 47 | CMARK_NODE_THEMATIC_BREAK, 48 | 49 | CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, 50 | CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, 51 | 52 | /* Inline */ 53 | CMARK_NODE_TEXT, 54 | CMARK_NODE_SOFTBREAK, 55 | CMARK_NODE_LINEBREAK, 56 | CMARK_NODE_CODE, 57 | CMARK_NODE_HTML_INLINE, 58 | CMARK_NODE_CUSTOM_INLINE, 59 | CMARK_NODE_EMPH, 60 | CMARK_NODE_STRONG, 61 | CMARK_NODE_LINK, 62 | CMARK_NODE_IMAGE, 63 | 64 | CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, 65 | CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE 66 | } cmark_node_type; 67 | 68 | /* For backwards compatibility: */ 69 | #define CMARK_NODE_HEADER CMARK_NODE_HEADING 70 | #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK 71 | #define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK 72 | #define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE 73 | 74 | typedef enum { 75 | CMARK_NO_LIST, 76 | CMARK_BULLET_LIST, 77 | CMARK_ORDERED_LIST 78 | } cmark_list_type; 79 | 80 | typedef enum { 81 | CMARK_NO_DELIM, 82 | CMARK_PERIOD_DELIM, 83 | CMARK_PAREN_DELIM 84 | } cmark_delim_type; 85 | 86 | typedef struct cmark_node cmark_node; 87 | typedef struct cmark_parser cmark_parser; 88 | typedef struct cmark_iter cmark_iter; 89 | 90 | /** 91 | * ## Custom memory allocator support 92 | */ 93 | 94 | /** Defines the memory allocation functions to be used by CMark 95 | * when parsing and allocating a document tree 96 | */ 97 | typedef struct cmark_mem { 98 | void *(*calloc)(size_t, size_t); 99 | void *(*realloc)(void *, size_t); 100 | void (*free)(void *); 101 | } 
cmark_mem; 102 | 103 | /** Returns a pointer to the default memory allocator. 104 | */ 105 | CMARK_EXPORT cmark_mem *cmark_get_default_mem_allocator(void); 106 | 107 | /** 108 | * ## Creating and Destroying Nodes 109 | */ 110 | 111 | /** Creates a new node of type 'type'. Note that the node may have 112 | * other required properties, which it is the caller's responsibility 113 | * to assign. 114 | */ 115 | CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type); 116 | 117 | /** Same as `cmark_node_new`, but explicitly listing the memory 118 | * allocator used to allocate the node. Note: be sure to use the same 119 | * allocator for every node in a tree, or bad things can happen. 120 | */ 121 | CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, 122 | cmark_mem *mem); 123 | 124 | /** Frees the memory allocated for a node and any children. 125 | */ 126 | CMARK_EXPORT void cmark_node_free(cmark_node *node); 127 | 128 | /** 129 | * ## Tree Traversal 130 | */ 131 | 132 | /** Returns the next node in the sequence after 'node', or NULL if 133 | * there is none. 134 | */ 135 | CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node); 136 | 137 | /** Returns the previous node in the sequence before 'node', or NULL if 138 | * there is none. 139 | */ 140 | CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node); 141 | 142 | /** Returns the parent of 'node', or NULL if there is none. 143 | */ 144 | CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node); 145 | 146 | /** Returns the first child of 'node', or NULL if 'node' has no children. 147 | */ 148 | CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node); 149 | 150 | /** Returns the last child of 'node', or NULL if 'node' has no children. 
151 | */ 152 | CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node); 153 | 154 | /** 155 | * ## Iterator 156 | * 157 | * An iterator will walk through a tree of nodes, starting from a root 158 | * node, returning one node at a time, together with information about 159 | * whether the node is being entered or exited. The iterator will 160 | * first descend to a child node, if there is one. When there is no 161 | * child, the iterator will go to the next sibling. When there is no 162 | * next sibling, the iterator will return to the parent (but with 163 | * a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will 164 | * return `CMARK_EVENT_DONE` when it reaches the root node again. 165 | * One natural application is an HTML renderer, where an `ENTER` event 166 | * outputs an open tag and an `EXIT` event outputs a close tag. 167 | * An iterator might also be used to transform an AST in some systematic 168 | * way, for example, turning all level-3 headings into regular paragraphs. 169 | * 170 | * void 171 | * usage_example(cmark_node *root) { 172 | * cmark_event_type ev_type; 173 | * cmark_iter *iter = cmark_iter_new(root); 174 | * 175 | * while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 176 | * cmark_node *cur = cmark_iter_get_node(iter); 177 | * // Do something with `cur` and `ev_type` 178 | * } 179 | * 180 | * cmark_iter_free(iter); 181 | * } 182 | * 183 | * Iterators will never return `EXIT` events for leaf nodes, which are nodes 184 | * of type: 185 | * 186 | * * CMARK_NODE_HTML_BLOCK 187 | * * CMARK_NODE_THEMATIC_BREAK 188 | * * CMARK_NODE_CODE_BLOCK 189 | * * CMARK_NODE_TEXT 190 | * * CMARK_NODE_SOFTBREAK 191 | * * CMARK_NODE_LINEBREAK 192 | * * CMARK_NODE_CODE 193 | * * CMARK_NODE_HTML_INLINE 194 | * 195 | * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for 196 | * leaf nodes. 
197 | */ 198 | 199 | typedef enum { 200 | CMARK_EVENT_NONE, 201 | CMARK_EVENT_DONE, 202 | CMARK_EVENT_ENTER, 203 | CMARK_EVENT_EXIT 204 | } cmark_event_type; 205 | 206 | /** Creates a new iterator starting at 'root'. The current node and event 207 | * type are undefined until 'cmark_iter_next' is called for the first time. 208 | * The memory allocated for the iterator should be released using 209 | * 'cmark_iter_free' when it is no longer needed. 210 | */ 211 | CMARK_EXPORT 212 | cmark_iter *cmark_iter_new(cmark_node *root); 213 | 214 | /** Frees the memory allocated for an iterator. 215 | */ 216 | CMARK_EXPORT 217 | void cmark_iter_free(cmark_iter *iter); 218 | 219 | /** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`, 220 | * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`). 221 | */ 222 | CMARK_EXPORT 223 | cmark_event_type cmark_iter_next(cmark_iter *iter); 224 | 225 | /** Returns the current node. 226 | */ 227 | CMARK_EXPORT 228 | cmark_node *cmark_iter_get_node(cmark_iter *iter); 229 | 230 | /** Returns the current event type. 231 | */ 232 | CMARK_EXPORT 233 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter); 234 | 235 | /** Returns the root node. 236 | */ 237 | CMARK_EXPORT 238 | cmark_node *cmark_iter_get_root(cmark_iter *iter); 239 | 240 | /** Resets the iterator so that the current node is 'current' and 241 | * the event type is 'event_type'. The new current node must be a 242 | * descendant of the root node or the root node itself. 243 | */ 244 | CMARK_EXPORT 245 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current, 246 | cmark_event_type event_type); 247 | 248 | /** 249 | * ## Accessors 250 | */ 251 | 252 | /** Returns the user data of 'node'. 253 | */ 254 | CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); 255 | 256 | /** Sets arbitrary user data for 'node'. Returns 1 on success, 257 | * 0 on failure. 
258 | */ 259 | CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); 260 | 261 | /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. 262 | */ 263 | CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); 264 | 265 | /** Like 'cmark_node_get_type', but returns a string representation 266 | of the type, or `""`. 267 | */ 268 | CMARK_EXPORT 269 | const char *cmark_node_get_type_string(cmark_node *node); 270 | 271 | /** Returns the string contents of 'node', or an empty 272 | string if none is set. Returns NULL if called on a 273 | node that does not have string content. 274 | */ 275 | CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node); 276 | 277 | /** Sets the string contents of 'node'. Returns 1 on success, 278 | * 0 on failure. 279 | */ 280 | CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content); 281 | 282 | /** Returns the heading level of 'node', or 0 if 'node' is not a heading. 283 | */ 284 | CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node); 285 | 286 | /* For backwards compatibility */ 287 | #define cmark_node_get_header_level cmark_node_get_heading_level 288 | #define cmark_node_set_header_level cmark_node_set_heading_level 289 | 290 | /** Sets the heading level of 'node', returning 1 on success and 0 on error. 291 | */ 292 | CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level); 293 | 294 | /** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node' 295 | * is not a list. 296 | */ 297 | CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); 298 | 299 | /** Sets the list type of 'node', returning 1 on success and 0 on error. 300 | */ 301 | CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, 302 | cmark_list_type type); 303 | 304 | /** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node' 305 | * is not a list. 
306 | */ 307 | CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node); 308 | 309 | /** Sets the list delimiter type of 'node', returning 1 on success and 0 310 | * on error. 311 | */ 312 | CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node, 313 | cmark_delim_type delim); 314 | 315 | /** Returns starting number of 'node', if it is an ordered list, otherwise 0. 316 | */ 317 | CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node); 318 | 319 | /** Sets starting number of 'node', if it is an ordered list. Returns 1 320 | * on success, 0 on failure. 321 | */ 322 | CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); 323 | 324 | /** Returns 1 if 'node' is a tight list, 0 otherwise. 325 | */ 326 | CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node); 327 | 328 | /** Sets the "tightness" of a list. Returns 1 on success, 0 on failure. 329 | */ 330 | CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); 331 | 332 | /** Returns the info string from a fenced code block. 333 | */ 334 | CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); 335 | 336 | /** Sets the info string in a fenced code block, returning 1 on 337 | * success and 0 on failure. 338 | */ 339 | CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); 340 | 341 | /** Returns the URL of a link or image 'node', or an empty string 342 | if no URL is set. Returns NULL if called on a node that is 343 | not a link or image. 344 | */ 345 | CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node); 346 | 347 | /** Sets the URL of a link or image 'node'. Returns 1 on success, 348 | * 0 on failure. 349 | */ 350 | CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); 351 | 352 | /** Returns the title of a link or image 'node', or an empty 353 | string if no title is set. Returns NULL if called on a node 354 | that is not a link or image. 
355 | */ 356 | CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node); 357 | 358 | /** Sets the title of a link or image 'node'. Returns 1 on success, 359 | * 0 on failure. 360 | */ 361 | CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); 362 | 363 | /** Returns the literal "on enter" text for a custom 'node', or 364 | an empty string if no on_enter is set. Returns NULL if called 365 | on a non-custom node. 366 | */ 367 | CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node); 368 | 369 | /** Sets the literal text to render "on enter" for a custom 'node'. 370 | Any children of the node will be rendered after this text. 371 | Returns 1 on success, 0 on failure. 372 | */ 373 | CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node, 374 | const char *on_enter); 375 | 376 | /** Returns the literal "on exit" text for a custom 'node', or 377 | an empty string if no on_exit is set. Returns NULL if 378 | called on a non-custom node. 379 | */ 380 | CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node); 381 | 382 | /** Sets the literal text to render "on exit" for a custom 'node'. 383 | Any children of the node will be rendered before this text. 384 | Returns 1 on success, 0 on failure. 385 | */ 386 | CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit); 387 | 388 | /** Returns the line on which 'node' begins. 389 | */ 390 | CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node); 391 | 392 | /** Returns the column at which 'node' begins. 393 | */ 394 | CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node); 395 | 396 | /** Returns the line on which 'node' ends. 397 | */ 398 | CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); 399 | 400 | /** Returns the column at which 'node' ends. 
401 | */ 402 | CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); 403 | 404 | /** 405 | * ## Tree Manipulation 406 | */ 407 | 408 | /** Unlinks a 'node', removing it from the tree, but not freeing its 409 | * memory. (Use 'cmark_node_free' for that.) 410 | */ 411 | CMARK_EXPORT void cmark_node_unlink(cmark_node *node); 412 | 413 | /** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure. 414 | */ 415 | CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, 416 | cmark_node *sibling); 417 | 418 | /** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure. 419 | */ 420 | CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); 421 | 422 | /** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does 423 | * not free its memory). 424 | * Returns 1 on success, 0 on failure. 425 | */ 426 | CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode); 427 | 428 | /** Adds 'child' to the beginning of the children of 'node'. 429 | * Returns 1 on success, 0 on failure. 430 | */ 431 | CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); 432 | 433 | /** Adds 'child' to the end of the children of 'node'. 434 | * Returns 1 on success, 0 on failure. 435 | */ 436 | CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); 437 | 438 | /** Consolidates adjacent text nodes. 
439 | */ 440 | CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); 441 | 442 | /** 443 | * ## Parsing 444 | * 445 | * Simple interface: 446 | * 447 | * cmark_node *document = cmark_parse_document("Hello *world*", 13, 448 | * CMARK_OPT_DEFAULT); 449 | * 450 | * Streaming interface: 451 | * 452 | * cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); 453 | * FILE *fp = fopen("myfile.md", "rb"); 454 | * while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { 455 | * cmark_parser_feed(parser, buffer, bytes); 456 | * if (bytes < sizeof(buffer)) { 457 | * break; 458 | * } 459 | * } 460 | * document = cmark_parser_finish(parser); 461 | * cmark_parser_free(parser); 462 | */ 463 | 464 | /** Creates a new parser object. 465 | */ 466 | CMARK_EXPORT 467 | cmark_parser *cmark_parser_new(int options); 468 | 469 | /** Creates a new parser object with the given memory allocator 470 | * 471 | * A generalization of `cmark_parser_new`: 472 | * ```c 473 | * cmark_parser_new(options) 474 | * ``` 475 | * is the same as: 476 | * ```c 477 | * cmark_parser_new_with_mem(options, cmark_get_default_mem_allocator()) 478 | * ``` 479 | */ 480 | CMARK_EXPORT 481 | cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); 482 | 483 | /** Creates a new parser object with the given node to use as the root 484 | * node of the parsed AST. 485 | * 486 | * When parsing, children are always appended, not prepended; that means 487 | * if `root` already has children, the newly-parsed children will appear 488 | * after the given children. 489 | * 490 | * A generalization of `cmark_parser_new_with_mem`: 491 | * ```c 492 | * cmark_parser_new_with_mem(options, mem) 493 | * ``` 494 | * is approximately the same as: 495 | * ```c 496 | * cmark_parser_new_with_mem_into_root(options, mem, cmark_node_new(CMARK_NODE_DOCUMENT)) 497 | * ``` 498 | * 499 | * This is useful for creating a single document out of multiple parsed 500 | * document fragments. 
501 | */ 502 | CMARK_EXPORT 503 | cmark_parser *cmark_parser_new_with_mem_into_root( 504 | int options, cmark_mem *mem, cmark_node *root); 505 | 506 | /** Frees memory allocated for a parser object. 507 | */ 508 | CMARK_EXPORT 509 | void cmark_parser_free(cmark_parser *parser); 510 | 511 | /** Feeds a string of length 'len' to 'parser'. 512 | */ 513 | CMARK_EXPORT 514 | void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); 515 | 516 | /** Finish parsing and return a pointer to a tree of nodes. 517 | */ 518 | CMARK_EXPORT 519 | cmark_node *cmark_parser_finish(cmark_parser *parser); 520 | 521 | /** Parse a CommonMark document in 'buffer' of length 'len'. 522 | * Returns a pointer to a tree of nodes. The memory allocated for 523 | * the node tree should be released using 'cmark_node_free' 524 | * when it is no longer needed. 525 | */ 526 | CMARK_EXPORT 527 | cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); 528 | 529 | /** Parse a CommonMark document in file 'f', returning a pointer to 530 | * a tree of nodes. The memory allocated for the node tree should be 531 | * released using 'cmark_node_free' when it is no longer needed. 532 | */ 533 | CMARK_EXPORT 534 | cmark_node *cmark_parse_file(FILE *f, int options); 535 | 536 | /** 537 | * ## Rendering 538 | */ 539 | 540 | /** Render a 'node' tree as XML. It is the caller's responsibility 541 | * to free the returned buffer. 542 | */ 543 | CMARK_EXPORT 544 | char *cmark_render_xml(cmark_node *root, int options); 545 | 546 | /** Render a 'node' tree as an HTML fragment. It is up to the user 547 | * to add an appropriate header and footer. It is the caller's 548 | * responsibility to free the returned buffer. 549 | */ 550 | CMARK_EXPORT 551 | char *cmark_render_html(cmark_node *root, int options); 552 | 553 | /** Render a 'node' tree as a groff man page, without the header. 554 | * It is the caller's responsibility to free the returned buffer. 
555 | */ 556 | CMARK_EXPORT 557 | char *cmark_render_man(cmark_node *root, int options, int width); 558 | 559 | /** Render a 'node' tree as a commonmark document. 560 | * It is the caller's responsibility to free the returned buffer. 561 | */ 562 | CMARK_EXPORT 563 | char *cmark_render_commonmark(cmark_node *root, int options, int width); 564 | 565 | /** Render a 'node' tree as a LaTeX document. 566 | * It is the caller's responsibility to free the returned buffer. 567 | */ 568 | CMARK_EXPORT 569 | char *cmark_render_latex(cmark_node *root, int options, int width); 570 | 571 | /** 572 | * ## Options 573 | */ 574 | 575 | /** Default options. 576 | */ 577 | #define CMARK_OPT_DEFAULT 0 578 | 579 | /** 580 | * ### Options affecting rendering 581 | */ 582 | 583 | /** Include a `data-sourcepos` attribute on all block elements. 584 | */ 585 | #define CMARK_OPT_SOURCEPOS (1 << 1) 586 | 587 | /** Render `softbreak` elements as hard line breaks. 588 | */ 589 | #define CMARK_OPT_HARDBREAKS (1 << 2) 590 | 591 | /** `CMARK_OPT_SAFE` is defined here for API compatibility, 592 | but it no longer has any effect. "Safe" mode is now the default: 593 | set `CMARK_OPT_UNSAFE` to disable it. 594 | */ 595 | #define CMARK_OPT_SAFE (1 << 3) 596 | 597 | /** Render raw HTML and unsafe links (`javascript:`, `vbscript:`, 598 | * `file:`, and `data:`, except for `image/png`, `image/gif`, 599 | * `image/jpeg`, or `image/webp` mime types). By default, 600 | * raw HTML is replaced by a placeholder HTML comment. Unsafe 601 | * links are replaced by empty strings. 602 | */ 603 | #define CMARK_OPT_UNSAFE (1 << 17) 604 | 605 | /** Render `softbreak` elements as spaces. 606 | */ 607 | #define CMARK_OPT_NOBREAKS (1 << 4) 608 | 609 | /** 610 | * ### Options affecting parsing 611 | */ 612 | 613 | /** Legacy option (no effect). 
614 | */ 615 | #define CMARK_OPT_NORMALIZE (1 << 8) 616 | 617 | /** Validate UTF-8 in the input before parsing, replacing illegal 618 | * sequences with the replacement character U+FFFD. 619 | */ 620 | #define CMARK_OPT_VALIDATE_UTF8 (1 << 9) 621 | 622 | /** Convert straight quotes to curly, `---` to em dashes, `--` to en dashes. 623 | */ 624 | #define CMARK_OPT_SMART (1 << 10) 625 | 626 | /** 627 | * ## Version information 628 | */ 629 | 630 | /** The library version as integer for runtime checks. Also available as 631 | * macro CMARK_VERSION for compile time checks. 632 | * 633 | * * Bits 16-23 contain the major version. 634 | * * Bits 8-15 contain the minor version. 635 | * * Bits 0-7 contain the patchlevel. 636 | * 637 | * In hexadecimal format, the number 0x010203 represents version 1.2.3. 638 | */ 639 | CMARK_EXPORT 640 | int cmark_version(void); 641 | 642 | /** The library version string for runtime checks. Also available as 643 | * macro CMARK_VERSION_STRING for compile time checks. 644 | */ 645 | CMARK_EXPORT 646 | const char *cmark_version_string(void); 647 | 648 | /** # AUTHORS 649 | * 650 | * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 
651 | */ 652 | 653 | #ifndef CMARK_NO_SHORT_NAMES 654 | #define NODE_DOCUMENT CMARK_NODE_DOCUMENT 655 | #define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE 656 | #define NODE_LIST CMARK_NODE_LIST 657 | #define NODE_ITEM CMARK_NODE_ITEM 658 | #define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK 659 | #define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK 660 | #define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK 661 | #define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH 662 | #define NODE_HEADING CMARK_NODE_HEADING 663 | #define NODE_HEADER CMARK_NODE_HEADER 664 | #define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK 665 | #define NODE_HRULE CMARK_NODE_HRULE 666 | #define NODE_TEXT CMARK_NODE_TEXT 667 | #define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK 668 | #define NODE_LINEBREAK CMARK_NODE_LINEBREAK 669 | #define NODE_CODE CMARK_NODE_CODE 670 | #define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE 671 | #define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE 672 | #define NODE_EMPH CMARK_NODE_EMPH 673 | #define NODE_STRONG CMARK_NODE_STRONG 674 | #define NODE_LINK CMARK_NODE_LINK 675 | #define NODE_IMAGE CMARK_NODE_IMAGE 676 | #define BULLET_LIST CMARK_BULLET_LIST 677 | #define ORDERED_LIST CMARK_ORDERED_LIST 678 | #define PERIOD_DELIM CMARK_PERIOD_DELIM 679 | #define PAREN_DELIM CMARK_PAREN_DELIM 680 | #endif 681 | 682 | #ifdef __cplusplus 683 | } 684 | #endif 685 | 686 | #endif 687 | -------------------------------------------------------------------------------- /ext/cmark_ctype.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cmark_ctype.h" 4 | 5 | /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other 6 | */ 7 | static const uint8_t cmark_ctype_class[256] = { 8 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 9 | /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 10 | /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11 | /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12 | /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 
13 | /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14 | /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 15 | /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 16 | /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, 17 | /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 | /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 | /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 | /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 | /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 25 | 26 | /** 27 | * Returns 1 if c is a "whitespace" character as defined by the spec. 28 | */ 29 | int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; } 30 | 31 | /** 32 | * Returns 1 if c is an ascii punctuation character. 33 | */ 34 | int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; } 35 | 36 | int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; } 37 | 38 | int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; } 39 | -------------------------------------------------------------------------------- /ext/cmark_ctype.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CMARK_CTYPE_H 2 | #define CMARK_CMARK_CTYPE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /** Locale-independent versions of functions from ctype.h. 9 | * We want cmark to behave the same no matter what the system locale. 
10 | */ 11 | 12 | int cmark_isspace(char c); 13 | 14 | int cmark_ispunct(char c); 15 | 16 | int cmark_isdigit(char c); 17 | 18 | int cmark_isalpha(char c); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /ext/cmark_export.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CMARK_EXPORT_H 3 | #define CMARK_EXPORT_H 4 | 5 | #ifdef CMARK_STATIC_DEFINE 6 | # define CMARK_EXPORT 7 | # define CMARK_NO_EXPORT 8 | #else 9 | # ifndef CMARK_EXPORT 10 | # ifdef cmark_EXPORTS 11 | /* We are building this library */ 12 | # define CMARK_EXPORT __attribute__((visibility("default"))) 13 | # else 14 | /* We are using this library */ 15 | # define CMARK_EXPORT __attribute__((visibility("default"))) 16 | # endif 17 | # endif 18 | 19 | # ifndef CMARK_NO_EXPORT 20 | # define CMARK_NO_EXPORT __attribute__((visibility("hidden"))) 21 | # endif 22 | #endif 23 | 24 | #ifndef CMARK_DEPRECATED 25 | # define CMARK_DEPRECATED __attribute__ ((__deprecated__)) 26 | #endif 27 | 28 | #ifndef CMARK_DEPRECATED_EXPORT 29 | # define CMARK_DEPRECATED_EXPORT CMARK_EXPORT CMARK_DEPRECATED 30 | #endif 31 | 32 | #ifndef CMARK_DEPRECATED_NO_EXPORT 33 | # define CMARK_DEPRECATED_NO_EXPORT CMARK_NO_EXPORT CMARK_DEPRECATED 34 | #endif 35 | 36 | #if 0 /* DEFINE_NO_DEPRECATED */ 37 | # ifndef CMARK_NO_DEPRECATED 38 | # define CMARK_NO_DEPRECATED 39 | # endif 40 | #endif 41 | 42 | #endif /* CMARK_EXPORT_H */ 43 | -------------------------------------------------------------------------------- /ext/cmark_version.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_VERSION_H 2 | #define CMARK_VERSION_H 3 | 4 | #define CMARK_VERSION ((0 << 16) | (31 << 8) | 1) 5 | #define CMARK_VERSION_STRING "0.31.1" 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /ext/commonmark.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "cmark.h" 9 | #include "node.h" 10 | #include "buffer.h" 11 | #include "utf8.h" 12 | #include "scanners.h" 13 | #include "render.h" 14 | 15 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 16 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 17 | #define CR() renderer->cr(renderer) 18 | #define BLANKLINE() renderer->blankline(renderer) 19 | #define ENCODED_SIZE 20 20 | #define LISTMARKER_SIZE 20 21 | 22 | // Functions to convert cmark_nodes to commonmark strings. 23 | 24 | static inline void outc(cmark_renderer *renderer, cmark_escaping escape, 25 | int32_t c, unsigned char nextc) { 26 | bool needs_escaping = false; 27 | bool follows_digit = 28 | renderer->buffer->size > 0 && 29 | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); 30 | char encoded[ENCODED_SIZE]; 31 | int options = renderer->options; 32 | 33 | needs_escaping = 34 | c < 0x80 && escape != LITERAL && 35 | ((escape == NORMAL && 36 | (c < 0x20 || 37 | c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || 38 | c == '>' || c == '\\' || c == '`' || 39 | (c == '!' && (!nextc || nextc == '[')) || 40 | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || 41 | ((CMARK_OPT_SMART & options) && 42 | ((c == '-' && nextc == '-') || 43 | (c == '.' && nextc == '.') || 44 | c == '"' || c == '\'')) || 45 | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && 46 | // begin_content doesn't get set to false til we've passed digits 47 | // at the beginning of line, so... 48 | !follows_digit) || 49 | (renderer->begin_content && (c == '.' 
|| c == ')') && follows_digit && 50 | (nextc == 0 || cmark_isspace(nextc))))) || 51 | (escape == URL && 52 | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || 53 | c == ')' || c == '(')) || 54 | (escape == TITLE && 55 | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); 56 | 57 | if (needs_escaping) { 58 | if (escape == URL && cmark_isspace(c)) { 59 | // percent-encode whitespace, zero-padded to two hex digits (tab -> %09) 60 | snprintf(encoded, ENCODED_SIZE, "%%%02X", c); 61 | cmark_strbuf_puts(renderer->buffer, encoded); 62 | renderer->column += 3; 63 | } else if (cmark_ispunct(c)) { 64 | cmark_render_ascii(renderer, "\\"); 65 | cmark_render_code_point(renderer, c); 66 | } else { // render as entity 67 | snprintf(encoded, ENCODED_SIZE, "&#%d;", c); 68 | cmark_strbuf_puts(renderer->buffer, encoded); 69 | renderer->column += (int)strlen(encoded); 70 | } 71 | } else { 72 | cmark_render_code_point(renderer, c); 73 | } 74 | } 75 | 76 | static int longest_backtick_sequence(const char *code) { 77 | int longest = 0; 78 | int current = 0; 79 | size_t i = 0; 80 | size_t code_len = strlen(code); 81 | while (i <= code_len) { 82 | if (code[i] == '`') { 83 | current++; 84 | } else { 85 | if (current > longest) { 86 | longest = current; 87 | } 88 | current = 0; 89 | } 90 | i++; 91 | } 92 | return longest; 93 | } 94 | 95 | static int shortest_unused_backtick_sequence(const char *code) { 96 | // note: if the shortest sequence is >= 32, this returns 32 97 | // so as not to overflow the bit array. 
98 | uint32_t used = 1; 99 | int current = 0; 100 | size_t i = 0; 101 | size_t code_len = strlen(code); 102 | while (i <= code_len) { 103 | if (code[i] == '`') { 104 | current++; 105 | } else { 106 | if (current > 0 && current < 32) { 107 | used |= (1U << current); 108 | } 109 | current = 0; 110 | } 111 | i++; 112 | } 113 | // return number of first bit that is 0: 114 | i = 0; 115 | while (i < 32 && used & 1) { 116 | used = used >> 1; 117 | i++; 118 | } 119 | return (int)i; 120 | } 121 | 122 | static bool is_autolink(cmark_node *node) { 123 | const unsigned char *title; 124 | const unsigned char *url; 125 | cmark_node *link_text; 126 | 127 | if (node->type != CMARK_NODE_LINK) { 128 | return false; 129 | } 130 | 131 | url = node->as.link.url; 132 | if (url == NULL || _scan_scheme(url) == 0) { 133 | return false; 134 | } 135 | 136 | title = node->as.link.title; 137 | // if it has a title, we can't treat it as an autolink: 138 | if (title && title[0]) { 139 | return false; 140 | } 141 | 142 | link_text = node->first_child; 143 | if (link_text == NULL) { 144 | return false; 145 | } 146 | cmark_consolidate_text_nodes(link_text); 147 | if (strncmp((const char *)url, "mailto:", 7) == 0) { 148 | url += 7; 149 | } 150 | return link_text->data != NULL && 151 | strcmp((const char *)url, (char *)link_text->data) == 0; 152 | } 153 | 154 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 155 | cmark_event_type ev_type, int options) { 156 | cmark_node *tmp; 157 | int list_number; 158 | cmark_delim_type list_delim; 159 | size_t numticks; 160 | bool extra_spaces; 161 | size_t i; 162 | bool entering = (ev_type == CMARK_EVENT_ENTER); 163 | const char *info, *code, *title; 164 | char fencechar[2] = {'\0', '\0'}; 165 | size_t code_len; 166 | char listmarker[LISTMARKER_SIZE]; 167 | const char *emph_delim; 168 | bool first_in_list_item; 169 | bufsize_t marker_width; 170 | bool has_nonspace; 171 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) 
&& 172 | !(CMARK_OPT_HARDBREAKS & options); 173 | 174 | // Don't adjust tight list status til we've started the list. 175 | // Otherwise we lose the blank line between a paragraph and 176 | // a following list. 177 | if (entering) { 178 | if (node->parent && node->parent->type == CMARK_NODE_ITEM) { 179 | renderer->in_tight_list_item = node->parent->parent->as.list.tight; 180 | } 181 | } else { 182 | if (node->type == CMARK_NODE_LIST) { 183 | renderer->in_tight_list_item = 184 | node->parent && 185 | node->parent->type == CMARK_NODE_ITEM && 186 | node->parent->parent->as.list.tight; 187 | } 188 | } 189 | 190 | switch (node->type) { 191 | case CMARK_NODE_DOCUMENT: 192 | break; 193 | 194 | case CMARK_NODE_BLOCK_QUOTE: 195 | if (entering) { 196 | LIT("> "); 197 | renderer->begin_content = true; 198 | cmark_strbuf_puts(renderer->prefix, "> "); 199 | } else { 200 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); 201 | BLANKLINE(); 202 | } 203 | break; 204 | 205 | case CMARK_NODE_LIST: 206 | if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { 207 | // this ensures that a following indented code block or list will be 208 | // inteprereted correctly. 209 | CR(); 210 | LIT(""); 211 | BLANKLINE(); 212 | } 213 | break; 214 | 215 | case CMARK_NODE_ITEM: 216 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 217 | marker_width = 4; 218 | } else { 219 | list_number = cmark_node_get_list_start(node->parent); 220 | list_delim = cmark_node_get_list_delim(node->parent); 221 | tmp = node; 222 | while (tmp->prev) { 223 | tmp = tmp->prev; 224 | list_number += 1; 225 | } 226 | // we ensure a width of at least 4 so 227 | // we get nice transition from single digits 228 | // to double 229 | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, 230 | list_delim == CMARK_PAREN_DELIM ? ")" : ".", 231 | list_number < 10 ? 
" " : " "); 232 | marker_width = (bufsize_t)strlen(listmarker); 233 | } 234 | if (entering) { 235 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 236 | LIT(" - "); 237 | renderer->begin_content = true; 238 | } else { 239 | LIT(listmarker); 240 | renderer->begin_content = true; 241 | } 242 | for (i = marker_width; i--;) { 243 | cmark_strbuf_putc(renderer->prefix, ' '); 244 | } 245 | } else { 246 | cmark_strbuf_truncate(renderer->prefix, 247 | renderer->prefix->size - marker_width); 248 | CR(); 249 | } 250 | break; 251 | 252 | case CMARK_NODE_HEADING: 253 | if (entering) { 254 | for (i = cmark_node_get_heading_level(node); i > 0; i--) { 255 | LIT("#"); 256 | } 257 | LIT(" "); 258 | renderer->begin_content = true; 259 | renderer->no_linebreaks = true; 260 | } else { 261 | renderer->no_linebreaks = false; 262 | BLANKLINE(); 263 | } 264 | break; 265 | 266 | case CMARK_NODE_CODE_BLOCK: 267 | 268 | first_in_list_item = node->prev == NULL && node->parent && 269 | node->parent->type == CMARK_NODE_ITEM; 270 | 271 | if (!first_in_list_item) { 272 | BLANKLINE(); 273 | } 274 | info = cmark_node_get_fence_info(node); 275 | fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; 276 | code = cmark_node_get_literal(node); 277 | 278 | numticks = longest_backtick_sequence(code) + 1; 279 | if (numticks < 3) { 280 | numticks = 3; 281 | } 282 | for (i = 0; i < numticks; i++) { 283 | LIT(fencechar); 284 | } 285 | LIT(" "); 286 | OUT(info, false, LITERAL); 287 | CR(); 288 | OUT(cmark_node_get_literal(node), false, LITERAL); 289 | CR(); 290 | for (i = 0; i < numticks; i++) { 291 | LIT(fencechar); 292 | } 293 | 294 | BLANKLINE(); 295 | break; 296 | 297 | case CMARK_NODE_HTML_BLOCK: 298 | BLANKLINE(); 299 | OUT(cmark_node_get_literal(node), false, LITERAL); 300 | BLANKLINE(); 301 | break; 302 | 303 | case CMARK_NODE_CUSTOM_BLOCK: 304 | BLANKLINE(); 305 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 306 | false, LITERAL); 307 | BLANKLINE(); 308 | break; 309 | 310 | case CMARK_NODE_THEMATIC_BREAK: 311 | BLANKLINE(); 312 | LIT("-----"); 313 | BLANKLINE(); 314 | break; 315 | 316 | case CMARK_NODE_PARAGRAPH: 317 | if (!entering) { 318 | BLANKLINE(); 319 | } 320 | break; 321 | 322 | case CMARK_NODE_TEXT: 323 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 324 | break; 325 | 326 | case CMARK_NODE_LINEBREAK: 327 | if (!(CMARK_OPT_HARDBREAKS & options)) { 328 | LIT(" "); 329 | } 330 | CR(); 331 | break; 332 | 333 | case CMARK_NODE_SOFTBREAK: 334 | if (CMARK_OPT_HARDBREAKS & options) { 335 | LIT(" "); 336 | CR(); 337 | } else if (!renderer->no_linebreaks && renderer->width == 0 && 338 | !(CMARK_OPT_HARDBREAKS & options) && 339 | !(CMARK_OPT_NOBREAKS & options)) { 340 | CR(); 341 | } else { 342 | OUT(" ", allow_wrap, LITERAL); 343 | } 344 | break; 345 | 346 | case CMARK_NODE_CODE: 347 | code = cmark_node_get_literal(node); 348 | code_len = strlen(code); 349 | numticks = shortest_unused_backtick_sequence(code); 350 | has_nonspace = false; 351 | for (i=0; i < code_len; i++) { 352 | if (code[i] != ' ') { 353 | has_nonspace = true; 354 | break; 355 | } 356 | } 357 | extra_spaces = code_len == 0 || 358 | code[0] == '`' || code[code_len - 1] == '`' || 359 | (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); 360 | for (i = 0; i < numticks; i++) { 361 | LIT("`"); 362 | } 363 | if (extra_spaces) { 364 | LIT(" "); 365 | } 366 | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); 367 | if (extra_spaces) { 368 | LIT(" "); 369 | } 370 | for (i = 0; i < numticks; i++) { 371 | LIT("`"); 372 | } 373 | break; 374 | 375 | case CMARK_NODE_HTML_INLINE: 376 | OUT(cmark_node_get_literal(node), false, LITERAL); 377 | break; 378 | 379 | case CMARK_NODE_CUSTOM_INLINE: 380 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 381 | false, LITERAL); 382 | break; 383 | 384 | case CMARK_NODE_STRONG: 385 | if (entering) { 386 | LIT("**"); 387 | } else { 388 | LIT("**"); 389 | } 390 | break; 391 | 392 | case CMARK_NODE_EMPH: 393 | // If we have EMPH(EMPH(x)), we need to use *_x_* 394 | // because **x** is STRONG(x): 395 | if (node->parent && node->parent->type == CMARK_NODE_EMPH && 396 | node->next == NULL && node->prev == NULL) { 397 | emph_delim = "_"; 398 | } else { 399 | emph_delim = "*"; 400 | } 401 | if (entering) { 402 | LIT(emph_delim); 403 | } else { 404 | LIT(emph_delim); 405 | } 406 | break; 407 | 408 | case CMARK_NODE_LINK: 409 | if (is_autolink(node)) { 410 | if (entering) { 411 | LIT("<"); 412 | if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { 413 | LIT((const char *)cmark_node_get_url(node) + 7); 414 | } else { 415 | LIT((const char *)cmark_node_get_url(node)); 416 | } 417 | LIT(">"); 418 | // return signal to skip contents of node... 
419 | return 0; 420 | } 421 | } else { 422 | if (entering) { 423 | LIT("["); 424 | } else { 425 | LIT("]("); 426 | OUT(cmark_node_get_url(node), false, URL); 427 | title = cmark_node_get_title(node); 428 | if (strlen(title) > 0) { 429 | LIT(" \""); 430 | OUT(title, false, TITLE); 431 | LIT("\""); 432 | } 433 | LIT(")"); 434 | } 435 | } 436 | break; 437 | 438 | case CMARK_NODE_IMAGE: 439 | if (entering) { 440 | LIT("!["); 441 | } else { 442 | LIT("]("); 443 | OUT(cmark_node_get_url(node), false, URL); 444 | title = cmark_node_get_title(node); 445 | if (strlen(title) > 0) { 446 | OUT(" \"", allow_wrap, LITERAL); 447 | OUT(title, false, TITLE); 448 | LIT("\""); 449 | } 450 | LIT(")"); 451 | } 452 | break; 453 | 454 | default: 455 | assert(false); 456 | break; 457 | } 458 | 459 | return 1; 460 | } 461 | 462 | char *cmark_render_commonmark(cmark_node *root, int options, int width) { 463 | if (options & CMARK_OPT_HARDBREAKS) { 464 | // disable breaking on width, since it has 465 | // a different meaning with OPT_HARDBREAKS 466 | width = 0; 467 | } 468 | return cmark_render(root, options, width, outc, S_render_node); 469 | } 470 | -------------------------------------------------------------------------------- /ext/houdini.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_HOUDINI_H 2 | #define CMARK_HOUDINI_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #include "buffer.h" 11 | 12 | #ifdef HOUDINI_USE_LOCALE 13 | #define _isxdigit(c) isxdigit(c) 14 | #define _isdigit(c) isdigit(c) 15 | #else 16 | /* 17 | * Helper _isdigit methods -- do not trust the current locale 18 | * */ 19 | #define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) 20 | #define _isdigit(c) ((c) >= '0' && (c) <= '9') 21 | #endif 22 | 23 | #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) 24 | #define HOUDINI_UNESCAPED_SIZE(x) (x) 25 | 26 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 
27 | bufsize_t size); 28 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, 29 | bufsize_t size, int secure); 30 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 31 | bufsize_t size); 32 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 33 | bufsize_t size); 34 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, 35 | bufsize_t size); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /ext/houdini_href_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #if !defined(__has_builtin) 8 | # define __has_builtin(b) 0 9 | #endif 10 | 11 | #if !__has_builtin(__builtin_expect) 12 | # define __builtin_expect(e, v) (e) 13 | #endif 14 | 15 | #define likely(e) __builtin_expect((e), 1) 16 | #define unlikely(e) __builtin_expect((e), 0) 17 | 18 | /* 19 | * The following characters will not be escaped: 20 | * 21 | * -_.+!*'(),%#@?=;:/,+&$~ alphanum 22 | * 23 | * Note that this character set is the addition of: 24 | * 25 | * - The characters which are safe to be in an URL 26 | * - The characters which are *not* safe to be in 27 | * an URL because they are RESERVED characters. 28 | * 29 | * We assume (lazily) that any RESERVED char that 30 | * appears inside an URL is actually meant to 31 | * have its native function (i.e. as an URL 32 | * component/separator) and hence needs no escaping. 33 | * 34 | * There are two exceptions: the characters & (amp) 35 | * and ' (single quote) do not appear in the table. 36 | * They are meant to appear in the URL as components, 37 | * yet they require special HTML-entity escaping 38 | * to generate valid HTML markup. 39 | * 40 | * All other characters will be escaped to %XX. 
41 | * 42 | */ 43 | static const char HREF_SAFE[] = { 44 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 48 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49 | 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55 | }; 56 | 57 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { 58 | static const uint8_t hex_chars[] = "0123456789ABCDEF"; 59 | bufsize_t i = 0, org; 60 | uint8_t hex_str[3]; 61 | 62 | hex_str[0] = '%'; 63 | 64 | while (i < size) { 65 | org = i; 66 | while (i < size && HREF_SAFE[src[i]] != 0) 67 | i++; 68 | 69 | if (likely(i > org)) 70 | cmark_strbuf_put(ob, src + org, i - org); 71 | 72 | /* escaping */ 73 | if (i >= size) 74 | break; 75 | 76 | switch (src[i]) { 77 | /* amp appears all the time in URLs, but needs 78 | * HTML-entity escaping to be inside an href */ 79 | case '&': 80 | cmark_strbuf_puts(ob, "&"); 81 | break; 82 | 83 | /* the single quote is a valid URL character 84 | * according to the standard; it needs HTML 85 | * entity escaping too */ 86 | case '\'': 87 | cmark_strbuf_puts(ob, "'"); 88 | break; 89 | 90 | /* the space can be escaped to %20 or a plus 91 | * sign. we're going with the generic escape 92 | * for now. 
the plus thing is more commonly seen 93 | * when building GET strings */ 94 | #if 0 95 | case ' ': 96 | cmark_strbuf_putc(ob, '+'); 97 | break; 98 | #endif 99 | 100 | /* every other character goes with a %XX escaping */ 101 | default: 102 | hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; 103 | hex_str[2] = hex_chars[src[i] & 0xF]; 104 | cmark_strbuf_put(ob, hex_str, 3); 105 | } 106 | 107 | i++; 108 | } 109 | 110 | return 1; 111 | } 112 | -------------------------------------------------------------------------------- /ext/houdini_html_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #if !defined(__has_builtin) 8 | # define __has_builtin(b) 0 9 | #endif 10 | 11 | #if !__has_builtin(__builtin_expect) 12 | # define __builtin_expect(e, v) (e) 13 | #endif 14 | 15 | #define likely(e) __builtin_expect((e), 1) 16 | #define unlikely(e) __builtin_expect((e), 0) 17 | 18 | /** 19 | * According to the OWASP rules: 20 | * 21 | * & --> & 22 | * < --> < 23 | * > --> > 24 | * " --> " 25 | * ' --> ' ' is not recommended 26 | * / --> / forward slash is included as it helps end an HTML entity 27 | * 28 | */ 29 | static const char HTML_ESCAPE_TABLE[] = { 30 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 32 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 | }; 42 | 43 | static const char *HTML_ESCAPES[] = {"", """, "&", "'", 44 | "/", "<", ">"}; 45 | 46 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, 47 | int secure) { 48 | bufsize_t i = 0, org, esc = 0; 49 | 50 | while (i < size) { 51 | org = i; 52 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) 53 | i++; 54 | 55 | if (i > org) 56 | cmark_strbuf_put(ob, src + org, i - org); 57 | 58 | /* escaping */ 59 | if (unlikely(i >= size)) 60 | break; 61 | 62 | /* The forward slash is only escaped in secure mode */ 63 | if ((src[i] == '/' || src[i] == '\'') && !secure) { 64 | cmark_strbuf_putc(ob, src[i]); 65 | } else { 66 | cmark_strbuf_puts(ob, HTML_ESCAPES[esc]); 67 | } 68 | 69 | i++; 70 | } 71 | 72 | return 1; 73 | } 74 | -------------------------------------------------------------------------------- /ext/houdini_html_u.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "buffer.h" 6 | #include "houdini.h" 7 | #include "utf8.h" 8 | #include "entities.inc" 9 | 10 | #if !defined(__has_builtin) 11 | # define __has_builtin(b) 0 12 | #endif 13 | 14 | #if !__has_builtin(__builtin_expect) 15 | # define __builtin_expect(e, v) (e) 16 | #endif 17 | 18 | #define likely(e) __builtin_expect((e), 1) 19 | #define unlikely(e) __builtin_expect((e), 0) 20 | 21 | /* Binary tree lookup code for entities added by JGM */ 22 | 23 | static const unsigned char *S_lookup(int i, int low, int hi, 24 | const unsigned char *s, int len, 25 | bufsize_t *size_out) { 26 | int j; 27 | uint32_t value = cmark_entities[i]; 28 | const unsigned char *ent_name = cmark_entity_text + ENT_TEXT_IDX(value); 29 | int ent_len = ENT_NAME_SIZE(value); 30 | int min_len = len < ent_len ? 
len : ent_len; 31 | int cmp = 32 | strncmp((const char *)s, (const char *)ent_name, min_len); 33 | if (cmp == 0) 34 | cmp = len - ent_len; 35 | if (cmp == 0) { 36 | *size_out = ENT_REPL_SIZE(value); 37 | return ent_name + ent_len; 38 | } else if (cmp <= 0 && i > low) { 39 | j = i - ((i - low) / 2); 40 | if (j == i) 41 | j -= 1; 42 | return S_lookup(j, low, i - 1, s, len, size_out); 43 | } else if (cmp > 0 && i < hi) { 44 | j = i + ((hi - i) / 2); 45 | if (j == i) 46 | j += 1; 47 | return S_lookup(j, i + 1, hi, s, len, size_out); 48 | } else { 49 | return NULL; 50 | } 51 | } 52 | 53 | static const unsigned char *S_lookup_entity(const unsigned char *s, int len, 54 | bufsize_t *size_out) { 55 | return S_lookup(ENT_TABLE_SIZE / 2, 0, ENT_TABLE_SIZE - 1, s, len, size_out); 56 | } 57 | 58 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 59 | bufsize_t size) { 60 | bufsize_t i = 0; 61 | 62 | if (size >= 3 && src[0] == '#') { 63 | int codepoint = 0; 64 | int num_digits = 0; 65 | int max_digits = 7; 66 | 67 | if (_isdigit(src[1])) { 68 | for (i = 1; i < size && _isdigit(src[i]); ++i) { 69 | codepoint = (codepoint * 10) + (src[i] - '0'); 70 | 71 | if (codepoint >= 0x110000) { 72 | // Keep counting digits but 73 | // avoid integer overflow. 74 | codepoint = 0x110000; 75 | } 76 | } 77 | 78 | num_digits = i - 1; 79 | max_digits = 7; 80 | } 81 | 82 | else if (src[1] == 'x' || src[1] == 'X') { 83 | for (i = 2; i < size && _isxdigit(src[i]); ++i) { 84 | codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); 85 | 86 | if (codepoint >= 0x110000) { 87 | // Keep counting digits but 88 | // avoid integer overflow. 
89 | codepoint = 0x110000; 90 | } 91 | } 92 | 93 | num_digits = i - 2; 94 | max_digits = 6; 95 | } 96 | 97 | if (num_digits >= 1 && num_digits <= max_digits && 98 | i < size && src[i] == ';') { 99 | if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || 100 | codepoint >= 0x110000) { 101 | codepoint = 0xFFFD; 102 | } 103 | cmark_utf8proc_encode_char(codepoint, ob); 104 | return i + 1; 105 | } 106 | } 107 | 108 | else { 109 | if (size > ENT_MAX_LENGTH) 110 | size = ENT_MAX_LENGTH; 111 | 112 | for (i = ENT_MIN_LENGTH; i < size; ++i) { 113 | if (src[i] == ' ') 114 | break; 115 | 116 | if (src[i] == ';') { 117 | bufsize_t size; 118 | const unsigned char *entity = S_lookup_entity(src, i, &size); 119 | 120 | if (entity != NULL) { 121 | cmark_strbuf_put(ob, entity, size); 122 | return i + 1; 123 | } 124 | 125 | break; 126 | } 127 | } 128 | } 129 | 130 | return 0; 131 | } 132 | 133 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 134 | bufsize_t size) { 135 | bufsize_t i = 0, org, ent; 136 | 137 | while (i < size) { 138 | org = i; 139 | while (i < size && src[i] != '&') 140 | i++; 141 | 142 | if (likely(i > org)) { 143 | if (unlikely(org == 0)) { 144 | if (i >= size) 145 | return 0; 146 | 147 | cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); 148 | } 149 | 150 | cmark_strbuf_put(ob, src + org, i - org); 151 | } 152 | 153 | /* escaping */ 154 | if (i >= size) 155 | break; 156 | 157 | i++; 158 | 159 | ent = houdini_unescape_ent(ob, src + i, size - i); 160 | i += ent; 161 | 162 | /* not really an entity */ 163 | if (ent == 0) 164 | cmark_strbuf_putc(ob, '&'); 165 | } 166 | 167 | return 1; 168 | } 169 | 170 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 171 | bufsize_t size) { 172 | if (!houdini_unescape_html(ob, src, size)) 173 | cmark_strbuf_put(ob, src, size); 174 | } 175 | -------------------------------------------------------------------------------- /ext/html.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark_ctype.h" 8 | #include "cmark.h" 9 | #include "node.h" 10 | #include "buffer.h" 11 | #include "houdini.h" 12 | #include "scanners.h" 13 | 14 | #define BUFFER_SIZE 100 15 | 16 | // Functions to convert cmark_nodes to HTML strings. 17 | 18 | static void escape_html(cmark_strbuf *dest, const unsigned char *source, 19 | bufsize_t length) { 20 | houdini_escape_html(dest, source, length, 0); 21 | } 22 | 23 | static inline void cr(cmark_strbuf *html) { 24 | if (html->size && html->ptr[html->size - 1] != '\n') 25 | cmark_strbuf_putc(html, '\n'); 26 | } 27 | 28 | struct render_state { 29 | cmark_strbuf *html; 30 | cmark_node *plain; 31 | }; 32 | 33 | static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, 34 | int options) { 35 | char buffer[BUFFER_SIZE]; 36 | if (CMARK_OPT_SOURCEPOS & options) { 37 | snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", 38 | cmark_node_get_start_line(node), cmark_node_get_start_column(node), 39 | cmark_node_get_end_line(node), cmark_node_get_end_column(node)); 40 | cmark_strbuf_puts(html, buffer); 41 | } 42 | } 43 | 44 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, 45 | struct render_state *state, int options) { 46 | cmark_node *parent; 47 | cmark_node *grandparent; 48 | cmark_strbuf *html = state->html; 49 | char start_heading[] = "plain == node) { // back at original node 57 | state->plain = NULL; 58 | } 59 | 60 | if (state->plain != NULL) { 61 | switch (node->type) { 62 | case CMARK_NODE_TEXT: 63 | case CMARK_NODE_CODE: 64 | case CMARK_NODE_HTML_INLINE: 65 | escape_html(html, node->data, node->len); 66 | break; 67 | 68 | case CMARK_NODE_LINEBREAK: 69 | case CMARK_NODE_SOFTBREAK: 70 | cmark_strbuf_putc(html, ' '); 71 | break; 72 | 73 | default: 74 | break; 75 | } 76 | return 1; 77 | } 78 | 79 | switch (node->type) { 80 | 
case CMARK_NODE_DOCUMENT: 81 | break; 82 | 83 | case CMARK_NODE_BLOCK_QUOTE: 84 | if (entering) { 85 | cr(html); 86 | cmark_strbuf_puts(html, "\n"); 89 | } else { 90 | cr(html); 91 | cmark_strbuf_puts(html, "\n"); 92 | } 93 | break; 94 | 95 | case CMARK_NODE_LIST: { 96 | cmark_list_type list_type = (cmark_list_type)node->as.list.list_type; 97 | int start = node->as.list.start; 98 | 99 | if (entering) { 100 | cr(html); 101 | if (list_type == CMARK_BULLET_LIST) { 102 | cmark_strbuf_puts(html, "\n"); 105 | } else if (start == 1) { 106 | cmark_strbuf_puts(html, "\n"); 109 | } else { 110 | snprintf(buffer, BUFFER_SIZE, "
    \n"); 114 | } 115 | } else { 116 | cmark_strbuf_puts(html, 117 | list_type == CMARK_BULLET_LIST ? "\n" : "
\n"); 118 | } 119 | break; 120 | } 121 | 122 | case CMARK_NODE_ITEM: 123 | if (entering) { 124 | cr(html); 125 | cmark_strbuf_puts(html, "'); 128 | } else { 129 | cmark_strbuf_puts(html, "\n"); 130 | } 131 | break; 132 | 133 | case CMARK_NODE_HEADING: 134 | if (entering) { 135 | cr(html); 136 | start_heading[2] = (char)('0' + node->as.heading.level); 137 | cmark_strbuf_puts(html, start_heading); 138 | S_render_sourcepos(node, html, options); 139 | cmark_strbuf_putc(html, '>'); 140 | } else { 141 | end_heading[3] = (char)('0' + node->as.heading.level); 142 | cmark_strbuf_puts(html, end_heading); 143 | cmark_strbuf_puts(html, ">\n"); 144 | } 145 | break; 146 | 147 | case CMARK_NODE_CODE_BLOCK: 148 | cr(html); 149 | 150 | if (node->as.code.info == NULL || node->as.code.info[0] == 0) { 151 | cmark_strbuf_puts(html, ""); 154 | } else { 155 | bufsize_t first_tag = 0; 156 | while (node->as.code.info[first_tag] && 157 | !cmark_isspace(node->as.code.info[first_tag])) { 158 | first_tag += 1; 159 | } 160 | 161 | cmark_strbuf_puts(html, "as.code.info, "language-", 9) != 0) { 165 | cmark_strbuf_puts(html, "language-"); 166 | } 167 | escape_html(html, node->as.code.info, first_tag); 168 | cmark_strbuf_puts(html, "\">"); 169 | } 170 | 171 | escape_html(html, node->data, node->len); 172 | cmark_strbuf_puts(html, "\n"); 173 | break; 174 | 175 | case CMARK_NODE_HTML_BLOCK: 176 | cr(html); 177 | if (!(options & CMARK_OPT_UNSAFE)) { 178 | cmark_strbuf_puts(html, ""); 179 | } else { 180 | cmark_strbuf_put(html, node->data, node->len); 181 | } 182 | cr(html); 183 | break; 184 | 185 | case CMARK_NODE_CUSTOM_BLOCK: { 186 | unsigned char *block = entering ? 
node->as.custom.on_enter : 187 | node->as.custom.on_exit; 188 | cr(html); 189 | if (block) { 190 | cmark_strbuf_puts(html, (char *)block); 191 | } 192 | cr(html); 193 | break; 194 | } 195 | 196 | case CMARK_NODE_THEMATIC_BREAK: 197 | cr(html); 198 | cmark_strbuf_puts(html, "\n"); 201 | break; 202 | 203 | case CMARK_NODE_PARAGRAPH: 204 | parent = cmark_node_parent(node); 205 | grandparent = cmark_node_parent(parent); 206 | if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) { 207 | tight = grandparent->as.list.tight; 208 | } else { 209 | tight = false; 210 | } 211 | if (!tight) { 212 | if (entering) { 213 | cr(html); 214 | cmark_strbuf_puts(html, "'); 217 | } else { 218 | cmark_strbuf_puts(html, "

\n"); 219 | } 220 | } 221 | break; 222 | 223 | case CMARK_NODE_TEXT: 224 | escape_html(html, node->data, node->len); 225 | break; 226 | 227 | case CMARK_NODE_LINEBREAK: 228 | cmark_strbuf_puts(html, "
\n"); 229 | break; 230 | 231 | case CMARK_NODE_SOFTBREAK: 232 | if (options & CMARK_OPT_HARDBREAKS) { 233 | cmark_strbuf_puts(html, "
\n"); 234 | } else if (options & CMARK_OPT_NOBREAKS) { 235 | cmark_strbuf_putc(html, ' '); 236 | } else { 237 | cmark_strbuf_putc(html, '\n'); 238 | } 239 | break; 240 | 241 | case CMARK_NODE_CODE: 242 | cmark_strbuf_puts(html, ""); 243 | escape_html(html, node->data, node->len); 244 | cmark_strbuf_puts(html, ""); 245 | break; 246 | 247 | case CMARK_NODE_HTML_INLINE: 248 | if (!(options & CMARK_OPT_UNSAFE)) { 249 | cmark_strbuf_puts(html, ""); 250 | } else { 251 | cmark_strbuf_put(html, node->data, node->len); 252 | } 253 | break; 254 | 255 | case CMARK_NODE_CUSTOM_INLINE: { 256 | unsigned char *block = entering ? node->as.custom.on_enter : 257 | node->as.custom.on_exit; 258 | if (block) { 259 | cmark_strbuf_puts(html, (char *)block); 260 | } 261 | break; 262 | } 263 | 264 | case CMARK_NODE_STRONG: 265 | if (entering) { 266 | cmark_strbuf_puts(html, ""); 267 | } else { 268 | cmark_strbuf_puts(html, ""); 269 | } 270 | break; 271 | 272 | case CMARK_NODE_EMPH: 273 | if (entering) { 274 | cmark_strbuf_puts(html, ""); 275 | } else { 276 | cmark_strbuf_puts(html, ""); 277 | } 278 | break; 279 | 280 | case CMARK_NODE_LINK: 281 | if (entering) { 282 | cmark_strbuf_puts(html, "as.link.url && ((options & CMARK_OPT_UNSAFE) || 284 | !(_scan_dangerous_url(node->as.link.url)))) { 285 | houdini_escape_href(html, node->as.link.url, 286 | (bufsize_t)strlen((char *)node->as.link.url)); 287 | } 288 | if (node->as.link.title) { 289 | cmark_strbuf_puts(html, "\" title=\""); 290 | escape_html(html, node->as.link.title, 291 | (bufsize_t)strlen((char *)node->as.link.title)); 292 | } 293 | cmark_strbuf_puts(html, "\">"); 294 | } else { 295 | cmark_strbuf_puts(html, ""); 296 | } 297 | break; 298 | 299 | case CMARK_NODE_IMAGE: 300 | if (entering) { 301 | cmark_strbuf_puts(html, "as.link.url && ((options & CMARK_OPT_UNSAFE) || 303 | !(_scan_dangerous_url(node->as.link.url)))) { 304 | houdini_escape_href(html, node->as.link.url, 305 | (bufsize_t)strlen((char *)node->as.link.url)); 306 | } 307 
| cmark_strbuf_puts(html, "\" alt=\""); 308 | state->plain = node; 309 | } else { 310 | if (node->as.link.title) { 311 | cmark_strbuf_puts(html, "\" title=\""); 312 | escape_html(html, node->as.link.title, 313 | (bufsize_t)strlen((char *)node->as.link.title)); 314 | } 315 | 316 | cmark_strbuf_puts(html, "\" />"); 317 | } 318 | break; 319 | 320 | default: 321 | assert(false); 322 | break; 323 | } 324 | 325 | // cmark_strbuf_putc(html, 'x'); 326 | return 1; 327 | } 328 | 329 | char *cmark_render_html(cmark_node *root, int options) { 330 | char *result; 331 | cmark_strbuf html = CMARK_BUF_INIT(root->mem); 332 | cmark_event_type ev_type; 333 | cmark_node *cur; 334 | struct render_state state = {&html, NULL}; 335 | cmark_iter *iter = cmark_iter_new(root); 336 | 337 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 338 | cur = cmark_iter_get_node(iter); 339 | S_render_node(cur, ev_type, &state, options); 340 | } 341 | result = (char *)cmark_strbuf_detach(&html); 342 | 343 | cmark_iter_free(iter); 344 | return result; 345 | } 346 | -------------------------------------------------------------------------------- /ext/inlines.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_INLINES_H 2 | #define CMARK_INLINES_H 3 | 4 | #include "chunk.h" 5 | #include "references.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url); 12 | unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title); 13 | 14 | void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, 15 | cmark_reference_map *refmap, int options); 16 | 17 | bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, 18 | cmark_reference_map *refmap); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /ext/iterator.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "node.h" 6 | #include "cmark.h" 7 | #include "iterator.h" 8 | 9 | static const int S_leaf_mask = 10 | (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) | 11 | (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | 12 | (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | 13 | (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); 14 | 15 | cmark_iter *cmark_iter_new(cmark_node *root) { 16 | if (root == NULL) { 17 | return NULL; 18 | } 19 | cmark_mem *mem = root->mem; 20 | cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter)); 21 | iter->mem = mem; 22 | iter->root = root; 23 | iter->cur.ev_type = CMARK_EVENT_NONE; 24 | iter->cur.node = NULL; 25 | iter->next.ev_type = CMARK_EVENT_ENTER; 26 | iter->next.node = root; 27 | return iter; 28 | } 29 | 30 | void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } 31 | 32 | static bool S_is_leaf(cmark_node *node) { 33 | return ((1 << node->type) & S_leaf_mask) != 0; 34 | } 35 | 36 | cmark_event_type cmark_iter_next(cmark_iter *iter) { 37 | cmark_event_type ev_type = iter->next.ev_type; 38 | cmark_node *node = iter->next.node; 39 | 40 | iter->cur.ev_type = ev_type; 41 | iter->cur.node = node; 42 | 43 | if (ev_type == CMARK_EVENT_DONE) { 44 | return ev_type; 45 | } 46 | 47 | /* roll forward to next item, setting both fields */ 48 | if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { 49 | if (node->first_child == NULL) { 50 | /* stay on this node but exit */ 51 | iter->next.ev_type = CMARK_EVENT_EXIT; 52 | } else { 53 | iter->next.ev_type = CMARK_EVENT_ENTER; 54 | iter->next.node = node->first_child; 55 | } 56 | } else if (node == iter->root) { 57 | /* don't move past root */ 58 | iter->next.ev_type = CMARK_EVENT_DONE; 59 | iter->next.node = NULL; 60 | } else if (node->next) { 61 | iter->next.ev_type = CMARK_EVENT_ENTER; 62 | iter->next.node = 
node->next; 63 | } else if (node->parent) { 64 | iter->next.ev_type = CMARK_EVENT_EXIT; 65 | iter->next.node = node->parent; 66 | } else { 67 | assert(false); 68 | iter->next.ev_type = CMARK_EVENT_DONE; 69 | iter->next.node = NULL; 70 | } 71 | 72 | return ev_type; 73 | } 74 | 75 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current, 76 | cmark_event_type event_type) { 77 | iter->next.ev_type = event_type; 78 | iter->next.node = current; 79 | cmark_iter_next(iter); 80 | } 81 | 82 | cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; } 83 | 84 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) { 85 | return iter->cur.ev_type; 86 | } 87 | 88 | cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; } 89 | 90 | void cmark_consolidate_text_nodes(cmark_node *root) { 91 | if (root == NULL) { 92 | return; 93 | } 94 | cmark_iter *iter = cmark_iter_new(root); 95 | cmark_strbuf buf = CMARK_BUF_INIT(iter->mem); 96 | cmark_event_type ev_type; 97 | cmark_node *cur, *tmp, *next; 98 | 99 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 100 | cur = cmark_iter_get_node(iter); 101 | if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT && 102 | cur->next && cur->next->type == CMARK_NODE_TEXT) { 103 | cmark_strbuf_clear(&buf); 104 | cmark_strbuf_put(&buf, cur->data, cur->len); 105 | tmp = cur->next; 106 | while (tmp && tmp->type == CMARK_NODE_TEXT) { 107 | cmark_iter_next(iter); // advance pointer 108 | cmark_strbuf_put(&buf, tmp->data, tmp->len); 109 | cur->end_column = tmp->end_column; 110 | next = tmp->next; 111 | cmark_node_free(tmp); 112 | tmp = next; 113 | } 114 | iter->mem->free(cur->data); 115 | cur->len = buf.size; 116 | cur->data = cmark_strbuf_detach(&buf); 117 | } 118 | } 119 | 120 | cmark_strbuf_free(&buf); 121 | cmark_iter_free(iter); 122 | } 123 | -------------------------------------------------------------------------------- /ext/iterator.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CMARK_ITERATOR_H 2 | #define CMARK_ITERATOR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "cmark.h" 9 | 10 | typedef struct { 11 | cmark_event_type ev_type; 12 | cmark_node *node; 13 | } cmark_iter_state; 14 | 15 | struct cmark_iter { 16 | cmark_mem *mem; 17 | cmark_node *root; 18 | cmark_iter_state cur; 19 | cmark_iter_state next; 20 | }; 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /ext/latex.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "utf8.h" 11 | #include "scanners.h" 12 | #include "render.h" 13 | 14 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 15 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 16 | #define CR() renderer->cr(renderer) 17 | #define BLANKLINE() renderer->blankline(renderer) 18 | #define LIST_NUMBER_STRING_SIZE 20 19 | 20 | static inline void outc(cmark_renderer *renderer, cmark_escaping escape, 21 | int32_t c, unsigned char nextc) { 22 | if (escape == LITERAL) { 23 | cmark_render_code_point(renderer, c); 24 | return; 25 | } 26 | 27 | switch (c) { 28 | case 123: // '{' 29 | case 125: // '}' 30 | case 35: // '#' 31 | case 37: // '%' 32 | case 38: // '&' 33 | cmark_render_ascii(renderer, "\\"); 34 | cmark_render_code_point(renderer, c); 35 | break; 36 | case 36: // '$' 37 | case 95: // '_' 38 | if (escape == NORMAL) { 39 | cmark_render_ascii(renderer, "\\"); 40 | } 41 | cmark_render_code_point(renderer, c); 42 | break; 43 | case 45: // '-' 44 | if (nextc == 45) { // prevent ligature 45 | cmark_render_ascii(renderer, "-{}"); 46 | } else { 47 | cmark_render_ascii(renderer, "-"); 48 | } 49 | break; 
50 | case 126: // '~' 51 | if (escape == NORMAL) { 52 | cmark_render_ascii(renderer, "\\textasciitilde{}"); 53 | } else { 54 | cmark_render_code_point(renderer, c); 55 | } 56 | break; 57 | case 94: // '^' 58 | cmark_render_ascii(renderer, "\\^{}"); 59 | break; 60 | case 92: // '\\' 61 | if (escape == URL) { 62 | // / acts as path sep even on windows: 63 | cmark_render_ascii(renderer, "/"); 64 | } else { 65 | cmark_render_ascii(renderer, "\\textbackslash{}"); 66 | } 67 | break; 68 | case 124: // '|' 69 | cmark_render_ascii(renderer, "\\textbar{}"); 70 | break; 71 | case 60: // '<' 72 | cmark_render_ascii(renderer, "\\textless{}"); 73 | break; 74 | case 62: // '>' 75 | cmark_render_ascii(renderer, "\\textgreater{}"); 76 | break; 77 | case 91: // '[' 78 | case 93: // ']' 79 | cmark_render_ascii(renderer, "{"); 80 | cmark_render_code_point(renderer, c); 81 | cmark_render_ascii(renderer, "}"); 82 | break; 83 | case 34: // '"' 84 | cmark_render_ascii(renderer, "\\textquotedbl{}"); 85 | // requires \usepackage[T1]{fontenc} 86 | break; 87 | case 39: // '\'' 88 | cmark_render_ascii(renderer, "\\textquotesingle{}"); 89 | // requires \usepackage{textcomp} 90 | break; 91 | case 160: // nbsp 92 | cmark_render_ascii(renderer, "~"); 93 | break; 94 | case 8230: // hellip 95 | cmark_render_ascii(renderer, "\\ldots{}"); 96 | break; 97 | case 8216: // lsquo 98 | if (escape == NORMAL) { 99 | cmark_render_ascii(renderer, "`"); 100 | } else { 101 | cmark_render_code_point(renderer, c); 102 | } 103 | break; 104 | case 8217: // rsquo 105 | if (escape == NORMAL) { 106 | cmark_render_ascii(renderer, "\'"); 107 | } else { 108 | cmark_render_code_point(renderer, c); 109 | } 110 | break; 111 | case 8220: // ldquo 112 | if (escape == NORMAL) { 113 | cmark_render_ascii(renderer, "``"); 114 | } else { 115 | cmark_render_code_point(renderer, c); 116 | } 117 | break; 118 | case 8221: // rdquo 119 | if (escape == NORMAL) { 120 | cmark_render_ascii(renderer, "''"); 121 | } else { 122 | 
cmark_render_code_point(renderer, c); 123 | } 124 | break; 125 | case 8212: // emdash 126 | if (escape == NORMAL) { 127 | cmark_render_ascii(renderer, "---"); 128 | } else { 129 | cmark_render_code_point(renderer, c); 130 | } 131 | break; 132 | case 8211: // endash 133 | if (escape == NORMAL) { 134 | cmark_render_ascii(renderer, "--"); 135 | } else { 136 | cmark_render_code_point(renderer, c); 137 | } 138 | break; 139 | default: 140 | cmark_render_code_point(renderer, c); 141 | } 142 | } 143 | 144 | typedef enum { 145 | NO_LINK, 146 | URL_AUTOLINK, 147 | EMAIL_AUTOLINK, 148 | NORMAL_LINK, 149 | INTERNAL_LINK 150 | } link_type; 151 | 152 | static link_type get_link_type(cmark_node *node) { 153 | size_t title_len, url_len; 154 | cmark_node *link_text; 155 | char *realurl; 156 | int realurllen; 157 | bool isemail = false; 158 | 159 | if (node->type != CMARK_NODE_LINK) { 160 | return NO_LINK; 161 | } 162 | 163 | const char *url = cmark_node_get_url(node); 164 | cmark_chunk url_chunk = cmark_chunk_literal(url); 165 | 166 | if (url && *url == '#') { 167 | return INTERNAL_LINK; 168 | } 169 | 170 | url_len = strlen(url); 171 | if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { 172 | return NO_LINK; 173 | } 174 | 175 | const char *title = cmark_node_get_title(node); 176 | title_len = strlen(title); 177 | // if it has a title, we can't treat it as an autolink: 178 | if (title_len == 0) { 179 | 180 | link_text = node->first_child; 181 | cmark_consolidate_text_nodes(link_text); 182 | 183 | if (!link_text) 184 | return NO_LINK; 185 | 186 | realurl = (char *)url; 187 | realurllen = (int)url_len; 188 | if (strncmp(realurl, "mailto:", 7) == 0) { 189 | realurl += 7; 190 | realurllen -= 7; 191 | isemail = true; 192 | } 193 | if (realurllen == link_text->len && 194 | strncmp(realurl, (char *)link_text->data, 195 | link_text->len) == 0) { 196 | if (isemail) { 197 | return EMAIL_AUTOLINK; 198 | } else { 199 | return URL_AUTOLINK; 200 | } 201 | } 202 | } 203 | 204 | return 
NORMAL_LINK; 205 | } 206 | 207 | static int S_get_enumlevel(cmark_node *node) { 208 | int enumlevel = 0; 209 | cmark_node *tmp = node; 210 | while (tmp) { 211 | if (tmp->type == CMARK_NODE_LIST && 212 | cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { 213 | enumlevel++; 214 | } 215 | tmp = tmp->parent; 216 | } 217 | return enumlevel; 218 | } 219 | 220 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 221 | cmark_event_type ev_type, int options) { 222 | int list_number; 223 | int enumlevel; 224 | char list_number_string[LIST_NUMBER_STRING_SIZE]; 225 | bool entering = (ev_type == CMARK_EVENT_ENTER); 226 | cmark_list_type list_type; 227 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); 228 | 229 | // avoid warning about unused parameter: 230 | (void)(options); 231 | 232 | switch (node->type) { 233 | case CMARK_NODE_DOCUMENT: 234 | break; 235 | 236 | case CMARK_NODE_BLOCK_QUOTE: 237 | if (entering) { 238 | LIT("\\begin{quote}"); 239 | CR(); 240 | } else { 241 | LIT("\\end{quote}"); 242 | BLANKLINE(); 243 | } 244 | break; 245 | 246 | case CMARK_NODE_LIST: 247 | list_type = cmark_node_get_list_type(node); 248 | if (entering) { 249 | LIT("\\begin{"); 250 | LIT(list_type == CMARK_ORDERED_LIST ? 
"enumerate" : "itemize"); 251 | LIT("}"); 252 | CR(); 253 | list_number = cmark_node_get_list_start(node); 254 | if (list_number > 1) { 255 | enumlevel = S_get_enumlevel(node); 256 | // latex normally supports only five levels 257 | if (enumlevel >= 1 && enumlevel <= 5) { 258 | snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d", 259 | list_number - 1); // the next item will increment this 260 | LIT("\\setcounter{enum"); 261 | switch (enumlevel) { 262 | case 1: LIT("i"); break; 263 | case 2: LIT("ii"); break; 264 | case 3: LIT("iii"); break; 265 | case 4: LIT("iv"); break; 266 | case 5: LIT("v"); break; 267 | default: LIT("i"); break; 268 | } 269 | LIT("}{"); 270 | OUT(list_number_string, false, NORMAL); 271 | LIT("}"); 272 | } 273 | CR(); 274 | } 275 | } else { 276 | LIT("\\end{"); 277 | LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); 278 | LIT("}"); 279 | BLANKLINE(); 280 | } 281 | break; 282 | 283 | case CMARK_NODE_ITEM: 284 | if (entering) { 285 | LIT("\\item "); 286 | } else { 287 | CR(); 288 | } 289 | break; 290 | 291 | case CMARK_NODE_HEADING: 292 | if (entering) { 293 | switch (cmark_node_get_heading_level(node)) { 294 | case 1: 295 | LIT("\\section"); 296 | break; 297 | case 2: 298 | LIT("\\subsection"); 299 | break; 300 | case 3: 301 | LIT("\\subsubsection"); 302 | break; 303 | case 4: 304 | LIT("\\paragraph"); 305 | break; 306 | case 5: 307 | LIT("\\subparagraph"); 308 | break; 309 | } 310 | LIT("{"); 311 | } else { 312 | LIT("}"); 313 | BLANKLINE(); 314 | } 315 | break; 316 | 317 | case CMARK_NODE_CODE_BLOCK: 318 | CR(); 319 | LIT("\\begin{verbatim}"); 320 | CR(); 321 | OUT(cmark_node_get_literal(node), false, LITERAL); 322 | CR(); 323 | LIT("\\end{verbatim}"); 324 | BLANKLINE(); 325 | break; 326 | 327 | case CMARK_NODE_HTML_BLOCK: 328 | break; 329 | 330 | case CMARK_NODE_CUSTOM_BLOCK: 331 | CR(); 332 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 333 | false, LITERAL); 334 | CR(); 335 | break; 336 | 337 | case CMARK_NODE_THEMATIC_BREAK: 338 | BLANKLINE(); 339 | LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"); 340 | BLANKLINE(); 341 | break; 342 | 343 | case CMARK_NODE_PARAGRAPH: 344 | if (!entering) { 345 | BLANKLINE(); 346 | } 347 | break; 348 | 349 | case CMARK_NODE_TEXT: 350 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 351 | break; 352 | 353 | case CMARK_NODE_LINEBREAK: 354 | LIT("\\\\"); 355 | CR(); 356 | break; 357 | 358 | case CMARK_NODE_SOFTBREAK: 359 | if (options & CMARK_OPT_HARDBREAKS) { 360 | LIT("\\\\"); 361 | CR(); 362 | } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { 363 | CR(); 364 | } else { 365 | OUT(" ", allow_wrap, NORMAL); 366 | } 367 | break; 368 | 369 | case CMARK_NODE_CODE: 370 | LIT("\\texttt{"); 371 | OUT(cmark_node_get_literal(node), false, NORMAL); 372 | LIT("}"); 373 | break; 374 | 375 | case CMARK_NODE_HTML_INLINE: 376 | break; 377 | 378 | case CMARK_NODE_CUSTOM_INLINE: 379 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 380 | false, LITERAL); 381 | break; 382 | 383 | case CMARK_NODE_STRONG: 384 | if (entering) { 385 | LIT("\\textbf{"); 386 | } else { 387 | LIT("}"); 388 | } 389 | break; 390 | 391 | case CMARK_NODE_EMPH: 392 | if (entering) { 393 | LIT("\\emph{"); 394 | } else { 395 | LIT("}"); 396 | } 397 | break; 398 | 399 | case CMARK_NODE_LINK: 400 | if (entering) { 401 | const char *url = cmark_node_get_url(node); 402 | // requires \usepackage{hyperref} 403 | switch (get_link_type(node)) { 404 | case URL_AUTOLINK: 405 | LIT("\\url{"); 406 | OUT(url, false, URL); 407 | LIT("}"); 408 | return 0; // Don't process further nodes to avoid double-rendering artefacts 409 | case EMAIL_AUTOLINK: 410 | LIT("\\href{"); 411 | OUT(url, false, URL); 412 | LIT("}\\nolinkurl{"); 413 | break; 414 | case NORMAL_LINK: 415 | LIT("\\href{"); 416 | OUT(url, false, URL); 417 | LIT("}{"); 418 | break; 419 | case INTERNAL_LINK: 420 | LIT("\\protect\\hyperlink{"); 421 | OUT(url + 1, false, URL); 422 | LIT("}{"); 423 | break; 424 | case NO_LINK: 425 | LIT("{"); // error? 
426 | } 427 | } else { 428 | LIT("}"); 429 | } 430 | 431 | break; 432 | 433 | case CMARK_NODE_IMAGE: 434 | if (entering) { 435 | LIT("\\protect\\includegraphics{"); 436 | // requires \include{graphicx} 437 | OUT(cmark_node_get_url(node), false, URL); 438 | LIT("}"); 439 | return 0; 440 | } 441 | break; 442 | 443 | default: 444 | assert(false); 445 | break; 446 | } 447 | 448 | return 1; 449 | } 450 | 451 | char *cmark_render_latex(cmark_node *root, int options, int width) { 452 | return cmark_render(root, options, width, outc, S_render_node); 453 | } 454 | -------------------------------------------------------------------------------- /ext/man.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "utf8.h" 11 | #include "render.h" 12 | 13 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 14 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 15 | #define CR() renderer->cr(renderer) 16 | #define BLANKLINE() renderer->blankline(renderer) 17 | #define LIST_NUMBER_SIZE 20 18 | 19 | // Functions to convert cmark_nodes to groff man strings. 
20 | static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, 21 | unsigned char nextc) { 22 | (void)(nextc); 23 | 24 | if (escape == LITERAL) { 25 | cmark_render_code_point(renderer, c); 26 | return; 27 | } 28 | 29 | switch (c) { 30 | case 46: 31 | if (renderer->begin_line) { 32 | cmark_render_ascii(renderer, "\\&."); 33 | } else { 34 | cmark_render_code_point(renderer, c); 35 | } 36 | break; 37 | case 39: 38 | if (renderer->begin_line) { 39 | cmark_render_ascii(renderer, "\\&'"); 40 | } else { 41 | cmark_render_code_point(renderer, c); 42 | } 43 | break; 44 | case 45: 45 | cmark_render_ascii(renderer, "\\-"); 46 | break; 47 | case 92: 48 | cmark_render_ascii(renderer, "\\e"); 49 | break; 50 | case 8216: // left single quote 51 | cmark_render_ascii(renderer, "\\[oq]"); 52 | break; 53 | case 8217: // right single quote 54 | cmark_render_ascii(renderer, "\\[cq]"); 55 | break; 56 | case 8220: // left double quote 57 | cmark_render_ascii(renderer, "\\[lq]"); 58 | break; 59 | case 8221: // right double quote 60 | cmark_render_ascii(renderer, "\\[rq]"); 61 | break; 62 | case 8212: // em dash 63 | cmark_render_ascii(renderer, "\\[em]"); 64 | break; 65 | case 8211: // en dash 66 | cmark_render_ascii(renderer, "\\[en]"); 67 | break; 68 | default: 69 | cmark_render_code_point(renderer, c); 70 | } 71 | } 72 | 73 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 74 | cmark_event_type ev_type, int options) { 75 | cmark_node *tmp; 76 | int list_number; 77 | bool entering = (ev_type == CMARK_EVENT_ENTER); 78 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); 79 | struct block_number *new_block_number; 80 | cmark_mem *allocator = cmark_get_default_mem_allocator(); 81 | 82 | // avoid unused parameter error: 83 | (void)(options); 84 | 85 | // indent inside nested lists 86 | if (renderer->block_number_in_list_item && 87 | node->type < CMARK_NODE_FIRST_INLINE) { 88 | if (entering) { 89 | 
renderer->block_number_in_list_item->number += 1; 90 | if (renderer->block_number_in_list_item->number == 2) { 91 | CR(); 92 | LIT(".RS"); // indent 93 | CR(); 94 | } 95 | } 96 | } 97 | 98 | switch (node->type) { 99 | case CMARK_NODE_DOCUMENT: 100 | break; 101 | 102 | case CMARK_NODE_BLOCK_QUOTE: 103 | if (entering) { 104 | CR(); 105 | LIT(".RS"); 106 | CR(); 107 | } else { 108 | CR(); 109 | LIT(".RE"); 110 | CR(); 111 | } 112 | break; 113 | 114 | case CMARK_NODE_LIST: 115 | break; 116 | 117 | case CMARK_NODE_ITEM: 118 | if (entering) { 119 | new_block_number = allocator->calloc(1, sizeof(struct block_number)); 120 | new_block_number->number = 0; 121 | new_block_number->parent = renderer->block_number_in_list_item; 122 | renderer->block_number_in_list_item = new_block_number; 123 | CR(); 124 | LIT(".IP "); 125 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 126 | LIT("\\[bu] 2"); 127 | } else { 128 | list_number = cmark_node_get_list_start(node->parent); 129 | tmp = node; 130 | while (tmp->prev) { 131 | tmp = tmp->prev; 132 | list_number += 1; 133 | } 134 | char list_number_s[LIST_NUMBER_SIZE]; 135 | snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); 136 | LIT(list_number_s); 137 | } 138 | CR(); 139 | } else { 140 | if (renderer->block_number_in_list_item) { 141 | if (renderer->block_number_in_list_item->number >= 2) { 142 | CR(); 143 | LIT(".RE"); // de-indent 144 | } 145 | new_block_number = renderer->block_number_in_list_item; 146 | renderer->block_number_in_list_item = 147 | renderer->block_number_in_list_item->parent; 148 | allocator->free(new_block_number); 149 | } 150 | CR(); 151 | } 152 | break; 153 | 154 | case CMARK_NODE_HEADING: 155 | if (entering) { 156 | CR(); 157 | LIT(cmark_node_get_heading_level(node) == 1 ? 
".SH" : ".SS"); 158 | CR(); 159 | } else { 160 | CR(); 161 | } 162 | break; 163 | 164 | case CMARK_NODE_CODE_BLOCK: 165 | CR(); 166 | LIT(".IP\n.nf\n\\f[C]\n"); 167 | OUT(cmark_node_get_literal(node), false, NORMAL); 168 | CR(); 169 | LIT("\\f[]\n.fi"); 170 | CR(); 171 | break; 172 | 173 | case CMARK_NODE_HTML_BLOCK: 174 | break; 175 | 176 | case CMARK_NODE_CUSTOM_BLOCK: 177 | CR(); 178 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 179 | false, LITERAL); 180 | CR(); 181 | break; 182 | 183 | case CMARK_NODE_THEMATIC_BREAK: 184 | CR(); 185 | LIT(".PP\n * * * * *"); 186 | CR(); 187 | break; 188 | 189 | case CMARK_NODE_PARAGRAPH: 190 | if (entering) { 191 | // no blank line if first paragraph in list: 192 | if (node->parent && node->parent->type == CMARK_NODE_ITEM && 193 | node->prev == NULL) { 194 | // no blank line or .PP 195 | } else { 196 | CR(); 197 | LIT(".PP"); 198 | CR(); 199 | } 200 | } else { 201 | CR(); 202 | } 203 | break; 204 | 205 | case CMARK_NODE_TEXT: 206 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 207 | break; 208 | 209 | case CMARK_NODE_LINEBREAK: 210 | LIT(".PD 0\n.P\n.PD"); 211 | CR(); 212 | break; 213 | 214 | case CMARK_NODE_SOFTBREAK: 215 | if (options & CMARK_OPT_HARDBREAKS) { 216 | LIT(".PD 0\n.P\n.PD"); 217 | CR(); 218 | } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { 219 | CR(); 220 | } else { 221 | OUT(" ", allow_wrap, LITERAL); 222 | } 223 | break; 224 | 225 | case CMARK_NODE_CODE: 226 | LIT("\\f[C]"); 227 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 228 | LIT("\\f[]"); 229 | break; 230 | 231 | case CMARK_NODE_HTML_INLINE: 232 | break; 233 | 234 | case CMARK_NODE_CUSTOM_INLINE: 235 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 236 | false, LITERAL); 237 | break; 238 | 239 | case CMARK_NODE_STRONG: 240 | if (entering) { 241 | LIT("\\f[B]"); 242 | } else { 243 | LIT("\\f[]"); 244 | } 245 | break; 246 | 247 | case CMARK_NODE_EMPH: 248 | if (entering) { 249 | LIT("\\f[I]"); 250 | } else { 251 | LIT("\\f[]"); 252 | } 253 | break; 254 | 255 | case CMARK_NODE_LINK: 256 | if (!entering) { 257 | LIT(" ("); 258 | OUT(cmark_node_get_url(node), allow_wrap, URL); 259 | LIT(")"); 260 | } 261 | break; 262 | 263 | case CMARK_NODE_IMAGE: 264 | if (entering) { 265 | LIT("[IMAGE: "); 266 | } else { 267 | LIT("]"); 268 | } 269 | break; 270 | 271 | default: 272 | assert(false); 273 | break; 274 | } 275 | 276 | return 1; 277 | } 278 | 279 | char *cmark_render_man(cmark_node *root, int options, int width) { 280 | return cmark_render(root, options, width, S_outc, S_render_node); 281 | } 282 | -------------------------------------------------------------------------------- /ext/node.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "node.h" 6 | 7 | static void S_node_unlink(cmark_node *node); 8 | 9 | static inline bool S_is_block(cmark_node *node) { 10 | if (node == NULL) { 11 | return false; 12 | } 13 | return node->type >= CMARK_NODE_FIRST_BLOCK && 14 | node->type <= CMARK_NODE_LAST_BLOCK; 15 | } 16 | 17 | static inline bool S_is_inline(cmark_node *node) { 18 | if (node == NULL) { 19 | return false; 20 | } 21 | return node->type >= CMARK_NODE_FIRST_INLINE && 22 | node->type <= CMARK_NODE_LAST_INLINE; 23 | } 24 | 25 | static bool S_can_contain(cmark_node *node, cmark_node *child) { 26 | if (node == NULL || child == NULL || node == child) { 27 | return false; 28 | } 29 | 30 | // Verify that child is not an ancestor of node. 
31 | if (child->first_child != NULL) { 32 | cmark_node *cur = node->parent; 33 | 34 | while (cur != NULL) { 35 | if (cur == child) { 36 | return false; 37 | } 38 | cur = cur->parent; 39 | } 40 | } 41 | 42 | if (child->type == CMARK_NODE_DOCUMENT) { 43 | return false; 44 | } 45 | 46 | switch (node->type) { 47 | case CMARK_NODE_DOCUMENT: 48 | case CMARK_NODE_BLOCK_QUOTE: 49 | case CMARK_NODE_ITEM: 50 | return S_is_block(child) && child->type != CMARK_NODE_ITEM; 51 | 52 | case CMARK_NODE_LIST: 53 | return child->type == CMARK_NODE_ITEM; 54 | 55 | case CMARK_NODE_CUSTOM_BLOCK: 56 | return true; 57 | 58 | case CMARK_NODE_PARAGRAPH: 59 | case CMARK_NODE_HEADING: 60 | case CMARK_NODE_EMPH: 61 | case CMARK_NODE_STRONG: 62 | case CMARK_NODE_LINK: 63 | case CMARK_NODE_IMAGE: 64 | case CMARK_NODE_CUSTOM_INLINE: 65 | return S_is_inline(child); 66 | 67 | default: 68 | break; 69 | } 70 | 71 | return false; 72 | } 73 | 74 | cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { 75 | cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); 76 | node->mem = mem; 77 | node->type = (uint16_t)type; 78 | 79 | switch (node->type) { 80 | case CMARK_NODE_HEADING: 81 | node->as.heading.level = 1; 82 | break; 83 | 84 | case CMARK_NODE_LIST: { 85 | cmark_list *list = &node->as.list; 86 | list->list_type = CMARK_BULLET_LIST; 87 | list->start = 0; 88 | list->tight = false; 89 | break; 90 | } 91 | 92 | default: 93 | break; 94 | } 95 | 96 | return node; 97 | } 98 | 99 | cmark_node *cmark_node_new(cmark_node_type type) { 100 | extern cmark_mem DEFAULT_MEM_ALLOCATOR; 101 | return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); 102 | } 103 | 104 | // Free a cmark_node list and any children. 
105 | static void S_free_nodes(cmark_node *e) { 106 | cmark_mem *mem = e->mem; 107 | cmark_node *next; 108 | while (e != NULL) { 109 | switch (e->type) { 110 | case CMARK_NODE_CODE_BLOCK: 111 | mem->free(e->data); 112 | mem->free(e->as.code.info); 113 | break; 114 | case CMARK_NODE_TEXT: 115 | case CMARK_NODE_HTML_INLINE: 116 | case CMARK_NODE_CODE: 117 | case CMARK_NODE_HTML_BLOCK: 118 | mem->free(e->data); 119 | break; 120 | case CMARK_NODE_LINK: 121 | case CMARK_NODE_IMAGE: 122 | mem->free(e->as.link.url); 123 | mem->free(e->as.link.title); 124 | break; 125 | case CMARK_NODE_CUSTOM_BLOCK: 126 | case CMARK_NODE_CUSTOM_INLINE: 127 | mem->free(e->as.custom.on_enter); 128 | mem->free(e->as.custom.on_exit); 129 | break; 130 | default: 131 | break; 132 | } 133 | if (e->last_child) { 134 | // Splice children into list 135 | e->last_child->next = e->next; 136 | e->next = e->first_child; 137 | } 138 | next = e->next; 139 | mem->free(e); 140 | e = next; 141 | } 142 | } 143 | 144 | void cmark_node_free(cmark_node *node) { 145 | S_node_unlink(node); 146 | node->next = NULL; 147 | S_free_nodes(node); 148 | } 149 | 150 | cmark_node_type cmark_node_get_type(cmark_node *node) { 151 | if (node == NULL) { 152 | return CMARK_NODE_NONE; 153 | } else { 154 | return (cmark_node_type)node->type; 155 | } 156 | } 157 | 158 | const char *cmark_node_get_type_string(cmark_node *node) { 159 | if (node == NULL) { 160 | return "NONE"; 161 | } 162 | 163 | switch (node->type) { 164 | case CMARK_NODE_NONE: 165 | return "none"; 166 | case CMARK_NODE_DOCUMENT: 167 | return "document"; 168 | case CMARK_NODE_BLOCK_QUOTE: 169 | return "block_quote"; 170 | case CMARK_NODE_LIST: 171 | return "list"; 172 | case CMARK_NODE_ITEM: 173 | return "item"; 174 | case CMARK_NODE_CODE_BLOCK: 175 | return "code_block"; 176 | case CMARK_NODE_HTML_BLOCK: 177 | return "html_block"; 178 | case CMARK_NODE_CUSTOM_BLOCK: 179 | return "custom_block"; 180 | case CMARK_NODE_PARAGRAPH: 181 | return "paragraph"; 182 | case 
CMARK_NODE_HEADING: 183 | return "heading"; 184 | case CMARK_NODE_THEMATIC_BREAK: 185 | return "thematic_break"; 186 | case CMARK_NODE_TEXT: 187 | return "text"; 188 | case CMARK_NODE_SOFTBREAK: 189 | return "softbreak"; 190 | case CMARK_NODE_LINEBREAK: 191 | return "linebreak"; 192 | case CMARK_NODE_CODE: 193 | return "code"; 194 | case CMARK_NODE_HTML_INLINE: 195 | return "html_inline"; 196 | case CMARK_NODE_CUSTOM_INLINE: 197 | return "custom_inline"; 198 | case CMARK_NODE_EMPH: 199 | return "emph"; 200 | case CMARK_NODE_STRONG: 201 | return "strong"; 202 | case CMARK_NODE_LINK: 203 | return "link"; 204 | case CMARK_NODE_IMAGE: 205 | return "image"; 206 | } 207 | 208 | return ""; 209 | } 210 | 211 | cmark_node *cmark_node_next(cmark_node *node) { 212 | if (node == NULL) { 213 | return NULL; 214 | } else { 215 | return node->next; 216 | } 217 | } 218 | 219 | cmark_node *cmark_node_previous(cmark_node *node) { 220 | if (node == NULL) { 221 | return NULL; 222 | } else { 223 | return node->prev; 224 | } 225 | } 226 | 227 | cmark_node *cmark_node_parent(cmark_node *node) { 228 | if (node == NULL) { 229 | return NULL; 230 | } else { 231 | return node->parent; 232 | } 233 | } 234 | 235 | cmark_node *cmark_node_first_child(cmark_node *node) { 236 | if (node == NULL) { 237 | return NULL; 238 | } else { 239 | return node->first_child; 240 | } 241 | } 242 | 243 | cmark_node *cmark_node_last_child(cmark_node *node) { 244 | if (node == NULL) { 245 | return NULL; 246 | } else { 247 | return node->last_child; 248 | } 249 | } 250 | 251 | static bufsize_t cmark_set_cstr(cmark_mem *mem, unsigned char **dst, 252 | const char *src) { 253 | unsigned char *old = *dst; 254 | bufsize_t len; 255 | 256 | if (src && src[0]) { 257 | len = (bufsize_t)strlen(src); 258 | *dst = (unsigned char *)mem->realloc(NULL, len + 1); 259 | memcpy(*dst, src, len + 1); 260 | } else { 261 | len = 0; 262 | *dst = NULL; 263 | } 264 | if (old) { 265 | mem->free(old); 266 | } 267 | 268 | return len; 269 | } 
270 | 271 | void *cmark_node_get_user_data(cmark_node *node) { 272 | if (node == NULL) { 273 | return NULL; 274 | } else { 275 | return node->user_data; 276 | } 277 | } 278 | 279 | int cmark_node_set_user_data(cmark_node *node, void *user_data) { 280 | if (node == NULL) { 281 | return 0; 282 | } 283 | node->user_data = user_data; 284 | return 1; 285 | } 286 | 287 | const char *cmark_node_get_literal(cmark_node *node) { 288 | if (node == NULL) { 289 | return NULL; 290 | } 291 | 292 | switch (node->type) { 293 | case CMARK_NODE_HTML_BLOCK: 294 | case CMARK_NODE_TEXT: 295 | case CMARK_NODE_HTML_INLINE: 296 | case CMARK_NODE_CODE: 297 | case CMARK_NODE_CODE_BLOCK: 298 | return node->data ? (char *)node->data : ""; 299 | 300 | default: 301 | break; 302 | } 303 | 304 | return NULL; 305 | } 306 | 307 | int cmark_node_set_literal(cmark_node *node, const char *content) { 308 | if (node == NULL) { 309 | return 0; 310 | } 311 | 312 | switch (node->type) { 313 | case CMARK_NODE_HTML_BLOCK: 314 | case CMARK_NODE_TEXT: 315 | case CMARK_NODE_HTML_INLINE: 316 | case CMARK_NODE_CODE: 317 | case CMARK_NODE_CODE_BLOCK: 318 | node->len = cmark_set_cstr(node->mem, &node->data, content); 319 | return 1; 320 | 321 | default: 322 | break; 323 | } 324 | 325 | return 0; 326 | } 327 | 328 | int cmark_node_get_heading_level(cmark_node *node) { 329 | if (node == NULL) { 330 | return 0; 331 | } 332 | 333 | switch (node->type) { 334 | case CMARK_NODE_HEADING: 335 | return node->as.heading.level; 336 | 337 | default: 338 | break; 339 | } 340 | 341 | return 0; 342 | } 343 | 344 | int cmark_node_set_heading_level(cmark_node *node, int level) { 345 | if (node == NULL || level < 1 || level > 6) { 346 | return 0; 347 | } 348 | 349 | switch (node->type) { 350 | case CMARK_NODE_HEADING: 351 | node->as.heading.level = level; 352 | return 1; 353 | 354 | default: 355 | break; 356 | } 357 | 358 | return 0; 359 | } 360 | 361 | cmark_list_type cmark_node_get_list_type(cmark_node *node) { 362 | if (node == 
NULL) { 363 | return CMARK_NO_LIST; 364 | } 365 | 366 | if (node->type == CMARK_NODE_LIST) { 367 | return (cmark_list_type)node->as.list.list_type; 368 | } else { 369 | return CMARK_NO_LIST; 370 | } 371 | } 372 | 373 | int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { 374 | if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) { 375 | return 0; 376 | } 377 | 378 | if (node == NULL) { 379 | return 0; 380 | } 381 | 382 | if (node->type == CMARK_NODE_LIST) { 383 | node->as.list.list_type = (unsigned char)type; 384 | return 1; 385 | } else { 386 | return 0; 387 | } 388 | } 389 | 390 | cmark_delim_type cmark_node_get_list_delim(cmark_node *node) { 391 | if (node == NULL) { 392 | return CMARK_NO_DELIM; 393 | } 394 | 395 | if (node->type == CMARK_NODE_LIST) { 396 | return (cmark_delim_type)node->as.list.delimiter; 397 | } else { 398 | return CMARK_NO_DELIM; 399 | } 400 | } 401 | 402 | int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) { 403 | if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) { 404 | return 0; 405 | } 406 | 407 | if (node == NULL) { 408 | return 0; 409 | } 410 | 411 | if (node->type == CMARK_NODE_LIST) { 412 | node->as.list.delimiter = (unsigned char)delim; 413 | return 1; 414 | } else { 415 | return 0; 416 | } 417 | } 418 | 419 | int cmark_node_get_list_start(cmark_node *node) { 420 | if (node == NULL) { 421 | return 0; 422 | } 423 | 424 | if (node->type == CMARK_NODE_LIST) { 425 | return node->as.list.start; 426 | } else { 427 | return 0; 428 | } 429 | } 430 | 431 | int cmark_node_set_list_start(cmark_node *node, int start) { 432 | if (node == NULL || start < 0) { 433 | return 0; 434 | } 435 | 436 | if (node->type == CMARK_NODE_LIST) { 437 | node->as.list.start = start; 438 | return 1; 439 | } else { 440 | return 0; 441 | } 442 | } 443 | 444 | int cmark_node_get_list_tight(cmark_node *node) { 445 | if (node == NULL) { 446 | return 0; 447 | } 448 | 449 | if (node->type == CMARK_NODE_LIST) 
{ 450 | return node->as.list.tight; 451 | } else { 452 | return 0; 453 | } 454 | } 455 | 456 | int cmark_node_set_list_tight(cmark_node *node, int tight) { 457 | if (node == NULL) { 458 | return 0; 459 | } 460 | 461 | if (node->type == CMARK_NODE_LIST) { 462 | node->as.list.tight = tight == 1; 463 | return 1; 464 | } else { 465 | return 0; 466 | } 467 | } 468 | 469 | const char *cmark_node_get_fence_info(cmark_node *node) { 470 | if (node == NULL) { 471 | return NULL; 472 | } 473 | 474 | if (node->type == CMARK_NODE_CODE_BLOCK) { 475 | return node->as.code.info ? (char *)node->as.code.info : ""; 476 | } else { 477 | return NULL; 478 | } 479 | } 480 | 481 | int cmark_node_set_fence_info(cmark_node *node, const char *info) { 482 | if (node == NULL) { 483 | return 0; 484 | } 485 | 486 | if (node->type == CMARK_NODE_CODE_BLOCK) { 487 | cmark_set_cstr(node->mem, &node->as.code.info, info); 488 | return 1; 489 | } else { 490 | return 0; 491 | } 492 | } 493 | 494 | const char *cmark_node_get_url(cmark_node *node) { 495 | if (node == NULL) { 496 | return NULL; 497 | } 498 | 499 | switch (node->type) { 500 | case CMARK_NODE_LINK: 501 | case CMARK_NODE_IMAGE: 502 | return node->as.link.url ? (char *)node->as.link.url : ""; 503 | default: 504 | break; 505 | } 506 | 507 | return NULL; 508 | } 509 | 510 | int cmark_node_set_url(cmark_node *node, const char *url) { 511 | if (node == NULL) { 512 | return 0; 513 | } 514 | 515 | switch (node->type) { 516 | case CMARK_NODE_LINK: 517 | case CMARK_NODE_IMAGE: 518 | cmark_set_cstr(node->mem, &node->as.link.url, url); 519 | return 1; 520 | default: 521 | break; 522 | } 523 | 524 | return 0; 525 | } 526 | 527 | const char *cmark_node_get_title(cmark_node *node) { 528 | if (node == NULL) { 529 | return NULL; 530 | } 531 | 532 | switch (node->type) { 533 | case CMARK_NODE_LINK: 534 | case CMARK_NODE_IMAGE: 535 | return node->as.link.title ? 
(char *)node->as.link.title : ""; 536 | default: 537 | break; 538 | } 539 | 540 | return NULL; 541 | } 542 | 543 | int cmark_node_set_title(cmark_node *node, const char *title) { 544 | if (node == NULL) { 545 | return 0; 546 | } 547 | 548 | switch (node->type) { 549 | case CMARK_NODE_LINK: 550 | case CMARK_NODE_IMAGE: 551 | cmark_set_cstr(node->mem, &node->as.link.title, title); 552 | return 1; 553 | default: 554 | break; 555 | } 556 | 557 | return 0; 558 | } 559 | 560 | const char *cmark_node_get_on_enter(cmark_node *node) { 561 | if (node == NULL) { 562 | return NULL; 563 | } 564 | 565 | switch (node->type) { 566 | case CMARK_NODE_CUSTOM_INLINE: 567 | case CMARK_NODE_CUSTOM_BLOCK: 568 | return node->as.custom.on_enter ? (char *)node->as.custom.on_enter : ""; 569 | default: 570 | break; 571 | } 572 | 573 | return NULL; 574 | } 575 | 576 | int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) { 577 | if (node == NULL) { 578 | return 0; 579 | } 580 | 581 | switch (node->type) { 582 | case CMARK_NODE_CUSTOM_INLINE: 583 | case CMARK_NODE_CUSTOM_BLOCK: 584 | cmark_set_cstr(node->mem, &node->as.custom.on_enter, on_enter); 585 | return 1; 586 | default: 587 | break; 588 | } 589 | 590 | return 0; 591 | } 592 | 593 | const char *cmark_node_get_on_exit(cmark_node *node) { 594 | if (node == NULL) { 595 | return NULL; 596 | } 597 | 598 | switch (node->type) { 599 | case CMARK_NODE_CUSTOM_INLINE: 600 | case CMARK_NODE_CUSTOM_BLOCK: 601 | return node->as.custom.on_exit ? 
(char *)node->as.custom.on_exit : ""; 602 | default: 603 | break; 604 | } 605 | 606 | return NULL; 607 | } 608 | 609 | int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { 610 | if (node == NULL) { 611 | return 0; 612 | } 613 | 614 | switch (node->type) { 615 | case CMARK_NODE_CUSTOM_INLINE: 616 | case CMARK_NODE_CUSTOM_BLOCK: 617 | cmark_set_cstr(node->mem, &node->as.custom.on_exit, on_exit); 618 | return 1; 619 | default: 620 | break; 621 | } 622 | 623 | return 0; 624 | } 625 | 626 | int cmark_node_get_start_line(cmark_node *node) { 627 | if (node == NULL) { 628 | return 0; 629 | } 630 | return node->start_line; 631 | } 632 | 633 | int cmark_node_get_start_column(cmark_node *node) { 634 | if (node == NULL) { 635 | return 0; 636 | } 637 | return node->start_column; 638 | } 639 | 640 | int cmark_node_get_end_line(cmark_node *node) { 641 | if (node == NULL) { 642 | return 0; 643 | } 644 | return node->end_line; 645 | } 646 | 647 | int cmark_node_get_end_column(cmark_node *node) { 648 | if (node == NULL) { 649 | return 0; 650 | } 651 | return node->end_column; 652 | } 653 | 654 | // Unlink a node without adjusting its next, prev, and parent pointers. 655 | static void S_node_unlink(cmark_node *node) { 656 | if (node == NULL) { 657 | return; 658 | } 659 | 660 | if (node->prev) { 661 | node->prev->next = node->next; 662 | } 663 | if (node->next) { 664 | node->next->prev = node->prev; 665 | } 666 | 667 | // Adjust first_child and last_child of parent. 
668 | cmark_node *parent = node->parent; 669 | if (parent) { 670 | if (parent->first_child == node) { 671 | parent->first_child = node->next; 672 | } 673 | if (parent->last_child == node) { 674 | parent->last_child = node->prev; 675 | } 676 | } 677 | } 678 | 679 | void cmark_node_unlink(cmark_node *node) { 680 | S_node_unlink(node); 681 | 682 | node->next = NULL; 683 | node->prev = NULL; 684 | node->parent = NULL; 685 | } 686 | 687 | int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) { 688 | if (node == NULL || sibling == NULL) { 689 | return 0; 690 | } 691 | 692 | if (!node->parent || !S_can_contain(node->parent, sibling)) { 693 | return 0; 694 | } 695 | 696 | S_node_unlink(sibling); 697 | 698 | cmark_node *old_prev = node->prev; 699 | 700 | // Insert 'sibling' between 'old_prev' and 'node'. 701 | if (old_prev) { 702 | old_prev->next = sibling; 703 | } 704 | sibling->prev = old_prev; 705 | sibling->next = node; 706 | node->prev = sibling; 707 | 708 | // Set new parent. 709 | cmark_node *parent = node->parent; 710 | sibling->parent = parent; 711 | 712 | // Adjust first_child of parent if inserted as first child. 713 | if (parent && !old_prev) { 714 | parent->first_child = sibling; 715 | } 716 | 717 | return 1; 718 | } 719 | 720 | int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) { 721 | if (node == NULL || sibling == NULL) { 722 | return 0; 723 | } 724 | 725 | if (!node->parent || !S_can_contain(node->parent, sibling)) { 726 | return 0; 727 | } 728 | 729 | S_node_unlink(sibling); 730 | 731 | cmark_node *old_next = node->next; 732 | 733 | // Insert 'sibling' between 'node' and 'old_next'. 734 | if (old_next) { 735 | old_next->prev = sibling; 736 | } 737 | sibling->next = old_next; 738 | sibling->prev = node; 739 | node->next = sibling; 740 | 741 | // Set new parent. 742 | cmark_node *parent = node->parent; 743 | sibling->parent = parent; 744 | 745 | // Adjust last_child of parent if inserted as last child. 
746 | if (parent && !old_next) { 747 | parent->last_child = sibling; 748 | } 749 | 750 | return 1; 751 | } 752 | 753 | int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) { 754 | if (!cmark_node_insert_before(oldnode, newnode)) { 755 | return 0; 756 | } 757 | cmark_node_unlink(oldnode); 758 | return 1; 759 | } 760 | 761 | int cmark_node_prepend_child(cmark_node *node, cmark_node *child) { 762 | if (!S_can_contain(node, child)) { 763 | return 0; 764 | } 765 | 766 | S_node_unlink(child); 767 | 768 | cmark_node *old_first_child = node->first_child; 769 | 770 | child->next = old_first_child; 771 | child->prev = NULL; 772 | child->parent = node; 773 | node->first_child = child; 774 | 775 | if (old_first_child) { 776 | old_first_child->prev = child; 777 | } else { 778 | // Also set last_child if node previously had no children. 779 | node->last_child = child; 780 | } 781 | 782 | return 1; 783 | } 784 | 785 | int cmark_node_append_child(cmark_node *node, cmark_node *child) { 786 | if (!S_can_contain(node, child)) { 787 | return 0; 788 | } 789 | 790 | S_node_unlink(child); 791 | 792 | cmark_node *old_last_child = node->last_child; 793 | 794 | child->next = NULL; 795 | child->prev = old_last_child; 796 | child->parent = node; 797 | node->last_child = child; 798 | 799 | if (old_last_child) { 800 | old_last_child->next = child; 801 | } else { 802 | // Also set first_child if node previously had no children. 
803 | node->first_child = child; 804 | } 805 | 806 | return 1; 807 | } 808 | 809 | static void S_print_error(FILE *out, cmark_node *node, const char *elem) { 810 | if (out == NULL) { 811 | return; 812 | } 813 | fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, 814 | cmark_node_get_type_string(node), node->start_line, 815 | node->start_column); 816 | } 817 | 818 | int cmark_node_check(cmark_node *node, FILE *out) { 819 | cmark_node *cur; 820 | int errors = 0; 821 | 822 | if (!node) { 823 | return 0; 824 | } 825 | 826 | cur = node; 827 | for (;;) { 828 | if (cur->first_child) { 829 | if (cur->first_child->prev != NULL) { 830 | S_print_error(out, cur->first_child, "prev"); 831 | cur->first_child->prev = NULL; 832 | ++errors; 833 | } 834 | if (cur->first_child->parent != cur) { 835 | S_print_error(out, cur->first_child, "parent"); 836 | cur->first_child->parent = cur; 837 | ++errors; 838 | } 839 | cur = cur->first_child; 840 | continue; 841 | } 842 | 843 | next_sibling: 844 | if (cur == node) { 845 | break; 846 | } 847 | if (cur->next) { 848 | if (cur->next->prev != cur) { 849 | S_print_error(out, cur->next, "prev"); 850 | cur->next->prev = cur; 851 | ++errors; 852 | } 853 | if (cur->next->parent != cur->parent) { 854 | S_print_error(out, cur->next, "parent"); 855 | cur->next->parent = cur->parent; 856 | ++errors; 857 | } 858 | cur = cur->next; 859 | continue; 860 | } 861 | 862 | if (cur->parent->last_child != cur) { 863 | S_print_error(out, cur->parent, "last_child"); 864 | cur->parent->last_child = cur; 865 | ++errors; 866 | } 867 | cur = cur->parent; 868 | goto next_sibling; 869 | } 870 | 871 | return errors; 872 | } 873 | -------------------------------------------------------------------------------- /ext/node.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_NODE_H 2 | #define CMARK_NODE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | 
#include "cmark.h" 13 | #include "buffer.h" 14 | 15 | typedef struct { 16 | int marker_offset; 17 | int padding; 18 | int start; 19 | unsigned char list_type; 20 | unsigned char delimiter; 21 | unsigned char bullet_char; 22 | bool tight; 23 | } cmark_list; 24 | 25 | typedef struct { 26 | unsigned char *info; 27 | uint8_t fence_length; 28 | uint8_t fence_offset; 29 | unsigned char fence_char; 30 | int8_t fenced; 31 | } cmark_code; 32 | 33 | typedef struct { 34 | int internal_offset; 35 | int8_t level; 36 | bool setext; 37 | } cmark_heading; 38 | 39 | typedef struct { 40 | unsigned char *url; 41 | unsigned char *title; 42 | } cmark_link; 43 | 44 | typedef struct { 45 | unsigned char *on_enter; 46 | unsigned char *on_exit; 47 | } cmark_custom; 48 | 49 | enum cmark_node__internal_flags { 50 | CMARK_NODE__OPEN = (1 << 0), 51 | CMARK_NODE__LAST_LINE_BLANK = (1 << 1), 52 | CMARK_NODE__LAST_LINE_CHECKED = (1 << 2), 53 | CMARK_NODE__LIST_LAST_LINE_BLANK = (1 << 3), 54 | }; 55 | 56 | struct cmark_node { 57 | cmark_mem *mem; 58 | 59 | struct cmark_node *next; 60 | struct cmark_node *prev; 61 | struct cmark_node *parent; 62 | struct cmark_node *first_child; 63 | struct cmark_node *last_child; 64 | 65 | void *user_data; 66 | 67 | unsigned char *data; 68 | bufsize_t len; 69 | 70 | int start_line; 71 | int start_column; 72 | int end_line; 73 | int end_column; 74 | uint16_t type; 75 | uint16_t flags; 76 | 77 | union { 78 | cmark_list list; 79 | cmark_code code; 80 | cmark_heading heading; 81 | cmark_link link; 82 | cmark_custom custom; 83 | int html_block_type; 84 | } as; 85 | }; 86 | 87 | CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); 88 | 89 | #ifdef __cplusplus 90 | } 91 | #endif 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /ext/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_AST_H 2 | #define CMARK_AST_H 3 | 4 | #include 5 | #include 
"references.h" 6 | #include "node.h" 7 | #include "buffer.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #define MAX_LINK_LABEL_LENGTH 1000 14 | 15 | struct cmark_parser { 16 | struct cmark_mem *mem; 17 | struct cmark_reference_map *refmap; 18 | struct cmark_node *root; 19 | struct cmark_node *current; 20 | int line_number; 21 | bufsize_t offset; 22 | bufsize_t column; 23 | bufsize_t first_nonspace; 24 | bufsize_t first_nonspace_column; 25 | bufsize_t thematic_break_kill_pos; 26 | int indent; 27 | bool blank; 28 | bool partially_consumed_tab; 29 | cmark_strbuf curline; 30 | bufsize_t last_line_length; 31 | cmark_strbuf linebuf; 32 | cmark_strbuf content; 33 | int options; 34 | bool last_buffer_ended_with_cr; 35 | unsigned int total_size; 36 | }; 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /ext/references.c: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "utf8.h" 3 | #include "parser.h" 4 | #include "references.h" 5 | #include "inlines.h" 6 | #include "chunk.h" 7 | 8 | static void reference_free(cmark_reference_map *map, cmark_reference *ref) { 9 | cmark_mem *mem = map->mem; 10 | if (ref != NULL) { 11 | mem->free(ref->label); 12 | mem->free(ref->url); 13 | mem->free(ref->title); 14 | mem->free(ref); 15 | } 16 | } 17 | 18 | // normalize reference: collapse internal whitespace to single space, 19 | // remove leading/trailing whitespace, case fold 20 | // Return NULL if the reference name is actually empty (i.e. 
composed 21 | // solely from whitespace) 22 | static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { 23 | cmark_strbuf normalized = CMARK_BUF_INIT(mem); 24 | unsigned char *result; 25 | 26 | if (ref == NULL) 27 | return NULL; 28 | 29 | if (ref->len == 0) 30 | return NULL; 31 | 32 | cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); 33 | cmark_strbuf_trim(&normalized); 34 | cmark_strbuf_normalize_whitespace(&normalized); 35 | 36 | result = cmark_strbuf_detach(&normalized); 37 | assert(result); 38 | 39 | if (result[0] == '\0') { 40 | mem->free(result); 41 | return NULL; 42 | } 43 | 44 | return result; 45 | } 46 | 47 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 48 | cmark_chunk *url, cmark_chunk *title) { 49 | cmark_reference *ref; 50 | unsigned char *reflabel = normalize_reference(map->mem, label); 51 | 52 | /* empty reference name, or composed from only whitespace */ 53 | if (reflabel == NULL) 54 | return; 55 | 56 | assert(map->sorted == NULL); 57 | 58 | ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); 59 | ref->label = reflabel; 60 | ref->url = cmark_clean_url(map->mem, url); 61 | ref->title = cmark_clean_title(map->mem, title); 62 | ref->age = map->size; 63 | ref->next = map->refs; 64 | 65 | if (ref->url != NULL) 66 | ref->size += (int)strlen((char*)ref->url); 67 | if (ref->title != NULL) 68 | ref->size += (int)strlen((char*)ref->title); 69 | 70 | map->refs = ref; 71 | map->size++; 72 | } 73 | 74 | static int 75 | labelcmp(const unsigned char *a, const unsigned char *b) { 76 | return strcmp((const char *)a, (const char *)b); 77 | } 78 | 79 | static int 80 | refcmp(const void *p1, const void *p2) { 81 | cmark_reference *r1 = *(cmark_reference **)p1; 82 | cmark_reference *r2 = *(cmark_reference **)p2; 83 | int res = labelcmp(r1->label, r2->label); 84 | return res ? 
res : ((int)r1->age - (int)r2->age); 85 | } 86 | 87 | static int 88 | refsearch(const void *label, const void *p2) { 89 | cmark_reference *ref = *(cmark_reference **)p2; 90 | return labelcmp((const unsigned char *)label, ref->label); 91 | } 92 | 93 | static void sort_references(cmark_reference_map *map) { 94 | unsigned int i = 0, last = 0, size = map->size; 95 | cmark_reference *r = map->refs, **sorted = NULL; 96 | 97 | sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); 98 | while (r) { 99 | sorted[i++] = r; 100 | r = r->next; 101 | } 102 | 103 | qsort(sorted, size, sizeof(cmark_reference *), refcmp); 104 | 105 | for (i = 1; i < size; i++) { 106 | if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) 107 | sorted[++last] = sorted[i]; 108 | } 109 | map->sorted = sorted; 110 | map->size = last + 1; 111 | } 112 | 113 | // Returns reference if refmap contains a reference with matching 114 | // label, otherwise NULL. 115 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 116 | cmark_chunk *label) { 117 | cmark_reference **ref = NULL; 118 | cmark_reference *r = NULL; 119 | unsigned char *norm; 120 | 121 | if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) 122 | return NULL; 123 | 124 | if (map == NULL || !map->size) 125 | return NULL; 126 | 127 | norm = normalize_reference(map->mem, label); 128 | if (norm == NULL) 129 | return NULL; 130 | 131 | if (!map->sorted) 132 | sort_references(map); 133 | 134 | ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), 135 | refsearch); 136 | map->mem->free(norm); 137 | 138 | if (ref != NULL) { 139 | r = ref[0]; 140 | /* Check for expansion limit */ 141 | if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size) 142 | return NULL; 143 | map->ref_size += r->size; 144 | } 145 | 146 | return r; 147 | } 148 | 149 | void cmark_reference_map_free(cmark_reference_map *map) { 150 | cmark_reference *ref; 151 | 152 | if (map == NULL) 153 | 
return; 154 | 155 | ref = map->refs; 156 | while (ref) { 157 | cmark_reference *next = ref->next; 158 | reference_free(map, ref); 159 | ref = next; 160 | } 161 | 162 | map->mem->free(map->sorted); 163 | map->mem->free(map); 164 | } 165 | 166 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) { 167 | cmark_reference_map *map = 168 | (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map)); 169 | map->mem = mem; 170 | return map; 171 | } 172 | -------------------------------------------------------------------------------- /ext/references.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_REFERENCES_H 2 | #define CMARK_REFERENCES_H 3 | 4 | #include "chunk.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct cmark_reference { 11 | struct cmark_reference *next; 12 | unsigned char *label; 13 | unsigned char *url; 14 | unsigned char *title; 15 | unsigned int age; 16 | unsigned int size; 17 | }; 18 | 19 | typedef struct cmark_reference cmark_reference; 20 | 21 | struct cmark_reference_map { 22 | cmark_mem *mem; 23 | cmark_reference *refs; 24 | cmark_reference **sorted; 25 | unsigned int size; 26 | unsigned int ref_size; 27 | unsigned int max_ref_size; 28 | }; 29 | 30 | typedef struct cmark_reference_map cmark_reference_map; 31 | 32 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem); 33 | void cmark_reference_map_free(cmark_reference_map *map); 34 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 35 | cmark_chunk *label); 36 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 37 | cmark_chunk *url, cmark_chunk *title); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /ext/render.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "buffer.h" 3 | #include "cmark.h" 4 | 
#include "utf8.h" 5 | #include "render.h" 6 | #include "node.h" 7 | #include "cmark_ctype.h" 8 | 9 | static inline void S_cr(cmark_renderer *renderer) { 10 | if (renderer->need_cr < 1) { 11 | renderer->need_cr = 1; 12 | } 13 | } 14 | 15 | static inline void S_blankline(cmark_renderer *renderer) { 16 | if (renderer->need_cr < 2) { 17 | renderer->need_cr = 2; 18 | } 19 | } 20 | 21 | static void S_out(cmark_renderer *renderer, const char *source, bool wrap, 22 | cmark_escaping escape) { 23 | int length = (int)strlen(source); 24 | unsigned char nextc; 25 | int32_t c; 26 | int i = 0; 27 | int last_nonspace; 28 | int len; 29 | int k = renderer->buffer->size - 1; 30 | 31 | wrap = wrap && !renderer->no_linebreaks; 32 | 33 | if (renderer->in_tight_list_item && renderer->need_cr > 1) { 34 | renderer->need_cr = 1; 35 | } 36 | while (renderer->need_cr) { 37 | if (k < 0 || renderer->buffer->ptr[k] == '\n') { 38 | k -= 1; 39 | } else { 40 | cmark_strbuf_putc(renderer->buffer, '\n'); 41 | if (renderer->need_cr > 1) { 42 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 43 | renderer->prefix->size); 44 | } 45 | } 46 | renderer->column = 0; 47 | renderer->last_breakable = 0; 48 | renderer->begin_line = true; 49 | renderer->begin_content = true; 50 | renderer->need_cr -= 1; 51 | } 52 | 53 | while (i < length) { 54 | if (renderer->begin_line) { 55 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 56 | renderer->prefix->size); 57 | // note: this assumes prefix is ascii: 58 | renderer->column = renderer->prefix->size; 59 | } 60 | 61 | len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); 62 | if (len == -1) { // error condition 63 | return; // return without rendering rest of string 64 | } 65 | nextc = source[i + len]; 66 | if (c == 32 && wrap) { 67 | if (!renderer->begin_line) { 68 | last_nonspace = renderer->buffer->size; 69 | cmark_strbuf_putc(renderer->buffer, ' '); 70 | renderer->column += 1; 71 | renderer->begin_line = false; 72 | 
renderer->begin_content = false; 73 | // skip following spaces 74 | while (source[i + 1] == ' ') { 75 | i++; 76 | } 77 | // We don't allow breaks that make a digit the first character 78 | // because this causes problems with commonmark output. 79 | if (!cmark_isdigit(source[i + 1])) { 80 | renderer->last_breakable = last_nonspace; 81 | } 82 | } 83 | 84 | } else if (escape == LITERAL) { 85 | if (c == 10) { 86 | cmark_strbuf_putc(renderer->buffer, '\n'); 87 | renderer->column = 0; 88 | renderer->begin_line = true; 89 | renderer->begin_content = true; 90 | renderer->last_breakable = 0; 91 | } else { 92 | cmark_render_code_point(renderer, c); 93 | renderer->begin_line = false; 94 | // we don't set 'begin_content' to false til we've 95 | // finished parsing a digit. Reason: in commonmark 96 | // we need to escape a potential list marker after 97 | // a digit: 98 | renderer->begin_content = 99 | renderer->begin_content && cmark_isdigit(c) == 1; 100 | } 101 | } else { 102 | (renderer->outc)(renderer, escape, c, nextc); 103 | renderer->begin_line = false; 104 | renderer->begin_content = 105 | renderer->begin_content && cmark_isdigit(c) == 1; 106 | } 107 | 108 | // If adding the character went beyond width, look for an 109 | // earlier place where the line could be broken: 110 | if (renderer->width > 0 && renderer->column > renderer->width && 111 | !renderer->begin_line && renderer->last_breakable > 0) { 112 | 113 | // copy from last_breakable to remainder 114 | unsigned char *src = renderer->buffer->ptr + 115 | renderer->last_breakable + 1; 116 | bufsize_t remainder_len = renderer->buffer->size - 117 | renderer->last_breakable - 1; 118 | unsigned char *remainder = 119 | (unsigned char *)renderer->mem->realloc(NULL, remainder_len); 120 | memcpy(remainder, src, remainder_len); 121 | // truncate at last_breakable 122 | cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); 123 | // add newline, prefix, and remainder 124 | cmark_strbuf_putc(renderer->buffer, 
'\n'); 125 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 126 | renderer->prefix->size); 127 | cmark_strbuf_put(renderer->buffer, remainder, remainder_len); 128 | renderer->column = renderer->prefix->size + remainder_len; 129 | renderer->mem->free(remainder); 130 | renderer->last_breakable = 0; 131 | renderer->begin_line = false; 132 | renderer->begin_content = false; 133 | } 134 | 135 | i += len; 136 | } 137 | } 138 | 139 | // Assumes no newlines, assumes ascii content: 140 | void cmark_render_ascii(cmark_renderer *renderer, const char *s) { 141 | int origsize = renderer->buffer->size; 142 | cmark_strbuf_puts(renderer->buffer, s); 143 | renderer->column += renderer->buffer->size - origsize; 144 | } 145 | 146 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { 147 | cmark_utf8proc_encode_char(c, renderer->buffer); 148 | renderer->column += 1; 149 | } 150 | 151 | char *cmark_render(cmark_node *root, int options, int width, 152 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 153 | unsigned char), 154 | int (*render_node)(cmark_renderer *renderer, 155 | cmark_node *node, 156 | cmark_event_type ev_type, int options)) { 157 | cmark_mem *mem = root->mem; 158 | cmark_strbuf pref = CMARK_BUF_INIT(mem); 159 | cmark_strbuf buf = CMARK_BUF_INIT(mem); 160 | cmark_node *cur; 161 | cmark_event_type ev_type; 162 | char *result; 163 | cmark_iter *iter = cmark_iter_new(root); 164 | 165 | cmark_renderer renderer = {options, 166 | mem, &buf, &pref, 0, width, 167 | 0, 0, true, true, false, 168 | false, NULL, 169 | outc, S_cr, S_blankline, S_out}; 170 | 171 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 172 | cur = cmark_iter_get_node(iter); 173 | if (!render_node(&renderer, cur, ev_type, options)) { 174 | // a false value causes us to skip processing 175 | // the node's contents. this is used for 176 | // autolinks. 
177 | cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); 178 | } 179 | } 180 | 181 | // ensure final newline 182 | if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { 183 | cmark_strbuf_putc(renderer.buffer, '\n'); 184 | } 185 | 186 | result = (char *)cmark_strbuf_detach(renderer.buffer); 187 | 188 | cmark_iter_free(iter); 189 | cmark_strbuf_free(renderer.prefix); 190 | cmark_strbuf_free(renderer.buffer); 191 | 192 | return result; 193 | } 194 | -------------------------------------------------------------------------------- /ext/render.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_RENDER_H 2 | #define CMARK_RENDER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "buffer.h" 12 | 13 | typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping; 14 | 15 | struct block_number { 16 | int number; 17 | struct block_number *parent; 18 | }; 19 | 20 | struct cmark_renderer { 21 | int options; 22 | cmark_mem *mem; 23 | cmark_strbuf *buffer; 24 | cmark_strbuf *prefix; 25 | int column; 26 | int width; 27 | int need_cr; 28 | bufsize_t last_breakable; 29 | bool begin_line; 30 | bool begin_content; 31 | bool no_linebreaks; 32 | bool in_tight_list_item; 33 | struct block_number *block_number_in_list_item; 34 | void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char); 35 | void (*cr)(struct cmark_renderer *); 36 | void (*blankline)(struct cmark_renderer *); 37 | void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping); 38 | }; 39 | 40 | typedef struct cmark_renderer cmark_renderer; 41 | 42 | void cmark_render_ascii(cmark_renderer *renderer, const char *s); 43 | 44 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); 45 | 46 | char *cmark_render(cmark_node *root, int options, int width, 47 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 48 | unsigned char), 49 | int 
(*render_node)(cmark_renderer *renderer, 50 | cmark_node *node, 51 | cmark_event_type ev_type, int options)); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /ext/scanners.h: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "chunk.h" 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, 9 | bufsize_t offset); 10 | bufsize_t _scan_scheme(const unsigned char *p); 11 | bufsize_t _scan_autolink_uri(const unsigned char *p); 12 | bufsize_t _scan_autolink_email(const unsigned char *p); 13 | bufsize_t _scan_html_tag(const unsigned char *p); 14 | bufsize_t _scan_html_comment(const unsigned char *p); 15 | bufsize_t _scan_html_pi(const unsigned char *p); 16 | bufsize_t _scan_html_declaration(const unsigned char *p); 17 | bufsize_t _scan_html_cdata(const unsigned char *p); 18 | bufsize_t _scan_html_block_start(const unsigned char *p); 19 | bufsize_t _scan_html_block_start_7(const unsigned char *p); 20 | bufsize_t _scan_html_block_end_1(const unsigned char *p); 21 | bufsize_t _scan_html_block_end_2(const unsigned char *p); 22 | bufsize_t _scan_html_block_end_3(const unsigned char *p); 23 | bufsize_t _scan_html_block_end_4(const unsigned char *p); 24 | bufsize_t _scan_html_block_end_5(const unsigned char *p); 25 | bufsize_t _scan_link_title(const unsigned char *p); 26 | bufsize_t _scan_spacechars(const unsigned char *p); 27 | bufsize_t _scan_atx_heading_start(const unsigned char *p); 28 | bufsize_t _scan_setext_heading_line(const unsigned char *p); 29 | bufsize_t _scan_open_code_fence(const unsigned char *p); 30 | bufsize_t _scan_close_code_fence(const unsigned char *p); 31 | bufsize_t _scan_dangerous_url(const unsigned char *p); 32 | 33 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) 34 | #define 
scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) 35 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) 36 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) 37 | #define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n) 38 | #define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n) 39 | #define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n) 40 | #define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n) 41 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) 42 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) 43 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) 44 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n) 45 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n) 46 | #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n) 47 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n) 48 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n) 49 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n) 50 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n) 51 | #define scan_setext_heading_line(c, n) \ 52 | _scan_at(&_scan_setext_heading_line, c, n) 53 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n) 54 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) 55 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | -------------------------------------------------------------------------------- /ext/utf8.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cmark_ctype.h" 6 | #include "utf8.h" 7 | 8 | static const int8_t utf8proc_utf8class[256] = { 9 | 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 13 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 18 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 19 | 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}; 20 | 21 | static void encode_unknown(cmark_strbuf *buf) { 22 | static const uint8_t repl[] = {239, 191, 189}; 23 | cmark_strbuf_put(buf, repl, 3); 24 | } 25 | 26 | static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { 27 | int length, i; 28 | 29 | if (!str_len) 30 | return 0; 31 | 32 | length = utf8proc_utf8class[str[0]]; 33 | 34 | if (!length) 35 | return -1; 36 | 37 | if (str_len >= 0 && (bufsize_t)length > str_len) 38 | return -str_len; 39 | 40 | for (i = 1; i < length; i++) { 41 | if ((str[i] & 0xC0) != 0x80) 42 | return -i; 43 | } 44 | 45 | return length; 46 | } 47 | 48 | // Validate a single UTF-8 character according to RFC 3629. 
49 | static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { 50 | int length = utf8proc_utf8class[str[0]]; 51 | 52 | if (!length) 53 | return -1; 54 | 55 | if ((bufsize_t)length > str_len) 56 | return -str_len; 57 | 58 | switch (length) { 59 | case 2: 60 | if ((str[1] & 0xC0) != 0x80) 61 | return -1; 62 | if (str[0] < 0xC2) { 63 | // Overlong 64 | return -length; 65 | } 66 | break; 67 | 68 | case 3: 69 | if ((str[1] & 0xC0) != 0x80) 70 | return -1; 71 | if ((str[2] & 0xC0) != 0x80) 72 | return -2; 73 | if (str[0] == 0xE0) { 74 | if (str[1] < 0xA0) { 75 | // Overlong 76 | return -length; 77 | } 78 | } else if (str[0] == 0xED) { 79 | if (str[1] >= 0xA0) { 80 | // Surrogate 81 | return -length; 82 | } 83 | } 84 | break; 85 | 86 | case 4: 87 | if ((str[1] & 0xC0) != 0x80) 88 | return -1; 89 | if ((str[2] & 0xC0) != 0x80) 90 | return -2; 91 | if ((str[3] & 0xC0) != 0x80) 92 | return -3; 93 | if (str[0] == 0xF0) { 94 | if (str[1] < 0x90) { 95 | // Overlong 96 | return -length; 97 | } 98 | } else if (str[0] >= 0xF4) { 99 | if (str[0] > 0xF4 || str[1] >= 0x90) { 100 | // Above 0x10FFFF 101 | return -length; 102 | } 103 | } 104 | break; 105 | } 106 | 107 | return length; 108 | } 109 | 110 | void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, 111 | bufsize_t size) { 112 | bufsize_t i = 0; 113 | 114 | while (i < size) { 115 | bufsize_t org = i; 116 | int charlen = 0; 117 | 118 | while (i < size) { 119 | if (line[i] < 0x80 && line[i] != 0) { 120 | i++; 121 | } else if (line[i] >= 0x80) { 122 | charlen = utf8proc_valid(line + i, size - i); 123 | if (charlen < 0) { 124 | charlen = -charlen; 125 | break; 126 | } 127 | i += charlen; 128 | } else if (line[i] == 0) { 129 | // ASCII NUL is technically valid but rejected 130 | // for security reasons. 
131 | charlen = 1; 132 | break; 133 | } 134 | } 135 | 136 | if (i > org) { 137 | cmark_strbuf_put(ob, line + org, i - org); 138 | } 139 | 140 | if (i >= size) { 141 | break; 142 | } else { 143 | // Invalid UTF-8 144 | encode_unknown(ob); 145 | i += charlen; 146 | } 147 | } 148 | } 149 | 150 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, 151 | int32_t *dst) { 152 | int length; 153 | int32_t uc = -1; 154 | 155 | *dst = -1; 156 | length = utf8proc_charlen(str, str_len); 157 | if (length < 0) 158 | return -1; 159 | 160 | switch (length) { 161 | case 1: 162 | uc = str[0]; 163 | break; 164 | case 2: 165 | uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); 166 | if (uc < 0x80) 167 | uc = -1; 168 | break; 169 | case 3: 170 | uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F); 171 | if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) 172 | uc = -1; 173 | break; 174 | case 4: 175 | uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) + 176 | ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); 177 | if (uc < 0x10000 || uc >= 0x110000) 178 | uc = -1; 179 | break; 180 | } 181 | 182 | if (uc < 0) 183 | return -1; 184 | 185 | *dst = uc; 186 | return length; 187 | } 188 | 189 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { 190 | uint8_t dst[4]; 191 | bufsize_t len = 0; 192 | 193 | assert(uc >= 0); 194 | 195 | if (uc < 0x80) { 196 | dst[0] = (uint8_t)(uc); 197 | len = 1; 198 | } else if (uc < 0x800) { 199 | dst[0] = (uint8_t)(0xC0 + (uc >> 6)); 200 | dst[1] = 0x80 + (uc & 0x3F); 201 | len = 2; 202 | } else if (uc < 0x10000) { 203 | dst[0] = (uint8_t)(0xE0 + (uc >> 12)); 204 | dst[1] = 0x80 + ((uc >> 6) & 0x3F); 205 | dst[2] = 0x80 + (uc & 0x3F); 206 | len = 3; 207 | } else if (uc < 0x110000) { 208 | dst[0] = (uint8_t)(0xF0 + (uc >> 18)); 209 | dst[1] = 0x80 + ((uc >> 12) & 0x3F); 210 | dst[2] = 0x80 + ((uc >> 6) & 0x3F); 211 | dst[3] = 0x80 + (uc & 0x3F); 212 | len = 4; 213 | } else { 214 | encode_unknown(buf); 215 | return; 216 | } 217 
| 218 | cmark_strbuf_put(buf, dst, len); 219 | } 220 | 221 | #include "case_fold.inc" 222 | 223 | int cf_compare(const void *v1, const void *v2) { 224 | uint32_t entry1 = *(uint32_t *) v1; 225 | uint32_t entry2 = *(uint32_t *) v2; 226 | 227 | return (int32_t) CF_CODE_POINT(entry1) - (int32_t) CF_CODE_POINT(entry2); 228 | } 229 | 230 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, 231 | bufsize_t len) { 232 | int32_t c; 233 | 234 | while (len > 0) { 235 | bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c); 236 | 237 | if (char_len == 1) { 238 | if (c >= 'A' && c <= 'Z') 239 | c += 'a' - 'A'; 240 | cmark_strbuf_putc(dest, c); 241 | } else if (c >= CF_MAX) { 242 | cmark_strbuf_put(dest, str, char_len); 243 | } else if (char_len >= 0) { 244 | uint32_t key = c; 245 | uint32_t *entry = bsearch(&key, cf_table, 246 | CF_TABLE_SIZE, sizeof(uint32_t), 247 | cf_compare); 248 | if (entry == NULL) { 249 | cmark_strbuf_put(dest, str, char_len); 250 | } else { 251 | cmark_strbuf_put(dest, cf_repl + CF_REPL_IDX(*entry), 252 | CF_REPL_SIZE(*entry)); 253 | } 254 | } else { 255 | encode_unknown(dest); 256 | char_len = -char_len; 257 | } 258 | 259 | str += char_len; 260 | len -= char_len; 261 | } 262 | } 263 | 264 | // matches anything in the Zs class, plus LF, CR, TAB, FF. 265 | int cmark_utf8proc_is_space(int32_t uc) { 266 | return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 || 267 | uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 || 268 | uc == 8287 || uc == 12288); 269 | } 270 | 271 | // matches anything in the P or S classes. 
272 | int cmark_utf8proc_is_punctuation_or_symbol(int32_t uc) { 273 | if (uc < 128) { 274 | return cmark_ispunct((char)uc); 275 | } else { 276 | return ( 277 | uc > 128 && 278 | ((uc >= 161 && uc <= 169) || (uc >= 171 && uc <= 172) || 279 | (uc >= 174 && uc <= 177) || (uc == 180) || (uc >= 182 && uc <= 184) || 280 | (uc == 187) || (uc == 191) || (uc == 215) || (uc == 247) || 281 | (uc >= 706 && uc <= 709) || (uc >= 722 && uc <= 735) || 282 | (uc >= 741 && uc <= 747) || (uc == 749) || (uc >= 751 && uc <= 767) || 283 | (uc == 885) || (uc == 894) || (uc >= 900 && uc <= 901) || 284 | (uc == 903) || (uc == 1014) || (uc == 1154) || 285 | (uc >= 1370 && uc <= 1375) || (uc >= 1417 && uc <= 1418) || 286 | (uc >= 1421 && uc <= 1423) || (uc == 1470) || (uc == 1472) || 287 | (uc == 1475) || (uc == 1478) || (uc >= 1523 && uc <= 1524) || 288 | (uc >= 1542 && uc <= 1551) || (uc == 1563) || 289 | (uc >= 1565 && uc <= 1567) || (uc >= 1642 && uc <= 1645) || 290 | (uc == 1748) || (uc == 1758) || (uc == 1769) || 291 | (uc >= 1789 && uc <= 1790) || (uc >= 1792 && uc <= 1805) || 292 | (uc >= 2038 && uc <= 2041) || (uc >= 2046 && uc <= 2047) || 293 | (uc >= 2096 && uc <= 2110) || (uc == 2142) || (uc == 2184) || 294 | (uc >= 2404 && uc <= 2405) || (uc == 2416) || 295 | (uc >= 2546 && uc <= 2547) || (uc >= 2554 && uc <= 2555) || 296 | (uc == 2557) || (uc == 2678) || (uc >= 2800 && uc <= 2801) || 297 | (uc == 2928) || (uc >= 3059 && uc <= 3066) || (uc == 3191) || 298 | (uc == 3199) || (uc == 3204) || (uc == 3407) || (uc == 3449) || 299 | (uc == 3572) || (uc == 3647) || (uc == 3663) || 300 | (uc >= 3674 && uc <= 3675) || (uc >= 3841 && uc <= 3863) || 301 | (uc >= 3866 && uc <= 3871) || (uc == 3892) || (uc == 3894) || 302 | (uc == 3896) || (uc >= 3898 && uc <= 3901) || (uc == 3973) || 303 | (uc >= 4030 && uc <= 4037) || (uc >= 4039 && uc <= 4044) || 304 | (uc >= 4046 && uc <= 4058) || (uc >= 4170 && uc <= 4175) || 305 | (uc >= 4254 && uc <= 4255) || (uc == 4347) || 306 | (uc >= 4960 && uc <= 
4968) || (uc >= 5008 && uc <= 5017) || 307 | (uc == 5120) || (uc >= 5741 && uc <= 5742) || 308 | (uc >= 5787 && uc <= 5788) || (uc >= 5867 && uc <= 5869) || 309 | (uc >= 5941 && uc <= 5942) || (uc >= 6100 && uc <= 6102) || 310 | (uc >= 6104 && uc <= 6107) || (uc >= 6144 && uc <= 6154) || 311 | (uc == 6464) || (uc >= 6468 && uc <= 6469) || 312 | (uc >= 6622 && uc <= 6655) || (uc >= 6686 && uc <= 6687) || 313 | (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) || 314 | (uc >= 7002 && uc <= 7018) || (uc >= 7028 && uc <= 7038) || 315 | (uc >= 7164 && uc <= 7167) || (uc >= 7227 && uc <= 7231) || 316 | (uc >= 7294 && uc <= 7295) || (uc >= 7360 && uc <= 7367) || 317 | (uc == 7379) || (uc == 8125) || (uc >= 8127 && uc <= 8129) || 318 | (uc >= 8141 && uc <= 8143) || (uc >= 8157 && uc <= 8159) || 319 | (uc >= 8173 && uc <= 8175) || (uc >= 8189 && uc <= 8190) || 320 | (uc >= 8208 && uc <= 8231) || (uc >= 8240 && uc <= 8286) || 321 | (uc >= 8314 && uc <= 8318) || (uc >= 8330 && uc <= 8334) || 322 | (uc >= 8352 && uc <= 8384) || (uc >= 8448 && uc <= 8449) || 323 | (uc >= 8451 && uc <= 8454) || (uc >= 8456 && uc <= 8457) || 324 | (uc == 8468) || (uc >= 8470 && uc <= 8472) || 325 | (uc >= 8478 && uc <= 8483) || (uc == 8485) || (uc == 8487) || 326 | (uc == 8489) || (uc == 8494) || (uc >= 8506 && uc <= 8507) || 327 | (uc >= 8512 && uc <= 8516) || (uc >= 8522 && uc <= 8525) || 328 | (uc == 8527) || (uc >= 8586 && uc <= 8587) || 329 | (uc >= 8592 && uc <= 9254) || (uc >= 9280 && uc <= 9290) || 330 | (uc >= 9372 && uc <= 9449) || (uc >= 9472 && uc <= 10101) || 331 | (uc >= 10132 && uc <= 11123) || (uc >= 11126 && uc <= 11157) || 332 | (uc >= 11159 && uc <= 11263) || (uc >= 11493 && uc <= 11498) || 333 | (uc >= 11513 && uc <= 11516) || (uc >= 11518 && uc <= 11519) || 334 | (uc == 11632) || (uc >= 11776 && uc <= 11822) || 335 | (uc >= 11824 && uc <= 11869) || (uc >= 11904 && uc <= 11929) || 336 | (uc >= 11931 && uc <= 12019) || (uc >= 12032 && uc <= 12245) || 337 | (uc >= 12272 
&& uc <= 12283) || (uc >= 12289 && uc <= 12292) || 338 | (uc >= 12296 && uc <= 12320) || (uc == 12336) || 339 | (uc >= 12342 && uc <= 12343) || (uc >= 12349 && uc <= 12351) || 340 | (uc >= 12443 && uc <= 12444) || (uc == 12448) || (uc == 12539) || 341 | (uc >= 12688 && uc <= 12689) || (uc >= 12694 && uc <= 12703) || 342 | (uc >= 12736 && uc <= 12771) || (uc >= 12800 && uc <= 12830) || 343 | (uc >= 12842 && uc <= 12871) || (uc == 12880) || 344 | (uc >= 12896 && uc <= 12927) || (uc >= 12938 && uc <= 12976) || 345 | (uc >= 12992 && uc <= 13311) || (uc >= 19904 && uc <= 19967) || 346 | (uc >= 42128 && uc <= 42182) || (uc >= 42238 && uc <= 42239) || 347 | (uc >= 42509 && uc <= 42511) || (uc == 42611) || (uc == 42622) || 348 | (uc >= 42738 && uc <= 42743) || (uc >= 42752 && uc <= 42774) || 349 | (uc >= 42784 && uc <= 42785) || (uc >= 42889 && uc <= 42890) || 350 | (uc >= 43048 && uc <= 43051) || (uc >= 43062 && uc <= 43065) || 351 | (uc >= 43124 && uc <= 43127) || (uc >= 43214 && uc <= 43215) || 352 | (uc >= 43256 && uc <= 43258) || (uc == 43260) || 353 | (uc >= 43310 && uc <= 43311) || (uc == 43359) || 354 | (uc >= 43457 && uc <= 43469) || (uc >= 43486 && uc <= 43487) || 355 | (uc >= 43612 && uc <= 43615) || (uc >= 43639 && uc <= 43641) || 356 | (uc >= 43742 && uc <= 43743) || (uc >= 43760 && uc <= 43761) || 357 | (uc == 43867) || (uc >= 43882 && uc <= 43883) || (uc == 44011) || 358 | (uc == 64297) || (uc >= 64434 && uc <= 64450) || 359 | (uc >= 64830 && uc <= 64847) || (uc == 64975) || 360 | (uc >= 65020 && uc <= 65023) || (uc >= 65040 && uc <= 65049) || 361 | (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65126) || 362 | (uc >= 65128 && uc <= 65131) || (uc >= 65281 && uc <= 65295) || 363 | (uc >= 65306 && uc <= 65312) || (uc >= 65339 && uc <= 65344) || 364 | (uc >= 65371 && uc <= 65381) || (uc >= 65504 && uc <= 65510) || 365 | (uc >= 65512 && uc <= 65518) || (uc >= 65532 && uc <= 65533) || 366 | (uc >= 65792 && uc <= 65794) || (uc >= 65847 && uc <= 65855) || 
367 | (uc >= 65913 && uc <= 65929) || (uc >= 65932 && uc <= 65934) || 368 | (uc >= 65936 && uc <= 65948) || (uc == 65952) || 369 | (uc >= 66000 && uc <= 66044) || (uc == 66463) || (uc == 66512) || 370 | (uc == 66927) || (uc == 67671) || (uc >= 67703 && uc <= 67704) || 371 | (uc == 67871) || (uc == 67903) || (uc >= 68176 && uc <= 68184) || 372 | (uc == 68223) || (uc == 68296) || (uc >= 68336 && uc <= 68342) || 373 | (uc >= 68409 && uc <= 68415) || (uc >= 68505 && uc <= 68508) || 374 | (uc == 69293) || (uc >= 69461 && uc <= 69465) || 375 | (uc >= 69510 && uc <= 69513) || (uc >= 69703 && uc <= 69709) || 376 | (uc >= 69819 && uc <= 69820) || (uc >= 69822 && uc <= 69825) || 377 | (uc >= 69952 && uc <= 69955) || (uc >= 70004 && uc <= 70005) || 378 | (uc >= 70085 && uc <= 70088) || (uc == 70093) || (uc == 70107) || 379 | (uc >= 70109 && uc <= 70111) || (uc >= 70200 && uc <= 70205) || 380 | (uc == 70313) || (uc >= 70731 && uc <= 70735) || 381 | (uc >= 70746 && uc <= 70747) || (uc == 70749) || (uc == 70854) || 382 | (uc >= 71105 && uc <= 71127) || (uc >= 71233 && uc <= 71235) || 383 | (uc >= 71264 && uc <= 71276) || (uc == 71353) || 384 | (uc >= 71484 && uc <= 71487) || (uc == 71739) || 385 | (uc >= 72004 && uc <= 72006) || (uc == 72162) || 386 | (uc >= 72255 && uc <= 72262) || (uc >= 72346 && uc <= 72348) || 387 | (uc >= 72350 && uc <= 72354) || (uc >= 72448 && uc <= 72457) || 388 | (uc >= 72769 && uc <= 72773) || (uc >= 72816 && uc <= 72817) || 389 | (uc >= 73463 && uc <= 73464) || (uc >= 73539 && uc <= 73551) || 390 | (uc >= 73685 && uc <= 73713) || (uc == 73727) || 391 | (uc >= 74864 && uc <= 74868) || (uc >= 77809 && uc <= 77810) || 392 | (uc >= 92782 && uc <= 92783) || (uc == 92917) || 393 | (uc >= 92983 && uc <= 92991) || (uc >= 92996 && uc <= 92997) || 394 | (uc >= 93847 && uc <= 93850) || (uc == 94178) || (uc == 113820) || 395 | (uc == 113823) || (uc >= 118608 && uc <= 118723) || 396 | (uc >= 118784 && uc <= 119029) || (uc >= 119040 && uc <= 119078) || 397 | (uc >= 
119081 && uc <= 119140) || (uc >= 119146 && uc <= 119148) || 398 | (uc >= 119171 && uc <= 119172) || (uc >= 119180 && uc <= 119209) || 399 | (uc >= 119214 && uc <= 119274) || (uc >= 119296 && uc <= 119361) || 400 | (uc == 119365) || (uc >= 119552 && uc <= 119638) || (uc == 120513) || 401 | (uc == 120539) || (uc == 120571) || (uc == 120597) || (uc == 120629) || 402 | (uc == 120655) || (uc == 120687) || (uc == 120713) || (uc == 120745) || 403 | (uc == 120771) || (uc >= 120832 && uc <= 121343) || 404 | (uc >= 121399 && uc <= 121402) || (uc >= 121453 && uc <= 121460) || 405 | (uc >= 121462 && uc <= 121475) || (uc >= 121477 && uc <= 121483) || 406 | (uc == 123215) || (uc == 123647) || (uc >= 125278 && uc <= 125279) || 407 | (uc == 126124) || (uc == 126128) || (uc == 126254) || 408 | (uc >= 126704 && uc <= 126705) || (uc >= 126976 && uc <= 127019) || 409 | (uc >= 127024 && uc <= 127123) || (uc >= 127136 && uc <= 127150) || 410 | (uc >= 127153 && uc <= 127167) || (uc >= 127169 && uc <= 127183) || 411 | (uc >= 127185 && uc <= 127221) || (uc >= 127245 && uc <= 127405) || 412 | (uc >= 127462 && uc <= 127490) || (uc >= 127504 && uc <= 127547) || 413 | (uc >= 127552 && uc <= 127560) || (uc >= 127568 && uc <= 127569) || 414 | (uc >= 127584 && uc <= 127589) || (uc >= 127744 && uc <= 128727) || 415 | (uc >= 128732 && uc <= 128748) || (uc >= 128752 && uc <= 128764) || 416 | (uc >= 128768 && uc <= 128886) || (uc >= 128891 && uc <= 128985) || 417 | (uc >= 128992 && uc <= 129003) || (uc == 129008) || 418 | (uc >= 129024 && uc <= 129035) || (uc >= 129040 && uc <= 129095) || 419 | (uc >= 129104 && uc <= 129113) || (uc >= 129120 && uc <= 129159) || 420 | (uc >= 129168 && uc <= 129197) || (uc >= 129200 && uc <= 129201) || 421 | (uc >= 129280 && uc <= 129619) || (uc >= 129632 && uc <= 129645) || 422 | (uc >= 129648 && uc <= 129660) || (uc >= 129664 && uc <= 129672) || 423 | (uc >= 129680 && uc <= 129725) || (uc >= 129727 && uc <= 129733) || 424 | (uc >= 129742 && uc <= 129755) || (uc >= 
129760 && uc <= 129768) || 425 | (uc >= 129776 && uc <= 129784) || (uc >= 129792 && uc <= 129938) || 426 | (uc >= 129940 && uc <= 129994))); 427 | } 428 | } 429 | -------------------------------------------------------------------------------- /ext/utf8.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_UTF8_H 2 | #define CMARK_UTF8_H 3 | 4 | #include 5 | #include "buffer.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, 12 | bufsize_t len); 13 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); 14 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); 15 | void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, 16 | bufsize_t size); 17 | int cmark_utf8proc_is_space(int32_t uc); 18 | int cmark_utf8proc_is_punctuation_or_symbol(int32_t uc); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /ext/xml.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | 11 | #define BUFFER_SIZE 100 12 | #define MAX_INDENT 40 13 | 14 | // Functions to convert cmark_nodes to XML strings. 15 | 16 | // C0 control characters, U+FFFE and U+FFFF aren't allowed in XML. 
17 | static const char XML_ESCAPE_TABLE[256] = { 18 | /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 19 | /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20 | /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0, 22 | /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 | /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 | /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 30 | /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 | /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 | /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 | /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | }; 35 | 36 | // U+FFFD Replacement Character encoded in UTF-8 37 | #define UTF8_REPL "\xEF\xBF\xBD" 38 | 39 | static const char *XML_ESCAPES[] = { 40 | "", UTF8_REPL, """, "&", "<", ">" 41 | }; 42 | 43 | static void escape_xml(cmark_strbuf *ob, const unsigned char *src, 44 | bufsize_t size) { 45 | bufsize_t i = 0, org, esc = 0; 46 | 47 | while (i < size) { 48 | org = i; 49 | while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0) 50 | i++; 51 | 52 | if (i > org) 53 | cmark_strbuf_put(ob, src + org, i - org); 54 | 55 | if (i >= size) 56 | break; 57 | 58 | if (esc == 9) { 59 | // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to 60 | // be changed. 61 | // We know that src[i] is 0xBE or 0xBF. 
62 | if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { 63 | cmark_strbuf_putc(ob, 0xBD); 64 | } else { 65 | cmark_strbuf_putc(ob, src[i]); 66 | } 67 | } else { 68 | cmark_strbuf_puts(ob, XML_ESCAPES[esc]); 69 | } 70 | 71 | i++; 72 | } 73 | } 74 | 75 | static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) { 76 | if (source) 77 | escape_xml(dest, source, (bufsize_t)strlen((char *)source)); 78 | } 79 | 80 | struct render_state { 81 | cmark_strbuf *xml; 82 | int indent; 83 | }; 84 | 85 | static inline void indent(struct render_state *state) { 86 | int i; 87 | for (i = 0; i < state->indent && i < MAX_INDENT; i++) { 88 | cmark_strbuf_putc(state->xml, ' '); 89 | } 90 | } 91 | 92 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, 93 | struct render_state *state, int options) { 94 | cmark_strbuf *xml = state->xml; 95 | bool literal = false; 96 | cmark_delim_type delim; 97 | bool entering = (ev_type == CMARK_EVENT_ENTER); 98 | char buffer[BUFFER_SIZE]; 99 | 100 | if (entering) { 101 | indent(state); 102 | cmark_strbuf_putc(xml, '<'); 103 | cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 104 | 105 | if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { 106 | snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", 107 | node->start_line, node->start_column, node->end_line, 108 | node->end_column); 109 | cmark_strbuf_puts(xml, buffer); 110 | } 111 | 112 | literal = false; 113 | 114 | switch (node->type) { 115 | case CMARK_NODE_DOCUMENT: 116 | cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); 117 | break; 118 | case CMARK_NODE_TEXT: 119 | case CMARK_NODE_CODE: 120 | case CMARK_NODE_HTML_BLOCK: 121 | case CMARK_NODE_HTML_INLINE: 122 | cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 123 | escape_xml(xml, node->data, node->len); 124 | cmark_strbuf_puts(xml, "as.heading.level); 154 | cmark_strbuf_puts(xml, buffer); 155 | break; 156 | case CMARK_NODE_CODE_BLOCK: 157 | if (node->as.code.info) { 
158 | cmark_strbuf_puts(xml, " info=\""); 159 | escape_xml_str(xml, node->as.code.info); 160 | cmark_strbuf_putc(xml, '"'); 161 | } 162 | cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 163 | escape_xml(xml, node->data, node->len); 164 | cmark_strbuf_puts(xml, "as.custom.on_enter); 172 | cmark_strbuf_putc(xml, '"'); 173 | cmark_strbuf_puts(xml, " on_exit=\""); 174 | escape_xml_str(xml, node->as.custom.on_exit); 175 | cmark_strbuf_putc(xml, '"'); 176 | break; 177 | case CMARK_NODE_LINK: 178 | case CMARK_NODE_IMAGE: 179 | cmark_strbuf_puts(xml, " destination=\""); 180 | escape_xml_str(xml, node->as.link.url); 181 | cmark_strbuf_putc(xml, '"'); 182 | if (node->as.link.title) { 183 | cmark_strbuf_puts(xml, " title=\""); 184 | escape_xml_str(xml, node->as.link.title); 185 | cmark_strbuf_putc(xml, '"'); 186 | } 187 | break; 188 | default: 189 | break; 190 | } 191 | if (node->first_child) { 192 | state->indent += 2; 193 | } else if (!literal) { 194 | cmark_strbuf_puts(xml, " /"); 195 | } 196 | cmark_strbuf_puts(xml, ">\n"); 197 | 198 | } else if (node->first_child) { 199 | state->indent -= 2; 200 | indent(state); 201 | cmark_strbuf_puts(xml, "\n"); 204 | } 205 | 206 | return 1; 207 | } 208 | 209 | char *cmark_render_xml(cmark_node *root, int options) { 210 | char *result; 211 | cmark_strbuf xml = CMARK_BUF_INIT(root->mem); 212 | cmark_event_type ev_type; 213 | cmark_node *cur; 214 | struct render_state state = {&xml, 0}; 215 | 216 | cmark_iter *iter = cmark_iter_new(root); 217 | 218 | cmark_strbuf_puts(state.xml, "\n"); 219 | cmark_strbuf_puts(state.xml, 220 | "\n"); 221 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 222 | cur = cmark_iter_get_node(iter); 223 | S_render_node(cur, ev_type, &state, options); 224 | } 225 | result = (char *)cmark_strbuf_detach(&xml); 226 | 227 | cmark_iter_free(iter); 228 | return result; 229 | } 230 | -------------------------------------------------------------------------------- /rockspec.in: 
-------------------------------------------------------------------------------- 1 | package = "cmark" 2 | version = "_VERSION-_REVISION" 3 | source = { 4 | url = "git://github.com/jgm/cmark-lua", 5 | tag = "_VERSION" 6 | } 7 | description = { 8 | summary = [[Lua wrapper for libcmark, CommonMark Markdown parsing 9 | and rendering library]], 10 | detailed = [[cmark exposes the entire API of libcmark, as 11 | documented in the `cmark(3)` man page, and adds a 12 | more lua-esque interface for walking the node tree.]], 13 | homepage = "https://github.com/jgm/cmark-lua", 14 | license = "BSD2", 15 | maintainer = "John MacFarlane ", 16 | } 17 | dependencies = { 18 | } 19 | build = { 20 | type = "builtin", 21 | modules = { 22 | cmark = { 23 | sources = { "cmark_wrap.c", 24 | "ext/blocks.c", 25 | "ext/houdini_html_u.c", 26 | "ext/references.c", 27 | "ext/buffer.c", 28 | "ext/html.c", 29 | "ext/render.c", 30 | "ext/cmark.c", 31 | "ext/inlines.c", 32 | "ext/scanners.c", 33 | "ext/cmark_ctype.c", 34 | "ext/iterator.c", 35 | "ext/utf8.c", 36 | "ext/commonmark.c", 37 | "ext/latex.c", 38 | "ext/xml.c", 39 | "ext/houdini_href_e.c", 40 | "ext/man.c", 41 | "ext/houdini_html_e.c", 42 | "ext/node.c", 43 | }, 44 | incdirs = { ".", "ext" } 45 | }, 46 | ["cmark.builder"] = "cmark/builder.lua" 47 | }, 48 | } 49 | -------------------------------------------------------------------------------- /test.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env lua 2 | require 'Test.More' 3 | 4 | package.path = "./?.lua;" .. package.path 5 | package.cpath = "./?.so;" .. 
package.cpath 6 | 7 | local cmark = require 'cmark' 8 | local builder = require 'cmark.builder' 9 | local tests = require 'spec-tests' 10 | 11 | subtest("spec tests (cmark)", function() 12 | for _,test in ipairs(tests) do 13 | local doc = cmark.parse_string(test.markdown, cmark.OPT_DEFAULT) 14 | local html = cmark.render_html(doc, cmark.OPT_DEFAULT + cmark.OPT_UNSAFE) 15 | is(html, test.html, "example " .. tostring(test.example) .. 16 | " (lines " .. tostring(test.start_line) .. " - " .. 17 | tostring(test.end_line) .. ")") 18 | end 19 | end) 20 | 21 | local b = builder 22 | 23 | local builds = function(node, expected, description) 24 | local rendered = cmark.render_html(node, cmark.OPT_DEFAULT + cmark.OPT_UNSAFE) 25 | return is(rendered, expected, description) 26 | end 27 | 28 | local returns_error = function(f, arg, expected_msg, description) 29 | local ok, msg = f(arg) 30 | is(ok, nil, description .. ' returns error status') 31 | is(msg, expected_msg, description .. ' error message') 32 | end 33 | 34 | builds(b.document { b.paragraph {"Hello ", b.emph { "world" }, "."} }, 35 | '

Hello world.

\n', "basic builder example") 36 | 37 | builds(b.document "hi", '

hi

\n', "promotion of string to block") 38 | 39 | builds(b.document(b.text "hi"), '

hi

\n', "promotion of inline to block") 40 | 41 | builds(b.paragraph(77), '

77

\n', "promotion of number to inline") 42 | 43 | builds(b.block_quote { b.paragraph "hi", b.paragraph "lo" }, 44 | '
\n

hi

\n

lo

\n
\n', "blockquote") 45 | 46 | builds(b.text("hello"), "hello", "b.text") 47 | 48 | builds(b.link{url = "url", "hello"}, 49 | 'hello', "b.link with string") 50 | builds(b.link{url = "url", b.text("hello")}, 51 | 'hello', "b.link with node") 52 | 53 | builds(b.bullet_list { tight = true, 54 | b.item(b.paragraph "hi"), 55 | b.item(b.paragraph "lo") }, 56 | '
    \n
  • hi
  • \n
  • lo
  • \n
\n', "list turns table elts to items") 57 | 58 | builds(b.bullet_list { tight = true, "hi", "lo" }, 59 | '
    \n
  • hi
  • \n
  • lo
  • \n
\n', "list turns table elts to items") 60 | 61 | builds(b.ordered_list { tight = false, start = 2, delim = ')', "hi", "lo" }, 62 | '
    \n
  1. \n

    hi

    \n
  2. \n
  3. \n

    lo

    \n
  4. \n
\n', 63 | "ordered list") 64 | 65 | builds(b.bullet_list{ b.item 66 | { b.paragraph "one", b.paragraph "two", tight = false }}, 67 | '
    \n
  • \n

    one

    \n

    two

    \n
  • \n
\n', 68 | "bullet list with two paragraphs in an item") 69 | 70 | builds(b.code_block "some code\n ok", 71 | '
some code\n  ok
\n', "basic code block") 72 | 73 | builds(b.code_block({info = "ruby", "some code\n ok"}), 74 | '
some code\n  ok
\n', 75 | "code block with info") 76 | 77 | builds(b.html_block '
bar
', 78 | '
bar
\n', "html block") 79 | 80 | builds(b.custom_block{ on_enter = "{{", on_exit = "}}", "foo\n bar"}, 81 | '{{\nfoo\n bar\n}}\n', "custom block") 82 | 83 | builds(b.thematic_break(), '
\n', "thematic break") 84 | 85 | builds(b.heading{level = 2, b.emph 'Foo', ' bar'}, 86 | '

Foo bar

\n', "heading") 87 | 88 | local link = b.link{url = "url", 89 | b.text("hello"), b.text("there")} 90 | 91 | is(#(b.get_children(link)), 2, "get_children has right length") 92 | 93 | builds(link, 94 | 'hellothere', "b.link with list of nodes") 95 | 96 | builds(b.link{url = "url", title = "tit", "hello"}, 97 | 'hello', "b.link with title") 98 | 99 | builds(b.image{url = "url", title = "tit", "hello"}, 100 | 'hello', "b.image with title") 101 | 102 | builds(b.emph "hi", 'hi', "emph") 103 | 104 | builds(b.strong(b.emph "hi"), 'hi', "strong emph") 105 | 106 | returns_error(b.emph, b.paragraph "text", 107 | "Tried to add a node with class block to a node with class inline", 108 | "paragraph inside emph") 109 | 110 | builds(b.paragraph{"hi", b.linebreak(), "lo"}, '

hi
\nlo

\n', 111 | "linebreak") 112 | 113 | builds(b.paragraph{"hi", b.linebreak(), "lo"}, '

hi
\nlo

\n', 114 | "linebreak, levaing off ()") 115 | 116 | builds(b.paragraph{"hi", b.softbreak, "lo"}, '

hi\nlo

\n', 117 | "softbreak") 118 | 119 | builds(b.code "some code", 'some code', "code") 120 | 121 | builds(b.html_inline "&", '&', "raw html inline") 122 | 123 | builds(b.custom_inline{ on_enter = "{", on_exit = ".", "&" }, 124 | '{&.', "custom inline") 125 | 126 | 127 | done_testing() 128 | --------------------------------------------------------------------------------