71 |
72 |
73 | % ./hoedown
74 | - one
75 | - two
76 | - three
77 | - four
78 | ^D
79 |
80 |
one
81 |
82 |
83 |
two
84 |
three
85 |
four
86 |
87 |
88 |
89 |
90 | `discount`
91 | ----------
92 |
93 | `cmark` is about six times faster.
94 |
95 | `kramdown`
96 | ----------
97 |
98 | `cmark` is about a hundred times faster.
99 |
100 | `kramdown` also gets tied in knots by pathological input like
101 |
102 | python -c 'print(("[" * 50000) + "a" + ("]" * 50000))'
103 |
104 |
105 |
--------------------------------------------------------------------------------
/tools/xml2md_gfm.xsl:
--------------------------------------------------------------------------------
1 |
2 |
3 |
11 |
12 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | |
39 |
40 |
|
41 |
42 |
43 |
44 | ---: |
45 |
46 |
47 | :--- |
48 |
49 |
50 | :---: |
51 |
52 |
53 | --- |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | |
63 |
64 |
65 |
66 | |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | ~~
76 |
77 | ~~
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/src/houdini_html_e.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "houdini.h"
6 |
7 | /**
8 | * According to the OWASP rules:
9 | *
10 | * & --> &amp;
11 | * < --> &lt;
12 | * > --> &gt;
13 | * " --> &quot;
14 | * ' --> &#39; &apos; is not recommended
15 | * / --> &#47; forward slash is included as it helps end an HTML entity
16 | *
17 | */
/*
 * Lookup table indexed by byte value.  A zero entry means the byte is copied
 * to the output unchanged; a non-zero entry is the index into HTML_ESCAPES of
 * the replacement text for that byte.  Every byte not named below is zero.
 */
static const char HTML_ESCAPE_TABLE[256] = {
    ['"'] = 1, ['&'] = 2, ['\''] = 3, ['/'] = 4, ['<'] = 5, ['>'] = 6,
};
31 |
/*
 * Replacement text for each non-zero code stored in HTML_ESCAPE_TABLE.
 * Slot 0 is a placeholder: code 0 means "no escaping", so it is never emitted.
 * Order must match the codes in the table: 1 = '"', 2 = '&', 3 = '\'',
 * 4 = '/', 5 = '<', 6 = '>'.
 */
static const char *HTML_ESCAPES[] = {"",      "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;",   "&gt;"};
34 |
35 | int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
36 | int secure) {
37 | bufsize_t i = 0, org, esc = 0;
38 |
39 | while (i < size) {
40 | org = i;
41 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
42 | i++;
43 |
44 | if (i > org)
45 | cmark_strbuf_put(ob, src + org, i - org);
46 |
47 | /* escaping */
48 | if (unlikely(i >= size))
49 | break;
50 |
51 | /* The forward slash and single quote are only escaped in secure mode */
52 | if ((src[i] == '/' || src[i] == '\'') && !secure) {
53 | cmark_strbuf_putc(ob, src[i]);
54 | } else {
55 | cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
56 | }
57 |
58 | i++;
59 | }
60 |
61 | return 1;
62 | }
63 |
64 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
65 | return houdini_escape_html0(ob, src, size, 1);
66 | }
67 |
--------------------------------------------------------------------------------
/test/roundtrip_tests.py:
--------------------------------------------------------------------------------
1 | import re
2 | import sys
3 | from spec_tests import get_tests, do_test
4 | from cmark import CMark
5 | import argparse
6 |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.')
    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
            help='program to test')
    parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
            help='path to spec')
    parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
            default=None, help='limit to sections matching regex pattern')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
            default=None, help='directory containing dynamic library')
    parser.add_argument('--extensions', dest='extensions', nargs='?',
            default=None, help='space separated list of extensions to enable')
    parser.add_argument('--no-normalize', dest='normalize',
            action='store_const', const=False, default=True,
            help='do not normalize HTML')
    parser.add_argument('-n', '--number', type=int, default=None,
            help='only consider the test with the given number')
    args = parser.parse_args(sys.argv[1:])

    # The spec path comes from --spec (args.spec).  The former unconditional
    # read of sys.argv[1] was unused and raised IndexError when the script
    # was started without arguments.

    def converter(md, exts):
        """Round-trip `md`: Markdown -> CommonMark -> HTML.

        Returns a 3-element list [exit_code, output, error].  On success the
        output is the HTML produced from the re-rendered CommonMark; on
        failure it is the output of the step that failed.
        """
        cmark = CMark(prog=args.program, library_dir=args.library_dir, extensions=args.extensions)
        [ec, result, err] = cmark.to_commonmark(md, exts)
        if ec != 0:
            return [ec, result, err]
        [ec, html, err] = cmark.to_html(result, exts)
        if ec != 0:
            return [ec, html, err]
        # In the commonmark writer we insert dummy HTML
        # comments between lists, and between lists and code
        # blocks.  Strip these out, since the spec uses
        # two blank lines instead:
        return [ec, re.sub('<!-- end list -->\n', '', html), '']

    tests = get_tests(args.spec)
    result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0}
    for test in tests:
        do_test(converter, test, args.normalize, result_counts)

    sys.stdout.buffer.write("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts).encode('utf-8'))
    # Non-zero exit status when anything failed or errored.
    sys.exit(result_counts['fail'] + result_counts['error'])
51 |
--------------------------------------------------------------------------------
/extensions/ext_scanners.re:
--------------------------------------------------------------------------------
1 | /*!re2c re2c:flags:no-debug-info = 1; */
2 | /*!re2c re2c:indent:string = ' '; */
3 |
4 | #include
5 | #include "ext_scanners.h"
6 |
7 | bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset)
8 | {
9 | bufsize_t res;
10 |
11 | if (ptr == NULL || offset >= len) {
12 | return 0;
13 | } else {
14 | unsigned char lim = ptr[len];
15 |
16 | ptr[len] = '\0';
17 | res = scanner(ptr + offset);
18 | ptr[len] = lim;
19 | }
20 |
21 | return res;
22 | }
23 |
24 | /*!re2c
25 | re2c:define:YYCTYPE = "unsigned char";
26 | re2c:define:YYCURSOR = p;
27 | re2c:define:YYMARKER = marker;
28 | re2c:yyfill:enable = 0;
29 |
30 | spacechar = [ \t\v\f];
31 | newline = [\r]?[\n];
32 | escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-];
33 |
34 | table_marker = (spacechar*[:]?[-]+[:]?spacechar*);
35 | table_cell = (escaped_char|[^|\r\n])+;
36 |
37 | tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+;
38 | */
39 |
/*
 * Match a table delimiter row at `p`: an optional leading '|', one or more
 * `table_marker` cells separated by '|', an optional trailing '|', optional
 * trailing space characters and a newline.
 *
 * Returns the number of bytes matched, or 0 when the input does not match.
 */
bufsize_t _scan_table_start(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;
/*!re2c
  [|]? table_marker ([|] table_marker)* [|]? spacechar* newline {
    return (bufsize_t)(p - start);
  }
  * { return 0; }
*/
}
51 |
/*
 * Match the content of one table cell at `p`: one or more escaped characters
 * or bytes other than '|', CR and LF.
 *
 * Returns the number of bytes matched; an empty cell falls through to the
 * default rule and yields 0.
 */
bufsize_t _scan_table_cell(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;
/*!re2c
  // In fact, `table_cell` matches non-empty table cells only. The empty
  // string is also a valid table cell, but is handled by the default rule.
  // This approach prevents re2c's match-empty-string warning.
  table_cell { return (bufsize_t)(p - start); }
  * { return 0; }
*/
}
64 |
/*
 * Match a cell terminator at `p`: a '|' followed by any number of space
 * characters.
 *
 * Returns the number of bytes matched, or 0 when `p` does not start with '|'.
 */
bufsize_t _scan_table_cell_end(const unsigned char *p)
{
  const unsigned char *start = p;
/*!re2c
  [|] spacechar* { return (bufsize_t)(p - start); }
  * { return 0; }
*/
}
73 |
/*
 * Match the end of a table row at `p`: optional space characters followed by
 * a newline (an optional CR, then LF).
 *
 * Returns the number of bytes matched, or 0 when the input does not match.
 */
bufsize_t _scan_table_row_end(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;
/*!re2c
  spacechar* newline { return (bufsize_t)(p - start); }
  * { return 0; }
*/
}
83 |
/*
 * Match a task-list item prefix at `p` as described by the `tasklist`
 * pattern: optional leading space, a list marker, at least one space
 * character, a "[ ]" or "[x]" checkbox, and at least one trailing space
 * character.
 *
 * Returns the number of bytes matched, or 0 when the input does not match.
 */
bufsize_t _scan_tasklist(const unsigned char *p)
{
  const unsigned char *marker = NULL;
  const unsigned char *start = p;
/*!re2c
  tasklist { return (bufsize_t)(p - start); }
  * { return 0; }
*/
}
93 |
--------------------------------------------------------------------------------
/src/arena.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-extension_api.h"
6 |
/*
 * One block of arena storage.  Chunks form a singly linked list through
 * `prev`; `A` points at the chunk allocations are currently served from and
 * is NULL until the arena is first used or after cmark_arena_reset().
 */
static struct arena_chunk {
  /* sz: size in bytes of the buffer at `ptr`; used: bytes already handed out */
  size_t sz, used;
  /* set by cmark_arena_push() on the chunk that was current at push time;
   * cmark_arena_pop() frees chunks until it reaches one with this set */
  uint8_t push_point;
  /* zero-initialised storage obtained with calloc() */
  void *ptr;
  /* previously allocated chunk, or NULL for the oldest one */
  struct arena_chunk *prev;
} *A = NULL;
13 |
14 | static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
15 | struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c));
16 | if (!c)
17 | abort();
18 | c->sz = sz;
19 | c->ptr = calloc(1, sz);
20 | if (!c->ptr)
21 | abort();
22 | c->prev = prev;
23 | return c;
24 | }
25 |
26 | void cmark_arena_push(void) {
27 | if (!A)
28 | return;
29 | A->push_point = 1;
30 | A = alloc_arena_chunk(10240, A);
31 | }
32 |
33 | int cmark_arena_pop(void) {
34 | if (!A)
35 | return 0;
36 | while (A && !A->push_point) {
37 | free(A->ptr);
38 | struct arena_chunk *n = A->prev;
39 | free(A);
40 | A = n;
41 | }
42 | if (A)
43 | A->push_point = 0;
44 | return 1;
45 | }
46 |
47 | static void init_arena(void) {
48 | A = alloc_arena_chunk(4 * 1048576, NULL);
49 | }
50 |
51 | void cmark_arena_reset(void) {
52 | while (A) {
53 | free(A->ptr);
54 | struct arena_chunk *n = A->prev;
55 | free(A);
56 | A = n;
57 | }
58 | }
59 |
60 | static void *arena_calloc(size_t nmem, size_t size) {
61 | if (!A)
62 | init_arena();
63 |
64 | size_t sz = nmem * size + sizeof(size_t);
65 |
66 | // Round allocation sizes to largest integer size to
67 | // ensure returned memory is correctly aligned
68 | const size_t align = sizeof(size_t) - 1;
69 | sz = (sz + align) & ~align;
70 |
71 | struct arena_chunk *chunk;
72 | if (sz > A->sz) {
73 | A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74 | } else if (sz > A->sz - A->used) {
75 | A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76 | } else {
77 | chunk = A;
78 | }
79 | void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80 | chunk->used += sz;
81 | *((size_t *) ptr) = sz - sizeof(size_t);
82 | return (uint8_t *) ptr + sizeof(size_t);
83 | }
84 |
85 | static void *arena_realloc(void *ptr, size_t size) {
86 | if (!A)
87 | init_arena();
88 |
89 | void *new_ptr = arena_calloc(1, size);
90 | if (ptr)
91 | memcpy(new_ptr, ptr, ((size_t *) ptr)[-1]);
92 | return new_ptr;
93 | }
94 |
/*
 * free-style entry point of the arena allocator.  Individual blocks are never
 * released; storage is only returned when whole chunks are freed by
 * cmark_arena_pop() or cmark_arena_reset().
 */
static void arena_free(void *ptr) {
  (void) ptr;
  /* no-op */
}
99 |
/* Allocator table wiring up the arena's calloc, realloc and free functions,
 * in that order. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

/* Returns a pointer to the shared arena allocator table defined above. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}
105 |
--------------------------------------------------------------------------------
/man/man1/cmark-gfm.1:
--------------------------------------------------------------------------------
1 | .TH "cmark-gfm" "1" "March 24, 2016" "LOCAL" "General Commands Manual"
2 | .SH "NAME"
3 | \fBcmark-gfm\fR
4 | \- convert CommonMark formatted text with GitHub Flavored Markdown extensions to HTML
5 | .SH "SYNOPSIS"
6 | .HP 6n
7 | \fBcmark-gfm\fR
8 | [options]
9 | file*
10 | .SH "DESCRIPTION"
11 | \fBcmark-gfm\fR
12 | converts Markdown formatted plain text to either HTML, groff man,
13 | CommonMark XML, LaTeX, or CommonMark, using the conventions
14 | described in the CommonMark spec. It reads input from \fIstdin\fR
15 | or the specified files (concatenating their contents) and writes
16 | output to \fIstdout\fR.
17 | .SH "OPTIONS"
18 | .TP 12n
19 | .B \-\-to, \-t \f[I]FORMAT\f[]
20 | Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[],
21 | \f[C]latex\f[], \f[C]commonmark\f[]).
22 | .TP 12n
23 | .B \-\-width \f[I]WIDTH\f[]
24 | Specify a column width to which to wrap the output. For no wrapping, use
25 | the value 0 (the default). This option currently only affects the
26 | commonmark, latex, and man renderers.
27 | .TP 12n
28 | .B \-\-hardbreaks
29 | Render soft breaks (newlines inside paragraphs in the CommonMark source)
30 | as hard line breaks in the target format. If this option is specified,
31 | hard wrapping is disabled for CommonMark output, regardless of the value
32 | given with \-\-width.
33 | .TP 12n
34 | .B \-\-nobreaks
35 | Render soft breaks as spaces. If this option is specified,
36 | hard wrapping is disabled for all output formats, regardless of the value
37 | given with \-\-width.
38 | .TP 12n
39 | .B \-\-sourcepos
40 | Include source position attribute.
41 | .TP 12n
42 | .B \-\-normalize
43 | Consolidate adjacent text nodes.
44 | .TP 12n
45 | .B \-\-extension, \-e \f[I]EXTENSION_NAME\f[]
46 | Specify an extension name to use.
47 | .TP 12n
48 | .B \-\-list\-extensions
49 | List available extensions and quit.
50 | .TP 12n
51 | .B \-\-validate\-utf8
52 | Validate UTF-8, replacing illegal sequences with U+FFFD.
53 | .TP 12n
54 | .B \-\-smart
55 | Use smart punctuation. Straight double and single quotes will
56 | be rendered as curly quotes, depending on their position.
57 | \f[C]\-\-\f[] will be rendered as an en-dash.
58 | \f[C]\-\-\-\f[] will be rendered as an em-dash.
59 | \f[C]...\f[] will be rendered as ellipses.
60 | .TP 12n
61 | .B \-\-unsafe
62 | Render raw HTML and potentially dangerous URLs.
63 | (Raw HTML is not replaced by a placeholder comment; potentially
64 | dangerous URLs are not replaced by empty strings.) Dangerous
65 | URLs are those that begin with `javascript:`, `vbscript:`,
66 | `file:`, or `data:` (except for `image/png`, `image/gif`,
67 | `image/jpeg`, or `image/webp` mime types).
68 | .TP 12n
69 | .B \-\-help
70 | Print usage information.
71 | .TP 12n
72 | .B \-\-version
73 | Print version.
74 | .SH "AUTHORS"
75 | John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
76 | .SH "SEE ALSO"
77 | .PP
78 | CommonMark spec: \f[C]http://spec.commonmark.org\f[].
79 |
--------------------------------------------------------------------------------
/api_test/harness.c:
--------------------------------------------------------------------------------
1 | #define _DEFAULT_SOURCE
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "harness.h"
8 |
9 | test_batch_runner *test_batch_runner_new() {
10 | return (test_batch_runner *)calloc(1, sizeof(test_batch_runner));
11 | }
12 |
13 | static void test_result(test_batch_runner *runner, int cond, const char *msg,
14 | va_list ap) {
15 | ++runner->test_num;
16 |
17 | if (cond) {
18 | ++runner->num_passed;
19 | } else {
20 | fprintf(stderr, "FAILED test %d: ", runner->test_num);
21 | vfprintf(stderr, msg, ap);
22 | fprintf(stderr, "\n");
23 | ++runner->num_failed;
24 | }
25 | }
26 |
27 | void SKIP(test_batch_runner *runner, int num_tests) {
28 | runner->test_num += num_tests;
29 | runner->num_skipped += num_tests;
30 | }
31 |
32 | void OK(test_batch_runner *runner, int cond, const char *msg, ...) {
33 | va_list ap;
34 | va_start(ap, msg);
35 | test_result(runner, cond, msg, ap);
36 | va_end(ap);
37 | }
38 |
39 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
40 | ...) {
41 | int cond = got == expected;
42 |
43 | va_list ap;
44 | va_start(ap, msg);
45 | test_result(runner, cond, msg, ap);
46 | va_end(ap);
47 |
48 | if (!cond) {
49 | fprintf(stderr, " Got: %d\n", got);
50 | fprintf(stderr, " Expected: %d\n", expected);
51 | }
52 | }
53 |
54 | #ifndef _WIN32
55 | #include
56 |
57 | static char *write_tmp(char const *header, char const *data) {
58 | char *name = strdup("/tmp/fileXXXXXX");
59 | int fd = mkstemp(name);
60 | FILE *f = fdopen(fd, "w+");
61 | fputs(header, f);
62 | fwrite(data, 1, strlen(data), f);
63 | fclose(f);
64 | return name;
65 | }
66 |
67 | #endif
68 |
69 | void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
70 | const char *msg, ...) {
71 | int cond = strcmp(got, expected) == 0;
72 |
73 | va_list ap;
74 | va_start(ap, msg);
75 | test_result(runner, cond, msg, ap);
76 | va_end(ap);
77 |
78 | if (!cond) {
79 | #ifndef _WIN32
80 | char *got_fn = write_tmp("actual\n", got);
81 | char *expected_fn = write_tmp("expected\n", expected);
82 | char buf[1024];
83 | snprintf(buf, sizeof(buf), "git diff --no-index %s %s", expected_fn, got_fn);
84 | system(buf);
85 | remove(got_fn);
86 | remove(expected_fn);
87 | free(got_fn);
88 | free(expected_fn);
89 | #else
90 | fprintf(stderr, " Got: \"%s\"\n", got);
91 | fprintf(stderr, " Expected: \"%s\"\n", expected);
92 | #endif
93 | }
94 | }
95 |
96 | int test_ok(test_batch_runner *runner) { return runner->num_failed == 0; }
97 |
98 | void test_print_summary(test_batch_runner *runner) {
99 | int num_passed = runner->num_passed;
100 | int num_skipped = runner->num_skipped;
101 | int num_failed = runner->num_failed;
102 |
103 | fprintf(stderr, "%d tests passed, %d failed, %d skipped\n", num_passed,
104 | num_failed, num_skipped);
105 |
106 | if (test_ok(runner)) {
107 | fprintf(stderr, "PASS\n");
108 | } else {
109 | fprintf(stderr, "FAIL\n");
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
/src/buffer.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_BUFFER_H
2 | #define CMARK_BUFFER_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "config.h"
10 | #include "cmark-gfm.h"
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
/*
 * Growable byte buffer used throughout the library (see cmark_strbuf_grow).
 */
typedef struct {
  /* allocator handle; NOTE(review): presumably used for every (re)allocation
   * of `ptr` -- confirm in buffer.c */
  cmark_mem *mem;
  /* buffer contents; cmark_strbuf_cstr() exposes this as a C string */
  unsigned char *ptr;
  /* NOTE(review): presumably allocated capacity and number of bytes in use --
   * confirm in buffer.c */
  bufsize_t asize, size;
} cmark_strbuf;
21 |
/* Shared array that CMARK_BUF_INIT points `ptr` at; defined elsewhere. */
extern unsigned char cmark_strbuf__initbuf[];

/*
 * Brace initializer for a cmark_strbuf: stores `mem` as the allocator, points
 * `ptr` at cmark_strbuf__initbuf and sets both `asize` and `size` to 0.
 */
#define CMARK_BUF_INIT(mem)                                                    \
  { mem, cmark_strbuf__initbuf, 0, 0 }
26 |
27 | /**
28 | * Initialize a cmark_strbuf structure.
29 | *
30 | * For the cases where CMARK_BUF_INIT cannot be used to do static
31 | * initialization.
32 | */
33 | CMARK_GFM_EXPORT
34 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
35 | bufsize_t initial_size);
36 |
37 | /**
38 | * Grow the buffer to hold at least `target_size` bytes.
39 | */
40 | CMARK_GFM_EXPORT
41 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
42 |
43 | CMARK_GFM_EXPORT
44 | void cmark_strbuf_free(cmark_strbuf *buf);
45 |
46 | CMARK_GFM_EXPORT
47 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
48 |
49 | CMARK_GFM_EXPORT
50 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
51 |
52 | CMARK_GFM_EXPORT
53 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
54 |
55 | CMARK_GFM_EXPORT
56 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
57 |
58 | CMARK_GFM_EXPORT
59 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
60 | const cmark_strbuf *buf);
61 |
62 | static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
63 | return (char *)buf->ptr;
64 | }
65 |
66 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
67 |
68 | CMARK_GFM_EXPORT
69 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
70 | bufsize_t len);
71 |
72 | CMARK_GFM_EXPORT
73 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
74 |
75 | CMARK_GFM_EXPORT
76 | void cmark_strbuf_putc(cmark_strbuf *buf, int c);
77 |
78 | CMARK_GFM_EXPORT
79 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
80 | bufsize_t len);
81 |
82 | CMARK_GFM_EXPORT
83 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
84 |
85 | CMARK_GFM_EXPORT
86 | void cmark_strbuf_clear(cmark_strbuf *buf);
87 |
88 | CMARK_GFM_EXPORT
89 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
90 |
91 | CMARK_GFM_EXPORT
92 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
93 |
94 | CMARK_GFM_EXPORT
95 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
96 |
97 | CMARK_GFM_EXPORT
98 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
99 |
100 | CMARK_GFM_EXPORT
101 | void cmark_strbuf_rtrim(cmark_strbuf *buf);
102 |
103 | CMARK_GFM_EXPORT
104 | void cmark_strbuf_trim(cmark_strbuf *buf);
105 |
106 | CMARK_GFM_EXPORT
107 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
108 |
109 | CMARK_GFM_EXPORT
110 | void cmark_strbuf_unescape(cmark_strbuf *s);
111 |
112 | #ifdef __cplusplus
113 | }
114 | #endif
115 |
116 | #endif
117 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
name: "CodeQL"

on:
  push:
    branches: [ "master" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "master" ]
  schedule:
    - cron: '45 14 * * 3'

jobs:
  analyze:
    name: Analyze
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'cpp', 'javascript', 'python', 'ruby' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
        # Use only 'java' to analyze code written in Java, Kotlin or both
        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
    # The action versions below replace checkout@v3 and codeql-action@v2;
    # codeql-action v2 has been retired by GitHub.
    - name: Checkout repository
      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.

        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
        # queries: security-extended,security-and-quality


    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v3

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

    #   If the Autobuild fails above, remove it and uncomment the following three lines.
    #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

    # - run: |
    #     echo "Run, Build Application using script"
    #     ./location_of_script_within_repo/buildscript.sh

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3
      with:
        category: "/language:${{matrix.language}}"
--------------------------------------------------------------------------------
/FindAsan.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # The MIT License (MIT)
3 | #
4 | # Copyright (c) 2013 Matthew Arsenault
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 | # This module tests if address sanitizer is supported by the compiler,
25 | # and creates a ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use
26 | # it). This sets the following variables:
27 | #
28 | # CMAKE_C_FLAGS_ASAN - Flags to use for C with asan
29 | # CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan
30 | # HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available
31 |
include(CheckCCompilerFlag)

# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER)

set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)

unset(CMAKE_REQUIRED_FLAGS)

if(HAVE_FLAG_SANITIZE_ADDRESS)
  # Clang 3.2+ use this version
  set(ADDRESS_SANITIZER_FLAG "-fsanitize=address")
elseif(HAVE_FLAG_ADDRESS_SANITIZER)
  # Older deprecated flag for ASan
  set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer")
endif()

# Neither spelling of the flag is accepted: report ASan as unavailable and
# stop before defining the ASan build type.
if(NOT ADDRESS_SANITIZER_FLAG)
  set(HAVE_ADDRESS_SANITIZER FALSE)
  return()
endif()

set(HAVE_ADDRESS_SANITIZER TRUE)

set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C compiler during ASan builds."
    FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C++ compiler during ASan builds."
    FORCE)
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used for linking binaries during ASan builds."
    FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used by the shared libraries linker during ASan builds."
    FORCE)
mark_as_advanced(CMAKE_C_FLAGS_ASAN
                 CMAKE_CXX_FLAGS_ASAN
                 CMAKE_EXE_LINKER_FLAGS_ASAN
                 CMAKE_SHARED_LINKER_FLAGS_ASAN)
75 |
--------------------------------------------------------------------------------
/src/houdini_href_e.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "houdini.h"
6 |
7 | /*
8 | * The following characters will not be escaped:
9 | *
10 | * -_.+!*'(),%#@?=;:/,+&$~ alphanum
11 | *
12 | * Note that this character set is the addition of:
13 | *
14 | * - The characters which are safe to be in an URL
15 | * - The characters which are *not* safe to be in
16 | * an URL because they are RESERVED characters.
17 | *
18 | * We assume (lazily) that any RESERVED char that
19 | * appears inside an URL is actually meant to
20 | * have its native function (i.e. as an URL
21 | * component/separator) and hence needs no escaping.
22 | *
23 | * There are two exceptions: the characters & (amp)
24 | * and ' (single quote) do not appear in the table.
25 | * They are meant to appear in the URL as components,
26 | * yet they require special HTML-entity escaping
27 | * to generate valid HTML markup.
28 | *
29 | * All other characters will be escaped to %XX.
30 | *
31 | */
/*
 * Lookup table with one entry per byte value (256 entries).  A 1 marks a
 * byte that houdini_escape_href() copies to the output unchanged; a 0 marks
 * a byte it rewrites.  Every entry from 0x7F upwards is 0.
 */
static const char HREF_SAFE[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
45 |
46 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
47 | static const uint8_t hex_chars[] = "0123456789ABCDEF";
48 | bufsize_t i = 0, org;
49 | uint8_t hex_str[3];
50 |
51 | hex_str[0] = '%';
52 |
53 | while (i < size) {
54 | org = i;
55 | while (i < size && HREF_SAFE[src[i]] != 0)
56 | i++;
57 |
58 | if (likely(i > org))
59 | cmark_strbuf_put(ob, src + org, i - org);
60 |
61 | /* escaping */
62 | if (i >= size)
63 | break;
64 |
65 | switch (src[i]) {
66 | /* amp appears all the time in URLs, but needs
67 | * HTML-entity escaping to be inside an href */
68 | case '&':
69 | cmark_strbuf_puts(ob, "&");
70 | break;
71 |
72 | /* the single quote is a valid URL character
73 | * according to the standard; it needs HTML
74 | * entity escaping too */
75 | case '\'':
76 | cmark_strbuf_puts(ob, "'");
77 | break;
78 |
79 | /* the space can be escaped to %20 or a plus
80 | * sign. we're going with the generic escape
81 | * for now. the plus thing is more commonly seen
82 | * when building GET strings */
83 | #if 0
84 | case ' ':
85 | cmark_strbuf_putc(ob, '+');
86 | break;
87 | #endif
88 |
89 | /* every other character goes with a %XX escaping */
90 | default:
91 | hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
92 | hex_str[2] = hex_chars[src[i] & 0xF];
93 | cmark_strbuf_put(ob, hex_str, 3);
94 | }
95 |
96 | i++;
97 | }
98 |
99 | return 1;
100 | }
101 |
--------------------------------------------------------------------------------
/fuzz/fuzz_quadratic.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-core-extensions.h"
6 | #include
7 | #include
8 | #include
9 | #include
10 |
/* NULL-terminated list of the syntax extensions the fuzzer attaches to the
 * parser. */
const char *extension_names[] = {"autolink", "strikethrough", "table",
                                 "tagfilter", NULL};
18 |
/*
 * One-time fuzzer setup hook: registers the core GFM extensions.  The
 * command-line arguments are not inspected.  Always returns 0.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  (void)argc;
  (void)argv;

  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}
23 |
24 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { /* parses and renders one fuzz input; always returns 0 */
25 | struct __attribute__((packed)) {
26 | int options; /* overwritten below with a fixed option set */
27 | int width; /* copied from the input but not read in this function */
28 | uint8_t splitpoint; /* number of leading markdown bytes copied once */
29 | uint8_t repeatlen; /* length of the slice after splitpoint that is repeated */
30 | } fuzz_config;
31 |
32 | if (size >= sizeof(fuzz_config)) { /* inputs shorter than the config header are ignored */
33 | /* The beginning of `data` is treated as fuzzer configuration */
34 | memcpy(&fuzz_config, data, sizeof(fuzz_config));
35 |
36 | /* Test options that are used by GitHub. */
37 | fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
38 |
39 | /* Remainder of input is the markdown */
40 | const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
41 | const size_t markdown_size0 = size - sizeof(fuzz_config);
42 | char markdown[0x80000]; /* local 512 KiB buffer that receives the expanded markdown */
43 | if (markdown_size0 <= sizeof(markdown)) {
44 | size_t markdown_size = 0;
45 | if (fuzz_config.splitpoint <= markdown_size0 && 0 < fuzz_config.repeatlen && /* prefix and repeated slice must lie inside the markdown */
46 | fuzz_config.repeatlen <= markdown_size0 - fuzz_config.splitpoint) {
47 | const size_t size_after_splitpoint = markdown_size0 - fuzz_config.splitpoint - fuzz_config.repeatlen;
48 | memcpy(&markdown[markdown_size], &markdown0[0], fuzz_config.splitpoint); /* prefix, copied once */
49 | markdown_size += fuzz_config.splitpoint;
50 |
51 | while (markdown_size + fuzz_config.repeatlen + size_after_splitpoint <= sizeof(markdown)) { /* repeat the slice while slice + tail still fit */
52 | memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint],
53 | fuzz_config.repeatlen);
54 | markdown_size += fuzz_config.repeatlen;
55 | }
56 | memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint + fuzz_config.repeatlen], /* tail after the repeated slice, copied once */
57 | size_after_splitpoint);
58 | markdown_size += size_after_splitpoint;
59 | } else {
60 | markdown_size = markdown_size0; /* config does not describe a valid split: use the markdown unchanged */
61 | memcpy(markdown, markdown0, markdown_size);
62 | }
63 |
64 | cmark_parser *parser = cmark_parser_new(fuzz_config.options);
65 |
66 | for (const char **it = extension_names; *it; ++it) { /* attach every listed extension; abort if one cannot be found */
67 | const char *extension_name = *it;
68 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
69 | if (!syntax_extension) {
70 | fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
71 | abort();
72 | }
73 | cmark_parser_attach_syntax_extension(parser, syntax_extension);
74 | }
75 |
76 | cmark_parser_feed(parser, markdown, markdown_size);
77 | cmark_node *doc = cmark_parser_finish(parser);
78 |
79 | free(cmark_render_html(doc, fuzz_config.options, NULL)); /* each renderer's result is freed immediately */
80 | free(cmark_render_xml(doc, fuzz_config.options));
81 | free(cmark_render_man(doc, fuzz_config.options, 80));
82 | free(cmark_render_commonmark(doc, fuzz_config.options, 80));
83 | free(cmark_render_plaintext(doc, fuzz_config.options, 80));
84 | free(cmark_render_latex(doc, fuzz_config.options, 80));
85 |
86 | cmark_node_free(doc);
87 | cmark_parser_free(parser);
88 | }
89 | }
90 | return 0;
91 | }
92 |
--------------------------------------------------------------------------------
/src/map.c:
--------------------------------------------------------------------------------
1 | #include "map.h"
2 | #include "utf8.h"
3 | #include "parser.h"
4 |
5 | // Produce the canonical lookup key for a map label: the text is case
6 | // folded, leading/trailing whitespace is removed, and internal whitespace
7 | // is collapsed. Returns NULL when `ref` is NULL, has zero length, or
8 | // normalizes to the empty string; otherwise the detached buffer.
9 | unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref) {
10 |   cmark_strbuf scratch = CMARK_BUF_INIT(mem);
11 |   unsigned char *label;
12 |
13 |   // Nothing to normalize.
14 |   if (ref == NULL || ref->len == 0)
15 |     return NULL;
16 |
17 |   // Fold case first, then trim and collapse whitespace on the folded copy.
18 |   cmark_utf8proc_case_fold(&scratch, ref->data, ref->len);
19 |   cmark_strbuf_trim(&scratch);
20 |   cmark_strbuf_normalize_whitespace(&scratch);
21 |
22 |   // Take the buffer out of the strbuf.
23 |   label = cmark_strbuf_detach(&scratch);
24 |   assert(label);
25 |
26 |   // A non-empty result is handed back as-is.
27 |   if (label[0] != '\0')
28 |     return label;
29 |
30 |   mem->free(label);
31 |   return NULL;
32 | }
33 |
34 | /* Orders two NUL-terminated labels with strcmp. */
35 | static int labelcmp(const unsigned char *a, const unsigned char *b) {
36 |   return strcmp((const char *)a, (const char *)b);
37 | }
38 |
39 | /* qsort comparator over entry pointers: by label, then by age difference. */
40 | static int refcmp(const void *p1, const void *p2) {
41 |   cmark_map_entry *lhs = *(cmark_map_entry **)p1;
42 |   cmark_map_entry *rhs = *(cmark_map_entry **)p2;
43 |   int by_label = labelcmp(lhs->label, rhs->label);
44 |   return by_label != 0 ? by_label : ((int)lhs->age - (int)rhs->age);
45 | }
46 |
47 | /* bsearch comparator: `label` is the key string, `p2` addresses an entry pointer. */
48 | static int refsearch(const void *label, const void *p2) {
49 |   cmark_map_entry *candidate = *(cmark_map_entry **)p2;
50 |   return labelcmp((const unsigned char *)label, candidate->label);
51 | }
52 |
53 | static void sort_map(cmark_map *map) {
54 |   /* Builds map->sorted: entries ordered by refcmp, one entry kept per label. */
55 |   size_t count = map->size, filled = 0, keep = 0, idx;
56 |   cmark_map_entry *node, **table;
57 |
58 |   table = (cmark_map_entry **)map->mem->calloc(count, sizeof(cmark_map_entry *));
59 |   for (node = map->refs; node; node = node->next)
60 |     table[filled++] = node;
61 |   qsort(table, count, sizeof(cmark_map_entry *), refcmp);
62 |
63 |   /* Compact in place: keep the first entry of each run of equal labels. */
64 |   for (idx = 1; idx < count; idx++) {
65 |     if (labelcmp(table[idx]->label, table[keep]->label) == 0)
66 |       continue;
67 |     table[++keep] = table[idx];
68 |   }
69 |
70 |   map->sorted = table;
71 |   map->size = keep + 1;
72 | }
73 |
74 | cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
75 |   cmark_map_entry **hit;
76 |   cmark_map_entry *entry;
77 |   unsigned char *key;
78 |
79 |   /* Reject empty or over-long labels, then a missing or empty map. */
80 |   if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
81 |     return NULL;
82 |   if (map == NULL || map->size == 0)
83 |     return NULL;
84 |
85 |   key = normalize_map_label(map->mem, label);
86 |   if (key == NULL)
87 |     return NULL;
88 |
89 |   /* The sorted index is built on demand when it does not exist yet. */
90 |   if (map->sorted == NULL)
91 |     sort_map(map);
92 |
93 |   hit = (cmark_map_entry **)bsearch(key, map->sorted, map->size, sizeof(cmark_map_entry *), refsearch);
94 |   map->mem->free(key);
95 |   if (hit == NULL)
96 |     return NULL;
97 |
98 |   /* Check for expansion limit */
99 |   entry = *hit;
100 |   if (entry->size > map->max_ref_size - map->ref_size)
101 |     return NULL;
102 |   map->ref_size += entry->size;
103 |   return entry;
104 | }
105 |
106 | void cmark_map_free(cmark_map *map) {
107 |   cmark_map_entry *cur, *following;
108 |
109 |   if (map == NULL)
110 |     return;
111 |
112 |   /* Release every entry through the map's own free callback. */
113 |   for (cur = map->refs; cur; cur = following) {
114 |     following = cur->next; /* read before the entry is released */
115 |     map->free(map, cur);
116 |   }
117 |
118 |   /* Then the sorted index and the map itself. */
119 |   map->mem->free(map->sorted);
120 |   map->mem->free(map);
121 | }
122 |
123 | cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) {
124 |   cmark_map *created = (cmark_map *)mem->calloc(1, sizeof(cmark_map)); /* calloc'd; only the fields below are set explicitly */
125 |   created->max_ref_size = UINT_MAX;
126 |   created->free = free;
127 |   created->mem = mem;
128 |   return created;
129 | }
130 |
--------------------------------------------------------------------------------
/src/scanners.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_SCANNERS_H
2 | #define CMARK_SCANNERS_H
3 |
4 | #include "cmark-gfm.h"
5 | #include "chunk.h"
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
12 | bufsize_t offset); /* takes a scanner callback plus the chunk and offset it is applied at; defined outside this header */
13 | bufsize_t _scan_scheme(const unsigned char *p);
14 | bufsize_t _scan_autolink_uri(const unsigned char *p);
15 | bufsize_t _scan_autolink_email(const unsigned char *p);
16 | bufsize_t _scan_html_tag(const unsigned char *p);
17 | bufsize_t _scan_liberal_html_tag(const unsigned char *p);
18 | bufsize_t _scan_html_comment(const unsigned char *p);
19 | bufsize_t _scan_html_pi(const unsigned char *p);
20 | bufsize_t _scan_html_declaration(const unsigned char *p);
21 | bufsize_t _scan_html_cdata(const unsigned char *p);
22 | bufsize_t _scan_html_block_start(const unsigned char *p);
23 | bufsize_t _scan_html_block_start_7(const unsigned char *p);
24 | bufsize_t _scan_html_block_end_1(const unsigned char *p);
25 | bufsize_t _scan_html_block_end_2(const unsigned char *p);
26 | bufsize_t _scan_html_block_end_3(const unsigned char *p);
27 | bufsize_t _scan_html_block_end_4(const unsigned char *p);
28 | bufsize_t _scan_html_block_end_5(const unsigned char *p);
29 | bufsize_t _scan_link_title(const unsigned char *p);
30 | bufsize_t _scan_spacechars(const unsigned char *p);
31 | bufsize_t _scan_atx_heading_start(const unsigned char *p);
32 | bufsize_t _scan_setext_heading_line(const unsigned char *p);
33 | bufsize_t _scan_open_code_fence(const unsigned char *p);
34 | bufsize_t _scan_close_code_fence(const unsigned char *p);
35 | bufsize_t _scan_entity(const unsigned char *p);
36 | bufsize_t _scan_dangerous_url(const unsigned char *p);
37 | bufsize_t _scan_footnote_definition(const unsigned char *p);
38 | /* Wrappers: scan_X(c, n) expands to _scan_at(&_scan_X, c, n) for each scanner declared above. */
39 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
40 | #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
41 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
42 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
43 | #define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n)
44 | #define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n)
45 | #define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n)
46 | #define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n)
47 | #define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n)
48 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
49 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
50 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
51 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
52 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
53 | #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
54 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
55 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
56 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
57 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
58 | #define scan_setext_heading_line(c, n) \
59 | _scan_at(&_scan_setext_heading_line, c, n)
60 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
61 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
62 | #define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
63 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
64 | #define scan_footnote_definition(c, n) _scan_at(&_scan_footnote_definition, c, n)
65 |
66 | #ifdef __cplusplus
67 | }
68 | #endif
69 |
70 | #endif
71 |
--------------------------------------------------------------------------------
/bench/samples/lorem1.md:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
12 |
13 | [1]: https://github.com/markdown-it
14 |
--------------------------------------------------------------------------------
/wrappers/wrapper_ext.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | #
4 | # Example for using the shared library from python.
5 | # Will work with either python 2 or python 3.
6 | # Requires cmark-gfm and cmark-gfm-extensions libraries to be installed.
7 | #
8 | # This particular example uses the GitHub extensions from the gfm-extensions
9 | # library. EXTENSIONS specifies which to use, and the sample shows how to
10 | # connect them into a parser.
11 | #
12 |
13 | import sys
14 | import ctypes
15 |
16 | if sys.platform == 'darwin':
17 | libname = 'libcmark-gfm.dylib'
18 | extname = 'libcmark-gfm-extensions.dylib'
19 | elif sys.platform == 'win32':
20 | libname = 'cmark-gfm.dll'
21 | extname = 'cmark-gfm-extensions.dll'
22 | else:
23 | libname = 'libcmark-gfm.so'
24 | extname = 'libcmark-gfm-extensions.so'
25 | cmark = ctypes.CDLL(libname)
26 | cmark_ext = ctypes.CDLL(extname)
27 |
28 | # Options for the GFM rendering call
29 | OPTS = 0 # defaults
30 |
31 | # The GFM extensions that we want to use
32 | EXTENSIONS = (
33 | 'autolink',
34 | 'table',
35 | 'strikethrough',
36 | 'tagfilter',
37 | )
38 |
39 | # Use ctypes to access the functions in libcmark-gfm
40 |
41 | F_cmark_parser_new = cmark.cmark_parser_new
42 | F_cmark_parser_new.restype = ctypes.c_void_p  # parser handle is kept as an opaque pointer
43 | F_cmark_parser_new.argtypes = (ctypes.c_int,)
44 |
45 | F_cmark_parser_feed = cmark.cmark_parser_feed
46 | F_cmark_parser_feed.restype = None
47 | F_cmark_parser_feed.argtypes = (ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t)  # parser, buffer, length
48 |
49 | F_cmark_parser_finish = cmark.cmark_parser_finish
50 | F_cmark_parser_finish.restype = ctypes.c_void_p
51 | F_cmark_parser_finish.argtypes = (ctypes.c_void_p,)
52 |
53 | F_cmark_parser_attach_syntax_extension = cmark.cmark_parser_attach_syntax_extension
54 | F_cmark_parser_attach_syntax_extension.restype = ctypes.c_int
55 | F_cmark_parser_attach_syntax_extension.argtypes = (ctypes.c_void_p, ctypes.c_void_p)
56 |
57 | F_cmark_parser_get_syntax_extensions = cmark.cmark_parser_get_syntax_extensions
58 | F_cmark_parser_get_syntax_extensions.restype = ctypes.c_void_p
59 | F_cmark_parser_get_syntax_extensions.argtypes = (ctypes.c_void_p,)
60 |
61 | F_cmark_parser_free = cmark.cmark_parser_free
62 | F_cmark_parser_free.restype = None
63 | F_cmark_parser_free.argtypes = (ctypes.c_void_p,)
64 |
65 | F_cmark_node_free = cmark.cmark_node_free
66 | F_cmark_node_free.restype = None
67 | F_cmark_node_free.argtypes = (ctypes.c_void_p,)
68 |
69 | F_cmark_find_syntax_extension = cmark.cmark_find_syntax_extension
70 | F_cmark_find_syntax_extension.restype = ctypes.c_void_p
71 | F_cmark_find_syntax_extension.argtypes = (ctypes.c_char_p,)
72 |
73 | F_cmark_render_html = cmark.cmark_render_html
74 | F_cmark_render_html.restype = ctypes.c_char_p  # ctypes converts the returned char* into a Python byte string
75 | F_cmark_render_html.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
76 |
77 |
78 | # Set up the libcmark-gfm library and its extensions
79 | F_register = cmark_ext.cmark_gfm_core_extensions_ensure_registered
80 | F_register.restype = None
81 | F_register.argtypes = ( )
82 | F_register()  # runs once, when this module is loaded
83 |
84 |
85 | def md2html(text):
86 | "Use cmark-gfm to render the Markdown into an HTML fragment."
87 |
88 | parser = F_cmark_parser_new(OPTS)
89 | assert parser
90 | for name in EXTENSIONS:
91 | ext = F_cmark_find_syntax_extension(name)
92 | assert ext
93 | rv = F_cmark_parser_attach_syntax_extension(parser, ext)
94 | assert rv
95 | exts = F_cmark_parser_get_syntax_extensions(parser)
96 |
97 | F_cmark_parser_feed(parser, text, len(text))
98 | doc = F_cmark_parser_finish(parser)
99 | assert doc
100 |
101 | output = F_cmark_render_html(doc, OPTS, exts)
102 |
103 | F_cmark_parser_free(parser)
104 | F_cmark_node_free(doc)
105 |
106 | return output
107 |
108 |
109 | sys.stdout.write(md2html(sys.stdin.read()))
110 |
--------------------------------------------------------------------------------
/extensions/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(LIBRARY "libcmark-gfm-extensions") # target name of the shared library
2 | set(STATICLIBRARY "libcmark-gfm-extensions_static") # target name of the static library
3 | set(LIBRARY_SOURCES
4 | core-extensions.c
5 | table.c
6 | strikethrough.c
7 | autolink.c
8 | tagfilter.c
9 | ext_scanners.c
10 | ext_scanners.re
11 | ext_scanners.h
12 | tasklist.c
13 | )
14 | # Headers are searched in the project's src directory, in both the source and binary trees
15 | include_directories(
16 | ${PROJECT_SOURCE_DIR}/src
17 | ${PROJECT_BINARY_DIR}/src
18 | )
19 | # ...and in this directory plus its binary counterpart
20 | include_directories(. ${CMAKE_CURRENT_BINARY_DIR})
21 | # "Profile" configuration: the release flags plus -pg
22 | set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
23 | set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") # NOTE(review): neither name looks like a variable CMake itself reads - confirm this has any effect
24 |
25 | if (CMARK_SHARED) # shared-library target, built only when CMARK_SHARED is set
26 | add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})
27 |
28 | set_target_properties(${LIBRARY} PROPERTIES
29 | OUTPUT_NAME "cmark-gfm-extensions"
30 | DEFINE_SYMBOL "cmark-gfm"
31 | SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}
32 | VERSION ${PROJECT_VERSION})
33 |
34 | set_property(TARGET ${LIBRARY}
35 | APPEND PROPERTY MACOSX_RPATH true)
36 |
37 | # Avoid name clash between PROGRAM and LIBRARY pdb files.
38 | set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm-extensions_dll)
39 |
40 | list(APPEND CMARK_INSTALL ${LIBRARY}) # CMARK_INSTALL collects the targets passed to install(TARGETS) below
41 | target_link_libraries(${LIBRARY} libcmark-gfm) # the extensions link against the core library target
42 |
43 | endif()
44 |
45 | if (CMARK_STATIC) # static-library target, built only when CMARK_STATIC is set
46 | add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES})
47 |
48 | set_target_properties(${STATICLIBRARY} PROPERTIES
49 | COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE"
50 | DEFINE_SYMBOL "cmark-gfm"
51 | POSITION_INDEPENDENT_CODE ON)
52 | # Under MSVC the static archive gets a "_static" suffix; elsewhere it uses the same base name as the shared library
53 | if (MSVC)
54 | set_target_properties(${STATICLIBRARY} PROPERTIES
55 | OUTPUT_NAME "cmark-gfm-extensions_static"
56 | VERSION ${PROJECT_VERSION})
57 | else()
58 | set_target_properties(${STATICLIBRARY} PROPERTIES
59 | OUTPUT_NAME "cmark-gfm-extensions"
60 | VERSION ${PROJECT_VERSION})
61 | endif(MSVC)
62 |
63 | list(APPEND CMARK_INSTALL ${STATICLIBRARY})
64 | endif()
65 |
66 | set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
67 | # Install whichever library targets were appended to CMARK_INSTALL above
68 | include (InstallRequiredSystemLibraries)
69 | install(TARGETS ${CMARK_INSTALL}
70 | EXPORT cmark-gfm-extensions
71 | RUNTIME DESTINATION bin
72 | LIBRARY DESTINATION lib${LIB_SUFFIX}
73 | ARCHIVE DESTINATION lib${LIB_SUFFIX}
74 | )
75 | # The public header and the export set are installed when at least one library variant is built
76 | if (CMARK_SHARED OR CMARK_STATIC)
77 | install(FILES
78 | cmark-gfm-core-extensions.h
79 | DESTINATION include
80 | )
81 |
82 | install(EXPORT cmark-gfm-extensions DESTINATION lib${LIB_SUFFIX}/cmake-gfm-extensions)
83 | endif()
84 |
85 | # Feature tests: each check stores its result in the HAVE_* variable named as its last argument
86 | include(CheckIncludeFile)
87 | include(CheckCSourceCompiles)
88 | include(CheckCSourceRuns)
89 | include(CheckSymbolExists)
90 | CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H)
91 | CHECK_C_SOURCE_COMPILES(
92 | "int main() { __builtin_expect(0,0); return 0; }"
93 | HAVE___BUILTIN_EXPECT)
94 | CHECK_C_SOURCE_COMPILES("
95 | int f(void) __attribute__ (());
96 | int main() { return 0; }
97 | " HAVE___ATTRIBUTE__)
98 |
99 | # Always compile with warnings (the flags edited here are C flags, so the C settings are inspected)
100 | if(MSVC)
101 |   # Force to always compile with W4: replace an existing /W level in the C flags, else append /W4
102 |   if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
103 |     string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
104 |   else()
105 |     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
106 |   endif()
107 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS")
108 | elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
109 |   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic")
110 | endif()
111 |
112 | # Compile as C++ under MSVC older than 12.0 (/TP is appended to the C flags)
113 | if(MSVC AND MSVC_VERSION LESS 1800)
114 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
115 | endif()
116 | # The "Ubsan" build type adds -fsanitize=undefined to the C flags
117 | if(CMAKE_BUILD_TYPE STREQUAL "Ubsan")
118 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
119 | endif()
120 |
--------------------------------------------------------------------------------
/src/chunk.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_CHUNK_H
2 | #define CMARK_CHUNK_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include "cmark-gfm.h"
8 | #include "buffer.h"
9 | #include "cmark_ctype.h"
10 |
11 | #define CMARK_CHUNK_EMPTY \
12 | { NULL, 0, 0 }
13 | /* A byte range of `len` bytes at `data`; cmark_chunk_free releases `data` only when `alloc` is non-zero. */
14 | typedef struct cmark_chunk {
15 | unsigned char *data;
16 | bufsize_t len;
17 | bufsize_t alloc; // also implies a NULL-terminated string
18 | } cmark_chunk;
19 |
20 | static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
21 |   /* The buffer is released only for chunks flagged as allocated. */
22 |   if (c->alloc != 0)
23 |     mem->free(c->data);
24 |   c->len = 0;
25 |   c->alloc = 0;
26 |   c->data = NULL;
27 | }
28 |
29 | static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
30 |   /* Only valid on chunks that are not flagged as allocated. */
31 |   assert(!c->alloc);
32 |
33 |   /* Skip leading whitespace by moving the start forward. */
34 |   for (; c->len && cmark_isspace(c->data[0]); c->len--)
35 |     c->data++;
36 | }
37 |
38 | static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
39 |   /* Only valid on chunks that are not flagged as allocated. */
40 |   assert(!c->alloc);
41 |
42 |   /* Shorten the chunk while its last byte is whitespace;
43 |    * the bytes themselves are left untouched. */
44 |   while (c->len > 0 && cmark_isspace(c->data[c->len - 1])) {
45 |     c->len--;
46 |   }
47 | }
48 |
49 | static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) { /* trims whitespace from both ends */
50 | cmark_chunk_ltrim(c);
51 | cmark_chunk_rtrim(c);
52 | }
53 |
54 | static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
55 |                                                  bufsize_t offset) { /* index of the first `c` at or after `offset`, else ch->len */
56 |   const unsigned char *hit = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
57 |   if (!hit)
58 |     return ch->len;
59 |   return (bufsize_t)(hit - ch->data); }
60 |
61 | static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
62 |                                                     cmark_chunk *c) {
63 |   unsigned char *copy;
64 |
65 |   /* A chunk flagged as allocated has its data returned as-is. */
66 |   if (c->alloc)
67 |     return (char *)c->data;
68 |
69 |   /* Otherwise copy the bytes into a fresh buffer one byte longer and adopt it. */
70 |   copy = (unsigned char *)mem->calloc(c->len + 1, 1);
71 |   if (c->len > 0)
72 |     memcpy(copy, c->data, c->len);
73 |   copy[c->len] = 0;
74 |   c->data = copy;
75 |   c->alloc = 1;
76 |   return (char *)copy;
77 | }
78 |
79 | static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
80 |                                               const char *str) {
81 |   /* The previously allocated buffer (if any) is released only after the new value is in place. */
82 |   unsigned char *previous = c->alloc ? c->data : NULL;
83 |   if (str != NULL) {
84 |     c->len = (bufsize_t)strlen(str);
85 |     c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
86 |     c->alloc = 1;
87 |     memcpy(c->data, str, c->len + 1);
88 |   } else {
89 |     c->len = 0;
90 |     c->data = NULL;
91 |     c->alloc = 0;
92 |   }
93 |   if (previous != NULL)
94 |     mem->free(previous);
95 | }
96 |
97 | static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
98 |   cmark_chunk c = {(unsigned char *)data, 0, 0}; /* points at `data` without copying; alloc stays 0 */
99 |   if (data) c.len = (bufsize_t)strlen(data);
100 |   return c;
101 | }
102 |
103 | static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
104 |                                                 bufsize_t pos, bufsize_t len) {
105 |   cmark_chunk view = {ch->data + pos, len, 0}; /* shares ch's bytes; nothing is copied */
106 |   return view;
107 | }
108 |
109 | static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
110 |   /* The size is read before the buffer is detached. */
111 |   bufsize_t length = buf->size;
112 |   cmark_chunk c;
113 |   c.data = cmark_strbuf_detach(buf);
114 |   c.len = length;
115 |   c.alloc = 1;
116 |   return c;
117 | }
118 |
119 | /* The *_new trimmers accept a source chunk whether or not it is allocated
120 |  * and always produce a newly allocated, trimmed chunk. */
121 | static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) {
122 |   cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len); /* view over all of c, alloc == 0 */
123 |   cmark_chunk_ltrim(&trimmed);
124 |   cmark_chunk_to_cstr(mem, &trimmed); /* replaces the view with an allocated copy */
125 |   return trimmed;
126 | }
127 |
128 | static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) {
129 |   cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len); /* view over all of c, alloc == 0 */
130 |   cmark_chunk_rtrim(&trimmed);
131 |   cmark_chunk_to_cstr(mem, &trimmed); /* replaces the view with an allocated copy */
132 |   return trimmed;
133 | }
134 |
135 | #endif
136 |
--------------------------------------------------------------------------------
/src/houdini_html_u.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "buffer.h"
6 | #include "houdini.h"
7 | #include "utf8.h"
8 | #include "entities.inc"
9 |
10 | /* Binary tree lookup code for entities added by JGM */
11 | /* S_lookup probes cmark_entities[i] within [low, hi] for the len-byte name at s; returns the entry's bytes or NULL. */
12 | static const unsigned char *S_lookup(int i, int low, int hi,
13 | const unsigned char *s, int len) {
14 | int j;
15 | int cmp =
16 | strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
17 | if (cmp == 0 && cmark_entities[i].entity[len] == 0) { /* exact match: same first len bytes and the entry ends there */
18 | return (const unsigned char *)cmark_entities[i].bytes;
19 | } else if (cmp <= 0 && i > low) { /* also taken when s is only a prefix of a longer entry name */
20 | j = i - ((i - low) / 2);
21 | if (j == i) /* make sure the probe moves */
22 | j -= 1;
23 | return S_lookup(j, low, i - 1, s, len); /* continue in [low, i - 1] */
24 | } else if (cmp > 0 && i < hi) {
25 | j = i + ((hi - i) / 2);
26 | if (j == i) /* make sure the probe moves */
27 | j += 1;
28 | return S_lookup(j, i + 1, hi, s, len); /* continue in [i + 1, hi] */
29 | } else {
30 | return NULL; /* range exhausted without a match */
31 | }
32 | }
33 |
34 | static const unsigned char *S_lookup_entity(const unsigned char *s, int len) { /* searches the whole table, starting at its middle index */
35 | return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
36 | }
37 |
38 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
39 | bufsize_t size) { /* decodes one entity at src into ob; returns the bytes consumed, or 0 if none was recognized */
40 | bufsize_t i = 0;
41 |
42 | if (size >= 3 && src[0] == '#') { /* numeric reference: '#' followed by decimal digits, or by x/X and hex digits */
43 | int codepoint = 0;
44 | int num_digits = 0;
45 |
46 | if (_isdigit(src[1])) {
47 | for (i = 1; i < size && _isdigit(src[i]); ++i) {
48 | codepoint = (codepoint * 10) + (src[i] - '0');
49 |
50 | if (codepoint >= 0x110000) {
51 | // Keep counting digits but
52 | // avoid integer overflow.
53 | codepoint = 0x110000;
54 | }
55 | }
56 |
57 | num_digits = i - 1;
58 | }
59 |
60 | else if (src[1] == 'x' || src[1] == 'X') {
61 | for (i = 2; i < size && _isxdigit(src[i]); ++i) {
62 | codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); /* maps '0'-'9', 'a'-'f' and 'A'-'F' to 0..15 */
63 |
64 | if (codepoint >= 0x110000) {
65 | // Keep counting digits but
66 | // avoid integer overflow.
67 | codepoint = 0x110000;
68 | }
69 | }
70 |
71 | num_digits = i - 2;
72 | }
73 |
74 | if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') { /* accepted only with 1 to 8 digits and a closing ';' */
75 | if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || /* zero, the D800-DFFF range and values >= 0x110000 become U+FFFD */
76 | codepoint >= 0x110000) {
77 | codepoint = 0xFFFD;
78 | }
79 | cmark_utf8proc_encode_char(codepoint, ob);
80 | return i + 1; /* digits plus the ';' */
81 | }
82 | }
83 |
84 | else { /* named entity: look for the terminating ';' within the allowed name length */
85 | if (size > CMARK_ENTITY_MAX_LENGTH)
86 | size = CMARK_ENTITY_MAX_LENGTH;
87 |
88 | for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
89 | if (src[i] == ' ') /* a space ends the attempt */
90 | break;
91 |
92 | if (src[i] == ';') {
93 | const unsigned char *entity = S_lookup_entity(src, i);
94 |
95 | if (entity != NULL) {
96 | cmark_strbuf_puts(ob, (const char *)entity);
97 | return i + 1;
98 | }
99 |
100 | break;
101 | }
102 | }
103 | }
104 |
105 | return 0;
106 | }
107 |
108 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
109 | bufsize_t size) { /* copies src to ob with entities decoded; returns 0, writing nothing, when a non-empty src has no '&' */
110 | bufsize_t i = 0, org, ent;
111 |
112 | while (i < size) {
113 | org = i;
114 | while (i < size && src[i] != '&') /* advance over the run of bytes up to the next '&' */
115 | i++;
116 |
117 | if (likely(i > org)) {
118 | if (unlikely(org == 0)) { /* first run, starting at offset 0 */
119 | if (i >= size) /* it reached the end: the input has no '&' at all */
120 | return 0;
121 |
122 | cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
123 | }
124 |
125 | cmark_strbuf_put(ob, src + org, i - org);
126 | }
127 |
128 | /* escaping */
129 | if (i >= size)
130 | break;
131 |
132 | i++; /* step over the '&' */
133 |
134 | ent = houdini_unescape_ent(ob, src + i, size - i);
135 | i += ent;
136 |
137 | /* not really an entity */
138 | if (ent == 0)
139 | cmark_strbuf_putc(ob, '&');
140 | }
141 |
142 | return 1;
143 | }
144 |
145 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
146 | bufsize_t size) { /* like houdini_unescape_html, but copies src verbatim when that returns 0 */
147 | if (!houdini_unescape_html(ob, src, size))
148 | cmark_strbuf_put(ob, src, size);
149 | }
150 |
--------------------------------------------------------------------------------
/fuzz/fuzz_quadratic_brackets.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-core-extensions.h"
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | const char *extension_names[] = { /* NULL-terminated list of extensions attached to each parser below */
12 | "autolink",
13 | "strikethrough",
14 | "table",
15 | "tagfilter",
16 | NULL,
17 | };
18 |
19 | int LLVMFuzzerInitialize(int *argc, char ***argv) { /* argc and argv are not used */
20 | cmark_gfm_core_extensions_ensure_registered(); /* registers the GFM core extensions */
21 | return 0;
22 | }
23 |
24 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { /* parses one fuzz input and renders it to HTML; always returns 0 */
25 | struct __attribute__((packed)) {
26 | int options; /* overwritten below with a fixed option set */
27 | int width; /* copied from the input but not read in this function */
28 | uint8_t startlen; /* leading bytes copied once */
29 | uint8_t openlen; /* length of the slice repeated in the first half */
30 | uint8_t middlelen; /* bytes copied once between the two repeated parts */
31 | uint8_t closelen; /* length of the slice repeated in the second part */
32 | } fuzz_config;
33 |
34 | if (size >= sizeof(fuzz_config)) { /* inputs shorter than the config header are ignored */
35 | /* The beginning of `data` is treated as fuzzer configuration */
36 | memcpy(&fuzz_config, data, sizeof(fuzz_config));
37 |
38 | /* Test options that are used by GitHub. */
39 | fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
40 | fuzz_config.openlen = fuzz_config.openlen & 0x7; /* the three slice lengths are limited to 0..7 */
41 | fuzz_config.middlelen = fuzz_config.middlelen & 0x7;
42 | fuzz_config.closelen = fuzz_config.closelen & 0x7;
43 |
44 | /* Remainder of input is the markdown */
45 | const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
46 | const size_t markdown_size0 = size - sizeof(fuzz_config);
47 | char markdown[0x80000]; /* local 512 KiB buffer that receives the expanded markdown */
48 | if (markdown_size0 <= sizeof(markdown)) {
49 | size_t markdown_size = 0;
50 | const size_t componentslen = fuzz_config.startlen + fuzz_config.openlen + fuzz_config.middlelen + fuzz_config.closelen;
51 | if (componentslen <= markdown_size0) { /* all four components must lie inside the markdown */
52 | size_t offset = 0; /* read position in markdown0 */
53 | const size_t endlen = markdown_size0 - componentslen; /* bytes left after the four components */
54 | memcpy(&markdown[markdown_size], &markdown0[offset], fuzz_config.startlen);
55 | markdown_size += fuzz_config.startlen;
56 | offset += fuzz_config.startlen;
57 |
58 | if (0 < fuzz_config.openlen) {
59 | while (markdown_size + fuzz_config.openlen <= sizeof(markdown)/2) { /* repeat the open slice up to half of the buffer */
60 | memcpy(&markdown[markdown_size], &markdown0[offset],
61 | fuzz_config.openlen);
62 | markdown_size += fuzz_config.openlen;
63 | }
64 | offset += fuzz_config.openlen;
65 | }
66 | memcpy(&markdown[markdown_size], &markdown0[offset],
67 | fuzz_config.middlelen);
68 | markdown_size += fuzz_config.middlelen;
69 | offset += fuzz_config.middlelen;
70 | if (0 < fuzz_config.closelen) {
71 | while (markdown_size + fuzz_config.closelen + endlen <= sizeof(markdown)) { /* repeat the close slice while slice + tail still fit */
72 | memcpy(&markdown[markdown_size], &markdown0[offset],
73 | fuzz_config.closelen);
74 | markdown_size += fuzz_config.closelen;
75 | }
76 | offset += fuzz_config.closelen;
77 | }
78 | if (markdown_size + endlen <= sizeof(markdown)) { /* the tail is appended only when it fits */
79 | memcpy(&markdown[markdown_size], &markdown0[offset],
80 | endlen);
81 | markdown_size += endlen;
82 | }
83 | } else {
84 | markdown_size = markdown_size0; /* components exceed the markdown: use it unchanged */
85 | memcpy(markdown, markdown0, markdown_size);
86 | }
87 |
88 | cmark_parser *parser = cmark_parser_new(fuzz_config.options);
89 |
90 | for (const char **it = extension_names; *it; ++it) { /* attach every listed extension; abort if one cannot be found */
91 | const char *extension_name = *it;
92 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
93 | if (!syntax_extension) {
94 | fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
95 | abort();
96 | }
97 | cmark_parser_attach_syntax_extension(parser, syntax_extension);
98 | }
99 |
100 | cmark_parser_feed(parser, markdown, markdown_size);
101 | cmark_node *doc = cmark_parser_finish(parser);
102 |
103 | free(cmark_render_html(doc, fuzz_config.options, NULL)); /* the rendered HTML is freed immediately */
104 |
105 | cmark_node_free(doc);
106 | cmark_parser_free(parser);
107 | }
108 | }
109 | return 0;
110 | }
111 |
--------------------------------------------------------------------------------
/test/cmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p
5 | from subprocess import *
6 | import platform
7 | import os
8 |
9 | def pipe_through_prog(prog, text):
10 | p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
11 | [result, err] = p1.communicate(input=text.encode('utf-8'))
12 | return [p1.returncode, result.decode('utf-8'), err]
13 |
14 | def parse(lib, extlib, text, extensions):
15 | cmark_gfm_core_extensions_ensure_registered = extlib.cmark_gfm_core_extensions_ensure_registered
16 |
17 | find_syntax_extension = lib.cmark_find_syntax_extension
18 | find_syntax_extension.restype = c_void_p
19 | find_syntax_extension.argtypes = [c_char_p]
20 |
21 | parser_attach_syntax_extension = lib.cmark_parser_attach_syntax_extension
22 | parser_attach_syntax_extension.argtypes = [c_void_p, c_void_p]
23 |
24 | parser_new = lib.cmark_parser_new
25 | parser_new.restype = c_void_p
26 | parser_new.argtypes = [c_int]
27 |
28 | parser_feed = lib.cmark_parser_feed
29 | parser_feed.argtypes = [c_void_p, c_char_p, c_int]
30 |
31 | parser_finish = lib.cmark_parser_finish
32 | parser_finish.restype = c_void_p
33 | parser_finish.argtypes = [c_void_p]
34 |
35 | cmark_gfm_core_extensions_ensure_registered()
36 |
37 | parser = parser_new(0)
38 | for e in set(extensions):
39 | ext = find_syntax_extension(bytes(e, 'utf-8'))
40 | if not ext:
41 | raise Exception("Extension not found: '{}'".format(e))
42 | parser_attach_syntax_extension(parser, ext)
43 |
44 | textbytes = text.encode('utf-8')
45 | textlen = len(textbytes)
46 | parser_feed(parser, textbytes, textlen)
47 |
48 | return [parser_finish(parser), parser]
49 |
50 | def to_html(lib, extlib, text, extensions):
51 | document, parser = parse(lib, extlib, text, extensions)
52 | parser_get_syntax_extensions = lib.cmark_parser_get_syntax_extensions
53 | parser_get_syntax_extensions.restype = c_void_p
54 | parser_get_syntax_extensions.argtypes = [c_void_p]
55 | syntax_extensions = parser_get_syntax_extensions(parser)
56 |
57 | render_html = lib.cmark_render_html
58 | render_html.restype = c_char_p
59 | render_html.argtypes = [c_void_p, c_int, c_void_p]
60 | # 1 << 17 == CMARK_OPT_UNSAFE
61 | result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
62 | return [0, result, '']
63 |
64 | def to_commonmark(lib, extlib, text, extensions):
65 | document, _ = parse(lib, extlib, text, extensions)
66 |
67 | render_commonmark = lib.cmark_render_commonmark
68 | render_commonmark.restype = c_char_p
69 | render_commonmark.argtypes = [c_void_p, c_int, c_int]
70 | result = render_commonmark(document, 0, 0).decode('utf-8')
71 | return [0, result, '']
72 |
73 | class CMark:
74 | def __init__(self, prog=None, library_dir=None, extensions=None):
75 | self.prog = prog
76 | self.extensions = []
77 | if extensions:
78 | self.extensions = extensions.split()
79 |
80 | if prog:
81 | prog += ' --unsafe'
82 | extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)])
83 | self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x)
84 | self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x)
85 | else:
86 | sysname = platform.system()
87 | if sysname == 'Darwin':
88 | libnames = [ ["lib", ".dylib" ] ]
89 | elif sysname == 'Windows':
90 | libnames = [ ["", ".dll"], ["lib", ".dll"] ]
91 | else:
92 | libnames = [ ["lib", ".so"] ]
93 | if not library_dir:
94 | library_dir = os.path.join("..", "build", "src")
95 | for prefix, suffix in libnames:
96 | candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix)
97 | if os.path.isfile(candidate):
98 | libpath = candidate
99 | break
100 | cmark = CDLL(libpath)
101 | extlib = CDLL(os.path.join(
102 | library_dir, "..", "extensions", prefix + "cmark-gfm-extensions" + suffix))
103 | self.to_html = lambda x, exts=[]: to_html(cmark, extlib, x, exts + self.extensions)
104 | self.to_commonmark = lambda x, exts=[]: to_commonmark(cmark, extlib, x, exts + self.extensions)
105 |
106 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # To get verbose output: cmake --build build --target "test" -- ARGS='-V'
2 |
3 | # By default, we run the spec tests only if python3 is available.
4 | # To require the spec tests, compile with -DSPEC_TESTS=1
5 |
6 | if (SPEC_TESTS)
7 |   find_package(PythonInterp 3 REQUIRED)  # spec tests demanded: configuring fails without python 3
8 | else()
9 |   find_package(PythonInterp 3)           # optional lookup; PYTHONINTERP_FOUND is checked further down
10 | endif()
11 |
12 | if (CMARK_SHARED OR CMARK_STATIC)
13 |   add_test(NAME api_test COMMAND api_test)
14 | endif()
15 | if (WIN32)
16 |   file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_SRC_DLL_DIR)
17 |   file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/extensions WIN_EXTENSIONS_DLL_DIR)
18 |   set(NEWPATH "${WIN_SRC_DLL_DIR};${WIN_EXTENSIONS_DLL_DIR};$ENV{PATH}")
19 |   string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}")
20 |   if (CMARK_SHARED OR CMARK_STATIC)  # api_test exists only under this condition; setting properties on a missing test is a configure error
21 |     set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}")
22 |   endif()
23 |   set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
24 | else(WIN32)
25 |   set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh")
26 | endif(WIN32)
27 | IF (PYTHONINTERP_FOUND)
28 |   # Doctests embedded in normalize.py; no cmark-gfm binary is involved.
29 |   add_test(html_normalization
30 |     ${PYTHON_EXECUTABLE} "-m" "doctest"
31 |     "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py"
32 |     )
33 |   # Library-backed tests: the scripts receive --library-dir, so they are added only for shared builds.
34 |   if (CMARK_SHARED)
35 |     add_test(spectest_library
36 |       ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec"
37 |       "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
38 |       )
39 |
40 |     add_test(pathological_tests_library
41 |       ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
42 |       "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
43 |       )
44 |
45 |     add_test(roundtriptest_library
46 |       ${PYTHON_EXECUTABLE}
47 |       "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
48 |       "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
49 |       "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
50 |       )
51 |
52 |     add_test(entity_library
53 |       ${PYTHON_EXECUTABLE}
54 |       "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
55 |       "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
56 |       )
57 |   endif()
58 |   # Executable-backed tests: each one passes the built cmark-gfm program (plus flags) via --program.
59 |   add_test(spectest_executable
60 |     ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm"
61 |     )
62 |
63 |   add_test(smartpuncttest_executable
64 |     ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --smart"
65 |     )
66 |
67 |   add_test(extensions_executable
68 |     ${PYTHON_EXECUTABLE}
69 |     "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
70 |     "--no-normalize"
71 |     "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt"
72 |     "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm"
73 |     "--extensions" "table strikethrough autolink tagfilter footnotes tasklist"
74 |     )
75 |
76 |   add_test(roundtrip_extensions_executable
77 |     ${PYTHON_EXECUTABLE}
78 |     "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
79 |     "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt"
80 |     "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm"
81 |     "--extensions" "table strikethrough autolink tagfilter footnotes tasklist"
82 |     )
83 |
84 |   add_test(option_table_prefer_style_attributes
85 |     ${PYTHON_EXECUTABLE}
86 |     "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
87 |     "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt"
88 |     "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --table-prefer-style-attributes"
89 |     "--extensions" "table strikethrough autolink tagfilter footnotes tasklist"
90 |     )
91 |
92 |   add_test(option_full_info_string
93 |     ${PYTHON_EXECUTABLE}
94 |     "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
95 |     "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/extensions-full-info-string.txt"
96 |     "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm --full-info-string"
97 |     )
98 |
99 |   add_test(regressiontest_executable
100 |     ${PYTHON_EXECUTABLE}
101 |     "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec"
102 |     "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" "--program"
103 |     "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark-gfm"
104 |     )
105 |
106 |
107 | ELSE(PYTHONINTERP_FOUND)
108 |   # No interpreter: print a notice at configure time and register a placeholder test that only echoes a message.
109 |   message("\n*** A python 3 interpreter is required to run the spec tests.\n")
110 |   add_test(skipping_spectests
111 |     echo "Skipping spec tests, because no python 3 interpreter is available.")
112 |
113 | ENDIF(PYTHONINTERP_FOUND)
114 |
115 |
--------------------------------------------------------------------------------
/src/node.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_NODE_H
2 | #define CMARK_NODE_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #include
9 | #include
10 |
11 | #include "cmark-gfm.h"
12 | #include "cmark-gfm-extension_api.h"
13 | #include "buffer.h"
14 | #include "chunk.h"
15 | /* List payload; held in the `as.list` member of struct cmark_node. */
16 | typedef struct {
17 |   cmark_list_type list_type;
18 |   int marker_offset;
19 |   int padding;
20 |   int start;
21 |   cmark_delim_type delimiter;
22 |   unsigned char bullet_char;
23 |   bool tight;
24 |   bool checked; // For task list extension
25 | } cmark_list;
26 | /* Code payload; held in the `as.code` member of struct cmark_node. */
27 | typedef struct {
28 |   cmark_chunk info;
29 |   cmark_chunk literal;
30 |   uint8_t fence_length; /* 8-bit field: values above 255 cannot be represented */
31 |   uint8_t fence_offset;
32 |   unsigned char fence_char;
33 |   int8_t fenced;
34 | } cmark_code;
35 | /* Heading payload; held in the `as.heading` member of struct cmark_node. */
36 | typedef struct {
37 |   int level;
38 |   bool setext;
39 | } cmark_heading;
40 | /* Link payload (url and title chunks); held in the `as.link` member of struct cmark_node. */
41 | typedef struct {
42 |   cmark_chunk url;
43 |   cmark_chunk title;
44 | } cmark_link;
45 | /* Custom payload (on_enter / on_exit chunks); held in the `as.custom` member of struct cmark_node. */
46 | typedef struct {
47 |   cmark_chunk on_enter;
48 |   cmark_chunk on_exit;
49 | } cmark_custom;
50 | /* Single-bit flags; bits 0..2 are built in, bit 3 is the first value handed to extensions. */
51 | enum cmark_node__internal_flags {
52 |   CMARK_NODE__OPEN = (1 << 0),
53 |   CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
54 |   CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
55 |
56 |   // Extensions can register custom flags by calling `cmark_register_node_flag`.
57 |   // This is the starting value for the custom flags.
58 |   CMARK_NODE__REGISTER_FIRST = (1 << 3),
59 | };
60 | /* Storage type of cmark_node.flags: 16 bits, so bits 3..15 remain above the three built-in flags. */
61 | typedef uint16_t cmark_node_internal_flags;
62 | /* A document tree node: tree links, source position, type/flags, and a type-specific payload in `as`. */
63 | struct cmark_node {
64 |   cmark_strbuf content; /* its `mem` member is what cmark_node_mem() returns */
65 |
66 |   struct cmark_node *next;
67 |   struct cmark_node *prev;
68 |   struct cmark_node *parent;
69 |   struct cmark_node *first_child;
70 |   struct cmark_node *last_child;
71 |
72 |   void *user_data;
73 |   cmark_free_func user_data_free_func; /* presumably used to release user_data; confirm in node.c */
74 |
75 |   int start_line;
76 |   int start_column;
77 |   int end_line;
78 |   int end_column;
79 |   int internal_offset;
80 |   uint16_t type; /* cast to cmark_node_type by the CMARK_NODE_*_P predicates in this header */
81 |   cmark_node_internal_flags flags; /* where registered node flags are stored, per the cmark_register_node_flag doc */
82 |
83 |   cmark_syntax_extension *extension;
84 |
85 |   /**
86 |    * Used during cmark_render() to cache the most recent non-NULL
87 |    * extension, if you go up the parent chain like this:
88 |    *
89 |    * node->parent->...parent->extension
90 |    */
91 |   cmark_syntax_extension *ancestor_extension;
92 |
93 |   union { /* the two ints share storage; which one is valid is not visible in this header */
94 |     int ref_ix;
95 |     int def_count;
96 |   } footnote;
97 |
98 |   cmark_node *parent_footnote_def;
99 |
100 |   union { /* type-specific payload; only one member is meaningful for a given node */
101 |     cmark_chunk literal;
102 |     cmark_list list;
103 |     cmark_code code;
104 |     cmark_heading heading;
105 |     cmark_link link;
106 |     cmark_custom custom;
107 |     int html_block_type;
108 |     int cell_index; // For keeping track of TABLE_CELL table alignments
109 |     void *opaque;
110 |   } as;
111 | };
112 |
113 | /**
114 | * Syntax extensions can use this function to register a custom node
115 | * flag. The flags are stored in the `flags` field of the `cmark_node`
116 | * struct. The `flags` parameter should be the address of a global variable
117 | * which will store the flag value.
118 | */
119 | CMARK_GFM_EXPORT
120 | void cmark_register_node_flag(cmark_node_internal_flags *flags);
121 |
122 | /**
123 | * DEPRECATED.
124 | *
125 | * This function was added in cmark-gfm version 0.29.0.gfm.7, and was
126 | * required to be called at program start time, which caused
127 | * backwards-compatibility issues in applications that use cmark-gfm as a
128 | * library. It is now a no-op.
129 | */
130 | CMARK_GFM_EXPORT
131 | void cmark_init_standard_node_flags(void);
132 | /* Returns the cmark_mem pointer stored in the node's `content` buffer; `node` must not be NULL. */
133 | static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
134 |   return node->content.mem;
135 | }
136 | CMARK_GFM_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
137 | /* Type predicates: a type is a block (or inline) when its CMARK_NODE_TYPE_MASK bits equal the matching class constant. */
138 | static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) {
139 |   return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK;
140 | }
141 | /* Node variant of the block test; returns false for a NULL node. */
142 | static CMARK_INLINE bool CMARK_NODE_BLOCK_P(cmark_node *node) {
143 |   return node != NULL && CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type);
144 | }
145 | /* True when the masked bits of `node_type` equal CMARK_NODE_TYPE_INLINE. */
146 | static CMARK_INLINE bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) {
147 |   return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_INLINE;
148 | }
149 | /* Node variant of the inline test; returns false for a NULL node. */
150 | static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) {
151 |   return node != NULL && CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type);
152 | }
153 |
154 | CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type);
155 |
156 | /**
157 | * Enable (or disable) extra safety checks. These extra checks cause
158 | * extra performance overhead (in some cases quadratic), so they are only
159 | * intended to be used during testing.
160 | */
161 | CMARK_GFM_EXPORT void cmark_enable_safety_checks(bool enable);
162 |
163 | #ifdef __cplusplus
164 | }
165 | #endif
166 |
167 | #endif
168 |
--------------------------------------------------------------------------------
/test/smart_punct.txt:
--------------------------------------------------------------------------------
1 | ## Smart punctuation
2 |
3 | Open quotes are matched with closed quotes.
4 | The same method is used for matching openers and closers
5 | as is used in emphasis parsing:
6 |
7 | ```````````````````````````````` example
8 | "Hello," said the spider.
9 | "'Shelob' is my name."
10 | .
11 | <p>“Hello,” said the spider.
12 | “‘Shelob’ is my name.”</p>
13 | ````````````````````````````````
14 |
15 | ```````````````````````````````` example
16 | 'A', 'B', and 'C' are letters.
17 | .
18 | <p>‘A’, ‘B’, and ‘C’ are letters.</p>
19 | ````````````````````````````````
20 |
21 | ```````````````````````````````` example
22 | 'Oak,' 'elm,' and 'beech' are names of trees.
23 | So is 'pine.'
24 | .
25 | <p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees.
26 | So is ‘pine.’</p>
27 | ````````````````````````````````
28 |
29 | ```````````````````````````````` example
30 | 'He said, "I want to go."'
31 | .
32 | <p>‘He said, “I want to go.”’</p>
33 | ````````````````````````````````
34 |
35 | A single quote that isn't an open quote matched
36 | with a close quote will be treated as an
37 | apostrophe:
38 |
39 | ```````````````````````````````` example
40 | Were you alive in the 70's?
41 | .
42 | <p>Were you alive in the 70’s?</p>
43 | ````````````````````````````````
44 |
45 | ```````````````````````````````` example
46 | Here is some quoted '`code`' and a "[quoted link](url)".
47 | .
48 | <p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
49 | ````````````````````````````````
50 |
51 | Here the first `'` is treated as an apostrophe, not
52 | an open quote, because the final single quote is matched
53 | by the single quote before `jolly`:
54 |
55 | ```````````````````````````````` example
56 | 'tis the season to be 'jolly'
57 | .
58 | <p>’tis the season to be ‘jolly’</p>
59 | ````````````````````````````````
60 |
61 | Multiple apostrophes should not be marked as open/closing quotes.
62 |
63 | ```````````````````````````````` example
64 | 'We'll use Jane's boat and John's truck,' Jenna said.
65 | .
66 | <p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p>
67 | ````````````````````````````````
68 |
69 | An unmatched double quote will be interpreted as a
70 | left double quote, to facilitate this style:
71 |
72 | ```````````````````````````````` example
73 | "A paragraph with no closing quote.
74 |
75 | "Second paragraph by same speaker, in fiction."
76 | .
77 | <p>“A paragraph with no closing quote.</p>
78 | <p>“Second paragraph by same speaker, in fiction.”</p>
79 | ````````````````````````````````
80 |
81 | A quote following a `]` or `)` character cannot
82 | be an open quote:
83 |
84 | ```````````````````````````````` example
85 | [a]'s b'
86 | .
87 | <p>[a]’s b’</p>
88 | ````````````````````````````````
89 |
90 | Quotes that are escaped come out as literal straight
91 | quotes:
92 |
93 | ```````````````````````````````` example
94 | \"This is not smart.\"
95 | This isn\'t either.
96 | 5\'8\"
97 | .
98 | <p>&quot;This is not smart.&quot;
99 | This isn't either.
100 | 5'8&quot;</p>
101 | ````````````````````````````````
102 |
103 | Two hyphens form an en-dash, three an em-dash.
104 |
105 | ```````````````````````````````` example
106 | Some dashes: em---em
107 | en--en
108 | em --- em
109 | en -- en
110 | 2--3
111 | .
112 | <p>Some dashes: em—em
113 | en–en
114 | em — em
115 | en – en
116 | 2–3</p>
117 | ````````````````````````````````
118 |
119 | A sequence of more than three hyphens is
120 | parsed as a sequence of em and/or en dashes,
121 | with no hyphens. If possible, a homogeneous
122 | sequence of dashes is used (so, 10 hyphens
123 | = 5 en dashes, and 9 hyphens = 3 em dashes).
124 | When a heterogeneous sequence must be used,
125 | the em dashes come first, followed by the en
126 | dashes, and as few en dashes as possible are
127 | used (so, 7 hyphens = 2 em dashes and 1 en
128 | dash).
129 |
130 | ```````````````````````````````` example
131 | one-
132 | two--
133 | three---
134 | four----
135 | five-----
136 | six------
137 | seven-------
138 | eight--------
139 | nine---------
140 | thirteen-------------.
141 | .
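142 | <p>one-
143 | two–
144 | three—
145 | four––
146 | five—–
147 | six——
148 | seven—––
149 | eight––––
150 | nine———
151 | thirteen———––.</p>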
152 | ````````````````````````````````
153 |
154 | Hyphens can be escaped:
155 |
156 | ```````````````````````````````` example
157 | Escaped hyphens: \-- \-\-\-.
158 | .
159 | <p>Escaped hyphens: -- ---.</p>
160 | ````````````````````````````````
161 |
162 | Three periods form an ellipsis:
163 |
164 | ```````````````````````````````` example
165 | Ellipses...and...and....
166 | .
167 | <p>Ellipses…and…and….</p>
168 | ````````````````````````````````
169 |
170 | Periods can be escaped if ellipsis-formation
171 | is not wanted:
172 |
173 | ```````````````````````````````` example
174 | No ellipses\.\.\.
175 | .
176 |