9 |
10 | #include "harness.h"
11 |
12 | test_batch_runner *test_batch_runner_new() {
13 | return (test_batch_runner *)calloc(1, sizeof(test_batch_runner));
14 | }
15 |
16 | static void test_result(test_batch_runner *runner, int cond, const char *msg,
17 | va_list ap) {
18 | ++runner->test_num;
19 |
20 | if (cond) {
21 | ++runner->num_passed;
22 | } else {
23 | fprintf(stderr, "FAILED test %d: ", runner->test_num);
24 | vfprintf(stderr, msg, ap);
25 | fprintf(stderr, "\n");
26 | ++runner->num_failed;
27 | }
28 | }
29 |
30 | void SKIP(test_batch_runner *runner, int num_tests) {
31 | runner->test_num += num_tests;
32 | runner->num_skipped += num_tests;
33 | }
34 |
35 | void OK(test_batch_runner *runner, int cond, const char *msg, ...) {
36 | va_list ap;
37 | va_start(ap, msg);
38 | test_result(runner, cond, msg, ap);
39 | va_end(ap);
40 | }
41 |
42 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
43 | ...) {
44 | int cond = got == expected;
45 |
46 | va_list ap;
47 | va_start(ap, msg);
48 | test_result(runner, cond, msg, ap);
49 | va_end(ap);
50 |
51 | if (!cond) {
52 | fprintf(stderr, " Got: %d\n", got);
53 | fprintf(stderr, " Expected: %d\n", expected);
54 | }
55 | }
56 |
57 | #ifndef _WIN32
#include <unistd.h>
59 |
/*
 * Creates a uniquely named file from the template /tmp/fileXXXXXX, writes
 * `header` followed by `data` into it, and returns the heap-allocated file
 * name.  The caller is expected to remove() the file and free() the name.
 *
 * Returns NULL only when the name buffer itself cannot be allocated.  When
 * the file cannot be created or opened, a diagnostic is printed and the
 * name is still returned, so callers can pass it on unchanged; the file
 * may then be missing or empty.
 */
static char *write_tmp(char const *header, char const *data) {
  char *name = strdup("/tmp/fileXXXXXX");
  int fd;
  FILE *f;

  if (name == NULL) {
    perror("strdup");
    return NULL;
  }

  fd = mkstemp(name);
  if (fd < 0) {
    /* No descriptor to wrap: passing -1 on would hand fputs a NULL stream. */
    perror("mkstemp");
    return name;
  }

  f = fdopen(fd, "w+");
  if (f == NULL) {
    /* fdopen did not take ownership of fd, so release it here. */
    perror("fdopen");
    close(fd);
    return name;
  }

  fputs(header, f);
  fwrite(data, 1, strlen(data), f);
  fclose(f);
  return name;
}
69 |
70 | #endif
71 |
72 | void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
73 | const char *msg, ...) {
74 | int cond = strcmp(got, expected) == 0;
75 |
76 | va_list ap;
77 | va_start(ap, msg);
78 | test_result(runner, cond, msg, ap);
79 | va_end(ap);
80 |
81 | if (!cond) {
82 | #ifndef _WIN32
83 | char *got_fn = write_tmp("actual\n", got);
84 | char *expected_fn = write_tmp("expected\n", expected);
85 | char buf[1024];
86 | snprintf(buf, sizeof(buf), "git diff --no-index %s %s", expected_fn, got_fn);
87 | system(buf);
88 | remove(got_fn);
89 | remove(expected_fn);
90 | free(got_fn);
91 | free(expected_fn);
92 | #else
93 | fprintf(stderr, " Got: \"%s\"\n", got);
94 | fprintf(stderr, " Expected: \"%s\"\n", expected);
95 | #endif
96 | }
97 | }
98 |
99 | int test_ok(test_batch_runner *runner) { return runner->num_failed == 0; }
100 |
101 | void test_print_summary(test_batch_runner *runner) {
102 | int num_passed = runner->num_passed;
103 | int num_skipped = runner->num_skipped;
104 | int num_failed = runner->num_failed;
105 |
106 | fprintf(stderr, "%d tests passed, %d failed, %d skipped\n", num_passed,
107 | num_failed, num_skipped);
108 |
109 | if (test_ok(runner)) {
110 | fprintf(stderr, "PASS\n");
111 | } else {
112 | fprintf(stderr, "FAIL\n");
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/api_test/harness.h:
--------------------------------------------------------------------------------
#ifndef CMARK_API_TEST_HARNESS_H
#define CMARK_API_TEST_HARNESS_H

#ifdef __cplusplus
extern "C" {
#endif

/* Counters describing one batch of checks. */
typedef struct {
  int test_num;    /* number of the most recent check (skips included) */
  int num_passed;  /* checks whose condition held */
  int num_failed;  /* checks whose condition did not hold */
  int num_skipped; /* checks skipped via SKIP() */
} test_batch_runner;

/* Allocates a runner with all counters zero; returns NULL on allocation
   failure. */
test_batch_runner *test_batch_runner_new(void);

/* Records `num_tests` checks as skipped and advances the test number. */
void SKIP(test_batch_runner *runner, int num_tests);

/* Records a pass when `cond` is non-zero; otherwise records a failure and
   prints the printf-style message `msg` to stderr. */
void OK(test_batch_runner *runner, int cond, const char *msg, ...);

/* Like OK() with the condition `got == expected`; on failure both values
   are also printed to stderr. */
void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
            ...);

/* Like OK() with the condition strcmp(got, expected) == 0; on failure the
   difference between the two strings is reported. */
void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
            const char *msg, ...);

/* Returns non-zero when no failure has been recorded on `runner`. */
int test_ok(test_batch_runner *runner);

/* Prints the pass/fail/skip totals and a final PASS or FAIL line to stderr. */
void test_print_summary(test_batch_runner *runner);

#ifdef __cplusplus
}
#endif

#endif
36 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | PYTHON: "C:\\Python34-x64"
3 | PYTHON_VERSION: "3.4.3"
4 | PYTHON_ARCH: "64"
5 | matrix:
6 | - MSVC_VERSION: 10
7 | - MSVC_VERSION: 12
8 |
9 | # set up for nmake:
10 | install:
11 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
12 |
13 | build_script:
14 | - 'tools\appveyor-build.bat'
15 |
16 | artifacts:
17 | - path: build/src/cmark-gfm.exe
18 | name: cmark-gfm.exe
19 |
20 | test_script:
21 | - 'nmake test'
22 |
--------------------------------------------------------------------------------
/bench/samples/block-bq-flat.md:
--------------------------------------------------------------------------------
1 | > the simple example of a blockquote
2 | > the simple example of a blockquote
3 | > the simple example of a blockquote
4 | > the simple example of a blockquote
5 | ... continuation
6 | ... continuation
7 | ... continuation
8 | ... continuation
9 |
10 | empty blockquote:
11 |
12 | >
13 | >
14 | >
15 | >
16 |
17 |
--------------------------------------------------------------------------------
/bench/samples/block-bq-nested.md:
--------------------------------------------------------------------------------
1 | >>>>>> deeply nested blockquote
2 | >>>>> deeply nested blockquote
3 | >>>> deeply nested blockquote
4 | >>> deeply nested blockquote
5 | >> deeply nested blockquote
6 | > deeply nested blockquote
7 |
8 | > deeply nested blockquote
9 | >> deeply nested blockquote
10 | >>> deeply nested blockquote
11 | >>>> deeply nested blockquote
12 | >>>>> deeply nested blockquote
13 | >>>>>> deeply nested blockquote
14 |
--------------------------------------------------------------------------------
/bench/samples/block-code.md:
--------------------------------------------------------------------------------
1 |
2 | an
3 | example
4 |
5 | of
6 |
7 |
8 |
9 | a code
10 | block
11 |
12 |
--------------------------------------------------------------------------------
/bench/samples/block-fences.md:
--------------------------------------------------------------------------------
1 |
2 | ``````````text
3 | an
4 | example
5 | ```
6 | of
7 |
8 |
9 | a fenced
10 | ```
11 | code
12 | block
13 | ``````````
14 |
15 |
--------------------------------------------------------------------------------
/bench/samples/block-heading.md:
--------------------------------------------------------------------------------
1 | # heading
2 | ### heading
3 | ##### heading
4 |
5 | # heading #
6 | ### heading ###
7 | ##### heading \#\#\#\#\######
8 |
9 | ############ not a heading
10 |
--------------------------------------------------------------------------------
/bench/samples/block-hr.md:
--------------------------------------------------------------------------------
1 |
2 | * * * * *
3 |
4 | - - - - -
5 |
6 | ________
7 |
8 |
9 | ************************* text
10 |
11 |
--------------------------------------------------------------------------------
/bench/samples/block-html.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | blah blah
4 |
5 |
6 |
7 |
8 |
9 |
10 | **test**
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | test
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
32 |
33 |
--------------------------------------------------------------------------------
/bench/samples/block-lheading.md:
--------------------------------------------------------------------------------
1 | heading
2 | ---
3 |
4 | heading
5 | ===================================
6 |
7 | not a heading
8 | ----------------------------------- text
9 |
--------------------------------------------------------------------------------
/bench/samples/block-list-flat.md:
--------------------------------------------------------------------------------
1 | - tidy
2 | - bullet
3 | - list
4 |
5 |
6 | - loose
7 |
8 | - bullet
9 |
10 | - list
11 |
12 |
13 | 0. ordered
14 | 1. list
15 | 2. example
16 |
17 |
18 | -
19 | -
20 | -
21 | -
22 |
23 |
24 | 1.
25 | 2.
26 | 3.
27 |
28 |
29 | - an example
30 | of a list item
31 | with a continuation
32 |
33 | this part is inside the list
34 |
35 | this part is just a paragraph
36 |
37 |
38 | 1. test
39 | - test
40 | 1. test
41 | - test
42 |
43 |
44 | 111111111111111111111111111111111111111111. is this a valid bullet?
45 |
46 | - _________________________
47 |
48 | - this
49 | - is
50 |
51 | a
52 |
53 | long
54 | - loose
55 | - list
56 |
57 | - with
58 | - some
59 |
60 | tidy
61 |
62 | - list
63 | - items
64 | - in
65 |
66 | - between
67 | - _________________________
68 |
--------------------------------------------------------------------------------
/bench/samples/block-list-nested.md:
--------------------------------------------------------------------------------
1 |
2 | - this
3 | - is
4 | - a
5 | - deeply
6 | - nested
7 | - bullet
8 | - list
9 |
10 |
11 | 1. this
12 | 2. is
13 | 3. a
14 | 4. deeply
15 | 5. nested
16 | 6. unordered
17 | 7. list
18 |
19 |
20 | - 1
21 | - 2
22 | - 3
23 | - 4
24 | - 5
25 | - 6
26 | - 7
27 | - 6
28 | - 5
29 | - 4
30 | - 3
31 | - 2
32 | - 1
33 |
34 |
35 | - - - - - - - - - deeply-nested one-element item
36 |
37 |
--------------------------------------------------------------------------------
/bench/samples/block-ref-flat.md:
--------------------------------------------------------------------------------
1 | [1] [2] [3] [1] [2] [3]
2 |
3 | [looooooooooooooooooooooooooooooooooooooooooooooooooong label]
4 |
5 | [1]:
6 | [2]: http://something.example.com/foo/bar 'test'
7 | [3]:
8 | http://foo/bar
9 | [ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]:
10 | 111
11 | 'test'
12 | [[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q
13 | (as long as it is not referenced anywhere)
14 |
15 | [[[[[[[[[[[[[[[[[[[[]: this is not a valid reference
16 |
--------------------------------------------------------------------------------
/bench/samples/block-ref-nested.md:
--------------------------------------------------------------------------------
1 | [[[[[[[foo]]]]]]]
2 |
3 | [[[[[[[foo]]]]]]]: bar
4 | [[[[[[foo]]]]]]: bar
5 | [[[[[foo]]]]]: bar
6 | [[[[foo]]]]: bar
7 | [[[foo]]]: bar
8 | [[foo]]: bar
9 | [foo]: bar
10 |
11 | [*[*[*[*[foo]*]*]*]*]
12 |
13 | [*[*[*[*[foo]*]*]*]*]: bar
14 | [*[*[*[foo]*]*]*]: bar
15 | [*[*[foo]*]*]: bar
16 | [*[foo]*]: bar
17 | [foo]: bar
18 |
--------------------------------------------------------------------------------
/bench/samples/inline-autolink.md:
--------------------------------------------------------------------------------
1 | closed (valid) autolinks:
2 |
3 |
4 |
5 |
6 |
7 |
8 | these are not autolinks:
9 |
10 |
15 |
--------------------------------------------------------------------------------
/bench/samples/inline-backticks.md:
--------------------------------------------------------------------------------
1 | `lots`of`backticks`
2 |
3 | ``i``wonder``how``this``will``be``parsed``
4 |
--------------------------------------------------------------------------------
/bench/samples/inline-em-flat.md:
--------------------------------------------------------------------------------
1 | *this* *is* *your* *basic* *boring* *emphasis*
2 |
3 | _this_ _is_ _your_ _basic_ _boring_ _emphasis_
4 |
5 | **this** **is** **your** **basic** **boring** **emphasis**
6 |
--------------------------------------------------------------------------------
/bench/samples/inline-em-nested.md:
--------------------------------------------------------------------------------
1 | *this *is *a *bunch* of* nested* emphases*
2 |
3 | __this __is __a __bunch__ of__ nested__ emphases__
4 |
5 | ***this ***is ***a ***bunch*** of*** nested*** emphases***
6 |
--------------------------------------------------------------------------------
/bench/samples/inline-em-worst.md:
--------------------------------------------------------------------------------
1 | *this *is *a *worst *case *for *em *backtracking
2 |
3 | __this __is __a __worst __case __for __em __backtracking
4 |
5 | ***this ***is ***a ***worst ***case ***for ***em ***backtracking
6 |
--------------------------------------------------------------------------------
/bench/samples/inline-entity.md:
--------------------------------------------------------------------------------
1 | entities:
2 |
3 | & © Æ Ď ¾ ℋ ⅆ ∲
4 |
5 | # Ӓ Ϡ
6 |
7 | non-entities:
8 |
9 | &18900987654321234567890; &1234567890098765432123456789009876543212345678987654;
10 |
11 | &qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri;
12 |
--------------------------------------------------------------------------------
/bench/samples/inline-escape.md:
--------------------------------------------------------------------------------
1 |
2 | \t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s
3 |
4 | \!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?
5 |
6 | \@ \[ \] \^ \_ \` \{ \| \} \~ \- \'
7 |
8 | \
9 | \\
10 | \\\
11 | \\\\
12 | \\\\\
13 |
14 | \ \ \ \
15 |
16 |
--------------------------------------------------------------------------------
/bench/samples/inline-html.md:
--------------------------------------------------------------------------------
1 | Taking commonmark tests from the spec for benchmarking here:
2 |
3 |
4 |
5 |
6 |
7 |
9 |
10 |
12 |
13 | <33> <__>
14 |
15 |
16 |
17 |
28 |
29 | foo
31 |
32 | foo
33 |
34 | foo
35 |
36 | foo
37 |
38 | foo &<]]>
39 |
40 |
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/bench/samples/inline-links-flat.md:
--------------------------------------------------------------------------------
1 | Valid links:
2 |
3 | [this is a link]()
4 | [this is a link]()
5 | [this is a link](http://something.example.com/foo/bar 'test')
6 | ![this is an image]()
7 | ![this is an image]()
8 | 
9 |
10 | [escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'')
11 | [escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\))
12 |
13 | Invalid links:
14 |
15 | [this is not a link
16 |
17 | [this is not a link](
18 |
19 | [this is not a link](http://something.example.com/foo/bar 'test'
20 |
21 | [this is not a link](((((((((((((((((((((((((((((((((((((((((((((((
22 |
23 | [this is not a link]((((((((((()))))))))) (((((((((()))))))))))
24 |
--------------------------------------------------------------------------------
/bench/samples/inline-links-nested.md:
--------------------------------------------------------------------------------
1 | Valid links:
2 |
3 | [[[[[[[[](test)](test)](test)](test)](test)](test)](test)]
4 |
5 | [ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test)
6 |
7 | Invalid links:
8 |
9 | [[[[[[[[[
10 |
11 | [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [
12 |
13 |  lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
12 |
13 | [1]: https://github.com/markdown-it
14 |
--------------------------------------------------------------------------------
/bench/samples/rawtabs.md:
--------------------------------------------------------------------------------
1 |
2 | this is a test for tab expansion, be careful not to replace them with spaces
3 |
4 | 1 4444
5 | 22 333
6 | 333 22
7 | 4444 1
8 |
9 |
10 | tab-indented line
11 | space-indented line
12 | tab-indented line
13 |
14 |
15 | a lot of spaces in between here
16 |
17 | a lot of tabs in between here
18 |
19 |
--------------------------------------------------------------------------------
/bench/stats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import sys
4 | import statistics
5 |
def pairs(l, n):
    """Group the flat sequence `l` into consecutive tuples of `n` items.

    Tuple k holds l[k*n] .. l[k*n + n - 1].  Trailing items that do not fill
    a whole tuple are dropped, because zip stops at the shortest slice.
    """
    columns = (l[start::n] for start in range(n))
    return zip(*columns)
8 |
9 | # data comes in pairs:
10 | # n - time for running the program with no input
11 | # m - time for running it with the benchmark input
12 | # we measure (m - n)
13 |
# Each measurement is two consecutive stdin lines: the time with no input,
# then the time with the benchmark input.  Keep only the difference.
values = [
    float(with_input) - float(no_input)
    for (no_input, with_input) in pairs(sys.stdin.readlines(), 2)
]

print("mean = %.4f, median = %.4f, stdev = %.4f" % (
    statistics.mean(values),
    statistics.median(values),
    statistics.stdev(values),
))
19 |
20 |
--------------------------------------------------------------------------------
/benchmarks.md:
--------------------------------------------------------------------------------
1 | # Benchmarks
2 |
3 | Here are some benchmarks, run on an ancient Thinkpad running Intel
Core 2 Duo at 2GHz. The input text is an 11MB Markdown file built by
5 | concatenating the Markdown sources of all the localizations of the
6 | first edition of
7 | [*Pro Git*](https://github.com/progit/progit/tree/master/en) by Scott
8 | Chacon.
9 |
10 | |Implementation | Time (sec)|
11 | |-------------------|-----------:|
12 | | Markdown.pl | 2921.24 |
13 | | Python markdown | 291.25 |
14 | | PHP markdown | 20.82 |
15 | | kramdown | 17.32 |
16 | | cheapskate | 8.24 |
17 | | peg-markdown | 5.45 |
18 | | parsedown | 5.06 |
19 | | **commonmark.js** | 2.09 |
20 | | marked | 1.99 |
21 | | discount | 1.85 |
22 | | **cmark** | 0.29 |
23 | | hoedown | 0.21 |
24 |
25 | To run these benchmarks, use `make bench PROG=/path/to/program`.
26 |
27 | `time` is used to measure execution speed. The reported
28 | time is the *difference* between the time to run the program
29 | with the benchmark input and the time to run it with no input.
30 | (This procedure ensures that implementations in dynamic languages are
31 | not penalized by startup time.) A median of ten runs is taken. The
32 | process is reniced to a high priority so that the system doesn't
33 | interrupt runs.
34 |
--------------------------------------------------------------------------------
/cmake/modules/CheckFileOffsetBits.c:
--------------------------------------------------------------------------------
1 | #include
2 |
/* Size constants typed as off_t, so the arithmetic below is carried out in
   off_t rather than int. */
#define KB ((off_t)1024)
#define MB ((off_t)1024 * KB)
#define GB ((off_t)1024 * MB)
#define TB ((off_t)1024 * GB)
/*
 * Compile-time probe: the array size is 1 when both modular identities hold
 * and -1 (an invalid size, i.e. a compile error) otherwise.  The operands go
 * up to 1 TB, so presumably the identities only hold when off_t is wide
 * enough to represent such values without truncation.
 */
int t2[(((64 * GB -1) % 671088649) == 268434537)
       && (((TB - (64 * GB -1) + 255) % 1792151290) == 305159546)? 1: -1];

/* Nothing to execute: the CMake module only try-compiles this file. */
int main()
{
  ;
  return 0;
}
15 |
--------------------------------------------------------------------------------
/cmake/modules/CheckFileOffsetBits.cmake:
--------------------------------------------------------------------------------
1 | # - Check if _FILE_OFFSET_BITS macro needed for large files
2 | # CHECK_FILE_OFFSET_BITS ()
3 | #
4 | # The following variables may be set before calling this macro to
5 | # modify the way the check is run:
6 | #
7 | # CMAKE_REQUIRED_FLAGS = string of compile command line flags
8 | # CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)
9 | # CMAKE_REQUIRED_INCLUDES = list of include directories
10 | # Copyright (c) 2009, Michihiro NAKAJIMA
11 | #
12 | # Redistribution and use is allowed according to the terms of the BSD license.
13 | # For details see the accompanying COPYING-CMAKE-SCRIPTS file.
14 |
15 | #INCLUDE(CheckCSourceCompiles)
16 |
# Directory of this module; the probe source CheckFileOffsetBits.c is looked
# up next to it.
get_filename_component(_selfdir_CheckFileOffsetBits
  "${CMAKE_CURRENT_LIST_FILE}" PATH)

# CHECK_FILE_OFFSET_BITS()
#
# Unless _FILE_OFFSET_BITS is already defined, try-compiles the probe without
# and, if that fails, with -D_FILE_OFFSET_BITS=64.  Caches _FILE_OFFSET_BITS
# as 64 when only the second compile succeeds, and as "" otherwise.
macro(CHECK_FILE_OFFSET_BITS)
  if(NOT DEFINED _FILE_OFFSET_BITS)
    message(STATUS "Checking _FILE_OFFSET_BITS for large files")
    try_compile(__WITHOUT_FILE_OFFSET_BITS_64
      ${CMAKE_CURRENT_BINARY_DIR}
      ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c
      COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS})
    if(NOT __WITHOUT_FILE_OFFSET_BITS_64)
      try_compile(__WITH_FILE_OFFSET_BITS_64
        ${CMAKE_CURRENT_BINARY_DIR}
        ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c
        COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} -D_FILE_OFFSET_BITS=64)
    endif()

    if(NOT __WITHOUT_FILE_OFFSET_BITS_64 AND __WITH_FILE_OFFSET_BITS_64)
      set(_FILE_OFFSET_BITS 64 CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files")
      message(STATUS "Checking _FILE_OFFSET_BITS for large files - needed")
    else()
      set(_FILE_OFFSET_BITS "" CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files")
      message(STATUS "Checking _FILE_OFFSET_BITS for large files - not needed")
    endif()
  endif()
endmacro()
44 |
--------------------------------------------------------------------------------
/cmake/modules/FindAsan.cmake:
--------------------------------------------------------------------------------
1 | #
2 | # The MIT License (MIT)
3 | #
4 | # Copyright (c) 2013 Matthew Arsenault
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
7 | # of this software and associated documentation files (the "Software"), to deal
8 | # in the Software without restriction, including without limitation the rights
9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 | # This module tests if address sanitizer is supported by the compiler,
25 | # and creates a ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use
26 | # it). This sets the following variables:
27 | #
28 | # CMAKE_C_FLAGS_ASAN - Flags to use for C with asan
29 | # CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan
30 | # HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available
31 |
include(CheckCCompilerFlag)

# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER)

set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)

unset(CMAKE_REQUIRED_FLAGS)

if(HAVE_FLAG_SANITIZE_ADDRESS)
  # Clang 3.2+ use this version
  set(ADDRESS_SANITIZER_FLAG "-fsanitize=address")
elseif(HAVE_FLAG_ADDRESS_SANITIZER)
  # Older deprecated flag for ASan
  set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer")
endif()

# Neither spelling is accepted: report the ASan build type as unavailable
# (as documented for HAVE_ADDRESS_SANITIZER) and stop processing this module.
if(NOT ADDRESS_SANITIZER_FLAG)
  set(HAVE_ADDRESS_SANITIZER FALSE)
  return()
endif()

set(HAVE_ADDRESS_SANITIZER TRUE)

set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
  CACHE STRING "Flags used by the C compiler during ASan builds."
  FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
  CACHE STRING "Flags used by the C++ compiler during ASan builds."
  FORCE)
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
  CACHE STRING "Flags used for linking binaries during ASan builds."
  FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
  CACHE STRING "Flags used by the shared libraries linker during ASan builds."
  FORCE)
# Hide the ASan flag variables from the default cache view.
mark_as_advanced(CMAKE_C_FLAGS_ASAN
  CMAKE_CXX_FLAGS_ASAN
  CMAKE_EXE_LINKER_FLAGS_ASAN
  CMAKE_SHARED_LINKER_FLAGS_ASAN)
75 |
--------------------------------------------------------------------------------
/cmark-gfm-config.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 | include(${CMAKE_CURRENT_LIST_DIR}/cmark-gfm/cmark-gfm.cmake)
3 | include(${CMAKE_CURRENT_LIST_DIR}/cmark-gfm-extensions/cmark-gfm-extensions.cmake)
4 |
--------------------------------------------------------------------------------
/extensions/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(libcmark-gfm-extensions
2 | autolink.c
3 | core-extensions.c
4 | ext_scanners.c
5 | ext_scanners.h
6 | ext_scanners.re
7 | strikethrough.c
8 | table.c
9 | tagfilter.c
10 | tasklist.c)
11 | target_compile_definitions(libcmark-gfm-extensions PUBLIC
12 | $<$>:CMARK_GFM_STATIC_DEFINE>)
13 | target_include_directories(libcmark-gfm-extensions PUBLIC
14 | $)
15 | target_link_libraries(libcmark-gfm-extensions PRIVATE
16 | libcmark-gfm)
17 | set_target_properties(libcmark-gfm-extensions PROPERTIES
18 | DEFINE_SYMBOL libcmark_gfm_EXPORTS
19 | MACOSX_RPATH TRUE
20 | OUTPUT_NAME cmark-gfm-extensions
21 | PDB_NAME libcmark-gfm-extensions
22 | POSITION_INDEPENDENT_CODE YES
23 | SOVERSION ${PROJECT_VERSION}
24 | VERSION ${PROJECT_VERSION})
25 |
26 |
27 | install(TARGETS libcmark-gfm-extensions
28 | EXPORT cmark-gfm-extensions
29 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
30 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
31 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
32 | install(FILES
33 | include/cmark-gfm-core-extensions.h
34 | include/module.modulemap
35 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cmark_gfm_extensions)
36 | install(EXPORT cmark-gfm-extensions
37 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cmark-gfm-extensions)
38 |
--------------------------------------------------------------------------------
/extensions/autolink.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_AUTOLINK_H
2 | #define CMARK_GFM_AUTOLINK_H
3 |
4 | #include "cmark-gfm-core-extensions.h"
5 |
/* Returns a syntax extension object; judging by the name it implements
   autolinks (the definition is not part of this header). */
cmark_syntax_extension *create_autolink_extension(void);
7 |
8 | #endif
9 |
--------------------------------------------------------------------------------
/extensions/core-extensions.c:
--------------------------------------------------------------------------------
1 | #include "cmark-gfm-core-extensions.h"
2 | #include "autolink.h"
3 | #include "mutex.h"
4 | #include "node.h"
5 | #include "strikethrough.h"
6 | #include "table.h"
7 | #include "tagfilter.h"
8 | #include "tasklist.h"
9 | #include "registry.h"
10 | #include "plugin.h"
11 |
/*
 * Plugin callback: creates the table, strikethrough, autolink, tagfilter and
 * tasklist extensions, in that order, and registers each one with `plugin`.
 * Always returns 1.
 */
static int core_extensions_registration(cmark_plugin *plugin) {
  cmark_plugin_register_syntax_extension(plugin, create_table_extension());
  cmark_plugin_register_syntax_extension(plugin,
                                         create_strikethrough_extension());
  cmark_plugin_register_syntax_extension(plugin, create_autolink_extension());
  cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension());
  cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension());
  return 1;
}
21 |
/* State used by CMARK_RUN_ONCE below (macro defined outside this file,
   presumably in mutex.h). */
CMARK_DEFINE_ONCE(registered);

/* Hands the core-extension registration callback to cmark_register_plugin. */
static void register_plugins(void) {
  cmark_register_plugin(core_extensions_registration);
}
27 |
/*
 * Public entry point: runs register_plugins through CMARK_RUN_ONCE, guarded
 * by `registered`.  NOTE(review): presumably this makes repeated calls
 * register the core extensions only once; confirm against the macro
 * definition.
 */
CMARK_GFM_EXPORT
void cmark_gfm_core_extensions_ensure_registered(void) {
  CMARK_RUN_ONCE(registered, register_plugins);
}
32 |
--------------------------------------------------------------------------------
/extensions/ext_scanners.h:
--------------------------------------------------------------------------------
1 | #include "chunk.h"
2 | #include "cmark-gfm.h"
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
9 | unsigned char *ptr, int len, bufsize_t offset);
10 | bufsize_t _scan_table_start(const unsigned char *p);
11 | bufsize_t _scan_table_cell(const unsigned char *p);
12 | bufsize_t _scan_table_cell_end(const unsigned char *p);
13 | bufsize_t _scan_table_row_end(const unsigned char *p);
14 | bufsize_t _scan_tasklist(const unsigned char *p);
15 |
16 | #define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
17 | #define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
18 | #define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
19 | #define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
20 | #define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)
21 |
22 | #ifdef __cplusplus
23 | }
24 | #endif
25 |
--------------------------------------------------------------------------------
/extensions/ext_scanners.re:
--------------------------------------------------------------------------------
1 | /*!re2c re2c:flags:no-debug-info = 1; */
2 | /*!re2c re2c:indent:string = ' '; */
3 |
4 | #include <stdlib.h>
5 | #include "ext_scanners.h"
6 |
7 | bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset)
8 | {
9 | bufsize_t res;
10 |
11 | if (ptr == NULL || offset >= len) {
12 | return 0;
13 | } else {
14 | unsigned char lim = ptr[len];
15 |
16 | ptr[len] = '\0';
17 | res = scanner(ptr + offset);
18 | ptr[len] = lim;
19 | }
20 |
21 | return res;
22 | }
23 |
// Shared re2c configuration and named patterns used by every scanner below.
// Scanners read through `p` (YYCURSOR), backtrack through `marker`
// (YYMARKER), and never refill the buffer (yyfill disabled), so the input
// must be NUL-terminated.
//
// NOTE(review): in `tasklist`, the unescaped `.` after `[0-9]+` is a re2c
// "any character" match rather than a literal dot -- confirm this is intended.
/*!re2c
  re2c:define:YYCTYPE = "unsigned char";
  re2c:define:YYCURSOR = p;
  re2c:define:YYMARKER = marker;
  re2c:yyfill:enable = 0;

  spacechar = [ \t\v\f];
  newline = [\r]?[\n];
  escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-];

  table_marker = (spacechar*[:]?[-]+[:]?spacechar*);
  table_cell = (escaped_char|[^|\r\n])+;

  tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+;
*/
39 |
40 | bufsize_t _scan_table_start(const unsigned char *p)
41 | {
42 | const unsigned char *marker = NULL;
43 | const unsigned char *start = p;
44 | /*!re2c
45 | [|]? table_marker ([|] table_marker)* [|]? spacechar* newline {
46 | return (bufsize_t)(p - start);
47 | }
48 | * { return 0; }
49 | */
50 | }
51 |
52 | bufsize_t _scan_table_cell(const unsigned char *p)
53 | {
54 | const unsigned char *marker = NULL;
55 | const unsigned char *start = p;
56 | /*!re2c
57 | // In fact, `table_cell` matches non-empty table cells only. The empty
58 | // string is also a valid table cell, but is handled by the default rule.
59 | // This approach prevents re2c's match-empty-string warning.
60 | table_cell { return (bufsize_t)(p - start); }
61 | * { return 0; }
62 | */
63 | }
64 |
65 | bufsize_t _scan_table_cell_end(const unsigned char *p)
66 | {
67 | const unsigned char *start = p;
68 | /*!re2c
69 | [|] spacechar* { return (bufsize_t)(p - start); }
70 | * { return 0; }
71 | */
72 | }
73 |
74 | bufsize_t _scan_table_row_end(const unsigned char *p)
75 | {
76 | const unsigned char *marker = NULL;
77 | const unsigned char *start = p;
78 | /*!re2c
79 | spacechar* newline { return (bufsize_t)(p - start); }
80 | * { return 0; }
81 | */
82 | }
83 |
84 | bufsize_t _scan_tasklist(const unsigned char *p)
85 | {
86 | const unsigned char *marker = NULL;
87 | const unsigned char *start = p;
88 | /*!re2c
89 | tasklist { return (bufsize_t)(p - start); }
90 | * { return 0; }
91 | */
92 | }
93 |
--------------------------------------------------------------------------------
/extensions/include/cmark-gfm-core-extensions.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_CORE_EXTENSIONS_H
2 | #define CMARK_GFM_CORE_EXTENSIONS_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #include "cmark-gfm-extension_api.h"
9 | #include "export.h"
10 |
11 | #include <stdbool.h>
12 | #include <stdint.h>
13 |
14 | CMARK_GFM_EXPORT
15 | void cmark_gfm_core_extensions_ensure_registered(void);
16 |
17 | CMARK_GFM_EXPORT
18 | uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
19 |
20 | /** Sets the number of columns for the table, returning 1 on success and 0 on error.
21 | */
22 | CMARK_GFM_EXPORT
23 | int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
24 |
25 | CMARK_GFM_EXPORT
26 | uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
27 |
28 | /** Sets the alignments for the table, returning 1 on success and 0 on error.
29 | */
30 | CMARK_GFM_EXPORT
31 | int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
32 |
33 | CMARK_GFM_EXPORT
34 | int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
35 |
36 | /** Sets the column span for the table cell, returning 1 on success and 0 on error.
37 | */
38 | CMARK_GFM_EXPORT
39 | int cmark_gfm_extensions_set_table_cell_colspan(cmark_node *node, unsigned colspan);
40 |
41 | /** Sets the row span for the table cell, returning 1 on success and 0 on error.
42 | */
43 | CMARK_GFM_EXPORT
44 | int cmark_gfm_extensions_set_table_cell_rowspan(cmark_node *node, unsigned rowspan);
45 |
46 | /**
47 | Gets the column span for the table cell, returning \c UINT_MAX on error.
48 |
49 | A value of 0 indicates that the cell is a "filler" cell, intended to be overlapped with a previous
50 | cell with a span > 1.
51 |
52 | Column span is only parsed when \c CMARK_OPT_TABLE_SPANS is set.
53 | */
54 | CMARK_GFM_EXPORT
55 | unsigned cmark_gfm_extensions_get_table_cell_colspan(cmark_node *node);
56 |
57 | /**
58 | Gets the row span for the table cell, returning \c UINT_MAX on error.
59 |
60 | A value of 0 indicates that the cell is a "filler" cell, intended to be overlapped with a previous
61 | cell with a span > 1.
62 |
63 | Row span is only parsed when \c CMARK_OPT_TABLE_SPANS is set.
64 | */
65 | CMARK_GFM_EXPORT
66 | unsigned cmark_gfm_extensions_get_table_cell_rowspan(cmark_node *node);
67 |
68 | /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
69 | */
70 | CMARK_GFM_EXPORT
71 | int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
72 |
73 | CMARK_GFM_EXPORT
74 | bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
75 | /* For backwards compatibility */
76 | #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
77 |
78 | /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
79 | */
80 | CMARK_GFM_EXPORT
81 | int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
82 |
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 |
87 | #endif
88 |
--------------------------------------------------------------------------------
/extensions/include/module.modulemap:
--------------------------------------------------------------------------------
1 |
2 | module cmark_gfm_extensions {
3 | header "cmark-gfm-core-extensions.h"
4 | }
5 |
6 |
--------------------------------------------------------------------------------
/extensions/strikethrough.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "strikethrough.h"
4 | #include
5 | #include
6 |
7 | cmark_node_type CMARK_NODE_STRIKETHROUGH;
8 |
9 | static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,
10 | cmark_node *parent, unsigned char character,
11 | cmark_inline_parser *inline_parser) {
12 | cmark_node *res = NULL;
13 | int left_flanking, right_flanking, punct_before, punct_after, delims;
14 | char buffer[101];
15 |
16 | if (character != '~')
17 | return NULL;
18 |
19 | delims = cmark_inline_parser_scan_delimiters(
20 | inline_parser, sizeof(buffer) - 1, '~',
21 | &left_flanking,
22 | &right_flanking, &punct_before, &punct_after);
23 |
24 | memset(buffer, '~', delims);
25 | buffer[delims] = 0;
26 |
27 | res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
28 | cmark_node_set_literal(res, buffer);
29 | res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser);
30 | res->start_column = cmark_inline_parser_get_column(inline_parser) - delims;
31 |
32 | if ((left_flanking || right_flanking) &&
33 | (delims == 2 || (!(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE) && delims == 1))) {
34 | cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking,
35 | right_flanking, res);
36 | }
37 |
38 | return res;
39 | }
40 |
41 | static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
42 | cmark_inline_parser *inline_parser, delimiter *opener,
43 | delimiter *closer) {
44 | cmark_node *strikethrough;
45 | cmark_node *tmp, *next;
46 | delimiter *delim, *tmp_delim;
47 | delimiter *res = closer->next;
48 |
49 | strikethrough = opener->inl_text;
50 |
51 | if (opener->inl_text->as.literal.len != closer->inl_text->as.literal.len)
52 | goto done;
53 |
54 | if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH))
55 | goto done;
56 |
57 | cmark_node_set_syntax_extension(strikethrough, self);
58 |
59 | tmp = cmark_node_next(opener->inl_text);
60 |
61 | while (tmp) {
62 | if (tmp == closer->inl_text)
63 | break;
64 | next = cmark_node_next(tmp);
65 | cmark_node_append_child(strikethrough, tmp);
66 | tmp = next;
67 | }
68 |
69 | strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
70 | cmark_node_free(closer->inl_text);
71 |
72 | done:
73 | delim = closer;
74 | while (delim != NULL && delim != opener) {
75 | tmp_delim = delim->previous;
76 | cmark_inline_parser_remove_delimiter(inline_parser, delim);
77 | delim = tmp_delim;
78 | }
79 |
80 | cmark_inline_parser_remove_delimiter(inline_parser, opener);
81 |
82 | return res;
83 | }
84 |
85 | static const char *get_type_string(cmark_syntax_extension *extension,
86 | cmark_node *node) {
87 | return node->type == CMARK_NODE_STRIKETHROUGH ? "strikethrough" : "";
88 | }
89 |
90 | static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
91 | cmark_node_type child_type) {
92 | if (node->type != CMARK_NODE_STRIKETHROUGH)
93 | return false;
94 |
95 | return CMARK_NODE_TYPE_INLINE_P(child_type);
96 | }
97 |
98 | static void commonmark_render(cmark_syntax_extension *extension,
99 | cmark_renderer *renderer, cmark_node *node,
100 | cmark_event_type ev_type, int options) {
101 | renderer->out(renderer, node, "~~", false, LITERAL);
102 | }
103 |
104 | static void latex_render(cmark_syntax_extension *extension,
105 | cmark_renderer *renderer, cmark_node *node,
106 | cmark_event_type ev_type, int options) {
107 | // requires \usepackage{ulem}
108 | bool entering = (ev_type == CMARK_EVENT_ENTER);
109 | if (entering) {
110 | renderer->out(renderer, node, "\\sout{", false, LITERAL);
111 | } else {
112 | renderer->out(renderer, node, "}", false, LITERAL);
113 | }
114 | }
115 |
116 | static void man_render(cmark_syntax_extension *extension,
117 | cmark_renderer *renderer, cmark_node *node,
118 | cmark_event_type ev_type, int options) {
119 | bool entering = (ev_type == CMARK_EVENT_ENTER);
120 | if (entering) {
121 | renderer->cr(renderer);
122 | renderer->out(renderer, node, ".ST \"", false, LITERAL);
123 | } else {
124 | renderer->out(renderer, node, "\"", false, LITERAL);
125 | renderer->cr(renderer);
126 | }
127 | }
128 |
129 | static void html_render(cmark_syntax_extension *extension,
130 | cmark_html_renderer *renderer, cmark_node *node,
131 | cmark_event_type ev_type, int options) {
132 | bool entering = (ev_type == CMARK_EVENT_ENTER);
133 | if (entering) {
134 | cmark_strbuf_puts(renderer->html, "");
135 | } else {
136 | cmark_strbuf_puts(renderer->html, "");
137 | }
138 | }
139 |
140 | static void plaintext_render(cmark_syntax_extension *extension,
141 | cmark_renderer *renderer, cmark_node *node,
142 | cmark_event_type ev_type, int options) {
143 | renderer->out(renderer, node, "~", false, LITERAL);
144 | }
145 |
146 | cmark_syntax_extension *create_strikethrough_extension(void) {
147 | cmark_syntax_extension *ext = cmark_syntax_extension_new("strikethrough");
148 | cmark_llist *special_chars = NULL;
149 |
150 | cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
151 | cmark_syntax_extension_set_can_contain_func(ext, can_contain);
152 | cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
153 | cmark_syntax_extension_set_latex_render_func(ext, latex_render);
154 | cmark_syntax_extension_set_man_render_func(ext, man_render);
155 | cmark_syntax_extension_set_html_render_func(ext, html_render);
156 | cmark_syntax_extension_set_plaintext_render_func(ext, plaintext_render);
157 | CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1);
158 |
159 | cmark_syntax_extension_set_match_inline_func(ext, match);
160 | cmark_syntax_extension_set_inline_from_delim_func(ext, insert);
161 |
162 | cmark_mem *mem = cmark_get_default_mem_allocator();
163 | special_chars = cmark_llist_append(mem, special_chars, (void *)'~');
164 | cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
165 |
166 | cmark_syntax_extension_set_emphasis(ext, 1);
167 |
168 | return ext;
169 | }
170 |
--------------------------------------------------------------------------------
/extensions/strikethrough.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_STRIKETHROUGH_H
2 | #define CMARK_GFM_STRIKETHROUGH_H
3 |
4 | #include "cmark-gfm-core-extensions.h"
5 |
6 | extern cmark_node_type CMARK_NODE_STRIKETHROUGH;
7 | cmark_syntax_extension *create_strikethrough_extension(void);
8 |
9 | #endif
10 |
--------------------------------------------------------------------------------
/extensions/table.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_TABLE_H
2 | #define CMARK_GFM_TABLE_H
3 |
4 | #include "cmark-gfm-core-extensions.h"
5 |
6 |
7 | extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
8 | CMARK_NODE_TABLE_CELL;
9 |
10 | cmark_syntax_extension *create_table_extension(void);
11 |
12 | #endif
13 |
--------------------------------------------------------------------------------
/extensions/tagfilter.c:
--------------------------------------------------------------------------------
1 | #include "tagfilter.h"
2 | #include
3 | #include
4 |
5 | static const char *blacklist[] = {
6 | "title", "textarea", "style", "xmp", "iframe",
7 | "noembed", "noframes", "script", "plaintext", NULL,
8 | };
9 |
// Reports whether the raw HTML tag in `tag_data` (`tag_size` bytes) is an
// opening or closing tag named `tagname`.
//
// `tagname` must be lowercase: each tag byte is lowercased before comparison.
// After the name, the tag must continue with whitespace, '>' or "/>".
// Returns 1 on a match and 0 otherwise.
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  // Skip the '<' and, for closing tags, the '/'.
  size_t pos = (tag_data[1] == '/') ? 2 : 1;

  while (pos < tag_size && *tagname != 0) {
    if (tolower(tag_data[pos]) != *tagname)
      return 0;
    ++pos;
    ++tagname;
  }

  // Input ended before a delimiter could follow the name.
  if (pos == tag_size)
    return 0;

  unsigned char delim = tag_data[pos];

  if (cmark_isspace(delim) || delim == '>')
    return 1;

  return delim == '/' && tag_size >= pos + 2 && tag_data[pos + 1] == '>';
}
42 |
43 | static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
44 | size_t tag_len) {
45 | const char **it;
46 |
47 | for (it = blacklist; *it; ++it) {
48 | if (is_tag(tag, tag_len, *it)) {
49 | return 0;
50 | }
51 | }
52 |
53 | return 1;
54 | }
55 |
56 | cmark_syntax_extension *create_tagfilter_extension(void) {
57 | cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
58 | cmark_syntax_extension_set_html_filter_func(ext, filter);
59 | return ext;
60 | }
61 |
--------------------------------------------------------------------------------
/extensions/tagfilter.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_TAGFILTER_H
2 | #define CMARK_GFM_TAGFILTER_H
3 |
4 | #include "cmark-gfm-core-extensions.h"
5 |
6 | cmark_syntax_extension *create_tagfilter_extension(void);
7 |
8 | #endif
9 |
--------------------------------------------------------------------------------
/extensions/tasklist.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "tasklist.h"
4 | #include
5 | #include
6 | #include
7 | #include "ext_scanners.h"
8 |
9 | typedef enum {
10 | CMARK_TASKLIST_NOCHECKED,
11 | CMARK_TASKLIST_CHECKED,
12 | } cmark_tasklist_type;
13 |
14 | // Local constants
15 | static const char *TYPE_STRING = "tasklist";
16 |
17 | static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
18 | return TYPE_STRING;
19 | }
20 |
21 |
22 | // Return 1 if state was set, 0 otherwise
23 | CMARK_GFM_EXPORT
24 | int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
25 | // The node has to exist, and be an extension, and actually be the right type in order to get the value.
26 | if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
27 | return 0;
28 |
29 | node->as.list.checked = is_checked;
30 | return 1;
31 | }
32 |
33 | CMARK_GFM_EXPORT
34 | bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
35 | if (!node || !node->extension || strcmp(cmark_node_get_type_string(node), TYPE_STRING))
36 | return false;
37 |
38 | if (node->as.list.checked) {
39 | return true;
40 | }
41 | else {
42 | return false;
43 | }
44 | }
45 |
46 | static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
47 | cmark_node *container) {
48 | bool res = false;
49 |
50 | if (parser->indent >=
51 | container->as.list.marker_offset + container->as.list.padding) {
52 | cmark_parser_advance_offset(parser, input, container->as.list.marker_offset +
53 | container->as.list.padding,
54 | true);
55 | res = true;
56 | } else if (parser->blank && container->first_child != NULL) {
57 | // if container->first_child is NULL, then the opening line
58 | // of the list item was blank after the list marker; in this
59 | // case, we are done with the list item.
60 | cmark_parser_advance_offset(parser, input, parser->first_nonspace - parser->offset,
61 | false);
62 | res = true;
63 | }
64 | return res;
65 | }
66 |
67 | static int matches(cmark_syntax_extension *self, cmark_parser *parser,
68 | unsigned char *input, int len,
69 | cmark_node *parent_container) {
70 | return parse_node_item_prefix(parser, (const char*)input, parent_container);
71 | }
72 |
73 | static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
74 | cmark_node_type child_type) {
75 | return (node->type == CMARK_NODE_ITEM) ? 1 : 0;
76 | }
77 |
78 | static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
79 | int indented, cmark_parser *parser,
80 | cmark_node *parent_container,
81 | unsigned char *input, int len) {
82 | cmark_node_type node_type = cmark_node_get_type(parent_container);
83 | if (node_type != CMARK_NODE_ITEM) {
84 | return NULL;
85 | }
86 |
87 | bufsize_t matched = scan_tasklist(input, len, 0);
88 | if (!matched) {
89 | return NULL;
90 | }
91 |
92 | cmark_node_set_syntax_extension(parent_container, self);
93 | cmark_parser_advance_offset(parser, (char *)input, 3, false);
94 |
95 | // Either an upper or lower case X means the task is completed.
96 | parent_container->as.list.checked = (strstr((char*)input, "[x]") || strstr((char*)input, "[X]"));
97 |
98 | return NULL;
99 | }
100 |
101 | static void commonmark_render(cmark_syntax_extension *extension,
102 | cmark_renderer *renderer, cmark_node *node,
103 | cmark_event_type ev_type, int options) {
104 | bool entering = (ev_type == CMARK_EVENT_ENTER);
105 | if (entering) {
106 | renderer->cr(renderer);
107 | if (node->as.list.checked) {
108 | renderer->out(renderer, node, "- [x] ", false, LITERAL);
109 | } else {
110 | renderer->out(renderer, node, "- [ ] ", false, LITERAL);
111 | }
112 | cmark_strbuf_puts(renderer->prefix, " ");
113 | } else {
114 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
115 | renderer->cr(renderer);
116 | }
117 | }
118 |
119 | static void html_render(cmark_syntax_extension *extension,
120 | cmark_html_renderer *renderer, cmark_node *node,
121 | cmark_event_type ev_type, int options) {
122 | bool entering = (ev_type == CMARK_EVENT_ENTER);
123 | if (entering) {
124 | cmark_html_render_cr(renderer->html);
125 | cmark_strbuf_puts(renderer->html, "html, options);
127 | cmark_strbuf_putc(renderer->html, '>');
128 | if (node->as.list.checked) {
129 | cmark_strbuf_puts(renderer->html, " ");
130 | } else {
131 | cmark_strbuf_puts(renderer->html, " ");
132 | }
133 | } else {
134 | cmark_strbuf_puts(renderer->html, " \n");
135 | }
136 | }
137 |
138 | static const char *xml_attr(cmark_syntax_extension *extension,
139 | cmark_node *node) {
140 | if (node->as.list.checked) {
141 | return " completed=\"true\"";
142 | } else {
143 | return " completed=\"false\"";
144 | }
145 | }
146 |
147 | cmark_syntax_extension *create_tasklist_extension(void) {
148 | cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
149 |
150 | cmark_syntax_extension_set_match_block_func(ext, matches);
151 | cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
152 | cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
153 | cmark_syntax_extension_set_can_contain_func(ext, can_contain);
154 | cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
155 | cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
156 | cmark_syntax_extension_set_html_render_func(ext, html_render);
157 | cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
158 |
159 | return ext;
160 | }
161 |
--------------------------------------------------------------------------------
/extensions/tasklist.h:
--------------------------------------------------------------------------------
1 | #ifndef TASKLIST_H
2 | #define TASKLIST_H
3 |
4 | #include "cmark-gfm-core-extensions.h"
5 |
6 | cmark_syntax_extension *create_tasklist_extension(void);
7 |
8 | #endif
9 |
--------------------------------------------------------------------------------
/fuzz/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | foreach(fuzzer fuzz_quadratic fuzz_quadratic_brackets)
2 | add_executable(${fuzzer}
3 | ${fuzzer}.c)
4 | target_compile_options(${fuzzer} PRIVATE
5 | -fsanitize=fuzzer)
6 | target_link_options(${fuzzer} PRIVATE
7 | -fsanitize=fuzzer)
8 | target_link_libraries(${fuzzer} PRIVATE
9 | libcmark-gfm
10 | libcmark-gfm-extensions)
11 | endforeach()
12 |
--------------------------------------------------------------------------------
/fuzz/README.md:
--------------------------------------------------------------------------------
1 | The quadratic fuzzer generates long sequences of repeated characters, such as `
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-core-extensions.h"
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | const char *extension_names[] = {
12 | "autolink",
13 | "strikethrough",
14 | "table",
15 | "tagfilter",
16 | NULL,
17 | };
18 |
// libFuzzer start-up hook: registers the core GFM extensions so that the
// lookups by name in LLVMFuzzerTestOneInput can succeed. The command-line
// arguments are not used.
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  (void)argc;
  (void)argv;

  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}
23 |
24 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
25 | struct __attribute__((packed)) {
26 | int options;
27 | int width;
28 | uint8_t splitpoint;
29 | uint8_t repeatlen;
30 | } fuzz_config;
31 |
32 | if (size >= sizeof(fuzz_config)) {
33 | /* The beginning of `data` is treated as fuzzer configuration */
34 | memcpy(&fuzz_config, data, sizeof(fuzz_config));
35 |
36 | /* Test options that are used by GitHub. */
37 | fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
38 |
39 | /* Remainder of input is the markdown */
40 | const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
41 | const size_t markdown_size0 = size - sizeof(fuzz_config);
42 | char markdown[0x80000];
43 | if (markdown_size0 <= sizeof(markdown)) {
44 | size_t markdown_size = 0;
45 | if (fuzz_config.splitpoint <= markdown_size0 && 0 < fuzz_config.repeatlen &&
46 | fuzz_config.repeatlen <= markdown_size0 - fuzz_config.splitpoint) {
47 | const size_t size_after_splitpoint = markdown_size0 - fuzz_config.splitpoint - fuzz_config.repeatlen;
48 | memcpy(&markdown[markdown_size], &markdown0[0], fuzz_config.splitpoint);
49 | markdown_size += fuzz_config.splitpoint;
50 |
51 | while (markdown_size + fuzz_config.repeatlen + size_after_splitpoint <= sizeof(markdown)) {
52 | memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint],
53 | fuzz_config.repeatlen);
54 | markdown_size += fuzz_config.repeatlen;
55 | }
56 | memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint + fuzz_config.repeatlen],
57 | size_after_splitpoint);
58 | markdown_size += size_after_splitpoint;
59 | } else {
60 | markdown_size = markdown_size0;
61 | memcpy(markdown, markdown0, markdown_size);
62 | }
63 |
64 | cmark_parser *parser = cmark_parser_new(fuzz_config.options);
65 |
66 | for (const char **it = extension_names; *it; ++it) {
67 | const char *extension_name = *it;
68 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
69 | if (!syntax_extension) {
70 | fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
71 | abort();
72 | }
73 | cmark_parser_attach_syntax_extension(parser, syntax_extension);
74 | }
75 |
76 | cmark_parser_feed(parser, markdown, markdown_size);
77 | cmark_node *doc = cmark_parser_finish(parser);
78 |
79 | free(cmark_render_html(doc, fuzz_config.options, NULL));
80 | free(cmark_render_xml(doc, fuzz_config.options));
81 | free(cmark_render_man(doc, fuzz_config.options, 80));
82 | free(cmark_render_commonmark(doc, fuzz_config.options, 80));
83 | free(cmark_render_plaintext(doc, fuzz_config.options, 80));
84 | free(cmark_render_latex(doc, fuzz_config.options, 80));
85 |
86 | cmark_node_free(doc);
87 | cmark_parser_free(parser);
88 | }
89 | }
90 | return 0;
91 | }
92 |
--------------------------------------------------------------------------------
/fuzz/fuzz_quadratic_brackets.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-core-extensions.h"
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | const char *extension_names[] = {
12 | "autolink",
13 | "strikethrough",
14 | "table",
15 | "tagfilter",
16 | NULL,
17 | };
18 |
// libFuzzer start-up hook: registers the core GFM extensions so that the
// lookups by name in LLVMFuzzerTestOneInput can succeed. The command-line
// arguments are not used.
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  (void)argc;
  (void)argv;

  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}
23 |
24 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
25 | struct __attribute__((packed)) {
26 | int options;
27 | int width;
28 | uint8_t startlen;
29 | uint8_t openlen;
30 | uint8_t middlelen;
31 | uint8_t closelen;
32 | } fuzz_config;
33 |
34 | if (size >= sizeof(fuzz_config)) {
35 | /* The beginning of `data` is treated as fuzzer configuration */
36 | memcpy(&fuzz_config, data, sizeof(fuzz_config));
37 |
38 | /* Test options that are used by GitHub. */
39 | fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
40 | fuzz_config.openlen = fuzz_config.openlen & 0x7;
41 | fuzz_config.middlelen = fuzz_config.middlelen & 0x7;
42 | fuzz_config.closelen = fuzz_config.closelen & 0x7;
43 |
44 | /* Remainder of input is the markdown */
45 | const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
46 | const size_t markdown_size0 = size - sizeof(fuzz_config);
47 | char markdown[0x80000];
48 | if (markdown_size0 <= sizeof(markdown)) {
49 | size_t markdown_size = 0;
50 | const size_t componentslen = fuzz_config.startlen + fuzz_config.openlen + fuzz_config.middlelen + fuzz_config.closelen;
51 | if (componentslen <= markdown_size0) {
52 | size_t offset = 0;
53 | const size_t endlen = markdown_size0 - componentslen;
54 | memcpy(&markdown[markdown_size], &markdown0[offset], fuzz_config.startlen);
55 | markdown_size += fuzz_config.startlen;
56 | offset += fuzz_config.startlen;
57 |
58 | if (0 < fuzz_config.openlen) {
59 | while (markdown_size + fuzz_config.openlen <= sizeof(markdown)/2) {
60 | memcpy(&markdown[markdown_size], &markdown0[offset],
61 | fuzz_config.openlen);
62 | markdown_size += fuzz_config.openlen;
63 | }
64 | offset += fuzz_config.openlen;
65 | }
66 | memcpy(&markdown[markdown_size], &markdown0[offset],
67 | fuzz_config.middlelen);
68 | markdown_size += fuzz_config.middlelen;
69 | offset += fuzz_config.middlelen;
70 | if (0 < fuzz_config.closelen) {
71 | while (markdown_size + fuzz_config.closelen + endlen <= sizeof(markdown)) {
72 | memcpy(&markdown[markdown_size], &markdown0[offset],
73 | fuzz_config.closelen);
74 | markdown_size += fuzz_config.closelen;
75 | }
76 | offset += fuzz_config.closelen;
77 | }
78 | if (markdown_size + endlen <= sizeof(markdown)) {
79 | memcpy(&markdown[markdown_size], &markdown0[offset],
80 | endlen);
81 | markdown_size += endlen;
82 | }
83 | } else {
84 | markdown_size = markdown_size0;
85 | memcpy(markdown, markdown0, markdown_size);
86 | }
87 |
88 | cmark_parser *parser = cmark_parser_new(fuzz_config.options);
89 |
90 | for (const char **it = extension_names; *it; ++it) {
91 | const char *extension_name = *it;
92 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
93 | if (!syntax_extension) {
94 | fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
95 | abort();
96 | }
97 | cmark_parser_attach_syntax_extension(parser, syntax_extension);
98 | }
99 |
100 | cmark_parser_feed(parser, markdown, markdown_size);
101 | cmark_node *doc = cmark_parser_finish(parser);
102 |
103 | free(cmark_render_html(doc, fuzz_config.options, NULL));
104 |
105 | cmark_node_free(doc);
106 | cmark_parser_free(parser);
107 | }
108 | }
109 | return 0;
110 | }
111 |
--------------------------------------------------------------------------------
/fuzz/fuzzloop.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Runs the quadratic fuzzer forever, alternating between a corpus
# minimization pass and a 4-hour fuzzing session.

# Stop when an error is found
set -e

# Create a corpus sub-directory if it doesn't already exist.
mkdir -p corpus

# The memory and disk usage grows over time, so this loop restarts the
# fuzzer every 4 hours. The `-merge=1` option is used to minimize the
# corpus on each iteration.
while true
do
  date
  echo restarting loop

  # Minimize the corpus: merge the previous corpus (moved aside as corpus2)
  # and ../bench into a fresh, empty corpus directory.
  mv corpus/ corpus2
  mkdir corpus
  echo minimizing corpus
  ./fuzz/fuzz_quadratic -merge=1 corpus ../bench corpus2/ -max_len=1024
  rm -r corpus2

  # Run the fuzzer for 4 hours (14400 seconds), one job and one worker per CPU
  date
  echo start fuzzer
  ./fuzz/fuzz_quadratic corpus -dict=../test/fuzzing_dictionary -jobs=$(nproc) -workers=$(nproc) -max_len=1024 -max_total_time=14400
done
29 |
--------------------------------------------------------------------------------
/man/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | install(FILES man1/cmark-gfm.1
2 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
3 | install(FILES man3/cmark-gfm.3
4 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
5 |
--------------------------------------------------------------------------------
/man/make_man_page.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Creates a man page from a C file.
4 |
5 | # first argument if present is path to cmark dynamic library
6 |
7 | # Comments beginning with `/**` are treated as Groff man, except that
8 | # 'this' is converted to \fIthis\f[], and ''this'' to \fBthis\f[].
9 |
10 | # Non-blank lines immediately following a man page comment are treated
11 | # as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The
12 | # immediately preceding man documentation chunk is printed after the example
13 | # as a comment on it.
14 |
15 | # That's about it!
16 |
17 | import sys, re, os, platform
18 | from datetime import date
19 | from ctypes import CDLL, c_char_p, c_long, c_void_p
20 |
21 | sysname = platform.system()
22 |
23 | if sysname == 'Darwin':
24 | cmark = CDLL("build/src/libcmark-gfm.dylib")
25 | else:
26 | cmark = CDLL("build/src/libcmark-gfm.so")
27 |
28 | parse_document = cmark.cmark_parse_document
29 | parse_document.restype = c_void_p
30 | parse_document.argtypes = [c_char_p, c_long]
31 |
32 | render_man = cmark.cmark_render_man
33 | render_man.restype = c_char_p
34 | render_man.argtypes = [c_void_p, c_long, c_long]
35 |
def md2man(text):
    """Render markdown `text` as man-page source via the loaded cmark library.

    On Python 3 the text is UTF-8 encoded before parsing, rendered with a
    width of 65, and the result is decoded back to `str`. On Python 2 the
    text is passed through unchanged and rendered with a width of 72.
    """
    if sys.version_info >= (3, 0):
        encoded = text.encode('utf-8')
        document = parse_document(encoded, len(encoded))
        return render_man(document, 0, 65).decode('utf-8')

    document = parse_document(text, len(text))
    return render_man(document, 0, 72)
45 |
# Regular expressions used to classify the lines of the C source file.
# Raw strings are used so that regex escapes such as \s, \* and \/ are not
# treated as (invalid) Python string escapes.
comment_start_re = re.compile(r'^\/\*\* ?')
comment_delim_re = re.compile(r'^[/ ]\** ?')
comment_end_re = re.compile(r'^ \**\/')
# Named groups 'type', 'name' and 'args' are read by the signature formatter.
function_re = re.compile(r'^ *(?:CMARK_GFM_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)')
blank_re = re.compile(r'^\s*$')
macro_re = re.compile('CMARK_GFM_EXPORT *')
typedef_start_re = re.compile('typedef.*{$')
typedef_end_re = re.compile('}')
# 'this' (emphasis) and ''this'' (strong); quotes adjacent to a word
# character, such as the apostrophe in "don't", are left alone.
single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)")
double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)")

def handle_quotes(s):
    """Convert 'x' to *x* and ''x'' to **x** (Markdown emphasis / strong).

    The doubled form is rewritten first: if single quotes were handled first,
    the inner quotes of ''x'' would match the single-quote pattern and the
    doubled form would never be recognised.
    """
    return re.sub(single_quote_re, r'*\g<1>*', re.sub(double_quote_re, r'**\g<1>**', s))
59 |
60 | typedef = False
61 | mdlines = []
62 | chunk = []
63 | sig = []
64 |
# The only command-line argument is the C file to extract documentation from.
if len(sys.argv) > 1:
    sourcefile = sys.argv[1]
else:
    # stdout carries the generated man page, so the usage text goes to stderr;
    # sys.exit is used because the bare exit() helper is only provided by the
    # site module.
    sys.stderr.write("Usage: make_man_page.py sourcefile\n")
    sys.exit(1)
70 |
# Walk the source file line by line with a small state machine:
#   'default'   - ordinary code, ignored
#   'man'       - inside a /** ... */ documentation comment
#   'signature' - the non-blank lines that directly follow such a comment
# Output fragments are accumulated in `mdlines` and written at the end.
with open(sourcefile, 'r') as cmarkh:
    state = 'default'
    for line in cmarkh:
        # state transition
        oldstate = state
        if comment_start_re.match(line):
            state = 'man'
        elif comment_end_re.match(line) and state == 'man':
            # closing line of the comment carries no text; skip it entirely
            continue
        elif comment_delim_re.match(line) and state == 'man':
            state = 'man'
        elif not typedef and blank_re.match(line):
            state = 'default'
        elif typedef and typedef_end_re.match(line):
            typedef = False
        elif typedef_start_re.match(line):
            # a typedef body may contain blank lines; `typedef` keeps the
            # signature open until the closing brace
            typedef = True
            state = 'signature'
        elif state == 'man':
            state = 'signature'

        # handle line
        if state == 'man':
            chunk.append(handle_quotes(re.sub(comment_delim_re, '', line)))
        elif state == 'signature':
            ln = re.sub(macro_re, '', line)
            if typedef or not re.match(blank_re, ln):
                sig.append(ln)
        elif oldstate == 'signature' and state != 'signature':
            # a signature just ended: emit it, then the documentation chunk
            # that preceded it
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            rawsig = ''.join(sig)
            m = function_re.match(rawsig)
            mdlines.append('.PP\n')
            if m:
                # function prototype: italic type, bold name, italic arguments
                mdlines.append('\\fI' + m.group('type') + '\\f[]' + ' ')
                mdlines.append('\\fB' + m.group('name') + '\\f[]' + '(')
                first = True
                for argument in re.split(',', m.group('args')):
                    if not first:
                        mdlines.append(', ')
                    first = False
                    mdlines.append('\\fI' + argument.strip() + '\\f[]')
                mdlines.append(')\n')
            else:
                # anything else (typedefs, examples) is printed verbatim
                mdlines.append('.nf\n\\fC\n.RS 0n\n')
                mdlines += sig
                mdlines.append('.RE\n\\f[]\n.fi\n')
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            # `+=` with a string extends the list character by character;
            # the final ''.join() makes that equivalent to appending it whole
            mdlines += md2man(''.join(chunk))
            mdlines.append('\n')
            chunk = []
            sig = []
        elif oldstate == 'man' and state != 'signature':
            # documentation comment that is not followed by a signature
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += md2man(''.join(chunk)) # add man chunk
            chunk = []
            mdlines.append('\n')

# Title line (stamped with today's date) followed by the collected body.
sys.stdout.write('.TH cmark-gfm 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n')
sys.stdout.write(''.join(mdlines))
134 |
--------------------------------------------------------------------------------
/man/man1/cmark-gfm.1:
--------------------------------------------------------------------------------
1 | .TH "cmark-gfm" "1" "March 24, 2016" "LOCAL" "General Commands Manual"
2 | .SH "NAME"
3 | \fBcmark-gfm\fR
4 | \- convert CommonMark formatted text with GitHub Flavored Markdown extensions to HTML
5 | .SH "SYNOPSIS"
6 | .HP 6n
7 | \fBcmark-gfm\fR
8 | [options]
9 | file*
10 | .SH "DESCRIPTION"
11 | \fBcmark-gfm\fR
12 | converts Markdown formatted plain text to either HTML, groff man,
13 | CommonMark XML, LaTeX, or CommonMark, using the conventions
14 | described in the CommonMark spec. It reads input from \fIstdin\fR
15 | or the specified files (concatenating their contents) and writes
16 | output to \fIstdout\fR.
17 | .SH "OPTIONS"
18 | .TP 12n
19 | .B \-\-to, \-t \f[I]FORMAT\f[]
20 | Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[],
21 | \f[C]latex\f[], \f[C]commonmark\f[]).
22 | .TP 12n
23 | .B \-\-width \f[I]WIDTH\f[]
24 | Specify a column width to which to wrap the output. For no wrapping, use
25 | the value 0 (the default). This option currently only affects the
26 | commonmark, latex, and man renderers.
27 | .TP 12n
28 | .B \-\-hardbreaks
29 | Render soft breaks (newlines inside paragraphs in the CommonMark source)
30 | as hard line breaks in the target format. If this option is specified,
31 | hard wrapping is disabled for CommonMark output, regardless of the value
32 | given with \-\-width.
33 | .TP 12n
34 | .B \-\-nobreaks
35 | Render soft breaks as spaces. If this option is specified,
36 | hard wrapping is disabled for all output formats, regardless of the value
37 | given with \-\-width.
38 | .TP 12n
39 | .B \-\-sourcepos
40 | Include source position attribute.
41 | .TP 12n
42 | .B \-\-normalize
43 | Consolidate adjacent text nodes.
44 | .TP 12n
45 | .B \-\-extension, \-e \f[I]EXTENSION_NAME\f[]
46 | Specify an extension name to use.
47 | .TP 12n
48 | .B \-\-list\-extensions
49 | List available extensions and quit.
50 | .TP 12n
51 | .B \-\-validate\-utf8
52 | Validate UTF-8, replacing illegal sequences with U+FFFD.
53 | .TP 12n
54 | .B \-\-smart
55 | Use smart punctuation. Straight double and single quotes will
56 | be rendered as curly quotes, depending on their position.
57 | \f[C]\-\-\f[] will be rendered as an en-dash.
58 | \f[C]\-\-\-\f[] will be rendered as an em-dash.
59 | \f[C]...\f[] will be rendered as ellipses.
60 | .TP 12n
61 | .B \-\-unsafe
62 | Render raw HTML and potentially dangerous URLs.
63 | (Raw HTML is not replaced by a placeholder comment; potentially
64 | dangerous URLs are not replaced by empty strings.) Dangerous
65 | URLs are those that begin with `javascript:`, `vbscript:`,
66 | `file:`, or `data:` (except for `image/png`, `image/gif`,
67 | `image/jpeg`, or `image/webp` mime types).
68 | .TP 12n
69 | .B \-\-help
70 | Print usage information.
71 | .TP 12n
72 | .B \-\-version
73 | Print version.
74 | .SH "AUTHORS"
75 | John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
76 | .SH "SEE ALSO"
77 | .PP
78 | CommonMark spec: \f[C]http://spec.commonmark.org\f[].
79 |
--------------------------------------------------------------------------------
/nmake.bat:
--------------------------------------------------------------------------------
1 | @nmake.exe /nologo /f Makefile.nmake %*
2 |
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark-gfm.pc.in
2 | ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc @ONLY)
3 |
4 | add_library(libcmark-gfm
5 | arena.c
6 | blocks.c
7 | buffer.c
8 | cmark.c
9 | cmark_ctype.c
10 | commonmark.c
11 | footnotes.c
12 | houdini_href_e.c
13 | houdini_html_e.c
14 | houdini_html_u.c
15 | html.c
16 | inlines.c
17 | iterator.c
18 | latex.c
19 | linked_list.c
20 | man.c
21 | map.c
22 | node.c
23 | plaintext.c
24 | plugin.c
25 | references.c
26 | registry.c
27 | render.c
28 | scanners.c
29 | scanners.re
30 | syntax_extension.c
31 | utf8.c
32 | xml.c)
33 | if(NOT BUILD_SHARED_LIBS)
34 | target_compile_definitions(libcmark-gfm PUBLIC
35 | CMARK_GFM_STATIC_DEFINE)
36 | target_compile_options(libcmark-gfm PUBLIC
37 | $<$:-Xcc -DCMARK_GFM_STATIC_DEFINE>)
38 | endif()
39 | target_include_directories(libcmark-gfm PUBLIC
40 | $
41 | $
42 | $
43 | $)
44 | target_link_libraries(libcmark-gfm PRIVATE
45 | $<$:Threads::Threads>)
46 | set_target_properties(libcmark-gfm PROPERTIES
47 | MACOSX_RPATH TRUE
48 | OUTPUT_NAME cmark-gfm
49 | PDB_NAME libcmark-gfm
50 | POSITION_INDEPENDENT_CODE YES
51 | SOVERSION ${PROJECT_VERSION}
52 | VERSION ${PROJECT_VERSION})
53 |
54 | add_executable(cmark-gfm
55 | ${PROJECT_SOURCE_DIR}/bin/main.c)
56 | target_link_libraries(cmark-gfm
57 | libcmark-gfm
58 | libcmark-gfm-extensions)
59 |
60 |
61 | install(TARGETS cmark-gfm libcmark-gfm
62 | EXPORT cmark-gfm
63 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
64 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
65 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
66 | install(FILES
67 | include/buffer.h
68 | include/chunk.h
69 | include/cmark_ctype.h
70 | include/cmark-gfm.h
71 | include/cmark-gfm-extension_api.h
72 | include/cmark-gfm_version.h
73 | include/export.h
74 | include/footnotes.h
75 | include/houdini.h
76 | include/html.h
77 | include/inlines.h
78 | include/iterator.h
79 | include/map.h
80 | include/node.h
81 | include/parser.h
82 | include/plugin.h
83 | include/references.h
84 | include/registry.h
85 | include/render.h
86 | include/scanners.h
87 | include/syntax_extension.h
88 | include/utf8.h
89 | include/module.modulemap
90 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cmark_gfm)
91 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc
92 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
93 | install(EXPORT cmark-gfm
94 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cmark-gfm)
95 |
96 | export(TARGETS libcmark-gfm
97 | FILE ${CMAKE_CURRENT_BINARY_DIR}/cmarkTargets.cmake)
98 |
--------------------------------------------------------------------------------
/src/arena.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-extension_api.h"
6 | #include "mutex.h"
7 |
8 | CMARK_DEFINE_LOCK(arena)
9 |
/* One link in the arena's chain of backing buffers. `A` is the head of the
 * chain; older chunks are reached through `prev`. */
static struct arena_chunk {
  size_t sz, used; /* capacity of `ptr` in bytes / bytes already handed out */
  uint8_t push_point; /* set by cmark_arena_push(); cmark_arena_pop() stops unwinding at such a chunk */
  void *ptr; /* backing storage, obtained from calloc */
  struct arena_chunk *prev; /* next older chunk, or NULL */
} *A = NULL;
16 |
17 | static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
18 | struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c));
19 | if (!c)
20 | abort();
21 | c->sz = sz;
22 | c->ptr = calloc(1, sz);
23 | if (!c->ptr)
24 | abort();
25 | c->prev = prev;
26 | return c;
27 | }
28 |
29 | void cmark_arena_push(void) {
30 | CMARK_INITIALIZE_AND_LOCK(arena);
31 | if (A) {
32 | A->push_point = 1;
33 | A = alloc_arena_chunk(10240, A);
34 | }
35 | CMARK_UNLOCK(arena);
36 | }
37 |
38 | int cmark_arena_pop(void) {
39 | int ret = 1;
40 | CMARK_INITIALIZE_AND_LOCK(arena);
41 | if (!A)
42 | ret = 0;
43 | else {
44 | while (A && !A->push_point) {
45 | free(A->ptr);
46 | struct arena_chunk *n = A->prev;
47 | free(A);
48 | A = n;
49 | }
50 | if (A)
51 | A->push_point = 0;
52 | }
53 | CMARK_UNLOCK(arena);
54 | return ret;
55 | }
56 |
57 | static void init_arena(void) {
58 | CMARK_INITIALIZE_AND_LOCK(arena);
59 | A = alloc_arena_chunk(4 * 1048576, NULL);
60 | CMARK_UNLOCK(arena);
61 | }
62 |
63 | void cmark_arena_reset(void) {
64 | CMARK_INITIALIZE_AND_LOCK(arena);
65 | while (A) {
66 | free(A->ptr);
67 | struct arena_chunk *n = A->prev;
68 | free(A);
69 | A = n;
70 | }
71 | CMARK_UNLOCK(arena);
72 | }
73 |
74 | static void *arena_calloc(size_t nmem, size_t size) {
75 | if (!A)
76 | init_arena();
77 |
78 | size_t sz = nmem * size + sizeof(size_t);
79 |
80 | // Round allocation sizes to largest integer size to
81 | // ensure returned memory is correctly aligned
82 | const size_t align = sizeof(size_t) - 1;
83 | sz = (sz + align) & ~align;
84 |
85 | CMARK_INITIALIZE_AND_LOCK(arena);
86 |
87 | struct arena_chunk *chunk;
88 | if (sz > A->sz) {
89 | A->prev = chunk = alloc_arena_chunk(sz, A->prev);
90 | } else if (sz > A->sz - A->used) {
91 | A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
92 | } else {
93 | chunk = A;
94 | }
95 | void *ptr = (uint8_t *) chunk->ptr + chunk->used;
96 | chunk->used += sz;
97 | *((size_t *) ptr) = sz - sizeof(size_t);
98 |
99 | CMARK_UNLOCK(arena);
100 |
101 | return (uint8_t *) ptr + sizeof(size_t);
102 | }
103 |
104 | static void *arena_realloc(void *ptr, size_t size) {
105 | if (!A)
106 | init_arena();
107 |
108 | void *new_ptr = arena_calloc(1, size);
109 | if (ptr)
110 | memcpy(new_ptr, ptr, ((size_t *) ptr)[-1]);
111 | return new_ptr;
112 | }
113 |
/* Individual arena allocations are never released on their own, so this
 * hook deliberately does nothing. */
static void arena_free(void *ptr) {
  /* no-op */
  (void)ptr;
}
118 |
/* Allocator table backed by the arena: calloc, realloc and free hooks, in
 * that order. */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};

/* Returns a pointer to the arena-backed allocator table above. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
  return &CMARK_ARENA_MEM_ALLOCATOR;
}
124 |
--------------------------------------------------------------------------------
/src/cmark.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "registry.h"
5 | #include "node.h"
6 | #include "houdini.h"
7 | #include "cmark-gfm.h"
8 | #include "buffer.h"
9 |
/* Mutable globals holding the last block / inline node type values; they
 * start out at the last built-in types.
 * NOTE(review): presumably advanced when extensions register new node types;
 * confirm against the extension code. */
cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_ATTRIBUTE;

/* Returns the compile-time library version number (CMARK_GFM_VERSION). */
int cmark_version(void) { return CMARK_GFM_VERSION; }

/* Returns the compile-time library version string. */
const char *cmark_version_string(void) { return CMARK_GFM_VERSION_STRING; }
16 |
/* calloc wrapper for the default allocator: never returns NULL. On
 * allocation failure it reports the error on stderr and aborts. */
static void *xcalloc(size_t nmem, size_t size) {
  void *block = calloc(nmem, size);
  if (block == NULL) {
    fputs("[cmark] calloc returned null pointer, aborting\n", stderr);
    abort();
  }
  return block;
}
25 |
/* realloc wrapper for the default allocator: never returns NULL. A NULL
 * result from realloc is reported on stderr and the process aborts. */
static void *xrealloc(void *ptr, size_t size) {
  void *resized = realloc(ptr, size);
  if (resized == NULL) {
    fputs("[cmark] realloc returned null pointer, aborting\n", stderr);
    abort();
  }
  return resized;
}
34 |
/* free hook of the default allocator; forwards to free(). */
static void xfree(void *ptr) {
  free(ptr);
}

/* Default allocator table: abort-on-failure calloc/realloc wrappers and
 * plain free, in that order. */
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};

/* Returns a pointer to the default allocator table above. */
cmark_mem *cmark_get_default_mem_allocator(void) {
  return &CMARK_DEFAULT_MEM_ALLOCATOR;
}
44 |
45 | char *cmark_markdown_to_html(const char *text, size_t len, int options) {
46 | cmark_node *doc;
47 | char *result;
48 |
49 | doc = cmark_parse_document(text, len, options);
50 |
51 | result = cmark_render_html(doc, options, NULL);
52 | cmark_node_free(doc);
53 |
54 | return result;
55 | }
56 |
--------------------------------------------------------------------------------
/src/cmark_ctype.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "cmark_ctype.h"
4 |
5 | /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other
6 | */
7 | static const uint8_t cmark_ctype_class[256] = {
8 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
9 | /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
10 | /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11 | /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
12 | /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
13 | /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
14 | /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
15 | /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
16 | /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
17 | /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18 | /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19 | /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20 | /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21 | /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22 | /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23 | /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24 | /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
25 |
26 | /**
27 | * Returns 1 if c is a "whitespace" character as defined by the spec.
28 | */
29 | int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
30 |
31 | /**
32 | * Returns 1 if c is an ascii punctuation character.
33 | */
34 | int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; }
35 |
36 | int cmark_isalnum(char c) {
37 | uint8_t result;
38 | result = cmark_ctype_class[(uint8_t)c];
39 | return (result == 3 || result == 4);
40 | }
41 |
42 | int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; }
43 |
44 | int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; }
45 |
--------------------------------------------------------------------------------
/src/footnotes.c:
--------------------------------------------------------------------------------
1 | #include "cmark-gfm.h"
2 | #include "parser.h"
3 | #include "footnotes.h"
4 | #include "inlines.h"
5 | #include "chunk.h"
6 |
7 | static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
8 | cmark_footnote *ref = (cmark_footnote *)_ref;
9 | cmark_mem *mem = map->mem;
10 | if (ref != NULL) {
11 | mem->free(ref->entry.label);
12 | if (ref->node)
13 | cmark_node_free(ref->node);
14 | mem->free(ref);
15 | }
16 | }
17 |
18 | void cmark_footnote_create(cmark_map *map, cmark_node *node) {
19 | cmark_footnote *ref;
20 | unsigned char *reflabel = normalize_map_label(map->mem, &node->as.literal);
21 |
22 | /* empty footnote name, or composed from only whitespace */
23 | if (reflabel == NULL)
24 | return;
25 |
26 | assert(map->sorted == NULL);
27 |
28 | ref = (cmark_footnote *)map->mem->calloc(1, sizeof(*ref));
29 | ref->entry.label = reflabel;
30 | ref->node = node;
31 | ref->entry.age = map->size;
32 | ref->entry.next = map->refs;
33 |
34 | map->refs = (cmark_map_entry *)ref;
35 | map->size++;
36 | }
37 |
/* Create a map that uses allocator `mem` and destroys its entries with
 * footnote_free. */
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
  return cmark_map_new(mem, footnote_free);
}
41 |
42 | // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43 | // unlink all of the footnote nodes before freeing their memory.
44 | //
45 | // Sometimes, two (unused) footnote nodes can end up referencing each other,
46 | // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47 | // etc, can lead to a use-after-free error.
48 | //
49 | // Better to `unlink` every footnote node first, setting their next, prev, and
50 | // parent pointers to NULL, and only then walk thru & free them up.
51 | void cmark_unlink_footnotes_map(cmark_map *map) {
52 | cmark_map_entry *ref;
53 | cmark_map_entry *next;
54 |
55 | ref = map->refs;
56 | while(ref) {
57 | next = ref->next;
58 | if (((cmark_footnote *)ref)->node) {
59 | cmark_node_unlink(((cmark_footnote *)ref)->node);
60 | }
61 | ref = next;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/houdini_href_e.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "houdini.h"
6 |
7 | #if !defined(__has_builtin)
8 | # define __has_builtin(b) 0
9 | #endif
10 |
11 | #if !__has_builtin(__builtin_expect)
12 | # define __builtin_expect(e, v) (e)
13 | #endif
14 |
15 | #define likely(e) __builtin_expect((e), 1)
16 |
17 | /*
18 | * The following characters will not be escaped:
19 | *
20 | * -_.+!*'(),%#@?=;:/,+&$~ alphanum
21 | *
22 | * Note that this character set is the addition of:
23 | *
24 | * - The characters which are safe to be in an URL
25 | * - The characters which are *not* safe to be in
26 | * an URL because they are RESERVED characters.
27 | *
28 | * We assume (lazily) that any RESERVED char that
29 | * appears inside an URL is actually meant to
30 | * have its native function (i.e. as an URL
31 | * component/separator) and hence needs no escaping.
32 | *
33 | * There are two exceptions: the characters & (amp)
34 | * and ' (single quote) do not appear in the table.
35 | * They are meant to appear in the URL as components,
36 | * yet they require special HTML-entity escaping
37 | * to generate valid HTML markup.
38 | *
39 | * All other characters will be escaped to %XX.
40 | *
41 | */
42 | static const char HREF_SAFE[] = {
43 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
45 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
47 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 | 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 | };
55 |
56 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
57 | static const uint8_t hex_chars[] = "0123456789ABCDEF";
58 | bufsize_t i = 0, org;
59 | uint8_t hex_str[3];
60 |
61 | hex_str[0] = '%';
62 |
63 | while (i < size) {
64 | org = i;
65 | while (i < size && HREF_SAFE[src[i]] != 0)
66 | i++;
67 |
68 | if (likely(i > org))
69 | cmark_strbuf_put(ob, src + org, i - org);
70 |
71 | /* escaping */
72 | if (i >= size)
73 | break;
74 |
75 | switch (src[i]) {
76 | /* amp appears all the time in URLs, but needs
77 | * HTML-entity escaping to be inside an href */
78 | case '&':
79 | cmark_strbuf_puts(ob, "&");
80 | break;
81 |
82 | /* the single quote is a valid URL character
83 | * according to the standard; it needs HTML
84 | * entity escaping too */
85 | case '\'':
86 | cmark_strbuf_puts(ob, "'");
87 | break;
88 |
89 | /* the space can be escaped to %20 or a plus
90 | * sign. we're going with the generic escape
91 | * for now. the plus thing is more commonly seen
92 | * when building GET strings */
93 | #if 0
94 | case ' ':
95 | cmark_strbuf_putc(ob, '+');
96 | break;
97 | #endif
98 |
99 | /* every other character goes with a %XX escaping */
100 | default:
101 | hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102 | hex_str[2] = hex_chars[src[i] & 0xF];
103 | cmark_strbuf_put(ob, hex_str, 3);
104 | }
105 |
106 | i++;
107 | }
108 |
109 | return 1;
110 | }
111 |
--------------------------------------------------------------------------------
/src/houdini_html_e.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "houdini.h"
6 |
7 | #if !defined(__has_builtin)
8 | # define __has_builtin(b) 0
9 | #endif
10 |
11 | #if !__has_builtin(__builtin_expect)
12 | # define __builtin_expect(e, v) (e)
13 | #endif
14 |
15 | #define unlikely(e) __builtin_expect((e), 0)
16 |
17 | /**
18 | * According to the OWASP rules:
19 | *
20 | * & --> &amp;
21 | * < --> &lt;
22 | * > --> &gt;
23 | * " --> &quot;
24 | * ' --> &#x27; &apos; is not recommended
25 | * / --> &#x2F; forward slash is included as it helps end an HTML entity
26 | *
27 | */
/* Indexed by byte value. 0 means the byte is copied through unchanged; a
 * non-zero entry is the index of its replacement in HTML_ESCAPES:
 * '"' = 1, '&' = 2, '\'' = 3, '/' = 4, '<' = 5, '>' = 6. */
static const char HTML_ESCAPE_TABLE[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
41 |
/* Replacement text for each escape code stored in HTML_ESCAPE_TABLE:
 * 0 unused, 1 '"', 2 '&', 3 '\'', 4 '/', 5 '<', 6 '>'.
 * The single quote and slash use decimal numeric character references
 * (&#39; and &#47;), which are equivalent to the hex forms &#x27; / &#x2F;. */
static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
                                     "&#47;", "&lt;", "&gt;"};
44 |
45 | int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
46 | int secure) {
47 | bufsize_t i = 0, org, esc = 0;
48 |
49 | while (i < size) {
50 | org = i;
51 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
52 | i++;
53 |
54 | if (i > org)
55 | cmark_strbuf_put(ob, src + org, i - org);
56 |
57 | /* escaping */
58 | if (unlikely(i >= size))
59 | break;
60 |
61 | /* The forward slash and single quote are only escaped in secure mode */
62 | if ((src[i] == '/' || src[i] == '\'') && !secure) {
63 | cmark_strbuf_putc(ob, src[i]);
64 | } else {
65 | cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
66 | }
67 |
68 | i++;
69 | }
70 |
71 | return 1;
72 | }
73 |
/* Escape `src` into `ob` in secure mode, i.e. with '/' and '\'' escaped as
 * well (houdini_escape_html0 with secure = 1). */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  return houdini_escape_html0(ob, src, size, 1);
}
77 |
--------------------------------------------------------------------------------
/src/houdini_html_u.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "buffer.h"
6 | #include "houdini.h"
7 | #include "utf8.h"
8 | #include "entities.inc"
9 |
10 | #if !defined(__has_builtin)
11 | # define __has_builtin(b) 0
12 | #endif
13 |
14 | #if !__has_builtin(__builtin_expect)
15 | # define __builtin_expect(e, v) (e)
16 | #endif
17 |
18 | #define likely(e) __builtin_expect((e), 1)
19 | #define unlikely(e) __builtin_expect((e), 0)
20 |
/* Binary tree lookup code for entities added by JGM */

/*
 * Recursive binary search in cmark_entities for the name given by the first
 * `len` bytes of `s`. `i` is the index probed at this step and [low, hi] the
 * inclusive range still under consideration.
 * Returns the entity's replacement bytes, or NULL if the name is not found.
 * NOTE(review): relies on cmark_entities being sorted by entity name in
 * strncmp order; the table comes from entities.inc, which is not visible here.
 */
static const unsigned char *S_lookup(int i, int low, int hi,
                                     const unsigned char *s, int len) {
  int j;
  int cmp =
      strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
  /* a match must cover the whole table entry, not just a prefix of it */
  if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
    return (const unsigned char *)cmark_entities[i].bytes;
  } else if (cmp <= 0 && i > low) {
    /* continue in the lower part; always move by at least one index */
    j = i - ((i - low) / 2);
    if (j == i)
      j -= 1;
    return S_lookup(j, low, i - 1, s, len);
  } else if (cmp > 0 && i < hi) {
    /* continue in the upper part; always move by at least one index */
    j = i + ((hi - i) / 2);
    if (j == i)
      j += 1;
    return S_lookup(j, i + 1, hi, s, len);
  } else {
    return NULL;
  }
}
44 |
/* Look up the entity name `s` (length `len`) in the whole entity table,
 * starting the search at the middle index. Returns NULL if not found. */
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
  return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
}
48 |
/*
 * Try to decode one HTML entity whose text starts at `src`; the leading '&'
 * is not part of `src` (houdini_unescape_html passes the position after it).
 *
 * Handles decimal (#123;) and hexadecimal (#x1F; / #X1F;) numeric references
 * with 1 to 8 digits, and named entities found in the entity table. On
 * success the decoded bytes are appended to `ob`.
 *
 * Returns the number of bytes of `src` consumed, including the terminating
 * ';', or 0 if no entity was recognised.
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size) {
  bufsize_t i = 0;

  if (size >= 3 && src[0] == '#') {
    int codepoint = 0;
    int num_digits = 0;

    if (_isdigit(src[1])) {
      for (i = 1; i < size && _isdigit(src[i]); ++i) {
        codepoint = (codepoint * 10) + (src[i] - '0');

        if (codepoint >= 0x110000) {
          // Keep counting digits but
          // avoid integer overflow.
          codepoint = 0x110000;
        }
      }

      num_digits = i - 1;
    }

    else if (src[1] == 'x' || src[1] == 'X') {
      for (i = 2; i < size && _isxdigit(src[i]); ++i) {
        // (c | 32) folds letters to lower case; % 39 - 9 then maps
        // '0'..'9' to 0..9 and 'a'..'f' to 10..15.
        codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);

        if (codepoint >= 0x110000) {
          // Keep counting digits but
          // avoid integer overflow.
          codepoint = 0x110000;
        }
      }

      num_digits = i - 2;
    }

    if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
      // NUL, surrogates and out-of-range values become U+FFFD.
      if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
          codepoint >= 0x110000) {
        codepoint = 0xFFFD;
      }
      cmark_utf8proc_encode_char(codepoint, ob);
      return i + 1;
    }
  }

  else {
    // Named entity: only scan as far as the longest known entity name.
    if (size > CMARK_ENTITY_MAX_LENGTH)
      size = CMARK_ENTITY_MAX_LENGTH;

    for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
      if (src[i] == ' ')
        break;

      if (src[i] == ';') {
        const unsigned char *entity = S_lookup_entity(src, i);

        if (entity != NULL) {
          cmark_strbuf_puts(ob, (const char *)entity);
          return i + 1;
        }

        break;
      }
    }
  }

  return 0;
}
118 |
/*
 * Append `src` to `ob` with HTML entities decoded. An '&' that does not start
 * a recognised entity is copied through literally.
 *
 * Returns 0 without writing anything when `src` is non-empty and contains no
 * '&' at all, so the caller can use the input as-is; returns 1 otherwise.
 */
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                          bufsize_t size) {
  bufsize_t i = 0, org, ent;

  while (i < size) {
    /* copy the run of plain bytes up to the next '&' */
    org = i;
    while (i < size && src[i] != '&')
      i++;

    if (likely(i > org)) {
      if (unlikely(org == 0)) {
        /* first run reached the end of the input: nothing to unescape */
        if (i >= size)
          return 0;

        cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
      }

      cmark_strbuf_put(ob, src + org, i - org);
    }

    /* escaping */
    if (i >= size)
      break;

    /* skip the '&' itself */
    i++;

    ent = houdini_unescape_ent(ob, src + i, size - i);
    i += ent;

    /* not really an entity */
    if (ent == 0)
      cmark_strbuf_putc(ob, '&');
  }

  return 1;
}
155 |
156 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
157 | bufsize_t size) {
158 | if (!houdini_unescape_html(ob, src, size))
159 | cmark_strbuf_put(ob, src, size);
160 | }
161 |
--------------------------------------------------------------------------------
/src/include/buffer.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_BUFFER_H
2 | #define CMARK_BUFFER_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "cmark-gfm.h"
10 |
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 |
/* Growable byte buffer. CMARK_BUF_INIT initialises it with `ptr` set to the
 * shared cmark_strbuf__initbuf and both sizes set to 0. */
typedef struct {
  cmark_mem *mem;     /* allocator used for the buffer's storage */
  unsigned char *ptr; /* buffer contents */
  bufsize_t asize, size; /* NOTE(review): presumably allocated capacity / bytes in use; confirm in buffer.c */
} cmark_strbuf;
20 |
21 | extern unsigned char cmark_strbuf__initbuf[];
22 |
23 | #define CMARK_BUF_INIT(mem) \
24 | { mem, cmark_strbuf__initbuf, 0, 0 }
25 |
26 | /**
27 | * Initialize a cmark_strbuf structure.
28 | *
29 | * For the cases where CMARK_BUF_INIT cannot be used to do static
30 | * initialization.
31 | */
32 | CMARK_GFM_EXPORT
33 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
34 | bufsize_t initial_size);
35 |
36 | /**
37 | * Grow the buffer to hold at least `target_size` bytes.
38 | */
39 | CMARK_GFM_EXPORT
40 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
41 |
42 | CMARK_GFM_EXPORT
43 | void cmark_strbuf_free(cmark_strbuf *buf);
44 |
45 | CMARK_GFM_EXPORT
46 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
47 |
48 | CMARK_GFM_EXPORT
49 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
50 |
51 | CMARK_GFM_EXPORT
52 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
53 |
54 | CMARK_GFM_EXPORT
55 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
56 |
57 | CMARK_GFM_EXPORT
58 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
59 | const cmark_strbuf *buf);
60 |
61 | static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
62 | return (char *)buf->ptr;
63 | }
64 |
65 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
66 |
67 | CMARK_GFM_EXPORT
68 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
69 | bufsize_t len);
70 |
71 | CMARK_GFM_EXPORT
72 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
73 |
74 | CMARK_GFM_EXPORT
75 | void cmark_strbuf_putc(cmark_strbuf *buf, int c);
76 |
77 | CMARK_GFM_EXPORT
78 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
79 | bufsize_t len);
80 |
81 | CMARK_GFM_EXPORT
82 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
83 |
84 | CMARK_GFM_EXPORT
85 | void cmark_strbuf_clear(cmark_strbuf *buf);
86 |
87 | CMARK_GFM_EXPORT
88 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
89 |
90 | CMARK_GFM_EXPORT
91 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
92 |
93 | CMARK_GFM_EXPORT
94 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
95 |
96 | CMARK_GFM_EXPORT
97 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
98 |
99 | CMARK_GFM_EXPORT
100 | void cmark_strbuf_rtrim(cmark_strbuf *buf);
101 |
102 | CMARK_GFM_EXPORT
103 | void cmark_strbuf_trim(cmark_strbuf *buf);
104 |
105 | CMARK_GFM_EXPORT
106 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
107 |
108 | CMARK_GFM_EXPORT
109 | void cmark_strbuf_unescape(cmark_strbuf *s);
110 |
111 | #ifdef __cplusplus
112 | }
113 | #endif
114 |
115 | #endif
116 |
--------------------------------------------------------------------------------
/src/include/chunk.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_CHUNK_H
2 | #define CMARK_CHUNK_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include "cmark-gfm.h"
8 | #include "buffer.h"
9 | #include "cmark_ctype.h"
10 |
11 | #define CMARK_CHUNK_EMPTY \
12 | { NULL, 0, 0 }
13 |
14 | typedef struct cmark_chunk {
15 | unsigned char *data;
16 | bufsize_t len;
17 | bufsize_t alloc; // also implies a NULL-terminated string
18 | } cmark_chunk;
19 |
20 | static inline void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
21 | if (c->alloc)
22 | mem->free(c->data);
23 |
24 | c->data = NULL;
25 | c->alloc = 0;
26 | c->len = 0;
27 | }
28 |
29 | static inline void cmark_chunk_ltrim(cmark_chunk *c) {
30 | assert(!c->alloc);
31 |
32 | while (c->len && cmark_isspace(c->data[0])) {
33 | c->data++;
34 | c->len--;
35 | }
36 | }
37 |
38 | static inline void cmark_chunk_rtrim(cmark_chunk *c) {
39 | assert(!c->alloc);
40 |
41 | while (c->len > 0) {
42 | if (!cmark_isspace(c->data[c->len - 1]))
43 | break;
44 |
45 | c->len--;
46 | }
47 | }
48 |
49 | static inline void cmark_chunk_trim(cmark_chunk *c) {
50 | cmark_chunk_ltrim(c);
51 | cmark_chunk_rtrim(c);
52 | }
53 |
54 | static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
55 | bufsize_t offset) {
56 | const unsigned char *p =
57 | (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
58 | return p ? (bufsize_t)(p - ch->data) : ch->len;
59 | }
60 |
61 | static inline const char *cmark_chunk_to_cstr(cmark_mem *mem, cmark_chunk *c) {
62 | unsigned char *str;
63 |
64 | if (c->alloc) {
65 | return (char *)c->data;
66 | }
67 | str = (unsigned char *)mem->calloc(c->len + 1, 1);
68 | if (c->len > 0) {
69 | memcpy(str, c->data, c->len);
70 | }
71 | str[c->len] = 0;
72 | c->data = str;
73 | c->alloc = 1;
74 |
75 | return (char *)str;
76 | }
77 |
78 | static inline void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
79 | const char *str) {
80 | unsigned char *old = c->alloc ? c->data : NULL;
81 | if (str == NULL) {
82 | c->len = 0;
83 | c->data = NULL;
84 | c->alloc = 0;
85 | } else {
86 | c->len = (bufsize_t)strlen(str);
87 | c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
88 | c->alloc = 1;
89 | memcpy(c->data, str, c->len + 1);
90 | }
91 | if (old != NULL) {
92 | mem->free(old);
93 | }
94 | }
95 |
96 | static inline cmark_chunk cmark_chunk_literal(const char *data) {
97 | bufsize_t len = data ? (bufsize_t)strlen(data) : 0;
98 | cmark_chunk c = {(unsigned char *)data, len, 0};
99 | return c;
100 | }
101 |
102 | static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos,
103 | bufsize_t len) {
104 | cmark_chunk c = {ch->data + pos, len, 0};
105 | return c;
106 | }
107 |
108 | static inline cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
109 | cmark_chunk c;
110 |
111 | c.len = buf->size;
112 | c.data = cmark_strbuf_detach(buf);
113 | c.alloc = 1;
114 |
115 | return c;
116 | }
117 |
118 | /* trim_new variants are to be used when the source chunk may or may not be
119 | * allocated; forces a newly allocated chunk. */
120 | static inline cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) {
121 | cmark_chunk r = cmark_chunk_dup(c, 0, c->len);
122 | cmark_chunk_ltrim(&r);
123 | cmark_chunk_to_cstr(mem, &r);
124 | return r;
125 | }
126 |
127 | static inline cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) {
128 | cmark_chunk r = cmark_chunk_dup(c, 0, c->len);
129 | cmark_chunk_rtrim(&r);
130 | cmark_chunk_to_cstr(mem, &r);
131 | return r;
132 | }
133 |
134 | #endif
135 |
--------------------------------------------------------------------------------
/src/include/cmark-gfm_version.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_VERSION_H
2 | #define CMARK_GFM_VERSION_H
3 |
/* Numeric version, one component per byte from the most significant byte
 * down: 0, 29, 0, 13. */
#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 13)
/* The same version in printable form. */
#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.13"
6 |
7 | #endif
8 |
--------------------------------------------------------------------------------
/src/include/cmark_ctype.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_CMARK_CTYPE_H
2 | #define CMARK_CMARK_CTYPE_H
3 |
4 | #include "export.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | /** Locale-independent versions of functions from ctype.h.
11 | * We want cmark to behave the same no matter what the system locale.
12 | */
13 |
14 | CMARK_GFM_EXPORT
15 | int cmark_isspace(char c);
16 |
17 | CMARK_GFM_EXPORT
18 | int cmark_ispunct(char c);
19 |
20 | CMARK_GFM_EXPORT
21 | int cmark_isalnum(char c);
22 |
23 | CMARK_GFM_EXPORT
24 | int cmark_isdigit(char c);
25 |
26 | CMARK_GFM_EXPORT
27 | int cmark_isalpha(char c);
28 |
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 |
33 | #endif
34 |
--------------------------------------------------------------------------------
/src/include/export.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_GFM_EXPORT_H
2 | #define CMARK_GFM_EXPORT_H
3 |
/*
 * Symbol visibility.
 *   - static builds (CMARK_GFM_STATIC_DEFINE): both macros expand to nothing;
 *   - Windows: dllexport while building the library (libcmark_gfm_EXPORTS),
 *     dllimport otherwise; CMARK_GFM_NO_EXPORT expands to nothing;
 *   - elsewhere: default visibility for exported symbols (the same whether
 *     or not libcmark_gfm_EXPORTS is defined), hidden for the rest.
 */
#if defined(CMARK_GFM_STATIC_DEFINE)
#  define CMARK_GFM_EXPORT
#  define CMARK_GFM_NO_EXPORT
#elif defined(_WIN32)
#  if defined(libcmark_gfm_EXPORTS)
#    define CMARK_GFM_EXPORT __declspec(dllexport)
#  else
#    define CMARK_GFM_EXPORT __declspec(dllimport)
#  endif
#  define CMARK_GFM_NO_EXPORT
#else
#  define CMARK_GFM_EXPORT __attribute__((__visibility__("default")))
#  define CMARK_GFM_NO_EXPORT __attribute__((__visibility__("hidden")))
#endif

/* Deprecation marker; may be pre-defined by the including project. */
#if !defined(CMARK_GFM_DEPRECATED)
#  if defined(_WIN32)
#    define CMARK_GFM_DEPRECATED __declspec(deprecated)
#  else
#    define CMARK_GFM_DEPRECATED __attribute__ ((__deprecated__))
#  endif
#endif

/* Combinations of the visibility and deprecation markers. */
#if !defined(CMARK_GFM_DEPRECATED_EXPORT)
#  define CMARK_GFM_DEPRECATED_EXPORT CMARK_GFM_EXPORT CMARK_GFM_DEPRECATED
#endif

#if !defined(CMARK_GFM_DEPRECATED_NO_EXPORT)
#  define CMARK_GFM_DEPRECATED_NO_EXPORT CMARK_GFM_NO_EXPORT CMARK_GFM_DEPRECATED
#endif
40 |
41 | #endif /* not CMARK_GFM_EXPORT_H */
42 |
--------------------------------------------------------------------------------
/src/include/footnotes.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_FOOTNOTES_H
2 | #define CMARK_FOOTNOTES_H
3 |
4 | #include "map.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | struct cmark_footnote {
11 | cmark_map_entry entry;
12 | cmark_node *node;
13 | unsigned int ix;
14 | };
15 |
16 | typedef struct cmark_footnote cmark_footnote;
17 |
18 | void cmark_footnote_create(cmark_map *map, cmark_node *node);
19 | cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20 |
21 | void cmark_unlink_footnotes_map(cmark_map *map);
22 |
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/src/include/houdini.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_HOUDINI_H
2 | #define CMARK_HOUDINI_H
3 |
4 | #include
5 |
6 | #include "buffer.h"
7 |
8 | #ifdef __cplusplus
9 | extern "C" {
10 | #endif
11 |
#ifdef HOUDINI_USE_LOCALE
/* Defer to the locale-aware <ctype.h> classifiers. */
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Locale-independent digit tests -- do not trust the current locale.
 *
 * _isxdigit searches exactly the 22 hex-digit characters: the explicit
 * length leaves out the literal's terminator, so a NUL byte is NOT reported
 * as a hex digit (a strchr()-based search would report it, because strchr()
 * also matches the terminating '\0'). The argument is evaluated once.
 */
#define _isxdigit(c)                                                           \
  (memchr("0123456789ABCDEFabcdef", (c),                                       \
          sizeof("0123456789ABCDEFabcdef") - 1) != NULL)
/* Note: evaluates `c` twice; do not pass expressions with side effects. */
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

/* 1.2 times `x`, computed in integer arithmetic (rounds down). */
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
/* `x` unchanged. */
#define HOUDINI_UNESCAPED_SIZE(x) (x)
25 |
26 | CMARK_GFM_EXPORT
27 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
28 | bufsize_t size);
29 | CMARK_GFM_EXPORT
30 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
31 | bufsize_t size);
32 | CMARK_GFM_EXPORT
33 | int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
34 | bufsize_t size, int secure);
35 | CMARK_GFM_EXPORT
36 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
37 | bufsize_t size);
38 | CMARK_GFM_EXPORT
39 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
40 | bufsize_t size);
41 | CMARK_GFM_EXPORT
42 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
43 | bufsize_t size);
44 |
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/src/include/html.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_HTML_H
2 | #define CMARK_HTML_H
3 |
4 | #include "buffer.h"
5 | #include "node.h"
6 |
7 | inline
8 | static void cmark_html_render_cr(cmark_strbuf *html) {
9 | if (html->size && html->ptr[html->size - 1] != '\n')
10 | cmark_strbuf_putc(html, '\n');
11 | }
12 |
13 | #define BUFFER_SIZE 100
14 |
15 | inline
16 | static void cmark_html_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) {
17 | char buffer[BUFFER_SIZE];
18 | if (CMARK_OPT_SOURCEPOS & options) {
19 | snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
20 | cmark_node_get_start_line(node), cmark_node_get_start_column(node),
21 | cmark_node_get_end_line(node), cmark_node_get_end_column(node));
22 | cmark_strbuf_puts(html, buffer);
23 | }
24 | }
25 |
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/src/include/inlines.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_INLINES_H
2 | #define CMARK_INLINES_H
3 |
4 | #include
5 | #include
6 |
7 | #include "references.h"
8 |
9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 |
13 | cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
14 | cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
15 | cmark_chunk cmark_clean_attributes(cmark_mem *mem, cmark_chunk *attributes);
16 |
17 | CMARK_GFM_EXPORT
18 | void cmark_parse_inlines(cmark_parser *parser,
19 | cmark_node *parent,
20 | cmark_map *refmap,
21 | int options);
22 |
23 | bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
24 | cmark_map *refmap);
25 |
26 | bufsize_t cmark_parse_reference_attributes_inline(cmark_mem *mem, cmark_chunk *input,
27 | cmark_map *refmap);
28 |
29 | void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis);
30 | void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis);
31 |
32 | void cmark_set_default_skip_chars(int8_t **skip_chars, bool use_memcpy);
33 | void cmark_set_default_special_chars(int8_t **special_chars, bool use_memcpy);
34 |
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 |
39 | #endif
40 |
--------------------------------------------------------------------------------
/src/include/iterator.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_ITERATOR_H
2 | #define CMARK_ITERATOR_H
3 |
4 | #include "cmark-gfm.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | typedef struct {
11 | cmark_event_type ev_type;
12 | cmark_node *node;
13 | } cmark_iter_state;
14 |
15 | struct cmark_iter {
16 | cmark_mem *mem;
17 | cmark_node *root;
18 | cmark_iter_state cur;
19 | cmark_iter_state next;
20 | };
21 |
22 | #ifdef __cplusplus
23 | }
24 | #endif
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/src/include/map.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_MAP_H
2 | #define CMARK_MAP_H
3 |
4 | #include "chunk.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | struct cmark_map_entry {
11 | struct cmark_map_entry *next;
12 | unsigned char *label;
13 | size_t age;
14 | size_t size;
15 | };
16 |
17 | typedef struct cmark_map_entry cmark_map_entry;
18 |
19 | struct cmark_map;
20 |
21 | typedef void (*cmark_map_free_f)(struct cmark_map *, cmark_map_entry *);
22 |
23 | struct cmark_map {
24 | cmark_mem *mem;
25 | cmark_map_entry *refs;
26 | cmark_map_entry **sorted;
27 | size_t size;
28 | size_t ref_size;
29 | size_t max_ref_size;
30 | cmark_map_free_f free;
31 | };
32 |
33 | typedef struct cmark_map cmark_map;
34 |
35 | unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref);
36 | cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free);
37 | void cmark_map_free(cmark_map *map);
38 | cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label);
39 |
40 | #ifdef __cplusplus
41 | }
42 | #endif
43 |
44 | #endif
45 |
--------------------------------------------------------------------------------
/src/include/module.modulemap:
--------------------------------------------------------------------------------
// Clang module map: exposes the listed cmark-gfm headers as the single
// module `cmark_gfm` and re-exports everything they import.
module cmark_gfm {
  header "cmark-gfm.h"
  header "cmark-gfm-extension_api.h"
  header "buffer.h"
  header "chunk.h"
  header "cmark_ctype.h"
  header "footnotes.h"
  header "houdini.h"
  header "html.h"
  header "inlines.h"
  header "iterator.h"
  header "map.h"
  header "node.h"
  header "parser.h"
  header "plugin.h"
  header "references.h"
  header "registry.h"
  header "render.h"
  header "scanners.h"
  header "syntax_extension.h"
  header "utf8.h"
  export *
}
24 |
--------------------------------------------------------------------------------
/src/include/mutex.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_MUTEX_H
2 | #define CMARK_MUTEX_H
3 |
4 | #include
5 |
6 | #ifdef CMARK_THREADING
7 |
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
/* <unistd.h> advertises _POSIX_THREADS on POSIX systems. */
#include <unistd.h>
#endif

#if defined(_POSIX_THREADS)

#include <pthread.h>

/* Declares the pthread_once_t control object NAME_once. */
#define CMARK_DEFINE_ONCE(NAME) static pthread_once_t NAME##_once = PTHREAD_ONCE_INIT;

/* Runs FUNC through pthread_once() on NAME_once; evaluates to its result. */
#define CMARK_RUN_ONCE(NAME, FUNC) pthread_once(&NAME##_once, FUNC)

/* Declares the mutex NAME_lock, its once-control, and the initializer
 * initialize_NAME that CMARK_INITIALIZE_AND_LOCK runs exactly once. */
#define CMARK_DEFINE_LOCK(NAME) \
  static pthread_mutex_t NAME##_lock; \
  CMARK_DEFINE_ONCE(NAME); \
  static void initialize_##NAME() { pthread_mutex_init(&NAME##_lock, NULL); }

/* Initializes NAME_lock on first use, then locks it. */
#define CMARK_INITIALIZE_AND_LOCK(NAME) \
  CMARK_RUN_ONCE(NAME, initialize_##NAME); \
  pthread_mutex_lock(&NAME##_lock);

#define CMARK_UNLOCK(NAME) pthread_mutex_unlock(&NAME##_lock);

#elif defined(_WIN32) // building for windows

#define _WIN32_WINNT 0x0600 // minimum target of Windows Vista
#define WIN32_LEAN_AND_MEAN
#include <windows.h>

/* Declares the INIT_ONCE control object NAME_once. */
#define CMARK_DEFINE_ONCE(NAME) static INIT_ONCE NAME##_once = INIT_ONCE_STATIC_INIT;

/* Calls FUNC only when InitOnceBeginInitialize reports that initialization
 * is still pending, then marks it complete. */
#define CMARK_RUN_ONCE(NAME, FUNC) do { \
    BOOL fStatus; BOOL fPending; \
    fStatus = InitOnceBeginInitialize(&NAME##_once, 0, &fPending, NULL); \
    if (!fStatus || !fPending) break; \
    FUNC(); \
    InitOnceComplete(&NAME##_once, 0, NULL); \
  } while (0);

/* SRW locks are statically initialized, so locking needs no once-step. */
#define CMARK_DEFINE_LOCK(NAME) static SRWLOCK NAME##_lock = SRWLOCK_INIT;

#define CMARK_INITIALIZE_AND_LOCK(NAME) AcquireSRWLockExclusive(&NAME##_lock);

#define CMARK_UNLOCK(NAME) ReleaseSRWLockExclusive(&NAME##_lock);

#endif
54 |
55 | #else // no threading support
56 |
/* Single-threaded "once" latch: returns true on the first call for a given
 * latch (and sets it), false on every later call. */
static inline bool check_latch(int *latch) {
  if (*latch) {
    return false;
  }
  *latch = 1;
  return true;
}

/* Without threading there is nothing to lock: the lock macros expand to
 * nothing. */
#define CMARK_DEFINE_LOCK(NAME)
#define CMARK_INITIALIZE_AND_LOCK(NAME)
#define CMARK_UNLOCK(NAME)

/* Declares the latch variable NAME, initially unset. */
#define CMARK_DEFINE_ONCE(NAME) static int NAME = 0;

/* Calls FUNC() the first time it is reached for latch NAME.
 * Wrapped in do { } while (0) so the hidden `if` cannot capture an `else`
 * that follows the macro at the call site. The trailing semicolon keeps
 * call sites that omit their own semicolon compiling. */
#define CMARK_RUN_ONCE(NAME, FUNC) do { \
    if (check_latch(&NAME)) FUNC(); \
  } while (0);
73 |
74 | #endif // CMARK_THREADING
75 |
76 | #endif // CMARK_MUTEX_H
77 |
--------------------------------------------------------------------------------
/src/include/node.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_NODE_H
2 | #define CMARK_NODE_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #include "cmark-gfm.h"
9 | #include "cmark-gfm-extension_api.h"
10 | #include "buffer.h"
11 | #include "chunk.h"
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | typedef struct {
18 | cmark_list_type list_type;
19 | int marker_offset;
20 | int padding;
21 | int start;
22 | cmark_delim_type delimiter;
23 | unsigned char bullet_char;
24 | bool tight;
25 | bool checked; // For task list extension
26 | } cmark_list;
27 |
28 | typedef struct {
29 | cmark_chunk info;
30 | cmark_chunk literal;
31 | uint8_t fence_length;
32 | uint8_t fence_offset;
33 | unsigned char fence_char;
34 | int8_t fenced;
35 | } cmark_code;
36 |
37 | typedef struct {
38 | int level;
39 | bool setext;
40 | } cmark_heading;
41 |
42 | typedef struct {
43 | cmark_chunk url;
44 | cmark_chunk title;
45 | } cmark_link;
46 |
47 | typedef struct {
48 | cmark_chunk attributes;
49 | } cmark_attribute;
50 |
51 | typedef struct {
52 | cmark_chunk on_enter;
53 | cmark_chunk on_exit;
54 | } cmark_custom;
55 |
/* Bit flags kept in cmark_node.flags. */
enum cmark_node__internal_flags {
  CMARK_NODE__OPEN = (1 << 0),
  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
  CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),

  // First bit available for flags that extensions register through
  // `cmark_register_node_flag`.
  CMARK_NODE__REGISTER_FIRST = (1 << 3),
};

/* Storage type of the flag set. */
typedef uint16_t cmark_node_internal_flags;
67 |
68 | struct cmark_node {
69 | cmark_strbuf content;
70 |
71 | struct cmark_node *next;
72 | struct cmark_node *prev;
73 | struct cmark_node *parent;
74 | struct cmark_node *first_child;
75 | struct cmark_node *last_child;
76 |
77 | void *user_data;
78 | cmark_free_func user_data_free_func;
79 |
80 | int start_line;
81 | int start_column;
82 | int end_line;
83 | int end_column;
84 | int internal_offset;
85 | uint16_t type;
86 | cmark_node_internal_flags flags;
87 | int backtick_count;
88 |
89 | cmark_syntax_extension *extension;
90 |
91 | /**
92 | * Used during cmark_render() to cache the most recent non-NULL
93 | * extension, if you go up the parent chain like this:
94 | *
95 | * node->parent->...parent->extension
96 | */
97 | cmark_syntax_extension *ancestor_extension;
98 |
99 | union {
100 | int ref_ix;
101 | int def_count;
102 | } footnote;
103 |
104 | cmark_node *parent_footnote_def;
105 |
106 | union {
107 | cmark_chunk literal;
108 | cmark_list list;
109 | cmark_code code;
110 | cmark_heading heading;
111 | cmark_link link;
112 | cmark_attribute attribute;
113 | cmark_custom custom;
114 | int html_block_type;
115 | void *opaque;
116 | } as;
117 | };
118 |
119 | /**
120 | * Syntax extensions can use this function to register a custom node
121 | * flag. The flags are stored in the `flags` field of the `cmark_node`
122 | * struct. The `flags` parameter should be the address of a global variable
123 | * which will store the flag value.
124 | */
125 | CMARK_GFM_EXPORT
126 | void cmark_register_node_flag(cmark_node_internal_flags *flags);
127 |
128 | /**
129 | * DEPRECATED.
130 | *
131 | * This function was added in cmark-gfm version 0.29.0.gfm.7, and was
132 | * required to be called at program start time, which caused
133 | * backwards-compatibility issues in applications that use cmark-gfm as a
134 | * library. It is now a no-op.
135 | */
136 | CMARK_GFM_EXPORT
137 | void cmark_init_standard_node_flags(void);
138 |
139 | static inline cmark_mem *cmark_node_mem(cmark_node *node) {
140 | return node->content.mem;
141 | }
142 | CMARK_GFM_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
143 |
144 | static inline bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) {
145 | return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK;
146 | }
147 |
148 | static inline bool CMARK_NODE_BLOCK_P(cmark_node *node) {
149 | return node != NULL && CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type);
150 | }
151 |
152 | static inline bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) {
153 | return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_INLINE;
154 | }
155 |
156 | static inline bool CMARK_NODE_INLINE_P(cmark_node *node) {
157 | return node != NULL && CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type);
158 | }
159 |
160 | CMARK_GFM_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type);
161 |
162 | /**
163 | * Enable (or disable) extra safety checks. These extra checks cause
164 | * extra performance overhead (in some cases quadratic), so they are only
165 | * intended to be used during testing.
166 | */
167 | CMARK_GFM_EXPORT void cmark_enable_safety_checks(bool enable);
168 |
169 | #ifdef __cplusplus
170 | }
171 | #endif
172 |
173 | #endif
174 |
--------------------------------------------------------------------------------
/src/include/parser.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_PARSER_H
2 | #define CMARK_PARSER_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #include "references.h"
9 | #include "node.h"
10 | #include "buffer.h"
11 |
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 |
16 | #define MAX_LINK_LABEL_LENGTH 1000
17 |
18 | struct cmark_parser {
19 | struct cmark_mem *mem;
20 | /* A hashtable of urls in the current document for cross-references */
21 | struct cmark_map *refmap;
22 | /* The root node of the parser, always a CMARK_NODE_DOCUMENT */
23 | struct cmark_node *root;
24 | /* The last open block after a line is fully processed */
25 | struct cmark_node *current;
26 | /* See the documentation for cmark_parser_get_line_number() in cmark.h */
27 | int line_number;
28 | /* See the documentation for cmark_parser_get_offset() in cmark.h */
29 | bufsize_t offset;
30 | /* See the documentation for cmark_parser_get_column() in cmark.h */
31 | bufsize_t column;
32 | /* See the documentation for cmark_parser_get_first_nonspace() in cmark.h */
33 | bufsize_t first_nonspace;
34 | /* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */
35 | bufsize_t first_nonspace_column;
36 | bufsize_t thematic_break_kill_pos;
37 | /* See the documentation for cmark_parser_get_indent() in cmark.h */
38 | int indent;
39 | /* See the documentation for cmark_parser_is_blank() in cmark.h */
40 | bool blank;
41 | /* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark.h */
42 | bool partially_consumed_tab;
43 | /* Contains the currently processed line */
44 | cmark_strbuf curline;
45 | /* See the documentation for cmark_parser_get_last_line_length() in cmark.h */
46 | bufsize_t last_line_length;
47 | /* FIXME: not sure about the difference with curline */
48 | cmark_strbuf linebuf;
49 | /* Options set by the user, see the Options section in cmark.h */
50 | int options;
51 | bool last_buffer_ended_with_cr;
52 | size_t total_size;
53 | cmark_llist *syntax_extensions;
54 | cmark_llist *inline_syntax_extensions;
55 | cmark_ispunct_func backslash_ispunct;
56 | /* used when parsing inlines, can be populated by extensions if any are loaded */
57 | int8_t *skip_chars;
58 | int8_t *special_chars;
59 | };
60 |
61 | #ifdef __cplusplus
62 | }
63 | #endif
64 |
65 | #endif
66 |
--------------------------------------------------------------------------------
/src/include/plugin.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_PLUGIN_H
2 | #define CMARK_PLUGIN_H
3 |
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-extension_api.h"
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | /**
12 | * cmark_plugin:
13 | *
14 | * A plugin structure, which should be filled by plugin's
15 | * init functions.
16 | */
17 | struct cmark_plugin {
18 | cmark_llist *syntax_extensions;
19 | };
20 |
21 | cmark_llist *
22 | cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin);
23 |
24 | cmark_plugin *
25 | cmark_plugin_new(void);
26 |
27 | void
28 | cmark_plugin_free(cmark_plugin *plugin);
29 |
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 |
34 | #endif
35 |
--------------------------------------------------------------------------------
/src/include/references.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_REFERENCES_H
2 | #define CMARK_REFERENCES_H
3 |
4 | #include
5 |
6 | #include "map.h"
7 |
8 | #ifdef __cplusplus
9 | extern "C" {
10 | #endif
11 |
12 | struct cmark_reference {
13 | cmark_map_entry entry;
14 | bool is_attributes_reference;
15 | cmark_chunk url;
16 | cmark_chunk title;
17 | cmark_chunk attributes;
18 | };
19 |
20 | typedef struct cmark_reference cmark_reference;
21 |
22 | void cmark_reference_create(cmark_map *map, cmark_chunk *label,
23 | cmark_chunk *url, cmark_chunk *title);
24 | void cmark_reference_create_attributes(cmark_map *map, cmark_chunk *label,
25 | cmark_chunk *attributes);
26 | cmark_map *cmark_reference_map_new(cmark_mem *mem);
27 |
28 | #ifdef __cplusplus
29 | }
30 | #endif
31 |
32 | #endif
33 |
--------------------------------------------------------------------------------
/src/include/registry.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_REGISTRY_H
2 | #define CMARK_REGISTRY_H
3 |
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-extension_api.h"
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | CMARK_GFM_EXPORT
12 | void cmark_register_plugin(cmark_plugin_init_func reg_fn);
13 |
14 | CMARK_GFM_EXPORT
15 | void cmark_release_plugins(void);
16 |
17 | CMARK_GFM_EXPORT
18 | cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/src/include/render.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_RENDER_H
2 | #define CMARK_RENDER_H
3 |
4 | #include
5 | #include
6 |
7 | #include "buffer.h"
8 | #include "chunk.h"
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
15 |
16 | struct cmark_renderer {
17 | cmark_mem *mem;
18 | cmark_strbuf *buffer;
19 | cmark_strbuf *prefix;
20 | int column;
21 | int width;
22 | int need_cr;
23 | bufsize_t last_breakable;
24 | bool begin_line;
25 | bool begin_content;
26 | bool no_linebreaks;
27 | bool in_tight_list_item;
28 | void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char);
29 | void (*cr)(struct cmark_renderer *);
30 | void (*blankline)(struct cmark_renderer *);
31 | void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping);
32 | unsigned int footnote_ix;
33 | };
34 |
35 | typedef struct cmark_renderer cmark_renderer;
36 |
37 | struct cmark_html_renderer {
38 | cmark_strbuf *html;
39 | cmark_node *plain;
40 | cmark_llist *filter_extensions;
41 | unsigned int footnote_ix;
42 | unsigned int written_footnote_ix;
43 | void *opaque;
44 | };
45 |
46 | typedef struct cmark_html_renderer cmark_html_renderer;
47 |
48 | void cmark_render_ascii(cmark_renderer *renderer, const char *s);
49 |
50 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
51 |
52 | char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
53 | void (*outc)(cmark_renderer *, cmark_node *,
54 | cmark_escaping, int32_t,
55 | unsigned char),
56 | int (*render_node)(cmark_renderer *renderer,
57 | cmark_node *node,
58 | cmark_event_type ev_type, int options));
59 |
60 | #ifdef __cplusplus
61 | }
62 | #endif
63 |
64 | #endif
65 |
--------------------------------------------------------------------------------
/src/include/scanners.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_SCANNERS_H
2 | #define CMARK_SCANNERS_H
3 |
4 | #include "cmark-gfm.h"
5 | #include "chunk.h"
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
12 | bufsize_t offset);
13 | bufsize_t _scan_scheme(const unsigned char *p);
14 | bufsize_t _scan_autolink_uri(const unsigned char *p);
15 | bufsize_t _scan_autolink_email(const unsigned char *p);
16 | bufsize_t _scan_html_tag(const unsigned char *p);
17 | bufsize_t _scan_liberal_html_tag(const unsigned char *p);
18 | bufsize_t _scan_html_comment(const unsigned char *p);
19 | bufsize_t _scan_html_pi(const unsigned char *p);
20 | bufsize_t _scan_html_declaration(const unsigned char *p);
21 | bufsize_t _scan_html_cdata(const unsigned char *p);
22 | bufsize_t _scan_html_block_start(const unsigned char *p);
23 | bufsize_t _scan_html_block_start_7(const unsigned char *p);
24 | bufsize_t _scan_html_block_end_1(const unsigned char *p);
25 | bufsize_t _scan_html_block_end_2(const unsigned char *p);
26 | bufsize_t _scan_html_block_end_3(const unsigned char *p);
27 | bufsize_t _scan_html_block_end_4(const unsigned char *p);
28 | bufsize_t _scan_html_block_end_5(const unsigned char *p);
29 | bufsize_t _scan_link_title(const unsigned char *p);
30 | bufsize_t _scan_spacechars(const unsigned char *p);
31 | bufsize_t _scan_atx_heading_start(const unsigned char *p);
32 | bufsize_t _scan_setext_heading_line(const unsigned char *p);
33 | bufsize_t _scan_open_code_fence(const unsigned char *p);
34 | bufsize_t _scan_close_code_fence(const unsigned char *p);
35 | bufsize_t _scan_entity(const unsigned char *p);
36 | bufsize_t _scan_dangerous_url(const unsigned char *p);
37 | bufsize_t _scan_footnote_definition(const unsigned char *p);
38 |
39 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
40 | #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
41 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
42 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
43 | #define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n)
44 | #define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n)
45 | #define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n)
46 | #define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n)
47 | #define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n)
48 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
49 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
50 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
51 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
52 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
53 | #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
54 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
55 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
56 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
57 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
58 | #define scan_setext_heading_line(c, n) \
59 | _scan_at(&_scan_setext_heading_line, c, n)
60 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
61 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
62 | #define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
63 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
64 | #define scan_footnote_definition(c, n) _scan_at(&_scan_footnote_definition, c, n)
65 |
66 | #ifdef __cplusplus
67 | }
68 | #endif
69 |
70 | #endif
71 |
--------------------------------------------------------------------------------
/src/include/syntax_extension.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_SYNTAX_EXTENSION_H
2 | #define CMARK_SYNTAX_EXTENSION_H
3 |
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-extension_api.h"
6 |
7 | #include
8 |
9 | struct cmark_syntax_extension {
10 | cmark_match_block_func last_block_matches;
11 | cmark_open_block_func try_opening_block;
12 | cmark_match_inline_func match_inline;
13 | cmark_inline_from_delim_func insert_inline_from_delim;
14 | cmark_llist * special_inline_chars;
15 | char * name;
16 | void * priv;
17 | bool emphasis;
18 | cmark_free_func free_function;
19 | cmark_get_type_string_func get_type_string_func;
20 | cmark_can_contain_func can_contain_func;
21 | cmark_contains_inlines_func contains_inlines_func;
22 | cmark_common_render_func commonmark_render_func;
23 | cmark_common_render_func plaintext_render_func;
24 | cmark_common_render_func latex_render_func;
25 | cmark_xml_attr_func xml_attr_func;
26 | cmark_common_render_func man_render_func;
27 | cmark_html_render_func html_render_func;
28 | cmark_html_filter_func html_filter_func;
29 | cmark_postprocess_func postprocess_func;
30 | cmark_opaque_alloc_func opaque_alloc_func;
31 | cmark_opaque_free_func opaque_free_func;
32 | cmark_commonmark_escape_func commonmark_escape_func;
33 | };
34 |
35 | #endif
36 |
--------------------------------------------------------------------------------
/src/include/utf8.h:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_UTF8_H
2 | #define CMARK_UTF8_H
3 |
4 | #include
5 | #include "buffer.h"
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | CMARK_GFM_EXPORT
12 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
13 | bufsize_t len);
14 |
15 | CMARK_GFM_EXPORT
16 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
17 |
18 | CMARK_GFM_EXPORT
19 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
20 |
21 | CMARK_GFM_EXPORT
22 | void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
23 | bufsize_t size);
24 |
25 | CMARK_GFM_EXPORT
26 | int cmark_utf8proc_is_space(int32_t uc);
27 |
28 | CMARK_GFM_EXPORT
29 | int cmark_utf8proc_is_punctuation(int32_t uc);
30 |
31 | #ifdef __cplusplus
32 | }
33 | #endif
34 |
35 | #endif
36 |
--------------------------------------------------------------------------------
/src/iterator.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "node.h"
6 | #include "cmark-gfm.h"
7 | #include "iterator.h"
8 |
9 | cmark_iter *cmark_iter_new(cmark_node *root) {
10 | if (root == NULL) {
11 | return NULL;
12 | }
13 | cmark_mem *mem = root->content.mem;
14 | cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter));
15 | iter->mem = mem;
16 | iter->root = root;
17 | iter->cur.ev_type = CMARK_EVENT_NONE;
18 | iter->cur.node = NULL;
19 | iter->next.ev_type = CMARK_EVENT_ENTER;
20 | iter->next.node = root;
21 | return iter;
22 | }
23 |
24 | void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); }
25 |
26 | static bool S_is_leaf(cmark_node *node) {
27 | switch (node->type) {
28 | case CMARK_NODE_HTML_BLOCK:
29 | case CMARK_NODE_THEMATIC_BREAK:
30 | case CMARK_NODE_CODE_BLOCK:
31 | case CMARK_NODE_TEXT:
32 | case CMARK_NODE_SOFTBREAK:
33 | case CMARK_NODE_LINEBREAK:
34 | case CMARK_NODE_CODE:
35 | case CMARK_NODE_HTML_INLINE:
36 | return 1;
37 | }
38 | return 0;
39 | }
40 |
41 | cmark_event_type cmark_iter_next(cmark_iter *iter) {
42 | cmark_event_type ev_type = iter->next.ev_type;
43 | cmark_node *node = iter->next.node;
44 |
45 | iter->cur.ev_type = ev_type;
46 | iter->cur.node = node;
47 |
48 | if (ev_type == CMARK_EVENT_DONE) {
49 | return ev_type;
50 | }
51 |
52 | /* roll forward to next item, setting both fields */
53 | if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
54 | if (node->first_child == NULL) {
55 | /* stay on this node but exit */
56 | iter->next.ev_type = CMARK_EVENT_EXIT;
57 | } else {
58 | iter->next.ev_type = CMARK_EVENT_ENTER;
59 | iter->next.node = node->first_child;
60 | }
61 | } else if (node == iter->root) {
62 | /* don't move past root */
63 | iter->next.ev_type = CMARK_EVENT_DONE;
64 | iter->next.node = NULL;
65 | } else if (node->next) {
66 | iter->next.ev_type = CMARK_EVENT_ENTER;
67 | iter->next.node = node->next;
68 | } else if (node->parent) {
69 | iter->next.ev_type = CMARK_EVENT_EXIT;
70 | iter->next.node = node->parent;
71 | } else {
72 | assert(false);
73 | iter->next.ev_type = CMARK_EVENT_DONE;
74 | iter->next.node = NULL;
75 | }
76 |
77 | return ev_type;
78 | }
79 |
80 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
81 | cmark_event_type event_type) {
82 | iter->next.ev_type = event_type;
83 | iter->next.node = current;
84 | cmark_iter_next(iter);
85 | }
86 |
87 | cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; }
88 |
89 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
90 | return iter->cur.ev_type;
91 | }
92 |
93 | cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; }
94 |
95 | void cmark_consolidate_text_nodes(cmark_node *root) {
96 | if (root == NULL) {
97 | return;
98 | }
99 | cmark_iter *iter = cmark_iter_new(root);
100 | cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
101 | cmark_event_type ev_type;
102 | cmark_node *cur, *tmp, *next;
103 |
104 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
105 | cur = cmark_iter_get_node(iter);
106 | if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
107 | cur->next && cur->next->type == CMARK_NODE_TEXT) {
108 | cmark_strbuf_clear(&buf);
109 | cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
110 | tmp = cur->next;
111 | while (tmp && tmp->type == CMARK_NODE_TEXT) {
112 | cmark_iter_next(iter); // advance pointer
113 | cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
114 | cur->end_column = tmp->end_column;
115 | next = tmp->next;
116 | cmark_node_free(tmp);
117 | tmp = next;
118 | }
119 | cmark_chunk_free(iter->mem, &cur->as.literal);
120 | cur->as.literal = cmark_chunk_buf_detach(&buf);
121 | }
122 | }
123 |
124 | cmark_strbuf_free(&buf);
125 | cmark_iter_free(iter);
126 | }
127 |
128 | void cmark_node_own(cmark_node *root) {
129 | if (root == NULL) {
130 | return;
131 | }
132 | cmark_iter *iter = cmark_iter_new(root);
133 | cmark_event_type ev_type;
134 | cmark_node *cur;
135 |
136 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
137 | cur = cmark_iter_get_node(iter);
138 | if (ev_type == CMARK_EVENT_ENTER) {
139 | switch (cur->type) {
140 | case CMARK_NODE_TEXT:
141 | case CMARK_NODE_HTML_INLINE:
142 | case CMARK_NODE_CODE:
143 | case CMARK_NODE_HTML_BLOCK:
144 | cmark_chunk_to_cstr(iter->mem, &cur->as.literal);
145 | break;
146 | case CMARK_NODE_LINK:
147 | cmark_chunk_to_cstr(iter->mem, &cur->as.link.url);
148 | cmark_chunk_to_cstr(iter->mem, &cur->as.link.title);
149 | break;
150 | case CMARK_NODE_CUSTOM_INLINE:
151 | cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_enter);
152 | cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_exit);
153 | break;
154 | }
155 | }
156 | }
157 |
158 | cmark_iter_free(iter);
159 | }
160 |
--------------------------------------------------------------------------------
/src/libcmark-gfm.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@CMAKE_INSTALL_PREFIX@
2 | exec_prefix=@CMAKE_INSTALL_PREFIX@
3 | libdir=@CMAKE_INSTALL_PREFIX@/@libdir@
4 | includedir=@CMAKE_INSTALL_PREFIX@/include
5 |
6 | Name: libcmark-gfm
7 | Description: CommonMark parsing, rendering, and manipulation with GitHub Flavored Markdown extensions
8 | Version: @PROJECT_VERSION@
9 | Libs: -L${libdir} -lcmark-gfm -lcmark-gfm-extensions
10 | Cflags: -I${includedir}
11 |
--------------------------------------------------------------------------------
/src/linked_list.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "cmark-gfm.h"
4 |
5 | cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) {
6 | cmark_llist *tmp;
7 | cmark_llist *new_node = (cmark_llist *) mem->calloc(1, sizeof(cmark_llist));
8 |
9 | new_node->data = data;
10 | new_node->next = NULL;
11 |
12 | if (!head)
13 | return new_node;
14 |
15 | for (tmp = head; tmp->next; tmp=tmp->next);
16 |
17 | tmp->next = new_node;
18 |
19 | return head;
20 | }
21 |
22 | void cmark_llist_free_full(cmark_mem *mem, cmark_llist *head, cmark_free_func free_func) {
23 | cmark_llist *tmp, *prev;
24 |
25 | for (tmp = head; tmp;) {
26 | if (free_func)
27 | free_func(mem, tmp->data);
28 |
29 | prev = tmp;
30 | tmp = tmp->next;
31 | mem->free(prev);
32 | }
33 | }
34 |
35 | void cmark_llist_free(cmark_mem *mem, cmark_llist *head) {
36 | cmark_llist_free_full(mem, head, NULL);
37 | }
38 |
--------------------------------------------------------------------------------
/src/map.c:
--------------------------------------------------------------------------------
1 | #include "map.h"
2 | #include "utf8.h"
3 | #include "parser.h"
4 |
5 | // normalize map label: collapse internal whitespace to single space,
6 | // remove leading/trailing whitespace, case fold
7 | // Return NULL if the label is actually empty (i.e. composed solely from
8 | // whitespace)
9 | unsigned char *normalize_map_label(cmark_mem *mem, cmark_chunk *ref) {
10 | cmark_strbuf normalized = CMARK_BUF_INIT(mem);
11 | unsigned char *result;
12 |
13 | if (ref == NULL)
14 | return NULL;
15 |
16 | if (ref->len == 0)
17 | return NULL;
18 |
19 | cmark_utf8proc_case_fold(&normalized, ref->data, ref->len);
20 | cmark_strbuf_trim(&normalized);
21 | cmark_strbuf_normalize_whitespace(&normalized);
22 |
23 | result = cmark_strbuf_detach(&normalized);
24 | assert(result);
25 |
26 | if (result[0] == '\0') {
27 | mem->free(result);
28 | return NULL;
29 | }
30 |
31 | return result;
32 | }
33 |
/* Byte-wise comparison of two NUL-terminated labels, with strcmp()'s sign
 * convention. */
static int
labelcmp(const unsigned char *a, const unsigned char *b) {
  const char *lhs = (const char *)a;
  const char *rhs = (const char *)b;
  return strcmp(lhs, rhs);
}
38 |
39 | static int
40 | refcmp(const void *p1, const void *p2) {
41 | cmark_map_entry *r1 = *(cmark_map_entry **)p1;
42 | cmark_map_entry *r2 = *(cmark_map_entry **)p2;
43 | int res = labelcmp(r1->label, r2->label);
44 | return res ? res : ((int)r1->age - (int)r2->age);
45 | }
46 |
47 | static int
48 | refsearch(const void *label, const void *p2) {
49 | cmark_map_entry *ref = *(cmark_map_entry **)p2;
50 | return labelcmp((const unsigned char *)label, ref->label);
51 | }
52 |
53 | static void sort_map(cmark_map *map) {
54 | size_t i = 0, last = 0, size = map->size;
55 | cmark_map_entry *r = map->refs, **sorted = NULL;
56 |
57 | sorted = (cmark_map_entry **)map->mem->calloc(size, sizeof(cmark_map_entry *));
58 | while (r) {
59 | sorted[i++] = r;
60 | r = r->next;
61 | }
62 |
63 | qsort(sorted, size, sizeof(cmark_map_entry *), refcmp);
64 |
65 | for (i = 1; i < size; i++) {
66 | if (labelcmp(sorted[i]->label, sorted[last]->label) != 0)
67 | sorted[++last] = sorted[i];
68 | }
69 |
70 | map->sorted = sorted;
71 | map->size = last + 1;
72 | }
73 |
74 | cmark_map_entry *cmark_map_lookup(cmark_map *map, cmark_chunk *label) {
75 | cmark_map_entry **ref = NULL;
76 | cmark_map_entry *r = NULL;
77 | unsigned char *norm;
78 |
79 | if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
80 | return NULL;
81 |
82 | if (map == NULL || !map->size)
83 | return NULL;
84 |
85 | norm = normalize_map_label(map->mem, label);
86 | if (norm == NULL)
87 | return NULL;
88 |
89 | if (!map->sorted)
90 | sort_map(map);
91 |
92 | ref = (cmark_map_entry **)bsearch(norm, map->sorted, map->size, sizeof(cmark_map_entry *), refsearch);
93 | map->mem->free(norm);
94 |
95 | if (ref != NULL) {
96 | r = ref[0];
97 | /* Check for expansion limit */
98 | if (r->size > map->max_ref_size - map->ref_size)
99 | return NULL;
100 | map->ref_size += r->size;
101 | }
102 |
103 | return r;
104 | }
105 |
106 | void cmark_map_free(cmark_map *map) {
107 | cmark_map_entry *ref;
108 |
109 | if (map == NULL)
110 | return;
111 |
112 | ref = map->refs;
113 | while (ref) {
114 | cmark_map_entry *next = ref->next;
115 | map->free(map, ref);
116 | ref = next;
117 | }
118 |
119 | map->mem->free(map->sorted);
120 | map->mem->free(map);
121 | }
122 |
123 | cmark_map *cmark_map_new(cmark_mem *mem, cmark_map_free_f free) {
124 | cmark_map *map = (cmark_map *)mem->calloc(1, sizeof(cmark_map));
125 | map->mem = mem;
126 | map->free = free;
127 | map->max_ref_size = UINT_MAX;
128 | return map;
129 | }
130 |
--------------------------------------------------------------------------------
/src/plugin.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "plugin.h"
4 |
5 | extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
6 |
7 | int cmark_plugin_register_syntax_extension(cmark_plugin * plugin,
8 | cmark_syntax_extension * extension) {
9 | plugin->syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, plugin->syntax_extensions, extension);
10 | return 1;
11 | }
12 |
13 | cmark_plugin *
14 | cmark_plugin_new(void) {
15 | cmark_plugin *res = (cmark_plugin *) CMARK_DEFAULT_MEM_ALLOCATOR.calloc(1, sizeof(cmark_plugin));
16 |
17 | res->syntax_extensions = NULL;
18 |
19 | return res;
20 | }
21 |
22 | void
23 | cmark_plugin_free(cmark_plugin *plugin) {
24 | cmark_llist_free_full(&CMARK_DEFAULT_MEM_ALLOCATOR,
25 | plugin->syntax_extensions,
26 | (cmark_free_func) cmark_syntax_extension_free);
27 | CMARK_DEFAULT_MEM_ALLOCATOR.free(plugin);
28 | }
29 |
30 | cmark_llist *
31 | cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) {
32 | cmark_llist *res = plugin->syntax_extensions;
33 |
34 | plugin->syntax_extensions = NULL;
35 | return res;
36 | }
37 |
--------------------------------------------------------------------------------
/src/references.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "cmark-gfm.h"
4 | #include "parser.h"
5 | #include "references.h"
6 | #include "inlines.h"
7 | #include "chunk.h"
8 |
9 | static void reference_free(cmark_map *map, cmark_map_entry *_ref) {
10 | cmark_reference *ref = (cmark_reference *)_ref;
11 | cmark_mem *mem = map->mem;
12 | if (ref != NULL) {
13 | mem->free(ref->entry.label);
14 | cmark_chunk_free(mem, &ref->url);
15 | cmark_chunk_free(mem, &ref->title);
16 | cmark_chunk_free(mem, &ref->attributes);
17 | mem->free(ref);
18 | }
19 | }
20 |
21 | void cmark_reference_create(cmark_map *map, cmark_chunk *label,
22 | cmark_chunk *url, cmark_chunk *title) {
23 | cmark_reference *ref;
24 | unsigned char *reflabel = normalize_map_label(map->mem, label);
25 |
26 | /* empty reference name, or composed from only whitespace */
27 | if (reflabel == NULL)
28 | return;
29 |
30 | assert(map->sorted == NULL);
31 |
32 | ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
33 | ref->entry.label = reflabel;
34 | ref->is_attributes_reference = false;
35 | ref->url = cmark_clean_url(map->mem, url);
36 | ref->title = cmark_clean_title(map->mem, title);
37 | ref->attributes = cmark_chunk_literal("");
38 | ref->entry.age = map->size;
39 | ref->entry.next = map->refs;
40 | ref->entry.size = ref->url.len + ref->title.len;
41 |
42 | map->refs = (cmark_map_entry *)ref;
43 | map->size++;
44 | }
45 |
46 | void cmark_reference_create_attributes(cmark_map *map, cmark_chunk *label,
47 | cmark_chunk *attributes) {
48 | cmark_reference *ref;
49 | unsigned char *reflabel = normalize_map_label(map->mem, label);
50 |
51 | /* empty reference name, or composed from only whitespace */
52 | if (reflabel == NULL)
53 | return;
54 |
55 | assert(map->sorted == NULL);
56 |
57 | ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
58 | ref->entry.label = reflabel;
59 | ref->is_attributes_reference = true;
60 | ref->url = cmark_chunk_literal("");
61 | ref->title = cmark_chunk_literal("");
62 | ref->attributes = cmark_clean_attributes(map->mem, attributes);
63 | ref->entry.age = map->size;
64 | ref->entry.next = map->refs;
65 |
66 | map->refs = (cmark_map_entry *)ref;
67 | map->size++;
68 | }
69 |
70 | cmark_map *cmark_reference_map_new(cmark_mem *mem) {
71 | return cmark_map_new(mem, reference_free);
72 | }
73 |
--------------------------------------------------------------------------------
/src/registry.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "cmark-gfm.h"
6 | #include "mutex.h"
7 | #include "syntax_extension.h"
8 | #include "registry.h"
9 | #include "plugin.h"
10 |
11 | extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
12 |
13 | static cmark_llist *syntax_extensions = NULL;
14 |
15 | CMARK_DEFINE_LOCK(extensions);
16 |
17 | void cmark_register_plugin(cmark_plugin_init_func reg_fn) {
18 | cmark_plugin *plugin = cmark_plugin_new();
19 |
20 | if (!reg_fn(plugin)) {
21 | cmark_plugin_free(plugin);
22 | return;
23 | }
24 |
25 | cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin),
26 | *it;
27 |
28 | CMARK_INITIALIZE_AND_LOCK(extensions);
29 |
30 | for (it = syntax_extensions_list; it; it = it->next) {
31 | syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data);
32 | }
33 |
34 | CMARK_UNLOCK(extensions);
35 |
36 | cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions_list);
37 | cmark_plugin_free(plugin);
38 | }
39 |
40 | void cmark_release_plugins(void) {
41 | CMARK_INITIALIZE_AND_LOCK(extensions);
42 |
43 | if (syntax_extensions) {
44 | cmark_llist_free_full(
45 | &CMARK_DEFAULT_MEM_ALLOCATOR,
46 | syntax_extensions,
47 | (cmark_free_func) cmark_syntax_extension_free);
48 | syntax_extensions = NULL;
49 | }
50 |
51 | CMARK_UNLOCK(extensions);
52 | }
53 |
54 | cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) {
55 | cmark_llist *it;
56 | cmark_llist *res = NULL;
57 |
58 | CMARK_INITIALIZE_AND_LOCK(extensions);
59 |
60 | for (it = syntax_extensions; it; it = it->next) {
61 | res = cmark_llist_append(mem, res, it->data);
62 | }
63 |
64 | CMARK_UNLOCK(extensions);
65 | return res;
66 | }
67 |
68 | cmark_syntax_extension *cmark_find_syntax_extension(const char *name) {
69 | cmark_llist *tmp;
70 | cmark_syntax_extension *res = NULL;
71 |
72 | CMARK_INITIALIZE_AND_LOCK(extensions);
73 |
74 | for (tmp = syntax_extensions; tmp; tmp = tmp->next) {
75 | cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data;
76 | if (!strcmp(ext->name, name)) {
77 | res = ext;
78 | break;
79 | }
80 | }
81 |
82 | CMARK_UNLOCK(extensions);
83 | return res;
84 | }
85 |
--------------------------------------------------------------------------------
/src/syntax_extension.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "cmark-gfm.h"
6 | #include "syntax_extension.h"
7 | #include "buffer.h"
8 |
9 | extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR;
10 |
11 | static cmark_mem *_mem = &CMARK_DEFAULT_MEM_ALLOCATOR;
12 |
13 | void cmark_syntax_extension_free(cmark_mem *mem, cmark_syntax_extension *extension) {
14 | if (extension->free_function && extension->priv) {
15 | extension->free_function(mem, extension->priv);
16 | }
17 |
18 | cmark_llist_free(mem, extension->special_inline_chars);
19 | mem->free(extension->name);
20 | mem->free(extension);
21 | }
22 |
23 | cmark_syntax_extension *cmark_syntax_extension_new(const char *name) {
24 | cmark_syntax_extension *res = (cmark_syntax_extension *) _mem->calloc(1, sizeof(cmark_syntax_extension));
25 | size_t size = strlen(name) + 1;
26 | res->name = (char *) _mem->calloc(size, sizeof(char));
27 | #if defined(_WIN32)
28 | strcpy_s(res->name, size, name);
29 | #else
30 | strcpy(res->name, name);
31 | #endif
32 | return res;
33 | }
34 |
35 | cmark_node_type cmark_syntax_extension_add_node(int is_inline) {
36 | cmark_node_type *ref = !is_inline ? &CMARK_NODE_LAST_BLOCK : &CMARK_NODE_LAST_INLINE;
37 |
38 | if ((*ref & CMARK_NODE_VALUE_MASK) == CMARK_NODE_VALUE_MASK) {
39 | assert(false);
40 | return (cmark_node_type) 0;
41 | }
42 |
43 | return *ref = (cmark_node_type) ((int) *ref + 1);
44 | }
45 |
46 | void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension,
47 | int emphasis) {
48 | extension->emphasis = emphasis == 1;
49 | }
50 |
51 | void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
52 | cmark_open_block_func func) {
53 | extension->try_opening_block = func;
54 | }
55 |
56 | void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
57 | cmark_match_block_func func) {
58 | extension->last_block_matches = func;
59 | }
60 |
61 | void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
62 | cmark_match_inline_func func) {
63 | extension->match_inline = func;
64 | }
65 |
66 | void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
67 | cmark_inline_from_delim_func func) {
68 | extension->insert_inline_from_delim = func;
69 | }
70 |
71 | void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
72 | cmark_llist *special_chars) {
73 | extension->special_inline_chars = special_chars;
74 | }
75 |
76 | void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
77 | cmark_get_type_string_func func) {
78 | extension->get_type_string_func = func;
79 | }
80 |
81 | void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
82 | cmark_can_contain_func func) {
83 | extension->can_contain_func = func;
84 | }
85 |
86 | void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
87 | cmark_contains_inlines_func func) {
88 | extension->contains_inlines_func = func;
89 | }
90 |
91 | void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
92 | cmark_common_render_func func) {
93 | extension->commonmark_render_func = func;
94 | }
95 |
96 | void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
97 | cmark_common_render_func func) {
98 | extension->plaintext_render_func = func;
99 | }
100 |
101 | void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
102 | cmark_common_render_func func) {
103 | extension->latex_render_func = func;
104 | }
105 |
106 | void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension,
107 | cmark_xml_attr_func func) {
108 | extension->xml_attr_func = func;
109 | }
110 |
111 | void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
112 | cmark_common_render_func func) {
113 | extension->man_render_func = func;
114 | }
115 |
116 | void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
117 | cmark_html_render_func func) {
118 | extension->html_render_func = func;
119 | }
120 |
121 | void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
122 | cmark_html_filter_func func) {
123 | extension->html_filter_func = func;
124 | }
125 |
126 | void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
127 | cmark_postprocess_func func) {
128 | extension->postprocess_func = func;
129 | }
130 |
131 | void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
132 | void *priv,
133 | cmark_free_func free_func) {
134 | extension->priv = priv;
135 | extension->free_function = free_func;
136 | }
137 |
138 | void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) {
139 | return extension->priv;
140 | }
141 |
142 | void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension,
143 | cmark_opaque_alloc_func func) {
144 | extension->opaque_alloc_func = func;
145 | }
146 |
147 | void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
148 | cmark_opaque_free_func func) {
149 | extension->opaque_free_func = func;
150 | }
151 |
152 | void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
153 | cmark_commonmark_escape_func func) {
154 | extension->commonmark_escape_func = func;
155 | }
156 |
--------------------------------------------------------------------------------
/suppressions:
--------------------------------------------------------------------------------
1 | {
2 | .
3 | Memcheck:Leak
4 | fun:malloc
5 | fun:__smakebuf
6 | fun:__srefill0
7 | fun:__fread
8 | fun:fread
9 | fun:main
10 | }
11 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # To get verbose output: cmake --build build --target "test" -- ARGS='-V'
2 |
3 | # By default, we run the spec tests only if python3 is available.
4 | # To require the spec tests, compile with -DSPEC_TESTS=1
5 |
# Python 3 is mandatory only when the spec tests were explicitly requested.
set(PYTHON_REQUIRED)
if(SPEC_TESTS)
  set(PYTHON_REQUIRED REQUIRED)
endif()

if(CMAKE_VERSION VERSION_LESS 3.12)
  # Older CMake: fall back to FindPythonInterp and emulate the
  # Python3::Interpreter imported target that FindPython3 would provide.
  find_package(PythonInterp 3 ${PYTHON_REQUIRED})
  set(Python3_Interpreter_FOUND ${PYTHONINTERP_FOUND})
  add_executable(Python3::Interpreter IMPORTED)
  set_target_properties(Python3::Interpreter PROPERTIES
    IMPORTED_LOCATION ${PYTHON_EXECUTABLE})
else()
  find_package(Python3 ${PYTHON_REQUIRED} COMPONENTS Interpreter)
endif()
21 |
# Register the Python-driven test suites. Library-based tests are added only
# for shared builds; the executable-based tests are always added.
#
# NOTE(review): the generator expressions below were reconstructed from a bare
# "$" (their <...> part had been stripped). Confirm that the library target is
# named `libcmark-gfm` and the executable target `cmark-gfm`.
if(Python3_Interpreter_FOUND)

  add_test(NAME html_normalization
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "-m" "doctest" "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py")

  if(BUILD_SHARED_LIBS)
    add_test(NAME spectest_library
      COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
        --no-normalize
        --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
        --library-dir "$<TARGET_FILE_DIR:libcmark-gfm>")

    add_test(NAME pathological_tests_library
      COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
        --library-dir "$<TARGET_FILE_DIR:libcmark-gfm>")

    add_test(NAME roundtriptest_library
      COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
        --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
        --library-dir "$<TARGET_FILE_DIR:libcmark-gfm>")

    add_test(NAME entity_library
      COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
        --library-dir "$<TARGET_FILE_DIR:libcmark-gfm>")
  endif()

  add_test(NAME spectest_executable
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
      --no-normalize
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
      --program "$<TARGET_FILE:cmark-gfm>")

  add_test(NAME smartpuncttest_executable
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
      --no-normalize
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt"
      --program "$<TARGET_FILE:cmark-gfm> --smart")

  add_test(NAME extensions_executable
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
      --no-normalize
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt"
      --program "$<TARGET_FILE:cmark-gfm>"
      --extensions "table strikethrough autolink tagfilter footnotes tasklist")

  add_test(NAME roundtrip_extensions_executable
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/extensions.txt"
      --program "$<TARGET_FILE:cmark-gfm>"
      --extensions "table strikethrough autolink tagfilter footnotes tasklist")

  add_test(NAME option_table_prefer_style_attributes
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/extensions-table-prefer-style-attributes.txt"
      --program "$<TARGET_FILE:cmark-gfm> --table-prefer-style-attributes"
      --extensions "table strikethrough autolink tagfilter footnotes tasklist")

  add_test(NAME option_full_info_string
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/extensions-full-info-string.txt"
      --program "$<TARGET_FILE:cmark-gfm> --full-info-string")

  add_test(NAME regressiontest_executable
    COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
      --no-normalize
      --spec "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt"
      --program "$<TARGET_FILE:cmark-gfm>")

else(Python3_Interpreter_FOUND)

  message(WARNING "A python 3 interpreter is required to run the spec tests")

endif(Python3_Interpreter_FOUND)
95 |
96 |
# Optional libFuzzer harness, built only when CMARK_LIB_FUZZER is set.
if(CMARK_LIB_FUZZER)
  add_executable(cmark-fuzz cmark-fuzz.c)
  target_link_libraries(cmark-fuzz
    PRIVATE libcmark-gfm_static "${CMAKE_LIB_FUZZER_PATH}")
  # cmark is written in C, but the libFuzzer runtime is C++ and therefore has
  # to be linked against the C++ runtime.
  set_target_properties(cmark-fuzz PROPERTIES LINKER_LANGUAGE CXX)
endif()
107 |
--------------------------------------------------------------------------------
/test/afl_test_cases/test.md:
--------------------------------------------------------------------------------
1 | # H1
2 |
3 | H2
4 | --
5 |
6 | t ☺
7 | *b* **em** `c`
8 | ≥\&\
9 | \_e\_
10 |
11 | 4) I1
12 |
13 | 5) I2
14 | > [l](/u "t")
15 | >
16 | > - [f]
17 | > - 
18 | >
19 | >>
20 | >>
21 |
22 | ~~~ l☺
23 | cb
24 | ~~~
25 |
26 | c1
27 | c2
28 |
29 | ***
30 |
31 |
32 | x
33 |
34 |
35 | | a | b |
36 | | --- | --- |
37 | | c | `d|` \| e |
38 |
39 | google ~~yahoo~~
40 |
41 | google.com http://google.com google@google.com
42 |
43 | and but
44 |
45 |
46 | sure
47 |
48 |
49 | [f]: /u "t"
50 |
--------------------------------------------------------------------------------
/test/cmark-fuzz.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "cmark-gfm.h"
5 | #include "cmark-gfm-core-extensions.h"
6 |
/* NULL-terminated list of the syntax extensions attached to every fuzzed
 * parser. */
const char *extension_names[] = {
    "autolink", "strikethrough", "table", "tagfilter",
    NULL,
};
14 |
/* libFuzzer start-up hook: make sure the core GFM extensions are registered
 * before any input is processed. The arguments are not used. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  cmark_gfm_core_extensions_ensure_registered();

  return 0;
}
19 |
20 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
21 | struct __attribute__((packed)) {
22 | int options;
23 | int width;
24 | } fuzz_config;
25 |
26 | if (size >= sizeof(fuzz_config)) {
27 | /* The beginning of `data` is treated as fuzzer configuration */
28 | memcpy(&fuzz_config, data, sizeof(fuzz_config));
29 |
30 | /* Remainder of input is the markdown */
31 | const char *markdown = (const char *)(data + sizeof(fuzz_config));
32 | const size_t markdown_size = size - sizeof(fuzz_config);
33 | cmark_parser *parser = cmark_parser_new(fuzz_config.options);
34 |
35 | for (const char **it = extension_names; *it; ++it) {
36 | const char *extension_name = *it;
37 | cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
38 | if (!syntax_extension) {
39 | fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
40 | abort();
41 | }
42 | cmark_parser_attach_syntax_extension(parser, syntax_extension);
43 | }
44 |
45 | cmark_parser_feed(parser, markdown, markdown_size);
46 | cmark_node *doc = cmark_parser_finish(parser);
47 |
48 | free(cmark_render_commonmark(doc, fuzz_config.options, fuzz_config.width));
49 | free(cmark_render_html(doc, fuzz_config.options, NULL));
50 | free(cmark_render_latex(doc, fuzz_config.options, fuzz_config.width));
51 | free(cmark_render_man(doc, fuzz_config.options, fuzz_config.width));
52 | free(cmark_render_xml(doc, fuzz_config.options));
53 |
54 | cmark_node_free(doc);
55 | cmark_parser_free(parser);
56 | }
57 | return 0;
58 | }
59 |
--------------------------------------------------------------------------------
/test/cmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from ctypes import CDLL, c_char_p, c_size_t, c_int, c_void_p
5 | from subprocess import *
6 | import platform
7 | import os
8 |
def pipe_through_prog(prog, text):
    """Run `prog` (split on whitespace into argv) with `text` on its stdin.

    Returns [returncode, stdout decoded as UTF-8, raw stderr bytes].
    """
    argv = prog.split()
    proc = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate(input=text.encode('utf-8'))
    return [proc.returncode, out.decode('utf-8'), err]
13 |
def parse(lib, extlib, text, extensions):
    """Parse `text` with the cmark-gfm library loaded as `lib`.

    `extlib` must expose cmark_gfm_core_extensions_ensure_registered, and
    `extensions` is an iterable of extension names (duplicates are ignored).
    Raises Exception when a named extension cannot be found.

    Returns [document, parser] as opaque pointers; neither is freed here.
    """
    cmark_gfm_core_extensions_ensure_registered = extlib.cmark_gfm_core_extensions_ensure_registered

    find_syntax_extension = lib.cmark_find_syntax_extension
    find_syntax_extension.restype = c_void_p
    find_syntax_extension.argtypes = [c_char_p]

    parser_attach_syntax_extension = lib.cmark_parser_attach_syntax_extension
    parser_attach_syntax_extension.argtypes = [c_void_p, c_void_p]

    parser_new = lib.cmark_parser_new
    parser_new.restype = c_void_p
    parser_new.argtypes = [c_int]

    parser_feed = lib.cmark_parser_feed
    # The C length parameter is a size_t, so declare it as c_size_t rather
    # than c_int to match its width and signedness.
    parser_feed.argtypes = [c_void_p, c_char_p, c_size_t]

    parser_finish = lib.cmark_parser_finish
    parser_finish.restype = c_void_p
    parser_finish.argtypes = [c_void_p]

    cmark_gfm_core_extensions_ensure_registered()

    parser = parser_new(0)
    for e in set(extensions):
        ext = find_syntax_extension(bytes(e, 'utf-8'))
        if not ext:
            raise Exception("Extension not found: '{}'".format(e))
        parser_attach_syntax_extension(parser, ext)

    # The length passed to the parser is the byte length, not the number of
    # characters.
    textbytes = text.encode('utf-8')
    textlen = len(textbytes)
    parser_feed(parser, textbytes, textlen)

    return [parser_finish(parser), parser]
49 |
def to_html(lib, extlib, text, extensions):
    """Render ``text`` to HTML through the cmark shared library.

    The document is parsed with the given syntax ``extensions`` attached and
    rendered with the CMARK_OPT_UNSAFE option, passing the parser's extension
    list to the renderer.

    Returns ``[0, html, '']`` so the result has the same shape as the
    ``[returncode, output, error]`` triple produced by the subprocess path.
    """
    document, parser = parse(lib, extlib, text, extensions)

    # Declare the pointer argument explicitly; without argtypes ctypes would
    # pass the Python int as a C int and truncate 64-bit pointers.
    node_free = lib.cmark_node_free
    node_free.restype = None
    node_free.argtypes = [c_void_p]
    parser_free = lib.cmark_parser_free
    parser_free.restype = None
    parser_free.argtypes = [c_void_p]

    try:
        parser_get_syntax_extensions = lib.cmark_parser_get_syntax_extensions
        parser_get_syntax_extensions.restype = c_void_p
        parser_get_syntax_extensions.argtypes = [c_void_p]
        syntax_extensions = parser_get_syntax_extensions(parser)

        render_html = lib.cmark_render_html
        render_html.restype = c_char_p
        render_html.argtypes = [c_void_p, c_int, c_void_p]
        # 1 << 17 == CMARK_OPT_UNSAFE
        # NOTE(review): c_char_p copies the C string into a bytes object; the
        # buffer allocated by the renderer itself is not released here.
        result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
    finally:
        # The extension list is obtained from the parser, so the parser is
        # released only after rendering has finished.
        node_free(document)
        parser_free(parser)
    return [0, result, '']
63 |
def to_commonmark(lib, extlib, text, extensions):
    """Round-trip ``text`` to CommonMark through the cmark shared library.

    The document is parsed with the given syntax ``extensions`` attached and
    rendered with options ``0`` and width ``0``.

    Returns ``[0, commonmark, '']`` so the result has the same shape as the
    ``[returncode, output, error]`` triple produced by the subprocess path.
    """
    document, parser = parse(lib, extlib, text, extensions)

    # Declare the pointer argument explicitly; without argtypes ctypes would
    # pass the Python int as a C int and truncate 64-bit pointers.
    node_free = lib.cmark_node_free
    node_free.restype = None
    node_free.argtypes = [c_void_p]
    parser_free = lib.cmark_parser_free
    parser_free.restype = None
    parser_free.argtypes = [c_void_p]

    try:
        render_commonmark = lib.cmark_render_commonmark
        render_commonmark.restype = c_char_p
        render_commonmark.argtypes = [c_void_p, c_int, c_int]
        # NOTE(review): c_char_p copies the C string into a bytes object; the
        # buffer allocated by the renderer itself is not released here.
        result = render_commonmark(document, 0, 0).decode('utf-8')
    finally:
        # Release the parsed tree and the parser so repeated calls do not
        # accumulate native allocations.
        node_free(document)
        parser_free(parser)
    return [0, result, '']
72 |
class CMark:
    """Front end for converting markdown, via a program or the shared library.

    After construction the instance exposes two callables:

    * ``to_html(text, exts=[])``
    * ``to_commonmark(text, exts=[])``

    Each returns a ``[returncode, output, error]`` triple.  ``exts`` lists
    extra extension names for that call; they are combined with the
    extensions given to the constructor.
    """

    def __init__(self, prog=None, library_dir=None, extensions=None):
        """Select the conversion back end.

        Parameters:
            prog: command line of the converter program.  When given, every
                conversion runs it as a subprocess with ``--unsafe`` appended.
            library_dir: directory containing the ``cmark-gfm`` shared
                library; used only when ``prog`` is not given.  Defaults to
                ``../build/src``.  The extensions library is loaded from the
                sibling ``extensions`` directory.
            extensions: whitespace-separated extension names applied to every
                conversion.

        Raises ``FileNotFoundError`` when no ``cmark-gfm`` library file exists
        in ``library_dir``.
        """
        self.prog = prog
        self.extensions = extensions.split() if extensions else []

        if prog:
            prog += ' --unsafe'

            def extsfun(exts):
                # One " -e NAME" per distinct extension, in first-seen order
                # so the generated command line is the same on every run.
                return ''.join(' -e ' + e for e in dict.fromkeys(exts))

            self.to_html = lambda x, exts=[]: pipe_through_prog(
                prog + extsfun(exts + self.extensions), x)
            self.to_commonmark = lambda x, exts=[]: pipe_through_prog(
                prog + ' -t commonmark' + extsfun(exts + self.extensions), x)
        else:
            # Shared-library naming differs per platform; Windows builds may
            # or may not carry the "lib" prefix, so both are tried.
            sysname = platform.system()
            if sysname == 'Darwin':
                libnames = [["lib", ".dylib"]]
            elif sysname == 'Windows':
                libnames = [["", ".dll"], ["lib", ".dll"]]
            else:
                libnames = [["lib", ".so"]]
            if not library_dir:
                library_dir = os.path.join("..", "build", "src")

            libpath = None
            for prefix, suffix in libnames:
                candidate = os.path.join(library_dir, prefix + "cmark-gfm" + suffix)
                if os.path.isfile(candidate):
                    libpath = candidate
                    break
            if libpath is None:
                # Fail with an explicit message instead of an unbound-name
                # error when the library has not been built or the directory
                # is wrong.
                tried = ', '.join(p + "cmark-gfm" + s for p, s in libnames)
                raise FileNotFoundError(
                    "cmark-gfm library not found in '{}' (tried: {})".format(
                        library_dir, tried))

            cmark = CDLL(libpath)
            # The extensions library uses the same prefix/suffix as the core
            # library that was just found.
            extlib = CDLL(os.path.join(
                library_dir, "..", "extensions",
                prefix + "cmark-gfm-extensions" + suffix))
            self.to_html = lambda x, exts=[]: to_html(
                cmark, extlib, x, exts + self.extensions)
            self.to_commonmark = lambda x, exts=[]: to_commonmark(
                cmark, extlib, x, exts + self.extensions)
105 |
106 |
--------------------------------------------------------------------------------
/test/entity_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import re
5 | import os
6 | import argparse
7 | import sys
8 | import platform
9 | import html
10 | from cmark import CMark
11 |
def get_entities():
    """Read the entity table from ``../src/entities.inc``.

    The file is located relative to this script.  Every line of the form
    ``{(unsigned char*)"NAME", {b1, b2, ..., bn}`` contributes one entry.

    Returns a list of ``(name, text)`` tuples where ``text`` is the UTF-8
    decoding of the listed byte values with the final value dropped
    (presumably the terminating zero -- confirm against the generator).
    """
    regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
    inc_path = os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')
    with open(inc_path) as f:
        code = f.read()

    entities = []
    for name, byte_list in re.findall(regex, code, re.MULTILINE):
        # Drop the last listed value before converting the rest to bytes.
        tokens = byte_list.split(", ")[:-1]
        decoded = bytes(int(tok) for tok in tokens).decode('utf-8')
        entities.append((name, decoded))
    return entities
21 |
# Script entry point: convert "&NAME;" for every known entity and check that
# the expected text appears in the HTML output.  Exits with status 0 when
# every entity passes and 1 otherwise.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run cmark tests.')
    parser.add_argument('--program', dest='program', nargs='?', default=None,
            help='program to test')
    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
            default=None, help='directory containing dynamic library')
    args = parser.parse_args(sys.argv[1:])

    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    entities = get_entities()

    passed = 0
    errored = 0
    failed = 0

    # Entities whose decoded text cannot appear literally in HTML output:
    # the renderer escapes the double quote and the angle brackets, so the
    # escaped form is what has to be searched for.
    exceptions = {
        'quot': '&quot;',
        'QUOT': '&quot;',

        # These are broken, but I'm not too worried about them.
        # (angle bracket followed by U+20D2 COMBINING LONG VERTICAL LINE OVERLAY)
        'nvlt': '&lt;\u20d2',
        'nvgt': '&gt;\u20d2',
    }

    print("Testing entities:")
    for entity, utf8 in entities:
        [rc, actual, err] = cmark.to_html("&{};".format(entity))
        check = exceptions.get(entity, utf8)

        if rc != 0:
            errored += 1
            print(entity, '[ERRORED (return code {})]'.format(rc))
            print(err)
        elif check in actual:
            passed += 1
        else:
            print(entity, '[FAILED]')
            print(repr(actual))
            failed += 1

    print("{} passed, {} failed, {} errored".format(passed, failed, errored))
    # sys.exit is used rather than the interactive-only exit() helper, which
    # is injected by the site module and is not guaranteed to exist.
    if failed == 0 and errored == 0:
        sys.exit(0)
    else:
        sys.exit(1)
68 |
--------------------------------------------------------------------------------
/test/extensions-full-info-string.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: --full-info-string test
3 | author: Ashe Connor
4 | version: 0.1
5 | date: '2018-08-08'
6 | license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
7 | ...
8 |
9 | ## `--full-info-string`
10 |
11 | Without extended info:
12 |
13 | ```````````````````````````````` example
14 | ```ruby
15 | module Foo
16 | ```
17 | .
18 | module Foo
19 |
20 | ````````````````````````````````
21 |
22 | With extended info:
23 |
24 | ```````````````````````````````` example
25 | ```ruby some "data"
26 | module Foo
27 | ```
28 | .
29 | module Foo
30 |
31 | ````````````````````````````````
32 |
33 | With an embedded NUL:
34 |
35 | ```````````````````````````````` example
36 | ```ruby nul nul
37 | module Foo
38 | ```
39 | .
40 | module Foo
41 |
42 | ````````````````````````````````
43 |
44 | With a lot:
45 |
46 | ```````````````````````````````` example
47 | ```ruby xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
48 | module Foo
49 | ```
50 | .
51 | module Foo
52 |
53 | ````````````````````````````````
54 |
--------------------------------------------------------------------------------
/test/extensions-table-prefer-style-attributes.txt:
--------------------------------------------------------------------------------
1 | ---
2 | title: Extensions test with --table-prefer-style-attributes
3 | author: FUJI Goro
4 | version: 0.1
5 | date: '2018-02-20'
6 | license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
7 | ...
8 |
9 | ## Tables
10 |
11 | Table alignment:
12 |
13 | ```````````````````````````````` example
14 | aaa | bbb | ccc | ddd | eee
15 | :-- | --- | :-: | --- | --:
16 | fff | ggg | hhh | iii | jjj
17 | .
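18 | <table>
19 | <thead>
20 | <tr>
21 | <th style="text-align: left">aaa</th>
22 | <th>bbb</th>
23 | <th style="text-align: center">ccc</th>
24 | <th>ddd</th>
25 | <th style="text-align: right">eee</th>
26 | </tr>
27 | </thead>
28 | <tbody>
29 | <tr>
30 | <td style="text-align: left">fff</td>
31 | <td>ggg</td>
32 | <td style="text-align: center">hhh</td>
33 | <td>iii</td>
34 | <td style="text-align: right">jjj</td>
35 | </tr>
36 | </tbody>
37 | </table>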
38 | ````````````````````````````````
39 |
--------------------------------------------------------------------------------
/test/fuzzing_dictionary:
--------------------------------------------------------------------------------
1 | asterisk="*"
2 | attr_generic=" a=\"1\""
3 | attr_href=" href=\"1\""
4 | attr_xml_lang=" xml:lang=\"1\""
5 | attr_xmlns=" xmlns=\"1\""
6 | backslash="\\"
7 | backtick="`"
8 | colon=":"
9 | dashes="---"
10 | double_quote="\""
11 | entity_builtin="<"
12 | entity_decimal=""
13 | entity_external="&a;"
14 | entity_hex=""
15 | equals="==="
16 | exclamation="!"
17 | greater_than=">"
18 | hash="#"
19 | hyphen="-"
20 | indent=" "
21 | left_bracket="["
22 | left_paren="("
23 | less_than="<"
24 | plus="+"
25 | right_bracket="]"
26 | right_paren=")"
27 | single_quote="'"
28 | string_any="ANY"
29 | string_brackets="[]"
30 | string_cdata="CDATA"
31 | string_dashes="--"
32 | string_empty_dblquotes="\"\""
33 | string_empty_quotes="''"
34 | string_idrefs="IDREFS"
35 | string_parentheses="()"
36 | string_pcdata="#PCDATA"
37 | tag_cdata=""
39 | tag_doctype=""
44 | tag_open_close=" "
45 | tag_open_exclamation=""
48 | tag_xml_q=""
49 | underscore="_"
50 |
51 | # GFM specific
52 |
53 | strikethrough="~~~strike~~~"
54 | user_mention="@octocat"
55 | email_mention="octocat@github.com"
56 | http="http://"
57 | https="https://"
58 | ftp="ftp://"
59 | title_tag="title"
60 | textarea_tag="textarea"
61 | style_tag="style"
62 | xmp_tag="xmp"
63 | iframe_tag="iframe"
64 | noembed_tag="noembed"
65 | noframes_tag="noframes"
66 | script_tag="script"
67 | plaintext_tag="plaintext"
68 |
--------------------------------------------------------------------------------
/test/pathological_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import re
5 | import argparse
6 | import sys
7 | import platform
8 | import itertools
9 | import multiprocessing
10 | from cmark import CMark
11 |
def hash_collisions(refmap_size=16, count=50000):
    """Build a document whose reference labels all fall in one hash bucket.

    Keys of the form ``x0, x1, x2, ...`` are filtered down to those whose
    32-bit hash (computed by ``badhash`` below) is a multiple of
    ``refmap_size``.  The first such key is used as the reference that every
    paragraph tries to resolve; each remaining key contributes one link
    definition followed by one ``[bad_key]`` paragraph.  The first key is
    never defined, so every paragraph is expected to render as literal text.

    Parameters:
        refmap_size: modulus used to decide that a key collides.
        count: number of colliding keys to generate (the first is the
            unresolved reference, so the document has ``count - 1`` entries).

    Returns ``(document, regex)`` where ``regex`` matches ``count - 1``
    consecutive ``<p>[bad_key]</p>`` lines of HTML output.
    """
    def badhash(ref):
        # Each step is truncated to 32 bits to mimic unsigned C arithmetic.
        h = 0
        for c in ref:
            a = (h << 6) & 0xFFFFFFFF
            b = (h << 16) & 0xFFFFFFFF
            h = ord(c) + a + b - h
            h = h & 0xFFFFFFFF

        return (h % refmap_size) == 0

    keys = ("x%d" % i for i in itertools.count())
    collisions = itertools.islice((k for k in keys if badhash(k)), count)
    bad_key = next(collisions)

    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)

    # Raw string so that "\[" reaches the regex engine unchanged; the pattern
    # must stay on one line and include the paragraph tags the renderer emits.
    return document, re.compile(r"(<p>\[%s\]</p>\n){%d}" % (bad_key, count - 1))
33 |
# Maps a test-case name to True.
# NOTE(review): presumably the cases listed here are tolerated when they
# fail -- confirm against the code that consumes this table.
allowed_failures = {
    "many references": True,
}
35 |
36 | # list of pairs consisting of input and a regex that must match the output.
37 | pathological = {
38 | # note - some pythons have limit of 65535 for {num-matches} in re.
39 | "nested strong emph":
40 | (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
41 | re.compile("(a a ){65000}b( a a ){65000}")),
42 | "many emph closers with no openers":
43 | (("a_ " * 65000),
44 | re.compile("(a[_] ){64999}a_")),
45 | "many emph openers with no closers":
46 | (("_a " * 65000),
47 | re.compile("(_a ){64999}_a")),
48 | "many link closers with no openers":
49 | (("a]" * 65000),
50 | re.compile("(a\]){65000}")),
51 | "many link openers with no closers":
52 | (("[a" * 65000),
53 | re.compile("(\[a){65000}")),
54 | "mismatched openers and closers":
55 | (("*a_ " * 50000),
56 | re.compile("([*]a[_] ){49999}[*]a_")),
57 | "openers and closers multiple of 3":
58 | (("a**b" + ("c* " * 50000)),
59 | re.compile("a[*][*]b(c[*] ){49999}c[*]")),
60 | "link openers and emph closers":
61 | (("[ a_" * 50000),
62 | re.compile("(\[ a_){50000}")),
63 | "pattern [ (]( repeated":
64 | (("[ (](" * 80000),
65 | re.compile("(\[ \(\]\(){80000}")),
66 | "pattern ![[]() repeated":
67 | ("![[]()" * 160000,
68 | re.compile("(!\[ ){160000}")),
69 | "hard link/emph case":
70 | ("**x [a*b**c*](d)",
71 | re.compile("\\*\\*x ab\\*\\*c ")),
72 | "nested brackets":
73 | (("[" * 50000) + "a" + ("]" * 50000),
74 | re.compile("\[{50000}a\]{50000}")),
75 | "nested block quotes":
76 | ((("> " * 50000) + "a"),
77 | re.compile("(\n){50000}")),
78 | "deeply nested lists":
79 | ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))),
80 | re.compile("\n){999}")),
81 | "U+0000 in input":
82 | ("abc\u0000de\u0000",
83 | re.compile("abc\ufffd?de\ufffd?")),
84 | "backticks":
85 | ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))),
86 | re.compile("^[e`]*
\n$")),
87 | "unclosed links A":
88 | ("[a](aaa