├── .editorconfig
├── .gitattributes
├── .github
├── FUNDING.yml
├── dependabot.yml
└── workflows
│ ├── ci.yml
│ └── fuzz.yml
├── .gitignore
├── CMakeLists.txt
├── COPYING
├── Makefile
├── Makefile.nmake
├── README.md
├── api_test
├── CMakeLists.txt
├── cplusplus.cpp
├── cplusplus.h
├── harness.c
├── harness.h
└── main.c
├── bench
├── samples
│ ├── block-bq-flat.md
│ ├── block-bq-nested.md
│ ├── block-code.md
│ ├── block-fences.md
│ ├── block-heading.md
│ ├── block-hr.md
│ ├── block-html.md
│ ├── block-lheading.md
│ ├── block-list-flat.md
│ ├── block-list-nested.md
│ ├── block-ref-flat.md
│ ├── block-ref-nested.md
│ ├── inline-autolink.md
│ ├── inline-backticks.md
│ ├── inline-em-flat.md
│ ├── inline-em-nested.md
│ ├── inline-em-worst.md
│ ├── inline-entity.md
│ ├── inline-escape.md
│ ├── inline-html.md
│ ├── inline-links-flat.md
│ ├── inline-links-nested.md
│ ├── inline-newlines.md
│ ├── lorem1.md
│ └── rawtabs.md
├── statistics.py
└── stats.py
├── benchmarks.md
├── changelog.txt
├── cmake
└── modules
│ └── FindAsan.cmake
├── data
└── CaseFolding.txt
├── fuzz
├── CMakeLists.txt
├── afl_test_cases
│ └── test.md
├── cmark-fuzz.c
└── dictionary
├── man
├── CMakeLists.txt
├── make_man_page.py
├── man1
│ └── cmark.1
└── man3
│ └── cmark.3
├── nmake.bat
├── shell.nix
├── src
├── CMakeLists.txt
├── blocks.c
├── buffer.c
├── buffer.h
├── case_fold.inc
├── chunk.h
├── cmark.c
├── cmark.h
├── cmarkConfig.cmake.in
├── cmark_ctype.c
├── cmark_ctype.h
├── cmark_version.h.in
├── commonmark.c
├── entities.inc
├── houdini.h
├── houdini_href_e.c
├── houdini_html_e.c
├── houdini_html_u.c
├── html.c
├── inlines.c
├── inlines.h
├── iterator.c
├── iterator.h
├── latex.c
├── libcmark.pc.in
├── main.c
├── man.c
├── node.c
├── node.h
├── parser.h
├── references.c
├── references.h
├── render.c
├── render.h
├── scanners.c
├── scanners.h
├── scanners.re
├── utf8.c
├── utf8.h
└── xml.c
├── test
├── CMakeLists.txt
├── cmark.py
├── entity_tests.py
├── normalize.py
├── pathological_tests.py
├── regression.txt
├── roundtrip_tests.py
├── smart_punct.txt
├── spec.txt
└── spec_tests.py
├── toolchain-mingw32.cmake
├── tools
├── appveyor-build.bat
├── make_case_fold_inc.py
├── make_entities_inc.py
└── xml2md.xsl
├── why-cmark-and-not-x.md
└── wrappers
├── wrapper.php
├── wrapper.py
├── wrapper.rb
└── wrapper.rkt
/.editorconfig:
--------------------------------------------------------------------------------
1 | # editorconfig.org
2 |
3 | root = true
4 |
5 | [*]
6 | end_of_line = lf
7 | charset = utf-8
8 | insert_final_newline = true
9 |
10 | [*.{c,h}]
11 | trim_trailing_whitespace = true
12 | indent_style = space
13 | indent_size = 2
14 |
15 | [Makefile]
16 | trim_trailing_whitespace = true
17 | indent_style = tab
18 | indent_size = 8
19 |
20 | [Makefile.nmake]
21 | end_of_line = crlf
22 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 | Makefile.nmake eol=crlf
3 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [jgm]
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | updates:
4 | - package-ecosystem: "github-actions"
5 | directory: "/"
6 | schedule:
7 | interval: "daily"
8 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI tests
2 |
3 | on:
4 | push:
5 | branches-ignore:
6 | - 'dependabot/**'
7 | pull_request:
8 | workflow_dispatch:
9 |
10 | jobs:
11 |
12 | linter:
13 |
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 |
18 | - uses: actions/checkout@v4
19 | - name: Install clang-tidy
20 | run: |
21 | sudo apt-get install -y clang-tidy
22 | - name: lint with clang-tidy
23 | run: |
24 | make lint
25 | env:
26 | CC: clang
27 | CXX: clang++
28 |
29 | posix:
30 |
31 | strategy:
32 | fail-fast: false
33 | matrix:
34 | os: [linux, macos]
35 | cc: [clang, gcc]
36 | build_type: [shared, static]
37 | sanitizers: ['', ASan]
38 |
39 | include:
40 | # Translate human readable labels
41 | - os: 'linux'
42 | image: 'ubuntu-latest'
43 | - os: 'macos'
44 | image: 'macos-latest'
45 | - cc: 'clang'
46 | cxx: 'clang++'
47 | - cc: 'gcc'
48 | cxx: 'g++'
49 | - build_type: 'shared'
50 | cmake_shared: 'YES'
51 | - build_type: 'static'
52 | cmake_shared: 'NO'
53 | - sanitizers: 'ASan'
54 | san_cflags: '-fsanitize=address,undefined -fno-sanitize-recover=all'
55 |
56 | # When building shared libraries, they will be loaded
57 | # dynamically from Python when testing. This means that
58 | # we have to use a preloaded shared libasan.
59 | - sanitizers: 'ASan'
60 | os: 'linux'
61 | cc: 'gcc'
62 | build_type: 'shared'
63 | test_env: 'LD_PRELOAD=$(gcc -print-file-name=libasan.so)'
64 | - sanitizers: 'ASan'
65 | os: 'linux'
66 | cc: 'clang'
67 | build_type: 'shared'
68 | # clang defaults to -static-libasan
69 | asan_cflags: '-shared-libasan'
70 | test_env: 'LD_PRELOAD=$(clang -print-file-name=libclang_rt.asan-x86_64.so)'
71 |
72 | # We have to disable LeakSanitizer in shared library builds
73 | # because we get false positives from Python.
74 | - sanitizers: 'ASan'
75 | build_type: 'shared'
76 | asan_opts: 'detect_leaks=0'
77 |
78 | # The static build can run with LeakSanitizer.
79 | # gcc defaults to -shared-libasan and needs -static-libasan
80 | - sanitizers: 'ASan'
81 | cc: 'gcc'
82 | build_type: 'static'
83 | asan_cflags: '-static-libasan'
84 |
85 | exclude:
86 | # gcc is just an alias for clang on macOS
87 | - os: 'macos'
88 | cc: 'gcc'
89 | # Shared libasan doesn't work with macOS system Python.
90 | - os: 'macos'
91 | sanitizers: 'ASan'
92 | build_type: 'shared'
93 |
94 | runs-on: ${{ matrix.image }}
95 |
96 | env:
97 | ASAN_OPTIONS: ${{ matrix.asan_opts }}
98 | CC: ${{ matrix.cc }}
99 | CXX: ${{ matrix.cxx }}
100 | CFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}'
101 | CXXFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}'
102 |
103 | steps:
104 | - uses: actions/checkout@v4
105 | - name: Build and test
106 | run: |
107 | cmake \
108 | -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} \
109 | -DCMAKE_BUILD_TYPE=Debug \
110 | -S . -B build
111 | cmake --build build
112 | ${{ matrix.test_env }} ctest --test-dir build --output-on-failure
113 |
114 | windows:
115 |
116 | runs-on: windows-latest
117 | strategy:
118 | fail-fast: false
119 | matrix:
120 | build_type: [shared, static]
121 | include:
122 | - build_type: 'shared'
123 | cmake_shared: 'YES'
124 | - build_type: 'static'
125 | cmake_shared: 'NO'
126 |
127 | steps:
128 | - uses: actions/checkout@v4
129 | - uses: ilammy/msvc-dev-cmd@v1
130 | - name: Build and test
131 | run: |
132 | cmake ^
133 | -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} ^
134 | -DCMAKE_BUILD_TYPE=Debug ^
135 | -S . -B build
136 | cmake --build build
137 | ctest --test-dir build -C Debug --output-on-failure
138 | shell: cmd
139 | - name: Upload artifact
140 | if: ${{ matrix.build_type == 'static' }}
141 | uses: actions/upload-artifact@v4
142 | with:
143 | name: cmark windows ${{ matrix.build_type }}
144 | path: build/src/Debug/cmark.exe
145 | if-no-files-found: error
146 |
--------------------------------------------------------------------------------
/.github/workflows/fuzz.yml:
--------------------------------------------------------------------------------
1 | name: CIFuzz
2 | on: [pull_request]
3 | jobs:
4 | Fuzzing:
5 | runs-on: ubuntu-latest
6 | steps:
7 | - name: Build Fuzzers
8 | uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
9 | with:
10 | oss-fuzz-project-name: 'cmark'
11 | dry-run: false
12 | - name: Run Fuzzers
13 | uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
14 | with:
15 | oss-fuzz-project-name: 'cmark'
16 | fuzz-seconds: 600
17 | dry-run: false
18 | - name: Upload Crash
19 | uses: actions/upload-artifact@v4
20 | if: failure()
21 | with:
22 | name: artifacts
23 | path: ./out/artifacts
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Object files
2 | *.o
3 | *.ko
4 | *.obj
5 | *.elf
6 |
7 | # Libraries
8 | *.lib
9 | *.a
10 |
11 | # Shared objects (inc. Windows DLLs)
12 | *.dll
13 | *.so
14 | *.so.*
15 | *.dylib
16 |
17 | # Executables
18 | *.exe
19 | *.out
20 | *.app
21 | *.i*86
22 | *.x86_64
23 | *.hex
24 | *.pyc
25 |
26 | *~
27 | *.bak
28 | *.sw?
29 | *.diff
30 | *#
31 | *.zip
32 | bstrlib.txt
33 | build
34 | cmark.dSYM/*
35 | cmark
36 |
37 | # Visual Studio CMake support
38 | /.vs/
39 | /out/
40 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.7)
2 |
3 | if(POLICY CMP0063)
4 | cmake_policy(SET CMP0063 NEW)
5 | endif()
6 | if(POLICY CMP0092)
7 | cmake_policy(SET CMP0092 NEW)
8 | endif()
9 |
10 | project(cmark
11 | LANGUAGES C CXX
12 | VERSION 0.31.1)
13 |
14 | if(CMAKE_BUILD_TYPE STREQUAL Asan)
15 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
16 | include(FindAsan)
17 | endif()
18 |
19 | # Include module for functions
20 | # - 'write_basic_package_version_file'
21 | # - 'configure_package_config_file'
22 | include(CMakePackageConfigHelpers)
23 | include(CTest)
24 | include(GenerateExportHeader)
25 | include(GNUInstallDirs)
26 |
27 | if(NOT MSVC OR CMAKE_HOST_SYSTEM_NAME STREQUAL Windows)
28 | set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
29 | include(InstallRequiredSystemLibraries)
30 | endif()
31 |
32 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
33 | message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
34 | endif()
35 |
36 | # Backwards Compatibility
37 | # TODO: remove this once there has been a period to enable people to migrate
38 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO)
39 | if(DEFINED CMARK_SHARED)
40 | message(AUTHOR_WARNING [=[
41 | 'CMARK_SHARED' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type.
42 | ]=])
43 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT ${CMARK_SHARED})
44 | endif()
45 | if(DEFINED CMARK_STATIC)
46 | message(AUTHOR_WARNING [=[
47 | 'CMARK_STATIC' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type.
48 | ]=])
49 | if(NOT CMARK_STATIC)
50 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT YES)
51 | else()
52 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO)
53 | endif()
54 | endif()
55 |
56 | option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF)
57 | option(BUILD_SHARED_LIBS "Build the CMark library as shared"
58 | ${_CMARK_BUILD_SHARED_LIBS_DEFAULT})
59 |
60 | if(NOT MSVC)
61 | set(CMAKE_C_STANDARD 99)
62 | set(CMAKE_C_STANDARD_REQUIRED YES)
63 | set(CMAKE_C_EXTENSIONS NO)
64 | endif()
65 |
66 | # -fvisibility=hidden
67 | set(CMAKE_C_VISIBILITY_PRESET hidden)
68 | set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
69 |
70 | set(CMAKE_INCLUDE_CURRENT_DIR ON)
71 |
72 | if (DEFINED CMAKE_INSTALL_RPATH)
73 | set(Base_rpath "${CMAKE_INSTALL_RPATH}")
74 | else()
75 | if(BUILD_SHARED_LIBS)
76 | set(p "${CMAKE_INSTALL_FULL_LIBDIR}")
77 | list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${p}" i)
78 | if("${i}" STREQUAL "-1")
79 | set(Base_rpath "${p}")
80 | endif()
81 | endif()
82 | endif()
83 |
84 | # Append non-standard external dependency directories, if any.
85 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
86 |
87 | # Check integrity of node structure when compiled as debug
88 | add_compile_options($<$ paragraph a \t b a b a \t\nb a b a b a b a b a b a b a b \n\t\ta b\t\t a b
8 |
14 |
15 |
9 |
13 |
10 | **test**
11 |
12 |
16 |
17 |
28 |
29 |
32 |
33 |
--------------------------------------------------------------------------------
/bench/samples/block-lheading.md:
--------------------------------------------------------------------------------
1 | heading
2 | ---
3 |
4 | heading
5 | ===================================
6 |
7 | not a heading
8 | ----------------------------------- text
9 |
--------------------------------------------------------------------------------
/bench/samples/block-list-flat.md:
--------------------------------------------------------------------------------
1 | - tidy
2 | - bullet
3 | - list
4 |
5 |
6 | - loose
7 |
8 | - bullet
9 |
10 | - list
11 |
12 |
13 | 0. ordered
14 | 1. list
15 | 2. example
16 |
17 |
18 | -
19 | -
20 | -
21 | -
22 |
23 |
24 | 1.
25 | 2.
26 | 3.
27 |
28 |
29 | - an example
30 | of a list item
31 | with a continuation
32 |
33 | this part is inside the list
34 |
35 | this part is just a paragraph
36 |
37 |
38 | 1. test
39 | - test
40 | 1. test
41 | - test
42 |
43 |
44 | 111111111111111111111111111111111111111111. is this a valid bullet?
45 |
46 | - _________________________
47 |
48 | - this
49 | - is
50 |
51 | a
52 |
53 | long
54 | - loose
55 | - list
56 |
57 | - with
58 | - some
59 |
60 | tidy
61 |
62 | - list
63 | - items
64 | - in
65 |
66 | - between
67 | - _________________________
68 |
--------------------------------------------------------------------------------
/bench/samples/block-list-nested.md:
--------------------------------------------------------------------------------
1 |
2 | - this
3 | - is
4 | - a
5 | - deeply
6 | - nested
7 | - bullet
8 | - list
9 |
10 |
11 | 1. this
12 | 2. is
13 | 3. a
14 | 4. deeply
15 | 5. nested
16 | 6. unordered
17 | 7. list
18 |
19 |
20 | - 1
21 | - 2
22 | - 3
23 | - 4
24 | - 5
25 | - 6
26 | - 7
27 | - 6
28 | - 5
29 | - 4
30 | - 3
31 | - 2
32 | - 1
33 |
34 |
35 | - - - - - - - - - deeply-nested one-element item
36 |
37 |
--------------------------------------------------------------------------------
/bench/samples/block-ref-flat.md:
--------------------------------------------------------------------------------
1 | [1] [2] [3] [1] [2] [3]
2 |
3 | [looooooooooooooooooooooooooooooooooooooooooooooooooong label]
4 |
5 | [1]:
18 |
19 |
26 |
27 |
20 |
21 | test
22 |
23 |
24 |
25 |
")
165 | '
'
166 |
167 | * Attributes are sorted and lowercased.
168 |
169 | >>> normalize_html('x')
170 | 'x'
171 |
172 | * References are converted to unicode, except that '<', '>', '&', and
173 | '"' are rendered using entities.
174 |
175 | >>> normalize_html("∀&><"")
176 | '\u2200&><"'
177 |
178 | """
179 | html_chunk_re = re.compile("(\|\<[^>]*\>|[^<]+)")
180 | try:
181 | parser = MyHTMLParser()
182 | # We work around HTMLParser's limitations parsing CDATA
183 | # by breaking the input into chunks and passing CDATA chunks
184 | # through verbatim.
185 | for chunk in re.finditer(html_chunk_re, html):
186 | if chunk.group(0)[:8] == "\[%s]
\n){25000}")), 95 | "deeply nested lists": 96 | ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))), 97 | re.compile("\n(
\n){499}")), 98 | "U+0000 in input": 99 | ("abc\u0000de\u0000", 100 | re.compile("abc\ufffd?de\ufffd?")), 101 | "backticks": 102 | ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))), 103 | re.compile("^- a\n
\n\n){499}
\n(- a
\n[e`]*
\n$")), 104 | "unclosed links A": 105 | ("[a](\n?)+x(\w+>\n)+$")), 116 | "empty lines in deeply nested lists in blockquote": 117 | ("> " + "- " * 30000 + "x\n" + ">\n" * 30000, 118 | re.compile(r"^(<\w+>\n?)+x(\w+>\n)+$")), 119 | "emph in deep blockquote": 120 | (">" * 100000 + "a*" * 100000, 121 | re.compile(r"^(<\w+>\n)+.*
\n(\w+>\n)+$")), 122 | "reference collisions": hash_collisions() 123 | # "many references": 124 | # ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, 125 | # re.compile("(\\[0\\] ){4999}")) 126 | } 127 | 128 | pathological_cmark = { 129 | "nested inlines": 130 | ("*" * 20000 + "a" + "*" * 20000, 131 | re.compile("^\\*+a\\*+$")), 132 | } 133 | 134 | whitespace_re = re.compile('/s+/') 135 | 136 | def run_pathological(q, inp): 137 | q.put(cmark.to_html(inp)) 138 | 139 | def run_pathological_cmark(q, inp): 140 | q.put(cmark.to_commonmark(inp)) 141 | 142 | def run_tests(): 143 | q = multiprocessing.Queue() 144 | passed = [] 145 | errored = [] 146 | failed = [] 147 | ignored = [] 148 | 149 | print("Testing pathological cases:") 150 | for description in (*pathological, *pathological_cmark): 151 | if description in pathological: 152 | (inp, regex) = pathological[description] 153 | p = multiprocessing.Process(target=run_pathological, 154 | args=(q, inp)) 155 | else: 156 | (inp, regex) = pathological_cmark[description] 157 | p = multiprocessing.Process(target=run_pathological_cmark, 158 | args=(q, inp)) 159 | p.start() 160 | try: 161 | # wait TIMEOUT seconds or until it finishes 162 | rc, actual, err = q.get(True, TIMEOUT) 163 | p.join() 164 | if rc != 0: 165 | print(description, '[ERRORED (return code %d)]' %rc) 166 | print(err) 167 | if description in allowed_failures: 168 | ignored.append(description) 169 | else: 170 | errored.append(description) 171 | elif regex.search(actual): 172 | print(description, '[PASSED]') 173 | passed.append(description) 174 | else: 175 | print(description, '[FAILED]') 176 | print(repr(actual[:60])) 177 | if description in allowed_failures: 178 | ignored.append(description) 179 | else: 180 | failed.append(description) 181 | except queue.Empty: 182 | p.terminate() 183 | p.join() 184 | print(description, '[TIMEOUT]') 185 | if description in allowed_failures: 186 | ignored.append(description) 187 | else: 188 | 
errored.append(description) 189 | 190 | print("%d passed, %d failed, %d errored" % 191 | (len(passed), len(failed), len(errored))) 192 | if ignored: 193 | print("Ignoring these allowed failures:") 194 | for x in ignored: 195 | print(x) 196 | if failed or errored: 197 | exit(1) 198 | else: 199 | exit(0) 200 | 201 | if __name__ == "__main__": 202 | run_tests() 203 | -------------------------------------------------------------------------------- /test/regression.txt: -------------------------------------------------------------------------------- 1 | ### Regression tests 2 | 3 | Issue #113: EOL character weirdness on Windows 4 | (Important: first line ends with CR + CR + LF) 5 | 6 | ```````````````````````````````` example 7 | line1 8 | line2 9 | . 10 |line1
11 |line2
12 | ```````````````````````````````` 13 | 14 | Issue #114: cmark skipping first character in line 15 | (Important: the blank lines around "Repeatedly" contain a tab.) 16 | 17 | ```````````````````````````````` example 18 | By taking it apart 19 | 20 | - alternative solutions 21 | → 22 | Repeatedly solving 23 | → 24 | - how techniques 25 | . 26 |By taking it apart
27 |28 |
30 |- alternative solutions
29 |Repeatedly solving
31 |32 |
34 | ```````````````````````````````` 35 | 36 | Issue jgm/CommonMark#430: h2..h6 not recognized as block tags. 37 | 38 | ```````````````````````````````` example 39 |- how techniques
33 |lorem
40 | 41 |lorem
42 | 43 |lorem
44 | 45 |lorem
46 | 47 |lorem
48 | 49 |lorem
50 | . 51 |lorem
52 |lorem
53 |lorem
54 |lorem
55 |lorem
56 |lorem
57 | ```````````````````````````````` 58 | 59 | Issue jgm/commonmark.js#109 - tabs after setext header line 60 | 61 | 62 | ```````````````````````````````` example 63 | hi 64 | --→ 65 | . 66 |hi
67 | ```````````````````````````````` 68 | 69 | Issue #177 - incorrect emphasis parsing 70 | 71 | ```````````````````````````````` example 72 | a***b* c* 73 | . 74 |a*b c
75 | ```````````````````````````````` 76 | 77 | Issue #193 - unescaped left angle brackets in link destination 78 | 79 | ```````````````````````````````` example 80 | [a] 81 | 82 | [a]:83 | . 84 | [a]
85 |[a]: <te
86 | ```````````````````````````````` 87 | 88 | Issue #192 - escaped spaces in link destination 89 | 90 | 91 | ```````````````````````````````` example 92 | [a](te\ st) 93 | . 94 |[a](te\ st)
95 | ```````````````````````````````` 96 | 97 | Issue #527 - meta tags in inline contexts 98 | 99 | ```````````````````````````````` example 100 | City: 101 | 102 | 103 | 104 | . 105 |City: 106 | 107 | 108 |
109 | ```````````````````````````````` 110 | 111 | Issue #530 - link parsing corner cases 112 | 113 | ```````````````````````````````` example 114 | [a](\ b) 115 | 116 | [a](<[a](\ b) 122 |[a](<<b)
123 |[a](<b 124 | )
125 | ```````````````````````````````` 126 | 127 | Issue commonmark#526 - unescaped ( in link title 128 | 129 | ```````````````````````````````` example 130 | [link](url ((title)) 131 | . 132 |[link](url ((title))
133 | ```````````````````````````````` 134 | 135 | Issue commonmark#517 - script, pre, style close tag without 136 | opener. 137 | 138 | ```````````````````````````````` example 139 | 140 | 141 | 142 | 143 | 144 | . 145 | 146 | 147 | 148 | ```````````````````````````````` 149 | 150 | Issue #289. 151 | 152 | ```````````````````````````````` example 153 | [a]( 154 | . 155 |[a](<b) c>
156 | ```````````````````````````````` 157 | 158 | Issue #334 - UTF-8 BOM 159 | 160 | ```````````````````````````````` example 161 | # Hi 162 | . 163 |Hi
164 | ```````````````````````````````` 165 | 166 | Issue commonmark.js#213 - type 7 blocks can't interrupt 167 | paragraph 168 | 169 | ```````````````````````````````` example 170 | - 174 | . 175 |176 |
183 | ```````````````````````````````` 184 | 185 | Issue #383 - emphasis parsing. 186 | 187 | ```````````````````````````````` example 188 | *****Hello*world**** 189 | . 190 |- 177 |
182 |**Helloworld
191 | ```````````````````````````````` 192 | 193 | Issue #424 - emphasis before links 194 | 195 | ```````````````````````````````` example 196 | *text* [link](#section) 197 | . 198 |text link
199 | ```````````````````````````````` 200 | 201 | ` 204 | . 205 | 206 | ```````````````````````````````` 207 | 208 | Declarations don't need spaces, according to the spec 209 | ```````````````````````````````` example 210 | x 211 | . 212 |x
213 | ```````````````````````````````` 214 | 215 | An underscore that is not part of a delimiter should not prevent another 216 | pair of underscores from forming part of their own. 217 | ```````````````````````````````` example 218 | __!_!__ 219 | 220 | __!x!__ 221 | 222 | **!*!** 223 | 224 | --- 225 | 226 | _*__*_* 227 | 228 | _*xx*_* 229 | 230 | _*__-_- 231 | 232 | _*xx-_- 233 | . 234 |!_!
235 |!x!
236 |!*!
237 |
238 |__*
239 |xx*
240 |*__--
241 |*xx--
242 | ```````````````````````````````` 243 | 244 | commonmark.js #277: 245 | ```````````````````````````````` example 246 | ```language-r 247 | x <- 1 248 | ``` 249 | 250 | ```r 251 | x <- 1 252 | ``` 253 | . 254 |256 |x <- 1 255 |
258 | ```````````````````````````````` 259 | 260 | https://github.com/commonmark/commonmark.js/issues/283 261 | ```````````````````````````````` example 262 | x 263 | 264 | x 265 | . 266 |x <- 1 257 |
x
267 |x<!>
268 | ```````````````````````````````` 269 | 270 | Case fold test 271 | ```````````````````````````````` example 272 | [link][µÓĐĹŠƆƦǏǶȜɆΓΨϪЇЛЯҎҶӞԆԮՄႠႴᏸᲕᲩᲿḦṎṶẚỂỪἙἿὭᾑᾥᾼῢΩↃⓉⰍⰡⱩⲔⲼⳫꙢꜢꝌꝽꞪꟇꭾꮒꮦꮺCW𐐐𐐤𐓀𐕰𐖅𐲅𐲙𐲭𑢮𖹂𖹖𞤊𞤞] 273 | 274 | [μóđĺšɔʀǐƕȝɇγψϫїляҏҷӟԇԯմⴀⴔᏰვჩჿḧṏṷaʾểừἑἷὥἡιὥιαιῢωↄⓣⰽⱑⱪⲕⲽⳬꙣꜣꝍᵹɦꟈᎮᏂᏖᏪcw𐐸𐑌𐓨𐖗𐖬𐳅𐳙𐳭𑣎𖹢𖹶𞤬𞥀]: /url 275 | . 276 | 277 | ```````````````````````````````` 278 | 279 | https://github.com/commonmark/cmark/issues/548 280 | ```````````````````````````````` example 281 | () 282 | . 283 |()
284 | ```````````````````````````````` 285 | -------------------------------------------------------------------------------- /test/roundtrip_tests.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from spec_tests import get_tests, do_test 4 | from cmark import CMark 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.') 8 | parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, 9 | help='program to test') 10 | parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', 11 | help='path to spec') 12 | parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', 13 | default=None, help='limit to sections matching regex pattern') 14 | parser.add_argument('--library-dir', dest='library_dir', nargs='?', 15 | default=None, help='directory containing dynamic library') 16 | parser.add_argument('--no-normalize', dest='normalize', 17 | action='store_const', const=False, default=True, 18 | help='do not normalize HTML') 19 | parser.add_argument('-n', '--number', type=int, default=None, 20 | help='only consider the test with the given number') 21 | args = parser.parse_args(sys.argv[1:]) 22 | 23 | spec = sys.argv[1] 24 | 25 | def converter(md): 26 | cmark = CMark(prog=args.program, library_dir=args.library_dir) 27 | [ec, result, err] = cmark.to_commonmark(md) 28 | if ec == 0: 29 | [ec, html, err] = cmark.to_html(result) 30 | if ec == 0: 31 | # In the commonmark writer we insert dummy HTML 32 | # comments between lists, and between lists and code 33 | # blocks. 
Strip these out, since the spec uses 34 | # two blank lines instead: 35 | return [ec, re.sub('\n', '', html), ''] 36 | else: 37 | return [ec, html, err] 38 | else: 39 | return [ec, result, err] 40 | 41 | tests = get_tests(args.spec) 42 | result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0} 43 | for test in tests: 44 | do_test(converter, test, args.normalize, result_counts) 45 | 46 | exit(result_counts['fail'] + result_counts['error']) 47 | -------------------------------------------------------------------------------- /test/smart_punct.txt: -------------------------------------------------------------------------------- 1 | ## Smart punctuation 2 | 3 | Open quotes are matched with closed quotes. 4 | The same method is used for matching openers and closers 5 | as is used in emphasis parsing: 6 | 7 | ```````````````````````````````` example 8 | "Hello," said the spider. 9 | "'Shelob' is my name." 10 | . 11 |“Hello,” said the spider. 12 | “‘Shelob’ is my name.”
13 | ```````````````````````````````` 14 | 15 | ```````````````````````````````` example 16 | 'A', 'B', and 'C' are letters. 17 | . 18 |‘A’, ‘B’, and ‘C’ are letters.
19 | ```````````````````````````````` 20 | 21 | ```````````````````````````````` example 22 | 'Oak,' 'elm,' and 'beech' are names of trees. 23 | So is 'pine.' 24 | . 25 |‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. 26 | So is ‘pine.’
27 | ```````````````````````````````` 28 | 29 | ```````````````````````````````` example 30 | 'He said, "I want to go."' 31 | . 32 |‘He said, “I want to go.”’
33 | ```````````````````````````````` 34 | 35 | A single quote that isn't an open quote matched 36 | with a close quote will be treated as an 37 | apostrophe: 38 | 39 | ```````````````````````````````` example 40 | Were you alive in the 70's? 41 | . 42 |Were you alive in the 70’s?
43 | ```````````````````````````````` 44 | 45 | ```````````````````````````````` example 46 | Here is some quoted '`code`' and a "[quoted link](url)". 47 | . 48 |Here is some quoted ‘
49 | ```````````````````````````````` 50 | 51 | Here the first `'` is treated as an apostrophe, not 52 | an open quote, because the final single quote is matched 53 | by the single quote before `jolly`: 54 | 55 | ```````````````````````````````` example 56 | 'tis the season to be 'jolly' 57 | . 58 |code
’ and a “quoted link”.’tis the season to be ‘jolly’
59 | ```````````````````````````````` 60 | 61 | Multiple apostrophes should not be marked as open/closing quotes. 62 | 63 | ```````````````````````````````` example 64 | 'We'll use Jane's boat and John's truck,' Jenna said. 65 | . 66 |‘We’ll use Jane’s boat and John’s truck,’ Jenna said.
67 | ```````````````````````````````` 68 | 69 | An unmatched double quote will be interpreted as a 70 | left double quote, to facilitate this style: 71 | 72 | ```````````````````````````````` example 73 | "A paragraph with no closing quote. 74 | 75 | "Second paragraph by same speaker, in fiction." 76 | . 77 |“A paragraph with no closing quote.
78 |“Second paragraph by same speaker, in fiction.”
79 | ```````````````````````````````` 80 | 81 | A quote following a `]` or `)` character cannot 82 | be an open quote: 83 | 84 | ```````````````````````````````` example 85 | [a]'s b' 86 | . 87 |[a]’s b’
88 | ```````````````````````````````` 89 | 90 | Quotes that are escaped come out as literal straight 91 | quotes: 92 | 93 | ```````````````````````````````` example 94 | \"This is not smart.\" 95 | This isn\'t either. 96 | 5\'8\" 97 | . 98 |"This is not smart." 99 | This isn't either. 100 | 5'8"
101 | ```````````````````````````````` 102 | 103 | Two hyphens form an en-dash, three an em-dash. 104 | 105 | ```````````````````````````````` example 106 | Some dashes: em---em 107 | en--en 108 | em --- em 109 | en -- en 110 | 2--3 111 | . 112 |Some dashes: em—em 113 | en–en 114 | em — em 115 | en – en 116 | 2–3
117 | ```````````````````````````````` 118 | 119 | A sequence of more than three hyphens is 120 | parsed as a sequence of em and/or en dashes, 121 | with no hyphens. If possible, a homogeneous 122 | sequence of dashes is used (so, 10 hyphens 123 | = 5 en dashes, and 9 hyphens = 3 em dashes). 124 | When a heterogeneous sequence must be used, 125 | the em dashes come first, followed by the en 126 | dashes, and as few en dashes as possible are 127 | used (so, 7 hyphens = 2 em dashes and 1 en 128 | dash). 129 | 130 | ```````````````````````````````` example 131 | one- 132 | two-- 133 | three--- 134 | four---- 135 | five----- 136 | six------ 137 | seven------- 138 | eight-------- 139 | nine--------- 140 | thirteen-------------. 141 | . 142 |one- 143 | two– 144 | three— 145 | four–– 146 | five—– 147 | six—— 148 | seven—–– 149 | eight–––– 150 | nine——— 151 | thirteen———––.
152 | ```````````````````````````````` 153 | 154 | Hyphens can be escaped: 155 | 156 | ```````````````````````````````` example 157 | Escaped hyphens: \-- \-\-\-. 158 | . 159 |Escaped hyphens: -- ---.
160 | ```````````````````````````````` 161 | 162 | Three periods form an ellipsis: 163 | 164 | ```````````````````````````````` example 165 | Ellipses...and...and.... 166 | . 167 |Ellipses…and…and….
168 | ```````````````````````````````` 169 | 170 | Periods can be escaped if ellipsis-formation 171 | is not wanted: 172 | 173 | ```````````````````````````````` example 174 | No ellipses\.\.\. 175 | . 176 |No ellipses...
177 | ```````````````````````````````` 178 | -------------------------------------------------------------------------------- /test/spec_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | from difflib import unified_diff 6 | import argparse 7 | import re 8 | import json 9 | import os 10 | from cmark import CMark 11 | from normalize import normalize_html 12 | 13 | parser = argparse.ArgumentParser(description='Run cmark tests.') 14 | parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, 15 | help='program to test') 16 | parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', 17 | help='path to spec') 18 | parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', 19 | default=None, help='limit to sections matching regex pattern') 20 | parser.add_argument('--library-dir', dest='library_dir', nargs='?', 21 | default=None, help='directory containing dynamic library') 22 | parser.add_argument('--no-normalize', dest='normalize', 23 | action='store_const', const=False, default=True, 24 | help='do not normalize HTML') 25 | parser.add_argument('-d', '--dump-tests', dest='dump_tests', 26 | action='store_const', const=True, default=False, 27 | help='dump tests in JSON format') 28 | parser.add_argument('--debug-normalization', dest='debug_normalization', 29 | action='store_const', const=True, 30 | default=False, help='filter stdin through normalizer for testing') 31 | parser.add_argument('-n', '--number', type=int, default=None, 32 | help='only consider the test with the given number') 33 | parser.add_argument('--fuzz-corpus', 34 | help='convert test cases to fuzz corpus') 35 | args = parser.parse_args(sys.argv[1:]) 36 | 37 | def out(str): 38 | sys.stdout.buffer.write(str.encode('utf-8')) 39 | 40 | def print_test_header(headertext, example_number, start_line, end_line): 41 | out("Example %d (lines %d-%d) 
def do_test(converter, test, normalize, result_counts):
    """Run one spec example through ``converter`` and tally the outcome.

    converter     -- callable taking the markdown text and returning a
                     three-item sequence ``(retcode, html, err)``; ``err`` is
                     written to the binary stdout stream, so it must be bytes
    test          -- dict with the keys 'markdown', 'html', 'section',
                     'example', 'start_line' and 'end_line'
    normalize     -- when true, both HTML strings go through
                     ``normalize_html`` before being compared
    result_counts -- dict whose 'pass', 'fail' or 'error' entry is incremented

    Nothing is returned; failures and errors are reported on stdout.
    """
    retcode, actual_html, err = converter(test['markdown'])

    # The converter itself failed: report its exit code and raw error output.
    if retcode != 0:
        print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
        out("program returned error code %d\n" % retcode)
        sys.stdout.buffer.write(err)
        result_counts['error'] += 1
        return

    expected_html = test['html']
    unicode_error = None
    if not normalize:
        passed = actual_html == expected_html
    else:
        try:
            passed = normalize_html(actual_html) == normalize_html(expected_html)
        except UnicodeDecodeError as exc:
            # A decoding problem counts as a failure, but is reported differently.
            unicode_error = exc
            passed = False

    if passed:
        result_counts['pass'] += 1
        return

    print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
    out(test['markdown'] + '\n')
    if unicode_error:
        out("Unicode error: " + str(unicode_error) + '\n')
        out("Expected: " + repr(expected_html) + '\n')
        out("Got: " + repr(actual_html) + '\n')
    else:
        # Show a unified diff of the raw (un-normalized) HTML.
        diff = unified_diff(expected_html.splitlines(True),
                            actual_html.splitlines(True),
                            "expected HTML", "actual HTML")
        for diffline in diff:
            out(diffline)
    out('\n')
    result_counts['fail'] += 1
101 | end_line = line_number 102 | tests.append({ 103 | "markdown":''.join(markdown_lines).replace('→',"\t"), 104 | "html":''.join(html_lines).replace('→',"\t"), 105 | "example": example_number, 106 | "start_line": start_line, 107 | "end_line": end_line, 108 | "section": headertext}) 109 | start_line = 0 110 | markdown_lines = [] 111 | html_lines = [] 112 | elif l == ".": 113 | state = 2 114 | elif state == 1: 115 | if start_line == 0: 116 | start_line = line_number - 1 117 | markdown_lines.append(line) 118 | elif state == 2: 119 | html_lines.append(line) 120 | elif state == 0 and re.match(header_re, line): 121 | headertext = header_re.sub('', line).strip() 122 | return tests 123 | 124 | if __name__ == "__main__": 125 | if args.debug_normalization: 126 | out(normalize_html(sys.stdin.read())) 127 | exit(0) 128 | 129 | all_tests = get_tests(args.spec) 130 | 131 | if args.fuzz_corpus: 132 | i = 1 133 | base = os.path.basename(args.spec) 134 | (name, ext) = os.path.splitext(base) 135 | for test in all_tests: 136 | filename = os.path.join(args.fuzz_corpus, '%s.%d' % (name, i)) 137 | with open(filename, 'wb') as f: 138 | f.write(b'\0' * 8) # options header 139 | f.write(test['markdown'].encode()) 140 | i += 1 141 | exit(0) 142 | 143 | if args.pattern: 144 | pattern_re = re.compile(args.pattern, re.IGNORECASE) 145 | else: 146 | pattern_re = re.compile('.') 147 | tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ] 148 | if args.dump_tests: 149 | out(json.dumps(tests, ensure_ascii=False, indent=2)) 150 | exit(0) 151 | else: 152 | skipped = len(all_tests) - len(tests) 153 | converter = CMark(prog=args.program, library_dir=args.library_dir).to_html 154 | result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped} 155 | for test in tests: 156 | do_test(converter, test, args.normalize, result_counts) 157 | out("{pass} passed, {fail} failed, {error} errored, {skip} 
skipped\n".format(**result_counts)) 158 | exit(result_counts['fail'] + result_counts['error']) 159 | -------------------------------------------------------------------------------- /toolchain-mingw32.cmake: -------------------------------------------------------------------------------- 1 | # the name of the target operating system 2 | SET(CMAKE_SYSTEM_NAME Windows) 3 | 4 | # which compilers to use for C and C++ 5 | SET(CMAKE_C_COMPILER i686-w64-mingw32-gcc) 6 | SET(CMAKE_CXX_COMPILER i686-w64-mingw32-g++) 7 | SET(CMAKE_RC_COMPILER i686-w64-mingw32-windres) 8 | 9 | # here is the target environment located 10 | SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32/ "${CMAKE_SOURCE_DIR}/windows") 11 | 12 | # adjust the default behaviour of the FIND_XXX() commands: 13 | # search headers and libraries in the target environment, search 14 | # programs in the host environment 15 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 16 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 17 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 18 | -------------------------------------------------------------------------------- /tools/appveyor-build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | if "%MSVC_VERSION%" == "10" goto msvc10 4 | 5 | call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64 6 | goto build 7 | 8 | :msvc10 9 | call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 10 | 11 | :build 12 | nmake 13 | 14 | -------------------------------------------------------------------------------- /tools/make_case_fold_inc.py: -------------------------------------------------------------------------------- 1 | # Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt). 
# Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt).
# Usage: python3 tools/make_case_fold_inc.py < data/CaseFolding.txt > src/case_fold.inc

import sys, re

# Matches "<code point>; C|F; <one or more replacement code points>;" lines.
prog = re.compile('([0-9A-F]+); [CF];((?: [0-9A-F]+)+);')
main_table = []   # one packed 32-bit entry per folded code point
repl_table = []   # UTF-8 bytes of all replacements, padded to even offsets
repl_idx = 0      # offset into repl_table where the next replacement starts
test = ''
test_result = ''


def _print_rows(values, per_row, fmt):
    """Print ``values`` as a comma-separated C initializer body.

    Each value is formatted with ``fmt``; rows hold ``per_row`` values and
    the last value is followed by a newline instead of a comma.
    """
    total = len(values)
    for pos, value in enumerate(values, 1):
        if (pos - 1) % per_row == 0:
            print(" ", end="")
        print(fmt % value, end="")
        if pos == total:
            print()
        elif pos % per_row == 0:
            print(",")
        else:
            print(", ", end="")


for line in sys.stdin:
    m = prog.match(line)
    if m is None:
        continue

    # ASCII code points are skipped; only non-ASCII entries go in the table.
    cp = int(m.group(1), 16)
    if cp < 0x80:
        continue

    repl = b''.join(chr(int(x, 16)).encode('UTF-8') for x in m.group(2).split())

    # Generate test case
    if len(main_table) % 20 == 0:
        test += chr(cp)
        test_result += repl.decode('UTF-8')

    # 17 bits for code point
    if cp >= (1 << 17):
        raise Exception("code point too large")

    # 12 bits for upper bits of replacement index
    # The lowest bit is always zero.
    if repl_idx // 2 >= (1 << 12):
        raise Exception("too many replacements")

    # 3 bits for size of replacement
    repl_size = len(repl)
    if repl_size >= (1 << 3):
        raise Exception("too many replacement chars")

    main_table.append(cp | repl_idx // 2 << 17 | repl_size << 29)
    repl_table.extend(repl)
    repl_idx += repl_size

    # Make sure that repl_idx is even
    if repl_idx % 2 != 0:
        repl_table.append(0)
        repl_idx += 1

# Print test case
if False:
    print("test:", test)
    print("test_result:", test_result)
    sys.exit(0)

print("""// Generated by tools/make_case_fold_inc.py

#define CF_MAX (1 << 17)
#define CF_TABLE_SIZE %d
#define CF_CODE_POINT(x) ((x) & 0x1FFFF)
#define CF_REPL_IDX(x) ((((x) >> 17) & 0xFFF) * 2)
#define CF_REPL_SIZE(x) ((x) >> 29)

static const uint32_t cf_table[%d] = {""" % (len(main_table), len(main_table)))

_print_rows(main_table, 6, "0x%X")

print("""};

static const unsigned char cf_repl[%d] = {""" % len(repl_table))

_print_rows(repl_table, 12, "0x%02X")

print("};")
# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc

# Import the submodule explicitly instead of relying on the `html` package
# happening to load `html.entities` as a side effect.
import html.entities

entities5 = html.entities.html5

# Remove keys without semicolons. HTML5 allows some named character
# references without a trailing semicolon.
entities = sorted([(k[:-1], entities5[k]) for k in entities5.keys() if k[-1] == ';'])

# Text emitted ahead of the main table.  The first line names this script so
# that the generated file points back at the tool that really produced it.
HEADER_TEMPLATE = """/* Autogenerated by tools/make_entities_inc.py */

#define ENT_MIN_LENGTH 2
#define ENT_MAX_LENGTH 32
#define ENT_TABLE_SIZE %d
#define ENT_TEXT_IDX(x) ((x) & 0x7FFF)
#define ENT_NAME_SIZE(x) (((x) >> 15) & 0x1F)
#define ENT_REPL_SIZE(x) ((x) >> 20)

static const uint32_t cmark_entities[%d] = {"""

main_table = []    # one packed 32-bit entry per entity, in sorted name order
text_table = b''   # entity name bytes immediately followed by replacement bytes
text_idx = 0       # offset into text_table of the next entity's name

for (ent, repl) in entities:
    ent_bytes = ent.encode('UTF-8')
    ent_size = len(ent_bytes)
    repl_bytes = repl.encode('UTF-8')
    repl_size = len(repl_bytes)

    # Field widths must match the ENT_* accessor macros emitted above:
    # 15 bits of text index, 5 bits of name size, 3 bits of replacement size.
    if text_idx >= (1 << 15):
        raise Exception("text index too large")
    if ent_size >= (1 << 5):
        raise Exception("entity name too long")
    if repl_size >= (1 << 3):
        raise Exception("entity replacement too long")

    main_table += [ text_idx | ent_size << 15 | repl_size << 20 ]

    text_table += ent_bytes + repl_bytes
    text_idx += ent_size + repl_size

print(HEADER_TEMPLATE % (len(main_table), len(main_table)))

# Main table: six hexadecimal words per row.
i = 0
size = len(main_table)
for value in main_table:
    if i % 6 == 0:
        print(" ", end="")
    print("0x%X" % value, end="")
    i += 1
    if i == size: print()
    elif i % 6 == 0: print(",")
    else: print(", ", end="")

print("""};

static const unsigned char cmark_entity_text[%d] = {""" % len(text_table))

# Text table: twelve bytes per row.
i = 0
size = len(text_table)
for value in text_table:
    if i % 12 == 0:
        print(" ", end="")
    print("0x%02X" % value, end="")
    i += 1
    if i == size: print()
    elif i % 12 == 0: print(",")
    else: print(", ", end="")

print("};")
29 |     <ul>
30 |     <li>one
31 |
32 |     <ul>
33 |     <li>two</li>
34 |     <li>three</li>
35 |     </ul></li>
36 |     </ul>
37 |
38 |
39 |     % hoedown
40 |     ## hi\###
41 |     ^D
42 |     <h2>hi\</h2>
43 |
44 |
45 |     % hoedown
46 |     [ΑΓΩ]: /φου
47 |
48 |     [αγω]
49 |     ^D
50 |     <p>[αγω]</p>
51 |
52 |
53 |     % hoedown
54 |     ```
55 |     [foo]: /url
56 |     ```
57 |
58 |     [foo]
59 |     ^D
60 |     <p>```</p>
61 |
62 |     <p>```</p>
63 |
64 |     <p><a href="/url">foo</a></p>
65 |
66 |
67 |     % hoedown
68 |     [foo](url "ti\*tle")
69 |     ^D
70 |     <p><a href="url" title="ti\*tle">foo</a></p>
71 |
72 |
73 |     % ./hoedown
74 |     - one
75 |      - two
76 |       - three
77 |        - four
78 |     ^D
79 |     <ul>
80 |     <li>one
#!/usr/bin/env python

# Example for using the shared library from python
# Will work with either python 2 or python 3
# Requires cmark library to be installed

from ctypes import *
import sys
import platform

# Pick the shared-library file name for this platform; anything that is not
# macOS or Windows falls back to the ELF-style name.
sysname = platform.system()
libname = {'Darwin': "libcmark.dylib",
           'Windows': "cmark.dll"}.get(sysname, "libcmark.so")
cmark = CDLL(libname)


class cmark_mem(Structure):
    # Three function-pointer members; only `free` is given a callable
    # prototype because it is the only one invoked from this script.
    _fields_ = [("calloc", c_void_p),
                ("realloc", c_void_p),
                ("free", CFUNCTYPE(None, c_void_p))]


# Fetch the library's allocator record and keep its `free` member around.
get_alloc = cmark.cmark_get_default_mem_allocator
get_alloc.restype = POINTER(cmark_mem)
free_func = get_alloc().contents.free

markdown = cmark.cmark_markdown_to_html
markdown.restype = POINTER(c_char)
markdown.argtypes = [c_char_p, c_size_t, c_int]

opts = 0 # defaults


def md2html(text):
    """Convert markdown text to an HTML string via the shared library.

    The text is passed as UTF-8 bytes together with its byte length; the
    returned C string is copied, decoded as UTF-8 and then released with the
    library allocator's `free`.
    """
    encoded = text.encode('utf-8')
    html_ptr = markdown(encoded, len(encoded), opts)
    html = string_at(html_ptr).decode('utf-8')
    free_func(html_ptr)
    return html


sys.stdout.write(md2html(sys.stdin.read()))
#!/usr/bin/env ruby
require 'ffi'

# Minimal FFI binding exposing the two library functions this script uses.
module CMark
  extend FFI::Library

  # Allocator record returned by cmark_get_default_mem_allocator; only the
  # :free member is declared as a callable.
  class Mem < FFI::Struct
    layout :calloc,  :pointer,
           :realloc, :pointer,
           :free,    callback([:pointer], :void)
  end

  ffi_lib %w[libcmark cmark]
  attach_function :cmark_get_default_mem_allocator, [], :pointer
  attach_function :cmark_markdown_to_html, %i[string size_t int], :pointer
end

# Converts the markdown string +s+ to HTML.
#
# The byte size of +s+ is passed as the length and 0 as the options value.
# The result is read out of the returned pointer, which is then released
# through the :free member of the library's allocator record.
def markdown_to_html(s)
  html_ptr = CMark.cmark_markdown_to_html(s, s.bytesize, 0)
  html = html_ptr.get_string(0)
  allocator = CMark::Mem.new(CMark.cmark_get_default_mem_allocator)
  allocator[:free].call(html_ptr)
  html
end

STDOUT.write(markdown_to_html(ARGF.read))
;; Thin FFI layer: one Racket binding per C function, all exported under the
;; C names via `all-defined-out`.
(module low-level racket/base

  (require ffi/unsafe ffi/unsafe/define)

  (provide (all-defined-out))

  ;; `defcmark` looks each name up in the "libcmark" shared library.
  (define-ffi-definer defcmark (ffi-lib "libcmark"))

  ;; Node kinds as symbols.
  ;; NOTE(review): `_enum` numbers the symbols in the order listed, so this
  ;; order presumably mirrors the C enum -- verify against cmark.h.
  (define _cmark_node_type
    (_enum '(;; Error status
             none
             ;; Block
             document block-quote list item code-block
             html-block custom-block
             paragraph heading thematic-break
             ;; ?? first-block = document
             ;; ?? last-block = thematic-break
             ;; Inline
             text softbreak linebreak code html-inline custom-inline
             emph strong link image
             ;; ?? first-inline = text
             ;; ?? last-inline = image
             )))
  (define _cmark_list_type
    (_enum '(no_list bullet_list ordered_list)))
  (define _cmark_delim_type
    (_enum '(no_delim period_delim paren_delim)))
  ;; Option flags as a bitmask type.  Each entry is (name bit-position); the
  ;; flag value is 2 raised to that position.
  (define _cmark_opts
    (let ([opts '([sourcepos 1] ; include sourcepos attribute on block elements
                  [hardbreaks 2] ; render `softbreak` elements as hard line breaks
                  [safe 3] ; defined here for API compatibility (on by default)
                  [unsafe 17] ; render raw HTML and unsafe links
                  [nobreaks 4] ; render `softbreak` elements as spaces
                  [normalize 8] ; legacy (no effect)
                  [validate-utf8 9] ; validate UTF-8 in the input
                  [smart 10] ; straight quotes to curly, ---/-- to em/en dashes
                  )])
      (_bitmask (apply append (map (λ(o) `(,(car o) = ,(expt 2 (cadr o))))
                                   opts)))))

  ;; Defines `_node` and `_node/null` (the latter accepts/returns #f for NULL).
  (define-cpointer-type _node)

  ;; Whole-document conversion.  The length argument is computed from the
  ;; byte string; the returned buffer is converted to a string and then freed.
  ;; NOTE(review): the length is passed as `_int`; the C prototype is not
  ;; visible here -- confirm that this matches the parameter's real type.
  (defcmark cmark_markdown_to_html
    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))

  ;; Parse only: returns the root node; this layer registers no finalizer.
  (defcmark cmark_parse_document
    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
          -> _node))

  ;; Render a node tree to HTML, freeing the C result after conversion.
  (defcmark cmark_render_html
    (_fun _node _cmark_opts
          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))

  ;; Node construction and destruction.
  (defcmark cmark_node_new (_fun _cmark_node_type -> _node))
  (defcmark cmark_node_free (_fun _node -> _void))

  ;; Tree navigation; each may return #f (declared `_node/null`).
  (defcmark cmark_node_next (_fun _node -> _node/null))
  (defcmark cmark_node_previous (_fun _node -> _node/null))
  (defcmark cmark_node_parent (_fun _node -> _node/null))
  (defcmark cmark_node_first_child (_fun _node -> _node/null))
  (defcmark cmark_node_last_child (_fun _node -> _node/null))

  ;; Accessors.  Getter/setter pairs follow the naming scheme
  ;; cmark_node_get_<field> / cmark_node_set_<field>.
  (defcmark cmark_node_get_user_data (_fun _node -> _racket))
  (defcmark cmark_node_set_user_data (_fun _node _racket -> _bool))
  (defcmark cmark_node_get_type (_fun _node -> _cmark_node_type))
  (defcmark cmark_node_get_type_string (_fun _node -> _bytes))
  (defcmark cmark_node_get_literal (_fun _node -> _string))
  (defcmark cmark_node_set_literal (_fun _node _string -> _bool))
  (defcmark cmark_node_get_heading_level (_fun _node -> _int))
  (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool))
  (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type))
  (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool))
  (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type))
  (defcmark cmark_node_set_list_delim (_fun _node _cmark_delim_type -> _bool))
  (defcmark cmark_node_get_list_start (_fun _node -> _int))
  (defcmark cmark_node_set_list_start (_fun _node _int -> _bool))
  (defcmark cmark_node_get_list_tight (_fun _node -> _bool))
  (defcmark cmark_node_set_list_tight (_fun _node _bool -> _bool))
  (defcmark cmark_node_get_fence_info (_fun _node -> _string))
  (defcmark cmark_node_set_fence_info (_fun _node _string -> _bool))
  (defcmark cmark_node_get_url (_fun _node -> _string))
  (defcmark cmark_node_set_url (_fun _node _string -> _bool))
  (defcmark cmark_node_get_title (_fun _node -> _string))
  (defcmark cmark_node_set_title (_fun _node _string -> _bool))
  (defcmark cmark_node_get_start_line (_fun _node -> _int))
  (defcmark cmark_node_get_start_column (_fun _node -> _int))
  (defcmark cmark_node_get_end_line (_fun _node -> _int))
  (defcmark cmark_node_get_end_column (_fun _node -> _int))

  ;; Tree manipulation.
  (defcmark cmark_node_unlink (_fun _node -> _void))
  (defcmark cmark_node_insert_before (_fun _node _node -> _bool))
  (defcmark cmark_node_insert_after (_fun _node _node -> _bool))
  (defcmark cmark_node_prepend_child (_fun _node _node -> _bool))
  (defcmark cmark_node_append_child (_fun _node _node -> _bool))
  (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))

  ;; Library version queries.
  (defcmark cmark_version (_fun -> _int))
  (defcmark cmark_version_string (_fun -> _string))

  )
;; Rackety interface

(module high-level racket/base

  (require (submod ".." low-level) ffi/unsafe)

  ;; Convert markdown (a string or a UTF-8 byte string) straight to an HTML
  ;; string.  `options` is a list of option symbols from the low-level
  ;; bitmask type.
  (provide cmark-markdown-to-html)
  (define (cmark-markdown-to-html str [options '(normalize smart)])
    (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
                            options))

  ;; `(make-getter+setter field)` expands to a pair holding the bindings
  ;; cmark_node_get_<field> and cmark_node_set_<field>.
  (require (for-syntax racket/base racket/syntax))
  (define-syntax (make-getter+setter stx)
    (syntax-case stx ()
      [(_ name) (with-syntax ([(getter setter)
                               (map (λ(op) (format-id #'name "cmark_node_~a_~a"
                                                      op #'name))
                                    '(get set))])
                  #'(cons getter setter))]))
  (define-syntax-rule (define-getters+setters name [type field ...] ...)
    (define name (list (list 'type (make-getter+setter field) ...) ...)))
  ;; Association list: node type -> (getter . setter) pairs for the extra
  ;; per-type properties carried in the `info` slot of an s-expression.
  (define-getters+setters getters+setters
    [heading heading_level] [code-block fence_info]
    [link url title] [image url title]
    [list list_type list_delim list_start list_tight])

  ;; Convert a node tree to nested lists of the form (type info data), where
  ;; `data` is the list of converted children for container types and the
  ;; literal text for leaf types.  Raises an error on an unknown type, on a
  ;; container that has literal text, or on a leaf that has children.
  (provide cmark->sexpr)
  (define (cmark->sexpr node)
    (define text (cmark_node_get_literal node))
    (define type (cmark_node_get_type node))
    (define children
      (let loop ([node (cmark_node_first_child node)])
        (if (not node) '()
            (cons (cmark->sexpr node) (loop (cmark_node_next node))))))
    (define info
      (cond [(assq type getters+setters)
             => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))]
            [else '()]))
    (define (assert-no what-not b)
      (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type)))
    (cond [(memq type '(document paragraph heading block-quote list item
                        emph strong link image))
           (assert-no 'text text)
           (list type info children)]
          [(memq type '(text code code-block html-block html-inline
                        softbreak linebreak thematic-break))
           (assert-no 'children (pair? children))
           (list type info text)]
          [else (error 'cmark->sexpr "unknown type: ~s" type)]))

  ;; Build a node tree from an s-expression in the `cmark->sexpr` format and
  ;; return its root, with a `cmark_node_free` finalizer registered on it.
  (provide sexpr->cmark)
  (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above
    (define (loop sexpr)
      (define type (car sexpr))
      (define info (cadr sexpr))
      (define data (caddr sexpr))
      (define node (cmark_node_new type))
      (let ([gss (assq type getters+setters)])
        (when gss
          (unless (= (length (cdr gss)) (length info))
            (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
          (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info)))
      (cond [(string? data) (cmark_node_set_literal node data)]
            [(not data) (void)]
            [(list? data)
             ;; Recurse through `loop`, not `sexpr->cmark`: only the root may
             ;; carry a finalizer.  Going through the outer function would
             ;; also register `cmark_node_free` on every child, so a child
             ;; could be freed while still linked into the tree, or freed a
             ;; second time once the root is released (this assumes freeing
             ;; the root releases its descendants, as the root-only finalizer
             ;; in `cmark-parse-document` also relies on).
             (for ([child (in-list data)])
               (cmark_node_append_child node (loop child)))]
            [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
      node)
    (define root (loop sexpr))
    (register-finalizer root cmark_node_free)
    root)

  ;; Registers a `cmark_node_free` finalizer
  (provide cmark-parse-document)
  (define (cmark-parse-document str [options '(normalize smart)])
    (define root (cmark_parse_document
                  (if (bytes? str) str (string->bytes/utf-8 str))
                  options))
    (register-finalizer root cmark_node_free)
    root)

  ;; Render a node tree (as returned by the functions above) to HTML.
  (provide cmark-render-html)
  (define (cmark-render-html root [options '(normalize smart)])
    (cmark_render_html root options)))
83 |     <li>two</li>
84 |     <li>three</li>
85 |     <li>four</li>
86 |     </ul></li>
87 |     </ul>