├── .editorconfig
├── .gitattributes
├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   └── fuzz.yml
├── .gitignore
├── CMakeLists.txt
├── COPYING
├── Makefile
├── Makefile.nmake
├── README.md
├── api_test
    ├── CMakeLists.txt
    ├── cplusplus.cpp
    ├── cplusplus.h
    ├── harness.c
    ├── harness.h
    └── main.c
├── bench
    ├── samples
    │   ├── block-bq-flat.md
    │   ├── block-bq-nested.md
    │   ├── block-code.md
    │   ├── block-fences.md
    │   ├── block-heading.md
    │   ├── block-hr.md
    │   ├── block-html.md
    │   ├── block-lheading.md
    │   ├── block-list-flat.md
    │   ├── block-list-nested.md
    │   ├── block-ref-flat.md
    │   ├── block-ref-nested.md
    │   ├── inline-autolink.md
    │   ├── inline-backticks.md
    │   ├── inline-em-flat.md
    │   ├── inline-em-nested.md
    │   ├── inline-em-worst.md
    │   ├── inline-entity.md
    │   ├── inline-escape.md
    │   ├── inline-html.md
    │   ├── inline-links-flat.md
    │   ├── inline-links-nested.md
    │   ├── inline-newlines.md
    │   ├── lorem1.md
    │   └── rawtabs.md
    ├── statistics.py
    └── stats.py
├── benchmarks.md
├── changelog.txt
├── cmake
    └── modules
    │   └── FindAsan.cmake
├── data
    └── CaseFolding.txt
├── fuzz
    ├── CMakeLists.txt
    ├── afl_test_cases
    │   └── test.md
    ├── cmark-fuzz.c
    └── dictionary
├── man
    ├── CMakeLists.txt
    ├── make_man_page.py
    ├── man1
    │   └── cmark.1
    └── man3
    │   └── cmark.3
├── nmake.bat
├── shell.nix
├── src
    ├── CMakeLists.txt
    ├── blocks.c
    ├── buffer.c
    ├── buffer.h
    ├── case_fold.inc
    ├── chunk.h
    ├── cmark.c
    ├── cmark.h
    ├── cmarkConfig.cmake.in
    ├── cmark_ctype.c
    ├── cmark_ctype.h
    ├── cmark_version.h.in
    ├── commonmark.c
    ├── entities.inc
    ├── houdini.h
    ├── houdini_href_e.c
    ├── houdini_html_e.c
    ├── houdini_html_u.c
    ├── html.c
    ├── inlines.c
    ├── inlines.h
    ├── iterator.c
    ├── iterator.h
    ├── latex.c
    ├── libcmark.pc.in
    ├── main.c
    ├── man.c
    ├── node.c
    ├── node.h
    ├── parser.h
    ├── references.c
    ├── references.h
    ├── render.c
    ├── render.h
    ├── scanners.c
    ├── scanners.h
    ├── scanners.re
    ├── utf8.c
    ├── utf8.h
    └── xml.c
├── test
    ├── CMakeLists.txt
    ├── cmark.py
    ├── entity_tests.py
    ├── normalize.py
    ├── pathological_tests.py
    ├── regression.txt
    ├── roundtrip_tests.py
    ├── smart_punct.txt
    ├── spec.txt
    └── spec_tests.py
├── toolchain-mingw32.cmake
├── tools
    ├── appveyor-build.bat
    ├── make_case_fold_inc.py
    ├── make_entities_inc.py
    └── xml2md.xsl
├── why-cmark-and-not-x.md
└── wrappers
    ├── wrapper.php
    ├── wrapper.py
    ├── wrapper.rb
    └── wrapper.rkt


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | end_of_line = lf
 7 | charset = utf-8
 8 | insert_final_newline = true
 9 | 
10 | [*.{c,h}]
11 | trim_trailing_whitespace = true
12 | indent_style = space
13 | indent_size = 2
14 | 
15 | [Makefile]
16 | trim_trailing_whitespace = true
17 | indent_style = tab
18 | indent_size = 8
19 | 
20 | [Makefile.nmake]
21 | end_of_line = crlf
22 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 | Makefile.nmake eol=crlf
3 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [jgm]
2 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | updates:
4 |   - package-ecosystem: "github-actions"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "daily"
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI tests
  2 | 
  3 | on:
  4 |   push:
  5 |     branches-ignore:
  6 |       - 'dependabot/**'
  7 |   pull_request:
  8 |   workflow_dispatch:
  9 | 
 10 | jobs:
 11 | 
 12 |   linter:
 13 | 
 14 |     runs-on: ubuntu-latest
 15 | 
 16 |     steps:
 17 | 
 18 |     - uses: actions/checkout@v4
 19 |     - name: Install clang-tidy  # refresh the apt index first so a stale runner image cannot break the install
 20 |       run: |
 21 |         sudo apt-get update && sudo apt-get install -y clang-tidy
 22 |     - name: lint with clang-tidy
 23 |       run: |
 24 |         make lint
 25 |       env:
 26 |         CC: clang
 27 |         CXX: clang++
 28 | 
 29 |   posix:
 30 | 
 31 |     strategy:
 32 |       fail-fast: false
 33 |       matrix:
 34 |         os: [linux, macos]
 35 |         cc: [clang, gcc]
 36 |         build_type: [shared, static]
 37 |         sanitizers: ['', ASan]
 38 | 
 39 |         include:
 40 |           # Translate human readable labels
 41 |           - os: 'linux'
 42 |             image: 'ubuntu-latest'
 43 |           - os: 'macos'
 44 |             image: 'macos-latest'
 45 |           - cc: 'clang'
 46 |             cxx: 'clang++'
 47 |           - cc: 'gcc'
 48 |             cxx: 'g++'
 49 |           - build_type: 'shared'
 50 |             cmake_shared: 'YES'
 51 |           - build_type: 'static'
 52 |             cmake_shared: 'NO'
 53 |           - sanitizers: 'ASan'
 54 |             san_cflags: '-fsanitize=address,undefined -fno-sanitize-recover=all'
 55 | 
 56 |           # When building shared libraries, they will be loaded
 57 |           # dynamically from Python when testing. This means that
 58 |           # we have to use a preloaded shared libasan.
 59 |           - sanitizers: 'ASan'
 60 |             os: 'linux'
 61 |             cc: 'gcc'
 62 |             build_type: 'shared'
 63 |             test_env: 'LD_PRELOAD=$(gcc -print-file-name=libasan.so)'
 64 |           - sanitizers: 'ASan'
 65 |             os: 'linux'
 66 |             cc: 'clang'
 67 |             build_type: 'shared'
 68 |             # clang defaults to -static-libasan
 69 |             asan_cflags: '-shared-libasan'
 70 |             test_env: 'LD_PRELOAD=$(clang -print-file-name=libclang_rt.asan-x86_64.so)'
 71 | 
 72 |           # We have to disable LeakSanitizer in shared library builds
 73 |           # because we get false positives from Python.
 74 |           - sanitizers: 'ASan'
 75 |             build_type: 'shared'
 76 |             asan_opts: 'detect_leaks=0'
 77 | 
 78 |           # The static build can run with LeakSanitizer.
 79 |           # gcc defaults to -shared-libasan and needs -static-libasan
 80 |           - sanitizers: 'ASan'
 81 |             cc: 'gcc'
 82 |             build_type: 'static'
 83 |             asan_cflags: '-static-libasan'
 84 | 
 85 |         exclude:
 86 |           # gcc is just an alias for clang on macOS
 87 |           - os: 'macos'
 88 |             cc: 'gcc'
 89 |           # Shared libasan doesn't work with macOS system Python.
 90 |           - os: 'macos'
 91 |             sanitizers: 'ASan'
 92 |             build_type: 'shared'
 93 | 
 94 |     runs-on: ${{ matrix.image }}
 95 | 
 96 |     env:
 97 |        ASAN_OPTIONS: ${{ matrix.asan_opts }}
 98 |        CC: ${{ matrix.cc }}
 99 |        CXX: ${{ matrix.cxx }}
100 |        CFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}'
101 |        CXXFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}'
102 | 
103 |     steps:
104 |     - uses: actions/checkout@v4
105 |     - name: Build and test
106 |       run: |
107 |          cmake \
108 |             -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} \
109 |             -DCMAKE_BUILD_TYPE=Debug \
110 |             -S . -B build
111 |          cmake --build build
112 |          ${{ matrix.test_env }} ctest --test-dir build --output-on-failure
113 | 
114 |   windows:
115 | 
116 |     runs-on: windows-latest
117 |     strategy:
118 |       fail-fast: false
119 |       matrix:
120 |         build_type: [shared, static]
121 |         include:
122 |           - build_type: 'shared'
123 |             cmake_shared: 'YES'
124 |           - build_type: 'static'
125 |             cmake_shared: 'NO'
126 | 
127 |     steps:
128 |     - uses: actions/checkout@v4
129 |     - uses: ilammy/msvc-dev-cmd@v1
130 |     - name: Build and test
131 |       run: |
132 |         cmake ^
133 |             -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} ^
134 |             -DCMAKE_BUILD_TYPE=Debug ^
135 |             -S . -B build
136 |         cmake --build build
137 |         ctest --test-dir build -C Debug --output-on-failure
138 |       shell: cmd
139 |     - name: Upload artifact
140 |       if: ${{ matrix.build_type == 'static' }}
141 |       uses: actions/upload-artifact@v4
142 |       with:
143 |         name: cmark windows ${{ matrix.build_type }}
144 |         path: build/src/Debug/cmark.exe
145 |         if-no-files-found: error
146 | 


--------------------------------------------------------------------------------
/.github/workflows/fuzz.yml:
--------------------------------------------------------------------------------
 1 | name: CIFuzz
 2 | on: [pull_request]
 3 | jobs:
 4 |   Fuzzing:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |     - name: Build Fuzzers
 8 |       uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
 9 |       with:
10 |         oss-fuzz-project-name: 'cmark'
11 |         dry-run: false
12 |     - name: Run Fuzzers
13 |       uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
14 |       with:
15 |         oss-fuzz-project-name: 'cmark'
16 |         fuzz-seconds: 600
17 |         dry-run: false
18 |     - name: Upload Crash
19 |       uses: actions/upload-artifact@v4
20 |       if: failure()
21 |       with:
22 |         name: artifacts
23 |         path: ./out/artifacts
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Libraries
 8 | *.lib
 9 | *.a
10 | 
11 | # Shared objects (inc. Windows DLLs)
12 | *.dll
13 | *.so
14 | *.so.*
15 | *.dylib
16 | 
17 | # Executables
18 | *.exe
19 | *.out
20 | *.app
21 | *.i*86
22 | *.x86_64
23 | *.hex
24 | *.pyc
25 | 
26 | *~
27 | *.bak
28 | *.sw?
29 | *.diff
30 | *#
31 | *.zip
32 | bstrlib.txt
33 | build
34 | cmark.dSYM/*
35 | cmark
36 | 
37 | # Visual Studio CMake support
38 | /.vs/
39 | /out/
40 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.7)
  2 | 
  3 | if(POLICY CMP0063)
  4 |   cmake_policy(SET CMP0063 NEW)
  5 | endif()
  6 | if(POLICY CMP0092)
  7 |   cmake_policy(SET CMP0092 NEW)
  8 | endif()
  9 | 
 10 | project(cmark
 11 |   LANGUAGES C CXX
 12 |   VERSION 0.31.1)
 13 | 
 14 | if(CMAKE_BUILD_TYPE STREQUAL Asan)
 15 |   list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
 16 |   include(FindAsan)
 17 | endif()
 18 | 
 19 | # Include module for functions
 20 | # - 'write_basic_package_version_file'
 21 | # - 'configure_package_config_file'
 22 | include(CMakePackageConfigHelpers)
 23 | include(CTest)
 24 | include(GenerateExportHeader)
 25 | include(GNUInstallDirs)
 26 | 
 27 | if(NOT MSVC OR CMAKE_HOST_SYSTEM_NAME STREQUAL Windows)
 28 |   set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
 29 |   include(InstallRequiredSystemLibraries)
 30 | endif()
 31 | 
 32 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
 33 |     message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
 34 | endif()
 35 | 
 36 | # Backwards Compatibility
 37 | # TODO: remove this once there has been a period to enable people to migrate
 38 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO)
 39 | if(DEFINED CMARK_SHARED)
 40 |   message(AUTHOR_WARNING [=[
 41 | 'CMARK_SHARED' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type.
 42 | ]=])
 43 |   set(_CMARK_BUILD_SHARED_LIBS_DEFAULT ${CMARK_SHARED})
 44 | endif()
 45 | if(DEFINED CMARK_STATIC)
 46 |   message(AUTHOR_WARNING [=[
 47 | 'CMARK_STATIC' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type.
 48 | ]=])
 49 |   if(NOT CMARK_STATIC)
 50 |     set(_CMARK_BUILD_SHARED_LIBS_DEFAULT YES)
 51 |   else()
 52 |     set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO)
 53 |   endif()
 54 | endif()
 55 | 
 56 | option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF)
 57 | option(BUILD_SHARED_LIBS "Build the CMark library as shared"
 58 |   ${_CMARK_BUILD_SHARED_LIBS_DEFAULT})
 59 | 
 60 | if(NOT MSVC)
 61 |   set(CMAKE_C_STANDARD 99)
 62 |   set(CMAKE_C_STANDARD_REQUIRED YES)
 63 |   set(CMAKE_C_EXTENSIONS NO)
 64 | endif()
 65 | 
 66 | # -fvisibility=hidden
 67 | set(CMAKE_C_VISIBILITY_PRESET hidden)
 68 | set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
 69 | 
 70 | set(CMAKE_INCLUDE_CURRENT_DIR ON)
 71 | 
 72 | if (DEFINED CMAKE_INSTALL_RPATH)
 73 |   set(Base_rpath "${CMAKE_INSTALL_RPATH}")
 74 | else()
 75 |   if(BUILD_SHARED_LIBS)
 76 |     set(p "${CMAKE_INSTALL_FULL_LIBDIR}")
 77 |     list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${p}" i)
 78 |     if("${i}" STREQUAL "-1")
 79 |       set(Base_rpath "${p}")
 80 |     endif()
 81 |   endif()
 82 | endif()
 83 | 
 84 | # Append non-standard external dependency directories, if any.
 85 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 86 | 
 87 | # Check integrity of node structure when compiled as debug
 88 | add_compile_options($<$<CONFIG:Debug>:-DCMARK_DEBUG_NODES>)
 89 | 
 90 | # In order to maintain compatibility with older platforms which may not have a
 91 | # recent version of CMake (i.e. are running CMake <3.3), we cannot simply use
 92 | # the `add_compile_options` with a generator expression.  This uses the
 93 | # `target_compile_options` with `PRIVATE` to add the flags only to the targets
 94 | # so that CMark may be used in projects with non-C languages.
 95 | function(cmark_add_compile_options target)  # add cmark's PRIVATE compile/link flags to the given target
 96 |   if(MSVC)  # MSVC: define _CRT_SECURE_NO_WARNINGS instead of adding warning flags
 97 |     target_compile_definitions(${target} PRIVATE _CRT_SECURE_NO_WARNINGS)
 98 |   else()  # every other compiler: enable the warning set below
 99 |     target_compile_options(${target} PRIVATE
100 |             -Wall -Wextra -pedantic
101 |             $<$<COMPILE_LANGUAGE:C>:-Wstrict-prototypes>)  # -Wstrict-prototypes is applied to C sources only
102 |   endif()
103 |   if(CMAKE_BUILD_TYPE STREQUAL Profile)  # custom "Profile" build type adds -pg
104 |     target_compile_options(${target} PRIVATE -pg)
105 |   endif()
106 |   if(CMAKE_BUILD_TYPE STREQUAL Ubsan)  # custom "Ubsan" build type adds -fsanitize=undefined
107 |     target_compile_options(${target} PRIVATE -fsanitize=undefined)
108 |   endif()
109 |   if(CMARK_LIB_FUZZER)  # only when the CMARK_LIB_FUZZER option is ON
110 |     if(target MATCHES fuzz)  # targets whose name matches "fuzz" compile and link with -fsanitize=fuzzer
111 |       target_compile_options(${target} PRIVATE -fsanitize=fuzzer)
112 |       target_link_options(${target} PRIVATE -fsanitize=fuzzer)  # NOTE(review): target_link_options appears to need CMake >= 3.13, above the declared 3.7 minimum - confirm
113 |     else()  # all other targets use the fuzzer-no-link variant for both compile and link
114 |       target_compile_options(${target} PRIVATE -fsanitize=fuzzer-no-link)
115 |       target_link_options(${target} PRIVATE -fsanitize=fuzzer-no-link)
116 |     endif()
117 |   endif()
118 | endfunction()
119 | 
120 | add_subdirectory(src)
121 | # TODO(compnerd) should this be enabled for MinGW, which sets CMAKE_SYSTEM_NAME
122 | # to Windows, but defines `MINGW`.
123 | if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows)
124 |   add_subdirectory(man)
125 | endif()
126 | if(BUILD_TESTING)
127 |   add_subdirectory(api_test)
128 |   add_subdirectory(test testdir)
129 | endif()
130 | if(CMARK_LIB_FUZZER)
131 |   add_subdirectory(fuzz)
132 | endif()
133 | 
134 | if(NOT CMAKE_BUILD_TYPE)
135 |   set(CMAKE_BUILD_TYPE "Release" CACHE STRING
136 |   "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE)
137 | endif(NOT CMAKE_BUILD_TYPE)
138 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 | Copyright (c) 2014, John MacFarlane
  2 | 
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without
  6 | modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright
  9 |       notice, this list of conditions and the following disclaimer.
 10 | 
 11 |     * Redistributions in binary form must reproduce the above
 12 |       copyright notice, this list of conditions and the following
 13 |       disclaimer in the documentation and/or other materials provided
 14 |       with the distribution.
 15 | 
 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | -----
 29 | 
 30 | houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c
 31 | 
 32 | derive from https://github.com/vmg/houdini (with some modifications)
 33 | 
 34 | Copyright (C) 2012 Vicent Martí
 35 | 
 36 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 37 | this software and associated documentation files (the "Software"), to deal in
 38 | the Software without restriction, including without limitation the rights to
 39 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 40 | of the Software, and to permit persons to whom the Software is furnished to do
 41 | so, subject to the following conditions:
 42 | 
 43 | The above copyright notice and this permission notice shall be included in all
 44 | copies or substantial portions of the Software.
 45 | 
 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 52 | SOFTWARE.
 53 | 
 54 | -----
 55 | 
 56 | buffer.h, buffer.c, chunk.h
 57 | 
 58 | are derived from code (C) 2012 Github, Inc.
 59 | 
 60 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 61 | this software and associated documentation files (the "Software"), to deal in
 62 | the Software without restriction, including without limitation the rights to
 63 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 64 | of the Software, and to permit persons to whom the Software is furnished to do
 65 | so, subject to the following conditions:
 66 | 
 67 | The above copyright notice and this permission notice shall be included in all
 68 | copies or substantial portions of the Software.
 69 | 
 70 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 71 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 72 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 73 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 74 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 75 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 76 | SOFTWARE.
 77 | 
 78 | -----
 79 | 
 80 | utf8.c and utf8.h
 81 | 
 82 | are derived from utf8proc
 83 | (<http://www.public-software-group.org/utf8proc>),
 84 | (C) 2009 Public Software Group e. V., Berlin, Germany.
 85 | 
 86 | Permission is hereby granted, free of charge, to any person obtaining a
 87 | copy of this software and associated documentation files (the "Software"),
 88 | to deal in the Software without restriction, including without limitation
 89 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
 90 | and/or sell copies of the Software, and to permit persons to whom the
 91 | Software is furnished to do so, subject to the following conditions:
 92 | 
 93 | The above copyright notice and this permission notice shall be included in
 94 | all copies or substantial portions of the Software.
 95 | 
 96 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 97 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 98 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 99 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
100 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
101 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
102 | DEALINGS IN THE SOFTWARE.
103 | 
104 | -----
105 | 
106 | The normalization code in normalize.py was derived from the
107 | markdowntest project, Copyright 2013 Karl Dubost:
108 | 
109 | The MIT License (MIT)
110 | 
111 | Copyright (c) 2013 Karl Dubost
112 | 
113 | Permission is hereby granted, free of charge, to any person obtaining
114 | a copy of this software and associated documentation files (the
115 | "Software"), to deal in the Software without restriction, including
116 | without limitation the rights to use, copy, modify, merge, publish,
117 | distribute, sublicense, and/or sell copies of the Software, and to
118 | permit persons to whom the Software is furnished to do so, subject to
119 | the following conditions:
120 | 
121 | The above copyright notice and this permission notice shall be
122 | included in all copies or substantial portions of the Software.
123 | 
124 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
125 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
126 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
127 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
128 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
129 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
130 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
131 | 
132 | -----
133 | 
134 | The CommonMark spec (test/spec.txt) is
135 | 
136 | Copyright (C) 2014-15 John MacFarlane
137 | 
138 | Released under the Creative Commons CC-BY-SA 4.0 license:
139 | <http://creativecommons.org/licenses/by-sa/4.0/>.
140 | 
141 | -----
142 | 
143 | The test software in test/ is
144 | 
145 | Copyright (c) 2014, John MacFarlane
146 | 
147 | All rights reserved.
148 | 
149 | Redistribution and use in source and binary forms, with or without
150 | modification, are permitted provided that the following conditions are met:
151 | 
152 |     * Redistributions of source code must retain the above copyright
153 |       notice, this list of conditions and the following disclaimer.
154 | 
155 |     * Redistributions in binary form must reproduce the above
156 |       copyright notice, this list of conditions and the following
157 |       disclaimer in the documentation and/or other materials provided
158 |       with the distribution.
159 | 
160 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
161 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
162 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
163 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
164 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
165 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
166 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
167 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
168 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
169 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
170 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
171 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | SRCDIR=src
  2 | DATADIR=data
  3 | BUILDDIR?=build
  4 | GENERATOR?=Unix Makefiles
  5 | MINGW_BUILDDIR?=build-mingw
  6 | MINGW_INSTALLDIR?=windows
  7 | SPEC=test/spec.txt
  8 | SITE=_site
  9 | SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC))
 10 | FUZZCHARS?=2000000  # for fuzztest
 11 | BENCHDIR=bench
 12 | BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md)
 13 | BENCHFILE=$(BENCHDIR)/benchinput.md
 14 | ALLTESTS=alltests.md
 15 | NUMRUNS?=10
 16 | CMARK=$(BUILDDIR)/src/cmark
 17 | CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz
 18 | PROG?=$(CMARK)
 19 | VERSION?=$(SPECVERSION)
 20 | RELEASE?=cmark-$(VERSION)
 21 | INSTALL_PREFIX?=/usr/local
 22 | CLANG_CHECK?=clang-check
 23 | CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i
 24 | AFL_PATH?=/usr/local/bin
 25 | 
 26 | .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl libFuzzer lint install uninstall prof operf distclean
 27 | 
 28 | all: cmake_build man/man3/cmark.3
 29 | 
 30 | $(CMARK): cmake_build
 31 | 
 32 | cmake_build: $(BUILDDIR)
 33 | 	cmake --build $(BUILDDIR)
 34 | 	@echo "Binaries can be found in $(BUILDDIR)/src"
 35 | 
 36 | $(BUILDDIR):
 37 | 	@cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
 38 | 	cmake \
 39 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 40 | 	    -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
 41 | 	    -DCMAKE_INSTALL_PREFIX=$(INSTALL_PREFIX) \
 42 | 	    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
 43 | 	    -DBUILD_SHARED_LIBS=YES
 44 | 
 45 | install: $(BUILDDIR)
 46 | 	cmake --install $(BUILDDIR)
 47 | 
 48 | uninstall: $(BUILDDIR)/install_manifest.txt
 49 | 	xargs rm < $<
 50 | 
 51 | debug:
 52 | 	cmake \
 53 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 54 | 	    -DCMAKE_BUILD_TYPE=Debug \
 55 | 	    -DBUILD_SHARED_LIBS=YES
 56 | 	cmake --build $(BUILDDIR)
 57 | 
 58 | ubsan:
 59 | 	cmake \
 60 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 61 | 	    -DCMAKE_BUILD_TYPE=Ubsan
 62 | 	cmake --build $(BUILDDIR)
 63 | 
 64 | asan:
 65 | 	cmake \
 66 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 67 | 	    -DCMAKE_BUILD_TYPE=Asan
 68 | 	cmake --build $(BUILDDIR)
 69 | 
 70 | prof:
 71 | 	cmake \
 72 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 73 | 	    -DCMAKE_BUILD_TYPE=Profile
 74 | 	cmake --build $(BUILDDIR)
 75 | 
 76 | afl:
 77 | 	@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
 78 | 	cmake \
 79 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 80 | 	    -DBUILD_TESTING=NO \
 81 | 	    -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang
 82 | 	cmake --build $(BUILDDIR)
 83 | 	$(AFL_PATH)/afl-fuzz \
 84 | 	    -i fuzz/afl_test_cases \
 85 | 	    -o fuzz/afl_results \
 86 | 	    -x fuzz/dictionary \
 87 | 	    -t 100 \
 88 | 	    $(CMARK) $(CMARK_OPTS)
 89 | 
 90 | libFuzzer:
 91 | 	cmake \
 92 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
 93 | 	    -DCMAKE_C_COMPILER=clang \
 94 | 	    -DCMAKE_CXX_COMPILER=clang++ \
 95 | 	    -DCMAKE_BUILD_TYPE=Asan \
 96 | 	    -DCMARK_LIB_FUZZER=ON
 97 | 	cmake --build $(BUILDDIR)
 98 | 	mkdir -p fuzz/corpus
 99 | 	$(BUILDDIR)/fuzz/cmark-fuzz \
100 | 	    -dict=fuzz/dictionary \
101 | 	    -max_len=1000 \
102 | 	    -timeout=1 \
103 | 	    fuzz/corpus
104 | 
105 | lint: $(BUILDDIR)  # run clang-tidy on src/ (except scanners.c) against the compile database in BUILDDIR
106 | 	errs=0 ; \
107 | 	for f in `ls src/*.[ch] | grep -v "scanners.c"` ; \
108 | 	  do echo $$f ; clang-tidy -header-filter='^$(BUILDDIR)/.*' -p=$(BUILDDIR) -warnings-as-errors='*' $$f || errs=1 ; done ; \
109 | 	exit $$errs
110 | 
111 | mingw:
112 | 	cmake \
113 | 	    -S . -B $(MINGW_BUILDDIR) -G "$(GENERATOR)" \
114 | 	    -DCMAKE_TOOLCHAIN_FILE=toolchain-mingw32.cmake \
115 | 	    -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR)
116 | 	cmake --build $(MINGW_BUILDDIR)
117 | 	cmake --install $(MINGW_BUILDDIR)
118 | 
119 | man/man3/cmark.3: src/cmark.h | $(CMARK)  # regenerate the API man page from the public header
120 | 	python3 man/make_man_page.py $< > $@
121 | 
122 | archive:
123 | 	git archive --prefix=$(RELEASE)/ -o $(RELEASE).tar.gz HEAD
124 | 	git archive --prefix=$(RELEASE)/ -o $(RELEASE).zip HEAD
125 | 
126 | clean:
127 | 	rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR)
128 | 
129 | # We include case_fold.inc in the repository, so this shouldn't
130 | # normally need to be generated.
131 | $(SRCDIR)/case_fold.inc: $(DATADIR)/CaseFolding.txt
132 | 	python3 tools/make_case_fold_inc.py < $< > $@
133 | 
134 | # We include scanners.c in the repository, so this shouldn't
135 | # normally need to be generated.
136 | $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
137 | 	@case "$$(re2c -v)" in \
138 | 	    *\ 0.13.*|*\ 0.14|*\ 0.14.1) \
139 | 		echo "re2c >= 0.14.2 is required"; \
140 | 		false; \
141 | 		;; \
142 | 	esac
143 | 	re2c -W -Werror --case-insensitive -b -i --no-generation-date \
144 | 		-o $@ $<
145 | 	$(CLANG_FORMAT) $@
146 | 
147 | # We include entities.inc in the repository, so normally this
148 | # doesn't need to be regenerated:
149 | $(SRCDIR)/entities.inc: tools/make_entities_inc.py
150 | 	python3 $< > $@
151 | 
152 | update-spec:
153 | 	curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\
154 |  > $(SPEC)
155 | 
156 | test: cmake_build
157 | 	ctest --test-dir $(BUILDDIR) --output-on-failure || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1)
158 | 
159 | $(ALLTESTS):
160 | 	python3 test/spec_tests.py --spec $(SPEC) --dump-tests | python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))' > $@
161 | 
162 | leakcheck: $(ALLTESTS)
163 | 	for format in html man xml latex commonmark; do \
164 | 	  for opts in "" "--smart"; do \
165 | 	     echo "cmark -t $$format $$opts" ; \
166 | 	     valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format $$opts $(ALLTESTS) >/dev/null || exit 1;\
167 |           done; \
168 | 	done;
169 | 
170 | fuzztest:
171 | 	{ for i in `seq 1 10`; do \
172 | 	  cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \
173 | 		/usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \
174 | 	done } 2>&1 | grep 'user\|abnormally'
175 | 
176 | progit:
177 | 	git clone https://github.com/progit/progit.git
178 | 
179 | $(BENCHFILE): progit
180 | 	echo "" > $@
181 | 	for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \
182 | 	cat progit/$$lang/*/*.markdown >> $@; \
183 | 	done
184 | 
185 | # for more accurate results, run with
186 | # sudo renice -10 $$; make bench
187 | bench: $(BENCHFILE)
188 | 	{ for x in `seq 1 $(NUMRUNS)` ; do \
189 | 		/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
190 | 		/usr/bin/env time -p $(PROG) $< >/dev/null ; \
191 | 		done \
192 | 	} 2>&1  | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py'
193 | # For each file in $(BENCHSAMPLES): print its name, then time $(PROG) $(NUMRUNS) times on empty input and on 200 concatenated copies of the file, and summarise with bench/stats.py.
194 | newbench:
195 | 	for f in $(BENCHSAMPLES) ; do \
196 | 	  printf "%26s  " `basename $$f` ; \
197 | 	  { for x in `seq 1 $(NUMRUNS)` ; do \
198 | 		/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
199 | 		for x in `seq 1 200` ; do cat $$f ; done | \
200 | 		  /usr/bin/env time -p $(PROG) > /dev/null; \
201 | 		done \
202 | 	  } 2>&1  | grep 'real' | awk '{print $$2}' | \
203 | 	    python3 'bench/stats.py'; done
204 | # Run $(CLANG_FORMAT) over the C sources and headers in src/ and api_test/.
205 | format:
206 | 	$(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h
207 | # Profile $(CMARK) with operf, reading $(BENCHFILE) on stdin (the file is not a prerequisite, so it must already exist).
208 | operf: $(CMARK)
209 | 	operf $< < $(BENCHFILE) > /dev/null
210 | # After clean, also remove dSYM bundles, README.html, the benchmark and test inputs, and the progit checkout; failures are ignored.
211 | distclean: clean
212 | 	-rm -rf *.dSYM
213 | 	-rm -f README.html
214 | 	-rm -rf $(BENCHFILE) $(ALLTESTS) progit
215 | 


--------------------------------------------------------------------------------
/Makefile.nmake:
--------------------------------------------------------------------------------
 1 | SRCDIR=src
 2 | DATADIR=data
 3 | BUILDDIR=build
 4 | INSTALLDIR=windows
 5 | PROG=$(BUILDDIR)\src\cmark.exe
 6 | GENERATOR=NMake Makefiles
 7 | # Default target: configure the build tree if it does not exist yet, then build it.
 8 | all: $(BUILDDIR)/CMakeFiles
 9 | 	cmake --build $(BUILDDIR)
10 | # Configure step. BUILD_TYPE is not defined in this file (presumably passed on the nmake command line - confirm).
11 | $(BUILDDIR)/CMakeFiles:
12 | 	cmake \
13 | 	    -S . -B $(BUILDDIR) -G "$(GENERATOR)" \
14 | 	    -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \
15 | 	    -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR)
16 | # Build, then install under the prefix given at configure time ($(INSTALLDIR)).
17 | install: all
18 | 	cmake --install $(BUILDDIR)
19 | # Remove the build tree and the install prefix. MINGW_INSTALLDIR was never defined in this file, so the install tree used to be left behind.
20 | clean:
21 | 	-rmdir /s /q $(BUILDDIR) $(INSTALLDIR) 2> nul
22 | # Build, then run the ctest suite in $(BUILDDIR), showing output of failing tests.
23 | test: all
24 | 	ctest --test-dir $(BUILDDIR) --output-on-failure
25 | # After clean, also delete src\scanners.c and spec.md/spec.html; "file not found" messages are discarded.
26 | distclean: clean
27 | 	del /q src\scanners.c 2> nul
28 | 	del /q spec.md spec.html 2> nul
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | cmark
  2 | =====
  3 | 
  4 | [![CI
  5 | tests](https://github.com/commonmark/cmark/workflows/CI%20tests/badge.svg)](https://github.com/commonmark/cmark/actions)
  6 | 
  7 | `cmark` is the C reference implementation of [CommonMark], a
  8 | rationalized version of Markdown syntax with a [spec][the spec].
  9 | (For the JavaScript reference implementation, see
 10 | [commonmark.js].)
 11 | 
 12 | It provides a shared library (`libcmark`) with functions for parsing
 13 | CommonMark documents to an abstract syntax tree (AST), manipulating
 14 | the AST, and rendering the document to HTML, groff man, LaTeX,
 15 | CommonMark, or an XML representation of the AST.  It also provides a
 16 | command-line program (`cmark`) for parsing and rendering CommonMark
 17 | documents.
 18 | 
 19 | Advantages of this library:
 20 | 
 21 | - **Portable.**  The library and program are written in standard
 22 |   C99 and have no external dependencies.  They have been tested with
 23 |   MSVC, gcc, tcc, and clang.
 24 | 
 25 | - **Fast.** cmark can render a Markdown version of *War and Peace* in
 26 |   the blink of an eye (127 milliseconds on a ten year old laptop,
 27 |   vs. 100-400 milliseconds for an eye blink).  In our [benchmarks],
 28 |   cmark is 10,000 times faster than the original `Markdown.pl`, and
 29 |   on par with the very fastest available Markdown processors.
 30 | 
 31 | - **Accurate.** The library passes all CommonMark conformance tests.
 32 | 
 33 | - **Standardized.** The library can be expected to parse CommonMark
 34 |   the same way as any other conforming parser.  So, for example,
 35 |   you can use `commonmark.js` on the client to preview content that
 36 |   will be rendered on the server using `cmark`.
 37 | 
 38 | - **Robust.** The library has been extensively fuzz-tested using
 39 |   [american fuzzy lop].  The test suite includes pathological cases
 40 |   that bring many other Markdown parsers to a crawl (for example,
 41 |   thousands-deep nested bracketed text or block quotes).
 42 | 
 43 | - **Flexible.** CommonMark input is parsed to an AST which can be
 44 |   manipulated programmatically prior to rendering.
 45 | 
 46 | - **Multiple renderers.**  Output in HTML, groff man, LaTeX, CommonMark,
 47 |   and a custom XML format is supported. And it is easy to write new
 48 |   renderers to support other formats.
 49 | 
 50 | - **Free.** BSD2-licensed.
 51 | 
 52 | It is easy to use `libcmark` in python, lua, ruby, and other dynamic
 53 | languages: see the `wrappers/` subdirectory for some simple examples.
 54 | 
 55 | There are also libraries that wrap `libcmark` for
 56 | [Go](https://github.com/rhinoman/go-commonmark),
 57 | [Haskell](https://hackage.haskell.org/package/cmark),
 58 | [Ruby](https://github.com/gjtorikian/commonmarker),
 59 | [Lua](https://github.com/jgm/cmark-lua),
 60 | [Perl](https://metacpan.org/release/CommonMark),
 61 | [Python](https://pypi.python.org/pypi/paka.cmark),
 62 | [R](https://cran.r-project.org/package=commonmark),
 63 | [Scala](https://github.com/sparsetech/cmark-scala) and
 64 | [PHP](https://www.php.net/manual/en/book.cmark.php).
 65 | 
 66 | Installing
 67 | ----------
 68 | 
 69 | Building the C program (`cmark`) and shared library (`libcmark`)
 70 | requires [cmake].  If you modify `scanners.re`, then you will also
 71 | need [re2c] \(>= 0.14.2\), which is used to generate `scanners.c` from
 72 | `scanners.re`.  We have included a pre-generated `scanners.c` in
 73 | the repository to reduce build dependencies.
 74 | 
 75 | If you have GNU make, you can simply `make`, `make test`, and `make
 76 | install`.  This calls [cmake] to create a `Makefile` in the `build`
 77 | directory, then uses that `Makefile` to create the executable and
 78 | library.  The binaries can be found in `build/src`.  The default
 79 | installation prefix is `/usr/local`.  To change the installation
 80 | prefix, pass the `INSTALL_PREFIX` variable when you run `make` for the
 81 | first time: `make INSTALL_PREFIX=path`.
 82 | 
 83 | For a more portable method, you can use [cmake] manually. [cmake] knows
 84 | how to create build environments for many build systems.  For example,
 85 | on FreeBSD:
 86 | 
 87 |     cmake -S . -B build  # optionally: -DCMAKE_INSTALL_PREFIX=path
 88 |     cmake --build build  # executable will be created as build/src/cmark
 89 |     ctest --test-dir build
 90 |     cmake --install build
 91 | 
 92 | Or, to create Xcode project files on macOS:
 93 | 
 94 |     cmake -S . -B build -G Xcode
 95 |     open build/cmark.xcodeproj
 96 | 
 97 | The GNU Makefile also provides a few other targets for developers.
 98 | To run a benchmark:
 99 | 
100 |     make bench
101 | 
102 | For more detailed benchmarks:
103 | 
104 |     make newbench
105 | 
106 | To run a test for memory leaks using `valgrind`:
107 | 
108 |     make leakcheck
109 | 
110 | To reformat source code using `clang-format`:
111 | 
112 |     make format
113 | 
114 | To run a "fuzz test" against ten long randomly generated inputs:
115 | 
116 |     make fuzztest
117 | 
118 | To do a more systematic fuzz test with [american fuzzy lop]:
119 | 
120 |     AFL_PATH=/path/to/afl_directory make afl
121 | 
122 | Fuzzing with [libFuzzer] is also supported. The fuzzer can be run with:
123 | 
124 |     make libFuzzer
125 | 
126 | To make a release tarball and zip archive:
127 | 
128 |     make archive
129 | 
130 | Installing (Windows)
131 | --------------------
132 | 
133 | To compile with MSVC and NMAKE:
134 | 
135 |     nmake /f Makefile.nmake
136 | 
137 | You can cross-compile a Windows binary and dll on linux if you have the
138 | `mingw32` compiler:
139 | 
140 |     make mingw
141 | 
142 | The binaries will be in `build-mingw/windows/bin`.
143 | 
144 | Usage
145 | -----
146 | 
147 | Instructions for the use of the command line program and library can
148 | be found in the man pages in the `man` subdirectory.
149 | 
150 | Security
151 | --------
152 | 
153 | By default, the library will scrub raw HTML and potentially
154 | dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
155 | 
156 | To allow these, use the library option `CMARK_OPT_UNSAFE` (or
157 | `--unsafe` with the command line program). If doing so, we
158 | recommend you use an HTML sanitizer specific to your needs to
159 | protect against [XSS
160 | attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
161 | 
162 | Contributing
163 | ------------
164 | 
165 | There is a [forum for discussing
166 | CommonMark](http://talk.commonmark.org); you should use it instead of
167 | GitHub issues for questions and possibly open-ended discussions.
168 | Use the [GitHub issue tracker](http://github.com/commonmark/CommonMark/issues)
169 | only for simple, clear, actionable issues.
170 | 
171 | Authors
172 | -------
173 | 
174 | John MacFarlane wrote the original library and program.
175 | The block parsing algorithm was worked out together with David
176 | Greenspan. Vicent Marti optimized the C implementation for
177 | performance, increasing its speed tenfold.  Kārlis Gaņģis helped
178 | work out a better parsing algorithm for links and emphasis,
179 | eliminating several worst-case performance issues.
180 | Nick Wellnhofer contributed many improvements, including
181 | most of the C library's API and its test harness.
182 | 
183 | [benchmarks]: benchmarks.md
184 | [the spec]: http://spec.commonmark.org
185 | [CommonMark]: http://commonmark.org
186 | [cmake]: http://www.cmake.org/download/
187 | [re2c]: http://re2c.org
188 | [commonmark.js]: https://github.com/commonmark/commonmark.js
189 | [american fuzzy lop]: http://lcamtuf.coredump.cx/afl/
190 | [libFuzzer]: http://llvm.org/docs/LibFuzzer.html
191 | 


--------------------------------------------------------------------------------
/api_test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(api_test  # API test driver: C sources plus one C++ translation unit
 2 |   cplusplus.cpp
 3 |   harness.c
 4 |   harness.h
 5 |   main.c
 6 | )
 7 | cmark_add_compile_options(api_test)  # helper defined elsewhere in the project
 8 | target_link_libraries(api_test PRIVATE
 9 |   cmark)
10 | # Register the binary with CTest. On Windows the test's PATH gets the cmark target's directory prepended (presumably so the cmark DLL is found at run time).
11 | add_test(NAME api_test COMMAND api_test)
12 | if(WIN32)
13 |   set_tests_properties(api_test PROPERTIES
14 |     ENVIRONMENT "PATH=$<TARGET_FILE_DIR:cmark>$<SEMICOLON>$ENV{PATH}")
15 | endif()
16 | 
17 | 


--------------------------------------------------------------------------------
/api_test/cplusplus.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | 
 3 | #include "cmark.h"
 4 | #include "cplusplus.h"
 5 | #include "harness.h"
 6 | // Smoke test: render one paragraph through the C API from a C++ translation unit.
 7 | void
 8 | test_cplusplus(test_batch_runner *runner)
 9 | {
10 |     static const char markdown[] = "paragraph\n";
11 |     char *rendered = cmark_markdown_to_html(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT);
12 |     STR_EQ(runner, rendered, "<p>paragraph</p>\n", "libcmark works with C++");
13 |     free(rendered);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/api_test/cplusplus.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_API_TEST_CPLUSPLUS_H
 2 | #define CMARK_API_TEST_CPLUSPLUS_H
 3 | 
 4 | #include "harness.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | /* Defined in cplusplus.cpp; the extern "C" guard around it applies when compiled as C++. */
10 | void test_cplusplus(test_batch_runner *runner);
11 | 
12 | #ifdef __cplusplus
13 | }
14 | #endif
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/api_test/harness.c:
--------------------------------------------------------------------------------
 1 | #include <stdarg.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <string.h>
 5 | 
 6 | #include "harness.h"
 7 | /* Allocate a zero-initialised runner with calloc; the result is returned unchecked (NULL on failure). */
 8 | test_batch_runner *test_batch_runner_new(void) {
 9 |   return (test_batch_runner *)calloc(1, sizeof(test_batch_runner));
10 | }
11 | /* Count one test; on failure print "FAILED test N: " plus the printf-style msg to stderr. */
12 | static void test_result(test_batch_runner *runner, int cond, const char *msg,
13 |                         va_list ap) {
14 |   runner->test_num += 1;
15 | 
16 |   if (!cond) {
17 |     fprintf(stderr, "FAILED test %d: ", runner->test_num);
18 |     vfprintf(stderr, msg, ap);
19 |     fprintf(stderr, "\n");
20 |     runner->num_failed += 1;
21 |     return;
22 |   }
23 |   runner->num_passed += 1;
24 | }
25 | /* Account for num_tests tests that were not run: they advance the test number and the skip count. */
26 | void SKIP(test_batch_runner *runner, int num_tests) {
27 |   runner->num_skipped += num_tests;
28 |   runner->test_num += num_tests;
29 | }
30 | /* Record one test that passes iff cond is non-zero; msg and its arguments are printed only on failure. */
31 | void OK(test_batch_runner *runner, int cond, const char *msg, ...) {
32 |   va_list args;
33 |   va_start(args, msg);
34 |   test_result(runner, cond, msg, args);
35 |   va_end(args);
36 | }
37 | /* Record one test that passes iff got == expected; on failure both values are also printed to stderr. */
38 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
39 |             ...) {
40 |   const int matches = (got == expected);
41 |   va_list args;
42 | 
43 |   va_start(args, msg);
44 |   test_result(runner, matches, msg, args);
45 |   va_end(args);
46 | 
47 |   if (matches)
48 |     return;
49 |   fprintf(stderr, "  Got:      %d\n", got);
50 |   fprintf(stderr, "  Expected: %d\n", expected);
51 | }
52 | /* Record one test that passes iff got and expected are equal strings (or both NULL); on failure print both. */
53 | void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
54 |             const char *msg, ...) {
55 |   /* strcmp(NULL, ...) is undefined, so a NULL is compared by pointer and fails cleanly. */
56 |   int cond = (got && expected) ? strcmp(got, expected) == 0 : got == expected;
57 |   va_list ap;
58 |   va_start(ap, msg);
59 |   test_result(runner, cond, msg, ap);
60 |   va_end(ap);
61 | 
62 |   if (!cond) {
63 |     fprintf(stderr, "  Got:      \"%s\"\n", got ? got : "(null)");
64 |     fprintf(stderr, "  Expected: \"%s\"\n", expected ? expected : "(null)");
65 |   }
66 | }
67 | /* Returns 1 when no test recorded so far has failed, 0 otherwise. */
68 | int test_ok(test_batch_runner *runner) { return !runner->num_failed; }
69 | /* Print the passed/failed/skipped totals and a final PASS or FAIL line to stderr. */
70 | void test_print_summary(test_batch_runner *runner) {
71 |   const char *verdict;
72 | 
73 |   fprintf(stderr, "%d tests passed, %d failed, %d skipped\n",
74 |           runner->num_passed, runner->num_failed, runner->num_skipped);
75 | 
76 |   if (test_ok(runner))
77 |     verdict = "PASS\n";
78 |   else
79 |     verdict = "FAIL\n";
80 | 
81 |   /* The verdict holds no conversion specifiers, so fputs writes the same bytes. */
82 |   fputs(verdict, stderr);
83 | }
84 | 


--------------------------------------------------------------------------------
/api_test/harness.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_API_TEST_HARNESS_H
 2 | #define CMARK_API_TEST_HARNESS_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | /* Running tallies for one batch of tests. */
 8 | typedef struct {
 9 |   int test_num;    /* tests counted so far (passed + failed + skipped); also the number shown in failure messages */
10 |   int num_passed;
11 |   int num_failed;
12 |   int num_skipped;
13 | } test_batch_runner;
14 | /* Returns a zero-initialised runner allocated with calloc (NULL if the allocation fails). */
15 | test_batch_runner *test_batch_runner_new(void);
16 | /* Counts num_tests tests as skipped. */
17 | void SKIP(test_batch_runner *runner, int num_tests);
18 | /* Records a pass if cond is non-zero; otherwise records a failure and prints the printf-style msg to stderr. */
19 | void OK(test_batch_runner *runner, int cond, const char *msg, ...);
20 | /* Like OK with cond = (got == expected); on failure both integers are printed as well. */
21 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
22 |             ...);
23 | /* Like OK, passing when got and expected compare equal as strings; on failure both strings are printed as well. */
24 | void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
25 |             const char *msg, ...);
26 | /* Non-zero iff no failure has been recorded. */
27 | int test_ok(test_batch_runner *runner);
28 | /* Prints the passed/failed/skipped counts followed by PASS or FAIL to stderr. */
29 | void test_print_summary(test_batch_runner *runner);
30 | 
31 | #ifdef __cplusplus
32 | }
33 | #endif
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/bench/samples/block-bq-flat.md:
--------------------------------------------------------------------------------
 1 | > the simple example of a blockquote 
 2 | > the simple example of a blockquote
 3 | > the simple example of a blockquote
 4 | > the simple example of a blockquote
 5 | ... continuation
 6 | ... continuation
 7 | ... continuation
 8 | ... continuation
 9 | 
10 | empty blockquote:
11 | 
12 | >
13 | >
14 | >
15 | >
16 | 
17 | 


--------------------------------------------------------------------------------
/bench/samples/block-bq-nested.md:
--------------------------------------------------------------------------------
 1 | >>>>>> deeply nested blockquote
 2 | >>>>> deeply nested blockquote
 3 | >>>> deeply nested blockquote
 4 | >>> deeply nested blockquote
 5 | >> deeply nested blockquote
 6 | > deeply nested blockquote
 7 | 
 8 | > deeply nested blockquote
 9 | >> deeply nested blockquote
10 | >>> deeply nested blockquote
11 | >>>> deeply nested blockquote
12 | >>>>> deeply nested blockquote
13 | >>>>>> deeply nested blockquote
14 | 


--------------------------------------------------------------------------------
/bench/samples/block-code.md:
--------------------------------------------------------------------------------
 1 | 
 2 |         an
 3 |         example
 4 | 
 5 |         of
 6 | 
 7 | 
 8 | 
 9 |         a code
10 |         block
11 | 
12 | 


--------------------------------------------------------------------------------
/bench/samples/block-fences.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ``````````text
 3 | an
 4 | example
 5 | ```
 6 | of
 7 | 
 8 | 
 9 | a fenced
10 | ```
11 | code
12 | block
13 | ``````````
14 | 
15 | 


--------------------------------------------------------------------------------
/bench/samples/block-heading.md:
--------------------------------------------------------------------------------
 1 | # heading
 2 | ### heading
 3 | ##### heading
 4 | 
 5 | # heading #
 6 | ### heading ###
 7 | ##### heading \#\#\#\#\######
 8 | 
 9 | ############ not a heading
10 | 


--------------------------------------------------------------------------------
/bench/samples/block-hr.md:
--------------------------------------------------------------------------------
 1 | 
 2 |  * * * * *
 3 | 
 4 |  -  -  -  -  -
 5 | 
 6 |  ________
 7 | 
 8 | 
 9 |  ************************* text
10 | 
11 | 


--------------------------------------------------------------------------------
/bench/samples/block-html.md:
--------------------------------------------------------------------------------
 1 | <div class="this is an html block">
 2 | 
 3 | blah blah
 4 | 
 5 | </div>
 6 | 
 7 | <table>
 8 |   <tr>
 9 |     <td>
10 |       **test**
11 |     </td>
12 |   </tr>
13 | </table>
14 | 
15 | <table>
16 | 
17 |   <tr>
18 | 
19 |     <td>
20 | 
21 |       test
22 | 
23 |     </td>
24 | 
25 |   </tr>
26 | 
27 | </table>
28 | 
29 | <![CDATA[
30 |   [[[[[[[[[[[... *cdata section - this should not be parsed* ...]]]]]]]]]]]
31 | ]]>
32 | 
33 | 


--------------------------------------------------------------------------------
/bench/samples/block-lheading.md:
--------------------------------------------------------------------------------
1 | heading
2 | ---
3 | 
4 | heading
5 | ===================================
6 | 
7 | not a heading
8 | ----------------------------------- text
9 | 


--------------------------------------------------------------------------------
/bench/samples/block-list-flat.md:
--------------------------------------------------------------------------------
 1 |  - tidy
 2 |  - bullet
 3 |  - list
 4 | 
 5 | 
 6 |  - loose
 7 | 
 8 |  - bullet
 9 | 
10 |  - list
11 | 
12 | 
13 |  0. ordered
14 |  1. list
15 |  2. example
16 | 
17 | 
18 |  -
19 |  -
20 |  -
21 |  -
22 | 
23 | 
24 |  1.
25 |  2.
26 |  3.
27 | 
28 | 
29 |  -  an example
30 | of a list item
31 |        with a continuation
32 | 
33 |     this part is inside the list
34 | 
35 |    this part is just a paragraph  
36 | 
37 | 
38 |  1. test
39 |  -  test
40 |  1. test
41 |  -  test
42 | 
43 | 
44 | 111111111111111111111111111111111111111111. is this a valid bullet?
45 | 
46 |  - _________________________
47 | 
48 |  - this
49 |  - is
50 | 
51 |    a
52 | 
53 |    long
54 |  - loose
55 |  - list
56 | 
57 |  - with
58 |  - some
59 | 
60 |    tidy
61 | 
62 |  - list
63 |  - items
64 |  - in
65 | 
66 |  - between
67 |  - _________________________
68 | 


--------------------------------------------------------------------------------
/bench/samples/block-list-nested.md:
--------------------------------------------------------------------------------
 1 | 
 2 |  - this
 3 |    - is
 4 |      - a
 5 |        - deeply
 6 |          - nested
 7 |            - bullet
 8 |              - list
 9 |    
10 | 
11 |  1. this
12 |     2. is
13 |        3. a
14 |           4. deeply
15 |              5. nested
16 |                 6. unordered
17 |                    7. list
18 | 
19 | 
20 |  - 1
21 |   - 2
22 |    - 3
23 |     - 4
24 |      - 5
25 |       - 6
26 |        - 7
27 |       - 6
28 |      - 5
29 |     - 4
30 |    - 3
31 |   - 2
32 |  - 1
33 | 
34 | 
35 |  - - - - - - - - - deeply-nested one-element item
36 | 
37 | 


--------------------------------------------------------------------------------
/bench/samples/block-ref-flat.md:
--------------------------------------------------------------------------------
 1 | [1] [2] [3] [1] [2] [3]
 2 | 
 3 | [looooooooooooooooooooooooooooooooooooooooooooooooooong label]
 4 | 
 5 |  [1]: <http://something.example.com/foo/bar>
 6 |  [2]: http://something.example.com/foo/bar 'test'
 7 |  [3]:
 8 |  http://foo/bar
 9 |  [    looooooooooooooooooooooooooooooooooooooooooooooooooong   label    ]:
10 |  111
11 |  'test'
12 |  [[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q
13 |  (as long as it is not referenced anywhere)
14 | 
15 |  [[[[[[[[[[[[[[[[[[[[]: this is not a valid reference
16 | 


--------------------------------------------------------------------------------
/bench/samples/block-ref-nested.md:
--------------------------------------------------------------------------------
 1 | [[[[[[[foo]]]]]]]
 2 | 
 3 | [[[[[[[foo]]]]]]]: bar
 4 | [[[[[[foo]]]]]]: bar
 5 | [[[[[foo]]]]]: bar
 6 | [[[[foo]]]]: bar
 7 | [[[foo]]]: bar
 8 | [[foo]]: bar
 9 | [foo]: bar
10 | 
11 | [*[*[*[*[foo]*]*]*]*]
12 | 
13 | [*[*[*[*[foo]*]*]*]*]: bar
14 | [*[*[*[foo]*]*]*]: bar
15 | [*[*[foo]*]*]: bar
16 | [*[foo]*]: bar
17 | [foo]: bar
18 | 


--------------------------------------------------------------------------------
/bench/samples/inline-autolink.md:
--------------------------------------------------------------------------------
 1 | closed (valid) autolinks:
 2 | 
 3 |  <ftp://1.2.3.4:21/path/foo>
 4 |  <http://foo.bar.baz?q=hello&id=22&boolean>
 5 |  <http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink/>
 6 |  <teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com>
 7 | 
 8 | these are not autolinks:
 9 | 
10 |  <ftp://1.2.3.4:21/path/foo
11 |  <http://foo.bar.baz?q=hello&id=22&boolean
12 |  <http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink
13 |  <teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com
14 |  < http://foo.bar.baz?q=hello&id=22&boolean >
15 | 


--------------------------------------------------------------------------------
/bench/samples/inline-backticks.md:
--------------------------------------------------------------------------------
1 | `lots`of`backticks`
2 | 
3 | ``i``wonder``how``this``will``be``parsed``
4 | 


--------------------------------------------------------------------------------
/bench/samples/inline-em-flat.md:
--------------------------------------------------------------------------------
1 | *this* *is* *your* *basic* *boring* *emphasis*
2 | 
3 | _this_ _is_ _your_ _basic_ _boring_ _emphasis_
4 | 
5 | **this** **is** **your** **basic** **boring** **emphasis**
6 | 


--------------------------------------------------------------------------------
/bench/samples/inline-em-nested.md:
--------------------------------------------------------------------------------
1 | *this *is *a *bunch* of* nested* emphases* 
2 | 
3 | __this __is __a __bunch__ of__ nested__ emphases__ 
4 | 
5 | ***this ***is ***a ***bunch*** of*** nested*** emphases*** 
6 | 


--------------------------------------------------------------------------------
/bench/samples/inline-em-worst.md:
--------------------------------------------------------------------------------
1 | *this *is *a *worst *case *for *em *backtracking
2 | 
3 | __this __is __a __worst __case __for __em __backtracking
4 | 
5 | ***this ***is ***a ***worst ***case ***for ***em ***backtracking
6 | 


--------------------------------------------------------------------------------
/bench/samples/inline-entity.md:
--------------------------------------------------------------------------------
 1 | entities:
 2 | 
 3 | &nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;
 4 | 
 5 | &#35; &#1234; &#992; &#98765432;
 6 | 
 7 | non-entities:
 8 | 
 9 | &18900987654321234567890; &1234567890098765432123456789009876543212345678987654;
10 | 
11 | &qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri;
12 | 


--------------------------------------------------------------------------------
/bench/samples/inline-escape.md:
--------------------------------------------------------------------------------
 1 | 
 2 | \t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s
 3 | 
 4 | \!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?
 5 | 
 6 | \@ \[ \] \^ \_ \` \{ \| \} \~ \- \'
 7 | 
 8 | \
 9 | \\
10 | \\\
11 | \\\\
12 | \\\\\
13 | 
14 | \<this\> \<is\> \<not\> \<html\>
15 | 
16 | 


--------------------------------------------------------------------------------
/bench/samples/inline-html.md:
--------------------------------------------------------------------------------
 1 | Taking commonmark tests from the spec for benchmarking here:
 2 | 
 3 | <a><bab><c2c>
 4 | 
 5 | <a/><b2/>
 6 | 
 7 | <a  /><b2
 8 | data="foo" >
 9 | 
10 | <a foo="bar" bam = 'baz <em>"</em>'
11 | _boolean zoop:33=zoop:33 />
12 | 
13 | <33> <__>
14 | 
15 | <a h*#ref="hi">
16 | 
17 | <a href="hi'> <a href=hi'>
18 | 
19 | < a><
20 | foo><bar/ >
21 | 
22 | <a href='bar'title=title>
23 | 
24 | </a>
25 | </foo >
26 | 
27 | </a href="foo">
28 | 
29 | foo <!-- this is a
30 | comment - with hyphen -->
31 | 
32 | foo <!-- not a comment -- two hyphens -->
33 | 
34 | foo <?php echo $a; ?>
35 | 
36 | foo <!ELEMENT br EMPTY>
37 | 
38 | foo <![CDATA[>&<]]>
39 | 
40 | <a href="&ouml;">
41 | 
42 | <a href="\*">
43 | 
44 | <a href="\"">
45 | 


--------------------------------------------------------------------------------
/bench/samples/inline-links-flat.md:
--------------------------------------------------------------------------------
 1 | Valid links:
 2 | 
 3 |  [this is a link]()
 4 |  [this is a link](<http://something.example.com/foo/bar>)
 5 |  [this is a link](http://something.example.com/foo/bar 'test')
 6 |  ![this is an image]()
 7 |  ![this is an image](<http://something.example.com/foo/bar>)
 8 |  ![this is an image](http://something.example.com/foo/bar 'test')
 9 |  
10 |  [escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'')
11 |  [escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\))
12 | 
13 | Invalid links:
14 | 
15 |  [this is not a link
16 | 
17 |  [this is not a link](
18 | 
19 |  [this is not a link](http://something.example.com/foo/bar 'test'
20 |  
21 |  [this is not a link](((((((((((((((((((((((((((((((((((((((((((((((
22 |  
23 |  [this is not a link]((((((((((()))))))))) (((((((((()))))))))))
24 | 


--------------------------------------------------------------------------------
/bench/samples/inline-links-nested.md:
--------------------------------------------------------------------------------
 1 | Valid links:
 2 | 
 3 | [[[[[[[[](test)](test)](test)](test)](test)](test)](test)]
 4 | 
 5 | [ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test)
 6 | 
 7 | Invalid links:
 8 | 
 9 | [[[[[[[[[
10 | 
11 | [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [
12 | 
13 | ![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![
14 | 


--------------------------------------------------------------------------------
/bench/samples/inline-newlines.md:
--------------------------------------------------------------------------------
 1 | 
 2 | this\
 3 | should\
 4 | be\
 5 | separated\
 6 | by\
 7 | newlines
 8 | 
 9 | this  
10 | should  
11 | be  
12 | separated  
13 | by  
14 | newlines  
15 | too
16 | 
17 | this
18 | should
19 | not
20 | be
21 | separated
22 | by
23 | newlines
24 | 
25 | 


--------------------------------------------------------------------------------
/bench/samples/lorem1.md:
--------------------------------------------------------------------------------
 1 | Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
 2 | 
 3 | Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
 4 | 
 5 | Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
 6 | 
 7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
 8 | 
 9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 | 
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
12 | 
13 | [1]: https://github.com/markdown-it
14 | 


--------------------------------------------------------------------------------
/bench/samples/rawtabs.md:
--------------------------------------------------------------------------------
 1 | 
 2 | this is a test for tab expansion, be careful not to replace them with spaces
 3 | 
 4 | 1	4444
 5 | 22	333
 6 | 333	22
 7 | 4444	1
 8 | 
 9 | 
10 | 	tab-indented line
11 |     space-indented line
12 | 	tab-indented line
13 | 
14 | 
15 | a lot of                                                spaces in between here
16 | 
17 | a lot of												tabs in between here
18 | 
19 | 


--------------------------------------------------------------------------------
/bench/stats.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | import statistics
 5 | 
 6 | def pairs(l, n):
 7 |         return zip(*[l[i::n] for i in range(n)])
 8 | 
 9 | # data comes in pairs:
10 | #    n - time for running the program with no input
11 | #    m - time for running it with the benchmark input
12 | # we measure (m - n)
13 | 
14 | values = [ float(y) - float(x) for (x,y) in pairs(sys.stdin.readlines(),2)]
15 | 
16 | print("mean = %.4f, median = %.4f, stdev = %.4f" %
17 |     (statistics.mean(values), statistics.median(values),
18 |       statistics.stdev(values)))
19 | 
20 | 


--------------------------------------------------------------------------------
/benchmarks.md:
--------------------------------------------------------------------------------
 1 | # Benchmarks
 2 | 
 3 | Here are some benchmarks, run on a 2.3 GHz 8-core i9 MacBook Pro.
 4 | The input text is a 1106 KB Markdown file built by concatenating
 5 | the Markdown sources of all the localizations of the first edition
 6 | of [*Pro Git*](https://github.com/progit/progit/tree/master/en) by
 7 | Scott Chacon.
 8 | 
 9 | |Implementation     |  Time (sec)|
10 | |-------------------|-----------:|
11 | | **commonmark.js** |    0.59    |
12 | | **cmark**         |    0.12    |
13 | | **md4c**          |    0.04    |
14 | 
15 | To run these benchmarks, use `make bench PROG=/path/to/program`.
16 | 
17 | `time` is used to measure execution speed.  The reported
18 | time is the *difference* between the time to run the program
19 | with the benchmark input and the time to run it with no input.
20 | (This procedure ensures that implementations in dynamic languages are
21 | not penalized by startup time.) A median of ten runs is taken.  The
22 | process is reniced to a high priority so that the system doesn't
23 | interrupt runs.
24 | 


--------------------------------------------------------------------------------
/cmake/modules/FindAsan.cmake:
--------------------------------------------------------------------------------
 1 | #
 2 | # The MIT License (MIT)
 3 | #
 4 | # Copyright (c) 2013 Matthew Arsenault
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | # THE SOFTWARE.
23 | #
24 | # This module tests if address sanitizer is supported by the compiler,
25 | # and creates a ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use
26 | # it). This sets the following variables:
27 | #
28 | # CMAKE_C_FLAGS_ASAN - Flags to use for C with asan
29 | # CMAKE_CXX_FLAGS_ASAN  - Flags to use for C++ with asan
30 | # HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available
31 | 
include(CheckCCompilerFlag)

# Probe both spellings of the AddressSanitizer flag.  Each probe stores its
# result in its own variable so the newer spelling can be preferred afterwards.

# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER)

set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)

# NOTE(review): this discards any CMAKE_REQUIRED_FLAGS value the including
# project had set before this module ran, rather than restoring it.
unset(CMAKE_REQUIRED_FLAGS)

# Pick the flag to use; ADDRESS_SANITIZER_FLAG stays undefined here when
# neither probe succeeded.
if(HAVE_FLAG_SANITIZE_ADDRESS)
  # Clang 3.2+ use this version
  set(ADDRESS_SANITIZER_FLAG "-fsanitize=address")
elseif(HAVE_FLAG_ADDRESS_SANITIZER)
  # Older deprecated flag for ASan
  set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer")
endif()
50 | 
# Publish whether the ASan build type is available.  When no usable flag was
# found, record FALSE *before* leaving the module so that callers can rely on
# HAVE_ADDRESS_SANITIZER always being defined, as the module header promises.
if(NOT ADDRESS_SANITIZER_FLAG)
  set(HAVE_ADDRESS_SANITIZER FALSE)
  return()
endif()

set(HAVE_ADDRESS_SANITIZER TRUE)
58 | 
# Define the per-configuration variables for the "ASan" build type
# (CMAKE_BUILD_TYPE=ASan).  FORCE overwrites any values already in the cache,
# so these settings are re-applied on every configure run.
set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C compiler during ASan builds."
    FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C++ compiler during ASan builds."
    FORCE)
# The sanitizer flag is passed at link time as well, for executables and
# shared libraries alike.
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used for linking binaries during ASan builds."
    FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used by the shared libraries linker during ASan builds."
    FORCE)
# Keep these cache entries out of the default (non-advanced) cache view.
mark_as_advanced(CMAKE_C_FLAGS_ASAN
                 CMAKE_CXX_FLAGS_ASAN
                 CMAKE_EXE_LINKER_FLAGS_ASAN
                 CMAKE_SHARED_LINKER_FLAGS_ASAN)
75 | 


--------------------------------------------------------------------------------
/fuzz/CMakeLists.txt:
--------------------------------------------------------------------------------
# Fuzzing harness executable, built from cmark-fuzz.c and linked against the
# cmark library target.  cmark_add_compile_options is a project helper defined
# outside this file.
add_executable(cmark-fuzz cmark-fuzz.c)
cmark_add_compile_options(cmark-fuzz)
target_link_libraries(cmark-fuzz cmark)
4 | 


--------------------------------------------------------------------------------
/fuzz/afl_test_cases/test.md:
--------------------------------------------------------------------------------
 1 | # H1
 2 | 
 3 | H2
 4 | --
 5 | 
 6 | t ☺  
 7 | *b* **em** `c`
 8 | &ge;\&\
 9 | \_e\_
10 | 
11 | 4) I1
12 | 
13 | 5) I2
14 |    > [l](/u "t")
15 |    >
16 |    > - [f]
17 |    > - ![a](/u "t")
18 |    >
19 |    >> <ftp://hh>
20 |    >> <u@hh>
21 | 
22 | ~~~ l☺
23 | cb
24 | ~~~
25 | 
26 |     c1
27 |     c2
28 | 
29 | ***
30 | 
31 | <div>
32 | <b>x</b>
33 | </div>
34 | 
35 | [f]: /u "t"
36 | 
37 | 


--------------------------------------------------------------------------------
/fuzz/cmark-fuzz.c:
--------------------------------------------------------------------------------
 1 | /* for fmemopen */
 2 | #define _POSIX_C_SOURCE 200809L
 3 | 
 4 | #include <stdint.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include "cmark.h"
 9 | 
10 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
11 |   struct __attribute__((packed)) {
12 |     int options;
13 |     int width;
14 |   } fuzz_config;
15 | 
16 |   if (size >= sizeof(fuzz_config)) {
17 |     /* The beginning of `data` is treated as fuzzer configuration */
18 |     memcpy(&fuzz_config, data, sizeof(fuzz_config));
19 |     int options = fuzz_config.options;
20 | 
21 |     /* Mask off valid option bits */
22 |     options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
23 | 
24 |     /* Remainder of input is the markdown */
25 |     const char *markdown = (const char *)(data + sizeof(fuzz_config));
26 |     size_t markdown_size = size - sizeof(fuzz_config);
27 |     cmark_node *doc = NULL;
28 | 
29 |     /* Use upper bits of options to select parsing mode */
30 |     switch (((unsigned) fuzz_config.options >> 30) & 3) {
31 |       case 0:
32 |         doc = cmark_parse_document(markdown, markdown_size, options);
33 |         break;
34 | 
35 |       case 1:
36 |         if (markdown_size > 0) {
37 |           FILE *file = fmemopen((void *) markdown, markdown_size, "r");
38 |           doc = cmark_parse_file(file, options);
39 |           fclose(file);
40 |         }
41 |         break;
42 | 
43 |       case 2: {
44 |         size_t block_max = 20;
45 |         cmark_parser *parser = cmark_parser_new(options);
46 | 
47 |         while (markdown_size > 0) {
48 |           size_t block_size = markdown_size > block_max ? block_max : markdown_size;
49 |           cmark_parser_feed(parser, markdown, block_size);
50 |           markdown += block_size;
51 |           markdown_size -= block_size;
52 |         }
53 | 
54 |         doc = cmark_parser_finish(parser);
55 |         cmark_parser_free(parser);
56 |         break;
57 |       }
58 | 
59 |       case 3:
60 |         free(cmark_markdown_to_html(markdown, markdown_size, options));
61 |         break;
62 |     }
63 | 
64 |     if (doc != NULL) {
65 |       free(cmark_render_commonmark(doc, options, fuzz_config.width));
66 |       free(cmark_render_html(doc, options));
67 |       free(cmark_render_latex(doc, options, fuzz_config.width));
68 |       free(cmark_render_man(doc, options, fuzz_config.width));
69 |       free(cmark_render_xml(doc, options));
70 | 
71 |       cmark_node_free(doc);
72 |     }
73 |   }
74 |   return 0;
75 | }
76 | 


--------------------------------------------------------------------------------
/fuzz/dictionary:
--------------------------------------------------------------------------------
 1 | asterisk="*"
 2 | attr_generic=" a=\"1\""
 3 | attr_href=" href=\"1\""
 4 | attr_xml_lang=" xml:lang=\"1\""
 5 | attr_xmlns=" xmlns=\"1\""
 6 | backslash="\\"
 7 | backtick="`"
 8 | colon=":"
 9 | dashes="---"
10 | double_quote="\""
11 | entity_builtin="&lt;"
12 | entity_decimal="&#1;"
13 | entity_external="&a;"
14 | entity_hex="&#x1;"
15 | equals="==="
16 | exclamation="!"
17 | greater_than=">"
18 | hash="#"
19 | hyphen="-"
20 | indent="  "
21 | left_bracket="["
22 | left_paren="("
23 | less_than="<"
24 | plus="+"
25 | right_bracket="]"
26 | right_paren=")"
27 | single_quote="'"
28 | string_any="ANY"
29 | string_brackets="[]"
30 | string_cdata="CDATA"
31 | string_dashes="--"
32 | string_empty_dblquotes="\"\""
33 | string_empty_quotes="''"
34 | string_idrefs="IDREFS"
35 | string_parentheses="()"
36 | string_pcdata="#PCDATA"
37 | tag_cdata="<![CDATA["
38 | tag_close="</a>"
39 | tag_doctype="<!DOCTYPE"
40 | tag_element="<!ELEMENT"
41 | tag_entity="<!ENTITY"
42 | tag_notation="<!NOTATION"
43 | tag_open="<a>"
44 | tag_open_close="<a />"
45 | tag_open_exclamation="<!"
46 | tag_open_q="<?"
47 | tag_sq2_close="]]>"
48 | tag_xml_q="<?xml?>"
49 | underscore="_"
50 | 


--------------------------------------------------------------------------------
/man/CMakeLists.txt:
--------------------------------------------------------------------------------
# Install the checked-in man pages: cmark.1 into section 1 and cmark.3 into
# section 3 of the man directory.
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark.1
  DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark.3
  DESTINATION ${CMAKE_INSTALL_MANDIR}/man3)
5 | 


--------------------------------------------------------------------------------
/man/make_man_page.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Creates a man page from a C file.
  4 | 
  5 | # Comments beginning with `/**` are treated as Groff man, except that
  6 | # 'this' is converted to \fIthis\f[], and ''this'' to \fBthis\f[].
  7 | 
  8 | # Non-blank lines immediately following a man page comment are treated
  9 | # as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The
 10 | # immediately preceding man documentation chunk is printed after the example
 11 | # as a comment on it.
 12 | 
 13 | # That's about it!
 14 | 
import sys, re, os, platform
from datetime import date
from ctypes import CDLL, c_char_p, c_int, c_long, c_size_t, c_void_p
 18 | 
 19 | sysname = platform.system()
 20 | 
 21 | curdir = os.getcwd()
 22 | 
 23 | if sysname == 'Darwin':
 24 |     cmark = CDLL(curdir + "/build/src/libcmark.dylib")
 25 | else:
 26 |     cmark = CDLL(curdir + "/build/src/libcmark.so")
 27 | 
 28 | parse_document = cmark.cmark_parse_document
 29 | parse_document.restype = c_void_p
 30 | parse_document.argtypes = [c_char_p, c_long]
 31 | 
 32 | render_man = cmark.cmark_render_man
 33 | render_man.restype = c_char_p
 34 | render_man.argtypes = [c_void_p, c_long, c_long]
 35 | 
 36 | cmark_version_string = cmark.cmark_version_string
 37 | cmark_version_string.restype = c_char_p
 38 | cmark_version_string.argtypes = []
 39 | 
 40 | def md2man(text):
 41 |     if sys.version_info >= (3,0):
 42 |         textbytes = text.encode('utf-8')
 43 |         textlen = len(textbytes)
 44 |         return render_man(parse_document(textbytes, textlen), 0, 72).decode('utf-8')
 45 |     else:
 46 |         textbytes = text
 47 |         textlen = len(text)
 48 |         return render_man(parse_document(textbytes, textlen), 0, 72)
 49 | 
 50 | comment_start_re = re.compile(r'^\/\*\* ?')
 51 | comment_delim_re = re.compile(r'^[/ ]\** ?')
 52 | comment_end_re = re.compile(r'^ \**\/')
 53 | function_re = re.compile(r'^ *(?:CMARK_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)')
 54 | blank_re = re.compile(r'^\s*$')
 55 | macro_re = re.compile(r'CMARK_EXPORT *')
 56 | typedef_start_re = re.compile(r'typedef.*{$')
 57 | typedef_end_re = re.compile(r'}')
 58 | single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)")
 59 | double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)")
 60 | 
 61 | def handle_quotes(s):
 62 |     return re.sub(double_quote_re, r'**\g<1>**', re.sub(single_quote_re, r'*\g<1>*', s))
 63 | 
 64 | typedef = False
 65 | mdlines = []
 66 | chunk = []
 67 | sig = []
 68 | 
 69 | if len(sys.argv) > 1:
 70 |     sourcefile = sys.argv[1]
 71 | else:
 72 |     print("Usage:  make_man_page.py sourcefile")
 73 |     exit(1)
 74 | 
# Walk the header line by line with a small state machine.  `state` is one of:
#   'default'   - ordinary source that is not being documented
#   'man'       - inside a /** ... */ documentation comment
#   'signature' - code following a documentation comment, or a typedef body
# Output accumulates in `mdlines`; `chunk` holds the current comment text and
# `sig` the current signature lines.
with open(sourcefile, 'r') as cmarkh:
    state = 'default'
    for line in cmarkh:
        # state transition
        oldstate = state
        if comment_start_re.match(line):
            state = 'man'
        elif comment_end_re.match(line) and state == 'man':
            # Closing line of the comment: contributes no text, and the state
            # stays 'man' so the next line decides what follows.
            continue
        elif comment_delim_re.match(line) and state == 'man':
            state = 'man'
        elif not typedef and blank_re.match(line):
            # A blank line ends whatever was in progress, except inside a
            # typedef body, where blank lines belong to the definition.
            state = 'default'
        elif typedef and typedef_end_re.match(line):
            typedef = False
        elif typedef_start_re.match(line):
            typedef = True
            state = 'signature'
        elif state == 'man':
            # First non-comment, non-blank line after a comment.
            state = 'signature'

        # handle line
        if state == 'man':
            # Strip the comment leader and convert quote markup.
            chunk.append(handle_quotes(re.sub(comment_delim_re, '', line)))
        elif state == 'signature':
            # Drop the export macro; keep every line of a typedef body, but
            # only non-blank lines otherwise.
            ln = re.sub(macro_re, '', line)
            if typedef or not re.match(blank_re, ln):
                sig.append(ln)
        elif oldstate == 'signature' and state != 'signature':
            # A signature just ended: emit it first, then the documentation
            # chunk that preceded it.
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            rawsig = ''.join(sig)
            m = function_re.match(rawsig)
            mdlines.append('.PP\n')
            if m:
                # Function prototype: type and arguments in one font, the
                # function name in another.
                mdlines.append('\\fI' + m.group('type') + '\\f[]' + ' ')
                mdlines.append('\\fB' + m.group('name') + '\\f[]' + '(')
                first = True
                for argument in re.split(',', m.group('args')):
                    if not first:
                        mdlines.append(', ')
                    first = False
                    mdlines.append('\\fI' + argument.strip() + '\\f[]')
                mdlines.append(')\n')
            else:
                # Anything else (typedefs, macros, ...) is emitted verbatim
                # inside a no-fill block.
                mdlines.append('.nf\n\\fC\n.RS 0n\n')
                mdlines += sig
                mdlines.append('.RE\n\\f[]\n.fi\n')
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            # md2man returns a string, so += extends the list one character
            # at a time; that is harmless because mdlines is only ever joined.
            mdlines += md2man(''.join(chunk))
            mdlines.append('\n')
            chunk = []
            sig = []
        elif oldstate == 'man' and state != 'signature':
            # A comment that was not followed by a signature.
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += md2man(''.join(chunk)) # add man chunk
            chunk = []
            mdlines.append('\n')
135 | 
# Emit the .TH title line (page name, section 3, today's date, library
# version), followed by the collected page body.
page_name = os.path.basename(sourcefile).replace('.h','')
today = date.today().strftime('%B %d, %Y')
version = cmark_version_string().decode('utf-8')
sys.stdout.write('.TH ' + page_name + ' 3 "' + today + '" "cmark ' + version + '" "Library Functions Manual"\n')
sys.stdout.write(''.join(mdlines))
138 | 


--------------------------------------------------------------------------------
/man/man1/cmark.1:
--------------------------------------------------------------------------------
 1 | .TH "cmark" "1" "February 11, 2020" "LOCAL" "General Commands Manual"
 2 | .SH "NAME"
 3 | \fBcmark\fR
 4 | \- convert CommonMark formatted text to HTML
 5 | .SH "SYNOPSIS"
 6 | .HP 6n
 7 | \fBcmark\fR
 8 | [options]
 9 | file*
10 | .SH "DESCRIPTION"
11 | \fBcmark\fR
12 | converts Markdown formatted plain text to either HTML, groff man,
13 | CommonMark XML, LaTeX, or CommonMark, using the conventions
14 | described in the CommonMark spec.  It reads input from \fIstdin\fR
15 | or the specified files (concatenating their contents) and writes
16 | output to \fIstdout\fR.
17 | .SH "OPTIONS"
18 | .TP 12n
19 | .B \-\-to, \-t \f[I]FORMAT\f[]
20 | Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[],
21 | \f[C]latex\f[], \f[C]commonmark\f[]).
22 | .TP 12n
23 | .B \-\-width \f[I]WIDTH\f[]
24 | Specify a column width to which to wrap the output. For no wrapping, use
25 | the value 0 (the default).  This option currently only affects the
26 | commonmark, latex, and man renderers.
27 | .TP 12n
28 | .B \-\-hardbreaks
29 | Render soft breaks (newlines inside paragraphs in the CommonMark source)
30 | as hard line breaks in the target format.  If this option is specified,
31 | hard wrapping is disabled for CommonMark output, regardless of the value
32 | given with \-\-width.
33 | .TP 12n
34 | .B \-\-nobreaks
35 | Render soft breaks as spaces.  If this option is specified,
36 | hard wrapping is disabled for all output formats, regardless of the value
37 | given with \-\-width.
38 | .TP 12n
39 | .B \-\-sourcepos
40 | Include source position attribute.
41 | .TP 12n
42 | .B \-\-validate-utf8
43 | Validate UTF-8, replacing illegal sequences with U+FFFD.
44 | .TP 12n
45 | .B \-\-smart
46 | Use smart punctuation.  Straight double and single quotes will
47 | be rendered as curly quotes, depending on their position.
48 | \f[C]\-\-\f[] will be rendered as an en-dash.
49 | \f[C]\-\-\-\f[] will be rendered as an em-dash.
50 | \f[C]...\f[] will be rendered as ellipses.
51 | .TP 12n
52 | .B \-\-safe
53 | Omit raw HTML and potentially dangerous URLs (default).
54 | Raw HTML is replaced by a placeholder comment
55 | and potentially dangerous URLs are replaced by empty strings.
56 | Potentially dangerous URLs are those that begin with `javascript:`,
57 | `vbscript:`, `file:`, or `data:` (except for `image/png`,
58 | `image/gif`, `image/jpeg`, or `image/webp` mime types).
59 | .TP 12n
60 | .B \-\-unsafe
61 | Render raw HTML or potentially dangerous URLs, overriding
62 | the default (\-\-safe) behavior.
63 | .TP 12n
64 | .B \-\-help
65 | Print usage information.
66 | .TP 12n
67 | .B \-\-version
68 | Print version.
69 | .SH "AUTHORS"
70 | John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
71 | .SH "SEE ALSO"
72 | .PP
73 | CommonMark spec:  \f[C]https://spec.commonmark.org\f[].
74 | 


--------------------------------------------------------------------------------
/nmake.bat:
--------------------------------------------------------------------------------
@rem Run nmake with Makefile.nmake, forwarding all command-line arguments.
@nmake.exe /nologo /f Makefile.nmake %*
2 | 


--------------------------------------------------------------------------------
/shell.nix:
--------------------------------------------------------------------------------
# Shell environment definition built with mkShell from <nixpkgs>; the
# packages below are made available through buildInputs.
with (import <nixpkgs> {});
mkShell {
  buildInputs = [
    clangStdenv
    cmake
    gdb
    python3
    perl
    re2c
    curl
  ];
}
13 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | 
# Generate cmark_version.h and the pkg-config file into the build directory
# from their .in templates.
configure_file(cmark_version.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h)
# @ONLY: substitute only @VAR@ references and leave ${VAR} text untouched.
configure_file(libcmark.pc.in
  ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc
  @ONLY)
  7 | 
# The cmark library target.  No STATIC/SHARED keyword is given, so the kind of
# library built follows BUILD_SHARED_LIBS.
add_library(cmark
  blocks.c
  buffer.c
  cmark.c
  cmark_ctype.c
  commonmark.c
  houdini_href_e.c
  houdini_html_e.c
  houdini_html_u.c
  html.c
  inlines.c
  iterator.c
  latex.c
  man.c
  node.c
  references.c
  render.c
  scanners.c
  scanners.re
  utf8.c
  xml.c)
cmark_add_compile_options(cmark)
set_target_properties(cmark PROPERTIES
  OUTPUT_NAME "cmark"
  # Avoid name clash between PROGRAM and LIBRARY pdb files.
  PDB_NAME libcmark
  POSITION_INDEPENDENT_CODE YES
  # Include minor version and patch level in soname for now.
  SOVERSION ${PROJECT_VERSION}
  VERSION ${PROJECT_VERSION})
# Static builds define CMARK_STATIC_DEFINE; PUBLIC propagates the definition
# to targets that link against cmark as well.
if(NOT BUILD_SHARED_LIBS)
  target_compile_definitions(cmark PUBLIC
    CMARK_STATIC_DEFINE)
endif()
# Consumers get the source and binary directories while building inside this
# tree, and the installed "include" directory afterwards.
target_include_directories(cmark INTERFACE
  $<INSTALL_INTERFACE:include>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)

# Generate the export-macro header for the library.
generate_export_header(cmark
  BASE_NAME ${PROJECT_NAME})

# FIXME(compnerd) this should be removed, but exists solely to allow a migration
# path for OSS Fuzz.
add_custom_target(cmark_static DEPENDS cmark)
 53 | 
# The command-line program.  The target is called cmark_exe because the name
# "cmark" is already taken by the library target; OUTPUT_NAME makes the
# resulting binary "cmark".
add_executable(cmark_exe
  main.c)
cmark_add_compile_options(cmark_exe)
set_target_properties(cmark_exe PROPERTIES
  OUTPUT_NAME "cmark"
  INSTALL_RPATH "${Base_rpath}")
target_link_libraries(cmark_exe PRIVATE
  cmark)
 62 | 
# Install the program and the library, and record both in the cmark-targets
# export set.
install(TARGETS cmark_exe cmark
  EXPORT cmark-targets
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  )

# pkg-config file generated into the build directory.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# Public headers: cmark.h from the source tree, plus the two headers taken
# from the build directory.
install(FILES
  cmark.h
  ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h
  ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  )
 79 | 
# generate cmark-config.cmake and cmark-config-version.cmake files
configure_package_config_file(
  "cmarkConfig.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config.cmake"
  INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark")
# SameMajorVersion: the package is reported as compatible with requests for
# the same major version.
write_basic_package_version_file(
  "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config-version.cmake"
  VERSION ${PROJECT_VERSION}
  COMPATIBILITY SameMajorVersion)
# install config and version file
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config.cmake"
        "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config-version.cmake"
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark"
)
# install targets file
# Exported targets are prefixed with the cmark:: namespace.
install(
  EXPORT "cmark-targets"
  NAMESPACE "cmark::"
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark"
)
101 | 


--------------------------------------------------------------------------------
/src/buffer.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <limits.h>
  3 | #include <stdarg.h>
  4 | #include <stdbool.h>
  5 | #include <stdint.h>
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <string.h>
  9 | 
 10 | #include "cmark_ctype.h"
 11 | #include "buffer.h"
 12 | 
 13 | /* Used as default value for cmark_strbuf->ptr so that people can always
 14 |  * assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
 15 |  */
 16 | unsigned char cmark_strbuf__initbuf[1];
 17 | 
 18 | #ifndef MIN
 19 | #define MIN(x, y) ((x < y) ? x : y)
 20 | #endif
 21 | 
 22 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
 23 |                        bufsize_t initial_size) {
 24 |   buf->mem = mem;
 25 |   buf->asize = 0;
 26 |   buf->size = 0;
 27 |   buf->ptr = cmark_strbuf__initbuf;
 28 | 
 29 |   if (initial_size > 0)
 30 |     cmark_strbuf_grow(buf, initial_size);
 31 | }
 32 | 
 33 | static inline void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
 34 |   cmark_strbuf_grow(buf, buf->size + add);
 35 | }
 36 | 
 37 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
 38 |   assert(target_size > 0);
 39 | 
 40 |   if (target_size < buf->asize)
 41 |     return;
 42 | 
 43 |   if (target_size > (bufsize_t)(INT32_MAX / 2)) {
 44 |     fprintf(stderr,
 45 |       "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
 46 |          (INT32_MAX / 2));
 47 |     abort();
 48 |   }
 49 | 
 50 |   /* Oversize the buffer by 50% to guarantee amortized linear time
 51 |    * complexity on append operations. */
 52 |   bufsize_t new_size = target_size + target_size / 2;
 53 |   new_size += 1;
 54 |   new_size = (new_size + 7) & ~7;
 55 | 
 56 |   buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
 57 |                                                 new_size);
 58 |   buf->asize = new_size;
 59 | }
 60 | 
 61 | void cmark_strbuf_free(cmark_strbuf *buf) {
 62 |   if (!buf)
 63 |     return;
 64 | 
 65 |   if (buf->ptr != cmark_strbuf__initbuf)
 66 |     buf->mem->free(buf->ptr);
 67 | 
 68 |   cmark_strbuf_init(buf->mem, buf, 0);
 69 | }
 70 | 
 71 | void cmark_strbuf_clear(cmark_strbuf *buf) {
 72 |   buf->size = 0;
 73 | 
 74 |   if (buf->asize > 0)
 75 |     buf->ptr[0] = '\0';
 76 | }
 77 | 
 78 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
 79 |                       bufsize_t len) {
 80 |   if (len <= 0 || data == NULL) {
 81 |     cmark_strbuf_clear(buf);
 82 |   } else {
 83 |     if (data != buf->ptr) {
 84 |       if (len >= buf->asize)
 85 |         cmark_strbuf_grow(buf, len);
 86 |       memmove(buf->ptr, data, len);
 87 |     }
 88 |     buf->size = len;
 89 |     buf->ptr[buf->size] = '\0';
 90 |   }
 91 | }
 92 | 
 93 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
 94 |   S_strbuf_grow_by(buf, 1);
 95 |   buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
 96 |   buf->ptr[buf->size] = '\0';
 97 | }
 98 | 
 99 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
100 |                       bufsize_t len) {
101 |   if (len <= 0)
102 |     return;
103 | 
104 |   S_strbuf_grow_by(buf, len);
105 |   memmove(buf->ptr + buf->size, data, len);
106 |   buf->size += len;
107 |   buf->ptr[buf->size] = '\0';
108 | }
109 | 
110 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
111 |   cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string));
112 | }
113 | 
114 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
115 |   unsigned char *data = buf->ptr;
116 | 
117 |   if (buf->asize == 0) {
118 |     /* return an empty string */
119 |     return (unsigned char *)buf->mem->calloc(1, 1);
120 |   }
121 | 
122 |   cmark_strbuf_init(buf->mem, buf, 0);
123 |   return data;
124 | }
125 | 
126 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
127 |   if (len < 0)
128 |     len = 0;
129 | 
130 |   if (len < buf->size) {
131 |     buf->size = len;
132 |     buf->ptr[buf->size] = '\0';
133 |   }
134 | }
135 | 
136 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
137 |   if (n > 0) {
138 |     if (n > buf->size)
139 |       n = buf->size;
140 |     buf->size = buf->size - n;
141 |     if (buf->size)
142 |       memmove(buf->ptr, buf->ptr + n, buf->size);
143 | 
144 |     buf->ptr[buf->size] = '\0';
145 |   }
146 | }
147 | 
148 | void cmark_strbuf_rtrim(cmark_strbuf *buf) {
149 |   if (!buf->size)
150 |     return;
151 | 
152 |   while (buf->size > 0) {
153 |     if (!cmark_isspace(buf->ptr[buf->size - 1]))
154 |       break;
155 | 
156 |     buf->size--;
157 |   }
158 | 
159 |   buf->ptr[buf->size] = '\0';
160 | }
161 | 
162 | void cmark_strbuf_trim(cmark_strbuf *buf) {
163 |   bufsize_t i = 0;
164 | 
165 |   if (!buf->size)
166 |     return;
167 | 
168 |   while (i < buf->size && cmark_isspace(buf->ptr[i]))
169 |     i++;
170 | 
171 |   cmark_strbuf_drop(buf, i);
172 | 
173 |   cmark_strbuf_rtrim(buf);
174 | }
175 | 
176 | // Destructively modify string, collapsing consecutive
177 | // space and newline characters into a single space.
178 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
179 |   bool last_char_was_space = false;
180 |   bufsize_t r, w;
181 | 
182 |   for (r = 0, w = 0; r < s->size; ++r) {
183 |     if (cmark_isspace(s->ptr[r])) {
184 |       if (!last_char_was_space) {
185 |         s->ptr[w++] = ' ';
186 |         last_char_was_space = true;
187 |       }
188 |     } else {
189 |       s->ptr[w++] = s->ptr[r];
190 |       last_char_was_space = false;
191 |     }
192 |   }
193 | 
194 |   cmark_strbuf_truncate(s, w);
195 | }
196 | 
197 | // Destructively unescape a string: remove backslashes before punctuation chars.
198 | void cmark_strbuf_unescape(cmark_strbuf *buf) {
199 |   bufsize_t r, w;
200 | 
201 |   for (r = 0, w = 0; r < buf->size; ++r) {
202 |     if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
203 |       r++;
204 | 
205 |     buf->ptr[w++] = buf->ptr[r];
206 |   }
207 | 
208 |   cmark_strbuf_truncate(buf, w);
209 | }
210 | 


--------------------------------------------------------------------------------
/src/buffer.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_BUFFER_H
 2 | #define CMARK_BUFFER_H
 3 | 
 4 | #include <limits.h>
 5 | #include <stdarg.h>
 6 | #include <stddef.h>
 7 | #include <stdint.h>
 8 | #include <string.h>
 9 | 
10 | #include "cmark.h"
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
/* Signed 32-bit type used for all buffer sizes and offsets. */
typedef int32_t bufsize_t;

/* Growable byte buffer. */
typedef struct {
  cmark_mem *mem;        /* allocator used for all of the buffer's storage */
  unsigned char *ptr;    /* contents; cmark_strbuf__initbuf until allocated */
  bufsize_t asize, size; /* allocated bytes / bytes currently in use */
} cmark_strbuf;

/* Shared placeholder that `ptr` refers to while no storage is allocated. */
extern unsigned char cmark_strbuf__initbuf[];

/* Static initializer for an empty cmark_strbuf that uses allocator `mem`. */
#define CMARK_BUF_INIT(mem)                                                    \
  { mem, cmark_strbuf__initbuf, 0, 0 }
28 | 
29 | /**
30 |  * Initialize a cmark_strbuf structure.
31 |  *
32 |  * For the cases where CMARK_BUF_INIT cannot be used to do static
33 |  * initialization.
34 |  */
35 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
36 |                        bufsize_t initial_size);
37 | 
38 | /**
39 |  * Grow the buffer to hold at least `target_size` bytes.
40 |  */
41 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
42 | 
43 | void cmark_strbuf_free(cmark_strbuf *buf);
44 | 
45 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
46 | 
47 | /*
48 | static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
49 |  return (char *)buf->ptr;
50 | }
51 | */
52 | 
53 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
54 | 
55 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
56 |                       bufsize_t len);
57 | void cmark_strbuf_putc(cmark_strbuf *buf, int c);
58 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
59 |                       bufsize_t len);
60 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
61 | void cmark_strbuf_clear(cmark_strbuf *buf);
62 | 
63 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
64 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
65 | void cmark_strbuf_rtrim(cmark_strbuf *buf);
66 | void cmark_strbuf_trim(cmark_strbuf *buf);
67 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
68 | void cmark_strbuf_unescape(cmark_strbuf *s);
69 | 
70 | #ifdef __cplusplus
71 | }
72 | #endif
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/src/chunk.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_CHUNK_H
 2 | #define CMARK_CHUNK_H
 3 | 
 4 | #include <string.h>
 5 | #include <stdlib.h>
 6 | #include <assert.h>
 7 | #include "cmark.h"
 8 | #include "buffer.h"
 9 | #include "cmark_ctype.h"
10 | 
11 | #define CMARK_CHUNK_EMPTY                                                      \
12 |   { NULL, 0 }
13 | 
// A (pointer, length) view over bytes. cmark_chunk_free only resets the
// two fields and never releases `data`, so the helpers in this header do
// not treat a chunk as owning its memory.
typedef struct {
  const unsigned char *data; // first byte of the view (NULL when empty)
  bufsize_t len;             // number of bytes in the view
} cmark_chunk;
18 | 
19 | // NOLINTNEXTLINE(clang-diagnostic-unused-function)
20 | static inline void cmark_chunk_free(cmark_chunk *c) {
21 |   c->data = NULL;
22 |   c->len = 0;
23 | }
24 | 
25 | static inline void cmark_chunk_ltrim(cmark_chunk *c) {
26 |   while (c->len && cmark_isspace(c->data[0])) {
27 |     c->data++;
28 |     c->len--;
29 |   }
30 | }
31 | 
32 | static inline void cmark_chunk_rtrim(cmark_chunk *c) {
33 |   while (c->len > 0) {
34 |     if (!cmark_isspace(c->data[c->len - 1]))
35 |       break;
36 | 
37 |     c->len--;
38 |   }
39 | }
40 | 
// Strip whitespace from both ends of the view: leading first, then
// trailing.
// NOLINTNEXTLINE(clang-diagnostic-unused-function)
static inline void cmark_chunk_trim(cmark_chunk *c) {
  cmark_chunk_ltrim(c);
  cmark_chunk_rtrim(c);
}
46 | 
47 | // NOLINTNEXTLINE(clang-diagnostic-unused-function)
48 | static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
49 |                                            bufsize_t offset) {
50 |   const unsigned char *p =
51 |       (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
52 |   return p ? (bufsize_t)(p - ch->data) : ch->len;
53 | }
54 | 
55 | // NOLINTNEXTLINE(clang-diagnostic-unused-function)
56 | static inline cmark_chunk cmark_chunk_literal(const char *data) {
57 |   bufsize_t len = data ? (bufsize_t)strlen(data) : 0;
58 |   cmark_chunk c = {(unsigned char *)data, len};
59 |   return c;
60 | }
61 | 
62 | // NOLINTNEXTLINE(clang-diagnostic-unused-function)
63 | static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos,
64 |                                           bufsize_t len) {
65 |   cmark_chunk c = {ch->data + pos, len};
66 |   return c;
67 | }
68 | 
69 | #endif
70 | 


--------------------------------------------------------------------------------
/src/cmark.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <assert.h>
 3 | #include <stdio.h>
 4 | #include "node.h"
 5 | #include "houdini.h"
 6 | #include "cmark.h"
 7 | #include "buffer.h"
 8 | 
 9 | int cmark_version(void) { return CMARK_VERSION; }
10 | 
11 | const char *cmark_version_string(void) { return CMARK_VERSION_STRING; }
12 | 
/* calloc wrapper that never returns NULL: when the underlying calloc
 * yields a null pointer, a diagnostic is written to stderr and the
 * process is aborted. */
static void *xcalloc(size_t nmem, size_t size) {
  void *block = calloc(nmem, size);

  if (block == NULL) {
    fputs("[cmark] calloc returned null pointer, aborting\n", stderr);
    abort();
  }

  return block;
}
21 | 
/* realloc wrapper that never returns NULL: when the underlying realloc
 * yields a null pointer, a diagnostic is written to stderr and the
 * process is aborted. */
static void *xrealloc(void *ptr, size_t size) {
  void *resized = realloc(ptr, size);

  if (resized == NULL) {
    fputs("[cmark] realloc returned null pointer, aborting\n", stderr);
    abort();
  }

  return resized;
}
30 | 
/* Default allocator: xcalloc and xrealloc (which abort instead of
 * returning NULL) paired with the standard free. */
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};

/* Return a pointer to the process-wide default allocator above. */
cmark_mem *cmark_get_default_mem_allocator(void) {
  return &DEFAULT_MEM_ALLOCATOR;
}
36 | 
37 | 
38 | char *cmark_markdown_to_html(const char *text, size_t len, int options) {
39 |   cmark_node *doc;
40 |   char *result;
41 | 
42 |   doc = cmark_parse_document(text, len, options);
43 | 
44 |   result = cmark_render_html(doc, options);
45 |   cmark_node_free(doc);
46 | 
47 |   return result;
48 | }
49 | 


--------------------------------------------------------------------------------
/src/cmarkConfig.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 | 
3 | include("${CMAKE_CURRENT_LIST_DIR}/cmark-targets.cmake")
4 | check_required_components("cmark")
5 | 


--------------------------------------------------------------------------------
/src/cmark_ctype.c:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | #include "cmark_ctype.h"
 4 | 
 5 | /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other
 6 |  */
 7 | static const uint8_t cmark_ctype_class[256] = {
 8 |     /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
 9 |     /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
10 |     /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11 |     /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
12 |     /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
13 |     /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
14 |     /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
15 |     /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
16 |     /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
17 |     /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18 |     /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19 |     /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20 |     /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21 |     /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22 |     /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23 |     /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24 |     /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
25 | 
26 | /**
27 |  * Returns 1 if c is a "whitespace" character as defined by the spec.
28 |  */
29 | int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
30 | 
31 | /**
32 |  * Returns 1 if c is an ascii punctuation character.
33 |  */
34 | int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; }
35 | 
36 | int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; }
37 | 
38 | int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; }
39 | 


--------------------------------------------------------------------------------
/src/cmark_ctype.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_CMARK_CTYPE_H
 2 | #define CMARK_CMARK_CTYPE_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /** Locale-independent versions of functions from ctype.h.
 9 |  * We want cmark to behave the same no matter what the system locale.
10 |  */
11 | 
12 | int cmark_isspace(char c);
13 | 
14 | int cmark_ispunct(char c);
15 | 
16 | int cmark_isdigit(char c);
17 | 
18 | int cmark_isalpha(char c);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/cmark_version.h.in:
--------------------------------------------------------------------------------
1 | #ifndef CMARK_VERSION_H
2 | #define CMARK_VERSION_H
3 | 
4 | #define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 16) | (@PROJECT_VERSION_MINOR@ << 8)  | @PROJECT_VERSION_PATCH@)
5 | #define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@"
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/src/houdini.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_HOUDINI_H
 2 | #define CMARK_HOUDINI_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <stdint.h>
 9 | 
10 | #include "buffer.h"
11 | 
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale
 *
 * strchr() also matches the terminating NUL of the string it searches,
 * so a zero byte is rejected explicitly; otherwise '\0' would be
 * accepted as a hexadecimal digit. Both macros evaluate their argument
 * more than once, so pass expressions without side effects.
 * */
#define _isxdigit(c)                                                           \
  ((c) != '\0' && strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
22 | 
23 | #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
24 | #define HOUDINI_UNESCAPED_SIZE(x) (x)
25 | 
26 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
27 |                                bufsize_t size);
28 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
29 |                          bufsize_t size, int secure);
30 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
31 |                           bufsize_t size);
32 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
33 |                              bufsize_t size);
34 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
35 |                         bufsize_t size);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/houdini_href_e.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #include "houdini.h"
  6 | 
  7 | #if !defined(__has_builtin)
  8 | # define __has_builtin(b) 0
  9 | #endif
 10 | 
 11 | #if !__has_builtin(__builtin_expect)
 12 | # define __builtin_expect(e, v) (e)
 13 | #endif
 14 | 
 15 | #define likely(e) __builtin_expect((e), 1)
 16 | #define unlikely(e) __builtin_expect((e), 0)
 17 | 
 18 | /*
 19 |  * The following characters will not be escaped:
 20 |  *
 21 |  *              -_.+!*'(),%#@?=;:/,+&$~ alphanum
 22 |  *
 23 |  * Note that this character set is the addition of:
 24 |  *
 25 |  *      - The characters which are safe to be in an URL
 26 |  *      - The characters which are *not* safe to be in
 27 |  *      an URL because they are RESERVED characters.
 28 |  *
 29 |  * We assume (lazily) that any RESERVED char that
 30 |  * appears inside an URL is actually meant to
 31 |  * have its native function (i.e. as an URL
 32 |  * component/separator) and hence needs no escaping.
 33 |  *
 34 |  * There are two exceptions: the characters & (amp)
 35 |  * and ' (single quote) do not appear in the table.
 36 |  * They are meant to appear in the URL as components,
 37 |  * yet they require special HTML-entity escaping
 38 |  * to generate valid HTML markup.
 39 |  *
 40 |  * All other characters will be escaped to %XX.
 41 |  *
 42 |  */
 43 | static const char HREF_SAFE[] = {
 44 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 45 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
 46 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 47 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
 48 |     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 49 |     1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 50 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 51 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 52 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 53 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 54 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 55 | };
 56 | 
 57 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
 58 |   static const uint8_t hex_chars[] = "0123456789ABCDEF";
 59 |   bufsize_t i = 0, org;
 60 |   uint8_t hex_str[3];
 61 | 
 62 |   hex_str[0] = '%';
 63 | 
 64 |   while (i < size) {
 65 |     org = i;
 66 |     while (i < size && HREF_SAFE[src[i]] != 0)
 67 |       i++;
 68 | 
 69 |     if (likely(i > org))
 70 |       cmark_strbuf_put(ob, src + org, i - org);
 71 | 
 72 |     /* escaping */
 73 |     if (i >= size)
 74 |       break;
 75 | 
 76 |     switch (src[i]) {
 77 |     /* amp appears all the time in URLs, but needs
 78 |      * HTML-entity escaping to be inside an href */
 79 |     case '&':
 80 |       cmark_strbuf_puts(ob, "&amp;");
 81 |       break;
 82 | 
 83 |     /* the single quote is a valid URL character
 84 |      * according to the standard; it needs HTML
 85 |      * entity escaping too */
 86 |     case '\'':
 87 |       cmark_strbuf_puts(ob, "&#x27;");
 88 |       break;
 89 | 
 90 | /* the space can be escaped to %20 or a plus
 91 |  * sign. we're going with the generic escape
 92 |  * for now. the plus thing is more commonly seen
 93 |  * when building GET strings */
 94 | #if 0
 95 |     case ' ':
 96 |       cmark_strbuf_putc(ob, '+');
 97 |       break;
 98 | #endif
 99 | 
100 |     /* every other character goes with a %XX escaping */
101 |     default:
102 |       hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
103 |       hex_str[2] = hex_chars[src[i] & 0xF];
104 |       cmark_strbuf_put(ob, hex_str, 3);
105 |     }
106 | 
107 |     i++;
108 |   }
109 | 
110 |   return 1;
111 | }
112 | 


--------------------------------------------------------------------------------
/src/houdini_html_e.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | #include "houdini.h"
 6 | 
 7 | #if !defined(__has_builtin)
 8 | # define __has_builtin(b) 0
 9 | #endif
10 | 
11 | #if !__has_builtin(__builtin_expect)
12 | # define __builtin_expect(e, v) (e)
13 | #endif
14 | 
15 | #define likely(e) __builtin_expect((e), 1)
16 | #define unlikely(e) __builtin_expect((e), 0)
17 | 
/**
 * Escaping follows the OWASP character set; the entities emitted are the
 * ones listed in HTML_ESCAPES below:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#39;      &apos; is not recommended
 * / --> &#47;      forward slash is included as it helps end an HTML entity
 *
 * HTML_ESCAPE_TABLE maps a byte value to an index into HTML_ESCAPES;
 * index 0 means the byte needs no escaping. Rows are the high nibble of
 * the byte, columns the low nibble.
 */
static const char HTML_ESCAPE_TABLE[] = {
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 2 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
    /* 3 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
    /* 4 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 5 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 6 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 7 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

static const char *HTML_ESCAPES[] = {
    "",       /* 0: not escaped */
    "&quot;", /* 1: " */
    "&amp;",  /* 2: & */
    "&#39;",  /* 3: ' */
    "&#47;",  /* 4: / */
    "&lt;",   /* 5: < */
    "&gt;",   /* 6: > */
};
45 | 
46 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
47 |                          int secure) {
48 |   bufsize_t i = 0, org, esc = 0;
49 | 
50 |   while (i < size) {
51 |     org = i;
52 |     while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
53 |       i++;
54 | 
55 |     if (i > org)
56 |       cmark_strbuf_put(ob, src + org, i - org);
57 | 
58 |     /* escaping */
59 |     if (unlikely(i >= size))
60 |       break;
61 | 
62 |     /* The forward slash is only escaped in secure mode */
63 |     if ((src[i] == '/' || src[i] == '\'') && !secure) {
64 |       cmark_strbuf_putc(ob, src[i]);
65 |     } else {
66 |       cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
67 |     }
68 | 
69 |     i++;
70 |   }
71 | 
72 |   return 1;
73 | }
74 | 


--------------------------------------------------------------------------------
/src/houdini_html_u.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #include "buffer.h"
  6 | #include "houdini.h"
  7 | #include "utf8.h"
  8 | #include "entities.inc"
  9 | 
 10 | #if !defined(__has_builtin)
 11 | # define __has_builtin(b) 0
 12 | #endif
 13 | 
 14 | #if !__has_builtin(__builtin_expect)
 15 | # define __builtin_expect(e, v) (e)
 16 | #endif
 17 | 
 18 | #define likely(e) __builtin_expect((e), 1)
 19 | #define unlikely(e) __builtin_expect((e), 0)
 20 | 
 21 | /* Binary tree lookup code for entities added by JGM */
 22 | 
 23 | static const unsigned char *S_lookup(int i, int low, int hi,
 24 |                                      const unsigned char *s, int len,
 25 |                                      bufsize_t *size_out) {
 26 |   int j;
 27 |   uint32_t value = cmark_entities[i];
 28 |   const unsigned char *ent_name = cmark_entity_text + ENT_TEXT_IDX(value);
 29 |   int ent_len = ENT_NAME_SIZE(value);
 30 |   int min_len = len < ent_len ? len : ent_len;
 31 |   int cmp =
 32 |       strncmp((const char *)s, (const char *)ent_name, min_len);
 33 |   if (cmp == 0)
 34 |     cmp = len - ent_len;
 35 |   if (cmp == 0) {
 36 |     *size_out = ENT_REPL_SIZE(value);
 37 |     return ent_name + ent_len;
 38 |   } else if (cmp <= 0 && i > low) {
 39 |     j = i - ((i - low) / 2);
 40 |     if (j == i)
 41 |       j -= 1;
 42 |     return S_lookup(j, low, i - 1, s, len, size_out);
 43 |   } else if (cmp > 0 && i < hi) {
 44 |     j = i + ((hi - i) / 2);
 45 |     if (j == i)
 46 |       j += 1;
 47 |     return S_lookup(j, i + 1, hi, s, len, size_out);
 48 |   } else {
 49 |     return NULL;
 50 |   }
 51 | }
 52 | 
 53 | static const unsigned char *S_lookup_entity(const unsigned char *s, int len,
 54 |                                             bufsize_t *size_out) {
 55 |   return S_lookup(ENT_TABLE_SIZE / 2, 0, ENT_TABLE_SIZE - 1, s, len, size_out);
 56 | }
 57 | 
 58 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
 59 |                                bufsize_t size) {
 60 |   bufsize_t i = 0;
 61 | 
 62 |   if (size >= 3 && src[0] == '#') {
 63 |     int codepoint = 0;
 64 |     int num_digits = 0;
 65 |     int max_digits = 7;
 66 | 
 67 |     if (_isdigit(src[1])) {
 68 |       for (i = 1; i < size && _isdigit(src[i]); ++i) {
 69 |         codepoint = (codepoint * 10) + (src[i] - '0');
 70 | 
 71 |         if (codepoint >= 0x110000) {
 72 |           // Keep counting digits but
 73 |           // avoid integer overflow.
 74 |           codepoint = 0x110000;
 75 |         }
 76 |       }
 77 | 
 78 |       num_digits = i - 1;
 79 |       max_digits = 7;
 80 |     }
 81 | 
 82 |     else if (src[1] == 'x' || src[1] == 'X') {
 83 |       for (i = 2; i < size && _isxdigit(src[i]); ++i) {
 84 |         codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
 85 | 
 86 |         if (codepoint >= 0x110000) {
 87 |           // Keep counting digits but
 88 |           // avoid integer overflow.
 89 |           codepoint = 0x110000;
 90 |         }
 91 |       }
 92 | 
 93 |       num_digits = i - 2;
 94 |       max_digits = 6;
 95 |     }
 96 | 
 97 |     if (num_digits >= 1 && num_digits <= max_digits &&
 98 |                     i < size && src[i] == ';') {
 99 |       if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
100 |           codepoint >= 0x110000) {
101 |         codepoint = 0xFFFD;
102 |       }
103 |       cmark_utf8proc_encode_char(codepoint, ob);
104 |       return i + 1;
105 |     }
106 |   }
107 | 
108 |   else {
109 |     if (size > ENT_MAX_LENGTH)
110 |       size = ENT_MAX_LENGTH;
111 | 
112 |     for (i = ENT_MIN_LENGTH; i < size; ++i) {
113 |       if (src[i] == ' ')
114 |         break;
115 | 
116 |       if (src[i] == ';') {
117 |         bufsize_t size;
118 |         const unsigned char *entity = S_lookup_entity(src, i, &size);
119 | 
120 |         if (entity != NULL) {
121 |           cmark_strbuf_put(ob, entity, size);
122 |           return i + 1;
123 |         }
124 | 
125 |         break;
126 |       }
127 |     }
128 |   }
129 | 
130 |   return 0;
131 | }
132 | 
133 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
134 |                           bufsize_t size) {
135 |   bufsize_t i = 0, org, ent;
136 | 
137 |   while (i < size) {
138 |     org = i;
139 |     while (i < size && src[i] != '&')
140 |       i++;
141 | 
142 |     if (likely(i > org)) {
143 |       if (unlikely(org == 0)) {
144 |         if (i >= size)
145 |           return 0;
146 | 
147 |         cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
148 |       }
149 | 
150 |       cmark_strbuf_put(ob, src + org, i - org);
151 |     }
152 | 
153 |     /* escaping */
154 |     if (i >= size)
155 |       break;
156 | 
157 |     i++;
158 | 
159 |     ent = houdini_unescape_ent(ob, src + i, size - i);
160 |     i += ent;
161 | 
162 |     /* not really an entity */
163 |     if (ent == 0)
164 |       cmark_strbuf_putc(ob, '&');
165 |   }
166 | 
167 |   return 1;
168 | }
169 | 
170 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
171 |                              bufsize_t size) {
172 |   if (!houdini_unescape_html(ob, src, size))
173 |     cmark_strbuf_put(ob, src, size);
174 | }
175 | 


--------------------------------------------------------------------------------
/src/inlines.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_INLINES_H
 2 | #define CMARK_INLINES_H
 3 | 
 4 | #include "chunk.h"
 5 | #include "references.h"
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
12 | unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
13 | 
14 | void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
15 |                          cmark_reference_map *refmap, int options);
16 | 
17 | bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
18 |                                        cmark_reference_map *refmap);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/iterator.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdbool.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #include "node.h"
  6 | #include "cmark.h"
  7 | #include "iterator.h"
  8 | 
/* Bit set, one bit per node type, of the types S_is_leaf reports as
 * leaves. cmark_iter_next neither descends into such a node nor
 * schedules an EXIT event for it. */
static const int S_leaf_mask =
    (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) |
    (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) |
    (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) |
    (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE);
 14 | 
 15 | cmark_iter *cmark_iter_new(cmark_node *root) {
 16 |   if (root == NULL) {
 17 |     return NULL;
 18 |   }
 19 |   cmark_mem *mem = root->mem;
 20 |   cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter));
 21 |   iter->mem = mem;
 22 |   iter->root = root;
 23 |   iter->cur.ev_type = CMARK_EVENT_NONE;
 24 |   iter->cur.node = NULL;
 25 |   iter->next.ev_type = CMARK_EVENT_ENTER;
 26 |   iter->next.node = root;
 27 |   return iter;
 28 | }
 29 | 
 30 | void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); }
 31 | 
 32 | static bool S_is_leaf(cmark_node *node) {
 33 |   return ((1 << node->type) & S_leaf_mask) != 0;
 34 | }
 35 | 
 36 | cmark_event_type cmark_iter_next(cmark_iter *iter) {
 37 |   cmark_event_type ev_type = iter->next.ev_type;
 38 |   cmark_node *node = iter->next.node;
 39 | 
 40 |   iter->cur.ev_type = ev_type;
 41 |   iter->cur.node = node;
 42 | 
 43 |   if (ev_type == CMARK_EVENT_DONE) {
 44 |     return ev_type;
 45 |   }
 46 | 
 47 |   /* roll forward to next item, setting both fields */
 48 |   if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
 49 |     if (node->first_child == NULL) {
 50 |       /* stay on this node but exit */
 51 |       iter->next.ev_type = CMARK_EVENT_EXIT;
 52 |     } else {
 53 |       iter->next.ev_type = CMARK_EVENT_ENTER;
 54 |       iter->next.node = node->first_child;
 55 |     }
 56 |   } else if (node == iter->root) {
 57 |     /* don't move past root */
 58 |     iter->next.ev_type = CMARK_EVENT_DONE;
 59 |     iter->next.node = NULL;
 60 |   } else if (node->next) {
 61 |     iter->next.ev_type = CMARK_EVENT_ENTER;
 62 |     iter->next.node = node->next;
 63 |   } else if (node->parent) {
 64 |     iter->next.ev_type = CMARK_EVENT_EXIT;
 65 |     iter->next.node = node->parent;
 66 |   } else {
 67 |     assert(false);
 68 |     iter->next.ev_type = CMARK_EVENT_DONE;
 69 |     iter->next.node = NULL;
 70 |   }
 71 | 
 72 |   return ev_type;
 73 | }
 74 | 
 75 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
 76 |                       cmark_event_type event_type) {
 77 |   iter->next.ev_type = event_type;
 78 |   iter->next.node = current;
 79 |   cmark_iter_next(iter);
 80 | }
 81 | 
 82 | cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; }
 83 | 
 84 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
 85 |   return iter->cur.ev_type;
 86 | }
 87 | 
 88 | cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; }
 89 | 
 90 | void cmark_consolidate_text_nodes(cmark_node *root) {
 91 |   if (root == NULL) {
 92 |     return;
 93 |   }
 94 |   cmark_iter *iter = cmark_iter_new(root);
 95 |   cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
 96 |   cmark_event_type ev_type;
 97 |   cmark_node *cur, *tmp, *next;
 98 | 
 99 |   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
100 |     cur = cmark_iter_get_node(iter);
101 |     if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
102 |         cur->next && cur->next->type == CMARK_NODE_TEXT) {
103 |       cmark_strbuf_clear(&buf);
104 |       cmark_strbuf_put(&buf, cur->data, cur->len);
105 |       tmp = cur->next;
106 |       while (tmp && tmp->type == CMARK_NODE_TEXT) {
107 |         cmark_iter_next(iter); // advance pointer
108 |         cmark_strbuf_put(&buf, tmp->data, tmp->len);
109 |         cur->end_column = tmp->end_column;
110 |         next = tmp->next;
111 |         cmark_node_free(tmp);
112 |         tmp = next;
113 |       }
114 |       iter->mem->free(cur->data);
115 |       cur->len = buf.size;
116 |       cur->data = cmark_strbuf_detach(&buf);
117 |     }
118 |   }
119 | 
120 |   cmark_strbuf_free(&buf);
121 |   cmark_iter_free(iter);
122 | }
123 | 


--------------------------------------------------------------------------------
/src/iterator.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_ITERATOR_H
 2 | #define CMARK_ITERATOR_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include "cmark.h"
 9 | 
/* One step of a traversal: an event type and the node it applies to. */
typedef struct {
  cmark_event_type ev_type;
  cmark_node *node;
} cmark_iter_state;
14 | 
/* Tree iterator state. */
struct cmark_iter {
  cmark_mem *mem;        /* allocator for the iterator itself */
  cmark_node *root;      /* node at which the traversal starts and stops */
  cmark_iter_state cur;  /* step most recently returned to the caller */
  cmark_iter_state next; /* step the following advance will return */
};
21 | 
22 | #ifdef __cplusplus
23 | }
24 | #endif
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/libcmark.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@CMAKE_INSTALL_PREFIX@
 2 | exec_prefix=@CMAKE_INSTALL_PREFIX@
 3 | libdir=@CMAKE_INSTALL_FULL_LIBDIR@
 4 | includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
 5 | 
 6 | Name: libcmark
 7 | Description: CommonMark parsing, rendering, and manipulation
 8 | Version: @PROJECT_VERSION@
 9 | Libs: -L${libdir} -lcmark
10 | Cflags: -I${includedir}
11 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | #include <errno.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | 
  6 | #include "cmark.h"
  7 | #include "node.h"
  8 | 
  9 | #if defined(__OpenBSD__)
 10 | #  include <sys/param.h>
 11 | #  if OpenBSD >= 201605
 12 | #    define USE_PLEDGE
 13 | #    include <unistd.h>
 14 | #  endif
 15 | #endif
 16 | 
 17 | #if defined(_WIN32) && !defined(__CYGWIN__)
 18 | #include <io.h>
 19 | #include <fcntl.h>
 20 | #endif
 21 | 
/* Output formats selectable on the command line. print_document handles
 * every value except FORMAT_NONE, which it reports as unknown. */
typedef enum {
  FORMAT_NONE,
  FORMAT_HTML,
  FORMAT_XML,
  FORMAT_MAN,
  FORMAT_COMMONMARK,
  FORMAT_LATEX
} writer_format;
 30 | 
 31 | void print_usage(void) {
 32 |   printf("Usage:   cmark [FILE*]\n");
 33 |   printf("Options:\n");
 34 |   printf("  --to, -t FORMAT  Specify output format (html, xml, man, "
 35 |          "commonmark, latex)\n");
 36 |   printf("  --width WIDTH    Specify wrap width (default 0 = nowrap)\n");
 37 |   printf("  --sourcepos      Include source position attribute\n");
 38 |   printf("  --hardbreaks     Treat newlines as hard line breaks\n");
 39 |   printf("  --nobreaks       Render soft line breaks as spaces\n");
 40 |   printf("  --safe           Omit raw HTML and dangerous URLs\n");
 41 |   printf("  --unsafe         Render raw HTML and dangerous URLs\n");
 42 |   printf("  --smart          Use smart punctuation\n");
 43 |   printf("  --validate-utf8  Replace invalid UTF-8 sequences with U+FFFD\n");
 44 |   printf("  --help, -h       Print usage information\n");
 45 |   printf("  --version        Print version\n");
 46 | }
 47 | 
 48 | static void print_document(cmark_node *document, writer_format writer,
 49 |                            int options, int width) {
 50 |   char *result;
 51 | 
 52 |   switch (writer) {
 53 |   case FORMAT_HTML:
 54 |     result = cmark_render_html(document, options);
 55 |     break;
 56 |   case FORMAT_XML:
 57 |     result = cmark_render_xml(document, options);
 58 |     break;
 59 |   case FORMAT_MAN:
 60 |     result = cmark_render_man(document, options, width);
 61 |     break;
 62 |   case FORMAT_COMMONMARK:
 63 |     result = cmark_render_commonmark(document, options, width);
 64 |     break;
 65 |   case FORMAT_LATEX:
 66 |     result = cmark_render_latex(document, options, width);
 67 |     break;
 68 |   default:
 69 |     fprintf(stderr, "Unknown format %d\n", writer);
 70 |     exit(1);
 71 |   }
 72 |   fwrite(result, strlen(result), 1, stdout);
 73 |   document->mem->free(result);
 74 | }
 75 | 
 76 | int main(int argc, char *argv[]) {
 77 |   int i, numfps = 0;
 78 |   int *files;
 79 |   char buffer[4096];
 80 |   cmark_parser *parser;
 81 |   size_t bytes;
 82 |   cmark_node *document;
 83 |   int width = 0;
 84 |   char *unparsed;
 85 |   writer_format writer = FORMAT_HTML;
 86 |   int options = CMARK_OPT_DEFAULT;
 87 | 
 88 | #ifdef USE_PLEDGE
 89 |   if (pledge("stdio rpath", NULL) != 0) {
 90 |     perror("pledge");
 91 |     return 1;
 92 |   }
 93 | #endif
 94 | 
 95 | #if defined(_WIN32) && !defined(__CYGWIN__)
 96 |   _setmode(_fileno(stdin), _O_BINARY);
 97 |   _setmode(_fileno(stdout), _O_BINARY);
 98 | #endif
 99 | 
100 |   files = (int *)calloc(argc, sizeof(*files));
101 | 
102 |   for (i = 1; i < argc; i++) {
103 |     if (strcmp(argv[i], "--version") == 0) {
104 |       printf("cmark %s", CMARK_VERSION_STRING);
105 |       printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n");
106 |       exit(0);
107 |     } else if (strcmp(argv[i], "--sourcepos") == 0) {
108 |       options |= CMARK_OPT_SOURCEPOS;
109 |     } else if (strcmp(argv[i], "--hardbreaks") == 0) {
110 |       options |= CMARK_OPT_HARDBREAKS;
111 |     } else if (strcmp(argv[i], "--nobreaks") == 0) {
112 |       options |= CMARK_OPT_NOBREAKS;
113 |     } else if (strcmp(argv[i], "--smart") == 0) {
114 |       options |= CMARK_OPT_SMART;
115 |     } else if (strcmp(argv[i], "--safe") == 0) {
116 |       options |= CMARK_OPT_SAFE;
117 |     } else if (strcmp(argv[i], "--unsafe") == 0) {
118 |       options |= CMARK_OPT_UNSAFE;
119 |     } else if (strcmp(argv[i], "--validate-utf8") == 0) {
120 |       options |= CMARK_OPT_VALIDATE_UTF8;
121 |     } else if ((strcmp(argv[i], "--help") == 0) ||
122 |                (strcmp(argv[i], "-h") == 0)) {
123 |       print_usage();
124 |       exit(0);
125 |     } else if (strcmp(argv[i], "--width") == 0) {
126 |       i += 1;
127 |       if (i < argc) {
128 |         width = (int)strtol(argv[i], &unparsed, 10);
129 |         if (unparsed && strlen(unparsed) > 0) {
130 |           fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i],
131 |                   unparsed);
132 |           exit(1);
133 |         }
134 |       } else {
135 |         fprintf(stderr, "--width requires an argument\n");
136 |         exit(1);
137 |       }
138 |     } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) {
139 |       i += 1;
140 |       if (i < argc) {
141 |         if (strcmp(argv[i], "man") == 0) {
142 |           writer = FORMAT_MAN;
143 |         } else if (strcmp(argv[i], "html") == 0) {
144 |           writer = FORMAT_HTML;
145 |         } else if (strcmp(argv[i], "xml") == 0) {
146 |           writer = FORMAT_XML;
147 |         } else if (strcmp(argv[i], "commonmark") == 0) {
148 |           writer = FORMAT_COMMONMARK;
149 |         } else if (strcmp(argv[i], "latex") == 0) {
150 |           writer = FORMAT_LATEX;
151 |         } else {
152 |           fprintf(stderr, "Unknown format %s\n", argv[i]);
153 |           exit(1);
154 |         }
155 |       } else {
156 |         fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
157 |         exit(1);
158 |       }
159 |     } else if (*argv[i] == '-') {
160 |       print_usage();
161 |       exit(1);
162 |     } else { // treat as file argument
163 |       files[numfps++] = i;
164 |     }
165 |   }
166 | 
167 |   parser = cmark_parser_new(options);
168 |   for (i = 0; i < numfps; i++) {
169 |     FILE *fp = fopen(argv[files[i]], "rb");
170 |     if (fp == NULL) {
171 |       fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
172 |               strerror(errno));
173 |       exit(1);
174 |     }
175 | 
176 |     while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
177 |       cmark_parser_feed(parser, buffer, bytes);
178 |       if (bytes < sizeof(buffer)) {
179 |         break;
180 |       }
181 |     }
182 | 
183 |     fclose(fp);
184 |   }
185 | 
186 |   if (numfps == 0) {
187 | 
188 |     while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
189 |       cmark_parser_feed(parser, buffer, bytes);
190 |       if (bytes < sizeof(buffer)) {
191 |         break;
192 |       }
193 |     }
194 |   }
195 | 
196 | #ifdef USE_PLEDGE
197 |   if (pledge("stdio", NULL) != 0) {
198 |     perror("pledge");
199 |     return 1;
200 |   }
201 | #endif
202 | 
203 |   document = cmark_parser_finish(parser);
204 |   cmark_parser_free(parser);
205 | 
206 |   print_document(document, writer, options, width);
207 | 
208 |   cmark_node_free(document);
209 | 
210 |   free(files);
211 | 
212 |   return 0;
213 | }
214 | 


--------------------------------------------------------------------------------
/src/man.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdbool.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | #include "cmark.h"
  8 | #include "node.h"
  9 | #include "buffer.h"
 10 | #include "utf8.h"
 11 | #include "render.h"
 12 | 
 13 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
 14 | #define LIT(s) renderer->out(renderer, s, false, LITERAL)
 15 | #define CR() renderer->cr(renderer)
 16 | #define BLANKLINE() renderer->blankline(renderer)
 17 | #define LIST_NUMBER_SIZE 20
 18 | 
 19 | // Functions to convert cmark_nodes to groff man strings.
 20 | static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c,
 21 |                    unsigned char nextc) {
 22 |   (void)(nextc);
 23 | 
 24 |   if (escape == LITERAL) {
 25 |     cmark_render_code_point(renderer, c);
 26 |     return;
 27 |   }
 28 | 
 29 |   switch (c) {
 30 |   case 46:
 31 |     if (renderer->begin_line) {
 32 |       cmark_render_ascii(renderer, "\\&.");
 33 |     } else {
 34 |       cmark_render_code_point(renderer, c);
 35 |     }
 36 |     break;
 37 |   case 39:
 38 |     if (renderer->begin_line) {
 39 |       cmark_render_ascii(renderer, "\\&'");
 40 |     } else {
 41 |       cmark_render_code_point(renderer, c);
 42 |     }
 43 |     break;
 44 |   case 45:
 45 |     cmark_render_ascii(renderer, "\\-");
 46 |     break;
 47 |   case 92:
 48 |     cmark_render_ascii(renderer, "\\e");
 49 |     break;
 50 |   case 8216: // left single quote
 51 |     cmark_render_ascii(renderer, "\\[oq]");
 52 |     break;
 53 |   case 8217: // right single quote
 54 |     cmark_render_ascii(renderer, "\\[cq]");
 55 |     break;
 56 |   case 8220: // left double quote
 57 |     cmark_render_ascii(renderer, "\\[lq]");
 58 |     break;
 59 |   case 8221: // right double quote
 60 |     cmark_render_ascii(renderer, "\\[rq]");
 61 |     break;
 62 |   case 8212: // em dash
 63 |     cmark_render_ascii(renderer, "\\[em]");
 64 |     break;
 65 |   case 8211: // en dash
 66 |     cmark_render_ascii(renderer, "\\[en]");
 67 |     break;
 68 |   default:
 69 |     cmark_render_code_point(renderer, c);
 70 |   }
 71 | }
 72 | 
 73 | static int S_render_node(cmark_renderer *renderer, cmark_node *node,
 74 |                          cmark_event_type ev_type, int options) {
 75 |   cmark_node *tmp;
 76 |   int list_number;
 77 |   bool entering = (ev_type == CMARK_EVENT_ENTER);
 78 |   bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
 79 |   struct block_number *new_block_number;
 80 |   cmark_mem *allocator = cmark_get_default_mem_allocator();
 81 | 
 82 |   // avoid unused parameter error:
 83 |   (void)(options);
 84 | 
 85 |   // indent inside nested lists
 86 |   if (renderer->block_number_in_list_item &&
 87 |       node->type < CMARK_NODE_FIRST_INLINE) {
 88 |     if (entering) {
 89 |       renderer->block_number_in_list_item->number += 1;
 90 |       if (renderer->block_number_in_list_item->number == 2) {
 91 |         CR();
 92 |         LIT(".RS"); // indent
 93 |         CR();
 94 |       }
 95 |     }
 96 |   }
 97 | 
 98 |   switch (node->type) {
 99 |   case CMARK_NODE_DOCUMENT:
100 |     break;
101 | 
102 |   case CMARK_NODE_BLOCK_QUOTE:
103 |     if (entering) {
104 |       CR();
105 |       LIT(".RS");
106 |       CR();
107 |     } else {
108 |       CR();
109 |       LIT(".RE");
110 |       CR();
111 |     }
112 |     break;
113 | 
114 |   case CMARK_NODE_LIST:
115 |     break;
116 | 
117 |   case CMARK_NODE_ITEM:
118 |     if (entering) {
119 |       new_block_number = allocator->calloc(1, sizeof(struct block_number));
120 |       new_block_number->number = 0;
121 |       new_block_number->parent = renderer->block_number_in_list_item;
122 |       renderer->block_number_in_list_item = new_block_number;
123 |       CR();
124 |       LIT(".IP ");
125 |       if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
126 |         LIT("\\[bu] 2");
127 |       } else {
128 |         list_number = cmark_node_get_list_start(node->parent);
129 |         tmp = node;
130 |         while (tmp->prev) {
131 |           tmp = tmp->prev;
132 |           list_number += 1;
133 |         }
134 |         char list_number_s[LIST_NUMBER_SIZE];
135 |         snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
136 |         LIT(list_number_s);
137 |       }
138 |       CR();
139 |     } else {
140 |       if (renderer->block_number_in_list_item) {
141 |         if (renderer->block_number_in_list_item->number >= 2) {
142 |           CR();
143 |           LIT(".RE"); // de-indent
144 |         }
145 |         new_block_number = renderer->block_number_in_list_item;
146 |         renderer->block_number_in_list_item =
147 |           renderer->block_number_in_list_item->parent;
148 |         allocator->free(new_block_number);
149 |       }
150 |       CR();
151 |     }
152 |     break;
153 | 
154 |   case CMARK_NODE_HEADING:
155 |     if (entering) {
156 |       CR();
157 |       LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS");
158 |       CR();
159 |     } else {
160 |       CR();
161 |     }
162 |     break;
163 | 
164 |   case CMARK_NODE_CODE_BLOCK:
165 |     CR();
166 |     LIT(".IP\n.nf\n\\f[C]\n");
167 |     OUT(cmark_node_get_literal(node), false, NORMAL);
168 |     CR();
169 |     LIT("\\f[]\n.fi");
170 |     CR();
171 |     break;
172 | 
173 |   case CMARK_NODE_HTML_BLOCK:
174 |     break;
175 | 
176 |   case CMARK_NODE_CUSTOM_BLOCK:
177 |     CR();
178 |     OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
179 |         false, LITERAL);
180 |     CR();
181 |     break;
182 | 
183 |   case CMARK_NODE_THEMATIC_BREAK:
184 |     CR();
185 |     LIT(".PP\n  *  *  *  *  *");
186 |     CR();
187 |     break;
188 | 
189 |   case CMARK_NODE_PARAGRAPH:
190 |     if (entering) {
191 |       // no blank line if first paragraph in list:
192 |       if (node->parent && node->parent->type == CMARK_NODE_ITEM &&
193 |           node->prev == NULL) {
194 |         // no blank line or .PP
195 |       } else {
196 |         CR();
197 |         LIT(".PP");
198 |         CR();
199 |       }
200 |     } else {
201 |       CR();
202 |     }
203 |     break;
204 | 
205 |   case CMARK_NODE_TEXT:
206 |     OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
207 |     break;
208 | 
209 |   case CMARK_NODE_LINEBREAK:
210 |     LIT(".PD 0\n.P\n.PD");
211 |     CR();
212 |     break;
213 | 
214 |   case CMARK_NODE_SOFTBREAK:
215 |     if (options & CMARK_OPT_HARDBREAKS) {
216 |       LIT(".PD 0\n.P\n.PD");
217 |       CR();
218 |     } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
219 |       CR();
220 |     } else {
221 |       OUT(" ", allow_wrap, LITERAL);
222 |     }
223 |     break;
224 | 
225 |   case CMARK_NODE_CODE:
226 |     LIT("\\f[C]");
227 |     OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
228 |     LIT("\\f[]");
229 |     break;
230 | 
231 |   case CMARK_NODE_HTML_INLINE:
232 |     break;
233 | 
234 |   case CMARK_NODE_CUSTOM_INLINE:
235 |     OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
236 |         false, LITERAL);
237 |     break;
238 | 
239 |   case CMARK_NODE_STRONG:
240 |     if (entering) {
241 |       LIT("\\f[B]");
242 |     } else {
243 |       LIT("\\f[]");
244 |     }
245 |     break;
246 | 
247 |   case CMARK_NODE_EMPH:
248 |     if (entering) {
249 |       LIT("\\f[I]");
250 |     } else {
251 |       LIT("\\f[]");
252 |     }
253 |     break;
254 | 
255 |   case CMARK_NODE_LINK:
256 |     if (!entering) {
257 |       LIT(" (");
258 |       OUT(cmark_node_get_url(node), allow_wrap, URL);
259 |       LIT(")");
260 |     }
261 |     break;
262 | 
263 |   case CMARK_NODE_IMAGE:
264 |     if (entering) {
265 |       LIT("[IMAGE: ");
266 |     } else {
267 |       LIT("]");
268 |     }
269 |     break;
270 | 
271 |   default:
272 |     assert(false);
273 |     break;
274 |   }
275 | 
276 |   return 1;
277 | }
278 | 
279 | char *cmark_render_man(cmark_node *root, int options, int width) {
280 |   return cmark_render(root, options, width, S_outc, S_render_node);
281 | }
282 | 


--------------------------------------------------------------------------------
/src/node.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_NODE_H
 2 | #define CMARK_NODE_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <stdbool.h>
 9 | #include <stdint.h>
10 | #include <stdio.h>
11 | 
12 | #include "cmark.h"
13 | #include "buffer.h"
14 | 
// Payload for list nodes; stored as the `list` member of the `as` union in
// struct cmark_node.
// NOTE(review): the per-field notes are inferred from the field names only;
// confirm against the block parser before relying on them.
typedef struct {
  int marker_offset;         // presumably the indent of the list marker
  int padding;               // presumably marker width plus trailing spaces
  int start;                 // presumably first number of an ordered list
  unsigned char list_type;   // presumably a cmark_list_type value
  unsigned char delimiter;   // presumably a cmark_delim_type value
  unsigned char bullet_char; // presumably the bullet character used
  bool tight;                // presumably true for a tight list
} cmark_list;
24 | 
// Payload for code block nodes; stored as the `code` member of the `as`
// union in struct cmark_node.
// NOTE(review): the per-field notes are inferred from the field names only;
// confirm against the block parser.
typedef struct {
  unsigned char *info;      // presumably the fence info string
  uint8_t fence_length;     // presumably the length of the opening fence
  uint8_t fence_offset;     // presumably the indent of the opening fence
  unsigned char fence_char; // presumably the fence character
  int8_t fenced;            // presumably nonzero for a fenced code block
} cmark_code;
32 | 
// Payload for heading nodes; stored as the `heading` member of the `as`
// union in struct cmark_node.
// NOTE(review): the per-field notes are inferred from the field names only.
typedef struct {
  int internal_offset; // TODO confirm meaning against the parser
  int8_t level;        // presumably the heading level
  bool setext;         // presumably true for a setext-style heading
} cmark_heading;
38 | 
// Payload for link-like nodes; stored as the `link` member of the `as` union
// in struct cmark_node.
// NOTE(review): ownership of the two strings is not visible here; confirm.
typedef struct {
  unsigned char *url;   // presumably the link destination
  unsigned char *title; // presumably the link title
} cmark_link;
43 | 
// Payload for custom nodes; stored as the `custom` member of the `as` union
// in struct cmark_node.
// NOTE(review): presumably these back cmark_node_get_on_enter() and
// cmark_node_get_on_exit(); confirm against the node accessors.
typedef struct {
  unsigned char *on_enter; // presumably text emitted when entering the node
  unsigned char *on_exit;  // presumably text emitted when leaving the node
} cmark_custom;
48 | 
// Internal flag values; each enumerator occupies its own bit, so they can
// be OR-ed together.
// NOTE(review): presumably stored in the `flags` member of struct
// cmark_node; the meaning of each flag is not shown in this header.
enum cmark_node__internal_flags {
  CMARK_NODE__OPEN = (1 << 0),
  CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
  CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
  CMARK_NODE__LIST_LAST_LINE_BLANK = (1 << 3),
};
55 | 
// A node of the document tree.
struct cmark_node {
  // Allocator associated with this node; callers use it to release strings
  // rendered from the tree.
  cmark_mem *mem;

  // Sibling, parent and child links forming the tree.
  struct cmark_node *next;
  struct cmark_node *prev;
  struct cmark_node *parent;
  struct cmark_node *first_child;
  struct cmark_node *last_child;

  void *user_data;

  // NOTE(review): presumably the node's literal/content bytes and their
  // length; confirm against the node accessors.
  unsigned char *data;
  bufsize_t len;

  // NOTE(review): presumably the source position of the node; confirm.
  int start_line;
  int start_column;
  int end_line;
  int end_column;
  uint16_t type;  // node type, compared against CMARK_NODE_* values
  uint16_t flags; // presumably cmark_node__internal_flags bits

  // Type-specific payload; which member is valid presumably depends on
  // `type`.
  union {
    cmark_list list;
    cmark_code code;
    cmark_heading heading;
    cmark_link link;
    cmark_custom custom;
    int html_block_type;
  } as;
};
86 | 
87 | CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
88 | 
89 | #ifdef __cplusplus
90 | }
91 | #endif
92 | 
93 | #endif
94 | 


--------------------------------------------------------------------------------
/src/parser.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_AST_H
 2 | #define CMARK_AST_H
 3 | 
 4 | #include <stdio.h>
 5 | #include "references.h"
 6 | #include "node.h"
 7 | #include "buffer.h"
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | #define MAX_LINK_LABEL_LENGTH 1000
14 | 
// State of a parser instance.
// NOTE(review): this header shows only the layout; the per-field notes are
// inferred from names and types and should be confirmed against blocks.c.
struct cmark_parser {
  struct cmark_mem *mem;              // allocator used by the parser
  struct cmark_reference_map *refmap; // link reference definitions
  struct cmark_node *root;            // presumably the document root node
  struct cmark_node *current;         // presumably the innermost open block
  int line_number;                    // presumably the current input line
  // NOTE(review): presumably positions within the line being processed.
  bufsize_t offset;
  bufsize_t column;
  bufsize_t first_nonspace;
  bufsize_t first_nonspace_column;
  bufsize_t thematic_break_kill_pos;
  int indent;
  bool blank;
  bool partially_consumed_tab;
  cmark_strbuf curline;
  bufsize_t last_line_length;
  cmark_strbuf linebuf;
  cmark_strbuf content;
  int options;                        // presumably CMARK_OPT_* bits
  bool last_buffer_ended_with_cr;
  unsigned int total_size;
};
37 | 
38 | #ifdef __cplusplus
39 | }
40 | #endif
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/src/references.c:
--------------------------------------------------------------------------------
  1 | #include "cmark.h"
  2 | #include "utf8.h"
  3 | #include "parser.h"
  4 | #include "references.h"
  5 | #include "inlines.h"
  6 | #include "chunk.h"
  7 | 
  8 | static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
  9 |   cmark_mem *mem = map->mem;
 10 |   if (ref != NULL) {
 11 |     mem->free(ref->label);
 12 |     mem->free(ref->url);
 13 |     mem->free(ref->title);
 14 |     mem->free(ref);
 15 |   }
 16 | }
 17 | 
 18 | // normalize reference:  collapse internal whitespace to single space,
 19 | // remove leading/trailing whitespace, case fold
 20 | // Return NULL if the reference name is actually empty (i.e. composed
 21 | // solely from whitespace)
 22 | static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
 23 |   cmark_strbuf normalized = CMARK_BUF_INIT(mem);
 24 |   unsigned char *result;
 25 | 
 26 |   if (ref == NULL)
 27 |     return NULL;
 28 | 
 29 |   if (ref->len == 0)
 30 |     return NULL;
 31 | 
 32 |   cmark_utf8proc_case_fold(&normalized, ref->data, ref->len);
 33 |   cmark_strbuf_trim(&normalized);
 34 |   cmark_strbuf_normalize_whitespace(&normalized);
 35 | 
 36 |   result = cmark_strbuf_detach(&normalized);
 37 |   assert(result);
 38 | 
 39 |   if (result[0] == '\0') {
 40 |     mem->free(result);
 41 |     return NULL;
 42 |   }
 43 | 
 44 |   return result;
 45 | }
 46 | 
 47 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
 48 |                             cmark_chunk *url, cmark_chunk *title) {
 49 |   cmark_reference *ref;
 50 |   unsigned char *reflabel = normalize_reference(map->mem, label);
 51 | 
 52 |   /* empty reference name, or composed from only whitespace */
 53 |   if (reflabel == NULL)
 54 |     return;
 55 | 
 56 |   assert(map->sorted == NULL);
 57 | 
 58 |   ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
 59 |   ref->label = reflabel;
 60 |   ref->url = cmark_clean_url(map->mem, url);
 61 |   ref->title = cmark_clean_title(map->mem, title);
 62 |   ref->age = map->size;
 63 |   ref->next = map->refs;
 64 | 
 65 |   if (ref->url != NULL)
 66 |     ref->size += (int)strlen((char*)ref->url);
 67 |   if (ref->title != NULL)
 68 |     ref->size += (int)strlen((char*)ref->title);
 69 | 
 70 |   map->refs = ref;
 71 |   map->size++;
 72 | }
 73 | 
 74 | static int
 75 | labelcmp(const unsigned char *a, const unsigned char *b) {
 76 |   return strcmp((const char *)a, (const char *)b);
 77 | }
 78 | 
 79 | static int
 80 | refcmp(const void *p1, const void *p2) {
 81 |   cmark_reference *r1 = *(cmark_reference **)p1;
 82 |   cmark_reference *r2 = *(cmark_reference **)p2;
 83 |   int res = labelcmp(r1->label, r2->label);
 84 |   return res ? res : ((int)r1->age - (int)r2->age);
 85 | }
 86 | 
 87 | static int
 88 | refsearch(const void *label, const void *p2) {
 89 |   cmark_reference *ref = *(cmark_reference **)p2;
 90 |   return labelcmp((const unsigned char *)label, ref->label);
 91 | }
 92 | 
 93 | static void sort_references(cmark_reference_map *map) {
 94 |   unsigned int i = 0, last = 0, size = map->size;
 95 |   cmark_reference *r = map->refs, **sorted = NULL;
 96 | 
 97 |   sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *));
 98 |   while (r) {
 99 |     sorted[i++] = r;
100 |     r = r->next;
101 |   }
102 | 
103 |   qsort(sorted, size, sizeof(cmark_reference *), refcmp);
104 | 
105 |   for (i = 1; i < size; i++) {
106 |     if (labelcmp(sorted[i]->label, sorted[last]->label) != 0)
107 |       sorted[++last] = sorted[i];
108 |   }
109 |   map->sorted = sorted;
110 |   map->size = last + 1;
111 | }
112 | 
113 | // Returns reference if refmap contains a reference with matching
114 | // label, otherwise NULL.
115 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
116 |                                         cmark_chunk *label) {
117 |   cmark_reference **ref = NULL;
118 |   cmark_reference *r = NULL;
119 |   unsigned char *norm;
120 | 
121 |   if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
122 |     return NULL;
123 | 
124 |   if (map == NULL || !map->size)
125 |     return NULL;
126 | 
127 |   norm = normalize_reference(map->mem, label);
128 |   if (norm == NULL)
129 |     return NULL;
130 | 
131 |   if (!map->sorted)
132 |     sort_references(map);
133 | 
134 |   ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *),
135 |                 refsearch);
136 |   map->mem->free(norm);
137 | 
138 |   if (ref != NULL) {
139 |     r = ref[0];
140 |     /* Check for expansion limit */
141 |     if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size)
142 |       return NULL;
143 |     map->ref_size += r->size;
144 |   }
145 | 
146 |   return r;
147 | }
148 | 
149 | void cmark_reference_map_free(cmark_reference_map *map) {
150 |   cmark_reference *ref;
151 | 
152 |   if (map == NULL)
153 |     return;
154 | 
155 |   ref = map->refs;
156 |   while (ref) {
157 |     cmark_reference *next = ref->next;
158 |     reference_free(map, ref);
159 |     ref = next;
160 |   }
161 | 
162 |   map->mem->free(map->sorted);
163 |   map->mem->free(map);
164 | }
165 | 
166 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) {
167 |   cmark_reference_map *map =
168 |       (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map));
169 |   map->mem = mem;
170 |   return map;
171 | }
172 | 


--------------------------------------------------------------------------------
/src/references.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_REFERENCES_H
 2 | #define CMARK_REFERENCES_H
 3 | 
 4 | #include "chunk.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
// One link reference definition held in a cmark_reference_map.
struct cmark_reference {
  struct cmark_reference *next; // next entry in the map's singly linked list
  unsigned char *label;         // normalized label; the lookup key
  unsigned char *url;           // cleaned url; may be NULL
  unsigned char *title;         // cleaned title; may be NULL
  unsigned int age;             // map entry count at creation time
  unsigned int size;            // strlen(url) + strlen(title)
};
18 | 
19 | typedef struct cmark_reference cmark_reference;
20 | 
// Collection of link reference definitions with a lazily built sorted
// table for lookup.
struct cmark_reference_map {
  cmark_mem *mem;            // allocator for the map and its entries
  cmark_reference *refs;     // head of the list; newest entry first
  cmark_reference **sorted;  // NULL until the first lookup sorts the map
  unsigned int size;         // entry count; after sorting, length of sorted
  unsigned int ref_size;     // summed size of references returned by lookup
  unsigned int max_ref_size; // cap on ref_size; 0 means no cap
};
29 | 
30 | typedef struct cmark_reference_map cmark_reference_map;
31 | 
32 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem);
33 | void cmark_reference_map_free(cmark_reference_map *map);
34 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
35 |                                         cmark_chunk *label);
36 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
37 |                             cmark_chunk *url, cmark_chunk *title);
38 | 
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/render.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include "buffer.h"
  3 | #include "cmark.h"
  4 | #include "utf8.h"
  5 | #include "render.h"
  6 | #include "node.h"
  7 | #include "cmark_ctype.h"
  8 | 
  9 | static inline void S_cr(cmark_renderer *renderer) {
 10 |   if (renderer->need_cr < 1) {
 11 |     renderer->need_cr = 1;
 12 |   }
 13 | }
 14 | 
 15 | static inline void S_blankline(cmark_renderer *renderer) {
 16 |   if (renderer->need_cr < 2) {
 17 |     renderer->need_cr = 2;
 18 |   }
 19 | }
 20 | 
// Append `source` to the renderer's buffer.
//
// First flushes any pending line breaks requested through need_cr, then
// writes the string one code point at a time, inserting the prefix at the
// start of each line.  When `wrap` is true (and the renderer does not have
// no_linebreaks set) spaces become candidate break points, and a line that
// grows past renderer->width is broken at the most recent candidate.
// `escape` selects how each code point is written: LITERAL is written
// directly, anything else is passed to the renderer's outc callback.
// Rendering of the string stops silently at the first invalid UTF-8
// sequence.
static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
                  cmark_escaping escape) {
  int length = (int)strlen(source);
  unsigned char nextc;
  int32_t c;
  int i = 0;
  int last_nonspace;
  int len;
  // k walks backwards over the newlines already at the end of the buffer.
  int k = renderer->buffer->size - 1;

  wrap = wrap && !renderer->no_linebreaks;

  // Inside a tight list item a blank-line request is reduced to a plain
  // line break.
  if (renderer->in_tight_list_item && renderer->need_cr > 1) {
    renderer->need_cr = 1;
  }
  // Emit the pending newlines, counting newlines that are already at the
  // end of the buffer (or an empty buffer) toward the request.
  while (renderer->need_cr) {
    if (k < 0 || renderer->buffer->ptr[k] == '\n') {
      k -= 1;
    } else {
      cmark_strbuf_putc(renderer->buffer, '\n');
      if (renderer->need_cr > 1) {
        cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                         renderer->prefix->size);
      }
    }
    renderer->column = 0;
    renderer->last_breakable = 0;
    renderer->begin_line = true;
    renderer->begin_content = true;
    renderer->need_cr -= 1;
  }

  while (i < length) {
    if (renderer->begin_line) {
      cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                       renderer->prefix->size);
      // note: this assumes prefix is ascii:
      renderer->column = renderer->prefix->size;
    }

    len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
    if (len == -1) { // error condition
      return;        // return without rendering rest of string
    }
    // Byte following the current code point; the terminating NUL at the end.
    nextc = source[i + len];
    if (c == 32 && wrap) {
      // A space at the beginning of a line is dropped.
      if (!renderer->begin_line) {
        // Buffer offset at which this space is stored.
        last_nonspace = renderer->buffer->size;
        cmark_strbuf_putc(renderer->buffer, ' ');
        renderer->column += 1;
        renderer->begin_line = false;
        renderer->begin_content = false;
        // skip following spaces
        while (source[i + 1] == ' ') {
          i++;
        }
        // We don't allow breaks that make a digit the first character
        // because this causes problems with commonmark output.
        if (!cmark_isdigit(source[i + 1])) {
          renderer->last_breakable = last_nonspace;
        }
      }

    } else if (escape == LITERAL) {
      if (c == 10) {
        // A literal newline starts a new output line.
        cmark_strbuf_putc(renderer->buffer, '\n');
        renderer->column = 0;
        renderer->begin_line = true;
        renderer->begin_content = true;
        renderer->last_breakable = 0;
      } else {
        cmark_render_code_point(renderer, c);
        renderer->begin_line = false;
        // we don't set 'begin_content' to false til we've
        // finished parsing a digit.  Reason:  in commonmark
        // we need to escape a potential list marker after
        // a digit:
        renderer->begin_content =
            renderer->begin_content && cmark_isdigit(c) == 1;
      }
    } else {
      // Non-literal text goes through the format-specific callback.
      (renderer->outc)(renderer, escape, c, nextc);
      renderer->begin_line = false;
      renderer->begin_content =
          renderer->begin_content && cmark_isdigit(c) == 1;
    }

    // If adding the character went beyond width, look for an
    // earlier place where the line could be broken:
    if (renderer->width > 0 && renderer->column > renderer->width &&
        !renderer->begin_line && renderer->last_breakable > 0) {

      // copy from last_breakable to remainder
      unsigned char *src = renderer->buffer->ptr +
                           renderer->last_breakable + 1;
      bufsize_t remainder_len = renderer->buffer->size -
                                renderer->last_breakable - 1;
      unsigned char *remainder =
          (unsigned char *)renderer->mem->realloc(NULL, remainder_len);
      memcpy(remainder, src, remainder_len);
      // truncate at last_breakable
      cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
      // add newline, prefix, and remainder
      cmark_strbuf_putc(renderer->buffer, '\n');
      cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                       renderer->prefix->size);
      cmark_strbuf_put(renderer->buffer, remainder, remainder_len);
      // Column is counted in bytes here (prefix size plus remainder length).
      renderer->column = renderer->prefix->size + remainder_len;
      renderer->mem->free(remainder);
      renderer->last_breakable = 0;
      renderer->begin_line = false;
      renderer->begin_content = false;
    }

    i += len;
  }
}
138 | 
139 | // Assumes no newlines, assumes ascii content:
140 | void cmark_render_ascii(cmark_renderer *renderer, const char *s) {
141 |   int origsize = renderer->buffer->size;
142 |   cmark_strbuf_puts(renderer->buffer, s);
143 |   renderer->column += renderer->buffer->size - origsize;
144 | }
145 | 
146 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) {
147 |   cmark_utf8proc_encode_char(c, renderer->buffer);
148 |   renderer->column += 1;
149 | }
150 | 
151 | char *cmark_render(cmark_node *root, int options, int width,
152 |                    void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
153 |                                 unsigned char),
154 |                    int (*render_node)(cmark_renderer *renderer,
155 |                                       cmark_node *node,
156 |                                       cmark_event_type ev_type, int options)) {
157 |   cmark_mem *mem = root->mem;
158 |   cmark_strbuf pref = CMARK_BUF_INIT(mem);
159 |   cmark_strbuf buf = CMARK_BUF_INIT(mem);
160 |   cmark_node *cur;
161 |   cmark_event_type ev_type;
162 |   char *result;
163 |   cmark_iter *iter = cmark_iter_new(root);
164 | 
165 |   cmark_renderer renderer = {options,
166 |                              mem,    &buf,    &pref,      0,      width,
167 |                              0,      0,       true,       true,   false,
168 |                              false,  NULL,
169 |                              outc,   S_cr,    S_blankline, S_out};
170 | 
171 |   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
172 |     cur = cmark_iter_get_node(iter);
173 |     if (!render_node(&renderer, cur, ev_type, options)) {
174 |       // a false value causes us to skip processing
175 |       // the node's contents.  this is used for
176 |       // autolinks.
177 |       cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
178 |     }
179 |   }
180 | 
181 |   // If the root node is a block type (i.e. not inline), ensure there's a final newline:
182 |   if (cmark_node_is_block(root)) {
183 |     if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
184 |       cmark_strbuf_putc(renderer.buffer, '\n');
185 |     }
186 |   }
187 | 
188 |   result = (char *)cmark_strbuf_detach(renderer.buffer);
189 | 
190 |   cmark_iter_free(iter);
191 |   cmark_strbuf_free(renderer.prefix);
192 |   cmark_strbuf_free(renderer.buffer);
193 | 
194 |   return result;
195 | }
196 | 


--------------------------------------------------------------------------------
/src/render.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_RENDER_H
 2 | #define CMARK_RENDER_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | #include <stdbool.h>
 9 | #include <stdlib.h>
10 | 
11 | #include "buffer.h"
12 | 
// Escaping mode passed to the renderer's out and outc callbacks.  Text
// written with LITERAL is emitted by the generic output routine itself;
// every other mode is forwarded to the format-specific outc callback.
typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
14 | 
// Stack element used while rendering list items: one counter per open
// list item, linked to the counter of the enclosing item.
struct block_number {
  int number;                  // block-level nodes entered in this item
  struct block_number *parent; // counter of the enclosing item, or NULL
};
19 | 
// Mutable state shared between the generic rendering driver and a
// format-specific node callback.
struct cmark_renderer {
  int options;               // render options passed to cmark_render
  cmark_mem *mem;            // allocator taken from the root node
  cmark_strbuf *buffer;      // accumulated output
  cmark_strbuf *prefix;      // text inserted at the start of each line
  int column;                // current output column
  int width;                 // wrap width; no wrapping unless > 0
  int need_cr;               // pending line breaks (1 = newline, 2 = blank)
  bufsize_t last_breakable;  // buffer offset of last break point; 0 = none
  bool begin_line;           // true when nothing is on the current line yet
  bool begin_content;        // true while only digits follow the line start
  bool no_linebreaks;        // when true, wrapping is disabled
  bool in_tight_list_item;   // when true, blank-line requests become newlines
  // Top of the per-list-item block counter stack; NULL outside list items.
  struct block_number *block_number_in_list_item;
  // Format-specific writer for one code point (not used for LITERAL text).
  void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char);
  // Request a line break before the next output.
  void (*cr)(struct cmark_renderer *);
  // Request a blank line before the next output.
  void (*blankline)(struct cmark_renderer *);
  // Write a string: (renderer, text, allow wrapping, escaping mode).
  void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping);
};
39 | 
40 | typedef struct cmark_renderer cmark_renderer;
41 | 
42 | void cmark_render_ascii(cmark_renderer *renderer, const char *s);
43 | 
44 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
45 | 
46 | char *cmark_render(cmark_node *root, int options, int width,
47 |                    void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
48 |                                 unsigned char),
49 |                    int (*render_node)(cmark_renderer *renderer,
50 |                                       cmark_node *node,
51 |                                       cmark_event_type ev_type, int options));
52 | 
53 | #ifdef __cplusplus
54 | }
55 | #endif
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/scanners.h:
--------------------------------------------------------------------------------
 1 | #include "cmark.h"
 2 | #include "chunk.h"
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
 9 |                    bufsize_t offset);
10 | bufsize_t _scan_scheme(const unsigned char *p);
11 | bufsize_t _scan_autolink_uri(const unsigned char *p);
12 | bufsize_t _scan_autolink_email(const unsigned char *p);
13 | bufsize_t _scan_html_tag(const unsigned char *p);
14 | bufsize_t _scan_html_comment(const unsigned char *p);
15 | bufsize_t _scan_html_pi(const unsigned char *p);
16 | bufsize_t _scan_html_declaration(const unsigned char *p);
17 | bufsize_t _scan_html_cdata(const unsigned char *p);
18 | bufsize_t _scan_html_block_start(const unsigned char *p);
19 | bufsize_t _scan_html_block_start_7(const unsigned char *p);
20 | bufsize_t _scan_html_block_end_1(const unsigned char *p);
21 | bufsize_t _scan_html_block_end_2(const unsigned char *p);
22 | bufsize_t _scan_html_block_end_3(const unsigned char *p);
23 | bufsize_t _scan_html_block_end_4(const unsigned char *p);
24 | bufsize_t _scan_html_block_end_5(const unsigned char *p);
25 | bufsize_t _scan_link_title(const unsigned char *p);
26 | bufsize_t _scan_spacechars(const unsigned char *p);
27 | bufsize_t _scan_atx_heading_start(const unsigned char *p);
28 | bufsize_t _scan_setext_heading_line(const unsigned char *p);
29 | bufsize_t _scan_open_code_fence(const unsigned char *p);
30 | bufsize_t _scan_close_code_fence(const unsigned char *p);
31 | bufsize_t _scan_dangerous_url(const unsigned char *p);
32 | 
33 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
34 | #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
35 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
36 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
37 | #define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n)
38 | #define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n)
39 | #define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n)
40 | #define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n)
41 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
42 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
43 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
44 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
45 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
46 | #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
47 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
48 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
49 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
50 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
51 | #define scan_setext_heading_line(c, n)                                         \
52 |   _scan_at(&_scan_setext_heading_line, c, n)
53 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
54 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
55 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
56 | 
57 | #ifdef __cplusplus
58 | }
59 | #endif
60 | 


--------------------------------------------------------------------------------
/src/utf8.h:
--------------------------------------------------------------------------------
 1 | #ifndef CMARK_UTF8_H
 2 | #define CMARK_UTF8_H
 3 | 
 4 | #include <stdint.h>
 5 | #include "buffer.h"
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
12 |                               bufsize_t len);
13 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
14 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
15 | void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
16 |                           bufsize_t size);
17 | int cmark_utf8proc_is_space(int32_t uc);
18 | int cmark_utf8proc_is_punctuation_or_symbol(int32_t uc);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/src/xml.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdbool.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | #include "cmark.h"
  8 | #include "node.h"
  9 | #include "buffer.h"
 10 | 
 11 | #define BUFFER_SIZE 100
 12 | #define MAX_INDENT 40
 13 | 
 14 | // Functions to convert cmark_nodes to XML strings.
 15 | 
 16 | // C0 control characters (except tab, LF and CR), U+FFFE and U+FFFF aren't allowed in XML.
 17 | static const char XML_ESCAPE_TABLE[256] = {
 18 |     /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
 19 |     /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 20 |     /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 21 |     /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
 22 |     /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 23 |     /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 24 |     /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 25 |     /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 26 |     /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 27 |     /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 28 |     /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 29 |     /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9,
 30 |     /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 31 |     /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 32 |     /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 33 |     /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 34 | };
 35 | 
 36 | // U+FFFD Replacement Character encoded in UTF-8
 37 | #define UTF8_REPL "\xEF\xBF\xBD"
 38 | 
 39 | static const char *XML_ESCAPES[] = {
 40 |   "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;"
 41 | };
 42 | 
    | // Append the `size` bytes at `src` to `ob`, escaped for use in XML output.
    | //
    | // Every byte is classified through XML_ESCAPE_TABLE:
    | //   0    - copied through unchanged (batched into runs),
    | //   1..5 - replaced by the corresponding XML_ESCAPES string,
    | //   9    - byte 0xBE or 0xBF, which may be the last byte of U+FFFE/U+FFFF.
 43 | static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
 44 |                        bufsize_t size) {
 45 |   bufsize_t i = 0, org, esc = 0;
 46 | 
 47 |   while (i < size) {
    |     // Scan forward over a run of bytes that need no escaping.
 48 |     org = i;
 49 |     while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
 50 |       i++;
 51 | 
    |     // Flush that run with a single buffer write.
 52 |     if (i > org)
 53 |       cmark_strbuf_put(ob, src + org, i - org);
 54 | 
    |     // The run reached the end of the input: nothing left to escape.
 55 |     if (i >= size)
 56 |       break;
 57 | 
 58 |     if (esc == 9) {
 59 |       // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
 60 |       // be changed.
 61 |       // We know that src[i] is 0xBE or 0xBF.
 62 |       if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
 63 |         cmark_strbuf_putc(ob, 0xBD);
 64 |       } else {
    |         // Not preceded by EF BF: copy the byte unchanged.
 65 |         cmark_strbuf_putc(ob, src[i]);
 66 |       }
 67 |     } else {
 68 |       cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
 69 |     }
 70 | 
 71 |     i++;
 72 |   }
 73 | }
 74 | 
    | // Convenience wrapper around escape_xml() for NUL-terminated strings.
    | // A NULL `source` appends nothing.
 75 | static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
 76 |   if (source)
 77 |     escape_xml(dest, source, (bufsize_t)strlen((char *)source));
 78 | }
 79 | 
    | // Mutable state threaded through S_render_node() while walking the tree.
 80 | struct render_state {
    |   // Buffer the XML output is appended to.
 81 |   cmark_strbuf *xml;
    |   // Current indentation in spaces; S_render_node() changes it in steps of 2.
 82 |   int indent;
 83 | };
 84 | 
    | // Append state->indent spaces to the output, but never more than
    | // MAX_INDENT of them.
 85 | static inline void indent(struct render_state *state) {
 86 |   int i;
 87 |   for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
 88 |     cmark_strbuf_putc(state->xml, ' ');
 89 |   }
 90 | }
 91 | 
    | // Write the XML for a single iterator event on `node` to state->xml.
    | //
    | // On CMARK_EVENT_ENTER the opening tag and its attributes are written.
    | // Literal nodes (text, code, HTML, code blocks) also get their escaped
    | // content and closing tag on the same line; other nodes without children
    | // are self-closed; nodes with children increase the indentation by 2.
    | // On any other event a closing tag is written, but only for nodes that
    | // have children.  Always returns 1.
 92 | static int S_render_node(cmark_node *node, cmark_event_type ev_type,
 93 |                          struct render_state *state, int options) {
 94 |   cmark_strbuf *xml = state->xml;
 95 |   bool literal = false;
 96 |   cmark_delim_type delim;
 97 |   bool entering = (ev_type == CMARK_EVENT_ENTER);
 98 |   char buffer[BUFFER_SIZE];
 99 | 
100 |   if (entering) {
101 |     indent(state);
102 |     cmark_strbuf_putc(xml, '<');
103 |     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
104 | 
    |     // Source positions are emitted only when requested and only when
    |     // start_line is non-zero.
105 |     if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
106 |       snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
107 |                node->start_line, node->start_column, node->end_line,
108 |                node->end_column);
109 |       cmark_strbuf_puts(xml, buffer);
110 |     }
111 | 
112 |     literal = false;
113 | 
114 |     switch (node->type) {
115 |     case CMARK_NODE_DOCUMENT:
116 |       cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
117 |       break;
    |     // Literal nodes: write the escaped content and the start of the closing
    |     // tag here; the final '>' is appended after the switch.
118 |     case CMARK_NODE_TEXT:
119 |     case CMARK_NODE_CODE:
120 |     case CMARK_NODE_HTML_BLOCK:
121 |     case CMARK_NODE_HTML_INLINE:
122 |       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
123 |       escape_xml(xml, node->data, node->len);
124 |       cmark_strbuf_puts(xml, "</");
125 |       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
126 |       literal = true;
127 |       break;
128 |     case CMARK_NODE_LIST:
129 |       switch (cmark_node_get_list_type(node)) {
130 |       case CMARK_ORDERED_LIST:
131 |         cmark_strbuf_puts(xml, " type=\"ordered\"");
132 |         snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
133 |                  cmark_node_get_list_start(node));
134 |         cmark_strbuf_puts(xml, buffer);
135 |         delim = cmark_node_get_list_delim(node);
136 |         if (delim == CMARK_PAREN_DELIM) {
137 |           cmark_strbuf_puts(xml, " delim=\"paren\"");
138 |         } else if (delim == CMARK_PERIOD_DELIM) {
139 |           cmark_strbuf_puts(xml, " delim=\"period\"");
140 |         }
141 |         break;
142 |       case CMARK_BULLET_LIST:
143 |         cmark_strbuf_puts(xml, " type=\"bullet\"");
144 |         break;
145 |       default:
146 |         break;
147 |       }
    |       // The tight attribute is written for every list type.
148 |       snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
149 |                (cmark_node_get_list_tight(node) ? "true" : "false"));
150 |       cmark_strbuf_puts(xml, buffer);
151 |       break;
152 |     case CMARK_NODE_HEADING:
153 |       snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
154 |       cmark_strbuf_puts(xml, buffer);
155 |       break;
    |     // Code blocks are literal too, with an optional info attribute.
156 |     case CMARK_NODE_CODE_BLOCK:
157 |       if (node->as.code.info) {
158 |         cmark_strbuf_puts(xml, " info=\"");
159 |         escape_xml_str(xml, node->as.code.info);
160 |         cmark_strbuf_putc(xml, '"');
161 |       }
162 |       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
163 |       escape_xml(xml, node->data, node->len);
164 |       cmark_strbuf_puts(xml, "</");
165 |       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
166 |       literal = true;
167 |       break;
168 |     case CMARK_NODE_CUSTOM_BLOCK:
169 |     case CMARK_NODE_CUSTOM_INLINE:
170 |       cmark_strbuf_puts(xml, " on_enter=\"");
171 |       escape_xml_str(xml, node->as.custom.on_enter);
172 |       cmark_strbuf_putc(xml, '"');
173 |       cmark_strbuf_puts(xml, " on_exit=\"");
174 |       escape_xml_str(xml, node->as.custom.on_exit);
175 |       cmark_strbuf_putc(xml, '"');
176 |       break;
177 |     case CMARK_NODE_LINK:
178 |     case CMARK_NODE_IMAGE:
179 |       cmark_strbuf_puts(xml, " destination=\"");
180 |       escape_xml_str(xml, node->as.link.url);
181 |       cmark_strbuf_putc(xml, '"');
    |       // The title attribute is omitted entirely when no title is set.
182 |       if (node->as.link.title) {
183 |         cmark_strbuf_puts(xml, " title=\"");
184 |         escape_xml_str(xml, node->as.link.title);
185 |         cmark_strbuf_putc(xml, '"');
186 |       }
187 |       break;
188 |     default:
189 |       break;
190 |     }
    |     // Nodes with children indent what follows; childless non-literal nodes
    |     // become self-closing tags.
191 |     if (node->first_child) {
192 |       state->indent += 2;
193 |     } else if (!literal) {
194 |       cmark_strbuf_puts(xml, " /");
195 |     }
196 |     cmark_strbuf_puts(xml, ">\n");
197 | 
    |   // Childless nodes were written completely on ENTER, so only nodes with
    |   // children need a closing tag here.
198 |   } else if (node->first_child) {
199 |     state->indent -= 2;
200 |     indent(state);
201 |     cmark_strbuf_puts(xml, "</");
202 |     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
203 |     cmark_strbuf_puts(xml, ">\n");
204 |   }
205 | 
206 |   return 1;
207 | }
208 | 
    | // Render the tree rooted at `root` as an XML document and return it as a
    | // string detached from a buffer that uses root->mem.
    | // The output starts with the XML declaration and the CommonMark DOCTYPE,
    | // followed by one S_render_node() call per iterator event.
    | // NOTE(review): the caller presumably owns the returned string and frees
    | // it with the same allocator -- confirm against the public API docs.
209 | char *cmark_render_xml(cmark_node *root, int options) {
210 |   char *result;
211 |   cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
212 |   cmark_event_type ev_type;
213 |   cmark_node *cur;
214 |   struct render_state state = {&xml, 0};
215 | 
216 |   cmark_iter *iter = cmark_iter_new(root);
217 | 
218 |   cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
219 |   cmark_strbuf_puts(state.xml,
220 |                     "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
    |   // Walk every enter/exit event of the tree in iterator order.
221 |   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
222 |     cur = cmark_iter_get_node(iter);
223 |     S_render_node(cur, ev_type, &state, options);
224 |   }
225 |   result = (char *)cmark_strbuf_detach(&xml);
226 | 
227 |   cmark_iter_free(iter);
228 |   return result;
229 | }
230 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # To get verbose output: cmake --build build --target "test" -- ARGS='-V'
 2 | 
 3 | # By default, we run the spec tests only if python3 is available.
 4 | # To require the spec tests, compile with -DSPEC_TESTS=1
 5 | 
 6 | if(SPEC_TESTS)
 7 |   set(PYTHON_REQUIRED REQUIRED)
 8 | else()
 9 |   set(PYTHON_REQUIRED)
10 | endif()
11 | 
12 | if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12)
13 |   find_package(Python3 ${PYTHON_REQUIRED} COMPONENTS Interpreter)
14 | else()
15 |   find_package(PythonInterp 3 ${PYTHON_REQUIRED})
16 |   set(Python3_Interpreter_FOUND ${PYTHONINTERP_FOUND})
17 |   add_executable(Python3::Interpreter IMPORTED)
18 |   set_target_properties(Python3::Interpreter PROPERTIES
19 |     IMPORTED_LOCATION ${PYTHON_EXECUTABLE})
20 | endif()
21 | 
22 | IF (Python3_Interpreter_FOUND)
23 | 
24 |   add_test(NAME html_normalization
25 |            COMMAND "$<TARGET_FILE:Python3::Interpreter>" -m doctest "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py")
26 | 
27 |   if(BUILD_SHARED_LIBS)
28 |     add_test(NAME spectest_library
29 |              COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
30 |                                                            --no-normalize
31 |                                                            --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
32 |                                                            --library-dir "$<TARGET_FILE_DIR:cmark>")
33 | 
34 |     add_test(NAME pathological_tests_library
35 |              COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
36 |                                                            --library-dir "$<TARGET_FILE_DIR:cmark>")
37 | 
38 |     add_test(NAME roundtriptest_library
39 |              COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py"
40 |                                                            --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
41 |                                                            --library-dir "$<TARGET_FILE_DIR:cmark>")
42 | 
43 |     add_test(NAME entity_library
44 |              COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
45 |                                                            --library-dir "$<TARGET_FILE_DIR:cmark>")
46 |   endif()
47 | 
48 |   add_test(NAME spectest_executable
49 |            COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
50 |                                                          --no-normalize
51 |                                                          --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt"
52 |                                                          --program "$<TARGET_FILE:cmark_exe>")
53 | 
54 |   add_test(NAME smartpuncttest_executable
55 |            COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
56 |                                                          --no-normalize
57 |                                                          --spec "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt"
58 |                                                          --program "$<TARGET_FILE:cmark_exe> --smart")
59 | 
60 |   add_test(NAME regressiontest_executable
61 |            COMMAND "$<TARGET_FILE:Python3::Interpreter>" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py"
62 |                                                          --no-normalize
63 |                                                          --spec "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt"
64 |                                                          --program "$<TARGET_FILE:cmark_exe>")
65 | 
66 | ELSE(Python3_Interpreter_FOUND)
67 | 
68 |   message(WARNING "A Python 3 Interpreter is required to run the spec tests")
69 | 
70 | ENDIF(Python3_Interpreter_FOUND)
71 | 


--------------------------------------------------------------------------------
/test/cmark.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from ctypes import *
 5 | from subprocess import Popen, PIPE
 6 | import platform
 7 | import os
 8 | 
 9 | class cmark_mem(Structure):
   |     # ctypes view of the allocator structure returned by
   |     # cmark_get_default_mem_allocator.  Only `free` is given a callable
   |     # type, since it is the only member the helpers in this file call.
   |     # NOTE(review): field order presumably mirrors the C struct -- confirm
   |     # against cmark.h.
10 |     _fields_ = [("calloc", c_void_p),
11 |                 ("realloc", c_void_p),
12 |                 ("free", CFUNCTYPE(None, c_void_p))]
13 | 
14 | def pipe_through_prog(prog, text):
   |     """Run `prog` with `text` on its standard input and collect the result.
   |
   |     `prog` is a command line that is split on whitespace into arguments.
   |     Returns [returncode, stdout decoded as UTF-8, stderr as raw bytes].
   |     """
15 |     p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
16 |     [result, err] = p1.communicate(input=text.encode('utf-8'))
17 |     return [p1.returncode, result.decode('utf-8'), err]
18 | 
19 | def to_html(lib, text):
   |     """Convert `text` to HTML through the loaded cmark library `lib`.
   |
   |     Calls cmark_markdown_to_html with CMARK_OPT_UNSAFE and releases the
   |     returned C string with the library's default allocator.
   |     Returns [0, html, ''], the same [rc, out, err] shape that
   |     pipe_through_prog produces.
   |     """
   |     # Look up the allocator's free function so the result can be released.
20 |     get_alloc = lib.cmark_get_default_mem_allocator
21 |     get_alloc.restype = POINTER(cmark_mem)
22 |     free_func = get_alloc().contents.free
23 | 
24 |     markdown = lib.cmark_markdown_to_html
   |     # POINTER(c_char) rather than c_char_p keeps the raw pointer, which is
   |     # what free_func needs below.
25 |     markdown.restype = POINTER(c_char)
26 |     markdown.argtypes = [c_char_p, c_size_t, c_int]
27 | 
28 |     textbytes = text.encode('utf-8')
29 |     textlen = len(textbytes)
30 |     # 1 << 17 == CMARK_OPT_UNSAFE
31 |     cstring = markdown(textbytes, textlen, 1 << 17)
32 |     result = string_at(cstring).decode('utf-8')
33 |     free_func(cstring)
34 | 
35 |     return [0, result, '']
36 | 
37 | def to_commonmark(lib, text):
   |     """Round-trip `text` through the loaded cmark library `lib`.
   |
   |     Parses the text with cmark_parse_document, renders the tree with
   |     cmark_render_commonmark (options 0, width 0), then frees both the
   |     rendered string and the node tree.
   |     Returns [0, commonmark, ''], the same [rc, out, err] shape that
   |     pipe_through_prog produces.
   |     """
38 |     get_alloc = lib.cmark_get_default_mem_allocator
39 |     get_alloc.restype = POINTER(cmark_mem)
40 |     free_func = get_alloc().contents.free
41 | 
42 |     parse_document = lib.cmark_parse_document
43 |     parse_document.restype = c_void_p
44 |     parse_document.argtypes = [c_char_p, c_size_t, c_int]
45 | 
46 |     render_commonmark = lib.cmark_render_commonmark
47 |     render_commonmark.restype = POINTER(c_char)
48 |     render_commonmark.argtypes = [c_void_p, c_int, c_int]
49 | 
50 |     free_node = lib.cmark_node_free
51 |     free_node.argtypes = [c_void_p]
52 | 
53 |     textbytes = text.encode('utf-8')
54 |     textlen = len(textbytes)
55 |     node = parse_document(textbytes, textlen, 0)
56 |     cstring = render_commonmark(node, 0, 0)
57 |     result = string_at(cstring).decode('utf-8')
   |     # Release the rendered string first, then the parsed tree.
58 |     free_func(cstring)
59 |     free_node(node)
60 | 
61 |     return [0, result, '']
62 | 
63 | class CMark:
64 |     def __init__(self, prog=None, library_dir=None):
65 |         self.prog = prog
66 |         if prog:
67 |             prog += ' --unsafe'
68 |             self.to_html = lambda x: pipe_through_prog(prog, x)
69 |             self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
70 |         else:
71 |             sysname = platform.system()
72 |             if sysname == 'Darwin':
73 |                 libnames = [ "libcmark.dylib" ]
74 |             elif sysname == 'Windows':
75 |                 libnames = [ "cmark.dll", "libcmark.dll" ]
76 |             else:
77 |                 libnames = [ "libcmark.so" ]
78 |             if not library_dir:
79 |                 library_dir = os.path.join("build", "src")
80 |             for libname in libnames:
81 |                 candidate = os.path.join(library_dir, libname)
82 |                 if os.path.isfile(candidate):
83 |                     libpath = candidate
84 |                     break
85 |             cmark = CDLL(libpath)
86 |             self.to_html = lambda x: to_html(cmark, x)
87 |             self.to_commonmark = lambda x: to_commonmark(cmark, x)
88 | 
89 | 


--------------------------------------------------------------------------------
/test/entity_tests.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import re
 5 | import os
 6 | import argparse
 7 | import sys
 8 | import platform
 9 | import html
10 | from cmark import CMark
11 | 
12 | def get_entities():
   |     """Read the entity table from ../src/entities.inc.
   |
   |     Returns a list of (entity_name, text) tuples, where `text` is the
   |     entity's byte list from the table decoded as UTF-8.
   |     """
   |     # Matches table rows of the form {(unsigned char*)"name", {b1, b2, ...}
13 |     regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
14 |     with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f:
15 |         code = f.read()
16 |     entities = []
17 |     for entity, utf8 in re.findall(regex, code, re.MULTILINE):
   |         # The last list element is dropped before decoding; presumably it
   |         # is the terminating 0 byte -- TODO confirm against entities.inc.
18 |         utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8')
19 |         entities.append((entity, utf8))
20 |     return entities
21 | 
22 | parser = argparse.ArgumentParser(description='Run cmark tests.')
23 | parser.add_argument('--program', dest='program', nargs='?', default=None,
24 |         help='program to test')
25 | parser.add_argument('--library-dir', dest='library_dir', nargs='?',
26 |         default=None, help='directory containing dynamic library')
27 | args = parser.parse_args(sys.argv[1:])
28 | 
29 | cmark = CMark(prog=args.program, library_dir=args.library_dir)
30 | 
31 | entities = get_entities()
32 | 
33 | passed = 0
34 | errored = 0
35 | failed = 0
36 | 
37 | exceptions = {
38 |     'quot': '&quot;',
39 |     'QUOT': '&quot;',
40 | 
41 |     # These are broken, but I'm not too worried about them.
42 |     'nvlt': '&lt;⃒',
43 |     'nvgt': '&gt;⃒',
44 | }
45 | 
46 | print("Testing entities:")
47 | for entity, utf8 in entities:
48 |     [rc, actual, err] = cmark.to_html("&{};".format(entity))
49 |     check = exceptions.get(entity, utf8)
50 | 
51 |     if rc != 0:
52 |         errored += 1
53 |         print(entity, '[ERRORED (return code {})]'.format(rc))
54 |         print(err)
55 |     elif check in actual:
56 |         # print(entity, '[PASSED]') # omit noisy success output
57 |         passed += 1
58 |     else:
59 |         print(entity, '[FAILED]')
60 |         print(repr(actual))
61 |         failed += 1
62 | 
63 | print("{} passed, {} failed, {} errored".format(passed, failed, errored))
64 | if failed == 0 and errored == 0:
65 |     exit(0)
66 | else:
67 |     exit(1)
68 | 


--------------------------------------------------------------------------------
/test/normalize.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from html.parser import HTMLParser
  3 | import urllib
  4 | 
  5 | try:
  6 |     from html.parser import HTMLParseError
  7 | except ImportError:
  8 |     # HTMLParseError was removed in Python 3.5. It could never be
  9 |     # thrown, so we define a placeholder instead.
 10 |     class HTMLParseError(Exception):
 11 |         pass
 12 | 
 13 | from html.entities import name2codepoint
 14 | import sys
 15 | import re
 16 | import html
 17 | 
 18 | # Normalization code, adapted from
 19 | # https://github.com/karlcow/markdown-testsuite/
 20 | significant_attrs = ["alt", "href", "src", "title"]
 21 | whitespace_re = re.compile('\s+')
 22 | class MyHTMLParser(HTMLParser):
 23 |     def __init__(self):
 24 |         HTMLParser.__init__(self)
 25 |         self.convert_charrefs = False
 26 |         self.last = "starttag"
 27 |         self.in_pre = False
 28 |         self.output = ""
 29 |         self.last_tag = ""
 30 |     def handle_data(self, data):
 31 |         after_tag = self.last == "endtag" or self.last == "starttag"
 32 |         after_block_tag = after_tag and self.is_block_tag(self.last_tag)
 33 |         if after_tag and self.last_tag == "br":
 34 |             data = data.lstrip('\n')
 35 |         if not self.in_pre:
 36 |             data = whitespace_re.sub(' ', data)
 37 |         if after_block_tag and not self.in_pre:
 38 |             if self.last == "starttag":
 39 |                 data = data.lstrip()
 40 |             elif self.last == "endtag":
 41 |                 data = data.strip()
 42 |         self.output += data
 43 |         self.last = "data"
 44 |     def handle_endtag(self, tag):
 45 |         if tag == "pre":
 46 |             self.in_pre = False
 47 |         elif self.is_block_tag(tag):
 48 |             self.output = self.output.rstrip()
 49 |         self.output += "</" + tag + ">"
 50 |         self.last_tag = tag
 51 |         self.last = "endtag"
 52 |     def handle_starttag(self, tag, attrs):
 53 |         if tag == "pre":
 54 |             self.in_pre = True
 55 |         if self.is_block_tag(tag):
 56 |             self.output = self.output.rstrip()
 57 |         self.output += "<" + tag
 58 |         # For now we don't strip out 'extra' attributes, because of
 59 |         # raw HTML test cases.
 60 |         # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
 61 |         if attrs:
 62 |             attrs.sort()
 63 |             for (k,v) in attrs:
 64 |                 self.output += " " + k
 65 |                 if v in ['href','src']:
 66 |                     self.output += ("=" + '"' +
 67 |                             urllib.quote(urllib.unquote(v), safe='/') + '"')
 68 |                 elif v != None:
 69 |                     self.output += ("=" + '"' + html.escape(v,quote=True) + '"')
 70 |         self.output += ">"
 71 |         self.last_tag = tag
 72 |         self.last = "starttag"
 73 |     def handle_startendtag(self, tag, attrs):
 74 |         """Ignore closing tag for self-closing """
 75 |         self.handle_starttag(tag, attrs)
 76 |         self.last_tag = tag
 77 |         self.last = "endtag"
 78 |     def handle_comment(self, data):
 79 |         self.output += '<!--' + data + '-->'
 80 |         self.last = "comment"
 81 |     def handle_decl(self, data):
 82 |         self.output += '<!' + data + '>'
 83 |         self.last = "decl"
 84 |     def unknown_decl(self, data):
 85 |         self.output += '<!' + data + '>'
 86 |         self.last = "decl"
 87 |     def handle_pi(self,data):
 88 |         self.output += '<?' + data + '>'
 89 |         self.last = "pi"
 90 |     def handle_entityref(self, name):
 91 |         try:
 92 |             c = chr(name2codepoint[name])
 93 |         except KeyError:
 94 |             c = None
 95 |         self.output_char(c, '&' + name + ';')
 96 |         self.last = "ref"
 97 |     def handle_charref(self, name):
 98 |         try:
 99 |             if name.startswith("x"):
100 |                 c = chr(int(name[1:], 16))
101 |             else:
102 |                 c = chr(int(name))
103 |         except ValueError:
104 |                 c = None
105 |         self.output_char(c, '&' + name + ';')
106 |         self.last = "ref"
107 |     # Helpers.
108 |     def output_char(self, c, fallback):
109 |         if c == '<':
110 |             self.output += "&lt;"
111 |         elif c == '>':
112 |             self.output += "&gt;"
113 |         elif c == '&':
114 |             self.output += "&amp;"
115 |         elif c == '"':
116 |             self.output += "&quot;"
117 |         elif c == None:
118 |             self.output += fallback
119 |         else:
120 |             self.output += c
121 | 
122 |     def is_block_tag(self,tag):
123 |         return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
124 |             'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
125 |             'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
126 |             'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
127 |             'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
128 |             'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
129 |             'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
130 | 
131 | def normalize_html(html):
132 |     r"""
133 |     Return normalized form of HTML which ignores insignificant output
134 |     differences:
135 | 
136 |     Multiple inner whitespaces are collapsed to a single space (except
137 |     in pre tags):
138 | 
139 |         >>> normalize_html("<p>a  \t b</p>")
140 |         '<p>a b</p>'
141 | 
142 |         >>> normalize_html("<p>a  \t\nb</p>")
143 |         '<p>a b</p>'
144 | 
145 |     * Whitespace surrounding block-level tags is removed.
146 | 
147 |         >>> normalize_html("<p>a  b</p>")
148 |         '<p>a b</p>'
149 | 
150 |         >>> normalize_html(" <p>a  b</p>")
151 |         '<p>a b</p>'
152 | 
153 |         >>> normalize_html("<p>a  b</p> ")
154 |         '<p>a b</p>'
155 | 
156 |         >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
157 |         '<p>a b</p>'
158 | 
159 |         >>> normalize_html("<i>a  b</i> ")
160 |         '<i>a b</i> '
161 | 
162 |     * Self-closing tags are converted to open tags.
163 | 
164 |         >>> normalize_html("<br />")
165 |         '<br>'
166 | 
167 |     * Attributes are sorted and lowercased.
168 | 
169 |         >>> normalize_html('<a title="bar" HREF="foo">x</a>')
170 |         '<a href="foo" title="bar">x</a>'
171 | 
172 |     * References are converted to unicode, except that '<', '>', '&', and
173 |       '"' are rendered using entities.
174 | 
175 |         >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
176 |         '\u2200&amp;&gt;&lt;&quot;'
177 | 
178 |     """
179 |     html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
180 |     try:
181 |         parser = MyHTMLParser()
182 |         # We work around HTMLParser's limitations parsing CDATA
183 |         # by breaking the input into chunks and passing CDATA chunks
184 |         # through verbatim.
185 |         for chunk in re.finditer(html_chunk_re, html):
186 |             if chunk.group(0)[:8] == "<![CDATA":
187 |                 parser.output += chunk.group(0)
188 |             else:
189 |                 parser.feed(chunk.group(0))
190 |         parser.close()
191 |         return parser.output
192 |     except HTMLParseError as e:
193 |         sys.stderr.write("Normalization error: " + e.msg + "\n")
194 |         return html  # on error, return unnormalized HTML
195 | 


--------------------------------------------------------------------------------
/test/pathological_tests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import re
  5 | import argparse
  6 | import sys
  7 | import platform
  8 | import itertools
  9 | import multiprocessing
 10 | import queue
 11 | import time
 12 | from cmark import CMark
 13 | 
# Seconds run_tests() waits for a single case to produce a result before
# the worker process is terminated and the case is reported as a timeout.
TIMEOUT = 5

# Command-line interface: either an executable to run or a directory holding
# the dynamic library; both are handed to CMark below.
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
args = parser.parse_args(sys.argv[1:])

# Case descriptions that are allowed to fail, error out or time out:
# run_tests() lists them as ignored instead of failing the run.
# Only membership is tested; the True values are not read.
allowed_failures = {"many references": True}

# Shared converter used by the worker functions run_pathological() and
# run_pathological_cmark().
cmark = CMark(prog=args.program, library_dir=args.library_dir)
 26 | 
 27 | def hash_collisions():
 28 |     REFMAP_SIZE = 16
 29 |     COUNT = 25000
 30 | 
 31 |     def badhash(ref):
 32 |         h = 0
 33 |         for c in ref:
 34 |             a = (h << 6) & 0xFFFFFFFF
 35 |             b = (h << 16) & 0xFFFFFFFF
 36 |             h = ord(c) + a + b - h
 37 |             h = h & 0xFFFFFFFF
 38 | 
 39 |         return (h % REFMAP_SIZE) == 0
 40 | 
 41 |     keys = ("x%d" % i for i in itertools.count())
 42 |     collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
 43 |     bad_key = next(collisions)
 44 | 
 45 |     document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
 46 | 
 47 |     return document, re.compile(r"(<p>\[%s]</p>\n){%d}" % (bad_key, COUNT-1))
 48 | 
 49 | 
# Pathological inputs rendered to HTML.  Maps a human-readable description
# to a pair (markdown_input, compiled_regex); run_tests() passes a case when
# regex.search() finds a match in the HTML output.
pathological = {
    # note - some pythons have limit of 65535 for {num-matches} in re.
    "nested strong emph":
                (("*a **a " * 32500) + "b" + (" a** a*" * 32500),
                 re.compile("(<em>a <strong>a ){32500}b( a</strong> a</em>){32500}")),
    "many emph closers with no openers":
                 (("a_ " * 32500),
                  re.compile("(a[_] ){32499}a_")),
    "many emph openers with no closers":
                 (("_a " * 32500),
                  re.compile("(_a ){32499}_a")),
    "many link closers with no openers":
                 (("a]" * 32500),
                  re.compile("(a\\]){32500}")),
    "many link openers with no closers":
                 (("[a" * 32500),
                  re.compile("(\\[a){32500}")),
    "mismatched openers and closers":
                 (("*a_ " * 25000),
                  re.compile("([*]a[_] ){24999}[*]a_")),
    "issue #389":
                 (("*a " * 20000 + "_a*_ " * 20000),
                  re.compile("(<em>a ){20000}(_a<\\/em>_ ?){20000}")),
    "openers and closers multiple of 3":
                 (("a**b" + ("c* " * 25000)),
                  re.compile("a[*][*]b(c[*] ){24999}c[*]")),
    "link openers and emph closers":
                 (("[ a_" * 25000),
                  re.compile("(\\[ a_){25000}")),
    "pattern [ (]( repeated":
                 (("[ (](" * 40000),
                  re.compile("(\\[ \\(\\]\\(){40000}")),
    "pattern ![[]() repeated":
                 ("![[]()" * 160000,
                  re.compile("(!\\[<a href=\"\"></a>){160000}")),
    "hard link/emph case":
                 ("**x [a*b**c*](d)",
                  re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
    "nested brackets":
                 (("[" * 25000) + "a" + ("]" * 25000),
                  re.compile("\\[{25000}a\\]{25000}")),
    "nested block quotes":
                 ((("> " * 25000) + "a"),
                  re.compile("(<blockquote>\n){25000}")),
    "deeply nested lists":
                 ("".join(map(lambda x: ("  " * x + "* a\n"), range(0,500))),
                  re.compile("<ul>\n(<li>a\n<ul>\n){499}<li>a</li>\n</ul>\n(</li>\n</ul>\n){499}")),
    "U+0000 in input":
                 ("abc\u0000de\u0000",
                  re.compile("abc\ufffd?de\ufffd?")),
    "backticks":
                 ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))),
                  re.compile("^<p>[e`]*</p>\n$")),
    "unclosed links A":
                 ("[a](<b" * 30000,
                  re.compile(r"(\[a]\(&lt;b){30000}")),
    "unclosed links B":
                 ("[a](b" * 30000,
                  re.compile(r"(\[a]\(b){30000}")),
    "unclosed <!--":
                 ("</" + "<!--" * 300000,
                  re.compile("\\&lt;\\/(\\&lt;!--){300000}")),
    "empty lines in deeply nested lists":
                 ("- " * 30000 + "x" + "\n" * 30000,
                  re.compile(r"^(<\w+>\n?)+x(</\w+>\n)+$")),
    "empty lines in deeply nested lists in blockquote":
                 ("> " + "- " * 30000 + "x\n" + ">\n" * 30000,
                  re.compile(r"^(<\w+>\n?)+x(</\w+>\n)+$")),
    "emph in deep blockquote":
                 (">" * 100000 + "a*" * 100000,
                  re.compile(r"^(<\w+>\n)+<p>.*</p>\n(</\w+>\n)+$")),
    # Generated case: see hash_collisions().
    "reference collisions": hash_collisions()
# Disabled case; its description is still listed in allowed_failures.
#    "many references":
#                 ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
#                  re.compile("(\\[0\\] ){4999}"))
    }
127 | 
# Pathological inputs for the CommonMark renderer.  Same layout as
# `pathological` (description -> (input, compiled regex)), but run_tests()
# runs these through run_pathological_cmark(), i.e. cmark.to_commonmark(),
# and matches the regex against that output instead of HTML.
pathological_cmark = {
    "nested inlines":
                 ("*" * 20000 + "a" + "*" * 20000,
                  re.compile("^\\*+a\\*+$")),
    }
133 | 
# Matches a run of whitespace.  (The previous pattern '/s+/' was a
# JavaScript-style literal that matched the text "/s+/" rather than
# whitespace.)
whitespace_re = re.compile(r'\s+')
135 | 
def run_pathological(q, inp):
    """Render ``inp`` to HTML and put the converter's result on queue ``q``.

    Used by run_tests() as a ``multiprocessing.Process`` target; run_tests()
    unpacks the queued value as ``(rc, actual, err)``.
    """
    q.put(cmark.to_html(inp))
138 | 
def run_pathological_cmark(q, inp):
    """Render ``inp`` back to CommonMark and put the result on queue ``q``.

    Used by run_tests() as a ``multiprocessing.Process`` target for the
    cases in ``pathological_cmark``; run_tests() unpacks the queued value as
    ``(rc, actual, err)``.
    """
    q.put(cmark.to_commonmark(inp))
141 | 
def run_tests():
    """Run every pathological case in its own process and report the results.

    Each case from ``pathological`` (HTML output) and ``pathological_cmark``
    (CommonMark output) is converted in a child process.  The parent waits up
    to ``TIMEOUT`` seconds for the result; a case that does not answer in
    time is terminated and counted as errored.  A case passes when its regex
    is found in the converter output.

    Cases named in ``allowed_failures`` are listed as ignored instead of
    being counted as failed or errored.

    Exits the interpreter with status 1 if any case failed or errored, and
    with status 0 otherwise.
    """
    q = multiprocessing.Queue()
    passed = []
    errored = []
    failed = []
    ignored = []

    def record_problem(description, bucket):
        # Known-bad cases are reported but must not fail the run.
        if description in allowed_failures:
            ignored.append(description)
        else:
            bucket.append(description)

    print("Testing pathological cases:")
    for description in (*pathological, *pathological_cmark):
        if description in pathological:
            (inp, regex) = pathological[description]
            target = run_pathological
        else:
            (inp, regex) = pathological_cmark[description]
            target = run_pathological_cmark
        p = multiprocessing.Process(target=target, args=(q, inp))
        p.start()
        try:
            # wait TIMEOUT seconds or until it finishes
            rc, actual, err = q.get(True, TIMEOUT)
            p.join()
        except queue.Empty:
            # No result in time: kill the worker so it cannot keep running
            # (or post a stale result) while later cases execute.
            p.terminate()
            p.join()
            print(description, '[TIMEOUT]')
            record_problem(description, errored)
            continue

        if rc != 0:
            print(description, '[ERRORED (return code %d)]' %rc)
            print(err)
            record_problem(description, errored)
        elif regex.search(actual):
            print(description, '[PASSED]')
            passed.append(description)
        else:
            print(description, '[FAILED]')
            print(repr(actual[:60]))
            record_problem(description, failed)

    print("%d passed, %d failed, %d errored" %
          (len(passed), len(failed), len(errored)))
    if ignored:
        print("Ignoring these allowed failures:")
        for x in ignored:
            print(x)
    # sys.exit rather than the builtin exit(): the latter is injected by the
    # site module and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(1 if failed or errored else 0)
200 | 
# Run the suite only when executed as a script.  The guard also keeps
# multiprocessing children that re-import this module (start methods that
# spawn a fresh interpreter) from starting a nested test run.
if __name__ == "__main__":
    run_tests()
203 | 


--------------------------------------------------------------------------------
/test/regression.txt:
--------------------------------------------------------------------------------
  1 | ### Regression tests
  2 | 
  3 | Issue #113: EOL character weirdness on Windows
  4 | (Important: first line ends with CR + CR + LF)
  5 | 
  6 | ```````````````````````````````` example
  7 | line1
  8 | line2
  9 | .
 10 | <p>line1</p>
 11 | <p>line2</p>
 12 | ````````````````````````````````
 13 | 
 14 | Issue #114: cmark skipping first character in line
 15 | (Important: the blank lines around "Repeatedly" contain a tab.)
 16 | 
 17 | ```````````````````````````````` example
 18 | By taking it apart
 19 | 
 20 | - alternative solutions
 21 | →
 22 | Repeatedly solving
 23 | →
 24 | - how techniques
 25 | .
 26 | <p>By taking it apart</p>
 27 | <ul>
 28 | <li>alternative solutions</li>
 29 | </ul>
 30 | <p>Repeatedly solving</p>
 31 | <ul>
 32 | <li>how techniques</li>
 33 | </ul>
 34 | ````````````````````````````````
 35 | 
 36 | Issue jgm/CommonMark#430:  h2..h6 not recognized as block tags.
 37 | 
 38 | ```````````````````````````````` example
 39 | <h1>lorem</h1>
 40 | 
 41 | <h2>lorem</h2>
 42 | 
 43 | <h3>lorem</h3>
 44 | 
 45 | <h4>lorem</h4>
 46 | 
 47 | <h5>lorem</h5>
 48 | 
 49 | <h6>lorem</h6>
 50 | .
 51 | <h1>lorem</h1>
 52 | <h2>lorem</h2>
 53 | <h3>lorem</h3>
 54 | <h4>lorem</h4>
 55 | <h5>lorem</h5>
 56 | <h6>lorem</h6>
 57 | ````````````````````````````````
 58 | 
 59 | Issue jgm/commonmark.js#109 - tabs after setext header line
 60 | 
 61 | 
 62 | ```````````````````````````````` example
 63 | hi
 64 | --→
 65 | .
 66 | <h2>hi</h2>
 67 | ````````````````````````````````
 68 | 
 69 | Issue #177 - incorrect emphasis parsing
 70 | 
 71 | ```````````````````````````````` example
 72 | a***b* c*
 73 | .
 74 | <p>a*<em><em>b</em> c</em></p>
 75 | ````````````````````````````````
 76 | 
 77 | Issue #193 - unescaped left angle brackets in link destination
 78 | 
 79 | ```````````````````````````````` example
 80 | [a]
 81 | 
 82 | [a]: <te<st>
 83 | .
 84 | <p>[a]</p>
 85 | <p>[a]: &lt;te<st></p>
 86 | ````````````````````````````````
 87 | 
 88 | Issue #192 - escaped spaces in link destination
 89 | 
 90 | 
 91 | ```````````````````````````````` example
 92 | [a](te\ st)
 93 | .
 94 | <p>[a](te\ st)</p>
 95 | ````````````````````````````````
 96 | 
 97 | Issue #527 - meta tags in inline contexts
 98 | 
 99 | ```````````````````````````````` example
100 | City:
101 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
102 |   <meta itemprop="name" content="Springfield">
103 | </span>
104 | .
105 | <p>City:
106 | <span itemprop="contentLocation" itemscope itemtype="https://schema.org/City">
107 | <meta itemprop="name" content="Springfield">
108 | </span></p>
109 | ````````````````````````````````
110 | 
111 | Issue #530 - link parsing corner cases
112 | 
113 | ```````````````````````````````` example
114 | [a](\ b)
115 | 
116 | [a](<<b)
117 | 
118 | [a](<b
119 | )
120 | .
121 | <p>[a](\ b)</p>
122 | <p>[a](&lt;&lt;b)</p>
123 | <p>[a](&lt;b
124 | )</p>
125 | ````````````````````````````````
126 | 
127 | Issue commonmark#526 - unescaped ( in link title
128 | 
129 | ```````````````````````````````` example
130 | [link](url ((title))
131 | .
132 | <p>[link](url ((title))</p>
133 | ````````````````````````````````
134 | 
135 | Issue commonmark#517 - script, pre, style close tag without
136 | opener.
137 | 
138 | ```````````````````````````````` example
139 | </script>
140 | 
141 | </pre>
142 | 
143 | </style>
144 | .
145 | </script>
146 | </pre>
147 | </style>
148 | ````````````````````````````````
149 | 
150 | Issue #289.
151 | 
152 | ```````````````````````````````` example
153 | [a](<b) c>
154 | .
155 | <p>[a](&lt;b) c&gt;</p>
156 | ````````````````````````````````
157 | 
158 | Issue #334 - UTF-8 BOM
159 | 
160 | ```````````````````````````````` example
161 | ﻿# Hi
162 | .
163 | <h1>Hi</h1>
164 | ````````````````````````````````
165 | 
166 | Issue commonmark.js#213 - type 7 blocks can't interrupt
167 | paragraph
168 | 
169 | ```````````````````````````````` example
170 | - <script>
171 | - some text
172 | some other text
173 | </script>
174 | .
175 | <ul>
176 | <li>
177 | <script>
178 | </li>
179 | <li>some text
180 | some other text
181 | </script></li>
182 | </ul>
183 | ````````````````````````````````
184 | 
185 | Issue #383 - emphasis parsing.
186 | 
187 | ```````````````````````````````` example
188 | *****Hello*world****
189 | .
190 | <p>**<em><strong>Hello<em>world</em></strong></em></p>
191 | ````````````````````````````````
192 | 
193 | Issue #424 - emphasis before links
194 | 
195 | ```````````````````````````````` example
196 | *text* [link](#section)
197 | .
198 | <p><em>text</em> <a href="#section">link</a></p>
199 | ````````````````````````````````
200 | 
201 | `<!doctype` is case-insensitive
202 | ```````````````````````````````` example
203 | <!docType html>
204 | .
205 | <!docType html>
206 | ````````````````````````````````
207 | 
208 | Declarations don't need spaces, according to the spec
209 | ```````````````````````````````` example
210 | x <!A>
211 | .
212 | <p>x <!A></p>
213 | ````````````````````````````````
214 | 
215 | An underscore that is not part of a delimiter should not prevent another
216 | pair of underscores from forming part of their own.
217 | ```````````````````````````````` example
218 | __!_!__
219 | 
220 | __!x!__
221 | 
222 | **!*!**
223 | 
224 | ---
225 | 
226 | _*__*_*
227 | 
228 | _*xx*_*
229 | 
230 | _*__-_-
231 | 
232 | _*xx-_-
233 | .
234 | <p><strong>!_!</strong></p>
235 | <p><strong>!x!</strong></p>
236 | <p><strong>!*!</strong></p>
237 | <hr />
238 | <p><em><em>__</em></em>*</p>
239 | <p><em><em>xx</em></em>*</p>
240 | <p><em>*__-</em>-</p>
241 | <p><em>*xx-</em>-</p>
242 | ````````````````````````````````
243 | 
244 | commonmark.js #277:
245 | ```````````````````````````````` example
246 | ```language-r
247 | x <- 1
248 | ```
249 | 
250 | ```r
251 | x <- 1
252 | ```
253 | .
254 | <pre><code class="language-r">x &lt;- 1
255 | </code></pre>
256 | <pre><code class="language-r">x &lt;- 1
257 | </code></pre>
258 | ````````````````````````````````
259 | 
260 | https://github.com/commonmark/commonmark.js/issues/283
261 | ```````````````````````````````` example
262 | x<!x>
263 | 
264 | x<!>
265 | .
266 | <p>x<!x></p>
267 | <p>x&lt;!&gt;</p>
268 | ````````````````````````````````
269 | 
270 | Case fold test
271 | ```````````````````````````````` example
272 | [link][µÓĐĹŠƆƦǏǶȜɆΓΨϪЇЛЯҎҶӞԆԮՄႠႴᏸᲕᲩᲿḦṎṶẚỂỪἙἿὭᾑᾥᾼῢΩↃⓉⰍⰡⱩⲔⲼⳫꙢꜢꝌꝽꞪꟇꭾꮒꮦꮺＣＷ𐐐𐐤𐓀𐕰𐖅𐲅𐲙𐲭𑢮𖹂𖹖𞤊𞤞]
273 | 
274 | [μóđĺšɔʀǐƕȝɇγψϫїляҏҷӟԇԯմⴀⴔᏰვჩჿḧṏṷaʾểừἑἷὥἡιὥιαιῢωↄⓣⰽⱑⱪⲕⲽⳬꙣꜣꝍᵹɦꟈᎮᏂᏖᏪｃｗ𐐸𐑌𐓨𐖗𐖬𐳅𐳙𐳭𑣎𖹢𖹶𞤬𞥀]: /url
275 | .
276 | <p><a href="/url">link</a></p>
277 | ````````````````````````````````
278 | 
279 | https://github.com/commonmark/cmark/issues/548
280 | ```````````````````````````````` example
281 | (&#xFFFE;&#xFFFF;)
282 | .
283 | <p>(￾￿)</p>
284 | ````````````````````````````````
285 | 


--------------------------------------------------------------------------------
/test/roundtrip_tests.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import sys
 3 | from spec_tests import get_tests, do_test
 4 | from cmark import CMark
 5 | import argparse
 6 | 
 7 | parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.')
 8 | parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
 9 |         help='program to test')
10 | parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
11 |         help='path to spec')
12 | parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
13 |         default=None, help='limit to sections matching regex pattern')
14 | parser.add_argument('--library-dir', dest='library_dir', nargs='?',
15 |         default=None, help='directory containing dynamic library')
16 | parser.add_argument('--no-normalize', dest='normalize',
17 |         action='store_const', const=False, default=True,
18 |         help='do not normalize HTML')
19 | parser.add_argument('-n', '--number', type=int, default=None,
20 |         help='only consider the test with the given number')
21 | args = parser.parse_args(sys.argv[1:])
22 | 
23 | spec = sys.argv[1]
24 | 
def converter(md):
    """Round-trip ``md`` through the CommonMark writer and render it to HTML.

    The input is first converted to CommonMark; that output is then
    converted to HTML.  Returns a three-element list ``[exit_code, output,
    err]``.  If either conversion reports a non-zero exit code, the values of
    that failing step are returned unchanged.
    """
    cmark = CMark(prog=args.program, library_dir=args.library_dir)

    ec, result, err = cmark.to_commonmark(md)
    if ec != 0:
        return [ec, result, err]

    ec, html, err = cmark.to_html(result)
    if ec != 0:
        return [ec, html, err]

    # In the commonmark writer we insert dummy HTML
    # comments between lists, and between lists and code
    # blocks.  Strip these out, since the spec uses
    # two blank lines instead:
    cleaned = re.sub('<!-- end list -->\n', '', html)
    return [ec, cleaned, '']
40 | 
# Run every example of the spec through the roundtrip converter.
tests = get_tests(args.spec)
result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0}
for test in tests:
    do_test(converter, test, args.normalize, result_counts)

# The exit status is the number of failed/errored examples, clamped to 255:
# POSIX keeps only the low 8 bits of the status, so an unclamped count of
# 256 (or any multiple of it) would be reported as success.
sys.exit(min(result_counts['fail'] + result_counts['error'], 255))
47 | 


--------------------------------------------------------------------------------
/test/smart_punct.txt:
--------------------------------------------------------------------------------
  1 | ## Smart punctuation
  2 | 
  3 | Open quotes are matched with closed quotes.
  4 | The same method is used for matching openers and closers
  5 | as is used in emphasis parsing:
  6 | 
  7 | ```````````````````````````````` example
  8 | "Hello," said the spider.
  9 | "'Shelob' is my name."
 10 | .
 11 | <p>“Hello,” said the spider.
 12 | “‘Shelob’ is my name.”</p>
 13 | ````````````````````````````````
 14 | 
 15 | ```````````````````````````````` example
 16 | 'A', 'B', and 'C' are letters.
 17 | .
 18 | <p>‘A’, ‘B’, and ‘C’ are letters.</p>
 19 | ````````````````````````````````
 20 | 
 21 | ```````````````````````````````` example
 22 | 'Oak,' 'elm,' and 'beech' are names of trees.
 23 | So is 'pine.'
 24 | .
 25 | <p>‘Oak,’ ‘elm,’ and ‘beech’ are names of trees.
 26 | So is ‘pine.’</p>
 27 | ````````````````````````````````
 28 | 
 29 | ```````````````````````````````` example
 30 | 'He said, "I want to go."'
 31 | .
 32 | <p>‘He said, “I want to go.”’</p>
 33 | ````````````````````````````````
 34 | 
 35 | A single quote that isn't an open quote matched
 36 | with a close quote will be treated as an
 37 | apostrophe:
 38 | 
 39 | ```````````````````````````````` example
 40 | Were you alive in the 70's?
 41 | .
 42 | <p>Were you alive in the 70’s?</p>
 43 | ````````````````````````````````
 44 | 
 45 | ```````````````````````````````` example
 46 | Here is some quoted '`code`' and a "[quoted link](url)".
 47 | .
 48 | <p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
 49 | ````````````````````````````````
 50 | 
 51 | Here the first `'` is treated as an apostrophe, not
 52 | an open quote, because the final single quote is matched
 53 | by the single quote before `jolly`:
 54 | 
 55 | ```````````````````````````````` example
 56 | 'tis the season to be 'jolly'
 57 | .
 58 | <p>’tis the season to be ‘jolly’</p>
 59 | ````````````````````````````````
 60 | 
 61 | Multiple apostrophes should not be marked as open/closing quotes.
 62 | 
 63 | ```````````````````````````````` example
 64 | 'We'll use Jane's boat and John's truck,' Jenna said.
 65 | .
 66 | <p>‘We’ll use Jane’s boat and John’s truck,’ Jenna said.</p>
 67 | ````````````````````````````````
 68 | 
 69 | An unmatched double quote will be interpreted as a
 70 | left double quote, to facilitate this style:
 71 | 
 72 | ```````````````````````````````` example
 73 | "A paragraph with no closing quote.
 74 | 
 75 | "Second paragraph by same speaker, in fiction."
 76 | .
 77 | <p>“A paragraph with no closing quote.</p>
 78 | <p>“Second paragraph by same speaker, in fiction.”</p>
 79 | ````````````````````````````````
 80 | 
 81 | A quote following a `]` or `)` character cannot
 82 | be an open quote:
 83 | 
 84 | ```````````````````````````````` example
 85 | [a]'s b'
 86 | .
 87 | <p>[a]’s b’</p>
 88 | ````````````````````````````````
 89 | 
 90 | Quotes that are escaped come out as literal straight
 91 | quotes:
 92 | 
 93 | ```````````````````````````````` example
 94 | \"This is not smart.\"
 95 | This isn\'t either.
 96 | 5\'8\"
 97 | .
 98 | <p>&quot;This is not smart.&quot;
 99 | This isn't either.
100 | 5'8&quot;</p>
101 | ````````````````````````````````
102 | 
103 | Two hyphens form an en-dash, three an em-dash.
104 | 
105 | ```````````````````````````````` example
106 | Some dashes:  em---em
107 | en--en
108 | em --- em
109 | en -- en
110 | 2--3
111 | .
112 | <p>Some dashes:  em—em
113 | en–en
114 | em — em
115 | en – en
116 | 2–3</p>
117 | ````````````````````````````````
118 | 
119 | A sequence of more than three hyphens is
120 | parsed as a sequence of em and/or en dashes,
121 | with no hyphens. If possible, a homogeneous
122 | sequence of dashes is used (so, 10 hyphens
123 | = 5 en dashes, and 9 hyphens = 3 em dashes).
124 | When a heterogeneous sequence must be used,
125 | the em dashes come first, followed by the en
126 | dashes, and as few en dashes as possible are
127 | used (so, 7 hyphens = 2 em dashes and 1 en
128 | dash).
129 | 
130 | ```````````````````````````````` example
131 | one-
132 | two--
133 | three---
134 | four----
135 | five-----
136 | six------
137 | seven-------
138 | eight--------
139 | nine---------
140 | thirteen-------------.
141 | .
142 | <p>one-
143 | two–
144 | three—
145 | four––
146 | five—–
147 | six——
148 | seven—––
149 | eight––––
150 | nine———
151 | thirteen———––.</p>
152 | ````````````````````````````````
153 | 
154 | Hyphens can be escaped:
155 | 
156 | ```````````````````````````````` example
157 | Escaped hyphens: \-- \-\-\-.
158 | .
159 | <p>Escaped hyphens: -- ---.</p>
160 | ````````````````````````````````
161 | 
162 | Three periods form an ellipsis:
163 | 
164 | ```````````````````````````````` example
165 | Ellipses...and...and....
166 | .
167 | <p>Ellipses…and…and….</p>
168 | ````````````````````````````````
169 | 
170 | Periods can be escaped if ellipsis-formation
171 | is not wanted:
172 | 
173 | ```````````````````````````````` example
174 | No ellipses\.\.\.
175 | .
176 | <p>No ellipses...</p>
177 | ````````````````````````````````
178 | 


--------------------------------------------------------------------------------
/test/spec_tests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import sys
  5 | from difflib import unified_diff
  6 | import argparse
  7 | import re
  8 | import json
  9 | import os
 10 | from cmark import CMark
 11 | from normalize import normalize_html
 12 | 
# Command-line interface.  This runs at module level, so the arguments are
# parsed whenever the module is loaded, not only when it is run as a script.
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
        help='program to test')
parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
        help='path to spec')
parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
        default=None, help='limit to sections matching regex pattern')
parser.add_argument('--library-dir', dest='library_dir', nargs='?',
        default=None, help='directory containing dynamic library')
# --no-normalize stores False into args.normalize (default True).
parser.add_argument('--no-normalize', dest='normalize',
        action='store_const', const=False, default=True,
        help='do not normalize HTML')
parser.add_argument('-d', '--dump-tests', dest='dump_tests',
        action='store_const', const=True, default=False,
        help='dump tests in JSON format')
parser.add_argument('--debug-normalization', dest='debug_normalization',
        action='store_const', const=True,
        default=False, help='filter stdin through normalizer for testing')
parser.add_argument('-n', '--number', type=int, default=None,
        help='only consider the test with the given number')
# Value is used as the output directory for the generated corpus files.
parser.add_argument('--fuzz-corpus',
        help='convert test cases to fuzz corpus')
args = parser.parse_args(sys.argv[1:])
 36 | 
 37 | def out(str):
 38 |     sys.stdout.buffer.write(str.encode('utf-8')) 
 39 | 
 40 | def print_test_header(headertext, example_number, start_line, end_line):
 41 |     out("Example %d (lines %d-%d) %s\n" % (example_number,start_line,end_line,headertext))
 42 | 
 43 | def do_test(converter, test, normalize, result_counts):
 44 |     [retcode, actual_html, err] = converter(test['markdown'])
 45 |     if retcode == 0:
 46 |         expected_html = test['html']
 47 |         unicode_error = None
 48 |         if normalize:
 49 |             try:
 50 |                 passed = normalize_html(actual_html) == normalize_html(expected_html)
 51 |             except UnicodeDecodeError as e:
 52 |                 unicode_error = e
 53 |                 passed = False
 54 |         else:
 55 |             passed = actual_html == expected_html
 56 |         if passed:
 57 |             result_counts['pass'] += 1
 58 |         else:
 59 |             print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
 60 |             out(test['markdown'] + '\n')
 61 |             if unicode_error:
 62 |                 out("Unicode error: " + str(unicode_error) + '\n')
 63 |                 out("Expected: " + repr(expected_html) + '\n')
 64 |                 out("Got:      " + repr(actual_html) + '\n')
 65 |             else:
 66 |                 expected_html_lines = expected_html.splitlines(True)
 67 |                 actual_html_lines = actual_html.splitlines(True)
 68 |                 for diffline in unified_diff(expected_html_lines, actual_html_lines,
 69 |                                 "expected HTML", "actual HTML"):
 70 |                     out(diffline)
 71 |             out('\n')
 72 |             result_counts['fail'] += 1
 73 |     else:
 74 |         print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
 75 |         out("program returned error code %d\n" % retcode)
 76 |         sys.stdout.buffer.write(err)
 77 |         result_counts['error'] += 1
 78 | 
 79 | def get_tests(specfile):
 80 |     line_number = 0
 81 |     start_line = 0
 82 |     end_line = 0
 83 |     example_number = 0
 84 |     markdown_lines = []
 85 |     html_lines = []
 86 |     state = 0  # 0 regular text, 1 markdown example, 2 html output
 87 |     headertext = ''
 88 |     tests = []
 89 | 
 90 |     header_re = re.compile('#+ ')
 91 | 
 92 |     with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
 93 |         for line in specf:
 94 |             line_number = line_number + 1
 95 |             l = line.strip()
 96 |             if l == "`" * 32 + " example":
 97 |                 state = 1
 98 |             elif l == "`" * 32:
 99 |                 state = 0
100 |                 example_number = example_number + 1
101 |                 end_line = line_number
102 |                 tests.append({
103 |                     "markdown":''.join(markdown_lines).replace('→',"\t"),
104 |                     "html":''.join(html_lines).replace('→',"\t"),
105 |                     "example": example_number,
106 |                     "start_line": start_line,
107 |                     "end_line": end_line,
108 |                     "section": headertext})
109 |                 start_line = 0
110 |                 markdown_lines = []
111 |                 html_lines = []
112 |             elif l == ".":
113 |                 state = 2
114 |             elif state == 1:
115 |                 if start_line == 0:
116 |                     start_line = line_number - 1
117 |                 markdown_lines.append(line)
118 |             elif state == 2:
119 |                 html_lines.append(line)
120 |             elif state == 0 and re.match(header_re, line):
121 |                 headertext = header_re.sub('', line).strip()
122 |     return tests
123 | 
if __name__ == "__main__":
    # Mode 1: filter stdin through the HTML normalizer and stop.
    if args.debug_normalization:
        out(normalize_html(sys.stdin.read()))
        sys.exit(0)

    all_tests = get_tests(args.spec)

    # Mode 2: write each example's markdown as one fuzz corpus file
    # named <spec basename>.<example index> and stop.
    if args.fuzz_corpus:
        base = os.path.basename(args.spec)
        name, _ext = os.path.splitext(base)
        for i, test in enumerate(all_tests, start=1):
            filename = os.path.join(args.fuzz_corpus, '%s.%d' % (name, i))
            with open(filename, 'wb') as f:
                f.write(b'\0' * 8) # options header
                f.write(test['markdown'].encode())
        sys.exit(0)

    # Select the examples by section pattern and, optionally, example number.
    if args.pattern:
        pattern_re = re.compile(args.pattern, re.IGNORECASE)
    else:
        pattern_re = re.compile('.')
    tests = [test for test in all_tests
             if re.search(pattern_re, test['section'])
             and (not args.number or test['example'] == args.number)]

    # Mode 3: dump the selected examples as JSON and stop.
    if args.dump_tests:
        out(json.dumps(tests, ensure_ascii=False, indent=2))
        sys.exit(0)

    # Default mode: run the selected examples and summarize.
    skipped = len(all_tests) - len(tests)
    converter = CMark(prog=args.program, library_dir=args.library_dir).to_html
    result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
    for test in tests:
        do_test(converter, test, args.normalize, result_counts)
    out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts))
    # Exit status is the number of failed/errored examples, clamped to 255:
    # POSIX keeps only the low 8 bits, so an unclamped count of 256 would
    # be reported as success.
    sys.exit(min(result_counts['fail'] + result_counts['error'], 255))
159 | 


--------------------------------------------------------------------------------
/toolchain-mingw32.cmake:
--------------------------------------------------------------------------------
 1 | # the name of the target operating system
 2 | SET(CMAKE_SYSTEM_NAME Windows)
 3 | 
 4 | # which compilers to use for C and C++
 5 | SET(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
 6 | SET(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
 7 | SET(CMAKE_RC_COMPILER i686-w64-mingw32-windres)
 8 | 
 9 | # where the target environment is located
10 | SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32/ "${CMAKE_SOURCE_DIR}/windows")
11 | 
12 | # adjust the default behaviour of the FIND_XXX() commands:
13 | # search headers and libraries in the target environment, search
14 | # programs in the host environment
15 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
17 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
18 | 


--------------------------------------------------------------------------------
/tools/appveyor-build.bat:
--------------------------------------------------------------------------------
@echo off

rem Set up the compiler environment for the requested MSVC_VERSION:
rem version 10 uses the Windows SDK v7.1 SetEnv script, every other
rem version uses that Visual Studio's vcvarsall.bat.
if "%MSVC_VERSION%" == "10" goto msvc10

call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64
goto build

:msvc10
call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64

:build
rem Build with nmake in the environment configured above.
nmake
13 | 
14 | 


--------------------------------------------------------------------------------
/tools/make_case_fold_inc.py:
--------------------------------------------------------------------------------
 1 | # Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt).
 2 | # Usage: python3 tools/make_case_fold_inc.py < data/CaseFolding.txt > src/case_fold.inc
 3 | 
 4 | import sys, re
 5 | 
 6 | prog = re.compile('([0-9A-F]+); [CF];((?: [0-9A-F]+)+);')
 7 | main_table = []
 8 | repl_table = []
 9 | repl_idx = 0
10 | test = ''
11 | test_result = ''
12 | 
13 | for line in sys.stdin:
14 |     m = prog.match(line)
15 |     if m is None:
16 |         continue
17 | 
18 |     cp = int(m[1], 16);
19 |     if cp < 0x80:
20 |         continue
21 | 
22 |     repl = b''
23 |     for x in m[2].split():
24 |         repl += chr(int(x, 16)).encode('UTF-8')
25 | 
26 |     # Generate test case
27 |     if len(main_table) % 20 == 0:
28 |         test += chr(cp)
29 |         test_result += repl.decode('UTF-8')
30 | 
31 |     # 17 bits for code point
32 |     if cp >= (1 << 17):
33 |         raise Exception("code point too large")
34 | 
35 |     # 12 bits for upper bits of replacement index
36 |     # The lowest bit is always zero.
37 |     if repl_idx // 2 >= (1 << 12):
38 |         raise Exception("too many replacements")
39 | 
40 |     # 3 bits for size of replacement
41 |     repl_size = len(repl)
42 |     if repl_size >= (1 << 3):
43 |         raise Exception("too many replacement chars")
44 | 
45 |     main_table += [ cp | repl_idx // 2 << 17 | repl_size << 29 ]
46 |     repl_table += repl
47 |     repl_idx += repl_size
48 | 
49 |     # Make sure that repl_idx is even
50 |     if repl_idx % 2 != 0:
51 |         repl_table += [0]
52 |         repl_idx += 1
53 | 
54 | # Print test case
55 | if False:
56 |     print("test:", test)
57 |     print("test_result:", test_result)
58 |     sys.exit(0)
59 | 
60 | print("""// Generated by tools/make_case_fold_inc.py
61 | 
62 | #define CF_MAX            (1 << 17)
63 | #define CF_TABLE_SIZE     %d
64 | #define CF_CODE_POINT(x)  ((x) & 0x1FFFF)
65 | #define CF_REPL_IDX(x)    ((((x) >> 17) & 0xFFF) * 2)
66 | #define CF_REPL_SIZE(x)   ((x) >> 29)
67 | 
68 | static const uint32_t cf_table[%d] = {""" % (len(main_table), len(main_table)))
69 | 
70 | i = 0
71 | size = len(main_table)
72 | for value in main_table:
73 |     if i % 6 == 0:
74 |         print("  ", end="")
75 |     print("0x%X" % value, end="")
76 |     i += 1
77 |     if i == size: print()
78 |     elif i % 6 == 0: print(",")
79 |     else: print(", ", end="")
80 | 
81 | print("""};
82 | 
83 | static const unsigned char cf_repl[%d] = {""" % len(repl_table))
84 | 
85 | i = 0
86 | size = len(repl_table)
87 | for value in repl_table:
88 |     if i % 12 == 0:
89 |         print("  ", end="")
90 |     print("0x%02X" % value, end="")
91 |     i += 1
92 |     if i == size: print()
93 |     elif i % 12 == 0: print(",")
94 |     else: print(", ", end="")
95 | 
96 | print("};")
97 | 


--------------------------------------------------------------------------------
/tools/make_entities_inc.py:
--------------------------------------------------------------------------------
 1 | # Creates C data structures for binary lookup table of entities,
 2 | # using python's html5 entity data.
 3 | # Usage: python3 tools/make_entities_inc.py > src/entities.inc
 4 | 
 5 | import html
 6 | 
 7 | entities5 = html.entities.html5
 8 | 
 9 | # Remove keys without semicolons. HTML5 allows some named character
10 | # references without a trailing semicolon.
11 | entities = sorted([(k[:-1], entities5[k]) for k in entities5.keys() if k[-1] == ';'])
12 | 
13 | main_table = []
14 | text_table = b''
15 | text_idx = 0
16 | 
17 | for (ent, repl) in entities:
18 |     ent_bytes = ent.encode('UTF-8')
19 |     ent_size = len(ent_bytes)
20 |     repl_bytes = repl.encode('UTF-8')
21 |     repl_size = len(repl_bytes)
22 | 
23 |     if text_idx >= (1 << 15):
24 |         raise Exception("text index too large")
25 |     if ent_size >= (1 << 5):
26 |         raise Exception("entity name too long")
27 |     if repl_size >= (1 << 3):
28 |         raise Exception("entity replacement too long")
29 | 
30 |     main_table += [ text_idx | ent_size << 15 | repl_size << 20 ]
31 | 
32 |     text_table += ent_bytes + repl_bytes
33 |     text_idx += ent_size + repl_size
34 | 
35 | print("""/* Autogenerated by tools/make_headers_inc.py */
36 | 
37 | #define ENT_MIN_LENGTH      2
38 | #define ENT_MAX_LENGTH      32
39 | #define ENT_TABLE_SIZE      %d
40 | #define ENT_TEXT_IDX(x)     ((x) & 0x7FFF)
41 | #define ENT_NAME_SIZE(x)    (((x) >> 15) & 0x1F)
42 | #define ENT_REPL_SIZE(x)    ((x) >> 20)
43 | 
44 | static const uint32_t cmark_entities[%d] = {""" % (len(main_table), len(main_table)));
45 | 
46 | i = 0
47 | size = len(main_table)
48 | for value in main_table:
49 |     if i % 6 == 0:
50 |         print("  ", end="")
51 |     print("0x%X" % value, end="")
52 |     i += 1
53 |     if i == size: print()
54 |     elif i % 6 == 0: print(",")
55 |     else: print(", ", end="")
56 | 
57 | print("""};
58 | 
59 | static const unsigned char cmark_entity_text[%d] = {""" % len(text_table))
60 | 
61 | i = 0
62 | size = len(text_table)
63 | for value in text_table:
64 |     if i % 12 == 0:
65 |         print("  ", end="")
66 |     print("0x%02X" % value, end="")
67 |     i += 1
68 |     if i == size: print()
69 |     elif i % 12 == 0: print(",")
70 |     else: print(", ", end="")
71 | 
72 | print("};")
73 | 


--------------------------------------------------------------------------------
/why-cmark-and-not-x.md:
--------------------------------------------------------------------------------
  1 | Why use `cmark` and not X?
  2 | ==========================
  3 | 
  4 | `hoedown`
  5 | ---------
  6 | 
  7 | `hoedown` (which derives from `sundown`) is slightly faster
  8 | than `cmark` in our benchmarks (0.21s vs. 0.29s).  But both
  9 | are much faster than any other available implementation.
 10 | 
 11 | `hoedown` boasts of including "protection against all possible
 12 | DOS attacks," but there are some chinks in the armor:
 13 | 
 14 |     % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | cmark
 15 |     ...
 16 |     user 0m0.073s
 17 |     % time python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' | hoedown
 18 |     ...
 19 |     0m17.84s
 20 | 
 21 | `hoedown` has many parsing bugs.  Here is a selection (as of
 22 | v3.0.3):
 23 | 
 24 |     % hoedown
 25 |     - one
 26 |       - two
 27 |         1. three
 28 |     ^D
 29 |     <ul>
 30 |     <li>one
 31 | 
 32 |     <ul>
 33 |     <li>two</li>
 34 |     <li>three</li>
 35 |     </ul></li>
 36 |     </ul>
 37 | 
 38 | 
 39 |     % hoedown
 40 |     ## hi\###
 41 |     ^D
 42 |     <h2>hi\</h2>
 43 | 
 44 | 
 45 |     % hoedown
 46 |     [ΑΓΩ]: /φου
 47 | 
 48 |     [αγω]
 49 |     ^D
 50 |     <p>[αγω]</p>
 51 | 
 52 | 
 53 |     % hoedown
 54 |     ```
 55 |     [foo]: /url
 56 |     ```
 57 | 
 58 |     [foo]
 59 |     ^D
 60 |     <p>```</p>
 61 | 
 62 |     <p>```</p>
 63 | 
 64 |     <p><a href="/url">foo</a></p>
 65 | 
 66 | 
 67 |     % hoedown
 68 |     [foo](url "ti\*tle")
 69 |     ^D
 70 |     <p><a href="url" title="ti\*tle">foo</a></p>
 71 | 
 72 | 
 73 |     % ./hoedown
 74 |     - one
 75 |      - two
 76 |       - three
 77 |        - four
 78 |     ^D
 79 |     <ul>
 80 |     <li>one
 81 | 
 82 |     <ul>
 83 |     <li>two</li>
 84 |     <li>three</li>
 85 |     <li>four</li>
 86 |     </ul></li>
 87 |     </ul>
 88 | 
 89 | 
 90 | `discount`
 91 | ----------
 92 | 
 93 | `cmark` is about six times faster.
 94 | 
 95 | `kramdown`
 96 | ----------
 97 | 
 98 | `cmark` is about a hundred times faster.
 99 | 
100 | `kramdown` also gets tied in knots by pathological input like
101 | 
102 |     python -c 'print(("[" * 50000) + "a" + ("]" * 50000))'
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/wrappers/wrapper.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | /**
 4 |  * Converts Markdown to HTML by calling libcmark through PHP's FFI.
 5 |  *
 6 |  * @param string $markdown Markdown source; its byte length is passed along.
 7 |  * @return string HTML copied out of the buffer returned by the C function.
 8 |  */
 9 | function markdownToHtml(string $markdown): string
10 | {
11 |     $cmark = FFI::cdef(
12 |         'char *cmark_markdown_to_html(const char *text, size_t len, int options);',
13 |         'libcmark.so'
14 |     );
15 | 
16 |     // Options value 0 is passed, as in the declaration's third argument.
17 |     $buffer = $cmark->cmark_markdown_to_html($markdown, strlen($markdown), 0);
18 | 
19 |     // Copy the result into a PHP string before releasing the C buffer.
20 |     $rendered = FFI::string($buffer);
21 |     FFI::free($buffer);
22 | 
23 |     return $rendered;
24 | }
25 | 
26 | // Sample document rendered when this file is run as a script.
27 | $sample = <<<'md'
28 | # First level title
29 | 
30 | ## Second level title
31 | 
32 | Paragraph
33 | 
34 | md;
35 | 
36 | echo markdownToHtml($sample), PHP_EOL;
37 | 


--------------------------------------------------------------------------------
/wrappers/wrapper.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Example for using the shared library from python
 4 | # Will work with either python 2 or python 3
 5 | # Requires cmark library to be installed
 6 | 
 7 | from ctypes import *
 8 | import sys
 9 | import platform
10 | 
11 | sysname = platform.system()
12 | 
13 | if sysname == 'Darwin':
14 |     libname = "libcmark.dylib"
15 | elif sysname == 'Windows':
16 |     libname = "cmark.dll"
17 | else:
18 |     libname = "libcmark.so"
19 | cmark = CDLL(libname)
20 | 
21 | class cmark_mem(Structure):
22 |     _fields_ = [("calloc", c_void_p),
23 |                 ("realloc", c_void_p),
24 |                 ("free", CFUNCTYPE(None, c_void_p))]
25 | 
26 | get_alloc = cmark.cmark_get_default_mem_allocator
27 | get_alloc.restype = POINTER(cmark_mem)
28 | free_func = get_alloc().contents.free
29 | 
30 | markdown = cmark.cmark_markdown_to_html
31 | markdown.restype = POINTER(c_char)
32 | markdown.argtypes = [c_char_p, c_size_t, c_int]
33 | 
34 | opts = 0 # defaults
35 | 
36 | def md2html(text):
37 |     text = text.encode('utf-8')
38 |     cstring = markdown(text, len(text), opts)
39 |     result = string_at(cstring).decode('utf-8')
40 |     free_func(cstring)
41 |     return result
42 | 
43 | sys.stdout.write(md2html(sys.stdin.read()))
44 | 


--------------------------------------------------------------------------------
/wrappers/wrapper.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'ffi'
 3 | 
 4 | module CMark
 5 |   extend FFI::Library
 6 | 
 7 |   class Mem < FFI::Struct
 8 |     layout :calloc, :pointer,
 9 |            :realloc, :pointer,
10 |            :free, callback([:pointer], :void)
11 |   end
12 | 
13 |   ffi_lib ['libcmark', 'cmark']
14 |   attach_function :cmark_get_default_mem_allocator, [], :pointer
15 |   attach_function :cmark_markdown_to_html, [:string, :size_t, :int], :pointer
16 | end
17 | 
18 | def markdown_to_html(s)
19 |   len = s.bytesize
20 |   cstring = CMark::cmark_markdown_to_html(s, len, 0)
21 |   result = cstring.get_string(0)
22 |   mem = CMark::cmark_get_default_mem_allocator
23 |   CMark::Mem.new(mem)[:free].call(cstring)
24 |   result
25 | end
26 | 
27 | STDOUT.write(markdown_to_html(ARGF.read()))
28 | 


--------------------------------------------------------------------------------
/wrappers/wrapper.rkt:
--------------------------------------------------------------------------------
  1 | #lang racket/base
  2 | 
  3 | ;; requires racket >= 5.3 because of submodules
  4 | 
  5 | ;; Lowlevel interface
  6 | 
  7 | (module low-level racket/base
  8 | 
  9 |   (require ffi/unsafe ffi/unsafe/define)
 10 | 
 11 |   (provide (all-defined-out))
 12 | 
 13 |   (define-ffi-definer defcmark (ffi-lib "libcmark"))
 14 | 
 15 |   (define _cmark_node_type
 16 |     (_enum '(;; Error status
 17 |              none
 18 |              ;; Block
 19 |              document block-quote list item code-block
 20 |              html-block custom-block
 21 |              paragraph heading thematic-break
 22 |              ;; ?? first-block = document
 23 |              ;; ?? last-block = thematic-break
 24 |              ;; Inline
 25 |              text softbreak linebreak code html-inline custom-inline
 26 |              emph strong link image
 27 |              ;; ?? first-inline = text
 28 |              ;; ?? last-inline = image
 29 |              )))
 30 |   (define _cmark_list_type
 31 |     (_enum '(no_list bullet_list ordered_list)))
 32 |   (define _cmark_delim_type
 33 |     (_enum '(no_delim period_delim paren_delim)))
 34 |   (define _cmark_opts
 35 |     (let ([opts '([sourcepos  1] ; include sourcepos attribute on block elements
 36 |                   [hardbreaks 2] ; render `softbreak` elements as hard line breaks
 37 |                   [safe       3] ; defined here for API compatibility (on by default)
 38 |                   [unsafe    17] ; render raw HTML and unsafe links
 39 |                   [nobreaks   4] ; render `softbreak` elements as spaces
 40 |                   [normalize  8] ; legacy (no effect)
 41 |                   [validate-utf8 9] ; validate UTF-8 in the input
 42 |                   [smart     10] ; straight quotes to curly, ---/-- to em/en dashes
 43 |                   )])
 44 |       (_bitmask (apply append (map (λ(o) `(,(car o) = ,(expt 2 (cadr o))))
 45 |                                    opts)))))
 46 | 
 47 |   (define-cpointer-type _node)
 48 | 
 49 |   (defcmark cmark_markdown_to_html ; Markdown bytes + options -> HTML string
 50 |     (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts ; length is filled in from `bs`; NOTE(review): passed as `_int` -- confirm it matches the C parameter type
 51 |           -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r)))) ; decode the result first, then free the returned buffer
 52 | 
 53 |   (defcmark cmark_parse_document ; Markdown bytes + options -> node
 54 |     (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts ; length is filled in from `bs`; NOTE(review): passed as `_int` -- confirm it matches the C parameter type
 55 |           -> _node))
 56 | 
 57 |   (defcmark cmark_render_html ; node + options -> HTML string
 58 |     (_fun _node _cmark_opts
 59 |           -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r)))) ; decode the result first, then free the returned buffer
 60 | 
 61 |   (defcmark cmark_node_new              (_fun _cmark_node_type -> _node))
 62 |   (defcmark cmark_node_free             (_fun _node -> _void))
 63 | 
 64 |   (defcmark cmark_node_next             (_fun _node -> _node/null))
 65 |   (defcmark cmark_node_previous         (_fun _node -> _node/null))
 66 |   (defcmark cmark_node_parent           (_fun _node -> _node/null))
 67 |   (defcmark cmark_node_first_child      (_fun _node -> _node/null))
 68 |   (defcmark cmark_node_last_child       (_fun _node -> _node/null))
 69 | 
 70 |   (defcmark cmark_node_get_user_data    (_fun _node -> _racket))
 71 |   (defcmark cmark_node_set_user_data    (_fun _node _racket -> _bool))
 72 |   (defcmark cmark_node_get_type         (_fun _node -> _cmark_node_type))
 73 |   (defcmark cmark_node_get_type_string  (_fun _node -> _bytes))
 74 |   (defcmark cmark_node_get_literal      (_fun _node -> _string))
 75 |   (defcmark cmark_node_set_literal      (_fun _node _string -> _bool))
 76 |   (defcmark cmark_node_get_heading_level (_fun _node -> _int))
 77 |   (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool))
 78 |   (defcmark cmark_node_get_list_type    (_fun _node -> _cmark_list_type))
 79 |   (defcmark cmark_node_set_list_type    (_fun _node _cmark_list_type -> _bool))
 80 |   (defcmark cmark_node_get_list_delim   (_fun _node -> _cmark_delim_type))
 81 |   (defcmark cmark_node_set_list_delim   (_fun _node _cmark_delim_type -> _bool))
 82 |   (defcmark cmark_node_get_list_start   (_fun _node -> _int))
 83 |   (defcmark cmark_node_set_list_start   (_fun _node _int -> _bool))
 84 |   (defcmark cmark_node_get_list_tight   (_fun _node -> _bool))
 85 |   (defcmark cmark_node_set_list_tight   (_fun _node _bool -> _bool))
 86 |   (defcmark cmark_node_get_fence_info   (_fun _node -> _string))
 87 |   (defcmark cmark_node_set_fence_info   (_fun _node _string -> _bool))
 88 |   (defcmark cmark_node_get_url          (_fun _node -> _string))
 89 |   (defcmark cmark_node_set_url          (_fun _node _string -> _bool))
 90 |   (defcmark cmark_node_get_title        (_fun _node -> _string))
 91 |   (defcmark cmark_node_set_title        (_fun _node _string -> _bool))
 92 |   (defcmark cmark_node_get_start_line   (_fun _node -> _int))
 93 |   (defcmark cmark_node_get_start_column (_fun _node -> _int))
 94 |   (defcmark cmark_node_get_end_line     (_fun _node -> _int))
 95 |   (defcmark cmark_node_get_end_column   (_fun _node -> _int))
 96 | 
 97 |   (defcmark cmark_node_unlink           (_fun _node -> _void))
 98 |   (defcmark cmark_node_insert_before    (_fun _node _node -> _bool))
 99 |   (defcmark cmark_node_insert_after     (_fun _node _node -> _bool))
100 |   (defcmark cmark_node_prepend_child    (_fun _node _node -> _bool))
101 |   (defcmark cmark_node_append_child     (_fun _node _node -> _bool))
102 |   (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))
103 | 
104 |   (defcmark cmark_version               (_fun -> _int))
105 |   (defcmark cmark_version_string        (_fun -> _string))
106 | 
107 |   )
108 | 
109 | ;; Rackety interface
110 | 
111 | (module high-level racket/base
112 | 
113 |   (require (submod ".." low-level) ffi/unsafe)
114 | 
115 |   (provide cmark-markdown-to-html)
116 |   (define (cmark-markdown-to-html str [options '(normalize smart)])
117 |     (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
118 |                             options))
119 | 
120 |   (require (for-syntax racket/base racket/syntax))
121 |   (define-syntax (make-getter+setter stx)
122 |     (syntax-case stx ()
123 |       [(_ name) (with-syntax ([(getter setter)
124 |                                (map (λ(op) (format-id #'name "cmark_node_~a_~a"
125 |                                                       op #'name))
126 |                                     '(get set))])
127 |                   #'(cons getter setter))]))
128 |   (define-syntax-rule (define-getters+setters name [type field ...] ...)
129 |     (define name (list (list 'type (make-getter+setter field) ...) ...)))
130 |   (define-getters+setters getters+setters
131 |     [heading heading_level] [code-block fence_info]
132 |     [link url title] [image url title]
133 |     [list list_type list_delim list_start list_tight])
134 | 
135 |   (provide cmark->sexpr) ; node tree -> nested (type info children-or-literal) lists
136 |   (define (cmark->sexpr node)
137 |     (define literal (cmark_node_get_literal node))
138 |     (define kind (cmark_node_get_type node))
139 |     (define kids ; converted children, first child first
140 |       (let walk ([child (cmark_node_first_child node)])
141 |         (if child
142 |             (cons (cmark->sexpr child) (walk (cmark_node_next child)))
143 |             '())))
144 |     (define accessors (assq kind getters+setters))
145 |     (define info ; one getter result per field listed for this node type
146 |       (if accessors (map (λ(gs) ((car gs) node)) (cdr accessors)) '()))
147 |     (define (assert-no what-not b)
148 |       (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not kind)))
149 |     (define containers '(document paragraph heading block-quote list item
150 |                          emph strong link image))
151 |     (define leaves '(text code code-block html-block html-inline
152 |                      softbreak linebreak thematic-break))
153 |     (cond [(memq kind containers) ; containers have children and must have no literal
154 |            (assert-no 'text literal) (list kind info kids)]
155 |           [(memq kind leaves) ; leaves may have a literal and must have no children
156 |            (assert-no 'children (pair? kids)) (list kind info literal)]
157 |           [else (error 'cmark->sexpr "unknown type: ~s" kind)]))
158 | 
159 |   (provide sexpr->cmark) ; builds a node tree from a nested (type info data) list
160 |   (define (sexpr->cmark sexpr) ; assumes valid input, as generated by `cmark->sexpr`
161 |     (define (loop sexpr) ; builds one node plus its subtree; registers no finalizer
162 |       (define type (car sexpr))
163 |       (define info (cadr sexpr))
164 |       (define data (caddr sexpr))
165 |       (define node (cmark_node_new type))
166 |       (let ([gss (assq type getters+setters)]) ; apply the setters listed for this type
167 |         (when gss
168 |           (unless (= (length (cdr gss)) (length info))
169 |             (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
170 |           (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info)))
171 |       (cond [(string? data) (cmark_node_set_literal node data)]
172 |             [(not data) (void)]
173 |             [(list? data) ; children: recurse with `loop`, not `sexpr->cmark`, so that
174 |              (for ([child (in-list data)]) ; no child gets a finalizer that could free it inside the tree
175 |                (cmark_node_append_child node (loop child)))]
176 |             [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
177 |       node)
178 |     (define root (loop sexpr))
179 |     (register-finalizer root cmark_node_free) ; only the root is finalized
180 |     root)
181 | 
182 |   ;; Registers a `cmark_node_free` finalizer
183 |   (provide cmark-parse-document)
184 |   (define (cmark-parse-document str [options '(normalize smart)])
185 |     (define root (cmark_parse_document
186 |                   (if (bytes? str) str (string->bytes/utf-8 str))
187 |                   options))
188 |     (register-finalizer root cmark_node_free)
189 |     root)
190 | 
191 |   (provide cmark-render-html)
192 |   (define (cmark-render-html root [options '(normalize smart)])
193 |     (cmark_render_html root options)))
194 | 
195 | #; ;; sample use
196 | (begin
197 |   (require 'high-level racket/string)
198 |   (cmark-render-html
199 |    (cmark-parse-document
200 |     (string-join '("foo"
201 |                    "==="
202 |                    ""
203 |                    "> blah"
204 |                    ">"
205 |                    "> blah *blah* `bar()` blah:"
206 |                    ">"
207 |                    ">     function foo() {"
208 |                    ">       bar();"
209 |                    ">     }")
210 |                  "\n"))))
211 | 


--------------------------------------------------------------------------------