├── .editorconfig ├── .gitattributes ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── ci.yml │ └── fuzz.yml ├── .gitignore ├── CMakeLists.txt ├── COPYING ├── Makefile ├── Makefile.nmake ├── README.md ├── api_test ├── CMakeLists.txt ├── cplusplus.cpp ├── cplusplus.h ├── harness.c ├── harness.h └── main.c ├── bench ├── samples │ ├── block-bq-flat.md │ ├── block-bq-nested.md │ ├── block-code.md │ ├── block-fences.md │ ├── block-heading.md │ ├── block-hr.md │ ├── block-html.md │ ├── block-lheading.md │ ├── block-list-flat.md │ ├── block-list-nested.md │ ├── block-ref-flat.md │ ├── block-ref-nested.md │ ├── inline-autolink.md │ ├── inline-backticks.md │ ├── inline-em-flat.md │ ├── inline-em-nested.md │ ├── inline-em-worst.md │ ├── inline-entity.md │ ├── inline-escape.md │ ├── inline-html.md │ ├── inline-links-flat.md │ ├── inline-links-nested.md │ ├── inline-newlines.md │ ├── lorem1.md │ └── rawtabs.md ├── statistics.py └── stats.py ├── benchmarks.md ├── changelog.txt ├── cmake └── modules │ └── FindAsan.cmake ├── data └── CaseFolding.txt ├── fuzz ├── CMakeLists.txt ├── afl_test_cases │ └── test.md ├── cmark-fuzz.c └── dictionary ├── man ├── CMakeLists.txt ├── make_man_page.py ├── man1 │ └── cmark.1 └── man3 │ └── cmark.3 ├── nmake.bat ├── shell.nix ├── src ├── CMakeLists.txt ├── blocks.c ├── buffer.c ├── buffer.h ├── case_fold.inc ├── chunk.h ├── cmark.c ├── cmark.h ├── cmarkConfig.cmake.in ├── cmark_ctype.c ├── cmark_ctype.h ├── cmark_version.h.in ├── commonmark.c ├── entities.inc ├── houdini.h ├── houdini_href_e.c ├── houdini_html_e.c ├── houdini_html_u.c ├── html.c ├── inlines.c ├── inlines.h ├── iterator.c ├── iterator.h ├── latex.c ├── libcmark.pc.in ├── main.c ├── man.c ├── node.c ├── node.h ├── parser.h ├── references.c ├── references.h ├── render.c ├── render.h ├── scanners.c ├── scanners.h ├── scanners.re ├── utf8.c ├── utf8.h └── xml.c ├── test ├── CMakeLists.txt ├── cmark.py ├── entity_tests.py ├── normalize.py ├── pathological_tests.py ├── 
regression.txt ├── roundtrip_tests.py ├── smart_punct.txt ├── spec.txt └── spec_tests.py ├── toolchain-mingw32.cmake ├── tools ├── appveyor-build.bat ├── make_case_fold_inc.py ├── make_entities_inc.py └── xml2md.xsl ├── why-cmark-and-not-x.md └── wrappers ├── wrapper.php ├── wrapper.py ├── wrapper.rb └── wrapper.rkt /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | end_of_line = lf 7 | charset = utf-8 8 | insert_final_newline = true 9 | 10 | [*.{c,h}] 11 | trim_trailing_whitespace = true 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [Makefile] 16 | trim_trailing_whitespace = true 17 | indent_style = tab 18 | indent_size = 8 19 | 20 | [Makefile.nmake] 21 | end_of_line = crlf 22 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | Makefile.nmake eol=crlf 3 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [jgm] 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "daily" 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI tests 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'dependabot/**' 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | 12 | linter: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | 18 | - uses: actions/checkout@v4 
19 | - name: Install clang-tidy 20 | run: | 21 | sudo apt-get install -y clang-tidy 22 | - name: lint with clang-tidy 23 | run: | 24 | make lint 25 | env: 26 | CC: clang 27 | CXX: clang++ 28 | 29 | posix: 30 | 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | os: [linux, macos] 35 | cc: [clang, gcc] 36 | build_type: [shared, static] 37 | sanitizers: ['', ASan] 38 | 39 | include: 40 | # Translate human readable labels 41 | - os: 'linux' 42 | image: 'ubuntu-latest' 43 | - os: 'macos' 44 | image: 'macos-latest' 45 | - cc: 'clang' 46 | cxx: 'clang++' 47 | - cc: 'gcc' 48 | cxx: 'g++' 49 | - build_type: 'shared' 50 | cmake_shared: 'YES' 51 | - build_type: 'static' 52 | cmake_shared: 'NO' 53 | - sanitizers: 'ASan' 54 | san_cflags: '-fsanitize=address,undefined -fno-sanitize-recover=all' 55 | 56 | # When building shared libraries, they will be loaded 57 | # dynamically from Python when testing. This means that 58 | # we have to use a preloaded shared libasan. 59 | - sanitizers: 'ASan' 60 | os: 'linux' 61 | cc: 'gcc' 62 | build_type: 'shared' 63 | test_env: 'LD_PRELOAD=$(gcc -print-file-name=libasan.so)' 64 | - sanitizers: 'ASan' 65 | os: 'linux' 66 | cc: 'clang' 67 | build_type: 'shared' 68 | # clang defaults to -static-libasan 69 | asan_cflags: '-shared-libasan' 70 | test_env: 'LD_PRELOAD=$(clang -print-file-name=libclang_rt.asan-x86_64.so)' 71 | 72 | # We have to disable LeakSanitizer in shared library builds 73 | # because we get false positives from Python. 74 | - sanitizers: 'ASan' 75 | build_type: 'shared' 76 | asan_opts: 'detect_leaks=0' 77 | 78 | # The static build can run with LeakSanitizer. 79 | # gcc defaults to -shared-libasan and needs -static-libasan 80 | - sanitizers: 'ASan' 81 | cc: 'gcc' 82 | build_type: 'static' 83 | asan_cflags: '-static-libasan' 84 | 85 | exclude: 86 | # gcc is just an alias for clang on macOS 87 | - os: 'macos' 88 | cc: 'gcc' 89 | # Shared libasan doesn't work with macOS system Python. 
90 | - os: 'macos' 91 | sanitizers: 'ASan' 92 | build_type: 'shared' 93 | 94 | runs-on: ${{ matrix.image }} 95 | 96 | env: 97 | ASAN_OPTIONS: ${{ matrix.asan_opts }} 98 | CC: ${{ matrix.cc }} 99 | CXX: ${{ matrix.cxx }} 100 | CFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}' 101 | CXXFLAGS: '${{ matrix.san_cflags }} ${{ matrix.asan_cflags }}' 102 | 103 | steps: 104 | - uses: actions/checkout@v4 105 | - name: Build and test 106 | run: | 107 | cmake \ 108 | -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} \ 109 | -DCMAKE_BUILD_TYPE=Debug \ 110 | -S . -B build 111 | cmake --build build 112 | ${{ matrix.test_env }} ctest --test-dir build --output-on-failure 113 | 114 | windows: 115 | 116 | runs-on: windows-latest 117 | strategy: 118 | fail-fast: false 119 | matrix: 120 | build_type: [shared, static] 121 | include: 122 | - build_type: 'shared' 123 | cmake_shared: 'YES' 124 | - build_type: 'static' 125 | cmake_shared: 'NO' 126 | 127 | steps: 128 | - uses: actions/checkout@v4 129 | - uses: ilammy/msvc-dev-cmd@v1 130 | - name: Build and test 131 | run: | 132 | cmake ^ 133 | -DBUILD_SHARED_LIBS=${{ matrix.cmake_shared }} ^ 134 | -DCMAKE_BUILD_TYPE=Debug ^ 135 | -S . 
-B build 136 | cmake --build build 137 | ctest --test-dir build -C Debug --output-on-failure 138 | shell: cmd 139 | - name: Upload artifact 140 | if: ${{ matrix.build_type == 'static' }} 141 | uses: actions/upload-artifact@v4 142 | with: 143 | name: cmark windows ${{ matrix.build_type }} 144 | path: build/src/Debug/cmark.exe 145 | if-no-files-found: error 146 | -------------------------------------------------------------------------------- /.github/workflows/fuzz.yml: -------------------------------------------------------------------------------- 1 | name: CIFuzz 2 | on: [pull_request] 3 | jobs: 4 | Fuzzing: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Build Fuzzers 8 | uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master 9 | with: 10 | oss-fuzz-project-name: 'cmark' 11 | dry-run: false 12 | - name: Run Fuzzers 13 | uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master 14 | with: 15 | oss-fuzz-project-name: 'cmark' 16 | fuzz-seconds: 600 17 | dry-run: false 18 | - name: Upload Crash 19 | uses: actions/upload-artifact@v4 20 | if: failure() 21 | with: 22 | name: artifacts 23 | path: ./out/artifacts 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Libraries 8 | *.lib 9 | *.a 10 | 11 | # Shared objects (inc. Windows DLLs) 12 | *.dll 13 | *.so 14 | *.so.* 15 | *.dylib 16 | 17 | # Executables 18 | *.exe 19 | *.out 20 | *.app 21 | *.i*86 22 | *.x86_64 23 | *.hex 24 | *.pyc 25 | 26 | *~ 27 | *.bak 28 | *.sw? 
29 | *.diff 30 | *# 31 | *.zip 32 | bstrlib.txt 33 | build 34 | cmark.dSYM/* 35 | cmark 36 | 37 | # Visual Studio CMake support 38 | /.vs/ 39 | /out/ 40 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.7) 2 | 3 | if(POLICY CMP0063) 4 | cmake_policy(SET CMP0063 NEW) 5 | endif() 6 | if(POLICY CMP0092) 7 | cmake_policy(SET CMP0092 NEW) 8 | endif() 9 | 10 | project(cmark 11 | LANGUAGES C CXX 12 | VERSION 0.31.1) 13 | 14 | if(CMAKE_BUILD_TYPE STREQUAL Asan) 15 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules) 16 | include(FindAsan) 17 | endif() 18 | 19 | # Include module for functions 20 | # - 'write_basic_package_version_file' 21 | # - 'configure_package_config_file' 22 | include(CMakePackageConfigHelpers) 23 | include(CTest) 24 | include(GenerateExportHeader) 25 | include(GNUInstallDirs) 26 | 27 | if(NOT MSVC OR CMAKE_HOST_SYSTEM_NAME STREQUAL Windows) 28 | set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) 29 | include(InstallRequiredSystemLibraries) 30 | endif() 31 | 32 | if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") 33 | message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make") 34 | endif() 35 | 36 | # Backwards Compatibility 37 | # TODO: remove this once there has been a period to enable people to migrate 38 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO) 39 | if(DEFINED CMARK_SHARED) 40 | message(AUTHOR_WARNING [=[ 41 | 'CMARK_SHARED' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type. 42 | ]=]) 43 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT ${CMARK_SHARED}) 44 | endif() 45 | if(DEFINED CMARK_STATIC) 46 | message(AUTHOR_WARNING [=[ 47 | 'CMARK_STATIC' has been replaced with the standard 'BUILD_SHARED_LIBS' to control the library type. 
48 | ]=]) 49 | if(NOT CMARK_STATIC) 50 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT YES) 51 | else() 52 | set(_CMARK_BUILD_SHARED_LIBS_DEFAULT NO) 53 | endif() 54 | endif() 55 | 56 | option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF) 57 | option(BUILD_SHARED_LIBS "Build the CMark library as shared" 58 | ${_CMARK_BUILD_SHARED_LIBS_DEFAULT}) 59 | 60 | if(NOT MSVC) 61 | set(CMAKE_C_STANDARD 99) 62 | set(CMAKE_C_STANDARD_REQUIRED YES) 63 | set(CMAKE_C_EXTENSIONS NO) 64 | endif() 65 | 66 | # -fvisibility=hidden 67 | set(CMAKE_C_VISIBILITY_PRESET hidden) 68 | set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) 69 | 70 | set(CMAKE_INCLUDE_CURRENT_DIR ON) 71 | 72 | if (DEFINED CMAKE_INSTALL_RPATH) 73 | set(Base_rpath "${CMAKE_INSTALL_RPATH}") 74 | else() 75 | if(BUILD_SHARED_LIBS) 76 | set(p "${CMAKE_INSTALL_FULL_LIBDIR}") 77 | list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${p}" i) 78 | if("${i}" STREQUAL "-1") 79 | set(Base_rpath "${p}") 80 | endif() 81 | endif() 82 | endif() 83 | 84 | # Append non-standard external dependency directories, if any. 85 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 86 | 87 | # Check integrity of node structure when compiled as debug 88 | add_compile_options($<$<CONFIG:Debug>:-DCMARK_DEBUG_NODES>) 89 | 90 | # In order to maintain compatibility with older platforms which may not have a 91 | # recent version of CMake (i.e. are running CMake <3.3), we cannot simply use 92 | # the `add_compile_options` with a generator expression. This uses the 93 | # `target_compile_options` with `PRIVATE` to add the flags only to the targets 94 | # so that CMark may be used in projects with non-C languages. 
95 | function(cmark_add_compile_options target) 96 | if(MSVC) 97 | target_compile_definitions(${target} PRIVATE _CRT_SECURE_NO_WARNINGS) 98 | else() 99 | target_compile_options(${target} PRIVATE 100 | -Wall -Wextra -pedantic 101 | $<$<COMPILE_LANGUAGE:C>:-Wstrict-prototypes>) 102 | endif() 103 | if(CMAKE_BUILD_TYPE STREQUAL Profile) 104 | target_compile_options(${target} PRIVATE -pg) 105 | endif() 106 | if(CMAKE_BUILD_TYPE STREQUAL Ubsan) 107 | target_compile_options(${target} PRIVATE -fsanitize=undefined) 108 | endif() 109 | if(CMARK_LIB_FUZZER) 110 | if(target MATCHES fuzz) 111 | target_compile_options(${target} PRIVATE -fsanitize=fuzzer) 112 | target_link_options(${target} PRIVATE -fsanitize=fuzzer) 113 | else() 114 | target_compile_options(${target} PRIVATE -fsanitize=fuzzer-no-link) 115 | target_link_options(${target} PRIVATE -fsanitize=fuzzer-no-link) 116 | endif() 117 | endif() 118 | endfunction() 119 | 120 | add_subdirectory(src) 121 | # TODO(compnerd) should this be enabled for MinGW, which sets CMAKE_SYSTEM_NAME 122 | # to Windows, but defines `MINGW`. 123 | if(NOT CMAKE_SYSTEM_NAME STREQUAL Windows) 124 | add_subdirectory(man) 125 | endif() 126 | if(BUILD_TESTING) 127 | add_subdirectory(api_test) 128 | add_subdirectory(test testdir) 129 | endif() 130 | if(CMARK_LIB_FUZZER) 131 | add_subdirectory(fuzz) 132 | endif() 133 | 134 | if(NOT CMAKE_BUILD_TYPE) 135 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING 136 | "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE) 137 | endif(NOT CMAKE_BUILD_TYPE) 138 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, John MacFarlane 2 | 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | ----- 29 | 30 | houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c 31 | 32 | derive from https://github.com/vmg/houdini (with some modifications) 33 | 34 | Copyright (C) 2012 Vicent Martí 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy of 37 | this software and associated documentation files (the "Software"), to deal in 38 | the Software without restriction, including without limitation the rights to 39 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 40 | of the Software, and to permit persons to whom the Software is furnished to do 41 | so, subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in all 44 | copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 49 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 52 | SOFTWARE. 53 | 54 | ----- 55 | 56 | buffer.h, buffer.c, chunk.h 57 | 58 | are derived from code (C) 2012 Github, Inc. 
59 | 60 | Permission is hereby granted, free of charge, to any person obtaining a copy of 61 | this software and associated documentation files (the "Software"), to deal in 62 | the Software without restriction, including without limitation the rights to 63 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 64 | of the Software, and to permit persons to whom the Software is furnished to do 65 | so, subject to the following conditions: 66 | 67 | The above copyright notice and this permission notice shall be included in all 68 | copies or substantial portions of the Software. 69 | 70 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 71 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 72 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 73 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 74 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 75 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 76 | SOFTWARE. 77 | 78 | ----- 79 | 80 | utf8.c and utf8.h 81 | 82 | are derived from utf8proc 83 | (<http://www.public-software-group.org/utf8proc>), 84 | (C) 2009 Public Software Group e. V., Berlin, Germany. 85 | 86 | Permission is hereby granted, free of charge, to any person obtaining a 87 | copy of this software and associated documentation files (the "Software"), 88 | to deal in the Software without restriction, including without limitation 89 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 90 | and/or sell copies of the Software, and to permit persons to whom the 91 | Software is furnished to do so, subject to the following conditions: 92 | 93 | The above copyright notice and this permission notice shall be included in 94 | all copies or substantial portions of the Software. 
95 | 96 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 97 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 98 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 99 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 100 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 101 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 102 | DEALINGS IN THE SOFTWARE. 103 | 104 | ----- 105 | 106 | The normalization code in normalize.py was derived from the 107 | markdowntest project, Copyright 2013 Karl Dubost: 108 | 109 | The MIT License (MIT) 110 | 111 | Copyright (c) 2013 Karl Dubost 112 | 113 | Permission is hereby granted, free of charge, to any person obtaining 114 | a copy of this software and associated documentation files (the 115 | "Software"), to deal in the Software without restriction, including 116 | without limitation the rights to use, copy, modify, merge, publish, 117 | distribute, sublicense, and/or sell copies of the Software, and to 118 | permit persons to whom the Software is furnished to do so, subject to 119 | the following conditions: 120 | 121 | The above copyright notice and this permission notice shall be 122 | included in all copies or substantial portions of the Software. 123 | 124 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 125 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 126 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 127 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 128 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 129 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 130 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
131 | 132 | ----- 133 | 134 | The CommonMark spec (test/spec.txt) is 135 | 136 | Copyright (C) 2014-15 John MacFarlane 137 | 138 | Released under the Creative Commons CC-BY-SA 4.0 license: 139 | <http://creativecommons.org/licenses/by-sa/4.0/>. 140 | 141 | ----- 142 | 143 | The test software in test/ is 144 | 145 | Copyright (c) 2014, John MacFarlane 146 | 147 | All rights reserved. 148 | 149 | Redistribution and use in source and binary forms, with or without 150 | modification, are permitted provided that the following conditions are met: 151 | 152 | * Redistributions of source code must retain the above copyright 153 | notice, this list of conditions and the following disclaimer. 154 | 155 | * Redistributions in binary form must reproduce the above 156 | copyright notice, this list of conditions and the following 157 | disclaimer in the documentation and/or other materials provided 158 | with the distribution. 159 | 160 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 161 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 162 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 163 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 164 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 165 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 166 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 167 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 168 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 169 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 170 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
171 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRCDIR=src 2 | DATADIR=data 3 | BUILDDIR?=build 4 | GENERATOR?=Unix Makefiles 5 | MINGW_BUILDDIR?=build-mingw 6 | MINGW_INSTALLDIR?=windows 7 | SPEC=test/spec.txt 8 | SITE=_site 9 | SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC)) 10 | FUZZCHARS?=2000000 # for fuzztest 11 | BENCHDIR=bench 12 | BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md) 13 | BENCHFILE=$(BENCHDIR)/benchinput.md 14 | ALLTESTS=alltests.md 15 | NUMRUNS?=10 16 | CMARK=$(BUILDDIR)/src/cmark 17 | CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz 18 | PROG?=$(CMARK) 19 | VERSION?=$(SPECVERSION) 20 | RELEASE?=cmark-$(VERSION) 21 | INSTALL_PREFIX?=/usr/local 22 | CLANG_CHECK?=clang-check 23 | CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i 24 | AFL_PATH?=/usr/local/bin 25 | 26 | .PHONY: all cmake_build leakcheck clean fuzztest test debug ubsan asan mingw archive newbench bench format update-spec afl libFuzzer lint 27 | 28 | all: cmake_build man/man3/cmark.3 29 | 30 | $(CMARK): cmake_build 31 | 32 | cmake_build: $(BUILDDIR) 33 | cmake --build $(BUILDDIR) 34 | @echo "Binaries can be found in $(BUILDDIR)/src" 35 | 36 | $(BUILDDIR): 37 | @cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1) 38 | cmake \ 39 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 40 | -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ 41 | -DCMAKE_INSTALL_PREFIX=$(INSTALL_PREFIX) \ 42 | -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ 43 | -DBUILD_SHARED_LIBS=YES 44 | 45 | install: $(BUILDDIR) 46 | cmake --install $(BUILDDIR) 47 | 48 | uninstall: $(BUILDDIR)/install_manifest.txt 49 | xargs rm < $< 50 | 51 | debug: 52 | cmake \ 53 | -S . 
-B $(BUILDDIR) -G "$(GENERATOR)" \ 54 | -DCMAKE_BUILD_TYPE=Debug \ 55 | -DBUILD_SHARED_LIBS=YES 56 | cmake --build $(BUILDDIR) 57 | 58 | ubsan: 59 | cmake \ 60 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 61 | -DCMAKE_BUILD_TYPE=Ubsan 62 | cmake --build $(BUILDDIR) 63 | 64 | asan: 65 | cmake \ 66 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 67 | -DCMAKE_BUILD_TYPE=Asan 68 | cmake --build $(BUILDDIR) 69 | 70 | prof: 71 | cmake \ 72 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 73 | -DCMAKE_BUILD_TYPE=Profile 74 | cmake --build $(BUILDDIR) 75 | 76 | afl: 77 | @[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; } 78 | cmake \ 79 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 80 | -DBUILD_TESTING=NO \ 81 | -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang 82 | cmake --build $(BUILDDIR) 83 | $(AFL_PATH)/afl-fuzz \ 84 | -i fuzz/afl_test_cases \ 85 | -o fuzz/afl_results \ 86 | -x fuzz/dictionary \ 87 | -t 100 \ 88 | $(CMARK) $(CMARK_OPTS) 89 | 90 | libFuzzer: 91 | cmake \ 92 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 93 | -DCMAKE_C_COMPILER=clang \ 94 | -DCMAKE_CXX_COMPILER=clang++ \ 95 | -DCMAKE_BUILD_TYPE=Asan \ 96 | -DCMARK_LIB_FUZZER=ON 97 | cmake --build $(BUILDDIR) 98 | mkdir -p fuzz/corpus 99 | $(BUILDDIR)/fuzz/cmark-fuzz \ 100 | -dict=fuzz/dictionary \ 101 | -max_len=1000 \ 102 | -timeout=1 \ 103 | fuzz/corpus 104 | 105 | lint: $(BUILDDIR) 106 | errs=0 ; \ 107 | for f in `ls src/*.[ch] | grep -v "scanners.c"` ; \ 108 | do echo $$f ; clang-tidy -header-filter='^build/.*' -p=build -warnings-as-errors='*' $$f || errs=1 ; done ; \ 109 | exit $$errs 110 | 111 | mingw: 112 | cmake \ 113 | -S . 
-B $(MINGW_BUILDDIR) -G "$(GENERATOR)" \ 114 | -DCMAKE_TOOLCHAIN_FILE=toolchain-mingw32.cmake \ 115 | -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) 116 | cmake --build $(MINGW_BUILDDIR) 117 | cmake --install $(MINGW_BUILDDIR) 118 | 119 | man/man3/cmark.3: src/cmark.h | $(CMARK) 120 | python3 man/make_man_page.py $< > $@ \ 121 | 122 | archive: 123 | git archive --prefix=$(RELEASE)/ -o $(RELEASE).tar.gz HEAD 124 | git archive --prefix=$(RELEASE)/ -o $(RELEASE).zip HEAD 125 | 126 | clean: 127 | rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR) 128 | 129 | # We include case_fold.inc in the repository, so this shouldn't 130 | # normally need to be generated. 131 | $(SRCDIR)/case_fold.inc: $(DATADIR)/CaseFolding.txt 132 | python3 tools/make_case_fold_inc.py < $< > $@ 133 | 134 | # We include scanners.c in the repository, so this shouldn't 135 | # normally need to be generated. 136 | $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re 137 | @case "$$(re2c -v)" in \ 138 | *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ 139 | echo "re2c >= 0.14.2 is required"; \ 140 | false; \ 141 | ;; \ 142 | esac 143 | re2c -W -Werror --case-insensitive -b -i --no-generation-date \ 144 | -o $@ $< 145 | $(CLANG_FORMAT) $@ 146 | 147 | # We include entities.inc in the repository, so normally this 148 | # doesn't need to be regenerated: 149 | $(SRCDIR)/entities.inc: tools/make_entities_inc.py 150 | python3 $< > $@ 151 | 152 | update-spec: 153 | curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\ 154 | > $(SPEC) 155 | 156 | test: cmake_build 157 | ctest --test-dir $(BUILDDIR) --output-on-failure || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1) 158 | 159 | $(ALLTESTS): 160 | python3 test/spec_tests.py --spec $(SPEC) --dump-tests | python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))' > $@ 161 | 162 | leakcheck: $(ALLTESTS) 163 | for format in html man xml latex commonmark; do \ 164 | for opts in "" 
"--smart"; do \ 165 | echo "cmark -t $$format $$opts" ; \ 166 | valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format $$opts $(ALLTESTS) >/dev/null || exit 1;\ 167 | done; \ 168 | done; 169 | 170 | fuzztest: 171 | { for i in `seq 1 10`; do \ 172 | cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \ 173 | /usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \ 174 | done } 2>&1 | grep 'user\|abnormally' 175 | 176 | progit: 177 | git clone https://github.com/progit/progit.git 178 | 179 | $(BENCHFILE): progit 180 | echo "" > $@ 181 | for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \ 182 | cat progit/$$lang/*/*.markdown >> $@; \ 183 | done 184 | 185 | # for more accurate results, run with 186 | # sudo renice -10 $$; make bench 187 | bench: $(BENCHFILE) 188 | { for x in `seq 1 $(NUMRUNS)` ; do \ 189 | /usr/bin/env time -p $(PROG) /dev/null ; \ 190 | /usr/bin/env time -p $(PROG) $< >/dev/null ; \ 191 | done \ 192 | } 2>&1 | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py' 193 | 194 | newbench: 195 | for f in $(BENCHSAMPLES) ; do \ 196 | printf "%26s " `basename $$f` ; \ 197 | { for x in `seq 1 $(NUMRUNS)` ; do \ 198 | /usr/bin/env time -p $(PROG) /dev/null ; \ 199 | for x in `seq 1 200` ; do cat $$f ; done | \ 200 | /usr/bin/env time -p $(PROG) > /dev/null; \ 201 | done \ 202 | } 2>&1 | grep 'real' | awk '{print $$2}' | \ 203 | python3 'bench/stats.py'; done 204 | 205 | format: 206 | $(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h 207 | 208 | operf: $(CMARK) 209 | operf $< < $(BENCHFILE) > /dev/null 210 | 211 | distclean: clean 212 | -rm -rf *.dSYM 213 | -rm -f README.html 214 | -rm -rf $(BENCHFILE) $(ALLTESTS) progit 215 | -------------------------------------------------------------------------------- /Makefile.nmake: -------------------------------------------------------------------------------- 
1 | SRCDIR=src 2 | DATADIR=data 3 | BUILDDIR=build 4 | INSTALLDIR=windows 5 | PROG=$(BUILDDIR)\src\cmark.exe 6 | GENERATOR=NMake Makefiles 7 | 8 | all: $(BUILDDIR)/CMakeFiles 9 | cmake --build $(BUILDDIR) 10 | 11 | $(BUILDDIR)/CMakeFiles: 12 | cmake \ 13 | -S . -B $(BUILDDIR) -G "$(GENERATOR)" \ 14 | -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \ 15 | -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) 16 | 17 | install: all 18 | cmake --install $(BUILDDIR) 19 | 20 | clean: 21 | -rmdir /s /q $(BUILDDIR) $(INSTALLDIR) 2> nul 22 | 23 | test: all 24 | ctest --test-dir $(BUILDDIR) --output-on-failure 25 | 26 | distclean: clean 27 | del /q src\scanners.c 2> nul 28 | del /q spec.md spec.html 2> nul 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | cmark 2 | ===== 3 | 4 | [![CI 5 | tests](https://github.com/commonmark/cmark/workflows/CI%20tests/badge.svg)](https://github.com/commonmark/cmark/actions) 6 | 7 | `cmark` is the C reference implementation of [CommonMark], a 8 | rationalized version of Markdown syntax with a [spec][the spec]. 9 | (For the JavaScript reference implementation, see 10 | [commonmark.js].) 11 | 12 | It provides a shared library (`libcmark`) with functions for parsing 13 | CommonMark documents to an abstract syntax tree (AST), manipulating 14 | the AST, and rendering the document to HTML, groff man, LaTeX, 15 | CommonMark, or an XML representation of the AST. It also provides a 16 | command-line program (`cmark`) for parsing and rendering CommonMark 17 | documents. 18 | 19 | Advantages of this library: 20 | 21 | - **Portable.** The library and program are written in standard 22 | C99 and have no external dependencies. They have been tested with 23 | MSVC, gcc, tcc, and clang. 24 | 25 | - **Fast.** cmark can render a Markdown version of *War and Peace* in 26 | the blink of an eye (127 milliseconds on a ten year old laptop, 27 | vs. 
100-400 milliseconds for an eye blink). In our [benchmarks], 28 | cmark is 10,000 times faster than the original `Markdown.pl`, and 29 | on par with the very fastest available Markdown processors. 30 | 31 | - **Accurate.** The library passes all CommonMark conformance tests. 32 | 33 | - **Standardized.** The library can be expected to parse CommonMark 34 | the same way as any other conforming parser. So, for example, 35 | you can use `commonmark.js` on the client to preview content that 36 | will be rendered on the server using `cmark`. 37 | 38 | - **Robust.** The library has been extensively fuzz-tested using 39 | [american fuzzy lop]. The test suite includes pathological cases 40 | that bring many other Markdown parsers to a crawl (for example, 41 | thousands-deep nested bracketed text or block quotes). 42 | 43 | - **Flexible.** CommonMark input is parsed to an AST which can be 44 | manipulated programmatically prior to rendering. 45 | 46 | - **Multiple renderers.** Output in HTML, groff man, LaTeX, CommonMark, 47 | and a custom XML format is supported. And it is easy to write new 48 | renderers to support other formats. 49 | 50 | - **Free.** BSD2-licensed. 51 | 52 | It is easy to use `libcmark` in python, lua, ruby, and other dynamic 53 | languages: see the `wrappers/` subdirectory for some simple examples. 54 | 55 | There are also libraries that wrap `libcmark` for 56 | [Go](https://github.com/rhinoman/go-commonmark), 57 | [Haskell](https://hackage.haskell.org/package/cmark), 58 | [Ruby](https://github.com/gjtorikian/commonmarker), 59 | [Lua](https://github.com/jgm/cmark-lua), 60 | [Perl](https://metacpan.org/release/CommonMark), 61 | [Python](https://pypi.python.org/pypi/paka.cmark), 62 | [R](https://cran.r-project.org/package=commonmark), 63 | [Scala](https://github.com/sparsetech/cmark-scala) and 64 | [PHP](https://www.php.net/manual/en/book.cmark.php). 
65 | 66 | Installing 67 | ---------- 68 | 69 | Building the C program (`cmark`) and shared library (`libcmark`) 70 | requires [cmake]. If you modify `scanners.re`, then you will also 71 | need [re2c] \(>= 0.14.2\), which is used to generate `scanners.c` from 72 | `scanners.re`. We have included a pre-generated `scanners.c` in 73 | the repository to reduce build dependencies. 74 | 75 | If you have GNU make, you can simply `make`, `make test`, and `make 76 | install`. This calls [cmake] to create a `Makefile` in the `build` 77 | directory, then uses that `Makefile` to create the executable and 78 | library. The binaries can be found in `build/src`. The default 79 | installation prefix is `/usr/local`. To change the installation 80 | prefix, pass the `INSTALL_PREFIX` variable if you run `make` for the 81 | first time: `make INSTALL_PREFIX=path`. 82 | 83 | For a more portable method, you can use [cmake] manually. [cmake] knows 84 | how to create build environments for many build systems. For example, 85 | on FreeBSD: 86 | 87 | cmake -S . -B build # optionally: -DCMAKE_INSTALL_PREFIX=path 88 | cmake --build build # executable will be created as build/src/cmark 89 | ctest --test-dir build 90 | cmake --install build 91 | 92 | Or, to create Xcode project files on OSX: 93 | 94 | cmake -S . -B build -G Xcode 95 | open build/cmark.xcodeproj 96 | 97 | The GNU Makefile also provides a few other targets for developers. 98 | To run a benchmark: 99 | 100 | make bench 101 | 102 | For more detailed benchmarks: 103 | 104 | make newbench 105 | 106 | To run a test for memory leaks using `valgrind`: 107 | 108 | make leakcheck 109 | 110 | To reformat source code using `clang-format`: 111 | 112 | make format 113 | 114 | To run a "fuzz test" against ten long randomly generated inputs: 115 | 116 | make fuzztest 117 | 118 | To do a more systematic fuzz test with [american fuzzy lop]: 119 | 120 | AFL_PATH=/path/to/afl_directory make afl 121 | 122 | Fuzzing with [libFuzzer] is also supported. 
The fuzzer can be run with: 123 | 124 | make libFuzzer 125 | 126 | To make a release tarball and zip archive: 127 | 128 | make archive 129 | 130 | Installing (Windows) 131 | -------------------- 132 | 133 | To compile with MSVC and NMAKE: 134 | 135 | nmake /f Makefile.nmake 136 | 137 | You can cross-compile a Windows binary and dll on linux if you have the 138 | `mingw32` compiler: 139 | 140 | make mingw 141 | 142 | The binaries will be in `build-mingw/windows/bin`. 143 | 144 | Usage 145 | ----- 146 | 147 | Instructions for the use of the command line program and library can 148 | be found in the man pages in the `man` subdirectory. 149 | 150 | Security 151 | -------- 152 | 153 | By default, the library will scrub raw HTML and potentially 154 | dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). 155 | 156 | To allow these, use the option `CMARK_OPT_UNSAFE` (or 157 | `--unsafe`) with the command line program. If doing so, we 158 | recommend you use a HTML sanitizer specific to your needs to 159 | protect against [XSS 160 | attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). 161 | 162 | Contributing 163 | ------------ 164 | 165 | There is a [forum for discussing 166 | CommonMark](http://talk.commonmark.org); you should use it instead of 167 | github issues for questions and possibly open-ended discussions. 168 | Use the [github issue tracker](http://github.com/commonmark/CommonMark/issues) 169 | only for simple, clear, actionable issues. 170 | 171 | Authors 172 | ------- 173 | 174 | John MacFarlane wrote the original library and program. 175 | The block parsing algorithm was worked out together with David 176 | Greenspan. Vicent Marti optimized the C implementation for 177 | performance, increasing its speed tenfold. Kārlis Gaņģis helped 178 | work out a better parsing algorithm for links and emphasis, 179 | eliminating several worst-case performance issues. 
180 | Nick Wellnhofer contributed many improvements, including 181 | most of the C library's API and its test harness. 182 | 183 | [benchmarks]: benchmarks.md 184 | [the spec]: http://spec.commonmark.org 185 | [CommonMark]: http://commonmark.org 186 | [cmake]: http://www.cmake.org/download/ 187 | [re2c]: http://re2c.org 188 | [commonmark.js]: https://github.com/commonmark/commonmark.js 189 | [american fuzzy lop]: http://lcamtuf.coredump.cx/afl/ 190 | [libFuzzer]: http://llvm.org/docs/LibFuzzer.html 191 | -------------------------------------------------------------------------------- /api_test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(api_test 2 | cplusplus.cpp 3 | harness.c 4 | harness.h 5 | main.c 6 | ) 7 | cmark_add_compile_options(api_test) 8 | target_link_libraries(api_test PRIVATE 9 | cmark) 10 | 11 | add_test(NAME api_test COMMAND api_test) 12 | if(WIN32) 13 | set_tests_properties(api_test PROPERTIES 14 | ENVIRONMENT "PATH=$$$ENV{PATH}") 15 | endif() 16 | 17 | -------------------------------------------------------------------------------- /api_test/cplusplus.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cmark.h" 4 | #include "cplusplus.h" 5 | #include "harness.h" 6 | 7 | void 8 | test_cplusplus(test_batch_runner *runner) 9 | { 10 | static const char md[] = "paragraph\n"; 11 | char *html = cmark_markdown_to_html(md, sizeof(md) - 1, CMARK_OPT_DEFAULT); 12 | STR_EQ(runner, html, "
<p>paragraph</p>
\n", "libcmark works with C++"); 13 | free(html); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /api_test/cplusplus.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_API_TEST_CPLUSPLUS_H 2 | #define CMARK_API_TEST_CPLUSPLUS_H 3 | 4 | #include "harness.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | void test_cplusplus(test_batch_runner *runner); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /api_test/harness.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "harness.h" 7 | 8 | test_batch_runner *test_batch_runner_new(void) { 9 | return (test_batch_runner *)calloc(1, sizeof(test_batch_runner)); 10 | } 11 | 12 | static void test_result(test_batch_runner *runner, int cond, const char *msg, 13 | va_list ap) { 14 | ++runner->test_num; 15 | 16 | if (cond) { 17 | ++runner->num_passed; 18 | } else { 19 | fprintf(stderr, "FAILED test %d: ", runner->test_num); 20 | vfprintf(stderr, msg, ap); 21 | fprintf(stderr, "\n"); 22 | ++runner->num_failed; 23 | } 24 | } 25 | 26 | void SKIP(test_batch_runner *runner, int num_tests) { 27 | runner->test_num += num_tests; 28 | runner->num_skipped += num_tests; 29 | } 30 | 31 | void OK(test_batch_runner *runner, int cond, const char *msg, ...) { 32 | va_list ap; 33 | va_start(ap, msg); 34 | test_result(runner, cond, msg, ap); 35 | va_end(ap); 36 | } 37 | 38 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, 39 | ...) 
{ 40 | int cond = got == expected; 41 | 42 | va_list ap; 43 | va_start(ap, msg); 44 | test_result(runner, cond, msg, ap); 45 | va_end(ap); 46 | 47 | if (!cond) { 48 | fprintf(stderr, " Got: %d\n", got); 49 | fprintf(stderr, " Expected: %d\n", expected); 50 | } 51 | } 52 | 53 | void STR_EQ(test_batch_runner *runner, const char *got, const char *expected, 54 | const char *msg, ...) { 55 | int cond = strcmp(got, expected) == 0; 56 | 57 | va_list ap; 58 | va_start(ap, msg); 59 | test_result(runner, cond, msg, ap); 60 | va_end(ap); 61 | 62 | if (!cond) { 63 | fprintf(stderr, " Got: \"%s\"\n", got); 64 | fprintf(stderr, " Expected: \"%s\"\n", expected); 65 | } 66 | } 67 | 68 | int test_ok(test_batch_runner *runner) { return runner->num_failed == 0; } 69 | 70 | void test_print_summary(test_batch_runner *runner) { 71 | int num_passed = runner->num_passed; 72 | int num_skipped = runner->num_skipped; 73 | int num_failed = runner->num_failed; 74 | 75 | fprintf(stderr, "%d tests passed, %d failed, %d skipped\n", num_passed, 76 | num_failed, num_skipped); 77 | 78 | if (test_ok(runner)) { 79 | fprintf(stderr, "PASS\n"); 80 | } else { 81 | fprintf(stderr, "FAIL\n"); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /api_test/harness.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_API_TEST_HARNESS_H 2 | #define CMARK_API_TEST_HARNESS_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | typedef struct { 9 | int test_num; 10 | int num_passed; 11 | int num_failed; 12 | int num_skipped; 13 | } test_batch_runner; 14 | 15 | test_batch_runner *test_batch_runner_new(void); 16 | 17 | void SKIP(test_batch_runner *runner, int num_tests); 18 | 19 | void OK(test_batch_runner *runner, int cond, const char *msg, ...); 20 | 21 | void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg, 22 | ...); 23 | 24 | void STR_EQ(test_batch_runner *runner, const char *got, 
const char *expected, 25 | const char *msg, ...); 26 | 27 | int test_ok(test_batch_runner *runner); 28 | 29 | void test_print_summary(test_batch_runner *runner); 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /bench/samples/block-bq-flat.md: -------------------------------------------------------------------------------- 1 | > the simple example of a blockquote 2 | > the simple example of a blockquote 3 | > the simple example of a blockquote 4 | > the simple example of a blockquote 5 | ... continuation 6 | ... continuation 7 | ... continuation 8 | ... continuation 9 | 10 | empty blockquote: 11 | 12 | > 13 | > 14 | > 15 | > 16 | 17 | -------------------------------------------------------------------------------- /bench/samples/block-bq-nested.md: -------------------------------------------------------------------------------- 1 | >>>>>> deeply nested blockquote 2 | >>>>> deeply nested blockquote 3 | >>>> deeply nested blockquote 4 | >>> deeply nested blockquote 5 | >> deeply nested blockquote 6 | > deeply nested blockquote 7 | 8 | > deeply nested blockquote 9 | >> deeply nested blockquote 10 | >>> deeply nested blockquote 11 | >>>> deeply nested blockquote 12 | >>>>> deeply nested blockquote 13 | >>>>>> deeply nested blockquote 14 | -------------------------------------------------------------------------------- /bench/samples/block-code.md: -------------------------------------------------------------------------------- 1 | 2 | an 3 | example 4 | 5 | of 6 | 7 | 8 | 9 | a code 10 | block 11 | 12 | -------------------------------------------------------------------------------- /bench/samples/block-fences.md: -------------------------------------------------------------------------------- 1 | 2 | ``````````text 3 | an 4 | example 5 | ``` 6 | of 7 | 8 | 9 | a fenced 10 | ``` 11 | code 12 | block 13 | `````````` 14 | 15 | 
-------------------------------------------------------------------------------- /bench/samples/block-heading.md: -------------------------------------------------------------------------------- 1 | # heading 2 | ### heading 3 | ##### heading 4 | 5 | # heading # 6 | ### heading ### 7 | ##### heading \#\#\#\#\###### 8 | 9 | ############ not a heading 10 | -------------------------------------------------------------------------------- /bench/samples/block-hr.md: -------------------------------------------------------------------------------- 1 | 2 | * * * * * 3 | 4 | - - - - - 5 | 6 | ________ 7 | 8 | 9 | ************************* text 10 | 11 | -------------------------------------------------------------------------------- /bench/samples/block-html.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | blah blah 4 | 5 |
6 | 7 | 8 | 9 | 12 | 13 |
10 | **test** 11 |
14 | 15 | 16 | 17 | 18 | 19 | 24 | 25 | 26 | 27 |
20 | 21 | test 22 | 23 |
28 | 29 | 32 | 33 | -------------------------------------------------------------------------------- /bench/samples/block-lheading.md: -------------------------------------------------------------------------------- 1 | heading 2 | --- 3 | 4 | heading 5 | =================================== 6 | 7 | not a heading 8 | ----------------------------------- text 9 | -------------------------------------------------------------------------------- /bench/samples/block-list-flat.md: -------------------------------------------------------------------------------- 1 | - tidy 2 | - bullet 3 | - list 4 | 5 | 6 | - loose 7 | 8 | - bullet 9 | 10 | - list 11 | 12 | 13 | 0. ordered 14 | 1. list 15 | 2. example 16 | 17 | 18 | - 19 | - 20 | - 21 | - 22 | 23 | 24 | 1. 25 | 2. 26 | 3. 27 | 28 | 29 | - an example 30 | of a list item 31 | with a continuation 32 | 33 | this part is inside the list 34 | 35 | this part is just a paragraph 36 | 37 | 38 | 1. test 39 | - test 40 | 1. test 41 | - test 42 | 43 | 44 | 111111111111111111111111111111111111111111. is this a valid bullet? 45 | 46 | - _________________________ 47 | 48 | - this 49 | - is 50 | 51 | a 52 | 53 | long 54 | - loose 55 | - list 56 | 57 | - with 58 | - some 59 | 60 | tidy 61 | 62 | - list 63 | - items 64 | - in 65 | 66 | - between 67 | - _________________________ 68 | -------------------------------------------------------------------------------- /bench/samples/block-list-nested.md: -------------------------------------------------------------------------------- 1 | 2 | - this 3 | - is 4 | - a 5 | - deeply 6 | - nested 7 | - bullet 8 | - list 9 | 10 | 11 | 1. this 12 | 2. is 13 | 3. a 14 | 4. deeply 15 | 5. nested 16 | 6. unordered 17 | 7. 
list 18 | 19 | 20 | - 1 21 | - 2 22 | - 3 23 | - 4 24 | - 5 25 | - 6 26 | - 7 27 | - 6 28 | - 5 29 | - 4 30 | - 3 31 | - 2 32 | - 1 33 | 34 | 35 | - - - - - - - - - deeply-nested one-element item 36 | 37 | -------------------------------------------------------------------------------- /bench/samples/block-ref-flat.md: -------------------------------------------------------------------------------- 1 | [1] [2] [3] [1] [2] [3] 2 | 3 | [looooooooooooooooooooooooooooooooooooooooooooooooooong label] 4 | 5 | [1]: 6 | [2]: http://something.example.com/foo/bar 'test' 7 | [3]: 8 | http://foo/bar 9 | [ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]: 10 | 111 11 | 'test' 12 | [[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q 13 | (as long as it is not referenced anywhere) 14 | 15 | [[[[[[[[[[[[[[[[[[[[]: this is not a valid reference 16 | -------------------------------------------------------------------------------- /bench/samples/block-ref-nested.md: -------------------------------------------------------------------------------- 1 | [[[[[[[foo]]]]]]] 2 | 3 | [[[[[[[foo]]]]]]]: bar 4 | [[[[[[foo]]]]]]: bar 5 | [[[[[foo]]]]]: bar 6 | [[[[foo]]]]: bar 7 | [[[foo]]]: bar 8 | [[foo]]: bar 9 | [foo]: bar 10 | 11 | [*[*[*[*[foo]*]*]*]*] 12 | 13 | [*[*[*[*[foo]*]*]*]*]: bar 14 | [*[*[*[foo]*]*]*]: bar 15 | [*[*[foo]*]*]: bar 16 | [*[foo]*]: bar 17 | [foo]: bar 18 | -------------------------------------------------------------------------------- /bench/samples/inline-autolink.md: -------------------------------------------------------------------------------- 1 | closed (valid) autolinks: 2 | 3 | 4 | 5 | 6 | 7 | 8 | these are not autolinks: 9 | 10 | 15 | -------------------------------------------------------------------------------- /bench/samples/inline-backticks.md: -------------------------------------------------------------------------------- 1 | `lots`of`backticks` 2 | 3 | ``i``wonder``how``this``will``be``parsed`` 4 | 
-------------------------------------------------------------------------------- /bench/samples/inline-em-flat.md: -------------------------------------------------------------------------------- 1 | *this* *is* *your* *basic* *boring* *emphasis* 2 | 3 | _this_ _is_ _your_ _basic_ _boring_ _emphasis_ 4 | 5 | **this** **is** **your** **basic** **boring** **emphasis** 6 | -------------------------------------------------------------------------------- /bench/samples/inline-em-nested.md: -------------------------------------------------------------------------------- 1 | *this *is *a *bunch* of* nested* emphases* 2 | 3 | __this __is __a __bunch__ of__ nested__ emphases__ 4 | 5 | ***this ***is ***a ***bunch*** of*** nested*** emphases*** 6 | -------------------------------------------------------------------------------- /bench/samples/inline-em-worst.md: -------------------------------------------------------------------------------- 1 | *this *is *a *worst *case *for *em *backtracking 2 | 3 | __this __is __a __worst __case __for __em __backtracking 4 | 5 | ***this ***is ***a ***worst ***case ***for ***em ***backtracking 6 | -------------------------------------------------------------------------------- /bench/samples/inline-entity.md: -------------------------------------------------------------------------------- 1 | entities: 2 | 3 |   & © Æ Ď ¾ ℋ ⅆ ∲ 4 | 5 | # Ӓ Ϡ � 6 | 7 | non-entities: 8 | 9 | &18900987654321234567890; &1234567890098765432123456789009876543212345678987654; 10 | 11 | &qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri; 12 | -------------------------------------------------------------------------------- /bench/samples/inline-escape.md: -------------------------------------------------------------------------------- 1 | 2 | \t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s 3 | 4 | \!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\? 
5 | 6 | \@ \[ \] \^ \_ \` \{ \| \} \~ \- \' 7 | 8 | \ 9 | \\ 10 | \\\ 11 | \\\\ 12 | \\\\\ 13 | 14 | \ \ \ \ 15 | 16 | -------------------------------------------------------------------------------- /bench/samples/inline-html.md: -------------------------------------------------------------------------------- 1 | Taking commonmark tests from the spec for benchmarking here: 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 12 | 13 | <33> <__> 14 | 15 | 16 | 17 | 28 | 29 | foo 31 | 32 | foo 33 | 34 | foo 35 | 36 | foo 37 | 38 | foo &<]]> 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /bench/samples/inline-links-flat.md: -------------------------------------------------------------------------------- 1 | Valid links: 2 | 3 | [this is a link]() 4 | [this is a link]() 5 | [this is a link](http://something.example.com/foo/bar 'test') 6 | ![this is an image]() 7 | ![this is an image]() 8 | ![this is an image](http://something.example.com/foo/bar 'test') 9 | 10 | [escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'') 11 | [escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\)) 12 | 13 | Invalid links: 14 | 15 | [this is not a link 16 | 17 | [this is not a link]( 18 | 19 | [this is not a link](http://something.example.com/foo/bar 'test' 20 | 21 | [this is not a link]((((((((((((((((((((((((((((((((((((((((((((((( 22 | 23 | [this is not a link]((((((((((()))))))))) (((((((((())))))))))) 24 | -------------------------------------------------------------------------------- /bench/samples/inline-links-nested.md: -------------------------------------------------------------------------------- 1 | Valid links: 2 | 3 | [[[[[[[[](test)](test)](test)](test)](test)](test)](test)] 4 | 5 | [ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test) 6 | 7 | Invalid links: 8 | 9 | [[[[[[[[[ 10 | 11 | [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ 12 | 13 | 
![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![ 14 | -------------------------------------------------------------------------------- /bench/samples/inline-newlines.md: -------------------------------------------------------------------------------- 1 | 2 | this\ 3 | should\ 4 | be\ 5 | separated\ 6 | by\ 7 | newlines 8 | 9 | this 10 | should 11 | be 12 | separated 13 | by 14 | newlines 15 | too 16 | 17 | this 18 | should 19 | not 20 | be 21 | separated 22 | by 23 | newlines 24 | 25 | -------------------------------------------------------------------------------- /bench/samples/lorem1.md: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat. 2 | 3 | Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. 
In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit. 4 | 5 | Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu. 6 | 7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero. 8 | 9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. 
Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat. 10 | 11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique. 12 | 13 | [1]: https://github.com/markdown-it 14 | -------------------------------------------------------------------------------- /bench/samples/rawtabs.md: -------------------------------------------------------------------------------- 1 | 2 | this is a test for tab expansion, be careful not to replace them with spaces 3 | 4 | 1 4444 5 | 22 333 6 | 333 22 7 | 4444 1 8 | 9 | 10 | tab-indented line 11 | space-indented line 12 | tab-indented line 13 | 14 | 15 | a lot of spaces in between here 16 | 17 | a lot of tabs in between here 18 | 19 | -------------------------------------------------------------------------------- /bench/stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import statistics 5 | 6 | def pairs(l, n): 7 | return zip(*[l[i::n] for i in range(n)]) 8 | 9 | # data comes in pairs: 10 | # n - time for running the program with no input 11 | # m - time for running it with the benchmark input 12 | # we measure (m - n) 13 | 14 | values = [ float(y) - float(x) for (x,y) in pairs(sys.stdin.readlines(),2)] 15 | 16 | print("mean = %.4f, median = %.4f, stdev = %.4f" % 17 | (statistics.mean(values), statistics.median(values), 18 | statistics.stdev(values))) 19 | 20 | -------------------------------------------------------------------------------- /benchmarks.md: 
-------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | Here are some benchmarks, run on a 2.3GHz 8-core i9 macbook pro. 4 | The input text is a 1106 KB Markdown file built by concatenating 5 | the Markdown sources of all the localizations of the first edition 6 | of [*Pro Git*](https://github.com/progit/progit/tree/master/en) by 7 | Scott Chacon. 8 | 9 | |Implementation | Time (sec)| 10 | |-------------------|-----------:| 11 | | **commonmark.js** | 0.59 | 12 | | **cmark** | 0.12 | 13 | | **md4c** | 0.04 | 14 | 15 | To run these benchmarks, use `make bench PROG=/path/to/program`. 16 | 17 | `time` is used to measure execution speed. The reported 18 | time is the *difference* between the time to run the program 19 | with the benchmark input and the time to run it with no input. 20 | (This procedure ensures that implementations in dynamic languages are 21 | not penalized by startup time.) A median of ten runs is taken. The 22 | process is reniced to a high priority so that the system doesn't 23 | interrupt runs. 24 | -------------------------------------------------------------------------------- /cmake/modules/FindAsan.cmake: -------------------------------------------------------------------------------- 
#
# The MIT License (MIT)
#
# Copyright (c) 2013 Matthew Arsenault
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# This module tests if address sanitizer is supported by the compiler,
# and creates an ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to use
# it). This sets the following variables:
#
# CMAKE_C_FLAGS_ASAN - Flags to use for C with asan
# CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan
# HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available

include(CheckCCompilerFlag)

# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER)

set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)

unset(CMAKE_REQUIRED_FLAGS)

# Prefer the modern spelling; fall back to the legacy one if only that works.
if(HAVE_FLAG_SANITIZE_ADDRESS)
  # Clang 3.2+ use this version
  set(ADDRESS_SANITIZER_FLAG "-fsanitize=address")
elseif(HAVE_FLAG_ADDRESS_SANITIZER)
  # Older deprecated flag for ASan
  set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer")
endif()

# Neither flag is accepted: report FALSE (as the header above documents) and
# stop before defining the ASan build type. The original had the two branches
# swapped, so FALSE was never set on this path and was instead set (then
# immediately overwritten with TRUE) when a flag had been found.
if(NOT ADDRESS_SANITIZER_FLAG)
  set(HAVE_ADDRESS_SANITIZER FALSE)
  return()
endif()

set(HAVE_ADDRESS_SANITIZER TRUE)

set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C compiler during ASan builds."
    FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
    CACHE STRING "Flags used by the C++ compiler during ASan builds."
    FORCE)
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used for linking binaries during ASan builds."
    FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
    CACHE STRING "Flags used by the shared libraries linker during ASan builds."
    FORCE)
mark_as_advanced(CMAKE_C_FLAGS_ASAN
                 CMAKE_CXX_FLAGS_ASAN
                 CMAKE_EXE_LINKER_FLAGS_ASAN
                 CMAKE_SHARED_LINKER_FLAGS_ASAN)
-------------------------------------------------------------------------------- /fuzz/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(cmark-fuzz cmark-fuzz.c) 2 | cmark_add_compile_options(cmark-fuzz) 3 | target_link_libraries(cmark-fuzz cmark) 4 | -------------------------------------------------------------------------------- /fuzz/afl_test_cases/test.md: -------------------------------------------------------------------------------- 1 | # H1 2 | 3 | H2 4 | -- 5 | 6 | t ☺ 7 | *b* **em** `c` 8 | ≥\&\ 9 | \_e\_ 10 | 11 | 4) I1 12 | 13 | 5) I2 14 | > [l](/u "t") 15 | > 16 | > - [f] 17 | > - ![a](/u "t") 18 | > 19 | >> 20 | >> 21 | 22 | ~~~ l☺ 23 | cb 24 | ~~~ 25 | 26 | c1 27 | c2 28 | 29 | *** 30 | 31 | 
32 | x 33 |
34 | 35 | [f]: /u "t" 36 | 37 | -------------------------------------------------------------------------------- /fuzz/cmark-fuzz.c: -------------------------------------------------------------------------------- 1 | /* for fmemopen */ 2 | #define _POSIX_C_SOURCE 200809L 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "cmark.h" 9 | 10 | int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { 11 | struct __attribute__((packed)) { 12 | int options; 13 | int width; 14 | } fuzz_config; 15 | 16 | if (size >= sizeof(fuzz_config)) { 17 | /* The beginning of `data` is treated as fuzzer configuration */ 18 | memcpy(&fuzz_config, data, sizeof(fuzz_config)); 19 | int options = fuzz_config.options; 20 | 21 | /* Mask off valid option bits */ 22 | options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART); 23 | 24 | /* Remainder of input is the markdown */ 25 | const char *markdown = (const char *)(data + sizeof(fuzz_config)); 26 | size_t markdown_size = size - sizeof(fuzz_config); 27 | cmark_node *doc = NULL; 28 | 29 | /* Use upper bits of options to select parsing mode */ 30 | switch (((unsigned) fuzz_config.options >> 30) & 3) { 31 | case 0: 32 | doc = cmark_parse_document(markdown, markdown_size, options); 33 | break; 34 | 35 | case 1: 36 | if (markdown_size > 0) { 37 | FILE *file = fmemopen((void *) markdown, markdown_size, "r"); 38 | doc = cmark_parse_file(file, options); 39 | fclose(file); 40 | } 41 | break; 42 | 43 | case 2: { 44 | size_t block_max = 20; 45 | cmark_parser *parser = cmark_parser_new(options); 46 | 47 | while (markdown_size > 0) { 48 | size_t block_size = markdown_size > block_max ? 
block_max : markdown_size; 49 | cmark_parser_feed(parser, markdown, block_size); 50 | markdown += block_size; 51 | markdown_size -= block_size; 52 | } 53 | 54 | doc = cmark_parser_finish(parser); 55 | cmark_parser_free(parser); 56 | break; 57 | } 58 | 59 | case 3: 60 | free(cmark_markdown_to_html(markdown, markdown_size, options)); 61 | break; 62 | } 63 | 64 | if (doc != NULL) { 65 | free(cmark_render_commonmark(doc, options, fuzz_config.width)); 66 | free(cmark_render_html(doc, options)); 67 | free(cmark_render_latex(doc, options, fuzz_config.width)); 68 | free(cmark_render_man(doc, options, fuzz_config.width)); 69 | free(cmark_render_xml(doc, options)); 70 | 71 | cmark_node_free(doc); 72 | } 73 | } 74 | return 0; 75 | } 76 | -------------------------------------------------------------------------------- /fuzz/dictionary: -------------------------------------------------------------------------------- 1 | asterisk="*" 2 | attr_generic=" a=\"1\"" 3 | attr_href=" href=\"1\"" 4 | attr_xml_lang=" xml:lang=\"1\"" 5 | attr_xmlns=" xmlns=\"1\"" 6 | backslash="\\" 7 | backtick="`" 8 | colon=":" 9 | dashes="---" 10 | double_quote="\"" 11 | entity_builtin="<" 12 | entity_decimal="" 13 | entity_external="&a;" 14 | entity_hex="" 15 | equals="===" 16 | exclamation="!" 17 | greater_than=">" 18 | hash="#" 19 | hyphen="-" 20 | indent=" " 21 | left_bracket="[" 22 | left_paren="(" 23 | less_than="<" 24 | plus="+" 25 | right_bracket="]" 26 | right_paren=")" 27 | single_quote="'" 28 | string_any="ANY" 29 | string_brackets="[]" 30 | string_cdata="CDATA" 31 | string_dashes="--" 32 | string_empty_dblquotes="\"\"" 33 | string_empty_quotes="''" 34 | string_idrefs="IDREFS" 35 | string_parentheses="()" 36 | string_pcdata="#PCDATA" 37 | tag_cdata="" 39 | tag_doctype="" 44 | tag_open_close="
" 45 | tag_open_exclamation="" 48 | tag_xml_q="" 49 | underscore="_" 50 | -------------------------------------------------------------------------------- /man/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark.1 2 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) 3 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark.3 4 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) 5 | -------------------------------------------------------------------------------- /man/make_man_page.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Creates a man page from a C file. 4 | 5 | # Comments beginning with `/**` are treated as Groff man, except that 6 | # 'this' is converted to \fIthis\f[], and ''this'' to \fBthis\f[]. 7 | 8 | # Non-blank lines immediately following a man page comment are treated 9 | # as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The 10 | # immediately preceding man documentation chunk is printed after the example 11 | # as a comment on it. 12 | 13 | # That's about it! 
14 | 15 | import sys, re, os, platform 16 | from datetime import date 17 | from ctypes import CDLL, c_char_p, c_long, c_void_p 18 | 19 | sysname = platform.system() 20 | 21 | curdir = os.getcwd() 22 | 23 | if sysname == 'Darwin': 24 | cmark = CDLL(curdir + "/build/src/libcmark.dylib") 25 | else: 26 | cmark = CDLL(curdir + "/build/src/libcmark.so") 27 | 28 | parse_document = cmark.cmark_parse_document 29 | parse_document.restype = c_void_p 30 | parse_document.argtypes = [c_char_p, c_long] 31 | 32 | render_man = cmark.cmark_render_man 33 | render_man.restype = c_char_p 34 | render_man.argtypes = [c_void_p, c_long, c_long] 35 | 36 | cmark_version_string = cmark.cmark_version_string 37 | cmark_version_string.restype = c_char_p 38 | cmark_version_string.argtypes = [] 39 | 40 | def md2man(text): 41 | if sys.version_info >= (3,0): 42 | textbytes = text.encode('utf-8') 43 | textlen = len(textbytes) 44 | return render_man(parse_document(textbytes, textlen), 0, 72).decode('utf-8') 45 | else: 46 | textbytes = text 47 | textlen = len(text) 48 | return render_man(parse_document(textbytes, textlen), 0, 72) 49 | 50 | comment_start_re = re.compile(r'^\/\*\* ?') 51 | comment_delim_re = re.compile(r'^[/ ]\** ?') 52 | comment_end_re = re.compile(r'^ \**\/') 53 | function_re = re.compile(r'^ *(?:CMARK_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)') 54 | blank_re = re.compile(r'^\s*$') 55 | macro_re = re.compile(r'CMARK_EXPORT *') 56 | typedef_start_re = re.compile(r'typedef.*{$') 57 | typedef_end_re = re.compile(r'}') 58 | single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)") 59 | double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)") 60 | 61 | def handle_quotes(s): 62 | return re.sub(double_quote_re, r'**\g<1>**', re.sub(single_quote_re, r'*\g<1>*', s)) 63 | 64 | typedef = False 65 | mdlines = [] 66 | chunk = [] 67 | sig = [] 68 | 69 | if len(sys.argv) > 1: 70 | sourcefile = sys.argv[1] 71 | else: 72 | print("Usage: make_man_page.py sourcefile") 73 | exit(1) 74 | 75 | with open(sourcefile, 'r') as cmarkh: 76 | state = 'default' 77 | for line in cmarkh: 78 | # state transition 79 | oldstate = state 80 | if
comment_start_re.match(line): 81 | state = 'man' 82 | elif comment_end_re.match(line) and state == 'man': 83 | continue 84 | elif comment_delim_re.match(line) and state == 'man': 85 | state = 'man' 86 | elif not typedef and blank_re.match(line): 87 | state = 'default' 88 | elif typedef and typedef_end_re.match(line): 89 | typedef = False 90 | elif typedef_start_re.match(line): 91 | typedef = True 92 | state = 'signature' 93 | elif state == 'man': 94 | state = 'signature' 95 | 96 | # handle line 97 | if state == 'man': 98 | chunk.append(handle_quotes(re.sub(comment_delim_re, '', line))) 99 | elif state == 'signature': 100 | ln = re.sub(macro_re, '', line) 101 | if typedef or not re.match(blank_re, ln): 102 | sig.append(ln) 103 | elif oldstate == 'signature' and state != 'signature': 104 | if len(mdlines) > 0 and mdlines[-1] != '\n': 105 | mdlines.append('\n') 106 | rawsig = ''.join(sig) 107 | m = function_re.match(rawsig) 108 | mdlines.append('.PP\n') 109 | if m: 110 | mdlines.append('\\fI' + m.group('type') + '\\f[]' + ' ') 111 | mdlines.append('\\fB' + m.group('name') + '\\f[]' + '(') 112 | first = True 113 | for argument in re.split(',', m.group('args')): 114 | if not first: 115 | mdlines.append(', ') 116 | first = False 117 | mdlines.append('\\fI' + argument.strip() + '\\f[]') 118 | mdlines.append(')\n') 119 | else: 120 | mdlines.append('.nf\n\\fC\n.RS 0n\n') 121 | mdlines += sig 122 | mdlines.append('.RE\n\\f[]\n.fi\n') 123 | if len(mdlines) > 0 and mdlines[-1] != '\n': 124 | mdlines.append('\n') 125 | mdlines += md2man(''.join(chunk)) 126 | mdlines.append('\n') 127 | chunk = [] 128 | sig = [] 129 | elif oldstate == 'man' and state != 'signature': 130 | if len(mdlines) > 0 and mdlines[-1] != '\n': 131 | mdlines.append('\n') 132 | mdlines += md2man(''.join(chunk)) # add man chunk 133 | chunk = [] 134 | mdlines.append('\n') 135 | 136 | sys.stdout.write('.TH ' + os.path.basename(sourcefile).replace('.h','') + ' 3 "' + date.today().strftime('%B %d, %Y') + '" "cmark 
' + cmark_version_string().decode('utf-8') + '" "Library Functions Manual"\n') 137 | sys.stdout.write(''.join(mdlines)) 138 | -------------------------------------------------------------------------------- /man/man1/cmark.1: -------------------------------------------------------------------------------- 1 | .TH "cmark" "1" "February 11, 2020" "LOCAL" "General Commands Manual" 2 | .SH "NAME" 3 | \fBcmark\fR 4 | \- convert CommonMark formatted text to HTML 5 | .SH "SYNOPSIS" 6 | .HP 6n 7 | \fBcmark\fR 8 | [options] 9 | file* 10 | .SH "DESCRIPTION" 11 | \fBcmark\fR 12 | converts Markdown formatted plain text to either HTML, groff man, 13 | CommonMark XML, LaTeX, or CommonMark, using the conventions 14 | described in the CommonMark spec. It reads input from \fIstdin\fR 15 | or the specified files (concatenating their contents) and writes 16 | output to \fIstdout\fR. 17 | .SH "OPTIONS" 18 | .TP 12n 19 | .B \-\-to, \-t \f[I]FORMAT\f[] 20 | Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[], 21 | \f[C]latex\f[], \f[C]commonmark\f[]). 22 | .TP 12n 23 | .B \-\-width \f[I]WIDTH\f[] 24 | Specify a column width to which to wrap the output. For no wrapping, use 25 | the value 0 (the default). This option currently only affects the 26 | commonmark, latex, and man renderers. 27 | .TP 12n 28 | .B \-\-hardbreaks 29 | Render soft breaks (newlines inside paragraphs in the CommonMark source) 30 | as hard line breaks in the target format. If this option is specified, 31 | hard wrapping is disabled for CommonMark output, regardless of the value 32 | given with \-\-width. 33 | .TP 12n 34 | .B \-\-nobreaks 35 | Render soft breaks as spaces. If this option is specified, 36 | hard wrapping is disabled for all output formats, regardless of the value 37 | given with \-\-width. 38 | .TP 12n 39 | .B \-\-sourcepos 40 | Include source position attribute. 41 | .TP 12n 42 | .B \-\-validate-utf8 43 | Validate UTF-8, replacing illegal sequences with U+FFFD. 
44 | .TP 12n 45 | .B \-\-smart 46 | Use smart punctuation. Straight double and single quotes will 47 | be rendered as curly quotes, depending on their position. 48 | \f[C]\-\-\f[] will be rendered as an en-dash. 49 | \f[C]\-\-\-\f[] will be rendered as an em-dash. 50 | \f[C]...\f[] will be rendered as ellipses. 51 | .TP 12n 52 | .B \-\-safe 53 | Omit raw HTML and potentially dangerous URLs (default). 54 | Raw HTML is replaced by a placeholder comment 55 | and potentially dangerous URLs are replaced by empty strings. 56 | Potentially dangerous URLs are those that begin with `javascript:`, 57 | `vbscript:`, `file:`, or `data:` (except for `image/png`, 58 | `image/gif`, `image/jpeg`, or `image/webp` mime types). 59 | .TP 12n 60 | .B \-\-unsafe 61 | Render raw HTML or potentially dangerous URLs, overriding 62 | the default (\-\-safe) behavior. 63 | .TP 12n 64 | .B \-\-help 65 | Print usage information. 66 | .TP 12n 67 | .B \-\-version 68 | Print version. 69 | .SH "AUTHORS" 70 | John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 71 | .SH "SEE ALSO" 72 | .PP 73 | CommonMark spec: \f[C]https://spec.commonmark.org\f[]. 
74 | -------------------------------------------------------------------------------- /nmake.bat: -------------------------------------------------------------------------------- 1 | @nmake.exe /nologo /f Makefile.nmake %* 2 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | with (import {}); 2 | mkShell { 3 | buildInputs = [ 4 | clangStdenv 5 | cmake 6 | gdb 7 | python3 8 | perl 9 | re2c 10 | curl 11 | ]; 12 | } 13 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | configure_file(cmark_version.h.in 3 | ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h) 4 | configure_file(libcmark.pc.in 5 | ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc 6 | @ONLY) 7 | 8 | add_library(cmark 9 | blocks.c 10 | buffer.c 11 | cmark.c 12 | cmark_ctype.c 13 | commonmark.c 14 | houdini_href_e.c 15 | houdini_html_e.c 16 | houdini_html_u.c 17 | html.c 18 | inlines.c 19 | iterator.c 20 | latex.c 21 | man.c 22 | node.c 23 | references.c 24 | render.c 25 | scanners.c 26 | scanners.re 27 | utf8.c 28 | xml.c) 29 | cmark_add_compile_options(cmark) 30 | set_target_properties(cmark PROPERTIES 31 | OUTPUT_NAME "cmark" 32 | # Avoid name clash between PROGRAM and LIBRARY pdb files. 33 | PDB_NAME libcmark 34 | POSITION_INDEPENDENT_CODE YES 35 | # Include minor version and patch level in soname for now. 36 | SOVERSION ${PROJECT_VERSION} 37 | VERSION ${PROJECT_VERSION}) 38 | if(NOT BUILD_SHARED_LIBS) 39 | target_compile_definitions(cmark PUBLIC 40 | CMARK_STATIC_DEFINE) 41 | endif() 42 | target_include_directories(cmark INTERFACE 43 | $ 44 | $ 45 | $) 46 | 47 | generate_export_header(cmark 48 | BASE_NAME ${PROJECT_NAME}) 49 | 50 | # FIXME(compnerd) this should be removed, but exists solely to allow a migration 51 | # path for OSS Fuzz. 
52 | add_custom_target(cmark_static DEPENDS cmark) 53 | 54 | add_executable(cmark_exe 55 | main.c) 56 | cmark_add_compile_options(cmark_exe) 57 | set_target_properties(cmark_exe PROPERTIES 58 | OUTPUT_NAME "cmark" 59 | INSTALL_RPATH "${Base_rpath}") 60 | target_link_libraries(cmark_exe PRIVATE 61 | cmark) 62 | 63 | install(TARGETS cmark_exe cmark 64 | EXPORT cmark-targets 65 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 66 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 67 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 68 | ) 69 | 70 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc 71 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) 72 | 73 | install(FILES 74 | cmark.h 75 | ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h 76 | ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h 77 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 78 | ) 79 | 80 | # generate cmark-config.cmake and cmark-config-version.cmake files 81 | configure_package_config_file( 82 | "cmarkConfig.cmake.in" 83 | "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config.cmake" 84 | INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark") 85 | write_basic_package_version_file( 86 | "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config-version.cmake" 87 | VERSION ${PROJECT_VERSION} 88 | COMPATIBILITY SameMajorVersion) 89 | # install config and version file 90 | install( 91 | FILES "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config.cmake" 92 | "${CMAKE_CURRENT_BINARY_DIR}/generated/cmark-config-version.cmake" 93 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark" 94 | ) 95 | # install targets file 96 | install( 97 | EXPORT "cmark-targets" 98 | NAMESPACE "cmark::" 99 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cmark" 100 | ) 101 | -------------------------------------------------------------------------------- /src/buffer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 
"cmark_ctype.h" 11 | #include "buffer.h" 12 | 13 | /* Used as default value for cmark_strbuf->ptr so that people can always 14 | * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. 15 | */ 16 | unsigned char cmark_strbuf__initbuf[1]; 17 | 18 | #ifndef MIN 19 | #define MIN(x, y) ((x < y) ? x : y) 20 | #endif 21 | 22 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 23 | bufsize_t initial_size) { 24 | buf->mem = mem; 25 | buf->asize = 0; 26 | buf->size = 0; 27 | buf->ptr = cmark_strbuf__initbuf; 28 | 29 | if (initial_size > 0) 30 | cmark_strbuf_grow(buf, initial_size); 31 | } 32 | 33 | static inline void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { 34 | cmark_strbuf_grow(buf, buf->size + add); 35 | } 36 | 37 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { 38 | assert(target_size > 0); 39 | 40 | if (target_size < buf->asize) 41 | return; 42 | 43 | if (target_size > (bufsize_t)(INT32_MAX / 2)) { 44 | fprintf(stderr, 45 | "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n", 46 | (INT32_MAX / 2)); 47 | abort(); 48 | } 49 | 50 | /* Oversize the buffer by 50% to guarantee amortized linear time 51 | * complexity on append operations. */ 52 | bufsize_t new_size = target_size + target_size / 2; 53 | new_size += 1; 54 | new_size = (new_size + 7) & ~7; 55 | 56 | buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? 
buf->ptr : NULL, 57 | new_size); 58 | buf->asize = new_size; 59 | } 60 | 61 | void cmark_strbuf_free(cmark_strbuf *buf) { 62 | if (!buf) 63 | return; 64 | 65 | if (buf->ptr != cmark_strbuf__initbuf) 66 | buf->mem->free(buf->ptr); 67 | 68 | cmark_strbuf_init(buf->mem, buf, 0); 69 | } 70 | 71 | void cmark_strbuf_clear(cmark_strbuf *buf) { 72 | buf->size = 0; 73 | 74 | if (buf->asize > 0) 75 | buf->ptr[0] = '\0'; 76 | } 77 | 78 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 79 | bufsize_t len) { 80 | if (len <= 0 || data == NULL) { 81 | cmark_strbuf_clear(buf); 82 | } else { 83 | if (data != buf->ptr) { 84 | if (len >= buf->asize) 85 | cmark_strbuf_grow(buf, len); 86 | memmove(buf->ptr, data, len); 87 | } 88 | buf->size = len; 89 | buf->ptr[buf->size] = '\0'; 90 | } 91 | } 92 | 93 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) { 94 | S_strbuf_grow_by(buf, 1); 95 | buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); 96 | buf->ptr[buf->size] = '\0'; 97 | } 98 | 99 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 100 | bufsize_t len) { 101 | if (len <= 0) 102 | return; 103 | 104 | S_strbuf_grow_by(buf, len); 105 | memmove(buf->ptr + buf->size, data, len); 106 | buf->size += len; 107 | buf->ptr[buf->size] = '\0'; 108 | } 109 | 110 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { 111 | cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string)); 112 | } 113 | 114 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { 115 | unsigned char *data = buf->ptr; 116 | 117 | if (buf->asize == 0) { 118 | /* return an empty string */ 119 | return (unsigned char *)buf->mem->calloc(1, 1); 120 | } 121 | 122 | cmark_strbuf_init(buf->mem, buf, 0); 123 | return data; 124 | } 125 | 126 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { 127 | if (len < 0) 128 | len = 0; 129 | 130 | if (len < buf->size) { 131 | buf->size = len; 132 | buf->ptr[buf->size] = '\0'; 133 | } 134 | } 135 | 136 | void 
cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { 137 | if (n > 0) { 138 | if (n > buf->size) 139 | n = buf->size; 140 | buf->size = buf->size - n; 141 | if (buf->size) 142 | memmove(buf->ptr, buf->ptr + n, buf->size); 143 | 144 | buf->ptr[buf->size] = '\0'; 145 | } 146 | } 147 | 148 | void cmark_strbuf_rtrim(cmark_strbuf *buf) { 149 | if (!buf->size) 150 | return; 151 | 152 | while (buf->size > 0) { 153 | if (!cmark_isspace(buf->ptr[buf->size - 1])) 154 | break; 155 | 156 | buf->size--; 157 | } 158 | 159 | buf->ptr[buf->size] = '\0'; 160 | } 161 | 162 | void cmark_strbuf_trim(cmark_strbuf *buf) { 163 | bufsize_t i = 0; 164 | 165 | if (!buf->size) 166 | return; 167 | 168 | while (i < buf->size && cmark_isspace(buf->ptr[i])) 169 | i++; 170 | 171 | cmark_strbuf_drop(buf, i); 172 | 173 | cmark_strbuf_rtrim(buf); 174 | } 175 | 176 | // Destructively modify string, collapsing consecutive 177 | // space and newline characters into a single space. 178 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { 179 | bool last_char_was_space = false; 180 | bufsize_t r, w; 181 | 182 | for (r = 0, w = 0; r < s->size; ++r) { 183 | if (cmark_isspace(s->ptr[r])) { 184 | if (!last_char_was_space) { 185 | s->ptr[w++] = ' '; 186 | last_char_was_space = true; 187 | } 188 | } else { 189 | s->ptr[w++] = s->ptr[r]; 190 | last_char_was_space = false; 191 | } 192 | } 193 | 194 | cmark_strbuf_truncate(s, w); 195 | } 196 | 197 | // Destructively unescape a string: remove backslashes before punctuation chars. 
198 | void cmark_strbuf_unescape(cmark_strbuf *buf) { 199 | bufsize_t r, w; 200 | 201 | for (r = 0, w = 0; r < buf->size; ++r) { 202 | if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) 203 | r++; 204 | 205 | buf->ptr[w++] = buf->ptr[r]; 206 | } 207 | 208 | cmark_strbuf_truncate(buf, w); 209 | } 210 | -------------------------------------------------------------------------------- /src/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_BUFFER_H 2 | #define CMARK_BUFFER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cmark.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | typedef int32_t bufsize_t; 17 | 18 | typedef struct { 19 | cmark_mem *mem; 20 | unsigned char *ptr; 21 | bufsize_t asize, size; 22 | } cmark_strbuf; 23 | 24 | extern unsigned char cmark_strbuf__initbuf[]; 25 | 26 | #define CMARK_BUF_INIT(mem) \ 27 | { mem, cmark_strbuf__initbuf, 0, 0 } 28 | 29 | /** 30 | * Initialize a cmark_strbuf structure. 31 | * 32 | * For the cases where CMARK_BUF_INIT cannot be used to do static 33 | * initialization. 34 | */ 35 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 36 | bufsize_t initial_size); 37 | 38 | /** 39 | * Grow the buffer to hold at least `target_size` bytes. 
40 | */ 41 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); 42 | 43 | void cmark_strbuf_free(cmark_strbuf *buf); 44 | 45 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); 46 | 47 | /* 48 | static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { 49 | return (char *)buf->ptr; 50 | } 51 | */ 52 | 53 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) 54 | 55 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 56 | bufsize_t len); 57 | void cmark_strbuf_putc(cmark_strbuf *buf, int c); 58 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 59 | bufsize_t len); 60 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); 61 | void cmark_strbuf_clear(cmark_strbuf *buf); 62 | 63 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); 64 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); 65 | void cmark_strbuf_rtrim(cmark_strbuf *buf); 66 | void cmark_strbuf_trim(cmark_strbuf *buf); 67 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); 68 | void cmark_strbuf_unescape(cmark_strbuf *s); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/chunk.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CHUNK_H 2 | #define CMARK_CHUNK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cmark.h" 8 | #include "buffer.h" 9 | #include "cmark_ctype.h" 10 | 11 | #define CMARK_CHUNK_EMPTY \ 12 | { NULL, 0 } 13 | 14 | typedef struct { 15 | const unsigned char *data; 16 | bufsize_t len; 17 | } cmark_chunk; 18 | 19 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 20 | static inline void cmark_chunk_free(cmark_chunk *c) { 21 | c->data = NULL; 22 | c->len = 0; 23 | } 24 | 25 | static inline void cmark_chunk_ltrim(cmark_chunk *c) { 26 | while (c->len && cmark_isspace(c->data[0])) { 27 | c->data++; 28 | c->len--; 29 | } 30 | } 31 | 
32 | static inline void cmark_chunk_rtrim(cmark_chunk *c) { 33 | while (c->len > 0) { 34 | if (!cmark_isspace(c->data[c->len - 1])) 35 | break; 36 | 37 | c->len--; 38 | } 39 | } 40 | 41 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 42 | static inline void cmark_chunk_trim(cmark_chunk *c) { 43 | cmark_chunk_ltrim(c); 44 | cmark_chunk_rtrim(c); 45 | } 46 | 47 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 48 | static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, 49 | bufsize_t offset) { 50 | const unsigned char *p = 51 | (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); 52 | return p ? (bufsize_t)(p - ch->data) : ch->len; 53 | } 54 | 55 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 56 | static inline cmark_chunk cmark_chunk_literal(const char *data) { 57 | bufsize_t len = data ? (bufsize_t)strlen(data) : 0; 58 | cmark_chunk c = {(unsigned char *)data, len}; 59 | return c; 60 | } 61 | 62 | // NOLINTNEXTLINE(clang-diagnostic-unused-function) 63 | static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, 64 | bufsize_t len) { 65 | cmark_chunk c = {ch->data + pos, len}; 66 | return c; 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/cmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "node.h" 5 | #include "houdini.h" 6 | #include "cmark.h" 7 | #include "buffer.h" 8 | 9 | int cmark_version(void) { return CMARK_VERSION; } 10 | 11 | const char *cmark_version_string(void) { return CMARK_VERSION_STRING; } 12 | 13 | static void *xcalloc(size_t nmem, size_t size) { 14 | void *ptr = calloc(nmem, size); 15 | if (!ptr) { 16 | fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n"); 17 | abort(); 18 | } 19 | return ptr; 20 | } 21 | 22 | static void *xrealloc(void *ptr, size_t size) { 23 | void *new_ptr = realloc(ptr, size); 24 | if (!new_ptr) { 
25 | fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n"); 26 | abort(); 27 | } 28 | return new_ptr; 29 | } 30 | 31 | cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; 32 | 33 | cmark_mem *cmark_get_default_mem_allocator(void) { 34 | return &DEFAULT_MEM_ALLOCATOR; 35 | } 36 | 37 | 38 | char *cmark_markdown_to_html(const char *text, size_t len, int options) { 39 | cmark_node *doc; 40 | char *result; 41 | 42 | doc = cmark_parse_document(text, len, options); 43 | 44 | result = cmark_render_html(doc, options); 45 | cmark_node_free(doc); 46 | 47 | return result; 48 | } 49 | -------------------------------------------------------------------------------- /src/cmarkConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/cmark-targets.cmake") 4 | check_required_components("cmark") 5 | -------------------------------------------------------------------------------- /src/cmark_ctype.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cmark_ctype.h" 4 | 5 | /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other 6 | */ 7 | static const uint8_t cmark_ctype_class[256] = { 8 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 9 | /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 10 | /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11 | /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12 | /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 13 | /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14 | /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 15 | /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 16 | /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, 17 | /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 | /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 | /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 | /* b */ 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 | /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 25 | 26 | /** 27 | * Returns 1 if c is a "whitespace" character as defined by the spec. 28 | */ 29 | int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; } 30 | 31 | /** 32 | * Returns 1 if c is an ascii punctuation character. 33 | */ 34 | int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; } 35 | 36 | int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; } 37 | 38 | int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; } 39 | -------------------------------------------------------------------------------- /src/cmark_ctype.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CMARK_CTYPE_H 2 | #define CMARK_CMARK_CTYPE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /** Locale-independent versions of functions from ctype.h. 9 | * We want cmark to behave the same no matter what the system locale. 
10 | */ 11 | 12 | int cmark_isspace(char c); 13 | 14 | int cmark_ispunct(char c); 15 | 16 | int cmark_isdigit(char c); 17 | 18 | int cmark_isalpha(char c); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/cmark_version.h.in: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_VERSION_H 2 | #define CMARK_VERSION_H 3 | 4 | #define CMARK_VERSION ((@PROJECT_VERSION_MAJOR@ << 16) | (@PROJECT_VERSION_MINOR@ << 8) | @PROJECT_VERSION_PATCH@) 5 | #define CMARK_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@" 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/houdini.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_HOUDINI_H 2 | #define CMARK_HOUDINI_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | 10 | #include "buffer.h" 11 | 12 | #ifdef HOUDINI_USE_LOCALE 13 | #define _isxdigit(c) isxdigit(c) 14 | #define _isdigit(c) isdigit(c) 15 | #else 16 | /* 17 | * Helper _isdigit methods -- do not trust the current locale 18 | * */ 19 | #define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) 20 | #define _isdigit(c) ((c) >= '0' && (c) <= '9') 21 | #endif 22 | 23 | #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) 24 | #define HOUDINI_UNESCAPED_SIZE(x) (x) 25 | 26 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 27 | bufsize_t size); 28 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, 29 | bufsize_t size, int secure); 30 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 31 | bufsize_t size); 32 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 33 | bufsize_t size); 34 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, 35 | bufsize_t size); 36 | 37 | #ifdef 
__cplusplus 38 | } 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/houdini_href_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #if !defined(__has_builtin) 8 | # define __has_builtin(b) 0 9 | #endif 10 | 11 | #if !__has_builtin(__builtin_expect) 12 | # define __builtin_expect(e, v) (e) 13 | #endif 14 | 15 | #define likely(e) __builtin_expect((e), 1) 16 | #define unlikely(e) __builtin_expect((e), 0) 17 | 18 | /* 19 | * The following characters will not be escaped: 20 | * 21 | * -_.+!*'(),%#@?=;:/,+&$~ alphanum 22 | * 23 | * Note that this character set is the addition of: 24 | * 25 | * - The characters which are safe to be in an URL 26 | * - The characters which are *not* safe to be in 27 | * an URL because they are RESERVED characters. 28 | * 29 | * We assume (lazily) that any RESERVED char that 30 | * appears inside an URL is actually meant to 31 | * have its native function (i.e. as an URL 32 | * component/separator) and hence needs no escaping. 33 | * 34 | * There are two exceptions: the characters & (amp) 35 | * and ' (single quote) do not appear in the table. 36 | * They are meant to appear in the URL as components, 37 | * yet they require special HTML-entity escaping 38 | * to generate valid HTML markup. 39 | * 40 | * All other characters will be escaped to %XX. 
41 | * 42 | */ 43 | static const char HREF_SAFE[] = { 44 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 46 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 48 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 49 | 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55 | }; 56 | 57 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { 58 | static const uint8_t hex_chars[] = "0123456789ABCDEF"; 59 | bufsize_t i = 0, org; 60 | uint8_t hex_str[3]; 61 | 62 | hex_str[0] = '%'; 63 | 64 | while (i < size) { 65 | org = i; 66 | while (i < size && HREF_SAFE[src[i]] != 0) 67 | i++; 68 | 69 | if (likely(i > org)) 70 | cmark_strbuf_put(ob, src + org, i - org); 71 | 72 | /* escaping */ 73 | if (i >= size) 74 | break; 75 | 76 | switch (src[i]) { 77 | /* amp appears all the time in URLs, but needs 78 | * HTML-entity escaping to be inside an href */ 79 | case '&': 80 | cmark_strbuf_puts(ob, "&amp;"); 81 | break; 82 | 83 | /* the single quote is a valid URL character 84 | * according to the standard; it needs HTML 85 | * entity escaping too */ 86 | case '\'': 87 | cmark_strbuf_puts(ob, "&#x27;"); 88 | break; 89 | 90 | /* the space can be escaped to %20 or a plus 91 | * sign. we're going with the generic escape 92 | * for now.
the plus thing is more commonly seen 93 | * when building GET strings */ 94 | #if 0 95 | case ' ': 96 | cmark_strbuf_putc(ob, '+'); 97 | break; 98 | #endif 99 | 100 | /* every other character goes with a %XX escaping */ 101 | default: 102 | hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; 103 | hex_str[2] = hex_chars[src[i] & 0xF]; 104 | cmark_strbuf_put(ob, hex_str, 3); 105 | } 106 | 107 | i++; 108 | } 109 | 110 | return 1; 111 | } 112 | -------------------------------------------------------------------------------- /src/houdini_html_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #if !defined(__has_builtin) 8 | # define __has_builtin(b) 0 9 | #endif 10 | 11 | #if !__has_builtin(__builtin_expect) 12 | # define __builtin_expect(e, v) (e) 13 | #endif 14 | 15 | #define likely(e) __builtin_expect((e), 1) 16 | #define unlikely(e) __builtin_expect((e), 0) 17 | 18 | /** 19 | * According to the OWASP rules: 20 | * 21 | * & --> & 22 | * < --> < 23 | * > --> > 24 | * " --> " 25 | * ' --> ' ' is not recommended 26 | * / --> / forward slash is included as it helps end an HTML entity 27 | * 28 | */ 29 | static const char HTML_ESCAPE_TABLE[] = { 30 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 32 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 | }; 42 | 43 | static const char *HTML_ESCAPES[] = {"", """, "&", "'", 44 | "/", "<", ">"}; 45 | 46 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, 47 | int secure) { 48 | bufsize_t i = 0, org, esc = 0; 49 | 50 | while (i < size) { 51 | org = i; 52 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) 53 | i++; 54 | 55 | if (i > org) 56 | cmark_strbuf_put(ob, src + org, i - org); 57 | 58 | /* escaping */ 59 | if (unlikely(i >= size)) 60 | break; 61 | 62 | /* The forward slash is only escaped in secure mode */ 63 | if ((src[i] == '/' || src[i] == '\'') && !secure) { 64 | cmark_strbuf_putc(ob, src[i]); 65 | } else { 66 | cmark_strbuf_puts(ob, HTML_ESCAPES[esc]); 67 | } 68 | 69 | i++; 70 | } 71 | 72 | return 1; 73 | } 74 | -------------------------------------------------------------------------------- /src/houdini_html_u.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "buffer.h" 6 | #include "houdini.h" 7 | #include "utf8.h" 8 | #include "entities.inc" 9 | 10 | #if !defined(__has_builtin) 11 | # define __has_builtin(b) 0 12 | #endif 13 | 14 | #if !__has_builtin(__builtin_expect) 15 | # define __builtin_expect(e, v) (e) 16 | #endif 17 | 18 | #define likely(e) __builtin_expect((e), 1) 19 | #define unlikely(e) __builtin_expect((e), 0) 20 | 21 | /* Binary tree lookup code for entities added by JGM */ 22 | 23 | static const unsigned char *S_lookup(int i, int low, int hi, 24 | const unsigned char *s, int len, 25 | bufsize_t *size_out) { 26 | int j; 27 | uint32_t value = cmark_entities[i]; 28 | const unsigned char *ent_name = cmark_entity_text + ENT_TEXT_IDX(value); 29 | int ent_len = ENT_NAME_SIZE(value); 30 | int min_len = len < ent_len ? 
len : ent_len; 31 | int cmp = 32 | strncmp((const char *)s, (const char *)ent_name, min_len); 33 | if (cmp == 0) 34 | cmp = len - ent_len; 35 | if (cmp == 0) { 36 | *size_out = ENT_REPL_SIZE(value); 37 | return ent_name + ent_len; 38 | } else if (cmp <= 0 && i > low) { 39 | j = i - ((i - low) / 2); 40 | if (j == i) 41 | j -= 1; 42 | return S_lookup(j, low, i - 1, s, len, size_out); 43 | } else if (cmp > 0 && i < hi) { 44 | j = i + ((hi - i) / 2); 45 | if (j == i) 46 | j += 1; 47 | return S_lookup(j, i + 1, hi, s, len, size_out); 48 | } else { 49 | return NULL; 50 | } 51 | } 52 | 53 | static const unsigned char *S_lookup_entity(const unsigned char *s, int len, 54 | bufsize_t *size_out) { 55 | return S_lookup(ENT_TABLE_SIZE / 2, 0, ENT_TABLE_SIZE - 1, s, len, size_out); 56 | } 57 | 58 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 59 | bufsize_t size) { 60 | bufsize_t i = 0; 61 | 62 | if (size >= 3 && src[0] == '#') { 63 | int codepoint = 0; 64 | int num_digits = 0; 65 | int max_digits = 7; 66 | 67 | if (_isdigit(src[1])) { 68 | for (i = 1; i < size && _isdigit(src[i]); ++i) { 69 | codepoint = (codepoint * 10) + (src[i] - '0'); 70 | 71 | if (codepoint >= 0x110000) { 72 | // Keep counting digits but 73 | // avoid integer overflow. 74 | codepoint = 0x110000; 75 | } 76 | } 77 | 78 | num_digits = i - 1; 79 | max_digits = 7; 80 | } 81 | 82 | else if (src[1] == 'x' || src[1] == 'X') { 83 | for (i = 2; i < size && _isxdigit(src[i]); ++i) { 84 | codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); 85 | 86 | if (codepoint >= 0x110000) { 87 | // Keep counting digits but 88 | // avoid integer overflow. 
89 | codepoint = 0x110000; 90 | } 91 | } 92 | 93 | num_digits = i - 2; 94 | max_digits = 6; 95 | } 96 | 97 | if (num_digits >= 1 && num_digits <= max_digits && 98 | i < size && src[i] == ';') { 99 | if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || 100 | codepoint >= 0x110000) { 101 | codepoint = 0xFFFD; 102 | } 103 | cmark_utf8proc_encode_char(codepoint, ob); 104 | return i + 1; 105 | } 106 | } 107 | 108 | else { 109 | if (size > ENT_MAX_LENGTH) 110 | size = ENT_MAX_LENGTH; 111 | 112 | for (i = ENT_MIN_LENGTH; i < size; ++i) { 113 | if (src[i] == ' ') 114 | break; 115 | 116 | if (src[i] == ';') { 117 | bufsize_t size; 118 | const unsigned char *entity = S_lookup_entity(src, i, &size); 119 | 120 | if (entity != NULL) { 121 | cmark_strbuf_put(ob, entity, size); 122 | return i + 1; 123 | } 124 | 125 | break; 126 | } 127 | } 128 | } 129 | 130 | return 0; 131 | } 132 | 133 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 134 | bufsize_t size) { 135 | bufsize_t i = 0, org, ent; 136 | 137 | while (i < size) { 138 | org = i; 139 | while (i < size && src[i] != '&') 140 | i++; 141 | 142 | if (likely(i > org)) { 143 | if (unlikely(org == 0)) { 144 | if (i >= size) 145 | return 0; 146 | 147 | cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); 148 | } 149 | 150 | cmark_strbuf_put(ob, src + org, i - org); 151 | } 152 | 153 | /* escaping */ 154 | if (i >= size) 155 | break; 156 | 157 | i++; 158 | 159 | ent = houdini_unescape_ent(ob, src + i, size - i); 160 | i += ent; 161 | 162 | /* not really an entity */ 163 | if (ent == 0) 164 | cmark_strbuf_putc(ob, '&'); 165 | } 166 | 167 | return 1; 168 | } 169 | 170 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 171 | bufsize_t size) { 172 | if (!houdini_unescape_html(ob, src, size)) 173 | cmark_strbuf_put(ob, src, size); 174 | } 175 | -------------------------------------------------------------------------------- /src/inlines.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CMARK_INLINES_H 2 | #define CMARK_INLINES_H 3 | 4 | #include "chunk.h" 5 | #include "references.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url); 12 | unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title); 13 | 14 | void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, 15 | cmark_reference_map *refmap, int options); 16 | 17 | bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, 18 | cmark_reference_map *refmap); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/iterator.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "node.h" 6 | #include "cmark.h" 7 | #include "iterator.h" 8 | 9 | static const int S_leaf_mask = 10 | (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) | 11 | (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | 12 | (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | 13 | (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); 14 | 15 | cmark_iter *cmark_iter_new(cmark_node *root) { 16 | if (root == NULL) { 17 | return NULL; 18 | } 19 | cmark_mem *mem = root->mem; 20 | cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter)); 21 | iter->mem = mem; 22 | iter->root = root; 23 | iter->cur.ev_type = CMARK_EVENT_NONE; 24 | iter->cur.node = NULL; 25 | iter->next.ev_type = CMARK_EVENT_ENTER; 26 | iter->next.node = root; 27 | return iter; 28 | } 29 | 30 | void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } 31 | 32 | static bool S_is_leaf(cmark_node *node) { 33 | return ((1 << node->type) & S_leaf_mask) != 0; 34 | } 35 | 36 | cmark_event_type cmark_iter_next(cmark_iter *iter) { 37 | cmark_event_type 
ev_type = iter->next.ev_type; 38 | cmark_node *node = iter->next.node; 39 | 40 | iter->cur.ev_type = ev_type; 41 | iter->cur.node = node; 42 | 43 | if (ev_type == CMARK_EVENT_DONE) { 44 | return ev_type; 45 | } 46 | 47 | /* roll forward to next item, setting both fields */ 48 | if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { 49 | if (node->first_child == NULL) { 50 | /* stay on this node but exit */ 51 | iter->next.ev_type = CMARK_EVENT_EXIT; 52 | } else { 53 | iter->next.ev_type = CMARK_EVENT_ENTER; 54 | iter->next.node = node->first_child; 55 | } 56 | } else if (node == iter->root) { 57 | /* don't move past root */ 58 | iter->next.ev_type = CMARK_EVENT_DONE; 59 | iter->next.node = NULL; 60 | } else if (node->next) { 61 | iter->next.ev_type = CMARK_EVENT_ENTER; 62 | iter->next.node = node->next; 63 | } else if (node->parent) { 64 | iter->next.ev_type = CMARK_EVENT_EXIT; 65 | iter->next.node = node->parent; 66 | } else { 67 | assert(false); 68 | iter->next.ev_type = CMARK_EVENT_DONE; 69 | iter->next.node = NULL; 70 | } 71 | 72 | return ev_type; 73 | } 74 | 75 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current, 76 | cmark_event_type event_type) { 77 | iter->next.ev_type = event_type; 78 | iter->next.node = current; 79 | cmark_iter_next(iter); 80 | } 81 | 82 | cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; } 83 | 84 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) { 85 | return iter->cur.ev_type; 86 | } 87 | 88 | cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; } 89 | 90 | void cmark_consolidate_text_nodes(cmark_node *root) { 91 | if (root == NULL) { 92 | return; 93 | } 94 | cmark_iter *iter = cmark_iter_new(root); 95 | cmark_strbuf buf = CMARK_BUF_INIT(iter->mem); 96 | cmark_event_type ev_type; 97 | cmark_node *cur, *tmp, *next; 98 | 99 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 100 | cur = cmark_iter_get_node(iter); 101 | if (ev_type == CMARK_EVENT_ENTER && 
cur->type == CMARK_NODE_TEXT && 102 | cur->next && cur->next->type == CMARK_NODE_TEXT) { 103 | cmark_strbuf_clear(&buf); 104 | cmark_strbuf_put(&buf, cur->data, cur->len); 105 | tmp = cur->next; 106 | while (tmp && tmp->type == CMARK_NODE_TEXT) { 107 | cmark_iter_next(iter); // advance pointer 108 | cmark_strbuf_put(&buf, tmp->data, tmp->len); 109 | cur->end_column = tmp->end_column; 110 | next = tmp->next; 111 | cmark_node_free(tmp); 112 | tmp = next; 113 | } 114 | iter->mem->free(cur->data); 115 | cur->len = buf.size; 116 | cur->data = cmark_strbuf_detach(&buf); 117 | } 118 | } 119 | 120 | cmark_strbuf_free(&buf); 121 | cmark_iter_free(iter); 122 | } 123 | -------------------------------------------------------------------------------- /src/iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_ITERATOR_H 2 | #define CMARK_ITERATOR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "cmark.h" 9 | 10 | typedef struct { 11 | cmark_event_type ev_type; 12 | cmark_node *node; 13 | } cmark_iter_state; 14 | 15 | struct cmark_iter { 16 | cmark_mem *mem; 17 | cmark_node *root; 18 | cmark_iter_state cur; 19 | cmark_iter_state next; 20 | }; 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/libcmark.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=@CMAKE_INSTALL_PREFIX@ 3 | libdir=@CMAKE_INSTALL_FULL_LIBDIR@ 4 | includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ 5 | 6 | Name: libcmark 7 | Description: CommonMark parsing, rendering, and manipulation 8 | Version: @PROJECT_VERSION@ 9 | Libs: -L${libdir} -lcmark 10 | Cflags: -I${includedir} 11 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 
| #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "cmark.h" 7 | #include "node.h" 8 | 9 | #if defined(__OpenBSD__) 10 | # include 11 | # if OpenBSD >= 201605 12 | # define USE_PLEDGE 13 | # include 14 | # endif 15 | #endif 16 | 17 | #if defined(_WIN32) && !defined(__CYGWIN__) 18 | #include 19 | #include 20 | #endif 21 | 22 | typedef enum { 23 | FORMAT_NONE, 24 | FORMAT_HTML, 25 | FORMAT_XML, 26 | FORMAT_MAN, 27 | FORMAT_COMMONMARK, 28 | FORMAT_LATEX 29 | } writer_format; 30 | 31 | void print_usage(void) { 32 | printf("Usage: cmark [FILE*]\n"); 33 | printf("Options:\n"); 34 | printf(" --to, -t FORMAT Specify output format (html, xml, man, " 35 | "commonmark, latex)\n"); 36 | printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); 37 | printf(" --sourcepos Include source position attribute\n"); 38 | printf(" --hardbreaks Treat newlines as hard line breaks\n"); 39 | printf(" --nobreaks Render soft line breaks as spaces\n"); 40 | printf(" --safe Omit raw HTML and dangerous URLs\n"); 41 | printf(" --unsafe Render raw HTML and dangerous URLs\n"); 42 | printf(" --smart Use smart punctuation\n"); 43 | printf(" --validate-utf8 Replace invalid UTF-8 sequences with U+FFFD\n"); 44 | printf(" --help, -h Print usage information\n"); 45 | printf(" --version Print version\n"); 46 | } 47 | 48 | static void print_document(cmark_node *document, writer_format writer, 49 | int options, int width) { 50 | char *result; 51 | 52 | switch (writer) { 53 | case FORMAT_HTML: 54 | result = cmark_render_html(document, options); 55 | break; 56 | case FORMAT_XML: 57 | result = cmark_render_xml(document, options); 58 | break; 59 | case FORMAT_MAN: 60 | result = cmark_render_man(document, options, width); 61 | break; 62 | case FORMAT_COMMONMARK: 63 | result = cmark_render_commonmark(document, options, width); 64 | break; 65 | case FORMAT_LATEX: 66 | result = cmark_render_latex(document, options, width); 67 | break; 68 | default: 69 | fprintf(stderr, "Unknown format 
%d\n", writer); 70 | exit(1); 71 | } 72 | fwrite(result, strlen(result), 1, stdout); 73 | document->mem->free(result); 74 | } 75 | 76 | int main(int argc, char *argv[]) { 77 | int i, numfps = 0; 78 | int *files; 79 | char buffer[4096]; 80 | cmark_parser *parser; 81 | size_t bytes; 82 | cmark_node *document; 83 | int width = 0; 84 | char *unparsed; 85 | writer_format writer = FORMAT_HTML; 86 | int options = CMARK_OPT_DEFAULT; 87 | 88 | #ifdef USE_PLEDGE 89 | if (pledge("stdio rpath", NULL) != 0) { 90 | perror("pledge"); 91 | return 1; 92 | } 93 | #endif 94 | 95 | #if defined(_WIN32) && !defined(__CYGWIN__) 96 | _setmode(_fileno(stdin), _O_BINARY); 97 | _setmode(_fileno(stdout), _O_BINARY); 98 | #endif 99 | 100 | files = (int *)calloc(argc, sizeof(*files)); 101 | 102 | for (i = 1; i < argc; i++) { 103 | if (strcmp(argv[i], "--version") == 0) { 104 | printf("cmark %s", CMARK_VERSION_STRING); 105 | printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); 106 | exit(0); 107 | } else if (strcmp(argv[i], "--sourcepos") == 0) { 108 | options |= CMARK_OPT_SOURCEPOS; 109 | } else if (strcmp(argv[i], "--hardbreaks") == 0) { 110 | options |= CMARK_OPT_HARDBREAKS; 111 | } else if (strcmp(argv[i], "--nobreaks") == 0) { 112 | options |= CMARK_OPT_NOBREAKS; 113 | } else if (strcmp(argv[i], "--smart") == 0) { 114 | options |= CMARK_OPT_SMART; 115 | } else if (strcmp(argv[i], "--safe") == 0) { 116 | options |= CMARK_OPT_SAFE; 117 | } else if (strcmp(argv[i], "--unsafe") == 0) { 118 | options |= CMARK_OPT_UNSAFE; 119 | } else if (strcmp(argv[i], "--validate-utf8") == 0) { 120 | options |= CMARK_OPT_VALIDATE_UTF8; 121 | } else if ((strcmp(argv[i], "--help") == 0) || 122 | (strcmp(argv[i], "-h") == 0)) { 123 | print_usage(); 124 | exit(0); 125 | } else if (strcmp(argv[i], "--width") == 0) { 126 | i += 1; 127 | if (i < argc) { 128 | width = (int)strtol(argv[i], &unparsed, 10); 129 | if (unparsed && strlen(unparsed) > 0) { 130 | fprintf(stderr, "failed parsing width '%s' at 
'%s'\n", argv[i], 131 | unparsed); 132 | exit(1); 133 | } 134 | } else { 135 | fprintf(stderr, "--width requires an argument\n"); 136 | exit(1); 137 | } 138 | } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) { 139 | i += 1; 140 | if (i < argc) { 141 | if (strcmp(argv[i], "man") == 0) { 142 | writer = FORMAT_MAN; 143 | } else if (strcmp(argv[i], "html") == 0) { 144 | writer = FORMAT_HTML; 145 | } else if (strcmp(argv[i], "xml") == 0) { 146 | writer = FORMAT_XML; 147 | } else if (strcmp(argv[i], "commonmark") == 0) { 148 | writer = FORMAT_COMMONMARK; 149 | } else if (strcmp(argv[i], "latex") == 0) { 150 | writer = FORMAT_LATEX; 151 | } else { 152 | fprintf(stderr, "Unknown format %s\n", argv[i]); 153 | exit(1); 154 | } 155 | } else { 156 | fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); 157 | exit(1); 158 | } 159 | } else if (*argv[i] == '-') { 160 | print_usage(); 161 | exit(1); 162 | } else { // treat as file argument 163 | files[numfps++] = i; 164 | } 165 | } 166 | 167 | parser = cmark_parser_new(options); 168 | for (i = 0; i < numfps; i++) { 169 | FILE *fp = fopen(argv[files[i]], "rb"); 170 | if (fp == NULL) { 171 | fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], 172 | strerror(errno)); 173 | exit(1); 174 | } 175 | 176 | while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { 177 | cmark_parser_feed(parser, buffer, bytes); 178 | if (bytes < sizeof(buffer)) { 179 | break; 180 | } 181 | } 182 | 183 | fclose(fp); 184 | } 185 | 186 | if (numfps == 0) { 187 | 188 | while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { 189 | cmark_parser_feed(parser, buffer, bytes); 190 | if (bytes < sizeof(buffer)) { 191 | break; 192 | } 193 | } 194 | } 195 | 196 | #ifdef USE_PLEDGE 197 | if (pledge("stdio", NULL) != 0) { 198 | perror("pledge"); 199 | return 1; 200 | } 201 | #endif 202 | 203 | document = cmark_parser_finish(parser); 204 | cmark_parser_free(parser); 205 | 206 | print_document(document, writer, 
options, width); 207 | 208 | cmark_node_free(document); 209 | 210 | free(files); 211 | 212 | return 0; 213 | } 214 | -------------------------------------------------------------------------------- /src/man.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "utf8.h" 11 | #include "render.h" 12 | 13 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 14 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 15 | #define CR() renderer->cr(renderer) 16 | #define BLANKLINE() renderer->blankline(renderer) 17 | #define LIST_NUMBER_SIZE 20 18 | 19 | // Functions to convert cmark_nodes to groff man strings. 20 | static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, 21 | unsigned char nextc) { 22 | (void)(nextc); 23 | 24 | if (escape == LITERAL) { 25 | cmark_render_code_point(renderer, c); 26 | return; 27 | } 28 | 29 | switch (c) { 30 | case 46: 31 | if (renderer->begin_line) { 32 | cmark_render_ascii(renderer, "\\&."); 33 | } else { 34 | cmark_render_code_point(renderer, c); 35 | } 36 | break; 37 | case 39: 38 | if (renderer->begin_line) { 39 | cmark_render_ascii(renderer, "\\&'"); 40 | } else { 41 | cmark_render_code_point(renderer, c); 42 | } 43 | break; 44 | case 45: 45 | cmark_render_ascii(renderer, "\\-"); 46 | break; 47 | case 92: 48 | cmark_render_ascii(renderer, "\\e"); 49 | break; 50 | case 8216: // left single quote 51 | cmark_render_ascii(renderer, "\\[oq]"); 52 | break; 53 | case 8217: // right single quote 54 | cmark_render_ascii(renderer, "\\[cq]"); 55 | break; 56 | case 8220: // left double quote 57 | cmark_render_ascii(renderer, "\\[lq]"); 58 | break; 59 | case 8221: // right double quote 60 | cmark_render_ascii(renderer, "\\[rq]"); 61 | break; 62 | case 8212: // em dash 63 | cmark_render_ascii(renderer, "\\[em]"); 64 | 
/*
 * Render one iterator event for `node` as groff man markup.
 *
 * Called once on ENTER and, for non-leaf nodes, once on EXIT.  Raw HTML
 * blocks and inlines produce no output.  Always returns 1.
 *
 * While inside a list item, renderer->block_number_in_list_item counts the
 * block-level children seen so far; from the second one onward the output
 * is wrapped in .RS/.RE so it lines up under the item text.
 */
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  cmark_node *tmp;
  int list_number;
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
  struct block_number *new_block_number;
  // NOTE(review): block_number records use the default allocator rather
  // than the renderer's own; confirm this is intended.
  cmark_mem *allocator = cmark_get_default_mem_allocator();

  // `options` is read above and below, so this cast is no longer needed
  // to silence an unused-parameter warning; it is harmless.
  (void)(options);

  // indent inside nested lists
  if (renderer->block_number_in_list_item &&
      node->type < CMARK_NODE_FIRST_INLINE) {
    if (entering) {
      renderer->block_number_in_list_item->number += 1;
      // the second block in an item opens the indented region
      if (renderer->block_number_in_list_item->number == 2) {
        CR();
        LIT(".RS"); // indent
        CR();
      }
    }
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      CR();
      LIT(".RS");
      CR();
    } else {
      CR();
      LIT(".RE");
      CR();
    }
    break;

  case CMARK_NODE_LIST:
    break;

  case CMARK_NODE_ITEM:
    if (entering) {
      // push a fresh block counter for this item
      new_block_number = allocator->calloc(1, sizeof(struct block_number));
      new_block_number->number = 0;
      new_block_number->parent = renderer->block_number_in_list_item;
      renderer->block_number_in_list_item = new_block_number;
      CR();
      LIT(".IP ");
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
        LIT("\\[bu] 2");
      } else {
        // the item's number is the list start plus the count of
        // preceding siblings
        list_number = cmark_node_get_list_start(node->parent);
        tmp = node;
        while (tmp->prev) {
          tmp = tmp->prev;
          list_number += 1;
        }
        char list_number_s[LIST_NUMBER_SIZE];
        snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
        LIT(list_number_s);
      }
      CR();
    } else {
      if (renderer->block_number_in_list_item) {
        // close the indented region opened for the second block, if any
        if (renderer->block_number_in_list_item->number >= 2) {
          CR();
          LIT(".RE"); // de-indent
        }
        // pop and free this item's block counter
        new_block_number = renderer->block_number_in_list_item;
        renderer->block_number_in_list_item =
            renderer->block_number_in_list_item->parent;
        allocator->free(new_block_number);
      }
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      CR();
      // level 1 becomes a section heading, every other level a subsection
      LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS");
      CR();
    } else {
      CR();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    CR();
    LIT(".IP\n.nf\n\\f[C]\n");
    OUT(cmark_node_get_literal(node), false, NORMAL);
    CR();
    LIT("\\f[]\n.fi");
    CR();
    break;

  case CMARK_NODE_HTML_BLOCK:
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    CR();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    CR();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    CR();
    LIT(".PP\n * * * * *");
    CR();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (entering) {
      // no blank line if first paragraph in list:
      if (node->parent && node->parent->type == CMARK_NODE_ITEM &&
          node->prev == NULL) {
        // no blank line or .PP
      } else {
        CR();
        LIT(".PP");
        CR();
      }
    } else {
      CR();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    LIT(".PD 0\n.P\n.PD");
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (options & CMARK_OPT_HARDBREAKS) {
      LIT(".PD 0\n.P\n.PD");
      CR();
    } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
      // not wrapping and breaks not suppressed: keep the source newline
      CR();
    } else {
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE:
    LIT("\\f[C]");
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    LIT("\\f[]");
    break;

  case CMARK_NODE_HTML_INLINE:
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    if (entering) {
      LIT("\\f[B]");
    } else {
      LIT("\\f[]");
    }
    break;

  case CMARK_NODE_EMPH:
    if (entering) {
      LIT("\\f[I]");
    } else {
      LIT("\\f[]");
    }
    break;

  case CMARK_NODE_LINK:
    // the link text is rendered by the child nodes; the URL follows it
    // in parentheses on exit
    if (!entering) {
      LIT(" (");
      OUT(cmark_node_get_url(node), allow_wrap, URL);
      LIT(")");
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      LIT("[IMAGE: ");
    } else {
      LIT("]");
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}

/* Render the tree under `root` as a groff man page, using S_outc for
 * character output and S_render_node for node events. */
char *cmark_render_man(cmark_node *root, int options, int width) {
  return cmark_render(root, options, width, S_outc, S_render_node);
}
cmark_custom; 48 | 49 | enum cmark_node__internal_flags { 50 | CMARK_NODE__OPEN = (1 << 0), 51 | CMARK_NODE__LAST_LINE_BLANK = (1 << 1), 52 | CMARK_NODE__LAST_LINE_CHECKED = (1 << 2), 53 | CMARK_NODE__LIST_LAST_LINE_BLANK = (1 << 3), 54 | }; 55 | 56 | struct cmark_node { 57 | cmark_mem *mem; 58 | 59 | struct cmark_node *next; 60 | struct cmark_node *prev; 61 | struct cmark_node *parent; 62 | struct cmark_node *first_child; 63 | struct cmark_node *last_child; 64 | 65 | void *user_data; 66 | 67 | unsigned char *data; 68 | bufsize_t len; 69 | 70 | int start_line; 71 | int start_column; 72 | int end_line; 73 | int end_column; 74 | uint16_t type; 75 | uint16_t flags; 76 | 77 | union { 78 | cmark_list list; 79 | cmark_code code; 80 | cmark_heading heading; 81 | cmark_link link; 82 | cmark_custom custom; 83 | int html_block_type; 84 | } as; 85 | }; 86 | 87 | CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); 88 | 89 | #ifdef __cplusplus 90 | } 91 | #endif 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_AST_H 2 | #define CMARK_AST_H 3 | 4 | #include 5 | #include "references.h" 6 | #include "node.h" 7 | #include "buffer.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #define MAX_LINK_LABEL_LENGTH 1000 14 | 15 | struct cmark_parser { 16 | struct cmark_mem *mem; 17 | struct cmark_reference_map *refmap; 18 | struct cmark_node *root; 19 | struct cmark_node *current; 20 | int line_number; 21 | bufsize_t offset; 22 | bufsize_t column; 23 | bufsize_t first_nonspace; 24 | bufsize_t first_nonspace_column; 25 | bufsize_t thematic_break_kill_pos; 26 | int indent; 27 | bool blank; 28 | bool partially_consumed_tab; 29 | cmark_strbuf curline; 30 | bufsize_t last_line_length; 31 | cmark_strbuf linebuf; 32 | cmark_strbuf content; 33 | int options; 34 | bool last_buffer_ended_with_cr; 35 | 
unsigned int total_size; 36 | }; 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/references.c: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "utf8.h" 3 | #include "parser.h" 4 | #include "references.h" 5 | #include "inlines.h" 6 | #include "chunk.h" 7 | 8 | static void reference_free(cmark_reference_map *map, cmark_reference *ref) { 9 | cmark_mem *mem = map->mem; 10 | if (ref != NULL) { 11 | mem->free(ref->label); 12 | mem->free(ref->url); 13 | mem->free(ref->title); 14 | mem->free(ref); 15 | } 16 | } 17 | 18 | // normalize reference: collapse internal whitespace to single space, 19 | // remove leading/trailing whitespace, case fold 20 | // Return NULL if the reference name is actually empty (i.e. composed 21 | // solely from whitespace) 22 | static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { 23 | cmark_strbuf normalized = CMARK_BUF_INIT(mem); 24 | unsigned char *result; 25 | 26 | if (ref == NULL) 27 | return NULL; 28 | 29 | if (ref->len == 0) 30 | return NULL; 31 | 32 | cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); 33 | cmark_strbuf_trim(&normalized); 34 | cmark_strbuf_normalize_whitespace(&normalized); 35 | 36 | result = cmark_strbuf_detach(&normalized); 37 | assert(result); 38 | 39 | if (result[0] == '\0') { 40 | mem->free(result); 41 | return NULL; 42 | } 43 | 44 | return result; 45 | } 46 | 47 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 48 | cmark_chunk *url, cmark_chunk *title) { 49 | cmark_reference *ref; 50 | unsigned char *reflabel = normalize_reference(map->mem, label); 51 | 52 | /* empty reference name, or composed from only whitespace */ 53 | if (reflabel == NULL) 54 | return; 55 | 56 | assert(map->sorted == NULL); 57 | 58 | ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); 59 | ref->label = 
reflabel; 60 | ref->url = cmark_clean_url(map->mem, url); 61 | ref->title = cmark_clean_title(map->mem, title); 62 | /* age records creation order (the map size at insertion time); refcmp uses it as a tie-breaker */ ref->age = map->size; 63 | /* push the new reference onto the head of the map's singly linked list */ ref->next = map->refs; 64 | 65 | /* size accumulates the byte lengths of url and title; cmark_reference_lookup charges it against the expansion limit */ if (ref->url != NULL) 66 | ref->size += (int)strlen((char*)ref->url); 67 | if (ref->title != NULL) 68 | ref->size += (int)strlen((char*)ref->title); 69 | 70 | map->refs = ref; 71 | map->size++; 72 | } 73 | 74 | /* Compare two NUL-terminated, already-normalized labels byte-wise with strcmp. */ static int 75 | labelcmp(const unsigned char *a, const unsigned char *b) { 76 | return strcmp((const char *)a, (const char *)b); 77 | } 78 | 79 | /* qsort comparator over an array of cmark_reference pointers: orders by label, and for equal labels by ascending age, so the reference created first sorts first. */ static int 80 | refcmp(const void *p1, const void *p2) { 81 | cmark_reference *r1 = *(cmark_reference **)p1; 82 | cmark_reference *r2 = *(cmark_reference **)p2; 83 | int res = labelcmp(r1->label, r2->label); 84 | return res ? res : ((int)r1->age - (int)r2->age); 85 | } 86 | 87 | /* bsearch comparator: the key is a normalized label string, the element is a pointer to a cmark_reference pointer. */ static int 88 | refsearch(const void *label, const void *p2) { 89 | cmark_reference *ref = *(cmark_reference **)p2; 90 | return labelcmp((const unsigned char *)label, ref->label); 91 | } 92 | 93 | /* Build map->sorted: copy every reference from the linked list into a newly allocated array, sort it with refcmp, then compact it in place so that only the first entry of each run of equal labels (the one with the lowest age) is kept. map->size is overwritten with the number of distinct labels. Assumes the list holds exactly map->size entries and that map->size is non-zero (cmark_reference_lookup checks this before calling); the calloc result is used without a NULL check. */ static void sort_references(cmark_reference_map *map) { 94 | unsigned int i = 0, last = 0, size = map->size; 95 | cmark_reference *r = map->refs, **sorted = NULL; 96 | 97 | sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); 98 | while (r) { 99 | sorted[i++] = r; 100 | r = r->next; 101 | } 102 | 103 | qsort(sorted, size, sizeof(cmark_reference *), refcmp); 104 | 105 | /* drop duplicates: sorted[last] is the kept entry for the current label */ for (i = 1; i < size; i++) { 106 | if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) 107 | sorted[++last] = sorted[i]; 108 | } 109 | map->sorted = sorted; 110 | map->size = last + 1; 111 | } 112 | 113 | // Returns reference if refmap contains a reference with matching 114 | // label, otherwise NULL. 
115 | /* Look up the reference whose normalized label equals the normalized form of `label`. Returns NULL when the label is empty or longer than MAX_LINK_LABEL_LENGTH, when the map is NULL or has no entries, when the label normalizes to nothing, when no entry matches, or when the match would exceed the expansion limit. `label` must be non-NULL: it is dereferenced before any other check. The sorted index is built lazily on the first lookup. */ cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 116 | cmark_chunk *label) { 117 | cmark_reference **ref = NULL; 118 | cmark_reference *r = NULL; 119 | unsigned char *norm; 120 | 121 | if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) 122 | return NULL; 123 | 124 | if (map == NULL || !map->size) 125 | return NULL; 126 | 127 | norm = normalize_reference(map->mem, label); 128 | if (norm == NULL) 129 | return NULL; 130 | 131 | /* sort_references also rewrites map->size to the number of distinct labels, which is the element count bsearch needs below */ if (!map->sorted) 132 | sort_references(map); 133 | 134 | ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), 135 | refsearch); 136 | /* norm is a temporary owned by this function; release it on every path past this point */ map->mem->free(norm); 137 | 138 | if (ref != NULL) { 139 | r = ref[0]; 140 | /* Check for expansion limit */ 141 | /* a max_ref_size of 0 disables the limit; otherwise refuse the match once the running total ref_size plus this reference's size would exceed it */ if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size) 142 | return NULL; 143 | map->ref_size += r->size; 144 | } 145 | 146 | return r; 147 | } 148 | 149 | /* Release every reference in the map's linked list via reference_free, then the sorted index and the map itself. Passing NULL is a no-op. */ void cmark_reference_map_free(cmark_reference_map *map) { 150 | cmark_reference *ref; 151 | 152 | if (map == NULL) 153 | return; 154 | 155 | ref = map->refs; 156 | while (ref) { 157 | /* read the successor before the node is freed */ cmark_reference *next = ref->next; 158 | reference_free(map, ref); 159 | ref = next; 160 | } 161 | 162 | map->mem->free(map->sorted); 163 | map->mem->free(map); 164 | } 165 | 166 | /* Allocate a zero-initialized reference map with mem->calloc and remember the allocator in map->mem. The calloc result is dereferenced without a NULL check - NOTE(review): presumably the allocator never returns NULL; confirm. */ cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) { 167 | cmark_reference_map *map = 168 | (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map)); 169 | map->mem = mem; 170 | return map; 171 | } 172 | -------------------------------------------------------------------------------- /src/references.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_REFERENCES_H 2 | #define CMARK_REFERENCES_H 3 | 4 | #include "chunk.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct cmark_reference { 11 | struct cmark_reference *next; 12 | unsigned char *label; 13 | unsigned char *url; 14 | unsigned char *title; 15 | unsigned int 
age; 16 | unsigned int size; 17 | }; 18 | 19 | typedef struct cmark_reference cmark_reference; 20 | 21 | struct cmark_reference_map { 22 | cmark_mem *mem; 23 | cmark_reference *refs; 24 | cmark_reference **sorted; 25 | unsigned int size; 26 | unsigned int ref_size; 27 | unsigned int max_ref_size; 28 | }; 29 | 30 | typedef struct cmark_reference_map cmark_reference_map; 31 | 32 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem); 33 | void cmark_reference_map_free(cmark_reference_map *map); 34 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 35 | cmark_chunk *label); 36 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 37 | cmark_chunk *url, cmark_chunk *title); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/render.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "buffer.h" 3 | #include "cmark.h" 4 | #include "utf8.h" 5 | #include "render.h" 6 | #include "node.h" 7 | #include "cmark_ctype.h" 8 | 9 | static inline void S_cr(cmark_renderer *renderer) { 10 | if (renderer->need_cr < 1) { 11 | renderer->need_cr = 1; 12 | } 13 | } 14 | 15 | static inline void S_blankline(cmark_renderer *renderer) { 16 | if (renderer->need_cr < 2) { 17 | renderer->need_cr = 2; 18 | } 19 | } 20 | 21 | static void S_out(cmark_renderer *renderer, const char *source, bool wrap, 22 | cmark_escaping escape) { 23 | int length = (int)strlen(source); 24 | unsigned char nextc; 25 | int32_t c; 26 | int i = 0; 27 | int last_nonspace; 28 | int len; 29 | int k = renderer->buffer->size - 1; 30 | 31 | wrap = wrap && !renderer->no_linebreaks; 32 | 33 | if (renderer->in_tight_list_item && renderer->need_cr > 1) { 34 | renderer->need_cr = 1; 35 | } 36 | while (renderer->need_cr) { 37 | if (k < 0 || renderer->buffer->ptr[k] == '\n') { 38 | k -= 1; 39 | } else { 40 | 
cmark_strbuf_putc(renderer->buffer, '\n'); 41 | if (renderer->need_cr > 1) { 42 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 43 | renderer->prefix->size); 44 | } 45 | } 46 | renderer->column = 0; 47 | renderer->last_breakable = 0; 48 | renderer->begin_line = true; 49 | renderer->begin_content = true; 50 | renderer->need_cr -= 1; 51 | } 52 | 53 | while (i < length) { 54 | if (renderer->begin_line) { 55 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 56 | renderer->prefix->size); 57 | // note: this assumes prefix is ascii: 58 | renderer->column = renderer->prefix->size; 59 | } 60 | 61 | len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); 62 | if (len == -1) { // error condition 63 | return; // return without rendering rest of string 64 | } 65 | nextc = source[i + len]; 66 | if (c == 32 && wrap) { 67 | if (!renderer->begin_line) { 68 | last_nonspace = renderer->buffer->size; 69 | cmark_strbuf_putc(renderer->buffer, ' '); 70 | renderer->column += 1; 71 | renderer->begin_line = false; 72 | renderer->begin_content = false; 73 | // skip following spaces 74 | while (source[i + 1] == ' ') { 75 | i++; 76 | } 77 | // We don't allow breaks that make a digit the first character 78 | // because this causes problems with commonmark output. 79 | if (!cmark_isdigit(source[i + 1])) { 80 | renderer->last_breakable = last_nonspace; 81 | } 82 | } 83 | 84 | } else if (escape == LITERAL) { 85 | if (c == 10) { 86 | cmark_strbuf_putc(renderer->buffer, '\n'); 87 | renderer->column = 0; 88 | renderer->begin_line = true; 89 | renderer->begin_content = true; 90 | renderer->last_breakable = 0; 91 | } else { 92 | cmark_render_code_point(renderer, c); 93 | renderer->begin_line = false; 94 | // we don't set 'begin_content' to false til we've 95 | // finished parsing a digit. 
Reason: in commonmark 96 | // we need to escape a potential list marker after 97 | // a digit: 98 | renderer->begin_content = 99 | renderer->begin_content && cmark_isdigit(c) == 1; 100 | } 101 | } else { 102 | (renderer->outc)(renderer, escape, c, nextc); 103 | renderer->begin_line = false; 104 | renderer->begin_content = 105 | renderer->begin_content && cmark_isdigit(c) == 1; 106 | } 107 | 108 | // If adding the character went beyond width, look for an 109 | // earlier place where the line could be broken: 110 | if (renderer->width > 0 && renderer->column > renderer->width && 111 | !renderer->begin_line && renderer->last_breakable > 0) { 112 | 113 | // copy from last_breakable to remainder 114 | unsigned char *src = renderer->buffer->ptr + 115 | renderer->last_breakable + 1; 116 | bufsize_t remainder_len = renderer->buffer->size - 117 | renderer->last_breakable - 1; 118 | unsigned char *remainder = 119 | (unsigned char *)renderer->mem->realloc(NULL, remainder_len); 120 | memcpy(remainder, src, remainder_len); 121 | // truncate at last_breakable 122 | cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); 123 | // add newline, prefix, and remainder 124 | cmark_strbuf_putc(renderer->buffer, '\n'); 125 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 126 | renderer->prefix->size); 127 | cmark_strbuf_put(renderer->buffer, remainder, remainder_len); 128 | renderer->column = renderer->prefix->size + remainder_len; 129 | renderer->mem->free(remainder); 130 | renderer->last_breakable = 0; 131 | renderer->begin_line = false; 132 | renderer->begin_content = false; 133 | } 134 | 135 | i += len; 136 | } 137 | } 138 | 139 | // Assumes no newlines, assumes ascii content: 140 | void cmark_render_ascii(cmark_renderer *renderer, const char *s) { 141 | int origsize = renderer->buffer->size; 142 | cmark_strbuf_puts(renderer->buffer, s); 143 | renderer->column += renderer->buffer->size - origsize; 144 | } 145 | 146 | void cmark_render_code_point(cmark_renderer 
*renderer, uint32_t c) { 147 | cmark_utf8proc_encode_char(c, renderer->buffer); 148 | renderer->column += 1; 149 | } 150 | 151 | char *cmark_render(cmark_node *root, int options, int width, 152 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 153 | unsigned char), 154 | int (*render_node)(cmark_renderer *renderer, 155 | cmark_node *node, 156 | cmark_event_type ev_type, int options)) { 157 | cmark_mem *mem = root->mem; 158 | cmark_strbuf pref = CMARK_BUF_INIT(mem); 159 | cmark_strbuf buf = CMARK_BUF_INIT(mem); 160 | cmark_node *cur; 161 | cmark_event_type ev_type; 162 | char *result; 163 | cmark_iter *iter = cmark_iter_new(root); 164 | 165 | cmark_renderer renderer = {options, 166 | mem, &buf, &pref, 0, width, 167 | 0, 0, true, true, false, 168 | false, NULL, 169 | outc, S_cr, S_blankline, S_out}; 170 | 171 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 172 | cur = cmark_iter_get_node(iter); 173 | if (!render_node(&renderer, cur, ev_type, options)) { 174 | // a false value causes us to skip processing 175 | // the node's contents. this is used for 176 | // autolinks. 177 | cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); 178 | } 179 | } 180 | 181 | // If the root node is a block type (i.e. 
not inline), ensure there's a final newline: 182 | if (cmark_node_is_block(root)) { 183 | if (renderer.buffer->size == 0 || renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { 184 | cmark_strbuf_putc(renderer.buffer, '\n'); 185 | } 186 | } 187 | 188 | result = (char *)cmark_strbuf_detach(renderer.buffer); 189 | 190 | cmark_iter_free(iter); 191 | cmark_strbuf_free(renderer.prefix); 192 | cmark_strbuf_free(renderer.buffer); 193 | 194 | return result; 195 | } 196 | -------------------------------------------------------------------------------- /src/render.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_RENDER_H 2 | #define CMARK_RENDER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "buffer.h" 12 | 13 | typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping; 14 | 15 | struct block_number { 16 | int number; 17 | struct block_number *parent; 18 | }; 19 | 20 | struct cmark_renderer { 21 | int options; 22 | cmark_mem *mem; 23 | cmark_strbuf *buffer; 24 | cmark_strbuf *prefix; 25 | int column; 26 | int width; 27 | int need_cr; 28 | bufsize_t last_breakable; 29 | bool begin_line; 30 | bool begin_content; 31 | bool no_linebreaks; 32 | bool in_tight_list_item; 33 | struct block_number *block_number_in_list_item; 34 | void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char); 35 | void (*cr)(struct cmark_renderer *); 36 | void (*blankline)(struct cmark_renderer *); 37 | void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping); 38 | }; 39 | 40 | typedef struct cmark_renderer cmark_renderer; 41 | 42 | void cmark_render_ascii(cmark_renderer *renderer, const char *s); 43 | 44 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); 45 | 46 | char *cmark_render(cmark_node *root, int options, int width, 47 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 48 | unsigned char), 49 | int 
(*render_node)(cmark_renderer *renderer, 50 | cmark_node *node, 51 | cmark_event_type ev_type, int options)); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/scanners.h: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "chunk.h" 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, 9 | bufsize_t offset); 10 | bufsize_t _scan_scheme(const unsigned char *p); 11 | bufsize_t _scan_autolink_uri(const unsigned char *p); 12 | bufsize_t _scan_autolink_email(const unsigned char *p); 13 | bufsize_t _scan_html_tag(const unsigned char *p); 14 | bufsize_t _scan_html_comment(const unsigned char *p); 15 | bufsize_t _scan_html_pi(const unsigned char *p); 16 | bufsize_t _scan_html_declaration(const unsigned char *p); 17 | bufsize_t _scan_html_cdata(const unsigned char *p); 18 | bufsize_t _scan_html_block_start(const unsigned char *p); 19 | bufsize_t _scan_html_block_start_7(const unsigned char *p); 20 | bufsize_t _scan_html_block_end_1(const unsigned char *p); 21 | bufsize_t _scan_html_block_end_2(const unsigned char *p); 22 | bufsize_t _scan_html_block_end_3(const unsigned char *p); 23 | bufsize_t _scan_html_block_end_4(const unsigned char *p); 24 | bufsize_t _scan_html_block_end_5(const unsigned char *p); 25 | bufsize_t _scan_link_title(const unsigned char *p); 26 | bufsize_t _scan_spacechars(const unsigned char *p); 27 | bufsize_t _scan_atx_heading_start(const unsigned char *p); 28 | bufsize_t _scan_setext_heading_line(const unsigned char *p); 29 | bufsize_t _scan_open_code_fence(const unsigned char *p); 30 | bufsize_t _scan_close_code_fence(const unsigned char *p); 31 | bufsize_t _scan_dangerous_url(const unsigned char *p); 32 | 33 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) 34 | #define 
scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) 35 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) 36 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) 37 | #define scan_html_comment(c, n) _scan_at(&_scan_html_comment, c, n) 38 | #define scan_html_pi(c, n) _scan_at(&_scan_html_pi, c, n) 39 | #define scan_html_declaration(c, n) _scan_at(&_scan_html_declaration, c, n) 40 | #define scan_html_cdata(c, n) _scan_at(&_scan_html_cdata, c, n) 41 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) 42 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) 43 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) 44 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n) 45 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n) 46 | #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n) 47 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n) 48 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n) 49 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n) 50 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n) 51 | #define scan_setext_heading_line(c, n) \ 52 | _scan_at(&_scan_setext_heading_line, c, n) 53 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n) 54 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) 55 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | -------------------------------------------------------------------------------- /src/utf8.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_UTF8_H 2 | #define CMARK_UTF8_H 3 | 4 | #include 5 | #include "buffer.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void 
cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, 12 | bufsize_t len); 13 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); 14 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); 15 | void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, 16 | bufsize_t size); 17 | int cmark_utf8proc_is_space(int32_t uc); 18 | int cmark_utf8proc_is_punctuation_or_symbol(int32_t uc); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/xml.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | 11 | #define BUFFER_SIZE 100 12 | #define MAX_INDENT 40 13 | 14 | // Functions to convert cmark_nodes to XML strings. 15 | 16 | // C0 control characters, U+FFFE and U+FFFF aren't allowed in XML. 
17 | static const char XML_ESCAPE_TABLE[256] = { 18 | /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 19 | /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20 | /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0, 22 | /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 | /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 | /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 | /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 | /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 30 | /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 | /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 | /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 | /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | }; 35 | 36 | // U+FFFD Replacement Character encoded in UTF-8 37 | #define UTF8_REPL "\xEF\xBF\xBD" 38 | 39 | static const char *XML_ESCAPES[] = { 40 | "", UTF8_REPL, """, "&", "<", ">" 41 | }; 42 | 43 | static void escape_xml(cmark_strbuf *ob, const unsigned char *src, 44 | bufsize_t size) { 45 | bufsize_t i = 0, org, esc = 0; 46 | 47 | while (i < size) { 48 | org = i; 49 | while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0) 50 | i++; 51 | 52 | if (i > org) 53 | cmark_strbuf_put(ob, src + org, i - org); 54 | 55 | if (i >= size) 56 | break; 57 | 58 | if (esc == 9) { 59 | // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to 60 | // be changed. 61 | // We know that src[i] is 0xBE or 0xBF. 
62 | if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { 63 | cmark_strbuf_putc(ob, 0xBD); 64 | } else { 65 | cmark_strbuf_putc(ob, src[i]); 66 | } 67 | } else { 68 | cmark_strbuf_puts(ob, XML_ESCAPES[esc]); 69 | } 70 | 71 | i++; 72 | } 73 | } 74 | 75 | static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) { 76 | if (source) 77 | escape_xml(dest, source, (bufsize_t)strlen((char *)source)); 78 | } 79 | 80 | struct render_state { 81 | cmark_strbuf *xml; 82 | int indent; 83 | }; 84 | 85 | static inline void indent(struct render_state *state) { 86 | int i; 87 | for (i = 0; i < state->indent && i < MAX_INDENT; i++) { 88 | cmark_strbuf_putc(state->xml, ' '); 89 | } 90 | } 91 | 92 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, 93 | struct render_state *state, int options) { 94 | cmark_strbuf *xml = state->xml; 95 | bool literal = false; 96 | cmark_delim_type delim; 97 | bool entering = (ev_type == CMARK_EVENT_ENTER); 98 | char buffer[BUFFER_SIZE]; 99 | 100 | if (entering) { 101 | indent(state); 102 | cmark_strbuf_putc(xml, '<'); 103 | cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 104 | 105 | if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { 106 | snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", 107 | node->start_line, node->start_column, node->end_line, 108 | node->end_column); 109 | cmark_strbuf_puts(xml, buffer); 110 | } 111 | 112 | literal = false; 113 | 114 | switch (node->type) { 115 | case CMARK_NODE_DOCUMENT: 116 | cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); 117 | break; 118 | case CMARK_NODE_TEXT: 119 | case CMARK_NODE_CODE: 120 | case CMARK_NODE_HTML_BLOCK: 121 | case CMARK_NODE_HTML_INLINE: 122 | cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 123 | escape_xml(xml, node->data, node->len); 124 | cmark_strbuf_puts(xml, "as.heading.level); 154 | cmark_strbuf_puts(xml, buffer); 155 | break; 156 | case CMARK_NODE_CODE_BLOCK: 157 | if (node->as.code.info) { 
158 | cmark_strbuf_puts(xml, " info=\""); 159 | escape_xml_str(xml, node->as.code.info); 160 | cmark_strbuf_putc(xml, '"'); 161 | } 162 | cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 163 | escape_xml(xml, node->data, node->len); 164 | cmark_strbuf_puts(xml, "as.custom.on_enter); 172 | cmark_strbuf_putc(xml, '"'); 173 | cmark_strbuf_puts(xml, " on_exit=\""); 174 | escape_xml_str(xml, node->as.custom.on_exit); 175 | cmark_strbuf_putc(xml, '"'); 176 | break; 177 | case CMARK_NODE_LINK: 178 | case CMARK_NODE_IMAGE: 179 | cmark_strbuf_puts(xml, " destination=\""); 180 | escape_xml_str(xml, node->as.link.url); 181 | cmark_strbuf_putc(xml, '"'); 182 | if (node->as.link.title) { 183 | cmark_strbuf_puts(xml, " title=\""); 184 | escape_xml_str(xml, node->as.link.title); 185 | cmark_strbuf_putc(xml, '"'); 186 | } 187 | break; 188 | default: 189 | break; 190 | } 191 | if (node->first_child) { 192 | state->indent += 2; 193 | } else if (!literal) { 194 | cmark_strbuf_puts(xml, " /"); 195 | } 196 | cmark_strbuf_puts(xml, ">\n"); 197 | 198 | } else if (node->first_child) { 199 | state->indent -= 2; 200 | indent(state); 201 | cmark_strbuf_puts(xml, "\n"); 204 | } 205 | 206 | return 1; 207 | } 208 | 209 | char *cmark_render_xml(cmark_node *root, int options) { 210 | char *result; 211 | cmark_strbuf xml = CMARK_BUF_INIT(root->mem); 212 | cmark_event_type ev_type; 213 | cmark_node *cur; 214 | struct render_state state = {&xml, 0}; 215 | 216 | cmark_iter *iter = cmark_iter_new(root); 217 | 218 | cmark_strbuf_puts(state.xml, "\n"); 219 | cmark_strbuf_puts(state.xml, 220 | "\n"); 221 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 222 | cur = cmark_iter_get_node(iter); 223 | S_render_node(cur, ev_type, &state, options); 224 | } 225 | result = (char *)cmark_strbuf_detach(&xml); 226 | 227 | cmark_iter_free(iter); 228 | return result; 229 | } 230 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # To get verbose output: cmake --build build --target "test" -- ARGS='-V' 2 | 3 | # By default, we run the spec tests only if python3 is available. 4 | # To require the spec tests, compile with -DSPEC_TESTS=1 5 | 6 | if(SPEC_TESTS) 7 | set(PYTHON_REQUIRED REQUIRED) 8 | else() 9 | set(PYTHON_REQUIRED) 10 | endif() 11 | 12 | if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12) 13 | find_package(Python3 ${PYTHON_REQUIRED} COMPONENTS Interpreter) 14 | else() 15 | find_package(PythonInterp 3 ${PYTHON_REQUIRED}) 16 | set(Python3_Interpreter_FOUND ${PYTHONINTERP_FOUND}) 17 | add_executable(Python3::Interpreter IMPORTED) 18 | set_target_properties(Python3::Interpreter PROPERTIES 19 | IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) 20 | endif() 21 | 22 | IF (Python3_Interpreter_FOUND) 23 | 24 | add_test(NAME html_normalization 25 | COMMAND "$" -m doctest "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py") 26 | 27 | if(BUILD_SHARED_LIBS) 28 | add_test(NAME spectest_library 29 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" 30 | --no-normalize 31 | --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" 32 | --library-dir "$") 33 | 34 | add_test(NAME pathological_tests_library 35 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py" 36 | --library-dir "$") 37 | 38 | add_test(NAME roundtriptest_library 39 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip_tests.py" 40 | --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" 41 | --library-dir "$") 42 | 43 | add_test(NAME entity_library 44 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py" 45 | --library-dir "$") 46 | endif() 47 | 48 | add_test(NAME spectest_executable 49 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" 50 | --no-normalize 51 | --spec "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" 52 | --program "$") 53 | 54 | add_test(NAME smartpuncttest_executable 55 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" 56 | --no-normalize 57 | 
--spec "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" 58 | --program "$ --smart") 59 | 60 | add_test(NAME regressiontest_executable 61 | COMMAND "$" "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" 62 | --no-normalize 63 | --spec "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt" 64 | --program "$") 65 | 66 | ELSE(Python3_Interpreter_FOUND) 67 | 68 | message(WARNING "A Python 3 Interpreter is required to run the spec tests") 69 | 70 | ENDIF(Python3_Interpreter_FOUND) 71 | -------------------------------------------------------------------------------- /test/cmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from ctypes import * 5 | from subprocess import Popen, PIPE 6 | import platform 7 | import os 8 | 9 | class cmark_mem(Structure): 10 | _fields_ = [("calloc", c_void_p), 11 | ("realloc", c_void_p), 12 | ("free", CFUNCTYPE(None, c_void_p))] 13 | 14 | def pipe_through_prog(prog, text): 15 | p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE) 16 | [result, err] = p1.communicate(input=text.encode('utf-8')) 17 | return [p1.returncode, result.decode('utf-8'), err] 18 | 19 | def to_html(lib, text): 20 | get_alloc = lib.cmark_get_default_mem_allocator 21 | get_alloc.restype = POINTER(cmark_mem) 22 | free_func = get_alloc().contents.free 23 | 24 | markdown = lib.cmark_markdown_to_html 25 | markdown.restype = POINTER(c_char) 26 | markdown.argtypes = [c_char_p, c_size_t, c_int] 27 | 28 | textbytes = text.encode('utf-8') 29 | textlen = len(textbytes) 30 | # 1 << 17 == CMARK_OPT_UNSAFE 31 | cstring = markdown(textbytes, textlen, 1 << 17) 32 | result = string_at(cstring).decode('utf-8') 33 | free_func(cstring) 34 | 35 | return [0, result, ''] 36 | 37 | def to_commonmark(lib, text): 38 | get_alloc = lib.cmark_get_default_mem_allocator 39 | get_alloc.restype = POINTER(cmark_mem) 40 | free_func = get_alloc().contents.free 41 | 42 | parse_document = lib.cmark_parse_document 43 | 
parse_document.restype = c_void_p 44 | parse_document.argtypes = [c_char_p, c_size_t, c_int] 45 | 46 | render_commonmark = lib.cmark_render_commonmark 47 | render_commonmark.restype = POINTER(c_char) 48 | render_commonmark.argtypes = [c_void_p, c_int, c_int] 49 | 50 | free_node = lib.cmark_node_free 51 | free_node.argtypes = [c_void_p] 52 | 53 | textbytes = text.encode('utf-8') 54 | textlen = len(textbytes) 55 | node = parse_document(textbytes, textlen, 0) 56 | cstring = render_commonmark(node, 0, 0) 57 | result = string_at(cstring).decode('utf-8') 58 | free_func(cstring) 59 | free_node(node) 60 | 61 | return [0, result, ''] 62 | 63 | class CMark: 64 | def __init__(self, prog=None, library_dir=None): 65 | self.prog = prog 66 | if prog: 67 | prog += ' --unsafe' 68 | self.to_html = lambda x: pipe_through_prog(prog, x) 69 | self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x) 70 | else: 71 | sysname = platform.system() 72 | if sysname == 'Darwin': 73 | libnames = [ "libcmark.dylib" ] 74 | elif sysname == 'Windows': 75 | libnames = [ "cmark.dll", "libcmark.dll" ] 76 | else: 77 | libnames = [ "libcmark.so" ] 78 | if not library_dir: 79 | library_dir = os.path.join("build", "src") 80 | for libname in libnames: 81 | candidate = os.path.join(library_dir, libname) 82 | if os.path.isfile(candidate): 83 | libpath = candidate 84 | break 85 | cmark = CDLL(libpath) 86 | self.to_html = lambda x: to_html(cmark, x) 87 | self.to_commonmark = lambda x: to_commonmark(cmark, x) 88 | 89 | -------------------------------------------------------------------------------- /test/entity_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import re 5 | import os 6 | import argparse 7 | import sys 8 | import platform 9 | import html 10 | from cmark import CMark 11 | 12 | def get_entities(): 13 | regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}' 14 | with 
open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f: 15 | code = f.read() 16 | entities = [] 17 | for entity, utf8 in re.findall(regex, code, re.MULTILINE): 18 | utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8') 19 | entities.append((entity, utf8)) 20 | return entities 21 | 22 | parser = argparse.ArgumentParser(description='Run cmark tests.') 23 | parser.add_argument('--program', dest='program', nargs='?', default=None, 24 | help='program to test') 25 | parser.add_argument('--library-dir', dest='library_dir', nargs='?', 26 | default=None, help='directory containing dynamic library') 27 | args = parser.parse_args(sys.argv[1:]) 28 | 29 | cmark = CMark(prog=args.program, library_dir=args.library_dir) 30 | 31 | entities = get_entities() 32 | 33 | passed = 0 34 | errored = 0 35 | failed = 0 36 | 37 | exceptions = { 38 | 'quot': '"', 39 | 'QUOT': '"', 40 | 41 | # These are broken, but I'm not too worried about them. 42 | 'nvlt': '<⃒', 43 | 'nvgt': '>⃒', 44 | } 45 | 46 | print("Testing entities:") 47 | for entity, utf8 in entities: 48 | [rc, actual, err] = cmark.to_html("&{};".format(entity)) 49 | check = exceptions.get(entity, utf8) 50 | 51 | if rc != 0: 52 | errored += 1 53 | print(entity, '[ERRORED (return code {})]'.format(rc)) 54 | print(err) 55 | elif check in actual: 56 | # print(entity, '[PASSED]') # omit noisy success output 57 | passed += 1 58 | else: 59 | print(entity, '[FAILED]') 60 | print(repr(actual)) 61 | failed += 1 62 | 63 | print("{} passed, {} failed, {} errored".format(passed, failed, errored)) 64 | if failed == 0 and errored == 0: 65 | exit(0) 66 | else: 67 | exit(1) 68 | -------------------------------------------------------------------------------- /test/normalize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from html.parser import HTMLParser 3 | import urllib 4 | 5 | try: 6 | from html.parser import HTMLParseError 7 | except 
ImportError: 8 | # HTMLParseError was removed in Python 3.5. It could never be 9 | # thrown, so we define a placeholder instead. 10 | class HTMLParseError(Exception): 11 | pass 12 | 13 | from html.entities import name2codepoint 14 | import sys 15 | import re 16 | import html 17 | 18 | # Normalization code, adapted from 19 | # https://github.com/karlcow/markdown-testsuite/ 20 | significant_attrs = ["alt", "href", "src", "title"] 21 | whitespace_re = re.compile('\s+') 22 | class MyHTMLParser(HTMLParser): 23 | def __init__(self): 24 | HTMLParser.__init__(self) 25 | self.convert_charrefs = False 26 | self.last = "starttag" 27 | self.in_pre = False 28 | self.output = "" 29 | self.last_tag = "" 30 | def handle_data(self, data): 31 | after_tag = self.last == "endtag" or self.last == "starttag" 32 | after_block_tag = after_tag and self.is_block_tag(self.last_tag) 33 | if after_tag and self.last_tag == "br": 34 | data = data.lstrip('\n') 35 | if not self.in_pre: 36 | data = whitespace_re.sub(' ', data) 37 | if after_block_tag and not self.in_pre: 38 | if self.last == "starttag": 39 | data = data.lstrip() 40 | elif self.last == "endtag": 41 | data = data.strip() 42 | self.output += data 43 | self.last = "data" 44 | def handle_endtag(self, tag): 45 | if tag == "pre": 46 | self.in_pre = False 47 | elif self.is_block_tag(tag): 48 | self.output = self.output.rstrip() 49 | self.output += "" 50 | self.last_tag = tag 51 | self.last = "endtag" 52 | def handle_starttag(self, tag, attrs): 53 | if tag == "pre": 54 | self.in_pre = True 55 | if self.is_block_tag(tag): 56 | self.output = self.output.rstrip() 57 | self.output += "<" + tag 58 | # For now we don't strip out 'extra' attributes, because of 59 | # raw HTML test cases. 
60 | # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs) 61 | if attrs: 62 | attrs.sort() 63 | for (k,v) in attrs: 64 | self.output += " " + k 65 | if v in ['href','src']: 66 | self.output += ("=" + '"' + 67 | urllib.quote(urllib.unquote(v), safe='/') + '"') 68 | elif v != None: 69 | self.output += ("=" + '"' + html.escape(v,quote=True) + '"') 70 | self.output += ">" 71 | self.last_tag = tag 72 | self.last = "starttag" 73 | def handle_startendtag(self, tag, attrs): 74 | """Ignore closing tag for self-closing """ 75 | self.handle_starttag(tag, attrs) 76 | self.last_tag = tag 77 | self.last = "endtag" 78 | def handle_comment(self, data): 79 | self.output += '' 80 | self.last = "comment" 81 | def handle_decl(self, data): 82 | self.output += '' 83 | self.last = "decl" 84 | def unknown_decl(self, data): 85 | self.output += '' 86 | self.last = "decl" 87 | def handle_pi(self,data): 88 | self.output += '' 89 | self.last = "pi" 90 | def handle_entityref(self, name): 91 | try: 92 | c = chr(name2codepoint[name]) 93 | except KeyError: 94 | c = None 95 | self.output_char(c, '&' + name + ';') 96 | self.last = "ref" 97 | def handle_charref(self, name): 98 | try: 99 | if name.startswith("x"): 100 | c = chr(int(name[1:], 16)) 101 | else: 102 | c = chr(int(name)) 103 | except ValueError: 104 | c = None 105 | self.output_char(c, '&' + name + ';') 106 | self.last = "ref" 107 | # Helpers. 
108 | def output_char(self, c, fallback): 109 | if c == '<': 110 | self.output += "<" 111 | elif c == '>': 112 | self.output += ">" 113 | elif c == '&': 114 | self.output += "&" 115 | elif c == '"': 116 | self.output += """ 117 | elif c == None: 118 | self.output += fallback 119 | else: 120 | self.output += c 121 | 122 | def is_block_tag(self,tag): 123 | return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote', 124 | 'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas', 125 | 'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd', 126 | 'progress', 'div', 'section', 'dl', 'table', 'td', 'dt', 127 | 'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption', 128 | 'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul', 129 | 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style']) 130 | 131 | def normalize_html(html): 132 | r""" 133 | Return normalized form of HTML which ignores insignificant output 134 | differences: 135 | 136 | Multiple inner whitespaces are collapsed to a single space (except 137 | in pre tags): 138 | 139 | >>> normalize_html("

a \t b

") 140 | '

a b

' 141 | 142 | >>> normalize_html("

a \t\nb

") 143 | '

a b

' 144 | 145 | * Whitespace surrounding block-level tags is removed. 146 | 147 | >>> normalize_html("

a b

") 148 | '

a b

' 149 | 150 | >>> normalize_html("

a b

") 151 | '

a b

' 152 | 153 | >>> normalize_html("

a b

") 154 | '

a b

' 155 | 156 | >>> normalize_html("\n\t

\n\t\ta b\t\t

\n\t") 157 | '

a b

' 158 | 159 | >>> normalize_html("a b ") 160 | 'a b ' 161 | 162 | * Self-closing tags are converted to open tags. 163 | 164 | >>> normalize_html("
") 165 | '
' 166 | 167 | * Attributes are sorted and lowercased. 168 | 169 | >>> normalize_html('
x') 170 | 'x' 171 | 172 | * References are converted to unicode, except that '<', '>', '&', and 173 | '"' are rendered using entities. 174 | 175 | >>> normalize_html("∀&><"") 176 | '\u2200&><"' 177 | 178 | """ 179 | html_chunk_re = re.compile("(\|\<[^>]*\>|[^<]+)") 180 | try: 181 | parser = MyHTMLParser() 182 | # We work around HTMLParser's limitations parsing CDATA 183 | # by breaking the input into chunks and passing CDATA chunks 184 | # through verbatim. 185 | for chunk in re.finditer(html_chunk_re, html): 186 | if chunk.group(0)[:8] == "\[%s]

\n){%d}" % (bad_key, COUNT-1)) 48 | 49 | 50 | # list of pairs consisting of input and a regex that must match the output. 51 | pathological = { 52 | # note - some pythons have limit of 65535 for {num-matches} in re. 53 | "nested strong emph": 54 | (("*a **a " * 32500) + "b" + (" a** a*" * 32500), 55 | re.compile("(a a ){32500}b( a a){32500}")), 56 | "many emph closers with no openers": 57 | (("a_ " * 32500), 58 | re.compile("(a[_] ){32499}a_")), 59 | "many emph openers with no closers": 60 | (("_a " * 32500), 61 | re.compile("(_a ){32499}_a")), 62 | "many link closers with no openers": 63 | (("a]" * 32500), 64 | re.compile("(a\\]){32500}")), 65 | "many link openers with no closers": 66 | (("[a" * 32500), 67 | re.compile("(\\[a){32500}")), 68 | "mismatched openers and closers": 69 | (("*a_ " * 25000), 70 | re.compile("([*]a[_] ){24999}[*]a_")), 71 | "issue #389": 72 | (("*a " * 20000 + "_a*_ " * 20000), 73 | re.compile("(a ){20000}(_a<\\/em>_ ?){20000}")), 74 | "openers and closers multiple of 3": 75 | (("a**b" + ("c* " * 25000)), 76 | re.compile("a[*][*]b(c[*] ){24999}c[*]")), 77 | "link openers and emph closers": 78 | (("[ a_" * 25000), 79 | re.compile("(\\[ a_){25000}")), 80 | "pattern [ (]( repeated": 81 | (("[ (](" * 40000), 82 | re.compile("(\\[ \\(\\]\\(){40000}")), 83 | "pattern ![[]() repeated": 84 | ("![[]()" * 160000, 85 | re.compile("(!\\[){160000}")), 86 | "hard link/emph case": 87 | ("**x [a*b**c*](d)", 88 | re.compile("\\*\\*x ab\\*\\*c")), 89 | "nested brackets": 90 | (("[" * 25000) + "a" + ("]" * 25000), 91 | re.compile("\\[{25000}a\\]{25000}")), 92 | "nested block quotes": 93 | ((("> " * 25000) + "a"), 94 | re.compile("(
\n){25000}")), 95 | "deeply nested lists": 96 | ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))), 97 | re.compile("
    \n(
  • a\n
      \n){499}
    • a
    • \n
    \n(
  • \n
\n){499}")), 98 | "U+0000 in input": 99 | ("abc\u0000de\u0000", 100 | re.compile("abc\ufffd?de\ufffd?")), 101 | "backticks": 102 | ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))), 103 | re.compile("^

[e`]*

\n$")), 104 | "unclosed links A": 105 | ("[a](\n?)+x(\n)+$")), 116 | "empty lines in deeply nested lists in blockquote": 117 | ("> " + "- " * 30000 + "x\n" + ">\n" * 30000, 118 | re.compile(r"^(<\w+>\n?)+x(\n)+$")), 119 | "emph in deep blockquote": 120 | (">" * 100000 + "a*" * 100000, 121 | re.compile(r"^(<\w+>\n)+

.*

\n(\n)+$")), 122 | "reference collisions": hash_collisions() 123 | # "many references": 124 | # ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, 125 | # re.compile("(\\[0\\] ){4999}")) 126 | } 127 | 128 | pathological_cmark = { 129 | "nested inlines": 130 | ("*" * 20000 + "a" + "*" * 20000, 131 | re.compile("^\\*+a\\*+$")), 132 | } 133 | 134 | whitespace_re = re.compile('/s+/') 135 | 136 | def run_pathological(q, inp): 137 | q.put(cmark.to_html(inp)) 138 | 139 | def run_pathological_cmark(q, inp): 140 | q.put(cmark.to_commonmark(inp)) 141 | 142 | def run_tests(): 143 | q = multiprocessing.Queue() 144 | passed = [] 145 | errored = [] 146 | failed = [] 147 | ignored = [] 148 | 149 | print("Testing pathological cases:") 150 | for description in (*pathological, *pathological_cmark): 151 | if description in pathological: 152 | (inp, regex) = pathological[description] 153 | p = multiprocessing.Process(target=run_pathological, 154 | args=(q, inp)) 155 | else: 156 | (inp, regex) = pathological_cmark[description] 157 | p = multiprocessing.Process(target=run_pathological_cmark, 158 | args=(q, inp)) 159 | p.start() 160 | try: 161 | # wait TIMEOUT seconds or until it finishes 162 | rc, actual, err = q.get(True, TIMEOUT) 163 | p.join() 164 | if rc != 0: 165 | print(description, '[ERRORED (return code %d)]' %rc) 166 | print(err) 167 | if description in allowed_failures: 168 | ignored.append(description) 169 | else: 170 | errored.append(description) 171 | elif regex.search(actual): 172 | print(description, '[PASSED]') 173 | passed.append(description) 174 | else: 175 | print(description, '[FAILED]') 176 | print(repr(actual[:60])) 177 | if description in allowed_failures: 178 | ignored.append(description) 179 | else: 180 | failed.append(description) 181 | except queue.Empty: 182 | p.terminate() 183 | p.join() 184 | print(description, '[TIMEOUT]') 185 | if description in allowed_failures: 186 | ignored.append(description) 187 | else: 188 | 
errored.append(description) 189 | 190 | print("%d passed, %d failed, %d errored" % 191 | (len(passed), len(failed), len(errored))) 192 | if ignored: 193 | print("Ignoring these allowed failures:") 194 | for x in ignored: 195 | print(x) 196 | if failed or errored: 197 | exit(1) 198 | else: 199 | exit(0) 200 | 201 | if __name__ == "__main__": 202 | run_tests() 203 | -------------------------------------------------------------------------------- /test/regression.txt: -------------------------------------------------------------------------------- 1 | ### Regression tests 2 | 3 | Issue #113: EOL character weirdness on Windows 4 | (Important: first line ends with CR + CR + LF) 5 | 6 | ```````````````````````````````` example 7 | line1 8 | line2 9 | . 10 |

line1

11 |

line2

12 | ```````````````````````````````` 13 | 14 | Issue #114: cmark skipping first character in line 15 | (Important: the blank lines around "Repeatedly" contain a tab.) 16 | 17 | ```````````````````````````````` example 18 | By taking it apart 19 | 20 | - alternative solutions 21 | → 22 | Repeatedly solving 23 | → 24 | - how techniques 25 | . 26 |

By taking it apart

27 |
    28 |
  • alternative solutions
  • 29 |
30 |

Repeatedly solving

31 |
    32 |
  • how techniques
  • 33 |
34 | ```````````````````````````````` 35 | 36 | Issue jgm/CommonMark#430: h2..h6 not recognized as block tags. 37 | 38 | ```````````````````````````````` example 39 |

lorem

40 | 41 |

lorem

42 | 43 |

lorem

44 | 45 |

lorem

46 | 47 |
lorem
48 | 49 |
lorem
50 | . 51 |

lorem

52 |

lorem

53 |

lorem

54 |

lorem

55 |
lorem
56 |
lorem
57 | ```````````````````````````````` 58 | 59 | Issue jgm/commonmark.js#109 - tabs after setext header line 60 | 61 | 62 | ```````````````````````````````` example 63 | hi 64 | --→ 65 | . 66 |

hi

67 | ```````````````````````````````` 68 | 69 | Issue #177 - incorrect emphasis parsing 70 | 71 | ```````````````````````````````` example 72 | a***b* c* 73 | . 74 |

a*b c

75 | ```````````````````````````````` 76 | 77 | Issue #193 - unescaped left angle brackets in link destination 78 | 79 | ```````````````````````````````` example 80 | [a] 81 | 82 | [a]: 83 | . 84 |

[a]

85 |

[a]: <te

86 | ```````````````````````````````` 87 | 88 | Issue #192 - escaped spaces in link destination 89 | 90 | 91 | ```````````````````````````````` example 92 | [a](te\ st) 93 | . 94 |

[a](te\ st)

95 | ```````````````````````````````` 96 | 97 | Issue #527 - meta tags in inline contexts 98 | 99 | ```````````````````````````````` example 100 | City: 101 | 102 | 103 | 104 | . 105 |

City: 106 | 107 | 108 |

109 | ```````````````````````````````` 110 | 111 | Issue #530 - link parsing corner cases 112 | 113 | ```````````````````````````````` example 114 | [a](\ b) 115 | 116 | [a](<[a](\ b)

122 |

[a](<<b)

123 |

[a](<b 124 | )

125 | ```````````````````````````````` 126 | 127 | Issue commonmark#526 - unescaped ( in link title 128 | 129 | ```````````````````````````````` example 130 | [link](url ((title)) 131 | . 132 |

[link](url ((title))

133 | ```````````````````````````````` 134 | 135 | Issue commonmark#517 - script, pre, style close tag without 136 | opener. 137 | 138 | ```````````````````````````````` example 139 | 140 | 141 | 142 | 143 | 144 | . 145 | 146 | 147 | 148 | ```````````````````````````````` 149 | 150 | Issue #289. 151 | 152 | ```````````````````````````````` example 153 | [a]( 154 | . 155 |

[a](<b) c>

156 | ```````````````````````````````` 157 | 158 | Issue #334 - UTF-8 BOM 159 | 160 | ```````````````````````````````` example 161 | # Hi 162 | . 163 |

Hi

164 | ```````````````````````````````` 165 | 166 | Issue commonmark.js#213 - type 7 blocks can't interrupt 167 | paragraph 168 | 169 | ```````````````````````````````` example 170 | - 174 | . 175 |
    176 |
  • 177 |
  • 182 |
183 | ```````````````````````````````` 184 | 185 | Issue #383 - emphasis parsing. 186 | 187 | ```````````````````````````````` example 188 | *****Hello*world**** 189 | . 190 |

**Helloworld

191 | ```````````````````````````````` 192 | 193 | Issue #424 - emphasis before links 194 | 195 | ```````````````````````````````` example 196 | *text* [link](#section) 197 | . 198 |

text link

199 | ```````````````````````````````` 200 | 201 | ` 204 | . 205 | 206 | ```````````````````````````````` 207 | 208 | Declarations don't need spaces, according to the spec 209 | ```````````````````````````````` example 210 | x 211 | . 212 |

x

213 | ```````````````````````````````` 214 | 215 | An underscore that is not part of a delimiter should not prevent another 216 | pair of underscores from forming part of their own. 217 | ```````````````````````````````` example 218 | __!_!__ 219 | 220 | __!x!__ 221 | 222 | **!*!** 223 | 224 | --- 225 | 226 | _*__*_* 227 | 228 | _*xx*_* 229 | 230 | _*__-_- 231 | 232 | _*xx-_- 233 | . 234 |

!_!

235 |

!x!

236 |

!*!

237 |
238 |

__*

239 |

xx*

240 |

*__--

241 |

*xx--

242 | ```````````````````````````````` 243 | 244 | commonmark.js #277: 245 | ```````````````````````````````` example 246 | ```language-r 247 | x <- 1 248 | ``` 249 | 250 | ```r 251 | x <- 1 252 | ``` 253 | . 254 |
x <- 1
255 | 
256 |
x <- 1
257 | 
258 | ```````````````````````````````` 259 | 260 | https://github.com/commonmark/commonmark.js/issues/283 261 | ```````````````````````````````` example 262 | x 263 | 264 | x 265 | . 266 |

x

267 |

x<!>

268 | ```````````````````````````````` 269 | 270 | Case fold test 271 | ```````````````````````````````` example 272 | [link][µÓĐĹŠƆƦǏǶȜɆΓΨϪЇЛЯҎҶӞԆԮՄႠႴᏸᲕᲩᲿḦṎṶẚỂỪἙἿὭᾑᾥᾼῢΩↃⓉⰍⰡⱩⲔⲼⳫꙢꜢꝌꝽꞪꟇꭾꮒꮦꮺCW𐐐𐐤𐓀𐕰𐖅𐲅𐲙𐲭𑢮𖹂𖹖𞤊𞤞] 273 | 274 | [μóđĺšɔʀǐƕȝɇγψϫїляҏҷӟԇԯմⴀⴔᏰვჩჿḧṏṷaʾểừἑἷὥἡιὥιαιῢωↄⓣⰽⱑⱪⲕⲽⳬꙣꜣꝍᵹɦꟈᎮᏂᏖᏪcw𐐸𐑌𐓨𐖗𐖬𐳅𐳙𐳭𑣎𖹢𖹶𞤬𞥀]: /url 275 | . 276 |

link

277 | ```````````````````````````````` 278 | 279 | https://github.com/commonmark/cmark/issues/548 280 | ```````````````````````````````` example 281 | (￾￿) 282 | . 283 |

(￾￿)

284 | ```````````````````````````````` 285 | -------------------------------------------------------------------------------- /test/roundtrip_tests.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from spec_tests import get_tests, do_test 4 | from cmark import CMark 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser(description='Run cmark roundtrip tests.') 8 | parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, 9 | help='program to test') 10 | parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', 11 | help='path to spec') 12 | parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', 13 | default=None, help='limit to sections matching regex pattern') 14 | parser.add_argument('--library-dir', dest='library_dir', nargs='?', 15 | default=None, help='directory containing dynamic library') 16 | parser.add_argument('--no-normalize', dest='normalize', 17 | action='store_const', const=False, default=True, 18 | help='do not normalize HTML') 19 | parser.add_argument('-n', '--number', type=int, default=None, 20 | help='only consider the test with the given number') 21 | args = parser.parse_args(sys.argv[1:]) 22 | 23 | spec = sys.argv[1] 24 | 25 | def converter(md): 26 | cmark = CMark(prog=args.program, library_dir=args.library_dir) 27 | [ec, result, err] = cmark.to_commonmark(md) 28 | if ec == 0: 29 | [ec, html, err] = cmark.to_html(result) 30 | if ec == 0: 31 | # In the commonmark writer we insert dummy HTML 32 | # comments between lists, and between lists and code 33 | # blocks. 
Strip these out, since the spec uses 34 | # two blank lines instead: 35 | return [ec, re.sub('\n', '', html), ''] 36 | else: 37 | return [ec, html, err] 38 | else: 39 | return [ec, result, err] 40 | 41 | tests = get_tests(args.spec) 42 | result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': 0} 43 | for test in tests: 44 | do_test(converter, test, args.normalize, result_counts) 45 | 46 | exit(result_counts['fail'] + result_counts['error']) 47 | -------------------------------------------------------------------------------- /test/smart_punct.txt: -------------------------------------------------------------------------------- 1 | ## Smart punctuation 2 | 3 | Open quotes are matched with closed quotes. 4 | The same method is used for matching openers and closers 5 | as is used in emphasis parsing: 6 | 7 | ```````````````````````````````` example 8 | "Hello," said the spider. 9 | "'Shelob' is my name." 10 | . 11 |

“Hello,” said the spider. 12 | “‘Shelob’ is my name.”

13 | ```````````````````````````````` 14 | 15 | ```````````````````````````````` example 16 | 'A', 'B', and 'C' are letters. 17 | . 18 |

‘A’, ‘B’, and ‘C’ are letters.

19 | ```````````````````````````````` 20 | 21 | ```````````````````````````````` example 22 | 'Oak,' 'elm,' and 'beech' are names of trees. 23 | So is 'pine.' 24 | . 25 |

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. 26 | So is ‘pine.’

27 | ```````````````````````````````` 28 | 29 | ```````````````````````````````` example 30 | 'He said, "I want to go."' 31 | . 32 |

‘He said, “I want to go.”’

33 | ```````````````````````````````` 34 | 35 | A single quote that isn't an open quote matched 36 | with a close quote will be treated as an 37 | apostrophe: 38 | 39 | ```````````````````````````````` example 40 | Were you alive in the 70's? 41 | . 42 |

Were you alive in the 70’s?

43 | ```````````````````````````````` 44 | 45 | ```````````````````````````````` example 46 | Here is some quoted '`code`' and a "[quoted link](url)". 47 | . 48 |

Here is some quoted ‘code’ and a “quoted link”.

49 | ```````````````````````````````` 50 | 51 | Here the first `'` is treated as an apostrophe, not 52 | an open quote, because the final single quote is matched 53 | by the single quote before `jolly`: 54 | 55 | ```````````````````````````````` example 56 | 'tis the season to be 'jolly' 57 | . 58 |

’tis the season to be ‘jolly’

59 | ```````````````````````````````` 60 | 61 | Multiple apostrophes should not be marked as open/closing quotes. 62 | 63 | ```````````````````````````````` example 64 | 'We'll use Jane's boat and John's truck,' Jenna said. 65 | . 66 |

‘We’ll use Jane’s boat and John’s truck,’ Jenna said.

67 | ```````````````````````````````` 68 | 69 | An unmatched double quote will be interpreted as a 70 | left double quote, to facilitate this style: 71 | 72 | ```````````````````````````````` example 73 | "A paragraph with no closing quote. 74 | 75 | "Second paragraph by same speaker, in fiction." 76 | . 77 |

“A paragraph with no closing quote.

78 |

“Second paragraph by same speaker, in fiction.”

79 | ```````````````````````````````` 80 | 81 | A quote following a `]` or `)` character cannot 82 | be an open quote: 83 | 84 | ```````````````````````````````` example 85 | [a]'s b' 86 | . 87 |

[a]’s b’

88 | ```````````````````````````````` 89 | 90 | Quotes that are escaped come out as literal straight 91 | quotes: 92 | 93 | ```````````````````````````````` example 94 | \"This is not smart.\" 95 | This isn\'t either. 96 | 5\'8\" 97 | . 98 |

"This is not smart." 99 | This isn't either. 100 | 5'8"

101 | ```````````````````````````````` 102 | 103 | Two hyphens form an en-dash, three an em-dash. 104 | 105 | ```````````````````````````````` example 106 | Some dashes: em---em 107 | en--en 108 | em --- em 109 | en -- en 110 | 2--3 111 | . 112 |

Some dashes: em—em 113 | en–en 114 | em — em 115 | en – en 116 | 2–3

117 | ```````````````````````````````` 118 | 119 | A sequence of more than three hyphens is 120 | parsed as a sequence of em and/or en dashes, 121 | with no hyphens. If possible, a homogeneous 122 | sequence of dashes is used (so, 10 hyphens 123 | = 5 en dashes, and 9 hyphens = 3 em dashes). 124 | When a heterogeneous sequence must be used, 125 | the em dashes come first, followed by the en 126 | dashes, and as few en dashes as possible are 127 | used (so, 7 hyphens = 2 em dashes and 1 en 128 | dash). 129 | 130 | ```````````````````````````````` example 131 | one- 132 | two-- 133 | three--- 134 | four---- 135 | five----- 136 | six------ 137 | seven------- 138 | eight-------- 139 | nine--------- 140 | thirteen-------------. 141 | . 142 |

one- 143 | two– 144 | three— 145 | four–– 146 | five—– 147 | six—— 148 | seven—–– 149 | eight–––– 150 | nine——— 151 | thirteen———––.

152 | ```````````````````````````````` 153 | 154 | Hyphens can be escaped: 155 | 156 | ```````````````````````````````` example 157 | Escaped hyphens: \-- \-\-\-. 158 | . 159 |

Escaped hyphens: -- ---.

160 | ```````````````````````````````` 161 | 162 | Three periods form an ellipsis: 163 | 164 | ```````````````````````````````` example 165 | Ellipses...and...and.... 166 | . 167 |

Ellipses…and…and….

168 | ```````````````````````````````` 169 | 170 | Periods can be escaped if ellipsis-formation 171 | is not wanted: 172 | 173 | ```````````````````````````````` example 174 | No ellipses\.\.\. 175 | . 176 |

No ellipses...

177 | ```````````````````````````````` 178 | -------------------------------------------------------------------------------- /test/spec_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | from difflib import unified_diff 6 | import argparse 7 | import re 8 | import json 9 | import os 10 | from cmark import CMark 11 | from normalize import normalize_html 12 | 13 | parser = argparse.ArgumentParser(description='Run cmark tests.') 14 | parser.add_argument('-p', '--program', dest='program', nargs='?', default=None, 15 | help='program to test') 16 | parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt', 17 | help='path to spec') 18 | parser.add_argument('-P', '--pattern', dest='pattern', nargs='?', 19 | default=None, help='limit to sections matching regex pattern') 20 | parser.add_argument('--library-dir', dest='library_dir', nargs='?', 21 | default=None, help='directory containing dynamic library') 22 | parser.add_argument('--no-normalize', dest='normalize', 23 | action='store_const', const=False, default=True, 24 | help='do not normalize HTML') 25 | parser.add_argument('-d', '--dump-tests', dest='dump_tests', 26 | action='store_const', const=True, default=False, 27 | help='dump tests in JSON format') 28 | parser.add_argument('--debug-normalization', dest='debug_normalization', 29 | action='store_const', const=True, 30 | default=False, help='filter stdin through normalizer for testing') 31 | parser.add_argument('-n', '--number', type=int, default=None, 32 | help='only consider the test with the given number') 33 | parser.add_argument('--fuzz-corpus', 34 | help='convert test cases to fuzz corpus') 35 | args = parser.parse_args(sys.argv[1:]) 36 | 37 | def out(str): 38 | sys.stdout.buffer.write(str.encode('utf-8')) 39 | 40 | def print_test_header(headertext, example_number, start_line, end_line): 41 | out("Example %d (lines %d-%d) 
%s\n" % (example_number,start_line,end_line,headertext)) 42 | 43 | def do_test(converter, test, normalize, result_counts): 44 | [retcode, actual_html, err] = converter(test['markdown']) 45 | if retcode == 0: 46 | expected_html = test['html'] 47 | unicode_error = None 48 | if normalize: 49 | try: 50 | passed = normalize_html(actual_html) == normalize_html(expected_html) 51 | except UnicodeDecodeError as e: 52 | unicode_error = e 53 | passed = False 54 | else: 55 | passed = actual_html == expected_html 56 | if passed: 57 | result_counts['pass'] += 1 58 | else: 59 | print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) 60 | out(test['markdown'] + '\n') 61 | if unicode_error: 62 | out("Unicode error: " + str(unicode_error) + '\n') 63 | out("Expected: " + repr(expected_html) + '\n') 64 | out("Got: " + repr(actual_html) + '\n') 65 | else: 66 | expected_html_lines = expected_html.splitlines(True) 67 | actual_html_lines = actual_html.splitlines(True) 68 | for diffline in unified_diff(expected_html_lines, actual_html_lines, 69 | "expected HTML", "actual HTML"): 70 | out(diffline) 71 | out('\n') 72 | result_counts['fail'] += 1 73 | else: 74 | print_test_header(test['section'], test['example'], test['start_line'], test['end_line']) 75 | out("program returned error code %d\n" % retcode) 76 | sys.stdout.buffer.write(err) 77 | result_counts['error'] += 1 78 | 79 | def get_tests(specfile): 80 | line_number = 0 81 | start_line = 0 82 | end_line = 0 83 | example_number = 0 84 | markdown_lines = [] 85 | html_lines = [] 86 | state = 0 # 0 regular text, 1 markdown example, 2 html output 87 | headertext = '' 88 | tests = [] 89 | 90 | header_re = re.compile('#+ ') 91 | 92 | with open(specfile, 'r', encoding='utf-8', newline='\n') as specf: 93 | for line in specf: 94 | line_number = line_number + 1 95 | l = line.strip() 96 | if l == "`" * 32 + " example": 97 | state = 1 98 | elif l == "`" * 32: 99 | state = 0 100 | example_number = example_number + 1 
101 | end_line = line_number 102 | tests.append({ 103 | "markdown":''.join(markdown_lines).replace('→',"\t"), 104 | "html":''.join(html_lines).replace('→',"\t"), 105 | "example": example_number, 106 | "start_line": start_line, 107 | "end_line": end_line, 108 | "section": headertext}) 109 | start_line = 0 110 | markdown_lines = [] 111 | html_lines = [] 112 | elif l == ".": 113 | state = 2 114 | elif state == 1: 115 | if start_line == 0: 116 | start_line = line_number - 1 117 | markdown_lines.append(line) 118 | elif state == 2: 119 | html_lines.append(line) 120 | elif state == 0 and re.match(header_re, line): 121 | headertext = header_re.sub('', line).strip() 122 | return tests 123 | 124 | if __name__ == "__main__": 125 | if args.debug_normalization: 126 | out(normalize_html(sys.stdin.read())) 127 | exit(0) 128 | 129 | all_tests = get_tests(args.spec) 130 | 131 | if args.fuzz_corpus: 132 | i = 1 133 | base = os.path.basename(args.spec) 134 | (name, ext) = os.path.splitext(base) 135 | for test in all_tests: 136 | filename = os.path.join(args.fuzz_corpus, '%s.%d' % (name, i)) 137 | with open(filename, 'wb') as f: 138 | f.write(b'\0' * 8) # options header 139 | f.write(test['markdown'].encode()) 140 | i += 1 141 | exit(0) 142 | 143 | if args.pattern: 144 | pattern_re = re.compile(args.pattern, re.IGNORECASE) 145 | else: 146 | pattern_re = re.compile('.') 147 | tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ] 148 | if args.dump_tests: 149 | out(json.dumps(tests, ensure_ascii=False, indent=2)) 150 | exit(0) 151 | else: 152 | skipped = len(all_tests) - len(tests) 153 | converter = CMark(prog=args.program, library_dir=args.library_dir).to_html 154 | result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped} 155 | for test in tests: 156 | do_test(converter, test, args.normalize, result_counts) 157 | out("{pass} passed, {fail} failed, {error} errored, {skip} 
skipped\n".format(**result_counts)) 158 | exit(result_counts['fail'] + result_counts['error']) 159 | -------------------------------------------------------------------------------- /toolchain-mingw32.cmake: -------------------------------------------------------------------------------- 1 | # the name of the target operating system 2 | SET(CMAKE_SYSTEM_NAME Windows) 3 | 4 | # which compilers to use for C and C++ 5 | SET(CMAKE_C_COMPILER i686-w64-mingw32-gcc) 6 | SET(CMAKE_CXX_COMPILER i686-w64-mingw32-g++) 7 | SET(CMAKE_RC_COMPILER i686-w64-mingw32-windres) 8 | 9 | # here is the target environment located 10 | SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32/ "${CMAKE_SOURCE_DIR}/windows") 11 | 12 | # adjust the default behaviour of the FIND_XXX() commands: 13 | # search headers and libraries in the target environment, search 14 | # programs in the host environment 15 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) 16 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) 17 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 18 | -------------------------------------------------------------------------------- /tools/appveyor-build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | if "%MSVC_VERSION%" == "10" goto msvc10 4 | 5 | call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64 6 | goto build 7 | 8 | :msvc10 9 | call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 10 | 11 | :build 12 | nmake 13 | 14 | -------------------------------------------------------------------------------- /tools/make_case_fold_inc.py: -------------------------------------------------------------------------------- 1 | # Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt). 
2 | # Usage: python3 tools/make_case_fold_inc.py < data/CaseFolding.txt > src/case_fold.inc 3 | 4 | import sys, re 5 | 6 | prog = re.compile('([0-9A-F]+); [CF];((?: [0-9A-F]+)+);') 7 | main_table = [] 8 | repl_table = [] 9 | repl_idx = 0 10 | test = '' 11 | test_result = '' 12 | 13 | for line in sys.stdin: 14 | m = prog.match(line) 15 | if m is None: 16 | continue 17 | 18 | cp = int(m[1], 16); 19 | if cp < 0x80: 20 | continue 21 | 22 | repl = b'' 23 | for x in m[2].split(): 24 | repl += chr(int(x, 16)).encode('UTF-8') 25 | 26 | # Generate test case 27 | if len(main_table) % 20 == 0: 28 | test += chr(cp) 29 | test_result += repl.decode('UTF-8') 30 | 31 | # 17 bits for code point 32 | if cp >= (1 << 17): 33 | raise Exception("code point too large") 34 | 35 | # 12 bits for upper bits of replacement index 36 | # The lowest bit is always zero. 37 | if repl_idx // 2 >= (1 << 12): 38 | raise Exception("too many replacements") 39 | 40 | # 3 bits for size of replacement 41 | repl_size = len(repl) 42 | if repl_size >= (1 << 3): 43 | raise Exception("too many replacement chars") 44 | 45 | main_table += [ cp | repl_idx // 2 << 17 | repl_size << 29 ] 46 | repl_table += repl 47 | repl_idx += repl_size 48 | 49 | # Make sure that repl_idx is even 50 | if repl_idx % 2 != 0: 51 | repl_table += [0] 52 | repl_idx += 1 53 | 54 | # Print test case 55 | if False: 56 | print("test:", test) 57 | print("test_result:", test_result) 58 | sys.exit(0) 59 | 60 | print("""// Generated by tools/make_case_fold_inc.py 61 | 62 | #define CF_MAX (1 << 17) 63 | #define CF_TABLE_SIZE %d 64 | #define CF_CODE_POINT(x) ((x) & 0x1FFFF) 65 | #define CF_REPL_IDX(x) ((((x) >> 17) & 0xFFF) * 2) 66 | #define CF_REPL_SIZE(x) ((x) >> 29) 67 | 68 | static const uint32_t cf_table[%d] = {""" % (len(main_table), len(main_table))) 69 | 70 | i = 0 71 | size = len(main_table) 72 | for value in main_table: 73 | if i % 6 == 0: 74 | print(" ", end="") 75 | print("0x%X" % value, end="") 76 | i += 1 77 | if i == size: print() 
# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc

# Import the submodule explicitly instead of relying on `import html`
# happening to expose `html.entities` as an attribute.
import html.entities

# Template for the first part of the generated file.  The two %d slots
# both receive the number of entries in the main table.
HEADER = """/* Autogenerated by tools/make_entities_inc.py */

#define ENT_MIN_LENGTH 2
#define ENT_MAX_LENGTH 32
#define ENT_TABLE_SIZE %d
#define ENT_TEXT_IDX(x) ((x) & 0x7FFF)
#define ENT_NAME_SIZE(x) (((x) >> 15) & 0x1F)
#define ENT_REPL_SIZE(x) ((x) >> 20)

static const uint32_t cmark_entities[%d] = {"""


def format_rows(values, fmt, per_row):
    """Format `values` as the body of a C array initializer.

    Each value is rendered with the %-format string `fmt`.  Values are
    separated by ", ", every row holds at most `per_row` values, starts
    with a single space and is joined to the next row by ",\\n".  There is
    no trailing comma or newline; an empty sequence yields "".
    """
    rows = []
    for start in range(0, len(values), per_row):
        chunk = values[start:start + per_row]
        rows.append(" " + ", ".join(fmt % value for value in chunk))
    return ",\n".join(rows)


entities5 = html.entities.html5

# Remove keys without semicolons. HTML5 allows some named character
# references without a trailing semicolon.  The trailing ';' is stripped
# from the names that are kept, and the list is sorted by name so the
# generated table can be searched with a binary search.
entities = sorted((k[:-1], v) for (k, v) in entities5.items() if k[-1] == ';')

# One packed integer per entity:
#   bits  0-14  offset of the entity's text in text_table
#   bits 15-19  length of the entity name in bytes
#   bits 20-22  length of the replacement text in bytes
main_table = []
# Concatenation of "<name><replacement>" (both UTF-8) for every entity.
text_table = b''
text_idx = 0

for (ent, repl) in entities:
    ent_bytes = ent.encode('UTF-8')
    ent_size = len(ent_bytes)
    repl_bytes = repl.encode('UTF-8')
    repl_size = len(repl_bytes)

    # Refuse to generate a table whose fields would overflow their bit
    # widths; the ENT_* macros in HEADER depend on this layout.
    if text_idx >= (1 << 15):
        raise Exception("text index too large")
    if ent_size >= (1 << 5):
        raise Exception("entity name too long")
    if repl_size >= (1 << 3):
        raise Exception("entity replacement too long")

    main_table.append(text_idx | ent_size << 15 | repl_size << 20)

    text_table += ent_bytes + repl_bytes
    text_idx += ent_size + repl_size

print(HEADER % (len(main_table), len(main_table)))

# Six packed entries per line.
rows = format_rows(main_table, "0x%X", 6)
if rows:
    print(rows)

print("""};

static const unsigned char cmark_entity_text[%d] = {""" % len(text_table))

# Twelve bytes per line; iterating/slicing `bytes` yields integers/bytes,
# both of which format_rows handles.
rows = format_rows(text_table, "0x%02X", 12)
if rows:
    print(rows)

print("};")
    30 |
  • one 31 | 32 |
      33 |
    • two
    • 34 |
    • three
    • 35 |
  • 36 |
37 | 38 | 39 | % hoedown 40 | ## hi\### 41 | ^D 42 |

    <h2>hi\</h2>

43 | 44 | 45 | % hoedown 46 | [ΑΓΩ]: /φου 47 | 48 | [αγω] 49 | ^D 50 |

    <p>[αγω]</p>

51 | 52 | 53 | % hoedown 54 | ``` 55 | [foo]: /url 56 | ``` 57 | 58 | [foo] 59 | ^D 60 |

```

61 | 62 |

```

63 | 64 |

foo

65 | 66 | 67 | % hoedown 68 | [foo](url "ti\*tle") 69 | ^D 70 |

foo

71 | 72 | 73 | % ./hoedown 74 | - one 75 | - two 76 | - three 77 | - four 78 | ^D 79 |
    80 |
  • one 81 | 82 |
      83 |
    • two
    • 84 |
    • three
    • 85 |
    • four
    • 86 |
  • 87 |
88 | 89 | 90 | `discount` 91 | ---------- 92 | 93 | `cmark` is about six times faster. 94 | 95 | `kramdown` 96 | ---------- 97 | 98 | `cmark` is about a hundred times faster. 99 | 100 | `kramdown` also gets tied in knots by pathological input like 101 | 102 | python -c 'print(("[" * 50000) + "a" + ("]" * 50000))' 103 | 104 | 105 | -------------------------------------------------------------------------------- /wrappers/wrapper.php: -------------------------------------------------------------------------------- 1 | cmark_markdown_to_html($markdown, strlen($markdown), 0); 11 | $html = FFI::string($pointerReturn); 12 | FFI::free($pointerReturn); 13 | 14 | return $html; 15 | } 16 | 17 | $markdown = <<<'md' 18 | # First level title 19 | 20 | ## Second level title 21 | 22 | Paragraph 23 | 24 | md; 25 | 26 | echo markdownToHtml($markdown) . PHP_EOL; 27 | -------------------------------------------------------------------------------- /wrappers/wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Example for using the shared library from python 4 | # Will work with either python 2 or python 3 5 | # Requires cmark library to be installed 6 | 7 | from ctypes import * 8 | import sys 9 | import platform 10 | 11 | sysname = platform.system() 12 | 13 | if sysname == 'Darwin': 14 | libname = "libcmark.dylib" 15 | elif sysname == 'Windows': 16 | libname = "cmark.dll" 17 | else: 18 | libname = "libcmark.so" 19 | cmark = CDLL(libname) 20 | 21 | class cmark_mem(Structure): 22 | _fields_ = [("calloc", c_void_p), 23 | ("realloc", c_void_p), 24 | ("free", CFUNCTYPE(None, c_void_p))] 25 | 26 | get_alloc = cmark.cmark_get_default_mem_allocator 27 | get_alloc.restype = POINTER(cmark_mem) 28 | free_func = get_alloc().contents.free 29 | 30 | markdown = cmark.cmark_markdown_to_html 31 | markdown.restype = POINTER(c_char) 32 | markdown.argtypes = [c_char_p, c_size_t, c_int] 33 | 34 | opts = 0 # defaults 35 | 36 | def 
#lang racket/base

;; requires racket >= 5.3 because of submodules

;; Lowlevel interface
;;
;; Thin FFI bindings: every `defcmark` form binds the C function of the same
;; name from the "libcmark" shared library.

(module low-level racket/base

  (require ffi/unsafe ffi/unsafe/define)

  (provide (all-defined-out))

  (define-ffi-definer defcmark (ffi-lib "libcmark"))

  ;; Node types as symbols; `_enum` numbers them consecutively from 0 in the
  ;; order listed, so the order here must match the C enum.
  (define _cmark_node_type
    (_enum '(;; Error status
             none
             ;; Block
             document block-quote list item code-block
             html-block custom-block
             paragraph heading thematic-break
             ;; ?? first-block = document
             ;; ?? last-block = thematic-break
             ;; Inline
             text softbreak linebreak code html-inline custom-inline
             emph strong link image
             ;; ?? first-inline = text
             ;; ?? last-inline = image
             )))
  (define _cmark_list_type
    (_enum '(no_list bullet_list ordered_list)))
  (define _cmark_delim_type
    (_enum '(no_delim period_delim paren_delim)))
  ;; Option flags.  Each entry is (name bit-position); the bitmask value of an
  ;; option is 2^bit-position.
  (define _cmark_opts
    (let ([opts '([sourcepos 1] ; include sourcepos attribute on block elements
                  [hardbreaks 2] ; render `softbreak` elements as hard line breaks
                  [safe 3] ; defined here for API compatibility (on by default)
                  [unsafe 17] ; render raw HTML and unsafe links
                  [nobreaks 4] ; render `softbreak` elements as spaces
                  [normalize 8] ; legacy (no effect)
                  [validate-utf8 9] ; validate UTF-8 in the input
                  [smart 10] ; straight quotes to curly, ---/-- to em/en dashes
                  )])
      (_bitmask (apply append (map (λ(o) `(,(car o) = ,(expt 2 (cadr o))))
                                   opts)))))

  (define-cpointer-type _node)

  ;; NOTE(review): the length argument is passed as `_int` here, while the
  ;; Python and Ruby wrappers shipped next to this file declare the same
  ;; parameter as size_t -- confirm against cmark.h and consider a
  ;; size_t-compatible ctype.
  ;; The result string is decoded to a Racket string and the C buffer is then
  ;; released with `free`.
  (defcmark cmark_markdown_to_html
    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))

  (defcmark cmark_parse_document
    (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
          -> _node))

  (defcmark cmark_render_html
    (_fun _node _cmark_opts
          -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))

  (defcmark cmark_node_new (_fun _cmark_node_type -> _node))
  (defcmark cmark_node_free (_fun _node -> _void))

  ;; Tree navigation; `_node/null` maps a NULL result to #f.
  (defcmark cmark_node_next (_fun _node -> _node/null))
  (defcmark cmark_node_previous (_fun _node -> _node/null))
  (defcmark cmark_node_parent (_fun _node -> _node/null))
  (defcmark cmark_node_first_child (_fun _node -> _node/null))
  (defcmark cmark_node_last_child (_fun _node -> _node/null))

  ;; Accessors
  (defcmark cmark_node_get_user_data (_fun _node -> _racket))
  (defcmark cmark_node_set_user_data (_fun _node _racket -> _bool))
  (defcmark cmark_node_get_type (_fun _node -> _cmark_node_type))
  (defcmark cmark_node_get_type_string (_fun _node -> _bytes))
  (defcmark cmark_node_get_literal (_fun _node -> _string))
  (defcmark cmark_node_set_literal (_fun _node _string -> _bool))
  (defcmark cmark_node_get_heading_level (_fun _node -> _int))
  (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool))
  (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type))
  (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool))
  (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type))
  (defcmark cmark_node_set_list_delim (_fun _node _cmark_delim_type -> _bool))
  (defcmark cmark_node_get_list_start (_fun _node -> _int))
  (defcmark cmark_node_set_list_start (_fun _node _int -> _bool))
  (defcmark cmark_node_get_list_tight (_fun _node -> _bool))
  (defcmark cmark_node_set_list_tight (_fun _node _bool -> _bool))
  (defcmark cmark_node_get_fence_info (_fun _node -> _string))
  (defcmark cmark_node_set_fence_info (_fun _node _string -> _bool))
  (defcmark cmark_node_get_url (_fun _node -> _string))
  (defcmark cmark_node_set_url (_fun _node _string -> _bool))
  (defcmark cmark_node_get_title (_fun _node -> _string))
  (defcmark cmark_node_set_title (_fun _node _string -> _bool))
  (defcmark cmark_node_get_start_line (_fun _node -> _int))
  (defcmark cmark_node_get_start_column (_fun _node -> _int))
  (defcmark cmark_node_get_end_line (_fun _node -> _int))
  (defcmark cmark_node_get_end_column (_fun _node -> _int))

  ;; Tree manipulation
  (defcmark cmark_node_unlink (_fun _node -> _void))
  (defcmark cmark_node_insert_before (_fun _node _node -> _bool))
  (defcmark cmark_node_insert_after (_fun _node _node -> _bool))
  (defcmark cmark_node_prepend_child (_fun _node _node -> _bool))
  (defcmark cmark_node_append_child (_fun _node _node -> _bool))
  (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))

  (defcmark cmark_version (_fun -> _int))
  (defcmark cmark_version_string (_fun -> _string))

  )

;; Rackety interface

(module high-level racket/base

  (require (submod ".." low-level) ffi/unsafe)

  ;; Converts markdown (a string or a UTF-8 byte string) straight to an HTML
  ;; string.  `options` is a list of option symbols from `_cmark_opts`.
  (provide cmark-markdown-to-html)
  (define (cmark-markdown-to-html str [options '(normalize smart)])
    (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
                            options))

  ;; (make-getter+setter field) expands to
  ;; (cons cmark_node_get_<field> cmark_node_set_<field>).
  (require (for-syntax racket/base racket/syntax))
  (define-syntax (make-getter+setter stx)
    (syntax-case stx ()
      [(_ name) (with-syntax ([(getter setter)
                               (map (λ(op) (format-id #'name "cmark_node_~a_~a"
                                                      op #'name))
                                    '(get set))])
                  #'(cons getter setter))]))
  ;; Defines `name` as an association list from a node type symbol to the
  ;; list of (getter . setter) pairs for that type's extra fields.
  (define-syntax-rule (define-getters+setters name [type field ...] ...)
    (define name (list (list 'type (make-getter+setter field) ...) ...)))
  (define-getters+setters getters+setters
    [heading heading_level] [code-block fence_info]
    [link url title] [image url title]
    [list list_type list_delim list_start list_tight])

  ;; Converts a node (and, recursively, its children) to an S-expression of
  ;; the form (type info children) for container nodes or (type info text)
  ;; for leaf nodes, where `info` holds the values of the type-specific
  ;; fields listed in `getters+setters`.  Raises an error for a container
  ;; that has literal text, a leaf that has children, or an unlisted type.
  (provide cmark->sexpr)
  (define (cmark->sexpr node)
    (define text (cmark_node_get_literal node))
    (define type (cmark_node_get_type node))
    (define children
      (let loop ([node (cmark_node_first_child node)])
        (if (not node) '()
            (cons (cmark->sexpr node) (loop (cmark_node_next node))))))
    (define info
      (cond [(assq type getters+setters)
             => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))]
            [else '()]))
    (define (assert-no what-not b)
      (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type)))
    (cond [(memq type '(document paragraph heading block-quote list item
                        emph strong link image))
           (assert-no 'text text)
           (list type info children)]
          [(memq type '(text code code-block html-block html-inline
                        softbreak linebreak thematic-break))
           (assert-no 'children (pair? children))
           (list type info text)]
          [else (error 'cmark->sexpr "unknown type: ~s" type)]))

  ;; Builds a node tree from an S-expression in the format produced by
  ;; `cmark->sexpr` and returns its root.  A `cmark_node_free` finalizer is
  ;; registered on the root only.
  (provide sexpr->cmark)
  (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above
    (define (loop sexpr)
      (define type (car sexpr))
      (define info (cadr sexpr))
      (define data (caddr sexpr))
      (define node (cmark_node_new type))
      (let ([gss (assq type getters+setters)])
        (when gss
          (unless (= (length (cdr gss)) (length info))
            (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
          (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info)))
      (cond [(string? data) (cmark_node_set_literal node data)]
            [(not data) (void)]
            [(list? data)
             (for ([child (in-list data)])
               ;; Recurse with `loop`, not `sexpr->cmark`: the latter would
               ;; register a separate `cmark_node_free` finalizer on every
               ;; child, so a child could be freed on its own while it is
               ;; still part of the tree it was appended to.
               (cmark_node_append_child node (loop child)))]
            [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
      node)
    (define root (loop sexpr))
    (register-finalizer root cmark_node_free)
    root)

  ;; Parses markdown (a string or a UTF-8 byte string) into a node tree.
  ;; Registers a `cmark_node_free` finalizer
  (provide cmark-parse-document)
  (define (cmark-parse-document str [options '(normalize smart)])
    (define root (cmark_parse_document
                  (if (bytes? str) str (string->bytes/utf-8 str))
                  options))
    (register-finalizer root cmark_node_free)
    root)

  ;; Renders a node tree to an HTML string.
  (provide cmark-render-html)
  (define (cmark-render-html root [options '(normalize smart)])
    (cmark_render_html root options)))

#; ;; sample use
(begin
  (require 'high-level racket/string)
  (cmark-render-html
   (cmark-parse-document
    (string-join '("foo"
                   "==="
                   ""
                   "> blah"
                   ">"
                   "> blah *blah* `bar()` blah:"
                   ">"
                   "> function foo() {"
                   "> bar();"
                   "> }")
                 "\n"))))