├── debian ├── compat ├── source │ └── format ├── rules ├── control ├── copyright └── changelog ├── .gitattributes ├── .npmignore ├── Dockerfile ├── .gitignore ├── html_block_names.txt ├── sundown.def ├── src ├── html_entities.h ├── stack.h ├── stack.c ├── autolink.h ├── buffer.h ├── html_entities.gperf ├── buffer.c ├── markdown.h ├── html_blocks.h ├── autolink.c └── markdown.c ├── package.json ├── SECURITY.md ├── snudown.d.ts ├── header.js ├── .github └── workflows │ └── ci.yml ├── footer.js ├── html ├── houdini.h ├── html.h ├── houdini_html_e.c ├── houdini_href_e.c ├── html_smartypants.c └── html.c ├── README.markdown ├── snudown.c └── test_snudown.js /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.sh text eol=lf 2 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | * 2 | !dist/*.js 3 | !dist/*.ts 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM emscripten/emsdk:3.1.33 2 | 3 | RUN apt-get update 4 | RUN apt-get install gperf 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | node_modules/ 4 | .idea/ 5 | snudown.egg-info/ 6 | *.pyc 7 | *.so 8 | *.so.* 9 | *.o 10 | 11 | 
-------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This file was automatically generated by stdeb 0.6.0+git at 4 | # Wed, 16 Nov 2011 10:36:53 -0800 5 | 6 | %: 7 | dh $@ --with python2 --buildsystem=python_distutils 8 | 9 | 10 | -------------------------------------------------------------------------------- /html_block_names.txt: -------------------------------------------------------------------------------- 1 | ## 2 | p 3 | dl 4 | h1 5 | h2 6 | h3 7 | h4 8 | h5 9 | h6 10 | ol 11 | ul 12 | del 13 | div 14 | ins 15 | pre 16 | form 17 | math 18 | table 19 | figure 20 | iframe 21 | script 22 | style 23 | fieldset 24 | noscript 25 | blockquote 26 | span 27 | -------------------------------------------------------------------------------- /sundown.def: -------------------------------------------------------------------------------- 1 | LIBRARY SUNDOWN 2 | EXPORTS 3 | sdhtml_renderer 4 | sdhtml_toc_renderer 5 | sdhtml_smartypants 6 | bufgrow 7 | bufnew 8 | bufcstr 9 | bufprefix 10 | bufput 11 | bufputs 12 | bufputc 13 | bufrelease 14 | bufreset 15 | bufslurp 16 | bufprintf 17 | sd_markdown_new 18 | sd_markdown_render 19 | sd_markdown_free 20 | sd_version -------------------------------------------------------------------------------- /src/html_entities.h: -------------------------------------------------------------------------------- 1 | #ifndef HTML_ENTITIES_H 2 | #define HTML_ENTITIES_H 3 | 4 | #include 5 | #include 6 | 7 | extern const uint32_t MAX_NUM_ENTITY_VAL; 8 | 9 | extern const size_t MAX_NUM_ENTITY_LEN; 10 | 11 | int is_valid_numeric_entity(uint32_t entity_val); 12 | 13 | const char* is_allowed_named_entity (register const char *str, register size_t len); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "snudown-js", 3 | "version": "4.0.1", 4 | "description": "a 'native' port of Snudown to JavaScript", 5 | "type": "module", 6 | "module": "./dist/snudown.js", 7 | "exports": "./dist/snudown.js", 8 | "types": "./dist/snudown.d.ts", 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/erikdesjardins/snudown-js.git" 12 | }, 13 | "devDependencies": { 14 | "uglify-js": "3.17.4" 15 | }, 16 | "license": "MIT", 17 | "dependencies": {} 18 | } 19 | -------------------------------------------------------------------------------- /src/stack.h: -------------------------------------------------------------------------------- 1 | #ifndef STACK_H__ 2 | #define STACK_H__ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct stack { 11 | void **item; 12 | size_t size; 13 | size_t asize; 14 | }; 15 | 16 | void stack_free(struct stack *); 17 | int stack_grow(struct stack *, size_t); 18 | int stack_init(struct stack *, size_t); 19 | 20 | int stack_push(struct stack *, void *); 21 | 22 | void *stack_pop(struct stack *); 23 | void *stack_top(struct stack *); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: snudown 2 | Maintainer: Neil Williams 3 | Section: python 4 | Priority: optional 5 | Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7), python-setuptools, gperf 6 | Standards-Version: 3.9.3 7 | Homepage: https://github.com/reddit/snudown 8 | Vcs-Git: git://github.com/reddit/snudown.git 9 | 10 | Package: python-snudown 11 | Architecture: any 12 | Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends} 13 | Breaks: ${python:Breaks} 14 | Description: reddit's python wrapper and customization of the Sundown Markdown 
interpreter. 15 | 16 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | For safety reasons, whenever you add or change something in Snudown, 2 | you should add a few test-cases that demonstrate your change and do a 3 | fuzzing run in `/fuzzing` by running `make afl`. Make sure you have `cmake` 4 | installed and in your `PATH`! 5 | 6 | This uses [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/) and a 7 | modified [Google Gumbo](https://github.com/google/gumbo-parser/) to ensure 8 | there is no way to generate invalid HTML, and that there are no unsafe 9 | memory operations. 10 | 11 | See [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/)'s instructions 12 | for your platform to get started. 13 | -------------------------------------------------------------------------------- /snudown.d.ts: -------------------------------------------------------------------------------- 1 | interface Options { 2 | /** 3 | * Whether to add `rel="nofollow"` to all links. 4 | */ 5 | nofollow?: boolean; 6 | /** 7 | * The `target` property of all links. 8 | */ 9 | target?: string; 10 | /** 11 | * Whether to create a table of contents. 12 | * 13 | * Note that Reddit postprocesses the output instead of using this option to generate a TOC. 14 | */ 15 | enableToc?: boolean; 16 | /** 17 | * Added to the `id` of each TOC link, i.e. `#PREFIXtoc_0`. 18 | */ 19 | tocIdPrefix?: string; 20 | } 21 | 22 | /** 23 | * Render markdown `text` to an HTML string using the usertext renderer. 24 | */ 25 | export function markdown(text: string, options?: Options): string; 26 | 27 | /** 28 | * Render markdown `text` to an HTML string using the wiki renderer. 
29 | */ 30 | export function markdownWiki(text: string, options?: Options): string; 31 | -------------------------------------------------------------------------------- /header.js: -------------------------------------------------------------------------------- 1 | /* snudown-js - a 'native' port of Snudown to JavaScript */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * Copyright (c) 2011, Vicent Marti 6 | * Copyright (c) 2015, Erik Desjardins 7 | * 8 | * Permission to use, copy, modify, and distribute this software for any 9 | * purpose with or without fee is hereby granted, provided that the above 10 | * copyright notice and this permission notice appear in all copies. 11 | * 12 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 13 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 15 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 16 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 17 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 18 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 19 | */ 20 | 21 | (function() {'use strict'; 22 | var Module = {}; 23 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: http://dep.debian.net/deps/dep5 2 | Upstream-Name: snudown 3 | Source: https://github.com/reddit/snudown 4 | 5 | Files: * 6 | Copyright: 2011-2012 Vicent Marti 7 | 2011-2012 reddit Inc. 8 | License: MIT 9 | 10 | Files: debian/* 11 | Copyright: 2011-2012 reddit Inc. 12 | License: MIT 13 | 14 | Files: test_snudown.py 15 | Copyright: 2011-2012 reddit Inc. 
16 | License: MIT 17 | 18 | License: MIT 19 | Permission to use, copy, modify, and distribute this software for any purpose 20 | with or without fee is hereby granted, provided that the above copyright 21 | notice and this permission notice appear in all copies. 22 | . 23 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 24 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 25 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 26 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 27 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 28 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 29 | PERFORMANCE OF THIS SOFTWARE. 30 | 31 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v*.*.* 9 | pull_request: 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: actions/setup-node@v1 17 | with: 18 | node-version: '12.x' 19 | registry-url: 'https://registry.npmjs.org' 20 | - run: npm install 21 | - run: docker build -t emscripten . 
22 | - run: docker run --rm -v $(pwd):/src emscripten ./build.sh 23 | - run: node test_snudown.js 24 | - run: ls -lh dist 25 | if: "!cancelled()" 26 | - uses: actions/upload-artifact@v2 27 | with: 28 | name: dist 29 | path: dist 30 | if: "!cancelled()" 31 | - uses: softprops/action-gh-release@v1 32 | if: startsWith(github.ref, 'refs/tags/') 33 | with: 34 | files: | 35 | dist/snudown.js 36 | dist/snudown_es.js 37 | env: 38 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 39 | - run: npm publish 40 | if: startsWith(github.ref, 'refs/tags/') 41 | env: 42 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 43 | -------------------------------------------------------------------------------- /src/stack.c: -------------------------------------------------------------------------------- 1 | #include "stack.h" 2 | #include 3 | 4 | int 5 | stack_grow(struct stack *st, size_t new_size) 6 | { 7 | void **new_st; 8 | 9 | if (st->asize >= new_size) 10 | return 0; 11 | 12 | new_st = realloc(st->item, new_size * sizeof(void *)); 13 | if (new_st == NULL) 14 | return -1; 15 | 16 | memset(new_st + st->asize, 0x0, 17 | (new_size - st->asize) * sizeof(void *)); 18 | 19 | st->item = new_st; 20 | st->asize = new_size; 21 | 22 | if (st->size > new_size) 23 | st->size = new_size; 24 | 25 | return 0; 26 | } 27 | 28 | void 29 | stack_free(struct stack *st) 30 | { 31 | if (!st) 32 | return; 33 | 34 | free(st->item); 35 | 36 | st->item = NULL; 37 | st->size = 0; 38 | st->asize = 0; 39 | } 40 | 41 | int 42 | stack_init(struct stack *st, size_t initial_size) 43 | { 44 | st->item = NULL; 45 | st->size = 0; 46 | st->asize = 0; 47 | 48 | if (!initial_size) 49 | initial_size = 8; 50 | 51 | return stack_grow(st, initial_size); 52 | } 53 | 54 | void * 55 | stack_pop(struct stack *st) 56 | { 57 | if (!st->size) 58 | return NULL; 59 | 60 | return st->item[--st->size]; 61 | } 62 | 63 | int 64 | stack_push(struct stack *st, void *item) 65 | { 66 | if (stack_grow(st, st->size * 2) < 0) 67 | return -1; 68 | 69 | 
st->item[st->size++] = item; 70 | return 0; 71 | } 72 | 73 | void * 74 | stack_top(struct stack *st) 75 | { 76 | if (!st->size) 77 | return NULL; 78 | 79 | return st->item[st->size - 1]; 80 | } 81 | 82 | -------------------------------------------------------------------------------- /footer.js: -------------------------------------------------------------------------------- 1 | function _mallocString(str) { 2 | // https://github.com/kripken/emscripten/blob/3ebf0eed375120626ae5c2233b26bf236ea90046/src/preamble.js#L148 3 | // at most 4 bytes per UTF-8 code point, +1 for the trailing '\0' 4 | var len = (str.length << 2) + 1; 5 | var ptr = _malloc(len); 6 | stringToUTF8(str, ptr, len); 7 | return ptr; 8 | } 9 | 10 | function _markdown(renderer, text, options) { 11 | if (typeof text !== 'string') text = ''; 12 | var str = _mallocString(text); 13 | var size = lengthBytesUTF8(text); // excludes null terminator 14 | 15 | if (typeof options !== 'object' || options === null) options = {}; 16 | var nofollow = options['nofollow'] ? 1 : 0; 17 | var target = typeof options['target'] === 'string' ? _mallocString(options['target']) : 0; 18 | var toc_id_prefix = typeof options['tocIdPrefix'] === 'string' ? _mallocString(options['tocIdPrefix']) : 0; 19 | var enable_toc = options['enableToc'] ? 
1 : 0; 20 | 21 | var ptr = renderer(str, size, nofollow, target, toc_id_prefix, enable_toc); 22 | var string = UTF8ToString(ptr); 23 | 24 | _free(ptr); 25 | _free(toc_id_prefix); 26 | _free(target); 27 | _free(str); 28 | 29 | return string; 30 | } 31 | 32 | function markdown(text, options) { 33 | return _markdown(_default_renderer, text, options); 34 | } 35 | 36 | function markdownWiki(text, options) { 37 | return _markdown(_wiki_renderer, text, options); 38 | } 39 | 40 | window['markdown'] = markdown; 41 | window['markdownWiki'] = markdownWiki; 42 | })(); 43 | -------------------------------------------------------------------------------- /html/houdini.h: -------------------------------------------------------------------------------- 1 | #ifndef HOUDINI_H__ 2 | #define HOUDINI_H__ 3 | 4 | #include "buffer.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #ifdef HOUDINI_USE_LOCALE 11 | # define _isxdigit(c) isxdigit(c) 12 | # define _isdigit(c) isdigit(c) 13 | #else 14 | /* 15 | * Helper _isdigit methods -- do not trust the current locale 16 | * */ 17 | # define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) 18 | # define _isdigit(c) ((c) >= '0' && (c) <= '9') 19 | #endif 20 | 21 | extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size); 22 | extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure); 23 | extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size); 24 | extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size); 25 | extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size); 26 | extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size); 27 | extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size); 28 | extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size); 29 | extern void houdini_unescape_url(struct buf *ob, const 
uint8_t *src, size_t size); 30 | extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size); 31 | extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size); 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/autolink.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Vicent Marti 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #ifndef UPSKIRT_AUTOLINK_H 18 | #define UPSKIRT_AUTOLINK_H 19 | 20 | #include "buffer.h" 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | enum { 27 | SD_AUTOLINK_SHORT_DOMAINS = (1 << 0), 28 | }; 29 | 30 | int 31 | sd_autolink_issafe(const uint8_t *link, size_t link_len); 32 | 33 | size_t 34 | sd_autolink__www(size_t *rewind_p, struct buf *link, 35 | uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); 36 | 37 | size_t 38 | sd_autolink__email(size_t *rewind_p, struct buf *link, 39 | uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); 40 | 41 | size_t 42 | sd_autolink__url(size_t *rewind_p, struct buf *link, 43 | uint8_t *data, size_t max_rewind, size_t size, unsigned int flags); 44 | 45 | extern size_t 46 | sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data, 47 | size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); 48 | 49 | extern size_t 50 | sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data, 51 | size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | 59 | /* vim: set filetype=c: */ 60 | -------------------------------------------------------------------------------- /html/html.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Vicent Marti 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef UPSKIRT_HTML_H 18 | #define UPSKIRT_HTML_H 19 | 20 | #include "markdown.h" 21 | #include "buffer.h" 22 | #include 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | struct html_renderopt { 29 | struct { 30 | int header_count; 31 | int current_level; 32 | int level_offset; 33 | } toc_data; 34 | 35 | char* toc_id_prefix; 36 | 37 | unsigned int flags; 38 | 39 | char** html_element_whitelist; 40 | char** html_attr_whitelist; 41 | 42 | /* extra callbacks */ 43 | void (*link_attributes)(struct buf *ob, const struct buf *url, void *self); 44 | }; 45 | 46 | typedef enum { 47 | HTML_SKIP_HTML = (1 << 0), 48 | HTML_SKIP_STYLE = (1 << 1), 49 | HTML_SKIP_IMAGES = (1 << 2), 50 | HTML_SKIP_LINKS = (1 << 3), 51 | HTML_EXPAND_TABS = (1 << 4), 52 | HTML_SAFELINK = (1 << 5), 53 | HTML_TOC = (1 << 6), 54 | HTML_HARD_WRAP = (1 << 7), 55 | HTML_USE_XHTML = (1 << 8), 56 | HTML_ESCAPE = (1 << 9), 57 | HTML_ALLOW_ELEMENT_WHITELIST = (1 << 10), 58 | } html_render_mode; 59 | 60 | typedef enum { 61 | HTML_TAG_NONE = 0, 62 | HTML_TAG_OPEN, 63 | HTML_TAG_CLOSE, 64 | } html_tag; 65 | 66 | int 67 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname); 68 | 69 | extern void 70 | sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags); 71 | 72 | extern void 73 | sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr); 74 | 75 | extern void 76 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | 82 | #endif 83 | 84 | 
-------------------------------------------------------------------------------- /html/houdini_html_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */ 8 | 9 | /** 10 | * According to the OWASP rules: 11 | * 12 | * & --> & 13 | * < --> < 14 | * > --> > 15 | * " --> " 16 | * ' --> ' ' is not recommended 17 | * / --> / forward slash is included as it helps end an HTML entity 18 | * 19 | */ 20 | static const char HTML_ESCAPE_TABLE[] = { 21 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7, 22 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 23 | 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37 | }; 38 | 39 | static const char *HTML_ESCAPES[] = { 40 | "", 41 | """, 42 | "&", 43 | "'", 44 | "/", 45 | "<", 46 | ">", 47 | "", // throw out control characters 48 | }; 49 | 50 | void 51 | houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure) 52 | { 53 | size_t i = 0, org, esc = 0; 54 | 55 | bufgrow(ob, ESCAPE_GROW_FACTOR(size)); 56 | 57 | while (i < size) { 58 | org = i; 59 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) 60 | i++; 61 | 62 | if (i > org) 63 | bufput(ob, 
src + org, i - org); 64 | 65 | /* escaping */ 66 | if (i >= size) 67 | break; 68 | 69 | /* The forward slash is only escaped in secure mode */ 70 | if (src[i] == '/' && !secure) { 71 | bufputc(ob, '/'); 72 | } else if (HTML_ESCAPE_TABLE[src[i]] == 7) { 73 | /* skip control characters */ 74 | } else { 75 | bufputs(ob, HTML_ESCAPES[esc]); 76 | } 77 | 78 | i++; 79 | } 80 | } 81 | 82 | void 83 | houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size) 84 | { 85 | houdini_escape_html0(ob, src, size, 1); 86 | } 87 | 88 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | snudown-js 2 | ======= 3 | 4 | [![Build Status](https://travis-ci.org/erikdesjardins/snudown-js.svg)](https://travis-ci.org/erikdesjardins/snudown-js) 5 | 6 | `snudown-js` is a 'native' (compiled with [Emscripten](https://kripken.github.io/emscripten-site/)) port of [Snudown](https://github.com/reddit/snudown/), the Markdown parser used by Reddit. 7 | 8 | 9 | Usage 10 | ----- 11 | 12 | Import from [npm](https://www.npmjs.com/package/snudown-js): `const Snudown = require('snudown-js')`. 13 | 14 | Basic usage: 15 | 16 | `Snudown.markdown('some text'); // "

<p>some text</p>

\n"` 17 | 18 | `Snudown.markdownWiki(''); // "

\n"` 19 | 20 | For more in-depth documentation, see the comments in [`footer.js`](https://github.com/erikdesjardins/snudown-js/blob/master/footer.js). 21 | 22 | Building 23 | -------- 24 | 25 | ### You will need... 26 | 27 | - to be able to run bash scripts 28 | - `gperf`, [a command-line utility](https://www.gnu.org/software/gperf/) - through your package manager 29 | - `npm`, [node package manager](https://www.npmjs.com/) - through your package manager 30 | - `emcc`, the Emscripten compiler - [from the Emscripten SDK](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html) (or from [source](http://kripken.github.io/emscripten-site/docs/building_from_source/building_emscripten_from_source_using_the_sdk.html)) (at least version 1.36.0) 31 | 32 | ### Setup 33 | 34 | Run `npm i`. 35 | 36 | Ensure that `gperf` and `emcc` can be invoked from the terminal. 37 | 38 | ### Build 39 | 40 | Run `./build.sh`. 41 | 42 | Output is to `dist/`. 43 | 44 | 45 | Testing 46 | ------- 47 | 48 | ### You will need... 49 | 50 | - `node`, the node.js runtime - probably installed during the build process 51 | - a successful build of `snudown-js` 52 | 53 | After building, run `node test_snudown.js`. 54 | 55 | 56 | License 57 | ------- 58 | 59 | Permission to use, copy, modify, and distribute this software for any 60 | purpose with or without fee is hereby granted, provided that the above 61 | copyright notice and this permission notice appear in all copies. 62 | 63 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 64 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 65 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 66 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 67 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 68 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 69 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 70 | -------------------------------------------------------------------------------- /src/buffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008, Natacha Porté 3 | * Copyright (c) 2011, Vicent Martí 4 | * 5 | * Permission to use, copy, modify, and distribute this software for any 6 | * purpose with or without fee is hereby granted, provided that the above 7 | * copyright notice and this permission notice appear in all copies. 8 | * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | */ 17 | 18 | #ifndef BUFFER_H__ 19 | #define BUFFER_H__ 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if defined(_MSC_VER) 30 | #define __attribute__(x) 31 | #define inline 32 | #endif 33 | 34 | typedef enum { 35 | BUF_OK = 0, 36 | BUF_ENOMEM = -1, 37 | BUF_EINVALIDIDX = -2, 38 | } buferror_t; 39 | 40 | /* struct buf: character array buffer */ 41 | struct buf { 42 | uint8_t *data; /* actual character data */ 43 | size_t size; /* size of the string */ 44 | size_t asize; /* allocated size (0 = volatile buffer) */ 45 | size_t unit; /* reallocation unit size (0 = read-only buffer) */ 46 | }; 47 | 48 | /* CONST_BUF: global buffer from a string litteral */ 49 | #define BUF_STATIC(string) \ 50 | { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 } 51 | 52 | /* VOLATILE_BUF: macro for creating a volatile buffer on the stack */ 53 | #define BUF_VOLATILE(strname) \ 54 | { (uint8_t *)strname, strlen(strname), 0, 0, 0 } 55 | 56 | /* BUFPUTSL: optimized bufputs of a string litteral */ 57 | #define BUFPUTSL(output, literal) \ 58 | bufput(output, literal, sizeof literal - 1) 59 | 60 | /* bufgrow: increasing the allocated size to the given value */ 61 | int bufgrow(struct buf *, size_t); 62 | 63 | /* bufnew: allocation of a new buffer */ 64 | struct buf *bufnew(size_t) __attribute__ ((malloc)); 65 | 66 | /* bufnullterm: NUL-termination of the string array (making a C-string) */ 67 | const char *bufcstr(struct buf *); 68 | 69 | /* bufprefix: compare the beginning of a buffer with a string */ 70 | int bufprefix(const struct buf *buf, const char *prefix); 71 | 72 | /* bufput: appends raw data to a buffer */ 73 | void bufput(struct buf *, const void *, size_t); 74 | 75 | /* bufputs: appends a NUL-terminated string to a buffer */ 76 | void bufputs(struct buf *, const char *); 77 | 78 | /* bufputc: appends a single char to a buffer */ 79 | void bufputc(struct buf *, int); 80 | 81 | /* bufputi: appends a 
formatted integer to a buffer, like vsnprintf("%d") */ 82 | void bufputi(struct buf *, int); 83 | 84 | /* bufrelease: decrease the reference count and free the buffer if needed */ 85 | void bufrelease(struct buf *); 86 | 87 | /* bufreset: frees internal data of the buffer */ 88 | void bufreset(struct buf *); 89 | 90 | /* bufslurp: removes a given number of bytes from the head of the array */ 91 | void bufslurp(struct buf *, size_t); 92 | 93 | /* buftruncate: truncates the buffer at `size` */ 94 | int buftruncate(struct buf *buf, size_t size); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /html/houdini_href_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) 8 | 9 | /* 10 | * The following characters will not be escaped: 11 | * 12 | * -_.+!*'(),%#@?=;:/,+&$ alphanum 13 | * 14 | * Note that this character set is the addition of: 15 | * 16 | * - The characters which are safe to be in an URL 17 | * - The characters which are *not* safe to be in 18 | * an URL because they are RESERVED characters. 19 | * 20 | * We asume (lazily) that any RESERVED char that 21 | * appears inside an URL is actually meant to 22 | * have its native function (i.e. as an URL 23 | * component/separator) and hence needs no escaping. 24 | * 25 | * There are two exceptions: the chacters & (amp) 26 | * and ' (single quote) do not appear in the table. 27 | * They are meant to appear in the URL as components, 28 | * yet they require special HTML-entity escaping 29 | * to generate valid HTML markup. 30 | * 31 | * All other characters will be escaped to %XX. 
/*
 * Per-byte classification consulted by houdini_escape_href():
 *   1 - copied to the output unchanged
 *   0 - escaped (HTML entity for '&' and '\'', %XX for everything else)
 *   2 - control byte, dropped from the output entirely
 */
static const char HREF_SAFE[] = {
	2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/*
 * houdini_escape_href: append `size` bytes of `src` to `ob`, escaped for use
 * as the value of an HTML href attribute.
 *
 * Runs of safe bytes are copied in one go. Control bytes (class 2) are
 * discarded. '&' and '\'' are written as HTML entities, because they are
 * legal in a URL but not inside an attribute value; every other unsafe byte
 * becomes an upper-case %XX escape.
 */
void
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
{
	static const char hex_chars[] = "0123456789ABCDEF";
	size_t i = 0, org;
	char hex_str[3];

	/* pre-size the output; the individual appends below grow it further if needed */
	bufgrow(ob, ESCAPE_GROW_FACTOR(size));
	hex_str[0] = '%';

	while (i < size) {
		org = i;
		/* Skip by characters that don't need special
		 * processing */
		while (i < size && HREF_SAFE[src[i]] == 1)
			i++;

		if (i > org)
			bufput(ob, src + org, i - org);

		/* escaping */
		if (i >= size)
			break;

		/* throw out control characters */
		if (HREF_SAFE[src[i]] == 2) {
			i++;
			continue;
		}

		switch (src[i]) {
		/* amp appears all the time in URLs, but needs
		 * HTML-entity escaping to be inside an href */
		case '&':
			BUFPUTSL(ob, "&amp;");
			break;

		/* the single quote is a valid URL character
		 * according to the standard; it needs HTML
		 * entity escaping too, otherwise it could close a
		 * single-quoted attribute value */
		case '\'':
			BUFPUTSL(ob, "&#x27;");
			break;

		/* the space can be escaped to %20 or a plus
		 * sign. we're going with the generic escape
		 * for now. the plus thing is more commonly seen
		 * when building GET strings */
#if 0
		case ' ':
			bufputc(ob, '+');
			break;
#endif

		/* every other character goes with a %XX escaping */
		default:
			hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
			hex_str[2] = hex_chars[src[i] & 0xF];
			bufput(ob, hex_str, 3);
		}

		i++;
	}
}
24 | * 25 | * According to lxml these are all problematic: 26 | * 27 | * [xrange(0, 8), 28 | * xrange(11, 12), 29 | * xrange(14, 31), 30 | * xrange(55296, 57343), 31 | * xrange(65534, 65535)] 32 | */ 33 | return (entity_val > 8 34 | && (entity_val != 11 && entity_val != 12) 35 | && (entity_val < 14 || entity_val > 31) 36 | && (entity_val < 55296 || entity_val > 57343) 37 | && (entity_val != 65534 && entity_val != 65535) 38 | && entity_val <= MAX_NUM_ENTITY_VAL); 39 | } 40 | 41 | %} 42 | %% 43 | Æ 44 | Á 45 |  46 | À 47 | Α 48 | Å 49 | à 50 | Ä 51 | Β 52 | Ç 53 | Χ 54 | ‡ 55 | Δ 56 | Ð 57 | É 58 | Ê 59 | È 60 | Ε 61 | Η 62 | Ë 63 | Γ 64 | Í 65 | Î 66 | Ì 67 | Ι 68 | Ï 69 | Κ 70 | Λ 71 | Μ 72 | Ñ 73 | Ν 74 | Œ 75 | Ó 76 | Ô 77 | Ò 78 | Ω 79 | Ο 80 | Ø 81 | Õ 82 | Ö 83 | Φ 84 | Π 85 | ″ 86 | Ψ 87 | Ρ 88 | Š 89 | Σ 90 | Þ 91 | Τ 92 | Θ 93 | Ú 94 | Û 95 | Ù 96 | Υ 97 | Ü 98 | Ξ 99 | Ý 100 | Ÿ 101 | Ζ 102 | á 103 | â 104 | ´ 105 | æ 106 | à 107 | ℵ 108 | α 109 | & 110 | ∧ 111 | ∠ 112 | ' 113 | å 114 | ≈ 115 | ã 116 | ä 117 | „ 118 | β 119 | ¦ 120 | • 121 | ∩ 122 | ç 123 | ¸ 124 | ¢ 125 | χ 126 | ˆ 127 | ♣ 128 | ≅ 129 | © 130 | ↵ 131 | ∪ 132 | ¤ 133 | ⇓ 134 | † 135 | ↓ 136 | ° 137 | δ 138 | ♦ 139 | ÷ 140 | é 141 | ê 142 | è 143 | ∅ 144 |   145 |   146 | ε 147 | ≡ 148 | η 149 | ð 150 | ë 151 | € 152 | ∃ 153 | ƒ 154 | ∀ 155 | ½ 156 | ¼ 157 | ¾ 158 | ⁄ 159 | γ 160 | ≥ 161 | > 162 | ⇔ 163 | ↔ 164 | ♥ 165 | … 166 | í 167 | î 168 | ¡ 169 | ì 170 | ℑ 171 | ∞ 172 | ∫ 173 | ι 174 | ¿ 175 | ∈ 176 | ï 177 | κ 178 | ⇐ 179 | λ 180 | ⟨ 181 | « 182 | ← 183 | ⌈ 184 | “ 185 | ≤ 186 | ⌊ 187 | ∗ 188 | ◊ 189 | ‎ 190 | ‹ 191 | ‘ 192 | < 193 | ¯ 194 | — 195 | µ 196 | · 197 | − 198 | μ 199 | ∇ 200 |   201 | – 202 | ≠ 203 | ∋ 204 | ¬ 205 | ∉ 206 | ⊄ 207 | ñ 208 | ν 209 | ó 210 | ô 211 | œ 212 | ò 213 | ‾ 214 | ω 215 | ο 216 | ⊕ 217 | ∨ 218 | ª 219 | º 220 | ø 221 | õ 222 | ⊗ 223 | ö 224 | ¶ 225 | ∂ 226 | ‰ 227 | ⊥ 228 | φ 229 | π 230 | ϖ 231 | ± 232 | £ 233 | ′ 234 | ∏ 235 | ∝ 236 | ψ 237 | " 
238 | ⇒ 239 | √ 240 | ⟩ 241 | » 242 | → 243 | ⌉ 244 | ” 245 | ℜ 246 | ® 247 | ⌋ 248 | ρ 249 | ‏ 250 | › 251 | ’ 252 | ‚ 253 | š 254 | ⋅ 255 | § 256 | ­ 257 | σ 258 | ς 259 | ∼ 260 | ♠ 261 | ⊂ 262 | ⊆ 263 | ∑ 264 | ¹ 265 | ² 266 | ³ 267 | ⊃ 268 | ⊇ 269 | ß 270 | τ 271 | ∴ 272 | θ 273 | ϑ 274 |   275 | þ 276 | ˜ 277 | × 278 | ™ 279 | ⇑ 280 | ú 281 | ↑ 282 | û 283 | ù 284 | ¨ 285 | ϒ 286 | υ 287 | ü 288 | ℘ 289 | ξ 290 | ý 291 | ¥ 292 | ÿ 293 | ζ 294 | ‍ 295 | ‌ 296 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | snudown (1.5.0) unstable; urgency=medium 2 | * add blockspoiler support 3 | * add inline spoiler support 4 | 5 | -- Jesjit Birak Wed, 07 Mar 2018 13:21:45 -0800 6 | 7 | snudown (1.4.0) unstable; urgency=medium 8 | 9 | * autolink r/subreddit and u/user 10 | * security: don't rewind over previous inlines when autolinking 11 | * email autolinks re-enabled due to ^ 12 | * more stringent character entity checks and sanitization 13 | * properly handle URLs containing control characters 14 | 15 | -- Jordan Milne Mon, 01 Jun 2015 13:04:23 -0700 16 | 17 | snudown (1.3.2) unstable; urgency=medium 18 | 19 | * fix alphanumeric-named entities 20 | 21 | -- Neil Williams Wed, 25 Feb 2015 13:32:41 -0800 22 | 23 | snudown (1.3.1) unstable; urgency=medium 24 | 25 | * add missing entities to entity whitelist 26 | 27 | -- Neil Williams Tue, 24 Feb 2015 22:12:29 -0800 28 | 29 | snudown (1.3.0) unstable; urgency=medium 30 | 31 | * validate html entities and escape unrecognized ones 32 | 33 | -- Neil Williams Tue, 24 Feb 2015 17:55:38 -0800 34 | 35 | snudown (1.2.0) unstable; urgency=medium 36 | 37 | * security: fix rewind issues 38 | * email autolinks disabled due to ^ 39 | * security: fix table header OOM bomb 40 | 41 | -- Neil Williams Sat, 20 Sep 2014 11:59:34 -0700 42 | 43 | snudown (1.1.6) unstable; urgency=low 44 | 45 | * add ts3server 
url scheme to whitelist 46 | * redo html sanitization for wiki renderer 47 | 48 | -- Neil Williams Tue, 01 Apr 2014 17:12:50 -0700 49 | 50 | snudown (1.1.5) unstable; urgency=low 51 | 52 | * bring path stuff into user/subreddit autolinking (multis, subpages etc.) 53 | * make /u/ autolinking case sensitive 54 | 55 | -- Neil Williams Wed, 22 May 2013 16:09:31 -0700 56 | 57 | snudown (1.1.4) unstable; urgency=low 58 | 59 | * make /r/ autolinking case sensitive 60 | 61 | -- Neil Williams Mon, 25 Feb 2013 23:27:10 -0800 62 | 63 | snudown (1.1.3) unstable; urgency=low 64 | 65 | * add support for /r/all-minus 66 | 67 | -- Neil Williams Tue, 08 Jan 2013 12:55:40 -0800 68 | 69 | snudown (1.1.2) unstable; urgency=low 70 | 71 | * don't close the toc div if there wasn't a toc :( 72 | 73 | -- Neil Williams Wed, 12 Dec 2012 17:38:05 -0800 74 | 75 | snudown (1.1.1) unstable; urgency=low 76 | 77 | * minor code cleanup 78 | * add a div around wiki table of contents for styling purposes 79 | 80 | -- Neil Williams Wed, 12 Dec 2012 13:47:49 -0800 81 | 82 | snudown (1.1.0) unstable; urgency=low 83 | 84 | * add wiki variant of markdown syntax (allows links, and 85 | some raw html) 86 | 87 | -- Neil Williams Wed, 05 Sep 2012 23:30:34 -0700 88 | 89 | snudown (1.0.7) unstable; urgency=low 90 | 91 | * add python-setuptools to build-depends 92 | 93 | -- Neil Williams Thu, 09 Aug 2012 14:46:49 -0700 94 | 95 | snudown (1.0.6) unstable; urgency=low 96 | 97 | * made subreddit autolinking more robust thanks to nandhp 98 | * cleaned up packaging 99 | * merged upstream fixes: 100 | * fix blockquotes nested inside paragraphs 101 | * improve parsing of continuous list items 102 | * fix infinite loop parsing strikethrouhgs 103 | 104 | -- Neil Williams Thu, 09 Aug 2012 13:06:38 -0700 105 | 106 | snudown (1.0.5) unstable; urgency=low 107 | 108 | * require a space between url and title 109 | * merged upstream fixes: 110 | * whitespace after tables prevent them from rendering 111 | * escape html in 
contents of tables 112 | 113 | -- Neil Williams Thu, 23 Feb 2012 08:40:39 -0800 114 | 115 | snudown (1.0.4) unstable; urgency=low 116 | 117 | * change username autolinking to /u/username 118 | * properly handle backslash at end of message 119 | 120 | -- Neil Williams Thu, 26 Jan 2012 18:26:45 -0800 121 | 122 | snudown (1.0.3) unstable; urgency=low 123 | 124 | * ~username auto-linking 125 | * make table headers less strict 126 | * correctly handle ) in link title text 127 | * synced with upstream 128 | * code clean-up 129 | * utf-8 fixes 130 | 131 | -- Neil Williams Wed, 18 Jan 2012 15:20:35 -0800 132 | 133 | snudown (1.0.2) unstable; urgency=low 134 | 135 | * synced up with upstream 136 | * more safelink relaxation based on community requests 137 | * fixed nesting unordered lists within ordered lists and vice versa 138 | 139 | -- Neil Williams Sat, 19 Nov 2011 17:16:47 -0800 140 | 141 | snudown (1.0.1) unstable; urgency=low 142 | 143 | * new version, new package 144 | 145 | -- Neil Williams Thu, 17 Nov 2011 14:22:26 -0800 146 | 147 | snudown (1.0.0) unstable; urgency=low 148 | 149 | * source package automatically created by stdeb 0.6.0+git 150 | 151 | -- Neil Williams Wed, 16 Nov 2011 10:36:53 -0800 152 | -------------------------------------------------------------------------------- /src/buffer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2008, Natacha Porté 3 | * Copyright (c) 2011, Vicent Martí 4 | * 5 | * Permission to use, copy, modify, and distribute this software for any 6 | * purpose with or without fee is hereby granted, provided that the above 7 | * copyright notice and this permission notice appear in all copies. 8 | * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | */ 17 | 18 | #define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb 19 | 20 | #include "buffer.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | int 28 | bufprefix(const struct buf *buf, const char *prefix) 29 | { 30 | size_t i; 31 | assert(buf && buf->unit); 32 | 33 | for (i = 0; i < buf->size; ++i) { 34 | if (prefix[i] == 0) 35 | return 0; 36 | 37 | if (buf->data[i] != prefix[i]) 38 | return buf->data[i] - prefix[i]; 39 | } 40 | 41 | return 0; 42 | } 43 | 44 | /* bufgrow: increasing the allocated size to the given value */ 45 | int 46 | bufgrow(struct buf *buf, size_t neosz) 47 | { 48 | size_t neoasz; 49 | void *neodata; 50 | 51 | assert(buf && buf->unit); 52 | 53 | if (neosz > BUFFER_MAX_ALLOC_SIZE) 54 | return BUF_ENOMEM; 55 | 56 | if (buf->asize >= neosz) 57 | return BUF_OK; 58 | 59 | neoasz = buf->asize + buf->unit; 60 | while (neoasz < neosz) 61 | neoasz += buf->unit; 62 | 63 | neodata = realloc(buf->data, neoasz); 64 | if (!neodata) 65 | return BUF_ENOMEM; 66 | 67 | buf->data = neodata; 68 | buf->asize = neoasz; 69 | return BUF_OK; 70 | } 71 | 72 | 73 | /* bufnew: allocation of a new buffer */ 74 | struct buf * 75 | bufnew(size_t unit) 76 | { 77 | struct buf *ret; 78 | ret = malloc(sizeof (struct buf)); 79 | 80 | if (ret) { 81 | ret->data = 0; 82 | ret->size = ret->asize = 0; 83 | ret->unit = unit; 84 | } 85 | return ret; 86 | } 87 | 88 | /* bufnullterm: NULL-termination of the string array */ 89 | const char * 90 | bufcstr(struct buf *buf) 91 | { 92 | assert(buf && buf->unit); 93 | 94 | if (buf->size < buf->asize && buf->data[buf->size] == 0) 95 | return (char *)buf->data; 96 | 97 | if 
(buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) { 98 | buf->data[buf->size] = 0; 99 | return (char *)buf->data; 100 | } 101 | 102 | return NULL; 103 | } 104 | 105 | /* bufput: appends raw data to a buffer */ 106 | void 107 | bufput(struct buf *buf, const void *data, size_t len) 108 | { 109 | assert(buf && buf->unit); 110 | 111 | if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0) 112 | return; 113 | 114 | memcpy(buf->data + buf->size, data, len); 115 | buf->size += len; 116 | } 117 | 118 | /* bufputs: appends a NUL-terminated string to a buffer */ 119 | void 120 | bufputs(struct buf *buf, const char *str) 121 | { 122 | bufput(buf, str, strlen(str)); 123 | } 124 | 125 | 126 | /* bufputc: appends a single uint8_t to a buffer */ 127 | void 128 | bufputc(struct buf *buf, int c) 129 | { 130 | assert(buf && buf->unit); 131 | 132 | if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0) 133 | return; 134 | 135 | buf->data[buf->size] = c; 136 | buf->size += 1; 137 | } 138 | 139 | /* bufputi: appends a formatted integer to a buffer, like vsnprintf("%d") */ 140 | void 141 | bufputi(struct buf *buf, int n) 142 | { 143 | // Based on K&R C 144 | 145 | // number of null-terminated decimal digits to represent x signed bytes is floor(log10(2^(8x-1)))+2 146 | // which is bounded from above by x*3+2 147 | char buffer[sizeof(int)*3+2]; 148 | memset(&buffer, 0, sizeof(buffer)); 149 | 150 | int sign = n; 151 | 152 | if (sign < 0) 153 | n = -n; 154 | 155 | int i = 0; 156 | 157 | do { 158 | buffer[i++] = n % 10 + '0'; 159 | } while ((n /= 10) > 0); 160 | 161 | if (sign < 0) 162 | buffer[i++] = '-'; 163 | 164 | char temp; 165 | for (int j = 0, k = i - 1; j < k; ++j, --k) { 166 | temp = buffer[j]; 167 | buffer[j] = buffer[k]; 168 | buffer[k] = temp; 169 | } 170 | 171 | bufputs(buf, buffer); 172 | } 173 | 174 | /* bufrelease: decrease the reference count and free the buffer if needed */ 175 | void 176 | bufrelease(struct buf *buf) 177 | { 178 
| if (!buf) 179 | return; 180 | 181 | free(buf->data); 182 | free(buf); 183 | } 184 | 185 | 186 | /* bufreset: frees internal data of the buffer */ 187 | void 188 | bufreset(struct buf *buf) 189 | { 190 | if (!buf) 191 | return; 192 | 193 | free(buf->data); 194 | buf->data = NULL; 195 | buf->size = buf->asize = 0; 196 | } 197 | 198 | /* bufslurp: removes a given number of bytes from the head of the array */ 199 | void 200 | bufslurp(struct buf *buf, size_t len) 201 | { 202 | assert(buf && buf->unit); 203 | 204 | if (len >= buf->size) { 205 | buf->size = 0; 206 | return; 207 | } 208 | 209 | buf->size -= len; 210 | memmove(buf->data, buf->data + len, buf->size); 211 | } 212 | 213 | /* buftrucate: truncates the buffer at `size` */ 214 | int 215 | buftruncate(struct buf *buf, size_t size) 216 | { 217 | if (buf->size < size || size < 0) { 218 | /* bail out in debug mode so we can figure out why this happened */ 219 | assert(0); 220 | return BUF_EINVALIDIDX; 221 | } 222 | 223 | buf->size = size; 224 | return BUF_OK; 225 | } 226 | -------------------------------------------------------------------------------- /src/markdown.h: -------------------------------------------------------------------------------- 1 | /* markdown.h - generic markdown parser */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | */ 18 | 19 | #ifndef UPSKIRT_MARKDOWN_H 20 | #define UPSKIRT_MARKDOWN_H 21 | 22 | #include "buffer.h" 23 | #include "autolink.h" 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #define SUNDOWN_VERSION "1.16.0" 30 | #define SUNDOWN_VER_MAJOR 1 31 | #define SUNDOWN_VER_MINOR 16 32 | #define SUNDOWN_VER_REVISION 0 33 | 34 | /******************** 35 | * TYPE DEFINITIONS * 36 | ********************/ 37 | 38 | /* mkd_autolink - type of autolink */ 39 | enum mkd_autolink { 40 | MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink */ 41 | MKDA_NORMAL, /* normal http/https/ftp/mailto/etc link */ 42 | MKDA_EMAIL, /* e-mail link without explicit mailto: */ 43 | }; 44 | 45 | enum mkd_tableflags { 46 | MKD_TABLE_ALIGN_L = 1, 47 | MKD_TABLE_ALIGN_R = 2, 48 | MKD_TABLE_ALIGN_CENTER = 3, 49 | MKD_TABLE_ALIGNMASK = 3, 50 | MKD_TABLE_HEADER = 4 51 | }; 52 | 53 | enum mkd_extensions { 54 | MKDEXT_NO_INTRA_EMPHASIS = (1 << 0), 55 | MKDEXT_TABLES = (1 << 1), 56 | MKDEXT_FENCED_CODE = (1 << 2), 57 | MKDEXT_AUTOLINK = (1 << 3), 58 | MKDEXT_STRIKETHROUGH = (1 << 4), 59 | MKDEXT_SPACE_HEADERS = (1 << 6), 60 | MKDEXT_SUPERSCRIPT = (1 << 7), 61 | MKDEXT_LAX_SPACING = (1 << 8), 62 | MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9), 63 | }; 64 | 65 | /* sd_callbacks - functions for rendering parsed data */ 66 | struct sd_callbacks { 67 | /* block level callbacks - NULL skips the block */ 68 | void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque); 69 | void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque); 70 | void (*blockspoiler)(struct buf *ob, const struct buf *text,
void *opaque); 71 | void (*blockhtml)(struct buf *ob,const struct buf *text, void *opaque); 72 | void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque); 73 | void (*hrule)(struct buf *ob, void *opaque); 74 | void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque); 75 | void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque); 76 | void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque); 77 | void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque); 78 | void (*table_row)(struct buf *ob, const struct buf *text, void *opaque); 79 | void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span); 80 | 81 | 82 | /* span level callbacks - NULL or return 0 prints the span verbatim */ 83 | int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque); 84 | int (*codespan)(struct buf *ob, const struct buf *text, void *opaque); 85 | int (*spoilerspan)(struct buf *ob, const struct buf *text, void *opaque); 86 | int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque); 87 | int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque); 88 | int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque); 89 | int (*linebreak)(struct buf *ob, void *opaque); 90 | int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque); 91 | int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque); 92 | int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque); 93 | int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque); 94 | int (*superscript)(struct buf *ob, const struct buf *text, void *opaque); 95 | 96 | /* low level callbacks - NULL copies input directly into the output */ 97 | void (*entity)(struct buf *ob, 
const struct buf *entity, void *opaque); 98 | void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque); 99 | 100 | /* header and footer */ 101 | void (*doc_header)(struct buf *ob, void *opaque); 102 | void (*doc_footer)(struct buf *ob, void *opaque); 103 | }; 104 | 105 | struct sd_markdown; 106 | 107 | /********* 108 | * FLAGS * 109 | *********/ 110 | 111 | /* list/listitem flags */ 112 | #define MKD_LIST_ORDERED 1 113 | #define MKD_LI_BLOCK 2 /*
  • containing block data */ 114 | 115 | /********************** 116 | * EXPORTED FUNCTIONS * 117 | **********************/ 118 | 119 | extern struct sd_markdown * 120 | sd_markdown_new( 121 | unsigned int extensions, 122 | size_t max_nesting, 123 | size_t max_table_cols, 124 | const struct sd_callbacks *callbacks, 125 | void *opaque); 126 | 127 | extern void 128 | sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md); 129 | 130 | extern void 131 | sd_markdown_free(struct sd_markdown *md); 132 | 133 | extern void 134 | sd_version(int *major, int *minor, int *revision); 135 | 136 | #ifdef __cplusplus 137 | } 138 | #endif 139 | 140 | #endif 141 | 142 | /* vim: set filetype=c: */ 143 | -------------------------------------------------------------------------------- /snudown.c: -------------------------------------------------------------------------------- 1 | #include "markdown.h" 2 | #include "html.h" 3 | #include "autolink.h" 4 | 5 | #include 6 | 7 | #define SNUDOWN_VERSION "1.5.0" 8 | 9 | enum snudown_renderer_mode { 10 | RENDERER_USERTEXT = 0, 11 | RENDERER_WIKI, 12 | RENDERER_COUNT 13 | }; 14 | 15 | struct snudown_renderopt { 16 | struct html_renderopt html; 17 | int nofollow; 18 | const char *target; 19 | }; 20 | 21 | struct snudown_renderer { 22 | struct sd_markdown* main_renderer; 23 | struct sd_markdown* toc_renderer; 24 | struct module_state* state; 25 | struct module_state* toc_state; 26 | }; 27 | 28 | struct module_state { 29 | struct sd_callbacks callbacks; 30 | struct snudown_renderopt options; 31 | }; 32 | 33 | static int sundown_initialized[RENDERER_COUNT]; 34 | static struct snudown_renderer sundown[RENDERER_COUNT]; 35 | 36 | static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL}; 37 | static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL}; 38 | 39 | static struct module_state usertext_toc_state; 40 
| static struct module_state wiki_toc_state; 41 | static struct module_state usertext_state; 42 | static struct module_state wiki_state; 43 | 44 | static const unsigned int snudown_default_md_flags = 45 | MKDEXT_NO_INTRA_EMPHASIS | 46 | MKDEXT_SUPERSCRIPT | 47 | MKDEXT_AUTOLINK | 48 | MKDEXT_STRIKETHROUGH | 49 | MKDEXT_TABLES; 50 | 51 | static const unsigned int snudown_default_render_flags = 52 | HTML_SKIP_HTML | 53 | HTML_SKIP_IMAGES | 54 | HTML_SAFELINK | 55 | HTML_ESCAPE | 56 | HTML_USE_XHTML; 57 | 58 | static const unsigned int snudown_wiki_render_flags = 59 | HTML_SKIP_HTML | 60 | HTML_SAFELINK | 61 | HTML_ALLOW_ELEMENT_WHITELIST | 62 | HTML_ESCAPE | 63 | HTML_USE_XHTML; 64 | 65 | static void 66 | snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque) 67 | { 68 | struct snudown_renderopt *options = opaque; 69 | 70 | if (options->nofollow) 71 | BUFPUTSL(ob, " rel=\"nofollow\""); 72 | 73 | if (options->target != NULL) { 74 | BUFPUTSL(ob, " target=\""); 75 | bufputs(ob, options->target); 76 | bufputc(ob, '\"'); 77 | } 78 | } 79 | 80 | static struct sd_markdown* make_custom_renderer(struct module_state* state, 81 | const unsigned int renderflags, 82 | const unsigned int markdownflags, 83 | int toc_renderer) { 84 | if(toc_renderer) { 85 | sdhtml_toc_renderer(&state->callbacks, 86 | (struct html_renderopt *)&state->options); 87 | } else { 88 | sdhtml_renderer(&state->callbacks, 89 | (struct html_renderopt *)&state->options, 90 | renderflags); 91 | } 92 | 93 | state->options.html.link_attributes = &snudown_link_attr; 94 | state->options.html.html_element_whitelist = html_element_whitelist; 95 | state->options.html.html_attr_whitelist = html_attr_whitelist; 96 | 97 | return sd_markdown_new( 98 | markdownflags, 99 | 16, 100 | 64, 101 | &state->callbacks, 102 | &state->options 103 | ); 104 | } 105 | 106 | void init_default_renderer(void) { 107 | if (sundown_initialized[RENDERER_USERTEXT]) return; 108 | sundown_initialized[RENDERER_USERTEXT] = 1; 109 | 
sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0); 110 | sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1); 111 | sundown[RENDERER_USERTEXT].state = &usertext_state; 112 | sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state; 113 | } 114 | 115 | void init_wiki_renderer(void) { 116 | if (sundown_initialized[RENDERER_WIKI]) return; 117 | sundown_initialized[RENDERER_WIKI] = 1; 118 | sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0); 119 | sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1); 120 | sundown[RENDERER_WIKI].state = &wiki_state; 121 | sundown[RENDERER_WIKI].toc_state = &wiki_toc_state; 122 | } 123 | 124 | /* size param is necessary because text may contain null */ 125 | const char* 126 | snudown_md(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int renderer, int enable_toc) { 127 | struct buf ib, *ob; 128 | char* result_text; 129 | struct snudown_renderer _snudown; 130 | unsigned int flags; 131 | 132 | memset(&ib, 0x0, sizeof(struct buf)); 133 | 134 | /* set up buffer */ 135 | ib.data = (uint8_t*) text; 136 | ib.size = size; 137 | 138 | if (renderer < 0 || renderer >= RENDERER_COUNT) { 139 | return NULL; 140 | } 141 | 142 | _snudown = sundown[renderer]; 143 | 144 | struct snudown_renderopt *options = &(_snudown.state->options); 145 | options->nofollow = nofollow; 146 | options->target = target; 147 | 148 | /* Output buffer */ 149 | ob = bufnew(128); 150 | 151 | flags = options->html.flags; 152 | 153 | if (enable_toc) { 154 | _snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix; 155 | sd_markdown_render(ob, ib.data, ib.size, _snudown.toc_renderer); 156 | 
_snudown.toc_state->options.html.toc_id_prefix = NULL; 157 | 158 | options->html.flags |= HTML_TOC; 159 | } 160 | 161 | options->html.toc_id_prefix = toc_id_prefix; 162 | 163 | /* do the magic */ 164 | sd_markdown_render(ob, ib.data, ib.size, _snudown.main_renderer); 165 | 166 | options->html.toc_id_prefix = NULL; 167 | options->html.flags = flags; 168 | 169 | /* make a null-terminated result string - the buffer isn't */ 170 | result_text = (char*) malloc(ob->size + 1); 171 | result_text[ob->size] = 0; 172 | if (ob->data) 173 | memcpy(result_text, (char*) ob->data, ob->size); 174 | 175 | /* Cleanup */ 176 | bufrelease(ob); 177 | 178 | return result_text; 179 | } 180 | 181 | const char* default_renderer(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int enable_toc) { 182 | init_default_renderer(); 183 | return snudown_md(text, size, nofollow, target, toc_id_prefix, RENDERER_USERTEXT, enable_toc); 184 | } 185 | 186 | const char* wiki_renderer(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int enable_toc) { 187 | init_wiki_renderer(); 188 | return snudown_md(text, size, nofollow, target, toc_id_prefix, RENDERER_WIKI, enable_toc); 189 | } 190 | -------------------------------------------------------------------------------- /src/html_blocks.h: -------------------------------------------------------------------------------- 1 | /* C code produced by gperf version 3.0.3 */ 2 | /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */ 3 | /* Computed positions: -k'1-2' */ 4 | 5 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 6 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 7 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 8 | && ('-' == 45) && ('.' 
== 46) && ('/' == 47) && ('0' == 48) \ 9 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 10 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 11 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 12 | && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ 13 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 14 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 15 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 16 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 17 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 18 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 19 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 20 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 21 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 22 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 23 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 24 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 25 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 26 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 27 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 28 | /* The character set is not based on ISO-646. */ 29 | error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
/* maximum key range = 37, duplicates = 0 */

#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/* identity mapping for every byte except 'A'..'Z', which map to 'a'..'z' */
static unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255
  };
#endif

#ifndef GPERF_CASE_STRNCMP
#define GPERF_CASE_STRNCMP 1
/*
 * Case-insensitive comparison of at most n bytes. Returns 0 when the first n
 * bytes are equal ignoring ASCII case, otherwise the difference of the first
 * differing pair of down-cased bytes. A NUL byte in s1 ends the comparison.
 */
static int
gperf_case_strncmp (register const char *s1, register const char *s2, register unsigned int n)
{
  for (; n > 0;)
    {
      unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
      unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
      if (c1 != 0 && c1 == c2)
        {
          n--;
          continue;
        }
      return (int)c1 - (int)c2;
    }
  return 0;
}
#endif

/*
 * Hash of a tag name: its length plus table values looked up for its first
 * byte and, for names of two or more bytes, for its second byte.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag (register const char *str, register unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}

/*
 * Look up a tag name of `len` bytes, ignoring ASCII case. Returns the
 * canonical lower-case name from the word list when it is a known block
 * tag, 0 otherwise.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (register const char *str, register unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /*
   * NOTE(review): "span" is the 25th keyword and sits at index 38, beyond
   * MAX_HASH_VALUE, and hash_block_tag("span", 4) evaluates to 57, so this
   * entry can never be returned. If span is meant to be recognised, the
   * table has to be regenerated with gperf rather than edited by hand.
   */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2",
      "span"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key];

          /* cheap first-byte check (case-insensitive) before the full comparison */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #include "buffer.h" 18 | #include "html.h" 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #if defined(_WIN32) 26 | #define snprintf _snprintf 27 | #endif 28 | 29 | struct smartypants_data { 30 | int in_squote; 31 | int in_dquote; 32 | }; 33 | 34 | static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 35 | static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 36 | static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 37 | static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 38 | static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 39 | static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 40 | static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 41 | static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 42 | static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 43 | static size_t smartypants_cb__escape(struct 
buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size); 44 | 45 | static size_t (*smartypants_cb_ptrs[]) 46 | (struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) = 47 | { 48 | NULL, /* 0 */ 49 | smartypants_cb__dash, /* 1 */ 50 | smartypants_cb__parens, /* 2 */ 51 | smartypants_cb__squote, /* 3 */ 52 | smartypants_cb__dquote, /* 4 */ 53 | smartypants_cb__amp, /* 5 */ 54 | smartypants_cb__period, /* 6 */ 55 | smartypants_cb__number, /* 7 */ 56 | smartypants_cb__ltag, /* 8 */ 57 | smartypants_cb__backtick, /* 9 */ 58 | smartypants_cb__escape, /* 10 */ 59 | }; 60 | 61 | static const uint8_t smartypants_cb_chars[] = { 62 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64 | 0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0, 65 | 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 66 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 68 | 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78 | }; 79 | 80 | static inline int 81 | word_boundary(uint8_t c) 82 | { 83 | return c == 0 || isspace(c) || ispunct(c); 84 | } 85 | 86 | static int 87 | smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open) 88 | { 89 | char ent[8]; 90 | 91 | if (*is_open && !word_boundary(next_char)) 92 | return 0; 93 | 94 | if (!(*is_open) && !word_boundary(previous_char)) 95 | return 0; 96 | 97 | snprintf(ent, 
sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote); 98 | *is_open = !(*is_open); 99 | bufputs(ob, ent); 100 | return 1; 101 | } 102 | 103 | static size_t 104 | smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 105 | { 106 | if (size >= 2) { 107 | uint8_t t1 = tolower(text[1]); 108 | 109 | if (t1 == '\'') { 110 | if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) 111 | return 1; 112 | } 113 | 114 | if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && 115 | (size == 3 || word_boundary(text[2]))) { 116 | BUFPUTSL(ob, "’"); 117 | return 0; 118 | } 119 | 120 | if (size >= 3) { 121 | uint8_t t2 = tolower(text[2]); 122 | 123 | if (((t1 == 'r' && t2 == 'e') || 124 | (t1 == 'l' && t2 == 'l') || 125 | (t1 == 'v' && t2 == 'e')) && 126 | (size == 4 || word_boundary(text[3]))) { 127 | BUFPUTSL(ob, "’"); 128 | return 0; 129 | } 130 | } 131 | } 132 | 133 | if (smartypants_quotes(ob, previous_char, size > 0 ? 
text[1] : 0, 's', &smrt->in_squote)) 134 | return 0; 135 | 136 | bufputc(ob, text[0]); 137 | return 0; 138 | } 139 | 140 | static size_t 141 | smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 142 | { 143 | if (size >= 3) { 144 | uint8_t t1 = tolower(text[1]); 145 | uint8_t t2 = tolower(text[2]); 146 | 147 | if (t1 == 'c' && t2 == ')') { 148 | BUFPUTSL(ob, "©"); 149 | return 2; 150 | } 151 | 152 | if (t1 == 'r' && t2 == ')') { 153 | BUFPUTSL(ob, "®"); 154 | return 2; 155 | } 156 | 157 | if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') { 158 | BUFPUTSL(ob, "™"); 159 | return 3; 160 | } 161 | } 162 | 163 | bufputc(ob, text[0]); 164 | return 0; 165 | } 166 | 167 | static size_t 168 | smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 169 | { 170 | if (size >= 3 && text[1] == '-' && text[2] == '-') { 171 | BUFPUTSL(ob, "—"); 172 | return 2; 173 | } 174 | 175 | if (size >= 2 && text[1] == '-') { 176 | BUFPUTSL(ob, "–"); 177 | return 1; 178 | } 179 | 180 | bufputc(ob, text[0]); 181 | return 0; 182 | } 183 | 184 | static size_t 185 | smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 186 | { 187 | if (size >= 6 && memcmp(text, """, 6) == 0) { 188 | if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote)) 189 | return 5; 190 | } 191 | 192 | if (size >= 4 && memcmp(text, "�", 4) == 0) 193 | return 3; 194 | 195 | bufputc(ob, '&'); 196 | return 0; 197 | } 198 | 199 | static size_t 200 | smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 201 | { 202 | if (size >= 3 && text[1] == '.' && text[2] == '.') { 203 | BUFPUTSL(ob, "…"); 204 | return 2; 205 | } 206 | 207 | if (size >= 5 && text[1] == ' ' && text[2] == '.' 
&& text[3] == ' ' && text[4] == '.') { 208 | BUFPUTSL(ob, "…"); 209 | return 4; 210 | } 211 | 212 | bufputc(ob, text[0]); 213 | return 0; 214 | } 215 | 216 | static size_t 217 | smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 218 | { 219 | if (size >= 2 && text[1] == '`') { 220 | if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) 221 | return 1; 222 | } 223 | 224 | return 0; 225 | } 226 | 227 | static size_t 228 | smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 229 | { 230 | if (word_boundary(previous_char) && size >= 3) { 231 | if (text[0] == '1' && text[1] == '/' && text[2] == '2') { 232 | if (size == 3 || word_boundary(text[3])) { 233 | BUFPUTSL(ob, "½"); 234 | return 2; 235 | } 236 | } 237 | 238 | if (text[0] == '1' && text[1] == '/' && text[2] == '4') { 239 | if (size == 3 || word_boundary(text[3]) || 240 | (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) { 241 | BUFPUTSL(ob, "¼"); 242 | return 2; 243 | } 244 | } 245 | 246 | if (text[0] == '3' && text[1] == '/' && text[2] == '4') { 247 | if (size == 3 || word_boundary(text[3]) || 248 | (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) { 249 | BUFPUTSL(ob, "¾"); 250 | return 2; 251 | } 252 | } 253 | } 254 | 255 | bufputc(ob, text[0]); 256 | return 0; 257 | } 258 | 259 | static size_t 260 | smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 261 | { 262 | if (!smartypants_quotes(ob, previous_char, size > 0 ? 
text[1] : 0, 'd', &smrt->in_dquote)) 263 | BUFPUTSL(ob, """); 264 | 265 | return 0; 266 | } 267 | 268 | static size_t 269 | smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 270 | { 271 | static const char *skip_tags[] = { 272 | "pre", "code", "var", "samp", "kbd", "math", "script", "style" 273 | }; 274 | static const size_t skip_tags_count = 8; 275 | 276 | size_t tag, i = 0; 277 | 278 | while (i < size && text[i] != '>') 279 | i++; 280 | 281 | for (tag = 0; tag < skip_tags_count; ++tag) { 282 | if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN) 283 | break; 284 | } 285 | 286 | if (tag < skip_tags_count) { 287 | for (;;) { 288 | while (i < size && text[i] != '<') 289 | i++; 290 | 291 | if (i == size) 292 | break; 293 | 294 | if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE) 295 | break; 296 | 297 | i++; 298 | } 299 | 300 | while (i < size && text[i] != '>') 301 | i++; 302 | } 303 | 304 | bufput(ob, text, i + 1); 305 | return i; 306 | } 307 | 308 | static size_t 309 | smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) 310 | { 311 | if (size < 2) 312 | return 0; 313 | 314 | switch (text[1]) { 315 | case '\\': 316 | case '"': 317 | case '\'': 318 | case '.': 319 | case '-': 320 | case '`': 321 | bufputc(ob, text[1]); 322 | return 1; 323 | 324 | default: 325 | bufputc(ob, '\\'); 326 | return 0; 327 | } 328 | } 329 | 330 | #if 0 331 | static struct { 332 | uint8_t c0; 333 | const uint8_t *pattern; 334 | const uint8_t *entity; 335 | int skip; 336 | } smartypants_subs[] = { 337 | { '\'', "'s>", "’", 0 }, 338 | { '\'', "'t>", "’", 0 }, 339 | { '\'', "'re>", "’", 0 }, 340 | { '\'', "'ll>", "’", 0 }, 341 | { '\'', "'ve>", "’", 0 }, 342 | { '\'', "'m>", "’", 0 }, 343 | { '\'', "'d>", "’", 0 }, 344 | { '-', "--", "—", 1 }, 345 | { '-', "<->", "–", 0 }, 346 | { '.', "...", "…", 2 }, 347 | { 
'.', ". . .", "…", 4 }, 348 | { '(', "(c)", "©", 2 }, 349 | { '(', "(r)", "®", 2 }, 350 | { '(', "(tm)", "™", 3 }, 351 | { '3', "<3/4>", "¾", 2 }, 352 | { '3', "<3/4ths>", "¾", 2 }, 353 | { '1', "<1/2>", "½", 2 }, 354 | { '1', "<1/4>", "¼", 2 }, 355 | { '1', "<1/4th>", "¼", 2 }, 356 | { '&', "�", 0, 3 }, 357 | }; 358 | #endif 359 | 360 | void 361 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size) 362 | { 363 | size_t i; 364 | struct smartypants_data smrt = {0, 0}; 365 | 366 | if (!text) 367 | return; 368 | 369 | bufgrow(ob, size); 370 | 371 | for (i = 0; i < size; ++i) { 372 | size_t org; 373 | uint8_t action = 0; 374 | 375 | org = i; 376 | while (i < size && (action = smartypants_cb_chars[text[i]]) == 0) 377 | i++; 378 | 379 | if (i > org) 380 | bufput(ob, text + org, i - org); 381 | 382 | if (i < size) { 383 | i += smartypants_cb_ptrs[(int)action] 384 | (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i); 385 | } 386 | } 387 | } 388 | 389 | 390 | -------------------------------------------------------------------------------- /src/autolink.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Vicent Marti 3 | * 4 | * Permission to use, copy, modify, and distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | */ 16 | 17 | #include "buffer.h" 18 | #include "autolink.h" 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #if defined(_WIN32) 26 | #define strncasecmp _strnicmp 27 | #endif 28 | 29 | int 30 | sd_autolink_issafe(const uint8_t *link, size_t link_len) 31 | { 32 | static const size_t valid_uris_count = 14; 33 | static const char *valid_uris[] = { 34 | "http://", "https://", "ftp://", "mailto://", 35 | "/", "git://", "steam://", "irc://", "news://", "mumble://", 36 | "ssh://", "ircs://", "ts3server://", "#" 37 | }; 38 | 39 | size_t i; 40 | 41 | for (i = 0; i < valid_uris_count; ++i) { 42 | size_t len = strlen(valid_uris[i]); 43 | 44 | if (link_len > len && 45 | strncasecmp((char *)link, valid_uris[i], len) == 0 && 46 | (isalnum(link[len]) || link[len] == '#' || link[len] == '/' || link[len] == '?')) 47 | return 1; 48 | } 49 | 50 | return 0; 51 | } 52 | 53 | static size_t 54 | autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) 55 | { 56 | uint8_t cclose, copen = 0; 57 | size_t i; 58 | 59 | for (i = 0; i < link_end; ++i) 60 | if (data[i] == '<') { 61 | link_end = i; 62 | break; 63 | } 64 | 65 | while (link_end > 0) { 66 | uint8_t c = data[link_end - 1]; 67 | 68 | if (c == 0) 69 | break; 70 | 71 | if (strchr("?!.,", c) != NULL) 72 | link_end--; 73 | 74 | else if (c == ';') { 75 | size_t new_end = link_end - 2; 76 | 77 | while (new_end > 0 && isalpha(data[new_end])) 78 | new_end--; 79 | 80 | if (new_end < link_end - 2 && data[new_end] == '&') 81 | link_end = new_end; 82 | else 83 | link_end--; 84 | } 85 | else break; 86 | } 87 | 88 | if (link_end == 0) 89 | return 0; 90 | 91 | cclose = data[link_end - 1]; 92 | 93 | switch (cclose) { 94 | case '"': copen = '"'; break; 95 | case '\'': copen = '\''; break; 96 | case ')': copen = '('; break; 97 | case ']': copen = '['; break; 98 | case '}': copen = '{'; break; 99 | } 100 | 101 | if (copen != 0) { 102 | size_t closing = 0; 103 | size_t opening = 0; 104 | size_t i = 0; 105 
| 106 | /* Try to close the final punctuation sign in this same line; 107 | * if we managed to close it outside of the URL, that means that it's 108 | * not part of the URL. If it closes inside the URL, that means it 109 | * is part of the URL. 110 | * 111 | * Examples: 112 | * 113 | * foo http://www.pokemon.com/Pikachu_(Electric) bar 114 | * => http://www.pokemon.com/Pikachu_(Electric) 115 | * 116 | * foo (http://www.pokemon.com/Pikachu_(Electric)) bar 117 | * => http://www.pokemon.com/Pikachu_(Electric) 118 | * 119 | * foo http://www.pokemon.com/Pikachu_(Electric)) bar 120 | * => http://www.pokemon.com/Pikachu_(Electric)) 121 | * 122 | * (foo http://www.pokemon.com/Pikachu_(Electric)) bar 123 | * => foo http://www.pokemon.com/Pikachu_(Electric) 124 | */ 125 | 126 | while (i < link_end) { 127 | if (data[i] == copen) 128 | opening++; 129 | else if (data[i] == cclose) 130 | closing++; 131 | 132 | i++; 133 | } 134 | 135 | if (closing != opening) 136 | link_end--; 137 | } 138 | 139 | return link_end; 140 | } 141 | 142 | /* 143 | * Checks that `prefix_char` occurs on a word boundary just before `data`, 144 | * where `data` points to the character to search to the left of, and a word boundary 145 | * is (currently) a whitespace character, punctuation, or the start of the string. 146 | * Returns the length of the prefix. 147 | */ 148 | static int 149 | check_reddit_autolink_prefix( 150 | const uint8_t* data, 151 | size_t max_rewind, 152 | size_t max_lookbehind, 153 | size_t size, 154 | char prefix_char 155 | ) 156 | { 157 | /* Make sure this `/` is part of `/?r/` */ 158 | if (size < 2 || max_rewind < 1 || data[-1] != prefix_char) 159 | return 0; 160 | 161 | /* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */ 162 | if (max_rewind > 1) { 163 | const char boundary = data[-2]; 164 | if (boundary == '/') 165 | return 2; 166 | /** 167 | * Here's where our lack of unicode-awareness bites us. 
We don't correctly 168 | * match punctuation / whitespace characters for the boundary, because we 169 | * reject valid cases like "。r/example" (note the fullwidth period.) 170 | * 171 | * A better implementation might try to rewind over bytes with the 8th bit set, try 172 | * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint. 173 | */ 174 | else if (ispunct(boundary) || isspace(boundary)) 175 | return 1; 176 | else 177 | return 0; 178 | } else if (max_lookbehind > 2) { 179 | /* There's an inline element just left of the `prefix_char`, is it an escaped forward 180 | * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly 181 | * allow "\\/r/foo". 182 | */ 183 | if (data[-2] == '/' && data[-3] == '\\') 184 | return 0; 185 | } 186 | 187 | /* Must be a new-style shortlink with nothing relevant to the left of it. */ 188 | return 1; 189 | } 190 | 191 | static size_t 192 | check_domain(uint8_t *data, size_t size, int allow_short) 193 | { 194 | size_t i, np = 0; 195 | 196 | if (!isalnum(data[0])) 197 | return 0; 198 | 199 | for (i = 1; i < size - 1; ++i) { 200 | if (data[i] == '.') np++; 201 | else if (!isalnum(data[i]) && data[i] != '-') break; 202 | } 203 | 204 | if (allow_short) { 205 | /* We don't need a valid domain in the strict sense (with 206 | * least one dot; so just make sure it's composed of valid 207 | * domain characters and return the length of the the valid 208 | * sequence. */ 209 | return i; 210 | } else { 211 | /* a valid domain needs to have at least a dot. 212 | * that's as far as we get */ 213 | return np ? 
i : 0; 214 | } 215 | } 216 | 217 | size_t 218 | sd_autolink__www( 219 | size_t *rewind_p, 220 | struct buf *link, 221 | uint8_t *data, 222 | size_t max_rewind, 223 | size_t size, 224 | unsigned int flags) 225 | { 226 | size_t link_end; 227 | 228 | if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) 229 | return 0; 230 | 231 | if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) 232 | return 0; 233 | 234 | link_end = check_domain(data, size, 0); 235 | 236 | if (link_end == 0) 237 | return 0; 238 | 239 | while (link_end < size && !isspace(data[link_end])) 240 | link_end++; 241 | 242 | link_end = autolink_delim(data, link_end, max_rewind, size); 243 | 244 | if (link_end == 0) 245 | return 0; 246 | 247 | bufput(link, data, link_end); 248 | *rewind_p = 0; 249 | 250 | return (int)link_end; 251 | } 252 | 253 | size_t 254 | sd_autolink__email( 255 | size_t *rewind_p, 256 | struct buf *link, 257 | uint8_t *data, 258 | size_t max_rewind, 259 | size_t size, 260 | unsigned int flags) 261 | { 262 | size_t link_end, rewind; 263 | int nb = 0, np = 0; 264 | 265 | for (rewind = 0; rewind < max_rewind; ++rewind) { 266 | uint8_t c = data[-rewind - 1]; 267 | 268 | if (c == 0) 269 | break; 270 | 271 | if (isalnum(c)) 272 | continue; 273 | 274 | if (strchr(".+-_", c) != NULL) 275 | continue; 276 | 277 | break; 278 | } 279 | 280 | if (rewind == 0) 281 | return 0; 282 | 283 | for (link_end = 0; link_end < size; ++link_end) { 284 | uint8_t c = data[link_end]; 285 | 286 | if (isalnum(c)) 287 | continue; 288 | 289 | if (c == '@') 290 | nb++; 291 | else if (c == '.' 
&& link_end < size - 1) 292 | np++; 293 | else if (c != '-' && c != '_') 294 | break; 295 | } 296 | 297 | if (link_end < 2 || nb != 1 || np == 0) 298 | return 0; 299 | 300 | link_end = autolink_delim(data, link_end, max_rewind, size); 301 | 302 | if (link_end == 0) 303 | return 0; 304 | 305 | bufput(link, data - rewind, link_end + rewind); 306 | *rewind_p = rewind; 307 | 308 | return link_end; 309 | } 310 | 311 | size_t 312 | sd_autolink__url( 313 | size_t *rewind_p, 314 | struct buf *link, 315 | uint8_t *data, 316 | size_t max_rewind, 317 | size_t size, 318 | unsigned int flags) 319 | { 320 | size_t link_end, rewind = 0, domain_len; 321 | 322 | if (size < 4 || data[1] != '/' || data[2] != '/') 323 | return 0; 324 | 325 | while (rewind < max_rewind && isalpha(data[-rewind - 1])) 326 | rewind++; 327 | 328 | if (!sd_autolink_issafe(data - rewind, size + rewind)) 329 | return 0; 330 | 331 | link_end = strlen("://"); 332 | 333 | domain_len = check_domain( 334 | data + link_end, 335 | size - link_end, 336 | flags & SD_AUTOLINK_SHORT_DOMAINS); 337 | 338 | if (domain_len == 0) 339 | return 0; 340 | 341 | link_end += domain_len; 342 | while (link_end < size && !isspace(data[link_end])) 343 | link_end++; 344 | 345 | link_end = autolink_delim(data, link_end, max_rewind, size); 346 | 347 | if (link_end == 0) 348 | return 0; 349 | 350 | bufput(link, data - rewind, link_end + rewind); 351 | *rewind_p = rewind; 352 | 353 | return link_end; 354 | } 355 | 356 | size_t 357 | sd_autolink__subreddit( 358 | size_t *rewind_p, 359 | struct buf *link, 360 | uint8_t *data, 361 | size_t max_rewind, 362 | size_t max_lookbehind, 363 | size_t size, 364 | int *no_slash 365 | ) 366 | { 367 | /** 368 | * This is meant to handle both r/foo and /r/foo style subreddit references. 369 | * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'. 
370 | * In pseudo-regex, this matches something like: 371 | * 372 | * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?` 373 | * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)` 374 | */ 375 | size_t link_end; 376 | size_t rewind; 377 | int is_allminus = 0; 378 | 379 | rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r'); 380 | if (!rewind) 381 | return 0; 382 | 383 | /* offset to the "meat" of the link */ 384 | link_end = strlen("/"); 385 | 386 | if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0) 387 | is_allminus = 1; 388 | 389 | do { 390 | size_t start = link_end; 391 | int max_length = 24; 392 | 393 | /* special case: /r/reddit.com (only subreddit containing '.'). */ 394 | if ( size >= link_end+10 && strncasecmp((char*)data+link_end, "reddit.com", 10) == 0 ) { 395 | link_end += 10; 396 | /* Make sure there are no trailing characters (don't do 397 | * any autolinking for /r/reddit.commission) */ 398 | max_length = 10; 399 | } 400 | 401 | /* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */ 402 | else { 403 | /* support autolinking to timereddits, /r/t:when (1 April 2012) */ 404 | if ( size > link_end+2 && strncasecmp((char*)data+link_end, "t:", 2) == 0 ) 405 | link_end += 2; /* Jump over the 't:' */ 406 | 407 | /* the first character of a subreddit name must be a letter or digit */ 408 | if (!isalnum(data[link_end])) 409 | return 0; 410 | link_end += 1; 411 | } 412 | 413 | /* consume valid characters ([A-Za-z0-9_]) until we run out */ 414 | while (link_end < size && (isalnum(data[link_end]) || 415 | data[link_end] == '_')) 416 | link_end++; 417 | 418 | /* valid subreddit names are between 3 and 21 characters, with 419 | * some subreddits having 2-character names. Don't bother with 420 | * autolinking for anything outside this length range. 
421 | * (chksrname function in reddit/.../validator.py) */ 422 | if ( link_end-start < 2 || link_end-start > max_length ) 423 | return 0; 424 | 425 | /* If we are linking to a multireddit, continue */ 426 | } while ( link_end < size && (data[link_end] == '+' || (is_allminus && data[link_end] == '-')) && link_end++ ); 427 | 428 | if (link_end < size && data[link_end] == '/') { 429 | while (link_end < size && (isalnum(data[link_end]) || 430 | data[link_end] == '_' || 431 | data[link_end] == '/' || 432 | data[link_end] == '-')) 433 | link_end++; 434 | } 435 | 436 | /* make the link */ 437 | bufput(link, data - rewind, link_end + rewind); 438 | 439 | *no_slash = (rewind == 1); 440 | *rewind_p = rewind; 441 | 442 | return link_end; 443 | } 444 | 445 | size_t 446 | sd_autolink__username( 447 | size_t *rewind_p, 448 | struct buf *link, 449 | uint8_t *data, 450 | size_t max_rewind, 451 | size_t max_lookbehind, 452 | size_t size, 453 | int *no_slash 454 | ) 455 | { 456 | size_t link_end; 457 | size_t rewind; 458 | 459 | if (size < 3) 460 | return 0; 461 | 462 | rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u'); 463 | if (!rewind) 464 | return 0; 465 | 466 | link_end = strlen("/"); 467 | 468 | /* the first letter of a username must... 
well, be valid, we don't care otherwise */ 469 | if (!isalnum(data[link_end]) && data[link_end] != '_' && data[link_end] != '-') 470 | return 0; 471 | link_end += 1; 472 | 473 | /* consume valid characters ([A-Za-z0-9_-/]) until we run out */ 474 | while (link_end < size && (isalnum(data[link_end]) || 475 | data[link_end] == '_' || 476 | data[link_end] == '/' || 477 | data[link_end] == '-')) 478 | link_end++; 479 | 480 | /* make the link */ 481 | bufput(link, data - rewind, link_end + rewind); 482 | 483 | *no_slash = (rewind == 1); 484 | *rewind_p = rewind; 485 | 486 | return link_end; 487 | } 488 | -------------------------------------------------------------------------------- /test_snudown.js: -------------------------------------------------------------------------------- 1 | import * as Snudown from 'snudown-js'; 2 | 3 | // http://ecmanaut.blogspot.ca/2006/07/encoding-decoding-utf8-in-javascript.html 4 | function encodeUTF8(s) { 5 | return unescape(encodeURIComponent(s)); 6 | } 7 | 8 | var cases = { 9 | '': '', 10 | 'http://www.reddit.com': 11 | '

    http://www.reddit.com

    \n', 12 | 13 | 'http://www.reddit.com/a\x00b': 14 | '

    http://www.reddit.com/ab

    \n', 15 | 16 | 'foo@example.com': 17 | '

    foo@example.com

    \n', 18 | 19 | '[foo](http://en.wikipedia.org/wiki/Link_(film\\))': 20 | '

    foo

    \n', 21 | 22 | '(http://tsfr.org)': 23 | '

    (http://tsfr.org)

    \n', 24 | 25 | '[A link with a /r/subreddit in it](/lol)': 26 | '

    A link with a /r/subreddit in it

    \n', 27 | 28 | '[A link with a http://www.url.com in it](/lol)': 29 | '

    A link with a http://www.url.com in it

    \n', 30 | 31 | '[Empty Link]()': 32 | '

    [Empty Link]()

    \n', 33 | 34 | 'http://en.wikipedia.org/wiki/café_racer': 35 | '

    http://en.wikipedia.org/wiki/café_racer

    \n', 36 | 37 | '#####################################################hi': 38 | '
    ###############################################hi
    \n', 39 | 40 | '[foo](http://bar\nbar)': 41 | '

    foo

    \n', 42 | 43 | '/r/test': 44 | '

    /r/test

    \n', 45 | 46 | 'Words words /r/test words': 47 | '

    Words words /r/test words

    \n', 48 | 49 | '/r/': 50 | '

    /r/

    \n', 51 | 52 | 'escaped \\/r/test': 53 | '

    escaped /r/test

    \n', 54 | 55 | 'ampersands http://www.google.com?test&blah': 56 | '

    ampersands http://www.google.com?test&blah

    \n', 57 | 58 | '[_regular_ link with nesting](/test)': 59 | '

    regular link with nesting

    \n', 60 | 61 | ' www.a.co?with&test': 62 | '

    www.a.co?with&test

    \n', 63 | 64 | 'Normal^superscript': 65 | '

    Normalsuperscript

    \n', 66 | 67 | 'Escape\\^superscript': 68 | '

    Escape^superscript

    \n', 69 | 70 | '~~normal strikethrough~~': 71 | '

    normal strikethrough

    \n', 72 | 73 | '\\~~escaped strikethrough~~': 74 | '

    ~~escaped strikethrough~~

    \n', 75 | 76 | 'anywhere\x03, you': 77 | '

    anywhere, you

    \n', 78 | 79 | '[Test](//test)': 80 | '

    Test

    \n', 81 | 82 | '[Test](//#test)': 83 | '

    Test

    \n', 84 | 85 | '[Test](#test)': 86 | '

    Test

    \n', 87 | 88 | '[Test](git://github.com)': 89 | '

    Test

    \n', 90 | 91 | '[Speculation](//?)': 92 | '

    Speculation

    \n', 93 | 94 | '/r/sr_with_underscores': 95 | '

    /r/sr_with_underscores

    \n', 96 | 97 | '[Test](///#test)': 98 | '

    Test

    \n', 99 | 100 | '/r/multireddit+test+yay': 101 | '

    /r/multireddit+test+yay

    \n', 102 | 103 | '': 104 | '

    <test>

    \n', 105 | 106 | 'words_with_underscores': 107 | '

    words_with_underscores

    \n', 108 | 109 | 'words*with*asterisks': 110 | '

    wordswithasterisks

    \n', 111 | 112 | '~test': 113 | '

    ~test

    \n', 114 | 115 | '/u/test': 116 | '

    /u/test

    \n', 117 | 118 | '/u/test/m/test test': 119 | '

    /u/test/m/test test

    \n', 120 | 121 | '/U/nope': 122 | '

    /U/nope

    \n', 123 | 124 | '/r/test/m/test test': 125 | '

    /r/test/m/test test

    \n', 126 | 127 | '/r/test/w/test test': 128 | '

    /r/test/w/test test

    \n', 129 | 130 | '/r/test/comments/test test': 131 | '

    /r/test/comments/test test

    \n', 132 | 133 | '/u/test/commentscommentscommentscommentscommentscommentscomments/test test': 134 | '

    /u/test/commentscommentscommentscommentscommentscommentscomments/test test

    \n', 135 | 136 | 'a /u/reddit': 137 | '

    a /u/reddit

    \n', 138 | 139 | 'u/reddit': 140 | '

    u/reddit

    \n', 141 | 142 | 'a u/reddit': 143 | '

    a u/reddit

    \n', 144 | 145 | 'a u/reddit/foobaz': 146 | '

    a u/reddit/foobaz

    \n', 147 | 148 | 'foo:u/reddit': 149 | '

    foo:u/reddit

    \n', 150 | 151 | 'fuu/reddit': 152 | '

    fuu/reddit

    \n', 153 | 154 | /*# Don't treat unicode punctuation as a word boundary for now 155 | [encodeUTF8('a。u/reddit')]: 156 | encodeUTF8('

    a。u/reddit

    \n'),*/ 157 | 158 | '\\/u/me': 159 | '

    /u/me

    \n', 160 | 161 | '\\\\/u/me': 162 | '

    \\/u/me

    \n', 163 | 164 | '\\u/me': 165 | '

    \\u/me

    \n', 166 | 167 | '\\\\u/me': 168 | '

    \\u/me

    \n', 169 | 170 | 'u\\/me': 171 | '

    u/me

    \n', 172 | 173 | '*u/me*': 174 | '

    u/me

    \n', 175 | 176 | 'foo^u/me': 177 | '

    foou/me

    \n', 178 | 179 | '*foo*u/me': 180 | '

    foou/me

    \n', 181 | 182 | 'u/me': 183 | '

    u/me

    \n', 184 | 185 | '/u/me': 186 | '

    /u/me

    \n', 187 | 188 | 'u/m': 189 | '

    u/m

    \n', 190 | 191 | '/u/m': 192 | '

    /u/m

    \n', 193 | 194 | '/f/oobar': 195 | '

    /f/oobar

    \n', 196 | 197 | 'f/oobar': 198 | '

    f/oobar

    \n', 199 | 200 | '/r/test/commentscommentscommentscommentscommentscommentscomments/test test': 201 | '

    /r/test/commentscommentscommentscommentscommentscommentscomments/test test

    \n', 202 | 203 | 'blah \\': 204 | '

    blah \\

    \n', 205 | 206 | '/r/whatever: fork': 207 | '

    /r/whatever: fork

    \n', 208 | 209 | '/r/t:timereddit': 210 | '

    /r/t:timereddit

    \n', 211 | 212 | '/r/reddit.com': 213 | '

    /r/reddit.com

    \n', 214 | 215 | '/r/not.cool': 216 | '

    /r/not.cool

    \n', 217 | 218 | '/r/very+clever+multireddit+reddit.com+t:fork+yay': 219 | '

    /r/very+clever+multireddit+reddit.com+t:fork+yay

    \n', 220 | 221 | '/r/t:heatdeathoftheuniverse': 222 | '

    /r/t:heatdeathoftheuniverse

    \n', 223 | 224 | '/r/all-minus-something': 225 | '

    /r/all-minus-something

    \n', 226 | 227 | '/r/notall-minus': 228 | '

    /r/notall-minus

    \n', 229 | 230 | 'a /r/reddit.com': 231 | '

    a /r/reddit.com

    \n', 232 | 233 | 'a r/reddit.com': 234 | '

    a r/reddit.com

    \n', 235 | 236 | 'foo:r/reddit.com': 237 | '

    foo:r/reddit.com

    \n', 238 | 239 | 'foobar/reddit.com': 240 | '

    foobar/reddit.com

    \n', 241 | 242 | /*[encodeUTF8('a。r/reddit.com')]: 243 | encodeUTF8('

    a。r/reddit.com

    \n'),*/ 244 | 245 | '/R/reddit.com': 246 | '

    /R/reddit.com

    \n', 247 | 248 | '/r/irc://foo.bar/': 249 | '

    /r/irc://foo.bar/

    \n', 250 | 251 | '/r/t:irc//foo.bar/': 252 | '

    /r/t:irc//foo.bar/

    \n', 253 | 254 | '/r/all-irc://foo.bar/': 255 | '

    /r/all-irc://foo.bar/

    \n', 256 | 257 | '/r/foo+irc://foo.bar/': 258 | '

    /r/foo+irc://foo.bar/

    \n', 259 | 260 | '/r/www.example.com': 261 | '

    /r/www.example.com

    \n', 262 | 263 | '.http://reddit.com': 264 | '

    .http://reddit.com

    \n', 265 | 266 | '[r://](/aa)': 267 | '

    r://http://reddit.com/

    \n', 268 | 269 | '/u/http://www.reddit.com/user/reddit': 270 | '

    /u/http://www.reddit.com/user/reddit

    \n', 271 | 272 | 'www.http://example.com/': 273 | '

    www.http://example.com/

    \n', 274 | 275 | /*['|'.repeat(5) + '\n' + '-|'.repeat(5) + '\n|\n']: 276 | '
  • \n\n' + '\n'.repeat(4) + '\n\n\n\n\n
    \n', 277 | 278 | ['|'.repeat(2) + '\n' + '-|'.repeat(2) + '\n|\n']: 279 | '\n\n' + '\n'.repeat(1) + '\n\n\n\n\n
    \n', 280 | 281 | ['|'.repeat(65) + '\n' + '-|'.repeat(65) + '\n|\n']: 282 | '\n\n' + '\n'.repeat(64) + '\n\n\n\n\n
    \n', 283 | 284 | ['|'.repeat(66) + '\n' + '-|'.repeat(66) + '\n|\n']: 285 | '

    ' + '|'.repeat(66) + '\n' + '-|'.repeat(66) + '\n|' + '

    \n',*/ 286 | 287 | 'ϑ': 288 | '

    ϑ

    \n', 289 | 290 | '&foobar;': 291 | '

    &foobar;

    \n', 292 | 293 | ' ': 294 | '

    &nbsp

    \n', 295 | 296 | '&#foobar;': 297 | '

    &#foobar;

    \n', 298 | 299 | 'oobar;': 300 | '

    &#xfoobar;

    \n', 301 | 302 | '�': 303 | '

    &#9999999999;

    \n', 304 | 305 | 'c': 306 | '

    c

    \n', 307 | 308 | '~': 309 | '

    ~

    \n', 310 | 311 | '~': 312 | '

    ~

    \n', 313 | 314 | '½': 315 | '

    ½

    \n', 316 | 317 | 'aaa½aaa': 318 | '

    aaa½aaa

    \n', 319 | 320 | '&': 321 | '

    &

    \n', 322 | 323 | '&;': 324 | '

    &;

    \n', 325 | 326 | '&#;': 327 | '

    &#;

    \n', 328 | 329 | '&#;': 330 | '

    &#;

    \n', 331 | 332 | '&#x;': 333 | '

    &#x;

    \n', 334 | '> quotey mcquoteface': 335 | '
    \n

    quotey mcquoteface

    \n
    \n', 336 | 337 | '> quotey mcquoteface\nnew line of text what happens?': 338 | '
    \n

    quotey mcquoteface\nnew line of text what happens?

    \n
    \n', 339 | 340 | '> quotey mcquoteface\n\ntwo new lines then text what happens?': 341 | '
    \n

    quotey mcquoteface

    \n
    \n\n

    two new lines then text what happens?

    \n', 342 | 343 | '> quotey mcquoteface\n> more quotey': 344 | '
    \n

    quotey mcquoteface\nmore quotey

    \n
    \n', 345 | 346 | '> quotey macquoteface\n\n> another quotey': 347 | '
    \n

    quotey macquoteface

    \n\n

    another quotey

    \n
    \n', 348 | 349 | '>! spoily mcspoilerface': 350 | '
    \n

    spoily mcspoilerface

    \n
    \n', 351 | 352 | '>! spoily mcspoilerface\nmore spoilage goes here': 353 | '
    \n

    spoily mcspoilerface\nmore spoilage goes here

    \n
    \n', 354 | 355 | '>! spoily mcspoilerface > incorrect quote syntax': 356 | '
    \n

    spoily mcspoilerface > incorrect quote syntax

    \n
    \n', 357 | 358 | '>! spoily mcspoilerface\n\n': 359 | '
    \n

    spoily mcspoilerface

    \n
    \n', 360 | 361 | '>! spoily mcspoilerface\n\nnormal text here': 362 | '
    \n

    spoily mcspoilerface

    \n
    \n\n

    normal text here

    \n', 363 | 364 | '>! spoily mcspoilerface\n>! blockspoiler continuation': 365 | '
    \n

    spoily mcspoilerface\nblockspoiler continuation

    \n
    \n', 366 | 367 | '>! spoily mcspoilerface\n> quotey mcquoteface': 368 | '
    \n

    spoily mcspoilerface

    \n\n
    \n

    quotey mcquoteface

    \n
    \n
    \n', 369 | 370 | '>! spoiler p1\n>!\n>! spoiler p2\n>! spoiler p3': 371 | '
    \n

    spoiler p1

    \n\n

    spoiler p2\nspoiler p3

    \n
    \n', 372 | 373 | '>>! spoiler p1\n>!\n>! spoiler p2\n>! spoiler p3': 374 | '
    \n
    \n

    spoiler p1

    \n\n

    spoiler p2\nspoiler p3

    \n
    \n
    \n', 375 | 376 | '>>! spoiler p1\n>!\n>! spoiler p2\n\nnew text': 377 | '
    \n
    \n

    spoiler p1

    \n\n

    spoiler p2

    \n
    \n
    \n\n

    new text

    \n', 378 | 379 | '>>! spoiler p1\n>!\n>! spoiler p2\n\n>! new blockspoiler': 380 | '
    \n
    \n

    spoiler p1

    \n\n

    spoiler p2

    \n
    \n
    \n\n
    \n

    new blockspoiler

    \n
    \n', 381 | 382 | '! this is not a spoiler': 383 | '

    ! this is not a spoiler

    \n', 384 | 385 | '>!\nTesting': 386 | '
    \n

    Testing

    \n
    \n', 387 | 388 | '>!\n\nTesting': 389 | '
    \n
    \n\n

    Testing

    \n', 390 | 391 | '>!': 392 | '
    \n
    \n', 393 | '>!\n>!': 394 | '
    \n
    \n', 395 | '>': 396 | '
    \n
    \n', 397 | '> some quote goes here\n>': 398 | '
    \n

    some quote goes here

    \n
    \n', 399 | 'This is an >!inline spoiler!< sentence.': 400 | '

    This is an inline spoiler sentence.

    \n', 401 | '>!Inline spoiler!< starting the sentence': 402 | '

    Inline spoiler starting the sentence

    \n', 403 | 'Inline >!spoiler with *emphasis*!< test': 404 | '

    Inline spoiler with emphasis test

    \n', 405 | '>! This is an illegal blockspoiler >!with an inline spoiler!<': 406 | '

    >! This is an illegal blockspoiler with an inline spoiler

    \n', 407 | 'This is an >!inline spoiler with some >!additional!< text!<': 408 | '

    This is an inline spoiler with some >!additional text!<

    \n' 409 | }; 410 | 411 | // Older node versions don't support computed property names 412 | 413 | function repeat(str, n) { 414 | return new Array(n + 1).join(str); 415 | } 416 | 417 | 418 | cases[encodeUTF8('a。u/reddit')] = encodeUTF8('

    a。u/reddit

    \n'); 419 | 420 | cases[encodeUTF8('a。r/reddit.com')] = encodeUTF8('

    a。r/reddit.com

    \n'); 421 | 422 | cases[repeat('|', 5) + '\n' + repeat('-|', 5) + '\n|\n'] = '\n\n' + repeat('\n', 4) + '\n\n\n\n\n
    \n'; 423 | cases[repeat('|', 2) + '\n' + repeat('-|', 2) + '\n|\n'] = '\n\n' + repeat('\n', 1) + '\n\n\n\n\n
    \n'; 424 | cases[repeat('|', 65) + '\n' + repeat('-|', 65) + '\n|\n'] = '\n\n' + repeat('\n', 64) + '\n\n\n\n\n
    \n'; 425 | cases[repeat('|', 66) + '\n' + repeat('-|', 66) + '\n|\n'] = '

    ' + repeat('|', 66) + '\n' + repeat('-|', 66) + '\n|' + '

    \n'; 426 | 427 | function* xrange(start, end) { 428 | if (end == undefined) { 429 | end = start; 430 | start = 0; 431 | } 432 | for (var i = start; i < end; i++) { 433 | yield i; 434 | } 435 | } 436 | 437 | function* chain(...iterables) { 438 | for (var iter of iterables) { 439 | yield* iter; 440 | } 441 | } 442 | 443 | // Test that every numeric entity is encoded as 444 | // it should be. 445 | var ILLEGAL_NUMERIC_ENTS = new Set(chain( 446 | xrange(0, 9), 447 | xrange(11, 13), 448 | xrange(14, 32), 449 | xrange(55296, 57344), 450 | xrange(65534, 65536) 451 | )); 452 | 453 | var ent_test_key = ''; 454 | var ent_test_val = ''; 455 | for (const i of xrange(65550)) { 456 | var ent_testcase = '&#' + i + ';&#x' + i.toString(16) + ';'; 457 | ent_test_key += ent_testcase; 458 | if (ILLEGAL_NUMERIC_ENTS.has(i)) 459 | ent_test_val += ent_testcase.replace(/&/g, '&'); 460 | else 461 | ent_test_val += ent_testcase; 462 | } 463 | 464 | cases[ent_test_key] = '

    ' + ent_test_val + '

    \n'; 465 | 466 | var wiki_cases = { 467 | '': 468 | '

    \n', 469 | 470 | '
    ': 471 | '

    \n', 472 | 473 | '
    ': 474 | '

    \n', 475 | 476 | '
    ': 477 | '

    \n', 478 | 479 | '
    ': 480 | '

    \n', 481 | 482 | '
    ': 483 | '

    \n', 484 | 485 | '
    ': 486 | '

    \n', 487 | 488 | '
    ': 489 | '

    \n', 490 | 491 | '
    ': 492 | '

    \n', 493 | 494 | '
    ': 495 | '

    \n', 496 | 497 | '
    ': 498 | '

    \n', 499 | 500 | '
    ': 501 | '

    \n', 502 | 503 | '
    ': 504 | '

    \n', 505 | 506 | '
    ': 507 | '

    \n', 508 | 509 | '
    ': 510 | '

    \n', 511 | 512 | '
    ': 513 | '

    \n', 514 | 515 | '
    ': 516 | '

    \n', 517 | 518 | '
    ': 519 | '

    \n', 520 | 521 | '
    ': 522 | '

    \n', 523 | 524 | '
    ': 525 | '

    \n', 526 | 527 | '
    ': 528 | '

    \n', 529 | }; 530 | 531 | var start = Date.now(); 532 | 533 | function runTest(fn, input, expected_output) { 534 | var output = fn.apply(null, input); 535 | if (output !== expected_output) 536 | throw new Error( 537 | "TEST FAILED:" + 538 | "\n input: " + input[0] + 539 | "\n expected: " + expected_output + 540 | "\n actual: " + output 541 | ); 542 | } 543 | 544 | for (var input in wiki_cases) { 545 | runTest(Snudown.markdownWiki, [input], wiki_cases[input]); 546 | } 547 | 548 | for (var input in cases) { 549 | runTest(Snudown.markdown, [input], cases[input]); 550 | } 551 | 552 | [[ 553 | Snudown.markdown, 554 | ['/u/test', { nofollow: true, target: '_top' }], 555 | '

    /u/test

    \n' 556 | ], [ 557 | Snudown.markdownWiki, 558 | ['
    ', { nofollow: null, target: null }], 559 | '

    \n', 560 | ], [ 561 | Snudown.markdown, 562 | ['
    ', { nofollow: null, target: null }], 563 | '

    <table scope="foo">

    \n' 564 | ], [ 565 | Snudown.markdown, 566 | ['###Test', { enableToc: true, tocIdPrefix: 'prefixed_' }], 567 | '
    \n\n
    \n\n

    Test

    \n' 568 | ], [ 569 | // undefined text 570 | Snudown.markdown, 571 | [], 572 | '' 573 | ], [ 574 | // null text 575 | Snudown.markdown, 576 | [null], 577 | '' 578 | ], [ 579 | // undefined text 580 | Snudown.markdownWiki, 581 | [], 582 | '', 583 | ], [ 584 | // null text 585 | Snudown.markdownWiki, 586 | [null], 587 | '', 588 | ], [ 589 | // all named arguments 590 | Snudown.markdown, 591 | ['###Test\n
    \n/u/test', { nofollow: true, target: '_top', enableToc: true, tocIdPrefix: 'prefixed_' }], 592 | '
    \n\n
    \n\n

    Test

    \n\n

    <table scope="foo">\n/u/test

    \n' 593 | ], [ 594 | // all named arguments 595 | Snudown.markdownWiki, 596 | ['###Test\n
    \n/u/test', { nofollow: true, target: '_top', enableToc: true, tocIdPrefix: 'prefixed_' }], 597 | '
    \n\n
    \n\n

    Test

    \n\n

    \n/u/test

    \n', 598 | ]].forEach(function(testArgs) { 599 | runTest.apply(null, testArgs); 600 | }); 601 | 602 | var elapsed = Date.now() - start; 603 | console.log('Test Passed:', elapsed, 'ms'); 604 | -------------------------------------------------------------------------------- /html/html.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009, Natacha Porté 3 | * Copyright (c) 2011, Vicent Marti 4 | * 5 | * Permission to use, copy, modify, and distribute this software for any 6 | * purpose with or without fee is hereby granted, provided that the above 7 | * copyright notice and this permission notice appear in all copies. 8 | * 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | */ 17 | 18 | #include "markdown.h" 19 | #include "html.h" 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "houdini.h" 28 | 29 | #define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML) 30 | 31 | int 32 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname) 33 | { 34 | size_t i; 35 | int closed = 0; 36 | 37 | if (tag_size < 3 || tag_data[0] != '<') 38 | return HTML_TAG_NONE; 39 | 40 | i = 1; 41 | 42 | if (tag_data[i] == '/') { 43 | closed = 1; 44 | i++; 45 | } 46 | 47 | for (; i < tag_size; ++i, ++tagname) { 48 | if (*tagname == 0) 49 | break; 50 | 51 | if (tag_data[i] != *tagname) 52 | return HTML_TAG_NONE; 53 | } 54 | 55 | if (i == tag_size) 56 | return HTML_TAG_NONE; 57 | 58 | if (isspace(tag_data[i]) || tag_data[i] == '>') 59 | return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN; 60 | 61 | return HTML_TAG_NONE; 62 | } 63 | 64 | static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length) 65 | { 66 | houdini_escape_html0(ob, source, length, 0); 67 | } 68 | 69 | static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length) 70 | { 71 | houdini_escape_href(ob, source, length); 72 | } 73 | 74 | /******************** 75 | * GENERIC RENDERER * 76 | ********************/ 77 | static int 78 | rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque) 79 | { 80 | struct html_renderopt *options = opaque; 81 | uint8_t offset = 0; 82 | 83 | if (!link || !link->size) 84 | return 0; 85 | 86 | if ((options->flags & HTML_SAFELINK) != 0 && 87 | !sd_autolink_issafe(link->data, link->size) && 88 | type != MKDA_EMAIL) 89 | return 0; 90 | 91 | BUFPUTSL(ob, "data + offset, link->size - offset); 95 | 96 | if (options->link_attributes) { 97 | bufputc(ob, '\"'); 98 | options->link_attributes(ob, link, opaque); 99 | bufputc(ob, '>'); 100 | } else { 101 | BUFPUTSL(ob, "\">"); 102 | } 103 | 104 | /* 105 | * Pretty printing: if we get an email 
address as 106 | * an actual URI, e.g. `mailto:foo@bar.com`, we don't 107 | * want to print the `mailto:` prefix 108 | */ 109 | if (bufprefix(link, "mailto:") == 0) { 110 | escape_html(ob, link->data + 7, link->size - 7); 111 | } else { 112 | escape_html(ob, link->data, link->size); 113 | } 114 | 115 | BUFPUTSL(ob, ""); 116 | 117 | return 1; 118 | } 119 | 120 | static void 121 | rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque) 122 | { 123 | if (ob->size) bufputc(ob, '\n'); 124 | 125 | if (lang && lang->size) { 126 | size_t i, cls; 127 | BUFPUTSL(ob, "
    size; ++i, ++cls) {
    130 | 			while (i < lang->size && isspace(lang->data[i]))
    131 | 				i++;
    132 | 
    133 | 			if (i < lang->size) {
    134 | 				size_t org = i;
    135 | 				while (i < lang->size && !isspace(lang->data[i]))
    136 | 					i++;
    137 | 
    138 | 				if (lang->data[org] == '.')
    139 | 					org++;
    140 | 
    141 | 				if (cls) bufputc(ob, ' ');
    142 | 				escape_html(ob, lang->data + org, i - org);
    143 | 			}
    144 | 		}
    145 | 
    146 | 		BUFPUTSL(ob, "\">");
    147 | 	} else
    148 | 		BUFPUTSL(ob, "
    ");
    149 | 
    150 | 	if (text)
    151 | 		escape_html(ob, text->data, text->size);
    152 | 
    153 | 	BUFPUTSL(ob, "
    \n"); 154 | } 155 | 156 | static void 157 | rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque) 158 | { 159 | if (ob->size) bufputc(ob, '\n'); 160 | BUFPUTSL(ob, "
    \n"); 161 | if (text) bufput(ob, text->data, text->size); 162 | BUFPUTSL(ob, "
    \n"); 163 | } 164 | 165 | static void 166 | rndr_blockspoiler(struct buf *ob, const struct buf *text, void *opaque) 167 | { 168 | if (ob->size) bufputc(ob, '\n'); 169 | BUFPUTSL(ob, "
    \n"); 170 | if (text) bufput(ob, text->data, text->size); 171 | BUFPUTSL(ob, "
    \n"); 172 | } 173 | 174 | static int 175 | rndr_codespan(struct buf *ob, const struct buf *text, void *opaque) 176 | { 177 | BUFPUTSL(ob, ""); 178 | if (text) escape_html(ob, text->data, text->size); 179 | BUFPUTSL(ob, ""); 180 | return 1; 181 | } 182 | 183 | static int 184 | rndr_spoilerspan(struct buf *ob, const struct buf *text, void *opaque) 185 | { 186 | if (!text || !text->size) 187 | return 0; 188 | 189 | BUFPUTSL(ob, ""); 190 | bufput(ob, text->data, text->size); 191 | BUFPUTSL(ob, ""); 192 | 193 | return 1; 194 | } 195 | 196 | static int 197 | rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque) 198 | { 199 | if (!text || !text->size) 200 | return 0; 201 | 202 | BUFPUTSL(ob, ""); 203 | bufput(ob, text->data, text->size); 204 | BUFPUTSL(ob, ""); 205 | return 1; 206 | } 207 | 208 | static int 209 | rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque) 210 | { 211 | if (!text || !text->size) 212 | return 0; 213 | 214 | BUFPUTSL(ob, ""); 215 | bufput(ob, text->data, text->size); 216 | BUFPUTSL(ob, ""); 217 | 218 | return 1; 219 | } 220 | 221 | static int 222 | rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque) 223 | { 224 | if (!text || !text->size) return 0; 225 | BUFPUTSL(ob, ""); 226 | if (text) bufput(ob, text->data, text->size); 227 | BUFPUTSL(ob, ""); 228 | return 1; 229 | } 230 | 231 | static int 232 | rndr_linebreak(struct buf *ob, void *opaque) 233 | { 234 | struct html_renderopt *options = opaque; 235 | bufputs(ob, USE_XHTML(options) ? "
    \n" : "
    \n"); 236 | return 1; 237 | } 238 | 239 | static void 240 | rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque) 241 | { 242 | struct html_renderopt *options = opaque; 243 | 244 | if (ob->size) 245 | bufputc(ob, '\n'); 246 | 247 | if (options->flags & HTML_TOC) { 248 | BUFPUTSL(ob, "toc_id_prefix) { 252 | bufputs(ob, options->toc_id_prefix); 253 | } 254 | BUFPUTSL(ob, "toc_"); 255 | bufputi(ob, options->toc_data.header_count++); 256 | BUFPUTSL(ob, "\">"); 257 | } else { 258 | BUFPUTSL(ob, ""); 261 | } 262 | 263 | if (text) bufput(ob, text->data, text->size); 264 | BUFPUTSL(ob, "\n"); 267 | } 268 | 269 | static int 270 | rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) 271 | { 272 | struct html_renderopt *options = opaque; 273 | 274 | if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size)) 275 | return 0; 276 | 277 | BUFPUTSL(ob, "size) 280 | escape_href(ob, link->data, link->size); 281 | 282 | if (title && title->size) { 283 | BUFPUTSL(ob, "\" title=\""); 284 | escape_html(ob, title->data, title->size); 285 | } 286 | 287 | if (options->link_attributes) { 288 | bufputc(ob, '\"'); 289 | options->link_attributes(ob, link, opaque); 290 | bufputc(ob, '>'); 291 | } else { 292 | BUFPUTSL(ob, "\">"); 293 | } 294 | 295 | if (content && content->size) bufput(ob, content->data, content->size); 296 | BUFPUTSL(ob, ""); 297 | return 1; 298 | } 299 | 300 | static void 301 | rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque) 302 | { 303 | if (ob->size) bufputc(ob, '\n'); 304 | bufput(ob, flags & MKD_LIST_ORDERED ? "
      \n" : "
        \n", 5); 305 | if (text) bufput(ob, text->data, text->size); 306 | bufput(ob, flags & MKD_LIST_ORDERED ? "
    \n" : "\n", 6); 307 | } 308 | 309 | static void 310 | rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque) 311 | { 312 | BUFPUTSL(ob, "
  • "); 313 | if (text) { 314 | size_t size = text->size; 315 | while (size && text->data[size - 1] == '\n') 316 | size--; 317 | 318 | bufput(ob, text->data, size); 319 | } 320 | BUFPUTSL(ob, "
  • \n"); 321 | } 322 | 323 | static void 324 | rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque) 325 | { 326 | struct html_renderopt *options = opaque; 327 | size_t i = 0; 328 | 329 | if (ob->size) bufputc(ob, '\n'); 330 | 331 | if (!text || !text->size) 332 | return; 333 | 334 | while (i < text->size && isspace(text->data[i])) i++; 335 | 336 | if (i == text->size) 337 | return; 338 | 339 | BUFPUTSL(ob, "

    "); 340 | if (options->flags & HTML_HARD_WRAP) { 341 | size_t org; 342 | while (i < text->size) { 343 | org = i; 344 | while (i < text->size && text->data[i] != '\n') 345 | i++; 346 | 347 | if (i > org) 348 | bufput(ob, text->data + org, i - org); 349 | 350 | /* 351 | * do not insert a line break if this newline 352 | * is the last character on the paragraph 353 | */ 354 | if (i >= text->size - 1) 355 | break; 356 | 357 | rndr_linebreak(ob, opaque); 358 | i++; 359 | } 360 | } else { 361 | bufput(ob, &text->data[i], text->size - i); 362 | } 363 | BUFPUTSL(ob, "

    \n"); 364 | } 365 | 366 | static void 367 | rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque) 368 | { 369 | size_t org, sz; 370 | if (!text) return; 371 | sz = text->size; 372 | while (sz > 0 && text->data[sz - 1] == '\n') sz--; 373 | org = 0; 374 | while (org < sz && text->data[org] == '\n') org++; 375 | if (org >= sz) return; 376 | if (ob->size) bufputc(ob, '\n'); 377 | bufput(ob, text->data + org, sz - org); 378 | bufputc(ob, '\n'); 379 | } 380 | 381 | static int 382 | rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque) 383 | { 384 | if (!text || !text->size) return 0; 385 | BUFPUTSL(ob, ""); 386 | bufput(ob, text->data, text->size); 387 | BUFPUTSL(ob, ""); 388 | return 1; 389 | } 390 | 391 | static void 392 | rndr_hrule(struct buf *ob, void *opaque) 393 | { 394 | struct html_renderopt *options = opaque; 395 | if (ob->size) bufputc(ob, '\n'); 396 | bufputs(ob, USE_XHTML(options) ? "
    \n" : "
    \n"); 397 | } 398 | 399 | static int 400 | rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque) 401 | { 402 | struct html_renderopt *options = opaque; 403 | if (!link || !link->size) return 0; 404 | 405 | BUFPUTSL(ob, "data, link->size); 407 | BUFPUTSL(ob, "\" alt=\""); 408 | 409 | if (alt && alt->size) 410 | escape_html(ob, alt->data, alt->size); 411 | 412 | if (title && title->size) { 413 | BUFPUTSL(ob, "\" title=\""); 414 | escape_html(ob, title->data, title->size); } 415 | 416 | bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">"); 417 | return 1; 418 | } 419 | 420 | static void 421 | rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque, 422 | char* tagname, char** whitelist, int tagtype) 423 | { 424 | size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0; 425 | struct buf *attr; 426 | struct buf *value; 427 | char c; 428 | 429 | bufputc(ob, '<'); 430 | 431 | if(tagtype == HTML_TAG_CLOSE) { 432 | bufputc(ob, '/'); 433 | bufputs(ob, tagname); 434 | bufputc(ob, '>'); 435 | return; 436 | } 437 | 438 | bufputs(ob, tagname); 439 | i = 1 + strlen(tagname); 440 | 441 | attr = bufnew(16); 442 | value = bufnew(16); 443 | 444 | for(; i < text->size && !done; i++) { 445 | c = text->data[i]; 446 | done = 0; 447 | reset = 0; 448 | done_attr = 0; 449 | 450 | switch(c) { 451 | case '>': 452 | done = 1; 453 | break; 454 | case '\'': 455 | case '"': 456 | if(!seen_equals) { 457 | reset = 1; 458 | } else if(!in_str) { 459 | in_str = c; 460 | } else if(in_str == c) { 461 | in_str = 0; 462 | done_attr = 1; 463 | } else { 464 | bufputc(value, c); 465 | } 466 | break; 467 | case ' ': 468 | if (in_str) { 469 | bufputc(value, ' '); 470 | } else { 471 | reset = 1; 472 | } 473 | break; 474 | case '=': 475 | if(seen_equals) { 476 | reset = 1; 477 | break; 478 | } 479 | seen_equals = 1; 480 | break; 481 | default: 482 | if(seen_equals && in_str || !seen_equals) { 483 | bufputc(seen_equals ? 
value : attr, c); 484 | } 485 | break; 486 | } 487 | 488 | if(done_attr) { 489 | int valid = 0; 490 | for(z = 0; whitelist[z]; z++) { 491 | if(strlen(whitelist[z]) != attr->size) { 492 | continue; 493 | } 494 | for(x = 0; x < attr->size; x++) { 495 | if(tolower(whitelist[z][x]) != tolower(attr->data[x])) { 496 | break; 497 | } 498 | } 499 | if(x == attr->size) { 500 | valid = 1; 501 | break; 502 | } 503 | } 504 | if(valid && value->size && attr->size) { 505 | bufputc(ob, ' '); 506 | escape_html(ob, attr->data, attr->size); 507 | bufputs(ob, "=\""); 508 | escape_html(ob, value->data, value->size); 509 | bufputc(ob, '"'); 510 | } 511 | reset = 1; 512 | } 513 | 514 | if(reset) { 515 | seen_equals = 0; 516 | in_str = 0; 517 | bufreset(attr); 518 | bufreset(value); 519 | } 520 | } 521 | 522 | bufrelease(attr); 523 | bufrelease(value); 524 | 525 | bufputc(ob, '>'); 526 | } 527 | 528 | static int 529 | rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque) 530 | { 531 | struct html_renderopt *options = opaque; 532 | char** whitelist = options->html_element_whitelist; 533 | int i, tagtype; 534 | 535 | /* Items on the whitelist ignore all other flags and just output */ 536 | if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) { 537 | for (i = 0; whitelist[i]; i++) { 538 | tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]); 539 | if (tagtype != HTML_TAG_NONE) { 540 | rndr_html_tag(ob, text, opaque, 541 | whitelist[i], 542 | options->html_attr_whitelist, 543 | tagtype); 544 | return 1; 545 | } 546 | } 547 | } 548 | 549 | /* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES 550 | * It doens't see if there are any valid tags, just escape all of them. 
*/ 551 | if((options->flags & HTML_ESCAPE) != 0) { 552 | escape_html(ob, text->data, text->size); 553 | return 1; 554 | } 555 | 556 | if ((options->flags & HTML_SKIP_HTML) != 0) 557 | return 1; 558 | 559 | if ((options->flags & HTML_SKIP_STYLE) != 0 && 560 | sdhtml_is_tag(text->data, text->size, "style")) 561 | return 1; 562 | 563 | if ((options->flags & HTML_SKIP_LINKS) != 0 && 564 | sdhtml_is_tag(text->data, text->size, "a")) 565 | return 1; 566 | 567 | if ((options->flags & HTML_SKIP_IMAGES) != 0 && 568 | sdhtml_is_tag(text->data, text->size, "img")) 569 | return 1; 570 | 571 | bufput(ob, text->data, text->size); 572 | return 1; 573 | } 574 | 575 | static void 576 | rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque) 577 | { 578 | if (ob->size) bufputc(ob, '\n'); 579 | BUFPUTSL(ob, "
    \n"); 580 | if (header) 581 | bufput(ob, header->data, header->size); 582 | BUFPUTSL(ob, "\n"); 583 | if (body) 584 | bufput(ob, body->data, body->size); 585 | BUFPUTSL(ob, "
    \n"); 586 | } 587 | 588 | static void 589 | rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque) 590 | { 591 | BUFPUTSL(ob, "\n"); 592 | if (text) 593 | bufput(ob, text->data, text->size); 594 | BUFPUTSL(ob, "\n"); 595 | } 596 | 597 | static void 598 | rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span) 599 | { 600 | if (flags & MKD_TABLE_HEADER) { 601 | BUFPUTSL(ob, " 1) { 607 | BUFPUTSL(ob, " colspan=\""); 608 | bufputi(ob, col_span); 609 | BUFPUTSL(ob, "\" "); 610 | } 611 | 612 | switch (flags & MKD_TABLE_ALIGNMASK) { 613 | case MKD_TABLE_ALIGN_CENTER: 614 | BUFPUTSL(ob, " align=\"center\">"); 615 | break; 616 | 617 | case MKD_TABLE_ALIGN_L: 618 | BUFPUTSL(ob, " align=\"left\">"); 619 | break; 620 | 621 | case MKD_TABLE_ALIGN_R: 622 | BUFPUTSL(ob, " align=\"right\">"); 623 | break; 624 | 625 | default: 626 | BUFPUTSL(ob, ">"); 627 | } 628 | 629 | if (text) 630 | bufput(ob, text->data, text->size); 631 | 632 | if (flags & MKD_TABLE_HEADER) { 633 | BUFPUTSL(ob, "\n"); 634 | } else { 635 | BUFPUTSL(ob, "\n"); 636 | } 637 | } 638 | 639 | static int 640 | rndr_superscript(struct buf *ob, const struct buf *text, void *opaque) 641 | { 642 | if (!text || !text->size) return 0; 643 | BUFPUTSL(ob, ""); 644 | bufput(ob, text->data, text->size); 645 | BUFPUTSL(ob, ""); 646 | return 1; 647 | } 648 | 649 | static void 650 | rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque) 651 | { 652 | if (text) 653 | escape_html(ob, text->data, text->size); 654 | } 655 | 656 | static void 657 | toc_header(struct buf *ob, const struct buf *text, int level, void *opaque) 658 | { 659 | struct html_renderopt *options = opaque; 660 | 661 | /* set the level offset if this is the first header 662 | * we're parsing for the document */ 663 | if (options->toc_data.current_level == 0) { 664 | BUFPUTSL(ob, "
    \n"); 665 | options->toc_data.level_offset = level - 1; 666 | } 667 | level -= options->toc_data.level_offset; 668 | 669 | if (level > options->toc_data.current_level) { 670 | while (level > options->toc_data.current_level) { 671 | BUFPUTSL(ob, "
      \n
    • \n"); 672 | options->toc_data.current_level++; 673 | } 674 | } else if (level < options->toc_data.current_level) { 675 | BUFPUTSL(ob, "
    • \n"); 676 | while (level < options->toc_data.current_level) { 677 | BUFPUTSL(ob, "
    \n\n"); 678 | options->toc_data.current_level--; 679 | } 680 | BUFPUTSL(ob,"
  • \n"); 681 | } else { 682 | BUFPUTSL(ob,"
  • \n
  • \n"); 683 | } 684 | 685 | BUFPUTSL(ob, "toc_id_prefix) { 688 | bufputs(ob, options->toc_id_prefix); 689 | } 690 | 691 | BUFPUTSL(ob, "toc_"); 692 | bufputi(ob, options->toc_data.header_count++); 693 | BUFPUTSL(ob, "\">"); 694 | if (text) 695 | escape_html(ob, text->data, text->size); 696 | BUFPUTSL(ob, "\n"); 697 | } 698 | 699 | static int 700 | toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque) 701 | { 702 | if (content && content->size) 703 | bufput(ob, content->data, content->size); 704 | return 1; 705 | } 706 | 707 | static void 708 | reset_toc(struct buf *ob, void *opaque) 709 | { 710 | struct html_renderopt *options = opaque; 711 | 712 | memset(&(options->toc_data), 0, sizeof(options->toc_data)); 713 | } 714 | 715 | static void 716 | toc_finalize(struct buf *ob, void *opaque) 717 | { 718 | struct html_renderopt *options = opaque; 719 | bool has_toc = false; 720 | while (options->toc_data.current_level > 0) { 721 | BUFPUTSL(ob, "
  • \n\n"); 722 | options->toc_data.current_level--; 723 | has_toc = true; 724 | } 725 | if(has_toc) { 726 | BUFPUTSL(ob, "
    \n"); 727 | } 728 | reset_toc(ob, opaque); 729 | } 730 | 731 | void 732 | sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options) 733 | { 734 | static const struct sd_callbacks cb_default = { 735 | NULL, 736 | NULL, 737 | NULL, 738 | NULL, 739 | toc_header, 740 | NULL, 741 | NULL, 742 | NULL, 743 | NULL, 744 | NULL, 745 | NULL, 746 | NULL, 747 | 748 | NULL, 749 | rndr_codespan, 750 | rndr_spoilerspan, 751 | rndr_double_emphasis, 752 | rndr_emphasis, 753 | NULL, 754 | NULL, 755 | toc_link, 756 | NULL, 757 | rndr_triple_emphasis, 758 | rndr_strikethrough, 759 | rndr_superscript, 760 | 761 | NULL, 762 | NULL, 763 | 764 | NULL, 765 | toc_finalize, 766 | }; 767 | 768 | memset(options, 0x0, sizeof(struct html_renderopt)); 769 | options->flags = HTML_TOC | HTML_SKIP_HTML; 770 | 771 | memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); 772 | } 773 | 774 | void 775 | sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags) 776 | { 777 | static const struct sd_callbacks cb_default = { 778 | rndr_blockcode, 779 | rndr_blockquote, 780 | rndr_blockspoiler, 781 | rndr_raw_block, 782 | rndr_header, 783 | rndr_hrule, 784 | rndr_list, 785 | rndr_listitem, 786 | rndr_paragraph, 787 | rndr_table, 788 | rndr_tablerow, 789 | rndr_tablecell, 790 | 791 | rndr_autolink, 792 | rndr_codespan, 793 | rndr_spoilerspan, 794 | rndr_double_emphasis, 795 | rndr_emphasis, 796 | rndr_image, 797 | rndr_linebreak, 798 | rndr_link, 799 | rndr_raw_html, 800 | rndr_triple_emphasis, 801 | rndr_strikethrough, 802 | rndr_superscript, 803 | 804 | NULL, 805 | rndr_normal_text, 806 | 807 | NULL, 808 | reset_toc, 809 | }; 810 | 811 | /* Prepare the options pointer */ 812 | memset(options, 0x0, sizeof(struct html_renderopt)); 813 | options->flags = render_flags; 814 | 815 | /* Prepare the callbacks */ 816 | memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks)); 817 | 818 | if (render_flags & HTML_SKIP_IMAGES) 819 | 
callbacks->image = NULL; 820 | 821 | if (render_flags & HTML_SKIP_LINKS) { 822 | callbacks->link = NULL; 823 | callbacks->autolink = NULL; 824 | } 825 | 826 | if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE) 827 | callbacks->blockhtml = NULL; 828 | } 829 | -------------------------------------------------------------------------------- /src/markdown.c: -------------------------------------------------------------------------------- 1 | /* markdown.c - generic markdown parser */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * Copyright (c) 2011, Vicent Marti 6 | * 7 | * Permission to use, copy, modify, and distribute this software for any 8 | * purpose with or without fee is hereby granted, provided that the above 9 | * copyright notice and this permission notice appear in all copies. 10 | * 11 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
18 | */ 19 | 20 | #include "markdown.h" 21 | #include "stack.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #if defined(_WIN32) 29 | #define strncasecmp _strnicmp 30 | #endif 31 | 32 | #define REF_TABLE_SIZE 8 33 | 34 | #define BUFFER_BLOCK 0 35 | #define BUFFER_SPAN 1 36 | 37 | #define MKD_LI_END 8 /* internal list flag */ 38 | 39 | #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) 40 | #define GPERF_DOWNCASE 1 41 | #define GPERF_CASE_STRNCMP 1 42 | #include "html_blocks.h" 43 | #include "html_entities.h" 44 | 45 | /*************** 46 | * LOCAL TYPES * 47 | ***************/ 48 | 49 | /* link_ref: reference to a link */ 50 | struct link_ref { 51 | unsigned int id; 52 | 53 | struct buf *link; 54 | struct buf *title; 55 | 56 | struct link_ref *next; 57 | }; 58 | 59 | /* char_trigger: function pointer to render active chars */ 60 | /* returns the number of chars taken care of */ 61 | /* data is the pointer of the beginning of the span */ 62 | /* offset is the number of valid chars before data */ 63 | struct sd_markdown; 64 | typedef size_t 65 | (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 66 | 67 | static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 68 | static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 69 | static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 70 | static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 71 | static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 72 | static size_t char_langle_tag(struct 
buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 73 | static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 74 | static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 75 | static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 76 | static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 77 | static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 78 | static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size); 79 | 80 | enum markdown_char_t { 81 | MD_CHAR_NONE = 0, 82 | MD_CHAR_EMPHASIS, 83 | MD_CHAR_CODESPAN, 84 | MD_CHAR_LINEBREAK, 85 | MD_CHAR_LINK, 86 | MD_CHAR_LANGLE, 87 | MD_CHAR_ESCAPE, 88 | MD_CHAR_ENTITITY, 89 | MD_CHAR_AUTOLINK_URL, 90 | MD_CHAR_AUTOLINK_EMAIL, 91 | MD_CHAR_AUTOLINK_WWW, 92 | MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME, 93 | MD_CHAR_SUPERSCRIPT, 94 | }; 95 | 96 | static char_trigger markdown_char_ptrs[] = { 97 | NULL, 98 | &char_emphasis, 99 | &char_codespan, 100 | &char_linebreak, 101 | &char_link, 102 | &char_langle_tag, 103 | &char_escape, 104 | &char_entity, 105 | &char_autolink_url, 106 | &char_autolink_email, 107 | &char_autolink_www, 108 | &char_autolink_subreddit_or_username, 109 | &char_superscript, 110 | }; 111 | 112 | /* render • structure containing one particular render */ 113 | struct sd_markdown { 114 | struct sd_callbacks cb; 115 | void *opaque; 116 | 117 | struct link_ref *refs[REF_TABLE_SIZE]; 118 | uint8_t active_char[256]; 119 | 
struct stack work_bufs[2]; 120 | unsigned int ext_flags; 121 | size_t max_nesting; 122 | size_t max_table_cols; 123 | int in_link_body; 124 | }; 125 | 126 | /*************************** 127 | * HELPER FUNCTIONS * 128 | ***************************/ 129 | 130 | static inline struct buf * 131 | rndr_newbuf(struct sd_markdown *rndr, int type) 132 | { 133 | static const size_t buf_size[2] = {256, 64}; 134 | struct buf *work = NULL; 135 | struct stack *pool = &rndr->work_bufs[type]; 136 | 137 | if (pool->size < pool->asize && 138 | pool->item[pool->size] != NULL) { 139 | work = pool->item[pool->size++]; 140 | work->size = 0; 141 | } else { 142 | work = bufnew(buf_size[type]); 143 | stack_push(pool, work); 144 | } 145 | 146 | return work; 147 | } 148 | 149 | static inline void 150 | rndr_popbuf(struct sd_markdown *rndr, int type) 151 | { 152 | rndr->work_bufs[type].size--; 153 | } 154 | 155 | static void 156 | unscape_text(struct buf *ob, struct buf *src) 157 | { 158 | size_t i = 0, org; 159 | while (i < src->size) { 160 | org = i; 161 | while (i < src->size && src->data[i] != '\\') 162 | i++; 163 | 164 | if (i > org) 165 | bufput(ob, src->data + org, i - org); 166 | 167 | if (i + 1 >= src->size) 168 | break; 169 | 170 | bufputc(ob, src->data[i + 1]); 171 | i += 2; 172 | } 173 | } 174 | 175 | static unsigned int 176 | hash_link_ref(const uint8_t *link_ref, size_t length) 177 | { 178 | size_t i; 179 | unsigned int hash = 0; 180 | 181 | for (i = 0; i < length; ++i) 182 | hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash; 183 | 184 | return hash; 185 | } 186 | 187 | static struct link_ref * 188 | add_link_ref( 189 | struct link_ref **references, 190 | const uint8_t *name, size_t name_size) 191 | { 192 | struct link_ref *ref = calloc(1, sizeof(struct link_ref)); 193 | 194 | if (!ref) 195 | return NULL; 196 | 197 | ref->id = hash_link_ref(name, name_size); 198 | ref->next = references[ref->id % REF_TABLE_SIZE]; 199 | 200 | references[ref->id % REF_TABLE_SIZE] = 
ref; 201 | return ref; 202 | } 203 | 204 | static struct link_ref * 205 | find_link_ref(struct link_ref **references, uint8_t *name, size_t length) 206 | { 207 | unsigned int hash = hash_link_ref(name, length); 208 | struct link_ref *ref = NULL; 209 | 210 | ref = references[hash % REF_TABLE_SIZE]; 211 | 212 | while (ref != NULL) { 213 | if (ref->id == hash) 214 | return ref; 215 | 216 | ref = ref->next; 217 | } 218 | 219 | return NULL; 220 | } 221 | 222 | static void 223 | free_link_refs(struct link_ref **references) 224 | { 225 | size_t i; 226 | 227 | for (i = 0; i < REF_TABLE_SIZE; ++i) { 228 | struct link_ref *r = references[i]; 229 | struct link_ref *next; 230 | 231 | while (r) { 232 | next = r->next; 233 | bufrelease(r->link); 234 | bufrelease(r->title); 235 | free(r); 236 | r = next; 237 | } 238 | } 239 | } 240 | 241 | /* 242 | * Check whether a char is a Markdown space. 243 | 244 | * Right now we only consider spaces the actual 245 | * space and a newline: tabs and carriage returns 246 | * are filtered out during the preprocessing phase. 247 | * 248 | * If we wanted to actually be UTF-8 compliant, we 249 | * should instead extract an Unicode codepoint from 250 | * this character and check for space properties. 
251 | */ 252 | static inline int 253 | _isspace(int c) 254 | { 255 | return c == ' ' || c == '\n'; 256 | } 257 | 258 | /**************************** 259 | * INLINE PARSING FUNCTIONS * 260 | ****************************/ 261 | 262 | /* is_mail_autolink • looks for the address part of a mail autolink and '>' */ 263 | /* this is less strict than the original markdown e-mail address matching */ 264 | static size_t 265 | is_mail_autolink(uint8_t *data, size_t size) 266 | { 267 | size_t i = 0, nb = 0; 268 | 269 | /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ 270 | for (i = 0; i < size; ++i) { 271 | if (isalnum(data[i])) 272 | continue; 273 | 274 | switch (data[i]) { 275 | case '@': 276 | nb++; 277 | 278 | case '-': 279 | case '.': 280 | case '_': 281 | break; 282 | 283 | case '>': 284 | return (nb == 1) ? i + 1 : 0; 285 | 286 | default: 287 | return 0; 288 | } 289 | } 290 | 291 | return 0; 292 | } 293 | 294 | /* tag_length • returns the length of the given tag, or 0 is it's not valid */ 295 | static size_t 296 | tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink) 297 | { 298 | size_t i, j; 299 | 300 | /* a valid tag can't be shorter than 3 chars */ 301 | if (size < 3) return 0; 302 | 303 | /* begins with a '<' optionally followed by '/', followed by letter or number */ 304 | if (data[0] != '<') return 0; 305 | i = (data[1] == '/') ? 2 : 1; 306 | 307 | if (!isalnum(data[i])) 308 | return 0; 309 | 310 | /* scheme test */ 311 | *autolink = MKDA_NOT_AUTOLINK; 312 | 313 | /* try to find the beginning of an URI */ 314 | while (i < size && (isalnum(data[i]) || data[i] == '.' 
|| data[i] == '+' || data[i] == '-')) 315 | i++; 316 | 317 | if (i > 1 && data[i] == '@') { 318 | if ((j = is_mail_autolink(data + i, size - i)) != 0) { 319 | *autolink = MKDA_EMAIL; 320 | return i + j; 321 | } 322 | } 323 | 324 | if (i > 2 && data[i] == ':') { 325 | *autolink = MKDA_NORMAL; 326 | i++; 327 | } 328 | 329 | /* completing autolink test: no whitespace or ' or " */ 330 | if (i >= size) 331 | *autolink = MKDA_NOT_AUTOLINK; 332 | 333 | else if (*autolink) { 334 | j = i; 335 | 336 | while (i < size) { 337 | if (data[i] == '\\') i += 2; 338 | else if (data[i] == '>' || data[i] == '\'' || 339 | data[i] == '"' || data[i] == ' ' || data[i] == '\n') 340 | break; 341 | else i++; 342 | } 343 | 344 | if (i >= size) return 0; 345 | if (i > j && data[i] == '>') return i + 1; 346 | /* one of the forbidden chars has been found */ 347 | *autolink = MKDA_NOT_AUTOLINK; 348 | } 349 | 350 | /* looking for sometinhg looking like a tag end */ 351 | while (i < size && data[i] != '>') i++; 352 | if (i >= size) return 0; 353 | return i + 1; 354 | } 355 | 356 | /* parse_inline • parses inline markdown elements */ 357 | static void 358 | parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 359 | { 360 | size_t i = 0, end = 0, last_special = 0; 361 | uint8_t action = 0; 362 | struct buf work = { 0, 0, 0, 0 }; 363 | 364 | if (rndr->work_bufs[BUFFER_SPAN].size + 365 | rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) 366 | return; 367 | 368 | while (i < size) { 369 | /* copying inactive chars into the output */ 370 | while (end < size && (action = rndr->active_char[data[end]]) == 0) { 371 | end++; 372 | } 373 | 374 | if (rndr->cb.normal_text) { 375 | work.data = data + i; 376 | work.size = end - i; 377 | rndr->cb.normal_text(ob, &work, rndr->opaque); 378 | } 379 | else 380 | bufput(ob, data + i, end - i); 381 | 382 | if (end >= size) break; 383 | i = end; 384 | 385 | end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i - last_special, i, 
size - i); 386 | if (!end) /* no action from the callback */ 387 | end = i + 1; 388 | else { 389 | i += end; 390 | last_special = end = i; 391 | } 392 | } 393 | } 394 | 395 | /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ 396 | static size_t 397 | find_emph_char(uint8_t *data, size_t size, uint8_t c) 398 | { 399 | size_t i = 1; 400 | 401 | while (i < size) { 402 | while (i < size && data[i] != c && data[i] != '`' && data[i] != '[') 403 | i++; 404 | 405 | if (i == size) 406 | return 0; 407 | 408 | if (i < size && c == '<' && data[i] == c && data[i-1] == '!') 409 | return i; 410 | 411 | if (data[i] == c) 412 | return i; 413 | 414 | /* not counting escaped chars */ 415 | if (i && data[i - 1] == '\\') { 416 | i++; continue; 417 | } 418 | 419 | if (data[i] == '`') { 420 | size_t span_nb = 0, bt; 421 | size_t tmp_i = 0; 422 | 423 | /* counting the number of opening backticks */ 424 | while (i < size && data[i] == '`') { 425 | i++; span_nb++; 426 | } 427 | 428 | if (i >= size) return 0; 429 | 430 | /* finding the matching closing sequence */ 431 | bt = 0; 432 | while (i < size && bt < span_nb) { 433 | if (!tmp_i && data[i] == c) tmp_i = i; 434 | if (data[i] == '`') bt++; 435 | else bt = 0; 436 | i++; 437 | } 438 | 439 | if (i >= size) return tmp_i; 440 | } 441 | /* skipping a link */ 442 | else if (data[i] == '[') { 443 | size_t tmp_i = 0; 444 | uint8_t cc; 445 | 446 | i++; 447 | while (i < size && data[i] != ']') { 448 | if (!tmp_i && data[i] == c) tmp_i = i; 449 | i++; 450 | } 451 | 452 | i++; 453 | while (i < size && (data[i] == ' ' || data[i] == '\n')) 454 | i++; 455 | 456 | if (i >= size) 457 | return tmp_i; 458 | 459 | switch (data[i]) { 460 | case '[': 461 | cc = ']'; break; 462 | 463 | case '(': 464 | cc = ')'; break; 465 | 466 | default: 467 | if (tmp_i) 468 | return tmp_i; 469 | else 470 | continue; 471 | } 472 | 473 | i++; 474 | while (i < size && data[i] != cc) { 475 | if (!tmp_i && data[i] == c) tmp_i = i; 476 | i++; 477 | } 
478 | 479 | if (i >= size) 480 | return tmp_i; 481 | 482 | i++; 483 | } 484 | } 485 | 486 | return 0; 487 | } 488 | 489 | /* parse_emph1 • parsing single emphase */ 490 | /* closed by a symbol not preceded by whitespace and not followed by symbol */ 491 | static size_t 492 | parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) 493 | { 494 | size_t i = 0, len; 495 | struct buf *work = 0; 496 | int r; 497 | 498 | if (!rndr->cb.emphasis) return 0; 499 | 500 | /* skipping one symbol if coming from emph3 */ 501 | if (size > 1 && data[0] == c && data[1] == c) i = 1; 502 | 503 | while (i < size) { 504 | len = find_emph_char(data + i, size - i, c); 505 | if (!len) return 0; 506 | i += len; 507 | if (i >= size) return 0; 508 | 509 | if (data[i] == c && !_isspace(data[i - 1])) { 510 | if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) { 511 | if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1]))) 512 | continue; 513 | } 514 | 515 | work = rndr_newbuf(rndr, BUFFER_SPAN); 516 | parse_inline(work, rndr, data, i); 517 | r = rndr->cb.emphasis(ob, work, rndr->opaque); 518 | rndr_popbuf(rndr, BUFFER_SPAN); 519 | return r ? i + 1 : 0; 520 | } 521 | } 522 | 523 | return 0; 524 | } 525 | 526 | /* parse_emph2 • parsing single emphase */ 527 | static size_t 528 | parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) 529 | { 530 | int (*render_method)(struct buf *ob, const struct buf *text, void *opaque); 531 | size_t i = 0, len; 532 | struct buf *work = 0; 533 | int r; 534 | 535 | render_method = (c == '~') ? 
rndr->cb.strikethrough : rndr->cb.double_emphasis; 536 | 537 | if (!render_method) 538 | return 0; 539 | 540 | while (i < size) { 541 | len = find_emph_char(data + i, size - i, c); 542 | if (!len) return 0; 543 | i += len; 544 | 545 | if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { 546 | work = rndr_newbuf(rndr, BUFFER_SPAN); 547 | parse_inline(work, rndr, data, i); 548 | r = render_method(ob, work, rndr->opaque); 549 | rndr_popbuf(rndr, BUFFER_SPAN); 550 | return r ? i + 2 : 0; 551 | } 552 | i++; 553 | } 554 | return 0; 555 | } 556 | 557 | /* parse_emph3 • parsing single emphase */ 558 | /* finds the first closing tag, and delegates to the other emph */ 559 | static size_t 560 | parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) 561 | { 562 | size_t i = 0, len; 563 | int r; 564 | 565 | while (i < size) { 566 | len = find_emph_char(data + i, size - i, c); 567 | if (!len) return 0; 568 | i += len; 569 | 570 | /* skip whitespace preceded symbols */ 571 | if (data[i] != c || _isspace(data[i - 1])) 572 | continue; 573 | 574 | if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) { 575 | /* triple symbol found */ 576 | struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); 577 | 578 | parse_inline(work, rndr, data, i); 579 | r = rndr->cb.triple_emphasis(ob, work, rndr->opaque); 580 | rndr_popbuf(rndr, BUFFER_SPAN); 581 | return r ? 
i + 3 : 0; 582 | 583 | } else if (i + 1 < size && data[i + 1] == c) { 584 | /* double symbol found, handing over to emph1 */ 585 | len = parse_emph1(ob, rndr, data - 2, size + 2, c); 586 | if (!len) return 0; 587 | else return len - 2; 588 | 589 | } else { 590 | /* single symbol found, handing over to emph2 */ 591 | len = parse_emph2(ob, rndr, data - 1, size + 1, c); 592 | if (!len) return 0; 593 | else return len - 1; 594 | } 595 | } 596 | return 0; 597 | } 598 | 599 | static size_t 600 | parse_spoilerspan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 601 | { 602 | int (*render_method)(struct buf *ob, const struct buf *text, void *opaque); 603 | size_t len; 604 | size_t i = 0; 605 | struct buf *work = 0; 606 | int r; 607 | 608 | render_method = rndr->cb.spoilerspan; 609 | 610 | if (!render_method) return 0; 611 | 612 | while (i < size) { 613 | len = find_emph_char(data + i, size - i, '<'); 614 | if (!len) return 0; 615 | i += len; 616 | 617 | if (i < size && data[i] == '<' && data[i - 1] == '!') { 618 | work = rndr_newbuf(rndr, BUFFER_SPAN); 619 | parse_inline(work, rndr, data, i - 1); 620 | r = render_method(ob, work, rndr->opaque); 621 | rndr_popbuf(rndr, BUFFER_SPAN); 622 | 623 | if (!r) return 0; 624 | 625 | return i + 1; 626 | } 627 | i++; 628 | } 629 | return 0; 630 | } 631 | 632 | /* char_emphasis • single and double emphasis parsing */ 633 | static size_t 634 | char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 635 | { 636 | uint8_t c = data[0]; 637 | size_t ret; 638 | 639 | if (size > 3 && c == '>' && data[1] == '!') { 640 | if(_isspace(data[2]) || (ret = parse_spoilerspan(ob, rndr, data + 2, size - 2)) == 0) 641 | return 0; 642 | 643 | return ret + 2; 644 | } 645 | 646 | 647 | if (size > 2 && data[1] != c) { 648 | /* whitespace cannot follow an opening emphasis; 649 | * strikethrough only takes two characters '~~' */ 650 | if (c == '~' || c == '>' || 
_isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) 651 | return 0; 652 | 653 | return ret + 1; 654 | } 655 | 656 | 657 | if (size > 3 && data[1] == c && data[2] != c) { 658 | if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0) 659 | return 0; 660 | 661 | return ret + 2; 662 | } 663 | 664 | if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { 665 | if (c == '~' || c == '>' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0) 666 | return 0; 667 | 668 | return ret + 3; 669 | } 670 | 671 | return 0; 672 | } 673 | 674 | 675 | /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ 676 | static size_t 677 | char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 678 | { 679 | if (max_rewind < 2 || data[-1] != ' ' || data[-2] != ' ') 680 | return 0; 681 | 682 | /* removing the last space from ob and rendering */ 683 | while (ob->size && ob->data[ob->size - 1] == ' ') 684 | ob->size--; 685 | 686 | return rndr->cb.linebreak(ob, rndr->opaque) ? 
1 : 0; 687 | } 688 | 689 | 690 | /* char_codespan • '`' parsing a code span (assuming codespan != 0) */ 691 | static size_t 692 | char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 693 | { 694 | size_t end, nb = 0, i, f_begin, f_end; 695 | 696 | /* counting the number of backticks in the delimiter */ 697 | while (nb < size && data[nb] == '`') 698 | nb++; 699 | 700 | /* finding the next delimiter */ 701 | i = 0; 702 | for (end = nb; end < size && i < nb; end++) { 703 | if (data[end] == '`') i++; 704 | else i = 0; 705 | } 706 | 707 | if (i < nb && end >= size) 708 | return 0; /* no matching delimiter */ 709 | 710 | /* trimming outside whitespaces */ 711 | f_begin = nb; 712 | while (f_begin < end && data[f_begin] == ' ') 713 | f_begin++; 714 | 715 | f_end = end - nb; 716 | while (f_end > nb && data[f_end-1] == ' ') 717 | f_end--; 718 | 719 | /* real code span */ 720 | if (f_begin < f_end) { 721 | struct buf work = { data + f_begin, f_end - f_begin, 0, 0 }; 722 | if (!rndr->cb.codespan(ob, &work, rndr->opaque)) 723 | end = 0; 724 | } else { 725 | if (!rndr->cb.codespan(ob, 0, rndr->opaque)) 726 | end = 0; 727 | } 728 | 729 | return end; 730 | } 731 | 732 | 733 | /* char_escape • '\\' backslash escape */ 734 | static size_t 735 | char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 736 | { 737 | static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~"; 738 | struct buf work = { 0, 0, 0, 0 }; 739 | 740 | if (size > 1) { 741 | if (strchr(escape_chars, data[1]) == NULL) 742 | return 0; 743 | 744 | if (rndr->cb.normal_text) { 745 | work.data = data + 1; 746 | work.size = 1; 747 | rndr->cb.normal_text(ob, &work, rndr->opaque); 748 | } 749 | else bufputc(ob, data[1]); 750 | } else if (size == 1) { 751 | bufputc(ob, data[0]); 752 | } 753 | 754 | return 2; 755 | } 756 | 757 | /* char_entity • '&' escaped when it doesn't belong to 
an entity */ 758 | static size_t 759 | char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 760 | { 761 | size_t end = 1; 762 | size_t content_start; 763 | size_t content_end; 764 | struct buf work = { 0, 0, 0, 0 }; 765 | int numeric = 0; 766 | int hex = 0; 767 | int entity_base; 768 | uint32_t entity_val; 769 | 770 | if (end < size && data[end] == '#') { 771 | numeric = 1; 772 | end++; 773 | } 774 | 775 | if (end < size && numeric && tolower(data[end]) == 'x') { 776 | hex = 1; 777 | end++; 778 | } 779 | 780 | content_start = end; 781 | 782 | while (end < size) { 783 | const char c = data[end]; 784 | if (hex) { 785 | if (!isxdigit(c)) break; 786 | } else if (numeric) { 787 | if (!isdigit(c)) break; 788 | } else if (!isalnum(c)) { 789 | break; 790 | } 791 | end++; 792 | } 793 | 794 | content_end = end; 795 | 796 | if (end > content_start && end < size && data[end] == ';') 797 | end++; /* well-formed entity */ 798 | else 799 | return 0; /* not an entity */ 800 | 801 | /* way too long to be a valid numeric entity */ 802 | if (numeric && content_end - content_start > MAX_NUM_ENTITY_LEN) 803 | return 0; 804 | 805 | /* Validate the entity's contents */ 806 | if (numeric) { 807 | if (hex) 808 | entity_base = 16; 809 | else 810 | entity_base = 10; 811 | 812 | // This is ok because it'll stop once it hits the ';' 813 | entity_val = strtol((char*)data + content_start, NULL, entity_base); 814 | if (!is_valid_numeric_entity(entity_val)) 815 | return 0; 816 | } else { 817 | if (!is_allowed_named_entity((const char *)data, end)) 818 | return 0; 819 | } 820 | 821 | if (rndr->cb.entity) { 822 | work.data = data; 823 | work.size = end; 824 | rndr->cb.entity(ob, &work, rndr->opaque); 825 | } else { 826 | /* Necessary so we can normalize `>` to `>` */ 827 | bufputc(ob, '&'); 828 | if (numeric) 829 | bufputc(ob, '#'); 830 | if (hex) 831 | bufputc(ob, 'x'); 832 | bufput(ob, data + content_start, end - 
content_start); 833 | } 834 | 835 | return end; 836 | } 837 | 838 | /* char_langle_tag • '<' when tags or autolinks are allowed */ 839 | static size_t 840 | char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 841 | { 842 | enum mkd_autolink altype = MKDA_NOT_AUTOLINK; 843 | size_t end = tag_length(data, size, &altype); 844 | struct buf work = { data, end, 0, 0 }; 845 | int ret = 0; 846 | 847 | if (end > 2) { 848 | if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) { 849 | struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN); 850 | work.data = data + 1; 851 | work.size = end - 2; 852 | unscape_text(u_link, &work); 853 | ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque); 854 | rndr_popbuf(rndr, BUFFER_SPAN); 855 | } 856 | else if (rndr->cb.raw_html_tag) 857 | ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque); 858 | } 859 | 860 | if (!ret) return 0; 861 | else return end; 862 | } 863 | 864 | static size_t 865 | char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 866 | { 867 | struct buf *link, *link_url, *link_text; 868 | size_t link_len, rewind; 869 | 870 | if (!rndr->cb.link || rndr->in_link_body) 871 | return 0; 872 | 873 | link = rndr_newbuf(rndr, BUFFER_SPAN); 874 | 875 | if ((link_len = sd_autolink__www(&rewind, link, data, max_rewind, size, 0)) > 0) { 876 | link_url = rndr_newbuf(rndr, BUFFER_SPAN); 877 | BUFPUTSL(link_url, "http://"); 878 | bufput(link_url, link->data, link->size); 879 | 880 | buftruncate(ob, ob->size - rewind); 881 | if (rndr->cb.normal_text) { 882 | link_text = rndr_newbuf(rndr, BUFFER_SPAN); 883 | rndr->cb.normal_text(link_text, link, rndr->opaque); 884 | rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); 885 | rndr_popbuf(rndr, BUFFER_SPAN); 886 | } else { 887 | rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); 888 | } 889 | rndr_popbuf(rndr, BUFFER_SPAN); 890 | 
} 891 | 892 | rndr_popbuf(rndr, BUFFER_SPAN); 893 | return link_len; 894 | } 895 | 896 | static size_t 897 | char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 898 | { 899 | struct buf *link, *link_text, *link_url; 900 | size_t link_len, rewind; 901 | int no_slash; 902 | 903 | if (!rndr->cb.autolink || rndr->in_link_body) 904 | return 0; 905 | 906 | link = rndr_newbuf(rndr, BUFFER_SPAN); 907 | 908 | link_len = sd_autolink__subreddit(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); 909 | if (link_len == 0) 910 | link_len = sd_autolink__username(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash); 911 | 912 | /* Found either a user or subreddit link */ 913 | if (link_len > 0) { 914 | link_url = rndr_newbuf(rndr, BUFFER_SPAN); 915 | if (no_slash) 916 | bufputc(link_url, '/'); 917 | bufput(link_url, link->data, link->size); 918 | 919 | buftruncate(ob, ob->size - rewind); 920 | if (rndr->cb.normal_text) { 921 | link_text = rndr_newbuf(rndr, BUFFER_SPAN); 922 | rndr->cb.normal_text(link_text, link, rndr->opaque); 923 | rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); 924 | rndr_popbuf(rndr, BUFFER_SPAN); 925 | } else { 926 | rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); 927 | } 928 | rndr_popbuf(rndr, BUFFER_SPAN); 929 | } 930 | rndr_popbuf(rndr, BUFFER_SPAN); 931 | 932 | return link_len; 933 | } 934 | 935 | static size_t 936 | char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 937 | { 938 | struct buf *link; 939 | size_t link_len, rewind; 940 | 941 | if (!rndr->cb.autolink || rndr->in_link_body) 942 | return 0; 943 | 944 | link = rndr_newbuf(rndr, BUFFER_SPAN); 945 | 946 | if ((link_len = sd_autolink__email(&rewind, link, data, max_rewind, size, 0)) > 0) { 947 | buftruncate(ob, ob->size - rewind); 948 | rndr->cb.autolink(ob, link, MKDA_EMAIL, 
rndr->opaque); 949 | } 950 | 951 | rndr_popbuf(rndr, BUFFER_SPAN); 952 | return link_len; 953 | } 954 | 955 | static size_t 956 | char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 957 | { 958 | struct buf *link; 959 | size_t link_len, rewind; 960 | 961 | if (!rndr->cb.autolink || rndr->in_link_body) 962 | return 0; 963 | 964 | link = rndr_newbuf(rndr, BUFFER_SPAN); 965 | 966 | if ((link_len = sd_autolink__url(&rewind, link, data, max_rewind, size, 0)) > 0) { 967 | buftruncate(ob, ob->size - rewind); 968 | rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque); 969 | } 970 | 971 | rndr_popbuf(rndr, BUFFER_SPAN); 972 | return link_len; 973 | } 974 | 975 | /* char_link • '[': parsing a link or an image */ 976 | static size_t 977 | char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 978 | { 979 | int is_img = (max_rewind && data[-1] == '!'), level; 980 | size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; 981 | struct buf *content = 0; 982 | struct buf *link = 0; 983 | struct buf *title = 0; 984 | struct buf *u_link = 0; 985 | size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; 986 | int text_has_nl = 0, ret = 0; 987 | int in_title = 0, qtype = 0; 988 | 989 | /* checking whether the correct renderer exists */ 990 | if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link)) 991 | goto cleanup; 992 | 993 | /* looking for the matching closing bracket */ 994 | for (level = 1; i < size; i++) { 995 | if (data[i] == '\n') 996 | text_has_nl = 1; 997 | 998 | else if (data[i - 1] == '\\') 999 | continue; 1000 | 1001 | else if (data[i] == '[') 1002 | level++; 1003 | 1004 | else if (data[i] == ']') { 1005 | level--; 1006 | if (level <= 0) 1007 | break; 1008 | } 1009 | } 1010 | 1011 | if (i >= size) 1012 | goto cleanup; 1013 | 1014 | txt_e = i; 1015 | i++; 1016 | 1017 | /* skip any amount of whitespace or 
newline */ 1018 | /* (this is much more laxist than original markdown syntax) */ 1019 | while (i < size && _isspace(data[i])) 1020 | i++; 1021 | 1022 | /* inline style link */ 1023 | if (i < size && data[i] == '(') { 1024 | /* skipping initial whitespace */ 1025 | i++; 1026 | 1027 | while (i < size && _isspace(data[i])) 1028 | i++; 1029 | 1030 | link_b = i; 1031 | 1032 | /* looking for link end: ' " ) */ 1033 | while (i < size) { 1034 | if (data[i] == '\\') i += 2; 1035 | else if (data[i] == ')') break; 1036 | else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; 1037 | else i++; 1038 | } 1039 | 1040 | if (i >= size) goto cleanup; 1041 | link_e = i; 1042 | 1043 | /* looking for title end if present */ 1044 | if (data[i] == '\'' || data[i] == '"') { 1045 | qtype = data[i]; 1046 | in_title = 1; 1047 | i++; 1048 | title_b = i; 1049 | 1050 | while (i < size) { 1051 | if (data[i] == '\\') i += 2; 1052 | else if (data[i] == qtype) {in_title = 0; i++;} 1053 | else if ((data[i] == ')') && !in_title) break; 1054 | else i++; 1055 | } 1056 | 1057 | if (i >= size) goto cleanup; 1058 | 1059 | /* skipping whitespaces after title */ 1060 | title_e = i - 1; 1061 | while (title_e > title_b && _isspace(data[title_e])) 1062 | title_e--; 1063 | 1064 | /* checking for closing quote presence */ 1065 | if (data[title_e] != '\'' && data[title_e] != '"') { 1066 | title_b = title_e = 0; 1067 | link_e = i; 1068 | } 1069 | } 1070 | 1071 | /* remove whitespace at the end of the link */ 1072 | while (link_e > link_b && _isspace(data[link_e - 1])) 1073 | link_e--; 1074 | 1075 | /* remove optional angle brackets around the link */ 1076 | if (data[link_b] == '<') link_b++; 1077 | if (data[link_e - 1] == '>') link_e--; 1078 | 1079 | /* building escaped link and title */ 1080 | if (link_e > link_b) { 1081 | link = rndr_newbuf(rndr, BUFFER_SPAN); 1082 | bufput(link, data + link_b, link_e - link_b); 1083 | } 1084 | 1085 | if (title_e > title_b) { 1086 | title = 
rndr_newbuf(rndr, BUFFER_SPAN); 1087 | bufput(title, data + title_b, title_e - title_b); 1088 | } 1089 | 1090 | i++; 1091 | } 1092 | 1093 | /* reference style link */ 1094 | else if (i < size && data[i] == '[') { 1095 | struct buf id = { 0, 0, 0, 0 }; 1096 | struct link_ref *lr; 1097 | 1098 | /* looking for the id */ 1099 | i++; 1100 | link_b = i; 1101 | while (i < size && data[i] != ']') i++; 1102 | if (i >= size) goto cleanup; 1103 | link_e = i; 1104 | 1105 | /* finding the link_ref */ 1106 | if (link_b == link_e) { 1107 | if (text_has_nl) { 1108 | struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); 1109 | size_t j; 1110 | 1111 | for (j = 1; j < txt_e; j++) { 1112 | if (data[j] != '\n') 1113 | bufputc(b, data[j]); 1114 | else if (data[j - 1] != ' ') 1115 | bufputc(b, ' '); 1116 | } 1117 | 1118 | id.data = b->data; 1119 | id.size = b->size; 1120 | } else { 1121 | id.data = data + 1; 1122 | id.size = txt_e - 1; 1123 | } 1124 | } else { 1125 | id.data = data + link_b; 1126 | id.size = link_e - link_b; 1127 | } 1128 | 1129 | lr = find_link_ref(rndr->refs, id.data, id.size); 1130 | if (!lr) 1131 | goto cleanup; 1132 | 1133 | /* keeping link and title from link_ref */ 1134 | link = lr->link; 1135 | title = lr->title; 1136 | i++; 1137 | } 1138 | 1139 | /* shortcut reference style link */ 1140 | else { 1141 | struct buf id = { 0, 0, 0, 0 }; 1142 | struct link_ref *lr; 1143 | 1144 | /* crafting the id */ 1145 | if (text_has_nl) { 1146 | struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); 1147 | size_t j; 1148 | 1149 | for (j = 1; j < txt_e; j++) { 1150 | if (data[j] != '\n') 1151 | bufputc(b, data[j]); 1152 | else if (data[j - 1] != ' ') 1153 | bufputc(b, ' '); 1154 | } 1155 | 1156 | id.data = b->data; 1157 | id.size = b->size; 1158 | } else { 1159 | id.data = data + 1; 1160 | id.size = txt_e - 1; 1161 | } 1162 | 1163 | /* finding the link_ref */ 1164 | lr = find_link_ref(rndr->refs, id.data, id.size); 1165 | if (!lr) 1166 | goto cleanup; 1167 | 1168 | /* keeping link and title 
from link_ref */ 1169 | link = lr->link; 1170 | title = lr->title; 1171 | 1172 | /* rewinding the whitespace */ 1173 | i = txt_e + 1; 1174 | } 1175 | 1176 | /* building content: img alt is escaped, link content is parsed */ 1177 | if (txt_e > 1) { 1178 | content = rndr_newbuf(rndr, BUFFER_SPAN); 1179 | if (is_img) { 1180 | bufput(content, data + 1, txt_e - 1); 1181 | } else { 1182 | /* disable autolinking when parsing inline the 1183 | * content of a link */ 1184 | rndr->in_link_body = 1; 1185 | parse_inline(content, rndr, data + 1, txt_e - 1); 1186 | rndr->in_link_body = 0; 1187 | } 1188 | } 1189 | 1190 | if (link) { 1191 | u_link = rndr_newbuf(rndr, BUFFER_SPAN); 1192 | unscape_text(u_link, link); 1193 | } else { 1194 | goto cleanup; 1195 | } 1196 | 1197 | /* calling the relevant rendering function */ 1198 | if (is_img) { 1199 | if (ob->size && ob->data[ob->size - 1] == '!') 1200 | ob->size -= 1; 1201 | 1202 | ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque); 1203 | } else { 1204 | ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque); 1205 | } 1206 | 1207 | /* cleanup */ 1208 | cleanup: 1209 | rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; 1210 | return ret ? 
i : 0; 1211 | } 1212 | 1213 | static size_t 1214 | char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size) 1215 | { 1216 | size_t sup_start, sup_len; 1217 | struct buf *sup; 1218 | 1219 | if (!rndr->cb.superscript) 1220 | return 0; 1221 | 1222 | if (size < 2) 1223 | return 0; 1224 | 1225 | if (data[1] == '(') { 1226 | sup_start = sup_len = 2; 1227 | 1228 | while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') 1229 | sup_len++; 1230 | 1231 | if (sup_len == size) 1232 | return 0; 1233 | } else { 1234 | sup_start = sup_len = 1; 1235 | 1236 | while (sup_len < size && !_isspace(data[sup_len])) 1237 | sup_len++; 1238 | } 1239 | 1240 | if (sup_len - sup_start == 0) 1241 | return (sup_start == 2) ? 3 : 0; 1242 | 1243 | sup = rndr_newbuf(rndr, BUFFER_SPAN); 1244 | parse_inline(sup, rndr, data + sup_start, sup_len - sup_start); 1245 | rndr->cb.superscript(ob, sup, rndr->opaque); 1246 | rndr_popbuf(rndr, BUFFER_SPAN); 1247 | 1248 | return (sup_start == 2) ? 
sup_len + 1 : sup_len; 1249 | } 1250 | 1251 | /********************************* 1252 | * BLOCK-LEVEL PARSING FUNCTIONS * 1253 | *********************************/ 1254 | 1255 | /* is_empty • returns the line length when it is empty, 0 otherwise */ 1256 | static size_t 1257 | is_empty(uint8_t *data, size_t size) 1258 | { 1259 | size_t i; 1260 | 1261 | for (i = 0; i < size && data[i] != '\n'; i++) 1262 | if (data[i] != ' ') 1263 | return 0; 1264 | 1265 | return i + 1; 1266 | } 1267 | 1268 | /* is_hrule • returns whether a line is a horizontal rule */ 1269 | static int 1270 | is_hrule(uint8_t *data, size_t size) 1271 | { 1272 | size_t i = 0, n = 0; 1273 | uint8_t c; 1274 | 1275 | /* skipping initial spaces */ 1276 | if (size < 3) return 0; 1277 | if (data[0] == ' ') { i++; 1278 | if (data[1] == ' ') { i++; 1279 | if (data[2] == ' ') { i++; } } } 1280 | 1281 | /* looking at the hrule uint8_t */ 1282 | if (i + 2 >= size 1283 | || (data[i] != '*' && data[i] != '-' && data[i] != '_')) 1284 | return 0; 1285 | c = data[i]; 1286 | 1287 | /* the whole line must be the char or whitespace */ 1288 | while (i < size && data[i] != '\n') { 1289 | if (data[i] == c) n++; 1290 | else if (data[i] != ' ') 1291 | return 0; 1292 | 1293 | i++; 1294 | } 1295 | 1296 | return n >= 3; 1297 | } 1298 | 1299 | /* check if a line begins with a code fence; return the 1300 | * width of the code fence */ 1301 | static size_t 1302 | prefix_codefence(uint8_t *data, size_t size) 1303 | { 1304 | size_t i = 0, n = 0; 1305 | uint8_t c; 1306 | 1307 | /* skipping initial spaces */ 1308 | if (size < 3) return 0; 1309 | if (data[0] == ' ') { i++; 1310 | if (data[1] == ' ') { i++; 1311 | if (data[2] == ' ') { i++; } } } 1312 | 1313 | /* looking at the hrule uint8_t */ 1314 | if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) 1315 | return 0; 1316 | 1317 | c = data[i]; 1318 | 1319 | /* the whole line must be the uint8_t or whitespace */ 1320 | while (i < size && data[i] == c) { 1321 | n++; i++; 1322 | } 
1323 | 1324 | if (n < 3) 1325 | return 0; 1326 | 1327 | return i; 1328 | } 1329 | 1330 | /* check if a line is a code fence; return its size if it is */ 1331 | static size_t 1332 | is_codefence(uint8_t *data, size_t size, struct buf *syntax) 1333 | { 1334 | size_t i = 0, syn_len = 0; 1335 | uint8_t *syn_start; 1336 | 1337 | i = prefix_codefence(data, size); 1338 | if (i == 0) 1339 | return 0; 1340 | 1341 | while (i < size && data[i] == ' ') 1342 | i++; 1343 | 1344 | syn_start = data + i; 1345 | 1346 | if (i < size && data[i] == '{') { 1347 | i++; syn_start++; 1348 | 1349 | while (i < size && data[i] != '}' && data[i] != '\n') { 1350 | syn_len++; i++; 1351 | } 1352 | 1353 | if (i == size || data[i] != '}') 1354 | return 0; 1355 | 1356 | /* strip all whitespace at the beginning and the end 1357 | * of the {} block */ 1358 | while (syn_len > 0 && _isspace(syn_start[0])) { 1359 | syn_start++; syn_len--; 1360 | } 1361 | 1362 | while (syn_len > 0 && _isspace(syn_start[syn_len - 1])) 1363 | syn_len--; 1364 | 1365 | i++; 1366 | } else { 1367 | while (i < size && !_isspace(data[i])) { 1368 | syn_len++; i++; 1369 | } 1370 | } 1371 | 1372 | if (syntax) { 1373 | syntax->data = syn_start; 1374 | syntax->size = syn_len; 1375 | } 1376 | 1377 | while (i < size && data[i] != '\n') { 1378 | if (!_isspace(data[i])) 1379 | return 0; 1380 | 1381 | i++; 1382 | } 1383 | 1384 | return i + 1; 1385 | } 1386 | 1387 | /* is_atxheader • returns whether the line is a hash-prefixed header */ 1388 | static int 1389 | is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size) 1390 | { 1391 | if (data[0] != '#') 1392 | return 0; 1393 | 1394 | if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) { 1395 | size_t level = 0; 1396 | 1397 | while (level < size && level < 6 && data[level] == '#') 1398 | level++; 1399 | 1400 | if (level < size && data[level] != ' ') 1401 | return 0; 1402 | } 1403 | 1404 | return 1; 1405 | } 1406 | 1407 | /* is_headerline • returns whether the line is a setext-style hdr 
underline */ 1408 | static int 1409 | is_headerline(uint8_t *data, size_t size) 1410 | { 1411 | size_t i = 0; 1412 | 1413 | /* test of level 1 header */ 1414 | if (data[i] == '=') { 1415 | for (i = 1; i < size && data[i] == '='; i++); 1416 | while (i < size && data[i] == ' ') i++; 1417 | return (i >= size || data[i] == '\n') ? 1 : 0; } 1418 | 1419 | /* test of level 2 header */ 1420 | if (data[i] == '-') { 1421 | for (i = 1; i < size && data[i] == '-'; i++); 1422 | while (i < size && data[i] == ' ') i++; 1423 | return (i >= size || data[i] == '\n') ? 2 : 0; } 1424 | 1425 | return 0; 1426 | } 1427 | 1428 | static int 1429 | is_next_headerline(uint8_t *data, size_t size) 1430 | { 1431 | size_t i = 0; 1432 | 1433 | while (i < size && data[i] != '\n') 1434 | i++; 1435 | 1436 | if (++i >= size) 1437 | return 0; 1438 | 1439 | return is_headerline(data + i, size - i); 1440 | } 1441 | 1442 | /* prefix_quote • returns blockquote prefix length */ 1443 | static size_t 1444 | prefix_quote(uint8_t *data, size_t size) 1445 | { 1446 | size_t i = 0; 1447 | if (i < size && data[i] == ' ') i++; 1448 | if (i < size && data[i] == ' ') i++; 1449 | if (i < size && data[i] == ' ') i++; 1450 | 1451 | if ((i < size && data[i] == '>') && (i + 1 < size && data[i+1] != '!')) { 1452 | if (i + 1 < size && data[i + 1] == ' ') 1453 | return i + 2; 1454 | 1455 | return i + 1; 1456 | } 1457 | 1458 | return 0; 1459 | } 1460 | 1461 | static size_t 1462 | prefix_blockspoiler(uint8_t *data, size_t size) 1463 | { 1464 | size_t i = 0; 1465 | if (i < size && data[i] == ' ') i++; 1466 | if (i < size && data[i] == ' ') i++; 1467 | if (i < size && data[i] == ' ') i++; 1468 | 1469 | if (i + 1 < size && data[i] == '>' && data[i + 1] == '!') { 1470 | size_t spoilerspan = find_emph_char(data + i + 1, size - i - 1, '<'); 1471 | if (i + spoilerspan < size && spoilerspan > 0 && data[i + spoilerspan] == '!') 1472 | return 0; 1473 | 1474 | if (i + 2 < size && data[i + 2] == ' ') 1475 | return i + 3; 1476 | 1477 | 
return i + 2; 1478 | } 1479 | 1480 | return 0; 1481 | } 1482 | 1483 | /* prefix_code • returns prefix length for block code*/ 1484 | static size_t 1485 | prefix_code(uint8_t *data, size_t size) 1486 | { 1487 | if (size > 3 && data[0] == ' ' && data[1] == ' ' 1488 | && data[2] == ' ' && data[3] == ' ') return 4; 1489 | 1490 | return 0; 1491 | } 1492 | 1493 | /* prefix_oli • returns ordered list item prefix */ 1494 | static size_t 1495 | prefix_oli(uint8_t *data, size_t size) 1496 | { 1497 | size_t i = 0; 1498 | 1499 | if (i < size && data[i] == ' ') i++; 1500 | if (i < size && data[i] == ' ') i++; 1501 | if (i < size && data[i] == ' ') i++; 1502 | 1503 | if (i >= size || data[i] < '0' || data[i] > '9') 1504 | return 0; 1505 | 1506 | while (i < size && data[i] >= '0' && data[i] <= '9') 1507 | i++; 1508 | 1509 | if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ') 1510 | return 0; 1511 | 1512 | if (is_next_headerline(data + i, size - i)) 1513 | return 0; 1514 | 1515 | return i + 2; 1516 | } 1517 | 1518 | /* prefix_uli • returns ordered list item prefix */ 1519 | static size_t 1520 | prefix_uli(uint8_t *data, size_t size) 1521 | { 1522 | size_t i = 0; 1523 | 1524 | if (i < size && data[i] == ' ') i++; 1525 | if (i < size && data[i] == ' ') i++; 1526 | if (i < size && data[i] == ' ') i++; 1527 | 1528 | if (i + 1 >= size || 1529 | (data[i] != '*' && data[i] != '+' && data[i] != '-') || 1530 | data[i + 1] != ' ') 1531 | return 0; 1532 | 1533 | if (is_next_headerline(data + i, size - i)) 1534 | return 0; 1535 | 1536 | return i + 2; 1537 | } 1538 | 1539 | 1540 | /* parse_block • parsing of one block, returning next uint8_t to parse */ 1541 | static void parse_block(struct buf *ob, struct sd_markdown *rndr, 1542 | uint8_t *data, size_t size); 1543 | 1544 | 1545 | /* parse_blockquote • handles parsing of a blockquote fragment */ 1546 | static size_t 1547 | parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1548 | { 1549 | size_t beg, 
end = 0, pre, work_size = 0; 1550 | uint8_t *work_data = 0; 1551 | struct buf *out = 0; 1552 | 1553 | out = rndr_newbuf(rndr, BUFFER_BLOCK); 1554 | beg = 0; 1555 | while (beg < size) { 1556 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); 1557 | 1558 | pre = prefix_quote(data + beg, end - beg); 1559 | 1560 | if (pre) 1561 | beg += pre; /* skipping prefix */ 1562 | 1563 | /* empty line followed by non-quote line */ 1564 | else if (is_empty(data + beg, end - beg) && 1565 | (end >= size || (prefix_quote(data + end, size - end) == 0 && 1566 | !is_empty(data + end, size - end)))) 1567 | break; 1568 | 1569 | if (beg < end) { /* copy into the in-place working buffer */ 1570 | /* bufput(work, data + beg, end - beg); */ 1571 | if (!work_data) 1572 | work_data = data + beg; 1573 | else if (data + beg != work_data + work_size) 1574 | memmove(work_data + work_size, data + beg, end - beg); 1575 | work_size += end - beg; 1576 | } 1577 | beg = end; 1578 | } 1579 | 1580 | parse_block(out, rndr, work_data, work_size); 1581 | if (rndr->cb.blockquote) 1582 | rndr->cb.blockquote(ob, out, rndr->opaque); 1583 | rndr_popbuf(rndr, BUFFER_BLOCK); 1584 | return end; 1585 | } 1586 | 1587 | /* parse_blockspoiler • handles parsing of a blockspoiler fragment */ 1588 | static size_t 1589 | parse_blockspoiler(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1590 | { 1591 | size_t beg, end = 0, pre, work_size = 0; 1592 | uint8_t *work_data = 0; 1593 | struct buf *out = 0; 1594 | 1595 | out = rndr_newbuf(rndr, BUFFER_BLOCK); 1596 | beg = 0; 1597 | while (beg < size) { 1598 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); 1599 | 1600 | pre = prefix_blockspoiler(data + beg, end - beg); 1601 | 1602 | if (pre) 1603 | beg += pre; /* skipping prefix */ 1604 | 1605 | /* empty line followed by non-blockspoiler line */ 1606 | else if (is_empty(data + beg, end - beg) && 1607 | (end >= size || (prefix_blockspoiler(data + end, size - end) == 0 && 1608 | 
!is_empty(data + end, size - end)))) 1609 | break; 1610 | 1611 | if (beg < end) { /* copy into the in-place working buffer */ 1612 | /* bufput(work, data + beg, end - beg); */ 1613 | if (!work_data) 1614 | work_data = data + beg; 1615 | else if (data + beg != work_data + work_size) 1616 | memmove(work_data + work_size, data + beg, end - beg); 1617 | work_size += end - beg; 1618 | } 1619 | beg = end; 1620 | } 1621 | 1622 | parse_block(out, rndr, work_data, work_size); 1623 | if (rndr->cb.blockspoiler) 1624 | rndr->cb.blockspoiler(ob, out, rndr->opaque); 1625 | rndr_popbuf(rndr, BUFFER_BLOCK); 1626 | return end; 1627 | } 1628 | 1629 | static size_t 1630 | parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render); 1631 | 1632 | /* parse_blockquote • handles parsing of a regular paragraph */ 1633 | static size_t 1634 | parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1635 | { 1636 | size_t i = 0, end = 0; 1637 | int level = 0; 1638 | struct buf work = { data, 0, 0, 0 }; 1639 | 1640 | while (i < size) { 1641 | for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; 1642 | 1643 | if (prefix_quote(data + i, end - i) != 0) { 1644 | end = i; 1645 | break; 1646 | } 1647 | 1648 | if (is_empty(data + i, size - i)) 1649 | break; 1650 | 1651 | if ((level = is_headerline(data + i, size - i)) != 0) 1652 | break; 1653 | 1654 | if (is_atxheader(rndr, data + i, size - i) || 1655 | is_hrule(data + i, size - i) || 1656 | prefix_quote(data + i, size - i)) { 1657 | end = i; 1658 | break; 1659 | } 1660 | 1661 | /* 1662 | * Early termination of a paragraph with the same logic 1663 | * as Markdown 1.0.0. 
If this logic is applied, the 1664 | * Markdown 1.0.3 test suite won't pass cleanly 1665 | * 1666 | * :: If the first character in a new line is not a letter, 1667 | * let's check to see if there's some kind of block starting 1668 | * here 1669 | */ 1670 | if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) { 1671 | if (prefix_oli(data + i, size - i) || 1672 | prefix_uli(data + i, size - i)) { 1673 | end = i; 1674 | break; 1675 | } 1676 | 1677 | /* see if an html block starts here */ 1678 | if (data[i] == '<' && rndr->cb.blockhtml && 1679 | parse_htmlblock(ob, rndr, data + i, size - i, 0)) { 1680 | end = i; 1681 | break; 1682 | } 1683 | 1684 | /* see if a code fence starts here */ 1685 | if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && 1686 | is_codefence(data + i, size - i, NULL) != 0) { 1687 | end = i; 1688 | break; 1689 | } 1690 | } 1691 | 1692 | i = end; 1693 | } 1694 | 1695 | work.size = i; 1696 | while (work.size && data[work.size - 1] == '\n') 1697 | work.size--; 1698 | 1699 | if (!level) { 1700 | struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); 1701 | parse_inline(tmp, rndr, work.data, work.size); 1702 | if (rndr->cb.paragraph) 1703 | rndr->cb.paragraph(ob, tmp, rndr->opaque); 1704 | rndr_popbuf(rndr, BUFFER_BLOCK); 1705 | } else { 1706 | struct buf *header_work; 1707 | 1708 | if (work.size) { 1709 | size_t beg; 1710 | i = work.size; 1711 | work.size -= 1; 1712 | 1713 | while (work.size && data[work.size] != '\n') 1714 | work.size -= 1; 1715 | 1716 | beg = work.size + 1; 1717 | while (work.size && data[work.size - 1] == '\n') 1718 | work.size -= 1; 1719 | 1720 | if (work.size > 0) { 1721 | struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); 1722 | parse_inline(tmp, rndr, work.data, work.size); 1723 | 1724 | if (rndr->cb.paragraph) 1725 | rndr->cb.paragraph(ob, tmp, rndr->opaque); 1726 | 1727 | rndr_popbuf(rndr, BUFFER_BLOCK); 1728 | work.data += beg; 1729 | work.size = i - beg; 1730 | } 1731 | else work.size = i; 1732 | } 1733 | 1734 | 
header_work = rndr_newbuf(rndr, BUFFER_SPAN); 1735 | parse_inline(header_work, rndr, work.data, work.size); 1736 | 1737 | if (rndr->cb.header) 1738 | rndr->cb.header(ob, header_work, (int)level, rndr->opaque); 1739 | 1740 | rndr_popbuf(rndr, BUFFER_SPAN); 1741 | } 1742 | 1743 | return end; 1744 | } 1745 | 1746 | /* parse_fencedcode • handles parsing of a block-level code fragment */ 1747 | static size_t 1748 | parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1749 | { 1750 | size_t beg, end; 1751 | struct buf *work = 0; 1752 | struct buf lang = { 0, 0, 0, 0 }; 1753 | 1754 | beg = is_codefence(data, size, &lang); 1755 | if (beg == 0) return 0; 1756 | 1757 | work = rndr_newbuf(rndr, BUFFER_BLOCK); 1758 | 1759 | while (beg < size) { 1760 | size_t fence_end; 1761 | struct buf fence_trail = { 0, 0, 0, 0 }; 1762 | 1763 | fence_end = is_codefence(data + beg, size - beg, &fence_trail); 1764 | if (fence_end != 0 && fence_trail.size == 0) { 1765 | beg += fence_end; 1766 | break; 1767 | } 1768 | 1769 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); 1770 | 1771 | if (beg < end) { 1772 | /* verbatim copy to the working buffer, 1773 | escaping entities */ 1774 | if (is_empty(data + beg, end - beg)) 1775 | bufputc(work, '\n'); 1776 | else bufput(work, data + beg, end - beg); 1777 | } 1778 | beg = end; 1779 | } 1780 | 1781 | if (work->size && work->data[work->size - 1] != '\n') 1782 | bufputc(work, '\n'); 1783 | 1784 | if (rndr->cb.blockcode) 1785 | rndr->cb.blockcode(ob, work, lang.size ? 
&lang : NULL, rndr->opaque); 1786 | 1787 | rndr_popbuf(rndr, BUFFER_BLOCK); 1788 | return beg; 1789 | } 1790 | 1791 | static size_t 1792 | parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1793 | { 1794 | size_t beg, end, pre; 1795 | struct buf *work = 0; 1796 | 1797 | work = rndr_newbuf(rndr, BUFFER_BLOCK); 1798 | 1799 | beg = 0; 1800 | while (beg < size) { 1801 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; 1802 | pre = prefix_code(data + beg, end - beg); 1803 | 1804 | if (pre) 1805 | beg += pre; /* skipping prefix */ 1806 | else if (!is_empty(data + beg, end - beg)) 1807 | /* non-empty non-prefixed line breaks the pre */ 1808 | break; 1809 | 1810 | if (beg < end) { 1811 | /* verbatim copy to the working buffer, 1812 | escaping entities */ 1813 | if (is_empty(data + beg, end - beg)) 1814 | bufputc(work, '\n'); 1815 | else bufput(work, data + beg, end - beg); 1816 | } 1817 | beg = end; 1818 | } 1819 | 1820 | while (work->size && work->data[work->size - 1] == '\n') 1821 | work->size -= 1; 1822 | 1823 | bufputc(work, '\n'); 1824 | 1825 | if (rndr->cb.blockcode) 1826 | rndr->cb.blockcode(ob, work, NULL, rndr->opaque); 1827 | 1828 | rndr_popbuf(rndr, BUFFER_BLOCK); 1829 | return beg; 1830 | } 1831 | 1832 | /* parse_listitem • parsing of a single list item */ 1833 | /* assuming initial prefix is already removed */ 1834 | static size_t 1835 | parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags) 1836 | { 1837 | struct buf *work = 0, *inter = 0; 1838 | size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; 1839 | int in_empty = 0, has_inside_empty = 0, in_fence = 0; 1840 | 1841 | /* keeping track of the first indentation prefix */ 1842 | while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') 1843 | orgpre++; 1844 | 1845 | beg = prefix_uli(data, size); 1846 | if (!beg) 1847 | beg = prefix_oli(data, size); 1848 | 1849 | if (!beg) 1850 | return 0; 1851 | 1852 | /* 
skipping to the beginning of the following line */ 1853 | end = beg; 1854 | while (end < size && data[end - 1] != '\n') 1855 | end++; 1856 | 1857 | /* getting working buffers */ 1858 | work = rndr_newbuf(rndr, BUFFER_SPAN); 1859 | inter = rndr_newbuf(rndr, BUFFER_SPAN); 1860 | 1861 | /* putting the first line into the working buffer */ 1862 | bufput(work, data + beg, end - beg); 1863 | beg = end; 1864 | 1865 | /* process the following lines */ 1866 | while (beg < size) { 1867 | size_t has_next_uli = 0, has_next_oli = 0; 1868 | 1869 | end++; 1870 | 1871 | while (end < size && data[end - 1] != '\n') 1872 | end++; 1873 | 1874 | /* process an empty line */ 1875 | if (is_empty(data + beg, end - beg)) { 1876 | in_empty = 1; 1877 | beg = end; 1878 | continue; 1879 | } 1880 | 1881 | /* calculating the indentation */ 1882 | i = 0; 1883 | while (i < 4 && beg + i < end && data[beg + i] == ' ') 1884 | i++; 1885 | 1886 | pre = i; 1887 | 1888 | if (rndr->ext_flags & MKDEXT_FENCED_CODE) { 1889 | if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) 1890 | in_fence = !in_fence; 1891 | } 1892 | 1893 | /* Only check for new list items if we are **not** inside 1894 | * a fenced code block */ 1895 | if (!in_fence) { 1896 | has_next_uli = prefix_uli(data + beg + i, end - beg - i); 1897 | has_next_oli = prefix_oli(data + beg + i, end - beg - i); 1898 | } 1899 | 1900 | /* checking for ul/ol switch */ 1901 | if (in_empty && ( 1902 | ((*flags & MKD_LIST_ORDERED) && has_next_uli) || 1903 | (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){ 1904 | *flags |= MKD_LI_END; 1905 | break; /* the following item must have same list type */ 1906 | } 1907 | 1908 | /* checking for a new item */ 1909 | if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) { 1910 | if (in_empty) 1911 | has_inside_empty = 1; 1912 | 1913 | if (pre == orgpre) /* the following item must have */ 1914 | break; /* the same indentation */ 1915 | 1916 | if (!sublist) 1917 | sublist = work->size; 
1918 | } 1919 | /* joining only indented stuff after empty lines; 1920 | * note that now we only require 1 space of indentation 1921 | * to continue a list */ 1922 | else if (in_empty && pre == 0) { 1923 | *flags |= MKD_LI_END; 1924 | break; 1925 | } 1926 | else if (in_empty) { 1927 | bufputc(work, '\n'); 1928 | has_inside_empty = 1; 1929 | } 1930 | 1931 | in_empty = 0; 1932 | 1933 | /* adding the line without prefix into the working buffer */ 1934 | bufput(work, data + beg + i, end - beg - i); 1935 | beg = end; 1936 | } 1937 | 1938 | /* render of li contents */ 1939 | if (has_inside_empty) 1940 | *flags |= MKD_LI_BLOCK; 1941 | 1942 | if (*flags & MKD_LI_BLOCK) { 1943 | /* intermediate render of block li */ 1944 | if (sublist && sublist < work->size) { 1945 | parse_block(inter, rndr, work->data, sublist); 1946 | parse_block(inter, rndr, work->data + sublist, work->size - sublist); 1947 | } 1948 | else 1949 | parse_block(inter, rndr, work->data, work->size); 1950 | } else { 1951 | /* intermediate render of inline li */ 1952 | if (sublist && sublist < work->size) { 1953 | parse_inline(inter, rndr, work->data, sublist); 1954 | parse_block(inter, rndr, work->data + sublist, work->size - sublist); 1955 | } 1956 | else 1957 | parse_inline(inter, rndr, work->data, work->size); 1958 | } 1959 | 1960 | /* render of li itself */ 1961 | if (rndr->cb.listitem) 1962 | rndr->cb.listitem(ob, inter, *flags, rndr->opaque); 1963 | 1964 | rndr_popbuf(rndr, BUFFER_SPAN); 1965 | rndr_popbuf(rndr, BUFFER_SPAN); 1966 | return beg; 1967 | } 1968 | 1969 | 1970 | /* parse_list • parsing ordered or unordered list block */ 1971 | static size_t 1972 | parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags) 1973 | { 1974 | struct buf *work = 0; 1975 | size_t i = 0, j; 1976 | 1977 | work = rndr_newbuf(rndr, BUFFER_BLOCK); 1978 | 1979 | while (i < size) { 1980 | j = parse_listitem(work, rndr, data + i, size - i, &flags); 1981 | i += j; 1982 | 1983 | if (!j || 
(flags & MKD_LI_END)) 1984 | break; 1985 | } 1986 | 1987 | if (rndr->cb.list) 1988 | rndr->cb.list(ob, work, flags, rndr->opaque); 1989 | rndr_popbuf(rndr, BUFFER_BLOCK); 1990 | return i; 1991 | } 1992 | 1993 | /* parse_atxheader • parsing of atx-style headers */ 1994 | static size_t 1995 | parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 1996 | { 1997 | size_t level = 0; 1998 | size_t i, end, skip; 1999 | 2000 | while (level < size && level < 6 && data[level] == '#') 2001 | level++; 2002 | 2003 | for (i = level; i < size && data[i] == ' '; i++); 2004 | 2005 | for (end = i; end < size && data[end] != '\n'; end++); 2006 | skip = end; 2007 | 2008 | while (end && data[end - 1] == '#') 2009 | end--; 2010 | 2011 | while (end && data[end - 1] == ' ') 2012 | end--; 2013 | 2014 | if (end > i) { 2015 | struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); 2016 | 2017 | parse_inline(work, rndr, data + i, end - i); 2018 | 2019 | if (rndr->cb.header) 2020 | rndr->cb.header(ob, work, (int)level, rndr->opaque); 2021 | 2022 | rndr_popbuf(rndr, BUFFER_SPAN); 2023 | } 2024 | 2025 | return skip; 2026 | } 2027 | 2028 | 2029 | /* htmlblock_end • checking end of HTML block : [ \t]*\n[ \t*]\n */ 2030 | /* returns the length on match, 0 otherwise */ 2031 | static size_t 2032 | htmlblock_end_tag( 2033 | const char *tag, 2034 | size_t tag_len, 2035 | struct sd_markdown *rndr, 2036 | uint8_t *data, 2037 | size_t size) 2038 | { 2039 | size_t i, w; 2040 | 2041 | /* checking if tag is a match */ 2042 | if (tag_len + 3 >= size || 2043 | strncasecmp((char *)data + 2, tag, tag_len) != 0 || 2044 | data[tag_len + 2] != '>') 2045 | return 0; 2046 | 2047 | /* checking white lines */ 2048 | i = tag_len + 3; 2049 | w = 0; 2050 | if (i < size && (w = is_empty(data + i, size - i)) == 0) 2051 | return 0; /* non-blank after tag */ 2052 | i += w; 2053 | w = 0; 2054 | 2055 | if (i < size) 2056 | w = is_empty(data + i, size - i); 2057 | 2058 | return i + w; 2059 | } 2060 | 
2061 | static size_t 2062 | htmlblock_end(const char *curtag, 2063 | struct sd_markdown *rndr, 2064 | uint8_t *data, 2065 | size_t size, 2066 | int start_of_line) 2067 | { 2068 | size_t tag_size = strlen(curtag); 2069 | size_t i = 1, end_tag; 2070 | int block_lines = 0; 2071 | 2072 | while (i < size) { 2073 | i++; 2074 | while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { 2075 | if (data[i] == '\n') 2076 | block_lines++; 2077 | 2078 | i++; 2079 | } 2080 | 2081 | /* If we are only looking for unindented tags, skip the tag 2082 | * if it doesn't follow a newline. 2083 | * 2084 | * The only exception to this is if the tag is still on the 2085 | * initial line; in that case it still counts as a closing 2086 | * tag 2087 | */ 2088 | if (start_of_line && block_lines > 0 && data[i - 2] != '\n') 2089 | continue; 2090 | 2091 | if (i + 2 + tag_size >= size) 2092 | break; 2093 | 2094 | end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); 2095 | if (end_tag) 2096 | return i + end_tag - 1; 2097 | } 2098 | 2099 | return 0; 2100 | } 2101 | 2102 | 2103 | /* parse_htmlblock • parsing of inline HTML block */ 2104 | static size_t 2105 | parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render) 2106 | { 2107 | size_t i, j = 0, tag_end; 2108 | const char *curtag = NULL; 2109 | struct buf work = { data, 0, 0, 0 }; 2110 | 2111 | /* identification of the opening tag */ 2112 | if (size < 2 || data[0] != '<') 2113 | return 0; 2114 | 2115 | i = 1; 2116 | while (i < size && data[i] != '>' && data[i] != ' ') 2117 | i++; 2118 | 2119 | if (i < size) 2120 | curtag = find_block_tag((char *)data + 1, (int)i - 1); 2121 | 2122 | /* handling of special cases */ 2123 | if (!curtag) { 2124 | 2125 | /* HTML comment, laxist form */ 2126 | if (size > 5 && data[1] == '!' 
&& data[2] == '-' && data[3] == '-') { 2127 | i = 5; 2128 | 2129 | while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) 2130 | i++; 2131 | 2132 | i++; 2133 | 2134 | if (i < size) 2135 | j = is_empty(data + i, size - i); 2136 | 2137 | if (j) { 2138 | work.size = i + j; 2139 | if (do_render && rndr->cb.blockhtml) 2140 | rndr->cb.blockhtml(ob, &work, rndr->opaque); 2141 | return work.size; 2142 | } 2143 | } 2144 | 2145 | /* HR, which is the only self-closing block tag considered */ 2146 | if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { 2147 | i = 3; 2148 | while (i < size && data[i] != '>') 2149 | i++; 2150 | 2151 | if (i + 1 < size) { 2152 | i++; 2153 | j = is_empty(data + i, size - i); 2154 | if (j) { 2155 | work.size = i + j; 2156 | if (do_render && rndr->cb.blockhtml) 2157 | rndr->cb.blockhtml(ob, &work, rndr->opaque); 2158 | return work.size; 2159 | } 2160 | } 2161 | } 2162 | 2163 | /* no special case recognised */ 2164 | return 0; 2165 | } 2166 | 2167 | /* looking for an unindented matching closing tag */ 2168 | /* followed by a blank line */ 2169 | tag_end = htmlblock_end(curtag, rndr, data, size, 1); 2170 | 2171 | /* if not found, trying a second pass looking for indented match */ 2172 | /* but not if tag is "ins" or "del" (following original Markdown.pl) */ 2173 | if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { 2174 | tag_end = htmlblock_end(curtag, rndr, data, size, 0); 2175 | } 2176 | 2177 | if (!tag_end) 2178 | return 0; 2179 | 2180 | /* the end of the block has been found */ 2181 | work.size = tag_end; 2182 | if (do_render && rndr->cb.blockhtml) 2183 | rndr->cb.blockhtml(ob, &work, rndr->opaque); 2184 | 2185 | return tag_end; 2186 | } 2187 | 2188 | static void 2189 | parse_table_row( 2190 | struct buf *ob, 2191 | struct sd_markdown *rndr, 2192 | uint8_t *data, 2193 | size_t size, 2194 | size_t columns, 2195 | int *col_data, 2196 | int header_flag) 2197 | { 
2198 | size_t i = 0, col, cols_left; 2199 | struct buf *row_work = 0; 2200 | 2201 | if (!rndr->cb.table_cell || !rndr->cb.table_row) 2202 | return; 2203 | 2204 | row_work = rndr_newbuf(rndr, BUFFER_SPAN); 2205 | 2206 | if (i < size && data[i] == '|') 2207 | i++; 2208 | 2209 | for (col = 0; col < columns && i < size; ++col) { 2210 | size_t cell_start, cell_end; 2211 | struct buf *cell_work; 2212 | 2213 | cell_work = rndr_newbuf(rndr, BUFFER_SPAN); 2214 | 2215 | while (i < size && _isspace(data[i])) 2216 | i++; 2217 | 2218 | cell_start = i; 2219 | 2220 | while (i < size && data[i] != '|') 2221 | i++; 2222 | 2223 | cell_end = i - 1; 2224 | 2225 | while (cell_end > cell_start && _isspace(data[cell_end])) 2226 | cell_end--; 2227 | 2228 | parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start); 2229 | rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque, 0); 2230 | 2231 | rndr_popbuf(rndr, BUFFER_SPAN); 2232 | i++; 2233 | } 2234 | 2235 | cols_left = columns - col; 2236 | if (cols_left > 0) { 2237 | struct buf empty_cell = { 0, 0, 0, 0 }; 2238 | rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque, cols_left); 2239 | } 2240 | 2241 | rndr->cb.table_row(ob, row_work, rndr->opaque); 2242 | 2243 | rndr_popbuf(rndr, BUFFER_SPAN); 2244 | } 2245 | 2246 | static size_t 2247 | parse_table_header( 2248 | struct buf *ob, 2249 | struct sd_markdown *rndr, 2250 | uint8_t *data, 2251 | size_t size, 2252 | size_t *columns, 2253 | int **column_data) 2254 | { 2255 | int pipes; 2256 | size_t i = 0, col, header_end, under_end; 2257 | 2258 | pipes = 0; 2259 | while (i < size && data[i] != '\n') 2260 | if (data[i++] == '|') 2261 | pipes++; 2262 | 2263 | if (i == size || pipes == 0) 2264 | return 0; 2265 | 2266 | header_end = i; 2267 | 2268 | while (header_end > 0 && _isspace(data[header_end - 1])) 2269 | header_end--; 2270 | 2271 | if (data[0] == '|') 2272 | pipes--; 2273 | 2274 | if (header_end && 
data[header_end - 1] == '|') 2275 | pipes--; 2276 | 2277 | if (pipes + 1 > rndr->max_table_cols) 2278 | return 0; 2279 | 2280 | *columns = pipes + 1; 2281 | *column_data = calloc(*columns, sizeof(int)); 2282 | 2283 | /* Parse the header underline */ 2284 | i++; 2285 | if (i < size && data[i] == '|') 2286 | i++; 2287 | 2288 | under_end = i; 2289 | while (under_end < size && data[under_end] != '\n') 2290 | under_end++; 2291 | 2292 | for (col = 0; col < *columns && i < under_end; ++col) { 2293 | size_t dashes = 0; 2294 | 2295 | while (i < under_end && data[i] == ' ') 2296 | i++; 2297 | 2298 | if (data[i] == ':') { 2299 | i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L; 2300 | dashes++; 2301 | } 2302 | 2303 | while (i < under_end && data[i] == '-') { 2304 | i++; dashes++; 2305 | } 2306 | 2307 | if (i < under_end && data[i] == ':') { 2308 | i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R; 2309 | dashes++; 2310 | } 2311 | 2312 | while (i < under_end && data[i] == ' ') 2313 | i++; 2314 | 2315 | if (i < under_end && data[i] != '|') 2316 | break; 2317 | 2318 | if (dashes < 1) 2319 | break; 2320 | 2321 | i++; 2322 | } 2323 | 2324 | if (col < *columns) 2325 | return 0; 2326 | 2327 | parse_table_row( 2328 | ob, rndr, data, 2329 | header_end, 2330 | *columns, 2331 | *column_data, 2332 | MKD_TABLE_HEADER 2333 | ); 2334 | 2335 | return under_end + 1; 2336 | } 2337 | 2338 | static size_t 2339 | parse_table( 2340 | struct buf *ob, 2341 | struct sd_markdown *rndr, 2342 | uint8_t *data, 2343 | size_t size) 2344 | { 2345 | size_t i; 2346 | 2347 | struct buf *header_work = 0; 2348 | struct buf *body_work = 0; 2349 | 2350 | size_t columns; 2351 | int *col_data = NULL; 2352 | 2353 | header_work = rndr_newbuf(rndr, BUFFER_SPAN); 2354 | body_work = rndr_newbuf(rndr, BUFFER_BLOCK); 2355 | 2356 | i = parse_table_header(header_work, rndr, data, size, &columns, &col_data); 2357 | if (i > 0) { 2358 | 2359 | while (i < size) { 2360 | size_t row_start; 2361 | int pipes = 0; 2362 | 2363 | row_start = 
i; 2364 | 2365 | while (i < size && data[i] != '\n') 2366 | if (data[i++] == '|') 2367 | pipes++; 2368 | 2369 | if (pipes == 0 || i == size) { 2370 | i = row_start; 2371 | break; 2372 | } 2373 | 2374 | parse_table_row( 2375 | body_work, 2376 | rndr, 2377 | data + row_start, 2378 | i - row_start, 2379 | columns, 2380 | col_data, 0 2381 | ); 2382 | 2383 | i++; 2384 | } 2385 | 2386 | if (rndr->cb.table) 2387 | rndr->cb.table(ob, header_work, body_work, rndr->opaque); 2388 | } 2389 | 2390 | free(col_data); 2391 | rndr_popbuf(rndr, BUFFER_SPAN); 2392 | rndr_popbuf(rndr, BUFFER_BLOCK); 2393 | return i; 2394 | } 2395 | 2396 | /* parse_block • parsing of one block, returning next uint8_t to parse */ 2397 | static void 2398 | parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) 2399 | { 2400 | size_t beg, end, i; 2401 | uint8_t *txt_data; 2402 | beg = 0; 2403 | 2404 | if (rndr->work_bufs[BUFFER_SPAN].size + 2405 | rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) 2406 | return; 2407 | 2408 | while (beg < size) { 2409 | txt_data = data + beg; 2410 | end = size - beg; 2411 | 2412 | if (is_atxheader(rndr, txt_data, end)) 2413 | beg += parse_atxheader(ob, rndr, txt_data, end); 2414 | 2415 | else if (data[beg] == '<' && rndr->cb.blockhtml && 2416 | (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0) 2417 | beg += i; 2418 | 2419 | else if ((i = is_empty(txt_data, end)) != 0) 2420 | beg += i; 2421 | 2422 | else if (is_hrule(txt_data, end)) { 2423 | if (rndr->cb.hrule) 2424 | rndr->cb.hrule(ob, rndr->opaque); 2425 | 2426 | while (beg < size && data[beg] != '\n') 2427 | beg++; 2428 | 2429 | beg++; 2430 | } 2431 | 2432 | else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && 2433 | (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0) 2434 | beg += i; 2435 | 2436 | else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 && 2437 | (i = parse_table(ob, rndr, txt_data, end)) != 0) 2438 | beg += i; 2439 | 2440 | else if (prefix_quote(txt_data, end)) 2441 | 
beg += parse_blockquote(ob, rndr, txt_data, end); 2442 | 2443 | else if (prefix_blockspoiler(txt_data, end)) 2444 | beg += parse_blockspoiler(ob, rndr, txt_data, end); 2445 | 2446 | else if (prefix_code(txt_data, end)) 2447 | beg += parse_blockcode(ob, rndr, txt_data, end); 2448 | 2449 | else if (prefix_uli(txt_data, end)) 2450 | beg += parse_list(ob, rndr, txt_data, end, 0); 2451 | 2452 | else if (prefix_oli(txt_data, end)) 2453 | beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED); 2454 | 2455 | else 2456 | beg += parse_paragraph(ob, rndr, txt_data, end); 2457 | } 2458 | } 2459 | 2460 | 2461 | 2462 | /********************* 2463 | * REFERENCE PARSING * 2464 | *********************/ 2465 | 2466 | /* is_ref • returns whether a line is a reference or not */ 2467 | static int 2468 | is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs) 2469 | { 2470 | /* int n; */ 2471 | size_t i = 0; 2472 | size_t id_offset, id_end; 2473 | size_t link_offset, link_end; 2474 | size_t title_offset, title_end; 2475 | size_t line_end; 2476 | 2477 | /* up to 3 optional leading spaces */ 2478 | if (beg + 3 >= end) return 0; 2479 | if (data[beg] == ' ') { i = 1; 2480 | if (data[beg + 1] == ' ') { i = 2; 2481 | if (data[beg + 2] == ' ') { i = 3; 2482 | if (data[beg + 3] == ' ') return 0; } } } 2483 | i += beg; 2484 | 2485 | /* id part: anything but a newline between brackets */ 2486 | if (data[i] != '[') return 0; 2487 | i++; 2488 | id_offset = i; 2489 | while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') 2490 | i++; 2491 | if (i >= end || data[i] != ']') return 0; 2492 | id_end = i; 2493 | 2494 | /* spacer: colon (space | tab)* newline? 
(space | tab)* */ 2495 | i++; 2496 | if (i >= end || data[i] != ':') return 0; 2497 | i++; 2498 | while (i < end && data[i] == ' ') i++; 2499 | if (i < end && (data[i] == '\n' || data[i] == '\r')) { 2500 | i++; 2501 | if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } 2502 | while (i < end && data[i] == ' ') i++; 2503 | if (i >= end) return 0; 2504 | 2505 | /* link: whitespace-free sequence, optionally between angle brackets */ 2506 | if (data[i] == '<') 2507 | i++; 2508 | 2509 | link_offset = i; 2510 | 2511 | while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') 2512 | i++; 2513 | 2514 | if (data[i - 1] == '>') link_end = i - 1; 2515 | else link_end = i; 2516 | 2517 | /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ 2518 | while (i < end && data[i] == ' ') i++; 2519 | if (i < end && data[i] != '\n' && data[i] != '\r' 2520 | && data[i] != '\'' && data[i] != '"' && data[i] != '(') 2521 | return 0; 2522 | line_end = 0; 2523 | /* computing end-of-line */ 2524 | if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; 2525 | if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') 2526 | line_end = i + 1; 2527 | 2528 | /* optional (space|tab)* spacer after a newline */ 2529 | if (line_end) { 2530 | i = line_end + 1; 2531 | while (i < end && data[i] == ' ') i++; } 2532 | 2533 | /* optional title: any non-newline sequence enclosed in '"() 2534 | alone on its line */ 2535 | title_offset = title_end = 0; 2536 | if (i + 1 < end 2537 | && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { 2538 | i++; 2539 | title_offset = i; 2540 | /* looking for EOL */ 2541 | while (i < end && data[i] != '\n' && data[i] != '\r') i++; 2542 | if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') 2543 | title_end = i + 1; 2544 | else title_end = i; 2545 | /* stepping back */ 2546 | i -= 1; 2547 | while (i > title_offset && data[i] == ' ') 2548 | i -= 1; 2549 | if (i > title_offset 2550 | && (data[i] == '\'' || data[i] == '"' 
|| data[i] == ')')) { 2551 | line_end = title_end; 2552 | title_end = i; } } 2553 | 2554 | if (!line_end || link_end == link_offset) 2555 | return 0; /* garbage after the link empty link */ 2556 | 2557 | /* a valid ref has been found, filling-in return structures */ 2558 | if (last) 2559 | *last = line_end; 2560 | 2561 | if (refs) { 2562 | struct link_ref *ref; 2563 | 2564 | ref = add_link_ref(refs, data + id_offset, id_end - id_offset); 2565 | if (!ref) 2566 | return 0; 2567 | 2568 | ref->link = bufnew(link_end - link_offset); 2569 | bufput(ref->link, data + link_offset, link_end - link_offset); 2570 | 2571 | if (title_end > title_offset) { 2572 | ref->title = bufnew(title_end - title_offset); 2573 | bufput(ref->title, data + title_offset, title_end - title_offset); 2574 | } 2575 | } 2576 | 2577 | return 1; 2578 | } 2579 | 2580 | static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size) 2581 | { 2582 | size_t i = 0, tab = 0; 2583 | 2584 | while (i < size) { 2585 | size_t org = i; 2586 | 2587 | while (i < size && line[i] != '\t') { 2588 | i++; tab++; 2589 | } 2590 | 2591 | if (i > org) 2592 | bufput(ob, line + org, i - org); 2593 | 2594 | if (i >= size) 2595 | break; 2596 | 2597 | do { 2598 | bufputc(ob, ' '); tab++; 2599 | } while (tab % 4); 2600 | 2601 | i++; 2602 | } 2603 | } 2604 | 2605 | /********************** 2606 | * EXPORTED FUNCTIONS * 2607 | **********************/ 2608 | 2609 | struct sd_markdown * 2610 | sd_markdown_new( 2611 | unsigned int extensions, 2612 | size_t max_nesting, 2613 | size_t max_table_cols, 2614 | const struct sd_callbacks *callbacks, 2615 | void *opaque) 2616 | { 2617 | struct sd_markdown *md = NULL; 2618 | 2619 | assert(max_nesting > 0 && max_table_cols > 0 && callbacks); 2620 | 2621 | md = malloc(sizeof(struct sd_markdown)); 2622 | if (!md) 2623 | return NULL; 2624 | 2625 | memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks)); 2626 | 2627 | stack_init(&md->work_bufs[BUFFER_BLOCK], 4); 2628 | 
stack_init(&md->work_bufs[BUFFER_SPAN], 8); 2629 | 2630 | memset(md->active_char, 0x0, 256); 2631 | 2632 | if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) { 2633 | md->active_char['*'] = MD_CHAR_EMPHASIS; 2634 | md->active_char['_'] = MD_CHAR_EMPHASIS; 2635 | md->active_char['>'] = MD_CHAR_EMPHASIS; 2636 | if (extensions & MKDEXT_STRIKETHROUGH) 2637 | md->active_char['~'] = MD_CHAR_EMPHASIS; 2638 | } 2639 | 2640 | if (md->cb.codespan) 2641 | md->active_char['`'] = MD_CHAR_CODESPAN; 2642 | 2643 | if (md->cb.linebreak) 2644 | md->active_char['\n'] = MD_CHAR_LINEBREAK; 2645 | 2646 | if (md->cb.image || md->cb.link) 2647 | md->active_char['['] = MD_CHAR_LINK; 2648 | 2649 | md->active_char['<'] = MD_CHAR_LANGLE; 2650 | md->active_char['\\'] = MD_CHAR_ESCAPE; 2651 | md->active_char['&'] = MD_CHAR_ENTITITY; 2652 | 2653 | if (extensions & MKDEXT_AUTOLINK) { 2654 | if (!(extensions & MKDEXT_NO_EMAIL_AUTOLINK)) 2655 | md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; 2656 | md->active_char[':'] = MD_CHAR_AUTOLINK_URL; 2657 | md->active_char['w'] = MD_CHAR_AUTOLINK_WWW; 2658 | md->active_char['/'] = MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME; 2659 | } 2660 | 2661 | if (extensions & MKDEXT_SUPERSCRIPT) 2662 | md->active_char['^'] = MD_CHAR_SUPERSCRIPT; 2663 | 2664 | /* Extension data */ 2665 | md->ext_flags = extensions; 2666 | md->opaque = opaque; 2667 | md->max_nesting = max_nesting; 2668 | md->max_table_cols = max_table_cols; 2669 | md->in_link_body = 0; 2670 | 2671 | return md; 2672 | } 2673 | 2674 | void 2675 | sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md) 2676 | { 2677 | #define MARKDOWN_GROW(x) ((x) + ((x) >> 1)) 2678 | static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; 2679 | 2680 | struct buf *text; 2681 | size_t beg, end; 2682 | 2683 | text = bufnew(64); 2684 | if (!text) 2685 | return; 2686 | 2687 | /* Preallocate enough space for our buffer to avoid expanding while copying */ 2688 | 
bufgrow(text, doc_size); 2689 | 2690 | /* reset the references table */ 2691 | memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); 2692 | 2693 | /* first pass: looking for references, copying everything else */ 2694 | beg = 0; 2695 | 2696 | /* Skip a possible UTF-8 BOM, even though the Unicode standard 2697 | * discourages having these in UTF-8 documents */ 2698 | if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0) 2699 | beg += 3; 2700 | 2701 | while (beg < doc_size) /* iterating over lines */ 2702 | if (is_ref(document, beg, doc_size, &end, md->refs)) 2703 | beg = end; 2704 | else { /* skipping to the next line */ 2705 | end = beg; 2706 | while (end < doc_size && document[end] != '\n' && document[end] != '\r') 2707 | end++; 2708 | 2709 | /* adding the line body if present */ 2710 | if (end > beg) 2711 | expand_tabs(text, document + beg, end - beg); 2712 | 2713 | while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) { 2714 | /* add one \n per newline */ 2715 | if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n')) 2716 | bufputc(text, '\n'); 2717 | end++; 2718 | } 2719 | 2720 | beg = end; 2721 | } 2722 | 2723 | /* pre-grow the output buffer to minimize allocations */ 2724 | bufgrow(ob, MARKDOWN_GROW(text->size)); 2725 | 2726 | /* second pass: actual rendering */ 2727 | if (md->cb.doc_header) 2728 | md->cb.doc_header(ob, md->opaque); 2729 | 2730 | if (text->size) { 2731 | /* adding a final newline if not already present */ 2732 | if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') 2733 | bufputc(text, '\n'); 2734 | 2735 | parse_block(ob, md, text->data, text->size); 2736 | } 2737 | 2738 | if (md->cb.doc_footer) 2739 | md->cb.doc_footer(ob, md->opaque); 2740 | 2741 | /* clean-up */ 2742 | bufrelease(text); 2743 | free_link_refs(md->refs); 2744 | 2745 | assert(md->work_bufs[BUFFER_SPAN].size == 0); 2746 | assert(md->work_bufs[BUFFER_BLOCK].size == 0); 2747 | } 2748 | 2749 | void 2750 | 
sd_markdown_free(struct sd_markdown *md) 2751 | { 2752 | size_t i; 2753 | 2754 | for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i) 2755 | bufrelease(md->work_bufs[BUFFER_SPAN].item[i]); 2756 | 2757 | for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i) 2758 | bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]); 2759 | 2760 | stack_free(&md->work_bufs[BUFFER_SPAN]); 2761 | stack_free(&md->work_bufs[BUFFER_BLOCK]); 2762 | 2763 | free(md); 2764 | } 2765 | 2766 | void 2767 | sd_version(int *ver_major, int *ver_minor, int *ver_revision) 2768 | { 2769 | *ver_major = SUNDOWN_VER_MAJOR; 2770 | *ver_minor = SUNDOWN_VER_MINOR; 2771 | *ver_revision = SUNDOWN_VER_REVISION; 2772 | } 2773 | 2774 | /* vim: set filetype=c: */ 2775 | --------------------------------------------------------------------------------