├── debian
    ├── compat
    ├── source
    │   └── format
    ├── rules
    ├── control
    ├── copyright
    └── changelog
├── .gitattributes
├── .npmignore
├── Dockerfile
├── .gitignore
├── html_block_names.txt
├── sundown.def
├── src
    ├── html_entities.h
    ├── stack.h
    ├── stack.c
    ├── autolink.h
    ├── buffer.h
    ├── html_entities.gperf
    ├── buffer.c
    ├── markdown.h
    ├── html_blocks.h
    ├── autolink.c
    └── markdown.c
├── package.json
├── SECURITY.md
├── snudown.d.ts
├── header.js
├── .github
    └── workflows
    │   └── ci.yml
├── footer.js
├── html
    ├── houdini.h
    ├── html.h
    ├── houdini_html_e.c
    ├── houdini_href_e.c
    ├── html_smartypants.c
    └── html.c
├── README.markdown
├── snudown.c
└── test_snudown.js


/debian/compat:
--------------------------------------------------------------------------------
1 | 7
2 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.sh text eol=lf
2 | 


--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (native)
2 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | *
2 | !dist/*.js
3 | !dist/*.ts
4 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM emscripten/emsdk:3.1.33
2 | 
3 | # gperf is required by the build; index refresh and install share one layer so a stale cached index is never reused
4 | RUN apt-get update && apt-get install -y --no-install-recommends gperf && rm -rf /var/lib/apt/lists/*
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | build/
 2 | dist/
 3 | node_modules/
 4 | .idea/
 5 | snudown.egg-info/
 6 | *.pyc
 7 | *.so
 8 | *.so.*
 9 | *.o
10 | 
11 | 


--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | 
 3 | # This file was automatically generated by stdeb 0.6.0+git at
 4 | # Wed, 16 Nov 2011 10:36:53 -0800
 5 | 
 6 | %:
 7 | 	dh $@ --with python2 --buildsystem=python_distutils
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/html_block_names.txt:
--------------------------------------------------------------------------------
 1 | ##
 2 | p
 3 | dl
 4 | h1
 5 | h2
 6 | h3
 7 | h4
 8 | h5
 9 | h6
10 | ol
11 | ul
12 | del
13 | div
14 | ins
15 | pre
16 | form
17 | math
18 | table
19 | figure
20 | iframe
21 | script
22 | style
23 | fieldset
24 | noscript
25 | blockquote
26 | span
27 | 


--------------------------------------------------------------------------------
/sundown.def:
--------------------------------------------------------------------------------
 1 | LIBRARY SUNDOWN
 2 | EXPORTS
 3 | 	sdhtml_renderer
 4 | 	sdhtml_toc_renderer
 5 | 	sdhtml_smartypants
 6 | 	bufgrow
 7 | 	bufnew
 8 | 	bufcstr
 9 | 	bufprefix
10 | 	bufput 
11 | 	bufputs
12 | 	bufputc
13 | 	bufrelease
14 | 	bufreset
15 | 	bufslurp
16 | 	bufprintf
17 | 	sd_markdown_new
18 | 	sd_markdown_render
19 | 	sd_markdown_free
20 | 	sd_version


--------------------------------------------------------------------------------
/src/html_entities.h:
--------------------------------------------------------------------------------
 1 | #ifndef HTML_ENTITIES_H
 2 | #define HTML_ENTITIES_H
 3 | 
 4 | #include <stdlib.h>
 5 | #include <stdint.h>
 6 | 
 7 | extern const uint32_t MAX_NUM_ENTITY_VAL;
 8 | 
 9 | extern const size_t MAX_NUM_ENTITY_LEN;
10 | 
11 | int is_valid_numeric_entity(uint32_t entity_val);
12 | 
13 | const char* is_allowed_named_entity (register const char *str, register size_t len);
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "snudown-js",
 3 |   "version": "4.0.1",
 4 |   "description": "a 'native' port of Snudown to JavaScript",
 5 |   "type": "module",
 6 |   "module": "./dist/snudown.js",
 7 |   "exports": "./dist/snudown.js",
 8 |   "types": "./dist/snudown.d.ts",
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "https://github.com/erikdesjardins/snudown-js.git"
12 |   },
13 |   "devDependencies": {
14 |     "uglify-js": "3.17.4"
15 |   },
16 |   "license": "MIT",
17 |   "dependencies": {}
18 | }
19 | 


--------------------------------------------------------------------------------
/src/stack.h:
--------------------------------------------------------------------------------
 1 | #ifndef STACK_H__
 2 | #define STACK_H__
 3 | 
 4 | #include <stdlib.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | struct stack {	/* growable array of opaque pointers, used last-in first-out */
11 | 	void **item;	/* slot array; NULL after stack_free() */
12 | 	size_t size;	/* number of slots currently holding pushed items */
13 | 	size_t asize;	/* number of slots allocated in `item` */
14 | };
15 | 
16 | void stack_free(struct stack *);
17 | int stack_grow(struct stack *, size_t);
18 | int stack_init(struct stack *, size_t);
19 | 
20 | int stack_push(struct stack *, void *);
21 | 
22 | void *stack_pop(struct stack *);
23 | void *stack_top(struct stack *);
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
 1 | Source: snudown
 2 | Maintainer: Neil Williams <neil@reddit.com>
 3 | Section: python
 4 | Priority: optional
 5 | Build-Depends: python-all-dev (>= 2.6.6-3), debhelper (>= 7), python-setuptools, gperf
 6 | Standards-Version: 3.9.3
 7 | Homepage: https://github.com/reddit/snudown
 8 | Vcs-Git: git://github.com/reddit/snudown.git
 9 | 
10 | Package: python-snudown
11 | Architecture: any
12 | Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}
13 | Breaks: ${python:Breaks}
14 | Description: reddit's python wrapper and customization of the Sundown Markdown interpreter.
15 | 
16 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | For safety reasons, whenever you add or change something in Snudown,
 2 | you should add a few test-cases that demonstrate your change and do a
 3 | fuzzing run in `/fuzzing` by running `make afl`. Make sure you have `cmake`
 4 | installed and in your `PATH`!
 5 | 
 6 | This uses [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/) and a
 7 | modified [Google Gumbo](https://github.com/google/gumbo-parser/) to ensure
 8 | there is no way to generate invalid HTML, and that there are no unsafe
 9 | memory operations.
10 | 
11 | See [American Fuzzy Lop](http://lcamtuf.coredump.cx/afl/)'s instructions
12 | for your platform to get started.
13 | 


--------------------------------------------------------------------------------
/snudown.d.ts:
--------------------------------------------------------------------------------
 1 | interface Options {
 2 |     /**
 3 |      * Whether to add `rel="nofollow"` to all links.
 4 |      */
 5 |     nofollow?: boolean;
 6 |     /**
 7 |      * The `target` property of all links.
 8 |      */
 9 |     target?: string;
10 |     /**
11 |      * Whether to create a table of contents.
12 |      *
13 |      * Note that Reddit postprocesses the output instead of using this option to generate a TOC.
14 |      */
15 |     enableToc?: boolean;
16 |     /**
17 |      * Added to the `id` of each TOC link, i.e. `#PREFIXtoc_0`.
18 |      */
19 |     tocIdPrefix?: string;
20 | }
21 | 
22 | /**
23 |  * Render markdown `text` to an HTML string using the usertext renderer.
24 |  */
25 | export function markdown(text: string, options?: Options): string;
26 | 
27 | /**
28 |  * Render markdown `text` to an HTML string using the wiki renderer.
29 |  */
30 | export function markdownWiki(text: string, options?: Options): string;
31 | 


--------------------------------------------------------------------------------
/header.js:
--------------------------------------------------------------------------------
 1 | /* snudown-js - a 'native' port of Snudown to JavaScript */
 2 | 
 3 | /*
 4 |  * Copyright (c) 2009, Natacha Porté
 5 |  * Copyright (c) 2011, Vicent Marti
 6 |  * Copyright (c) 2015, Erik Desjardins
 7 |  *
 8 |  * Permission to use, copy, modify, and distribute this software for any
 9 |  * purpose with or without fee is hereby granted, provided that the above
10 |  * copyright notice and this permission notice appear in all copies.
11 |  *
12 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 |  */
20 | 
21 | (function() {'use strict';
22 | var Module = {};
23 | 


--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
 1 | Format: http://dep.debian.net/deps/dep5
 2 | Upstream-Name: snudown
 3 | Source: https://github.com/reddit/snudown
 4 | 
 5 | Files: *
 6 | Copyright: 2011-2012 Vicent Marti
 7 |            2011-2012 reddit Inc.
 8 | License: MIT
 9 | 
10 | Files: debian/*
11 | Copyright: 2011-2012 reddit Inc.
12 | License: MIT
13 | 
14 | Files: test_snudown.py
15 | Copyright: 2011-2012 reddit Inc.
16 | License: MIT
17 | 
18 | License: MIT
19 |  Permission to use, copy, modify, and distribute this software for any purpose
20 |  with or without fee is hereby granted, provided that the above copyright
21 |  notice and this permission notice appear in all copies.
22 |  .
23 |  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
24 |  REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
25 |  AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
26 |  INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
27 |  LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
28 |  OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
29 |  PERFORMANCE OF THIS SOFTWARE.
30 | 
31 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |     - master
 7 |     tags:
 8 |     - v*.*.*
 9 |   pull_request:
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - uses: actions/checkout@v4  # v2/v1 of these actions run on retired Node runtimes
16 |     - uses: actions/setup-node@v4
17 |       with:
18 |         node-version: '12.x'
19 |         registry-url: 'https://registry.npmjs.org'
20 |     - run: npm install
21 |     - run: docker build -t emscripten .
22 |     - run: docker run --rm -v $(pwd):/src emscripten ./build.sh
23 |     - run: node test_snudown.js
24 |     - run: ls -lh dist
25 |       if: "!cancelled()"  # still list the output when an earlier step failed
26 |     - uses: actions/upload-artifact@v4  # v1/v2 of this action are no longer accepted by GitHub
27 |       with:
28 |         name: dist
29 |         path: dist
30 |       if: "!cancelled()"
31 |     - uses: softprops/action-gh-release@v1
32 |       if: startsWith(github.ref, 'refs/tags/')  # release and publish only for tag builds
33 |       with:
34 |         files: |
35 |           dist/snudown.js
36 |           dist/snudown_es.js
37 |       env:
38 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
39 |     - run: npm publish
40 |       if: startsWith(github.ref, 'refs/tags/')
41 |       env:
42 |         NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
43 | 


--------------------------------------------------------------------------------
/src/stack.c:
--------------------------------------------------------------------------------
 1 | #include "stack.h"
 2 | #include <string.h>
 3 | 
 4 | /*
 5 |  * stack_grow: make room for at least `new_size` pointers, zeroing new slots.
 6 |  * Returns 0 on success or when already large enough, -1 if allocation fails
 7 |  * or the byte count would overflow size_t (the stack is left unchanged). */
 8 | int
 9 | stack_grow(struct stack *st, size_t new_size)
10 | {
11 | 	void **new_st;
12 | 
13 | 	if (st->asize >= new_size)
14 | 		return 0;
15 | 	if (new_size > (size_t)-1 / sizeof(void *))	/* byte count would wrap */
16 | 		return -1;
17 | 	new_st = realloc(st->item, new_size * sizeof(void *));
18 | 	if (new_st == NULL)
19 | 		return -1;
20 | 	memset(new_st + st->asize, 0x0, (new_size - st->asize) * sizeof(void *));
21 | 	st->item = new_st;
22 | 	st->asize = new_size;
23 | 	if (st->size > new_size)	/* defensive clamp kept from the original */
24 | 		st->size = new_size;
25 | 	return 0;
26 | }
27 | 
28 | /* stack_free: release the slot array and reset the stack to empty.
29 |  * Accepts NULL. The struct itself is not freed. */
30 | void
31 | stack_free(struct stack *st)
32 | {
33 | 	if (st != NULL) {
34 | 		free(st->item);
35 | 		st->asize = 0;
36 | 		st->size = 0;
37 | 		st->item = NULL;
38 | 	}
39 | }
40 | 
41 | /* stack_init: start from an empty stack and pre-allocate `initial_size`
42 |  * slots (8 when 0 is passed). Returns stack_grow()'s result. */
43 | int
44 | stack_init(struct stack *st, size_t initial_size)
45 | {
46 | 	const size_t wanted = initial_size ? initial_size : 8;
47 | 
48 | 	st->asize = 0;
49 | 	st->size = 0;
50 | 	st->item = NULL;
51 | 	return stack_grow(st, wanted);
52 | }
53 | 
54 | /* stack_pop: remove and return the newest item.
55 |  * An empty stack yields NULL and is left untouched. */
56 | void *
57 | stack_pop(struct stack *st)
58 | {
59 | 	/* the decrement sits in the taken branch only, so size never underflows */
60 | 	return st->size ? st->item[--st->size] : NULL;
61 | }
62 | 
63 | /* stack_push: append `item`, growing the slot array as needed; 0 on success, -1 if growing fails. */
64 | int
65 | stack_push(struct stack *st, void *item)
66 | {
67 | 	if (stack_grow(st, st->size ? st->size * 2 : 1) < 0)	/* never request 0 slots: an unallocated stack has no item[0] */
68 | 		return -1;
69 | 	st->item[st->size++] = item;
70 | 	return 0;
71 | }
72 | 
73 | /* stack_top: return the newest item without removing it.
74 |  * An empty stack yields NULL. */
75 | void *
76 | stack_top(struct stack *st)
77 | {
78 | 	const size_t count = st->size;
79 | 	return count ? st->item[count - 1] : NULL;
80 | }
81 | 
82 | 


--------------------------------------------------------------------------------
/footer.js:
--------------------------------------------------------------------------------
 1 | function _mallocString(str) {
 2 | 	// https://github.com/kripken/emscripten/blob/3ebf0eed375120626ae5c2233b26bf236ea90046/src/preamble.js#L148
 3 | 	// at most 4 bytes per UTF-8 code point, +1 for the trailing '\0'
 4 | 	var len = (str.length << 2) + 1;
 5 | 	var ptr = _malloc(len);
 6 | 	stringToUTF8(str, ptr, len);
 7 | 	return ptr;
 8 | }
 9 | 
10 | function _markdown(renderer, text, options) {
11 | 	if (typeof text !== 'string') text = '';
12 | 	var str = _mallocString(text);
13 | 	var size = lengthBytesUTF8(text); // excludes null terminator
14 | 
15 | 	if (typeof options !== 'object' || options === null) options = {};
16 | 	var nofollow = options['nofollow'] ? 1 : 0;
17 | 	var target = typeof options['target'] === 'string' ? _mallocString(options['target']) : 0;
18 | 	var toc_id_prefix = typeof options['tocIdPrefix'] === 'string' ? _mallocString(options['tocIdPrefix']) : 0;
19 | 	var enable_toc = options['enableToc'] ? 1 : 0;
20 | 
21 | 	var ptr = renderer(str, size, nofollow, target, toc_id_prefix, enable_toc);
22 | 	var string = UTF8ToString(ptr);
23 | 
24 | 	_free(ptr);
25 | 	_free(toc_id_prefix);
26 | 	_free(target);
27 | 	_free(str);
28 | 
29 | 	return string;
30 | }
31 | 
32 | function markdown(text, options) {
33 | 	return _markdown(_default_renderer, text, options);
34 | }
35 | 
36 | function markdownWiki(text,	options) {
37 | 	return _markdown(_wiki_renderer, text, options);
38 | }
39 | 
40 | window['markdown'] = markdown;
41 | window['markdownWiki'] = markdownWiki;
42 | })();
43 | 


--------------------------------------------------------------------------------
/html/houdini.h:
--------------------------------------------------------------------------------
 1 | #ifndef HOUDINI_H__
 2 | #define HOUDINI_H__
 3 | 
 4 | #include "buffer.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | #ifdef HOUDINI_USE_LOCALE
11 | #	define _isxdigit(c) isxdigit(c)
12 | #	define _isdigit(c) isdigit(c)
13 | #else
14 | /*
15 |  * Locale-independent digit tests. memchr() with an explicit length is used
16 |  * so that NUL is rejected: strchr() would match the string terminator. */
17 | #	define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
18 | #	define _isdigit(c) ((c) >= '0' && (c) <= '9')
19 | #endif
20 | 
21 | extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
22 | extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
23 | extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
24 | extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
25 | extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
26 | extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
27 | extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
28 | extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
29 | extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
30 | extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
31 | extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
32 | 
33 | #ifdef __cplusplus
34 | }
35 | #endif
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/src/autolink.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #ifndef UPSKIRT_AUTOLINK_H
18 | #define UPSKIRT_AUTOLINK_H
19 | 
20 | #include "buffer.h"
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
26 | enum {
27 | 	SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
28 | };
29 | 
30 | int
31 | sd_autolink_issafe(const uint8_t *link, size_t link_len);
32 | 
33 | size_t
34 | sd_autolink__www(size_t *rewind_p, struct buf *link,
35 | 	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
36 | 
37 | size_t
38 | sd_autolink__email(size_t *rewind_p, struct buf *link,
39 | 	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
40 | 
41 | size_t
42 | sd_autolink__url(size_t *rewind_p, struct buf *link,
43 | 	uint8_t *data, size_t max_rewind, size_t size, unsigned int flags);
44 | 
45 | extern size_t
46 | sd_autolink__subreddit(size_t *rewind_p, struct buf *link, uint8_t *data, 
47 | 	size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
48 | 
49 | extern size_t
50 | sd_autolink__username(size_t *rewind_p, struct buf *link, uint8_t *data, 
51 | 	size_t max_rewind, size_t max_lookbehind, size_t size, int *no_slash);
52 | 
53 | #ifdef __cplusplus
54 | }
55 | #endif
56 | 
57 | #endif
58 | 
59 | /* vim: set filetype=c: */
60 | 


--------------------------------------------------------------------------------
/html/html.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #ifndef UPSKIRT_HTML_H
18 | #define UPSKIRT_HTML_H
19 | 
20 | #include "markdown.h"
21 | #include "buffer.h"
22 | #include <stdlib.h>
23 | 
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 | 
28 | struct html_renderopt {
29 | 	struct {
30 | 		int header_count;
31 | 		int current_level;
32 | 		int level_offset;
33 | 	} toc_data;
34 | 
35 | 	char* toc_id_prefix;
36 | 
37 | 	unsigned int flags;
38 | 
39 | 	char** html_element_whitelist;
40 | 	char** html_attr_whitelist;
41 | 
42 | 	/* extra callbacks */
43 | 	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
44 | };
45 | 
46 | typedef enum {
47 | 	HTML_SKIP_HTML = (1 << 0),
48 | 	HTML_SKIP_STYLE = (1 << 1),
49 | 	HTML_SKIP_IMAGES = (1 << 2),
50 | 	HTML_SKIP_LINKS = (1 << 3),
51 | 	HTML_EXPAND_TABS = (1 << 4),
52 | 	HTML_SAFELINK = (1 << 5),
53 | 	HTML_TOC = (1 << 6),
54 | 	HTML_HARD_WRAP = (1 << 7),
55 | 	HTML_USE_XHTML = (1 << 8),
56 | 	HTML_ESCAPE = (1 << 9),
57 | 	HTML_ALLOW_ELEMENT_WHITELIST = (1 << 10),
58 | } html_render_mode;
59 | 
60 | typedef enum {
61 | 	HTML_TAG_NONE = 0,
62 | 	HTML_TAG_OPEN,
63 | 	HTML_TAG_CLOSE,
64 | } html_tag;
65 | 
66 | int
67 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
68 | 
69 | extern void
70 | sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
71 | 
72 | extern void
73 | sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
74 | 
75 | extern void
76 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
77 | 
78 | #ifdef __cplusplus
79 | }
80 | #endif
81 | 
82 | #endif
83 | 
84 | 


--------------------------------------------------------------------------------
/html/houdini_html_e.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | #include "houdini.h"
 6 | 
 7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
 8 | 
 9 | /**
10 |  * According to the OWASP rules:
11 |  *
12 |  * & --> &amp;
13 |  * < --> &lt;
14 |  * > --> &gt;
15 |  * " --> &quot;
16 |  * ' --> &#39;      (OWASP lists the equivalent &#x27;); &apos; is not recommended
17 |  * / --> &#47;      (equivalent to &#x2F;) escaped only in secure mode, as it helps end an HTML entity
18 |  *
19 |  */
20 | static const char HTML_ESCAPE_TABLE[] = {	/* maps each byte to an index into HTML_ESCAPES; 0 = copy as-is */
21 | 	7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 0, 7, 7,	/* 0x00-0x0F: control characters (7) except \t, \n, \r */
22 | 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,	/* 0x10-0x1F: control characters (7) */
23 | 	0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,	/* 0x20-0x2F: " & ' / */
24 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,	/* 0x30-0x3F: < > */
25 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x40 and above: never escaped */
26 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37 | };
38 | 
39 | static const char *HTML_ESCAPES[] = {	/* replacement text, indexed by HTML_ESCAPE_TABLE values */
40 |         "",	/* 0: placeholder; bytes mapped to 0 are copied verbatim */
41 |         "&quot;",	/* 1: " */
42 |         "&amp;",	/* 2: & */
43 |         "&#39;",	/* 3: ' */
44 |         "&#47;",	/* 4: / (emitted in secure mode only) */
45 |         "&lt;",	/* 5: < */
46 |         "&gt;",	/* 6: > */
47 |         "", // 7: control characters are thrown out
48 | };
49 | 
50 | /*
51 |  * houdini_escape_html0: append `size` bytes of `src` to `ob`, replacing
52 |  * the bytes flagged in HTML_ESCAPE_TABLE with their entity from
53 |  * HTML_ESCAPES and dropping control characters (table value 7).
54 |  * '/' is only escaped when `secure` is non-zero.
55 |  */
56 | void
57 | houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
58 | {
59 | 	size_t pos, run_start, code = 0;
60 | 
61 | 	bufgrow(ob, ESCAPE_GROW_FACTOR(size));
62 | 
63 | 	for (pos = 0; pos < size; pos++) {
64 | 		/* copy the longest run of bytes that need no treatment in one call */
65 | 		for (run_start = pos; pos < size; pos++) {
66 | 			code = HTML_ESCAPE_TABLE[src[pos]];
67 | 			if (code != 0)
68 | 				break;
69 | 		}
70 | 		if (pos != run_start)
71 | 			bufput(ob, src + run_start, pos - run_start);
72 | 		if (pos >= size)
73 | 			break;
74 | 
75 | 		if (!secure && src[pos] == '/')
76 | 			bufputc(ob, '/');	/* slash stays literal outside secure mode */
77 | 		else if (code != 7)	/* 7 marks a control character: emit nothing */
78 | 			bufputs(ob, HTML_ESCAPES[code]);
79 | 	}
80 | }
81 | 
82 | void
83 | houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
84 | {	/* convenience wrapper around houdini_escape_html0 */
85 | 	houdini_escape_html0(ob, src, size, 1);	/* secure = 1: '/' is escaped as well */
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
 1 | snudown-js
 2 | =======
 3 | 
 4 | [![Build Status](https://github.com/erikdesjardins/snudown-js/actions/workflows/ci.yml/badge.svg)](https://github.com/erikdesjardins/snudown-js/actions/workflows/ci.yml)
 5 | 
 6 | `snudown-js` is a 'native' (compiled with [Emscripten](https://kripken.github.io/emscripten-site/)) port of [Snudown](https://github.com/reddit/snudown/), the Markdown parser used by Reddit.
 7 | 
 8 | 
 9 | Usage
10 | -----
11 | 
12 | Import from [npm](https://www.npmjs.com/package/snudown-js): `import * as Snudown from 'snudown-js'` (the package is published as an ES module).
13 | 
14 | Basic usage:
15 | 
16 | `Snudown.markdown('some text'); // "<p>some text</p>\n"`
17 | 
18 | `Snudown.markdownWiki('<table scope="foo">'); // "<p><table scope="foo"></p>\n"`
19 | 
20 | For more in-depth documentation of the available options, see the comments in [`snudown.d.ts`](https://github.com/erikdesjardins/snudown-js/blob/master/snudown.d.ts).
21 | 
22 | Building
23 | --------
24 | 
25 | ### You will need...
26 | 
27 | - to be able to run bash scripts
28 | - `gperf`, [a command-line utility](https://www.gnu.org/software/gperf/) - through your package manager
29 | - `npm`, [node package manager](https://www.npmjs.com/) - through your package manager
30 | - `emcc`, the Emscripten compiler - [from the Emscripten SDK](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html) (or from [source](http://kripken.github.io/emscripten-site/docs/building_from_source/building_emscripten_from_source_using_the_sdk.html)) (at least version 1.36.0)
31 | 
32 | ### Setup
33 | 
34 | Run `npm i`.
35 | 
36 | Ensure that `gperf` and `emcc` can be invoked from the terminal.
37 | 
38 | ### Build
39 | 
40 | Run `./build.sh`.
41 | 
42 | Output is to `dist/`.
43 | 
44 | 
45 | Testing
46 | -------
47 | 
48 | ### You will need...
49 | 
50 | - `node`, the node.js runtime - probably installed during the build process
51 | - a successful build of `snudown-js`
52 | 
53 | After building, run `node test_snudown.js`.
54 | 
55 | 
56 | License
57 | -------
58 | 
59 | Permission to use, copy, modify, and distribute this software for any
60 | purpose with or without fee is hereby granted, provided that the above
61 | copyright notice and this permission notice appear in all copies.
62 | 
63 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
64 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
65 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
66 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
67 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
68 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
69 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
70 | 


--------------------------------------------------------------------------------
/src/buffer.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2008, Natacha Porté
  3 |  * Copyright (c) 2011, Vicent Martí
  4 |  *
  5 |  * Permission to use, copy, modify, and distribute this software for any
  6 |  * purpose with or without fee is hereby granted, provided that the above
  7 |  * copyright notice and this permission notice appear in all copies.
  8 |  *
  9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16 |  */
 17 | 
 18 | #ifndef BUFFER_H__
 19 | #define BUFFER_H__
 20 | 
 21 | #include <stddef.h>
 22 | #include <stdarg.h>
 23 | #include <stdint.h>
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | #if defined(_MSC_VER)
 30 | #define __attribute__(x)
 31 | #define inline
 32 | #endif
 33 | 
 34 | typedef enum {
 35 | 	BUF_OK = 0,
 36 | 	BUF_ENOMEM = -1,
 37 | 	BUF_EINVALIDIDX = -2,
 38 | } buferror_t;
 39 | 
 40 | /* struct buf: character array buffer */
 41 | struct buf {
 42 | 	uint8_t *data;		/* actual character data */
 43 | 	size_t size;	/* size of the string */
 44 | 	size_t asize;	/* allocated size (0 = volatile buffer) */
 45 | 	size_t unit;	/* reallocation unit size (0 = read-only buffer) */
 46 | };
 47 | 
 48 | /* BUF_STATIC: initializer for a read-only (unit 0) buffer over a string literal */
 49 | #define BUF_STATIC(string) \
 50 | 	{ (uint8_t *)string, sizeof string -1, sizeof string, 0 }
 51 | 
 52 | /* BUF_VOLATILE: initializer for a volatile (asize 0) buffer that borrows an existing C string */
 53 | #define BUF_VOLATILE(strname) \
 54 | 	{ (uint8_t *)strname, strlen(strname), 0, 0 }
 55 | 
 56 | /* BUFPUTSL: optimized bufputs of a string literal */
 57 | #define BUFPUTSL(output, literal) \
 58 | 	bufput(output, literal, sizeof literal - 1)
 59 | 
 60 | /* bufgrow: increasing the allocated size to the given value */
 61 | int bufgrow(struct buf *, size_t);
 62 | 
 63 | /* bufnew: allocation of a new buffer */
 64 | struct buf *bufnew(size_t) __attribute__ ((malloc));
 65 | 
 66 | /* bufcstr: NUL-termination of the string array (making a C-string) */
 67 | const char *bufcstr(struct buf *);
 68 | 
 69 | /* bufprefix: compare the beginning of a buffer with a string */
 70 | int bufprefix(const struct buf *buf, const char *prefix);
 71 | 
 72 | /* bufput: appends raw data to a buffer */
 73 | void bufput(struct buf *, const void *, size_t);
 74 | 
 75 | /* bufputs: appends a NUL-terminated string to a buffer */
 76 | void bufputs(struct buf *, const char *);
 77 | 
 78 | /* bufputc: appends a single char to a buffer */
 79 | void bufputc(struct buf *, int);
 80 | 
 81 | /* bufputi: appends a formatted integer to a buffer, like vsnprintf("%d") */
 82 | void bufputi(struct buf *, int);
 83 | 
 84 | /* bufrelease: decrease the reference count and free the buffer if needed */
 85 | void bufrelease(struct buf *);
 86 | 
 87 | /* bufreset: frees internal data of the buffer */
 88 | void bufreset(struct buf *);
 89 | 
 90 | /* bufslurp: removes a given number of bytes from the head of the array */
 91 | void bufslurp(struct buf *, size_t);
 92 | 
 93 | /* buftruncate: truncates the buffer at `size` */
 94 | int buftruncate(struct buf *buf, size_t size);
 95 | 
 96 | #ifdef __cplusplus
 97 | }
 98 | #endif
 99 | 
100 | #endif
101 | 


--------------------------------------------------------------------------------
/html/houdini_href_e.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #include "houdini.h"
  6 | 
  7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* 1.2 times the input size */
  8 | 
  9 | /*
 10 |  * The following characters will not be escaped:
 11 |  *
 12 |  *		-_.+!*'(),%#@?=;:/,+&$ alphanum
 13 |  *
 14 |  * Note that this character set is the addition of:
 15 |  *	- The characters which are safe to be in an URL
 16 |  *	- The characters which are *not* safe to be in
 17 |  *	an URL because they are RESERVED characters.
 18 |  *
 19 |  * We assume (lazily) that any RESERVED char that
 20 |  * appears inside an URL is actually meant to
 21 |  * have its native function (i.e. as an URL
 22 |  * component/separator) and hence needs no escaping.
 23 |  *
 24 |  * There are two exceptions: the characters & (amp)
 25 |  * and ' (single quote) are not marked safe in the table.
 26 |  * They are meant to appear in the URL as components,
 27 |  * yet they require special HTML-entity escaping
 28 |  * to generate valid HTML markup.
 29 |  *
 30 |  * Table values, indexed by byte: 1 = copied as-is;
 31 |  * 2 = control character, dropped from the output;
 32 |  * 0 = escaped (%XX, or an HTML entity for & and ').
 33 |  */
 34 | static const char HREF_SAFE[] = {
 35 | 	2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, /* 0x00: tab, LF and CR are escaped, not dropped */
 36 | 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x10 */
 37 | 	0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20: space, ", & and ' are not safe */
 38 | 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /* 0x30: < and > are not safe */
 39 | 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
 40 | 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50: [ \ ] ^ are not safe */
 41 | 	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60: ` is not safe */
 42 | 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70: { | } ~ and DEL are not safe */
 43 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 and above: always escaped */
 44 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 45 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 46 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 47 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 48 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 49 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 50 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 51 | };
 52 | 
 53 | /*
 54 |  * houdini_escape_href: append `src` to `ob`, escaped for use inside an
 55 |  * HTML href attribute.
 56 |  *
 57 |  * Every input byte is classified through the HREF_SAFE table:
 58 |  *	- bytes marked 1 are copied unchanged, a whole run at a time;
 59 |  *	- bytes marked 2 (control characters) are dropped;
 60 |  *	- '&' is written as "&amp;" and the single quote as "&#x27;";
 61 |  *	- anything else is written as %XX, with upper-case hex digits.
 62 |  *
 63 |  * The space takes the generic %20 form; the '+' form is more commonly
 64 |  * seen when building GET strings and is not produced here.
 65 |  *
 66 |  * The result of the initial bufgrow() is not checked; bufput() grows the
 67 |  * buffer again on demand.
 68 |  */
 69 | void
 70 | houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
 71 | {
 72 | 	static const char hex_chars[] = "0123456789ABCDEF";
 73 | 	char pct[3] = { '%', 0, 0 };
 74 | 	size_t pos, run_start;
 75 | 	uint8_t c;
 76 | 
 77 | 	/* pre-size the output with some head-room for the escapes */
 78 | 	bufgrow(ob, ESCAPE_GROW_FACTOR(size));
 79 | 
 80 | 	for (pos = 0; pos < size; pos++) {
 81 | 		/* flush the longest run of bytes that need no
 82 | 		 * special processing */
 83 | 		run_start = pos;
 84 | 		while (pos < size && HREF_SAFE[src[pos]] == 1)
 85 | 			pos++;
 86 | 
 87 | 		if (pos != run_start)
 88 | 			bufput(ob, src + run_start, pos - run_start);
 89 | 
 90 | 		/* the run reached the end of the input */
 91 | 		if (pos == size)
 92 | 			break;
 93 | 
 94 | 		c = src[pos];
 95 | 
 96 | 		/* throw out control characters */
 97 | 		if (HREF_SAFE[c] == 2)
 98 | 			continue;
 99 | 
100 | 		if (c == '&') {
101 | 			/* amp appears all the time in URLs, but needs
102 | 			 * HTML-entity escaping to be inside an href */
103 | 			BUFPUTSL(ob, "&amp;");
104 | 		} else if (c == '\'') {
105 | 			/* the single quote is a valid URL character
106 | 			 * according to the standard; it needs HTML
107 | 			 * entity escaping too */
108 | 			BUFPUTSL(ob, "&#x27;");
109 | 		} else {
110 | 			/* every other character goes with a %XX escaping */
111 | 			pct[1] = hex_chars[(c >> 4) & 0xF];
112 | 			pct[2] = hex_chars[c & 0xF];
113 | 			bufput(ob, pct, 3);
114 | 		}
115 | 	}
116 | }
117 | 


--------------------------------------------------------------------------------
/src/html_entities.gperf:
--------------------------------------------------------------------------------
  1 | %language=ANSI-C
  2 | %define lookup-function-name is_allowed_named_entity
  3 | %compare-strncmp
  4 | %readonly-tables
  5 | %define hash-function-name hash_html_entity
  6 | %enum
  7 | %includes
  8 | %{
  9 | #include "html_entities.h"
 10 | 
 11 | #include <stdlib.h>
 12 | #include <stdint.h>
 13 | 
 14 | /* Parsers tend to choke on entities with values greater than this */
 15 | const uint32_t MAX_NUM_ENTITY_VAL = 0x10ffff;
 16 | /* Any numeric entity longer than this is obviously above MAX_NUM_ENTITY_VAL;
 17 |  * the length limit is used to avoid dealing with overflows. */
 18 | const size_t MAX_NUM_ENTITY_LEN = 7;
 19 | 
 20 | inline int is_valid_numeric_entity(uint32_t entity_val)
 21 | {
 22 | 	/* Some XML parsers (lxml among them) choke on entities with certain
 23 | 	 * values, mostly control characters; every range below is inclusive. */
 24 | 	if (entity_val <= 8)
 25 | 		return 0;
 26 | 	/* 9 (tab), 10 (LF) and 13 (CR) are the only values below 32 that pass */
 27 | 	if (entity_val == 11 || entity_val == 12)
 28 | 		return 0;
 29 | 	if (entity_val >= 14 && entity_val <= 31)
 30 | 		return 0;
 31 | 	/* 55296..57343 */
 32 | 	if (entity_val >= 0xD800 && entity_val <= 0xDFFF)
 33 | 		return 0;
 34 | 	/* 65534 and 65535 */
 35 | 	if (entity_val == 0xFFFE || entity_val == 0xFFFF)
 36 | 		return 0;
 37 | 	/* whatever is left only has to stay within the upper bound */
 38 | 	return entity_val <= MAX_NUM_ENTITY_VAL;
 39 | }
 40 | 
 41 | %}
 42 | %%
 43 | &AElig;
 44 | &Aacute;
 45 | &Acirc;
 46 | &Agrave;
 47 | &Alpha;
 48 | &Aring;
 49 | &Atilde;
 50 | &Auml;
 51 | &Beta;
 52 | &Ccedil;
 53 | &Chi;
 54 | &Dagger;
 55 | &Delta;
 56 | &ETH;
 57 | &Eacute;
 58 | &Ecirc;
 59 | &Egrave;
 60 | &Epsilon;
 61 | &Eta;
 62 | &Euml;
 63 | &Gamma;
 64 | &Iacute;
 65 | &Icirc;
 66 | &Igrave;
 67 | &Iota;
 68 | &Iuml;
 69 | &Kappa;
 70 | &Lambda;
 71 | &Mu;
 72 | &Ntilde;
 73 | &Nu;
 74 | &OElig;
 75 | &Oacute;
 76 | &Ocirc;
 77 | &Ograve;
 78 | &Omega;
 79 | &Omicron;
 80 | &Oslash;
 81 | &Otilde;
 82 | &Ouml;
 83 | &Phi;
 84 | &Pi;
 85 | &Prime;
 86 | &Psi;
 87 | &Rho;
 88 | &Scaron;
 89 | &Sigma;
 90 | &THORN;
 91 | &Tau;
 92 | &Theta;
 93 | &Uacute;
 94 | &Ucirc;
 95 | &Ugrave;
 96 | &Upsilon;
 97 | &Uuml;
 98 | &Xi;
 99 | &Yacute;
100 | &Yuml;
101 | &Zeta;
102 | &aacute;
103 | &acirc;
104 | &acute;
105 | &aelig;
106 | &agrave;
107 | &alefsym;
108 | &alpha;
109 | &amp;
110 | &and;
111 | &ang;
112 | &apos;
113 | &aring;
114 | &asymp;
115 | &atilde;
116 | &auml;
117 | &bdquo;
118 | &beta;
119 | &brvbar;
120 | &bull;
121 | &cap;
122 | &ccedil;
123 | &cedil;
124 | &cent;
125 | &chi;
126 | &circ;
127 | &clubs;
128 | &cong;
129 | &copy;
130 | &crarr;
131 | &cup;
132 | &curren;
133 | &dArr;
134 | &dagger;
135 | &darr;
136 | &deg;
137 | &delta;
138 | &diams;
139 | &divide;
140 | &eacute;
141 | &ecirc;
142 | &egrave;
143 | &empty;
144 | &emsp;
145 | &ensp;
146 | &epsilon;
147 | &equiv;
148 | &eta;
149 | &eth;
150 | &euml;
151 | &euro;
152 | &exist;
153 | &fnof;
154 | &forall;
155 | &frac12;
156 | &frac14;
157 | &frac34;
158 | &frasl;
159 | &gamma;
160 | &ge;
161 | &gt;
162 | &hArr;
163 | &harr;
164 | &hearts;
165 | &hellip;
166 | &iacute;
167 | &icirc;
168 | &iexcl;
169 | &igrave;
170 | &image;
171 | &infin;
172 | &int;
173 | &iota;
174 | &iquest;
175 | &isin;
176 | &iuml;
177 | &kappa;
178 | &lArr;
179 | &lambda;
180 | &lang;
181 | &laquo;
182 | &larr;
183 | &lceil;
184 | &ldquo;
185 | &le;
186 | &lfloor;
187 | &lowast;
188 | &loz;
189 | &lrm;
190 | &lsaquo;
191 | &lsquo;
192 | &lt;
193 | &macr;
194 | &mdash;
195 | &micro;
196 | &middot;
197 | &minus;
198 | &mu;
199 | &nabla;
200 | &nbsp;
201 | &ndash;
202 | &ne;
203 | &ni;
204 | &not;
205 | &notin;
206 | &nsub;
207 | &ntilde;
208 | &nu;
209 | &oacute;
210 | &ocirc;
211 | &oelig;
212 | &ograve;
213 | &oline;
214 | &omega;
215 | &omicron;
216 | &oplus;
217 | &or;
218 | &ordf;
219 | &ordm;
220 | &oslash;
221 | &otilde;
222 | &otimes;
223 | &ouml;
224 | &para;
225 | &part;
226 | &permil;
227 | &perp;
228 | &phi;
229 | &pi;
230 | &piv;
231 | &plusmn;
232 | &pound;
233 | &prime;
234 | &prod;
235 | &prop;
236 | &psi;
237 | &quot;
238 | &rArr;
239 | &radic;
240 | &rang;
241 | &raquo;
242 | &rarr;
243 | &rceil;
244 | &rdquo;
245 | &real;
246 | &reg;
247 | &rfloor;
248 | &rho;
249 | &rlm;
250 | &rsaquo;
251 | &rsquo;
252 | &sbquo;
253 | &scaron;
254 | &sdot;
255 | &sect;
256 | &shy;
257 | &sigma;
258 | &sigmaf;
259 | &sim;
260 | &spades;
261 | &sub;
262 | &sube;
263 | &sum;
264 | &sup1;
265 | &sup2;
266 | &sup3;
267 | &sup;
268 | &supe;
269 | &szlig;
270 | &tau;
271 | &there4;
272 | &theta;
273 | &thetasym;
274 | &thinsp;
275 | &thorn;
276 | &tilde;
277 | &times;
278 | &trade;
279 | &uArr;
280 | &uacute;
281 | &uarr;
282 | &ucirc;
283 | &ugrave;
284 | &uml;
285 | &upsih;
286 | &upsilon;
287 | &uuml;
288 | &weierp;
289 | &xi;
290 | &yacute;
291 | &yen;
292 | &yuml;
293 | &zeta;
294 | &zwj;
295 | &zwnj;
296 | 


--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
  1 | snudown (1.5.0) unstable; urgency=medium
  2 |   * add blockspoiler support
  3 |   * add inline spoiler support
  4 | 
  5 |  -- Jesjit Birak <jesjit.birak@reddit.com>  Wed, 07 Mar 2018 13:21:45 -0800
  6 | 
  7 | snudown (1.4.0) unstable; urgency=medium
  8 | 
  9 |   * autolink r/subreddit and u/user
 10 |   * security: don't rewind over previous inlines when autolinking
 11 |   * email autolinks re-enabled due to ^
 12 |   * more stringent character entity checks and sanitization
 13 |   * properly handle URLs containing control characters
 14 | 
 15 |  -- Jordan Milne <jordan.milne@reddit.com>  Mon, 01 Jun 2015 13:04:23 -0700
 16 | 
 17 | snudown (1.3.2) unstable; urgency=medium
 18 | 
 19 |   * fix alphanumeric-named entities
 20 | 
 21 |  -- Neil Williams <neil@reddit.com>  Wed, 25 Feb 2015 13:32:41 -0800
 22 | 
 23 | snudown (1.3.1) unstable; urgency=medium
 24 | 
 25 |   * add missing entities to entity whitelist
 26 | 
 27 |  -- Neil Williams <neil@reddit.com>  Tue, 24 Feb 2015 22:12:29 -0800
 28 | 
 29 | snudown (1.3.0) unstable; urgency=medium
 30 | 
 31 |   * validate html entities and escape unrecognized ones
 32 | 
 33 |  -- Neil Williams <neil@reddit.com>  Tue, 24 Feb 2015 17:55:38 -0800
 34 | 
 35 | snudown (1.2.0) unstable; urgency=medium
 36 | 
 37 |   * security: fix rewind issues
 38 |   * email autolinks disabled due to ^
 39 |   * security: fix table header OOM bomb
 40 | 
 41 |  -- Neil Williams <neil@reddit.com>  Sat, 20 Sep 2014 11:59:34 -0700
 42 | 
 43 | snudown (1.1.6) unstable; urgency=low
 44 | 
 45 |   * add ts3server url scheme to whitelist
 46 |   * redo html sanitization for wiki renderer
 47 | 
 48 |  -- Neil Williams <neil@reddit.com>  Tue, 01 Apr 2014 17:12:50 -0700
 49 | 
 50 | snudown (1.1.5) unstable; urgency=low
 51 | 
 52 |   * bring path stuff into user/subreddit autolinking (multis, subpages etc.)
 53 |   * make /u/ autolinking case sensitive
 54 | 
 55 |  -- Neil Williams <neil@reddit.com>  Wed, 22 May 2013 16:09:31 -0700
 56 | 
 57 | snudown (1.1.4) unstable; urgency=low
 58 | 
 59 |   * make /r/ autolinking case sensitive
 60 | 
 61 |  -- Neil Williams <neil@reddit.com>  Mon, 25 Feb 2013 23:27:10 -0800
 62 | 
 63 | snudown (1.1.3) unstable; urgency=low
 64 | 
 65 |   * add support for /r/all-minus
 66 | 
 67 |  -- Neil Williams <neil@reddit.com>  Tue, 08 Jan 2013 12:55:40 -0800
 68 | 
 69 | snudown (1.1.2) unstable; urgency=low
 70 | 
 71 |   * don't close the toc div if there wasn't a toc :(
 72 | 
 73 |  -- Neil Williams <neil@reddit.com>  Wed, 12 Dec 2012 17:38:05 -0800
 74 | 
 75 | snudown (1.1.1) unstable; urgency=low
 76 | 
 77 |   * minor code cleanup
 78 |   * add a div around wiki table of contents for styling purposes
 79 | 
 80 |  -- Neil Williams <neil@reddit.com>  Wed, 12 Dec 2012 13:47:49 -0800
 81 | 
 82 | snudown (1.1.0) unstable; urgency=low
 83 | 
 84 |   * add wiki variant of markdown syntax (allows links, and
 85 |     some raw html)
 86 | 
 87 |  -- Neil Williams <neil@reddit.com>  Wed, 05 Sep 2012 23:30:34 -0700
 88 | 
 89 | snudown (1.0.7) unstable; urgency=low
 90 | 
 91 |   * add python-setuptools to build-depends
 92 | 
 93 |  -- Neil Williams <neil@reddit.com>  Thu, 09 Aug 2012 14:46:49 -0700
 94 | 
 95 | snudown (1.0.6) unstable; urgency=low
 96 | 
 97 |   * made subreddit autolinking more robust thanks to nandhp
 98 |   * cleaned up packaging
 99 |   * merged upstream fixes:
100 |     * fix blockquotes nested inside paragraphs
101 |     * improve parsing of continuous list items
102 |     * fix infinite loop parsing strikethroughs
103 | 
104 |  -- Neil Williams <neil@reddit.com>  Thu, 09 Aug 2012 13:06:38 -0700
105 | 
106 | snudown (1.0.5) unstable; urgency=low
107 | 
108 |   * require a space between url and title
109 |   * merged upstream fixes:
110 |     * whitespace after tables prevent them from rendering
111 |     * escape html in contents of tables
112 | 
113 |  -- Neil Williams <neil@reddit.com>  Thu, 23 Feb 2012 08:40:39 -0800
114 | 
115 | snudown (1.0.4) unstable; urgency=low
116 | 
117 |   * change username autolinking to /u/username
118 |   * properly handle backslash at end of message
119 | 
120 |  -- Neil Williams <neil@reddit.com>  Thu, 26 Jan 2012 18:26:45 -0800
121 | 
122 | snudown (1.0.3) unstable; urgency=low
123 | 
124 |   * ~username auto-linking
125 |   * make table headers less strict
126 |   * correctly handle ) in link title text
127 |   * synced with upstream
128 |     * code clean-up
129 |     * utf-8 fixes
130 | 
131 |  -- Neil Williams <neil@reddit.com>  Wed, 18 Jan 2012 15:20:35 -0800
132 | 
133 | snudown (1.0.2) unstable; urgency=low
134 | 
135 |   * synced up with upstream
136 |   * more safelink relaxation based on community requests
137 |   * fixed nesting unordered lists within ordered lists and vice versa
138 | 
139 |  -- Neil Williams <neil@reddit.com>  Sat, 19 Nov 2011 17:16:47 -0800
140 | 
141 | snudown (1.0.1) unstable; urgency=low
142 | 
143 |   * new version, new package
144 | 
145 |  -- Neil Williams <neil@reddit.com>  Thu, 17 Nov 2011 14:22:26 -0800
146 | 
147 | snudown (1.0.0) unstable; urgency=low
148 | 
149 |   * source package automatically created by stdeb 0.6.0+git
150 | 
151 |  -- Neil Williams <neil@reddit.com>  Wed, 16 Nov 2011 10:36:53 -0800
152 | 


--------------------------------------------------------------------------------
/src/buffer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2008, Natacha Porté
  3 |  * Copyright (c) 2011, Vicent Martí
  4 |  *
  5 |  * Permission to use, copy, modify, and distribute this software for any
  6 |  * purpose with or without fee is hereby granted, provided that the above
  7 |  * copyright notice and this permission notice appear in all copies.
  8 |  *
  9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16 |  */
 17 | 
 18 | #define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
 19 | 
 20 | #include "buffer.h"
 21 | 
 22 | #include <stdio.h>
 23 | #include <stdlib.h>
 24 | #include <string.h>
 25 | #include <assert.h>
 26 | 
 27 | /* bufprefix: 0 when no byte differs before either side ends, else the byte difference */
 28 | int
 29 | bufprefix(const struct buf *buf, const char *prefix)
 30 | {
 31 | 	size_t pos = 0;
 32 | 
 33 | 	assert(buf && buf->unit);
 34 | 
 35 | 	/* walk both sequences in lock-step; reaching the end of either one is a match */
 36 | 	while (pos < buf->size && prefix[pos] != 0) {
 37 | 		if (buf->data[pos] != prefix[pos])
 38 | 			return buf->data[pos] - prefix[pos];
 39 | 		pos++;
 40 | 	}
 41 | 	return 0;
 42 | }
 43 | 
 44 | /* bufgrow: make sure at least `neosz` bytes are allocated, growing in whole
 45 |  * multiples of `unit`; returns BUF_OK, or BUF_ENOMEM (buffer untouched) when
 46 |  * `neosz` exceeds BUFFER_MAX_ALLOC_SIZE or realloc fails */
 47 | int
 48 | bufgrow(struct buf *buf, size_t neosz)
 49 | {
 50 | 	size_t capacity;
 51 | 	void *block;
 52 | 
 53 | 	assert(buf && buf->unit);
 54 | 
 55 | 	if (neosz > BUFFER_MAX_ALLOC_SIZE)
 56 | 		return BUF_ENOMEM;
 57 | 	if (neosz <= buf->asize)
 58 | 		return BUF_OK;
 59 | 
 60 | 	capacity = buf->asize;
 61 | 	do {
 62 | 		capacity += buf->unit;
 63 | 	} while (capacity < neosz);
 64 | 	block = realloc(buf->data, capacity);
 65 | 	if (block == NULL)
 66 | 		return BUF_ENOMEM;
 67 | 	buf->data = block;
 68 | 	buf->asize = capacity;
 69 | 	return BUF_OK;
 70 | }
 71 | 
 72 | 
 73 | /* bufnew: allocate an empty buffer growing in steps of `unit`; NULL if malloc fails */
 74 | struct buf *
 75 | bufnew(size_t unit)
 76 | {
 77 | 	struct buf *fresh = malloc(sizeof *fresh);
 78 | 
 79 | 	if (!fresh)
 80 | 		return NULL;
 81 | 
 82 | 	fresh->unit = unit;
 83 | 	fresh->data = 0;
 84 | 	fresh->asize = fresh->size = 0;
 85 | 	return fresh;
 86 | }
 87 | 
 88 | /* bufcstr: NUL-terminate the data in place and return it as a C string, or NULL if the buffer cannot grow */
 89 | const char *
 90 | bufcstr(struct buf *buf)
 91 | {
 92 | 	assert(buf && buf->unit);
 93 | 	if (buf->size < buf->asize) {
 94 | 		/* a spare byte exists: write only if it is not a NUL already */
 95 | 		if (buf->data[buf->size] != 0)
 96 | 			buf->data[buf->size] = 0;
 97 | 		return (char *)buf->data;
 98 | 	}
 99 | 	if (bufgrow(buf, buf->size + 1) != 0)
100 | 		return NULL;
101 | 	buf->data[buf->size] = 0;
102 | 	return (char *)buf->data;
103 | }
104 | 
105 | /* bufput: appends raw data to a buffer (silently dropped if the buffer cannot grow) */
106 | void
107 | bufput(struct buf *buf, const void *data, size_t len)
108 | {
109 | 	assert(buf && buf->unit);
110 | 
111 | 	/* empty append: return before memcpy(), buf->data is still NULL on a fresh buffer */
112 | 	if (len == 0 || (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0))
113 | 		return;
114 | 	memcpy(buf->data + buf->size, data, len);
115 | 	buf->size += len;
116 | }
117 | 
118 | /* bufputs: appends a NUL-terminated string to a buffer (the terminator itself is not copied) */
119 | void
120 | bufputs(struct buf *buf, const char *str)
121 | {
122 | 	bufput(buf, str, strlen(str));
123 | }
124 | 
125 | 
126 | /* bufputc: appends a single uint8_t to a buffer (dropped if the buffer cannot grow) */
127 | void
128 | bufputc(struct buf *buf, int c)
129 | {
130 | 	assert(buf && buf->unit);
131 | 
132 | 	if (buf->size + 1 > buf->asize) {
133 | 		if (bufgrow(buf, buf->size + 1) < 0)
134 | 			return;
135 | 	}
136 | 	buf->data[buf->size++] = c;
137 | }
138 | 
139 | /* bufputi: appends a formatted integer to a buffer, like snprintf("%d") */
140 | void
141 | bufputi(struct buf *buf, int n)
142 | {
143 | 	// Based on K&R C, except that the digits are taken from an unsigned
144 | 	// magnitude: negating INT_MIN as an int overflows, because its
145 | 	// absolute value is not representable.
146 | 
147 | 	// number of null-terminated decimal digits to represent x signed bytes is
148 | 	// floor(log10(2^(8x-1)))+2, which is bounded from above by x*3+2
149 | 	char buffer[sizeof(int)*3+2];
150 | 	int pos = (int)sizeof(buffer) - 1;
151 | 	unsigned int magnitude = (unsigned int)n;
152 | 
153 | 	// unsigned negation is well defined and yields |n| even for INT_MIN
154 | 	if (n < 0)
155 | 		magnitude = 0u - magnitude;
156 | 
157 | 	// the string is built right to left, starting with its terminator
158 | 	buffer[pos] = '\0';
159 | 
160 | 	// least significant digit first; filling from the right leaves the
161 | 	// digits in reading order, so no reversal pass is needed
162 | 	do {
163 | 		buffer[--pos] = (char)('0' + magnitude % 10);
164 | 		magnitude /= 10;
165 | 	} while (magnitude > 0);
166 | 
167 | 	if (n < 0)
168 | 		buffer[--pos] = '-';
169 | 
170 | 	// hand over the finished, NUL-terminated text
171 | 	bufputs(buf, buffer + pos);
172 | }
173 | 
174 | /* bufrelease: frees the data and the buffer itself (no reference counting
175 |  * is done here); a NULL buffer is ignored */
176 | void
177 | bufrelease(struct buf *buf)
178 | {
179 | 	if (buf != NULL) {
180 | 		free(buf->data);
181 | 		free(buf);
182 | 	}
183 | }
184 | 
185 | 
186 | /* bufreset: frees the data but keeps the buffer, now empty and reusable; NULL is ignored */
187 | void
188 | bufreset(struct buf *buf)
189 | {
190 | 	if (buf != NULL) {
191 | 		free(buf->data);
192 | 		buf->data = NULL;
193 | 		buf->asize = 0;
194 | 		buf->size = 0;
195 | 	}
196 | }
197 | 
198 | /* bufslurp: drops the first `len` bytes and shifts the remainder to the
199 |  * front; asking for the whole content or more just empties the buffer */
200 | void
201 | bufslurp(struct buf *buf, size_t len)
202 | {
203 | 	size_t kept;
204 | 
205 | 	assert(buf && buf->unit);
206 | 
207 | 	kept = (len < buf->size) ? buf->size - len : 0;
208 | 	if (kept > 0)
209 | 		memmove(buf->data, buf->data + len, kept);
210 | 	buf->size = kept;
211 | }
212 | 
213 | /* buftruncate: truncates the buffer at `size`; growing is rejected with BUF_EINVALIDIDX */
214 | int
215 | buftruncate(struct buf *buf, size_t size)
216 | {
217 | 	/* `size` is unsigned, so the only invalid request is one past the current length */
218 | 	if (size > buf->size) {
219 | 		/* bail out in debug mode so we can figure out why this happened */
220 | 		assert(0);
221 | 		return BUF_EINVALIDIDX;
222 | 	}
223 | 	buf->size = size;
224 | 	return BUF_OK;
225 | }
226 | 


--------------------------------------------------------------------------------
/src/markdown.h:
--------------------------------------------------------------------------------
  1 | /* markdown.h - generic markdown parser */
  2 | 
  3 | /*
  4 |  * Copyright (c) 2009, Natacha Porté
  5 |  *
  6 |  * Permission to use, copy, modify, and distribute this software for any
  7 |  * purpose with or without fee is hereby granted, provided that the above
  8 |  * copyright notice and this permission notice appear in all copies.
  9 |  *
 10 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 11 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 12 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 13 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 14 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 15 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 16 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 17 |  */
 18 | 
 19 | #ifndef UPSKIRT_MARKDOWN_H
 20 | #define UPSKIRT_MARKDOWN_H
 21 | 
 22 | #include "buffer.h"
 23 | #include "autolink.h"
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | #define SUNDOWN_VERSION "1.16.0"
 30 | #define SUNDOWN_VER_MAJOR 1
 31 | #define SUNDOWN_VER_MINOR 16
 32 | #define SUNDOWN_VER_REVISION 0
 33 | 
 34 | /********************
 35 |  * TYPE DEFINITIONS *
 36 |  ********************/
 37 | 
 38 | /* mkd_autolink - type of autolink */
 39 | enum mkd_autolink {
 40 | 	MKDA_NOT_AUTOLINK,	/* used internally when it is not an autolink */
 41 | 	MKDA_NORMAL,		/* normal http/https/ftp/mailto/etc link */
 42 | 	MKDA_EMAIL,			/* e-mail link without explicit mailto: */
 43 | };
 44 | 
 45 | enum mkd_tableflags {	/* presumably the `flags` given to the table_cell callback - confirm in markdown.c */
 46 | 	MKD_TABLE_ALIGN_L = 1,
 47 | 	MKD_TABLE_ALIGN_R = 2,
 48 | 	MKD_TABLE_ALIGN_CENTER = 3,	/* same bits as ALIGN_L | ALIGN_R */
 49 | 	MKD_TABLE_ALIGNMASK = 3,	/* covers the two alignment bits */
 50 | 	MKD_TABLE_HEADER = 4	/* separate bit, outside the alignment mask */
 51 | };
 52 | 
 53 | enum mkd_extensions {	/* bit flags, OR-ed together into the `extensions` argument of sd_markdown_new */
 54 | 	MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
 55 | 	MKDEXT_TABLES = (1 << 1),
 56 | 	MKDEXT_FENCED_CODE = (1 << 2),
 57 | 	MKDEXT_AUTOLINK = (1 << 3),
 58 | 	MKDEXT_STRIKETHROUGH = (1 << 4),
 59 | 	MKDEXT_SPACE_HEADERS = (1 << 6),	/* note: bit 5 is not assigned in this enum */
 60 | 	MKDEXT_SUPERSCRIPT = (1 << 7),
 61 | 	MKDEXT_LAX_SPACING = (1 << 8),
 62 | 	MKDEXT_NO_EMAIL_AUTOLINK = (1 << 9),
 63 | };
 64 | 
 65 | /* sd_callbacks - functions for rendering parsed data; a pointer to this table is given to sd_markdown_new */
 66 | struct sd_callbacks {
 67 | 	/* block level callbacks - NULL skips the block */
 68 | 	void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
 69 | 	void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
 70 | 	void (*blockspoiler)(struct buf *ob, const struct buf *text, void *opaque);
 71 | 	void (*blockhtml)(struct buf *ob,const  struct buf *text, void *opaque);
 72 | 	void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
 73 | 	void (*hrule)(struct buf *ob, void *opaque);
 74 | 	void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
 75 | 	void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
 76 | 	void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
 77 | 	void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
 78 | 	void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
 79 | 	void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span); /* note: col_span follows opaque */
 80 | 
 81 | 
 82 | 	/* span level callbacks - NULL or return 0 prints the span verbatim */
 83 | 	int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
 84 | 	int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
 85 | 	int (*spoilerspan)(struct buf *ob, const struct buf *text, void *opaque);
 86 | 	int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 87 | 	int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 88 | 	int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
 89 | 	int (*linebreak)(struct buf *ob, void *opaque);
 90 | 	int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
 91 | 	int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
 92 | 	int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 93 | 	int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
 94 | 	int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);
 95 | 
 96 | 	/* low level callbacks - NULL copies input directly into the output */
 97 | 	void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
 98 | 	void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);
 99 | 
100 | 	/* header and footer */
101 | 	void (*doc_header)(struct buf *ob, void *opaque);
102 | 	void (*doc_footer)(struct buf *ob, void *opaque);
103 | };
104 | 
105 | struct sd_markdown;
106 | 
107 | /*********
108 |  * FLAGS *
109 |  *********/
110 | 
111 | /* list/listitem flags */
112 | #define MKD_LIST_ORDERED	1
113 | #define MKD_LI_BLOCK		2  /* <li> containing block data */
114 | 
115 | /**********************
116 |  * EXPORTED FUNCTIONS *
117 |  **********************/
118 | 
119 | extern struct sd_markdown *
120 | sd_markdown_new(
121 | 	unsigned int extensions,	/* OR-ed enum mkd_extensions flags */
122 | 	size_t max_nesting,
123 | 	size_t max_table_cols,
124 | 	const struct sd_callbacks *callbacks,
125 | 	void *opaque);	/* presumably handed back to each callback as `opaque` - confirm in markdown.c */
126 | 
127 | extern void
128 | sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md);	/* output goes to `ob` */
129 | 
130 | extern void
131 | sd_markdown_free(struct sd_markdown *md);
132 | 
133 | extern void
134 | sd_version(int *major, int *minor, int *revision);	/* presumably reports SUNDOWN_VER_* - confirm in markdown.c */
135 | 
136 | #ifdef __cplusplus
137 | }
138 | #endif
139 | 
140 | #endif
141 | 
142 | /* vim: set filetype=c: */
143 | 


--------------------------------------------------------------------------------
/snudown.c:
--------------------------------------------------------------------------------
  1 | #include "markdown.h"
  2 | #include "html.h"
  3 | #include "autolink.h"
  4 | 
  5 | #include <string.h>
  6 | 
  7 | #define SNUDOWN_VERSION "1.5.0"
  8 | 
  9 | enum snudown_renderer_mode {	/* index into the per-mode arrays below */
 10 | 	RENDERER_USERTEXT = 0,	/* slot set up by init_default_renderer */
 11 | 	RENDERER_WIKI,	/* slot set up by init_wiki_renderer */
 12 | 	RENDERER_COUNT	/* number of modes; sizes the per-mode arrays */
 13 | };
 14 | 
 15 | struct snudown_renderopt {
 16 | 	struct html_renderopt html;	/* kept first: this struct is also passed as a struct html_renderopt * */
 17 | 	int nofollow;	/* non-zero: links get rel="nofollow" */
 18 | 	const char *target;	/* value of the target attribute on links, or NULL for none */
 19 | };
 20 | 
 21 | struct snudown_renderer {	/* everything needed to render in one mode */
 22 | 	struct sd_markdown* main_renderer;	/* parser used for the document itself */
 23 | 	struct sd_markdown* toc_renderer;	/* parser used for the table-of-contents pass */
 24 | 	struct module_state* state;	/* callbacks/options bound to main_renderer */
 25 | 	struct module_state* toc_state;	/* callbacks/options bound to toc_renderer */
 26 | };
 27 | 
 28 | struct module_state {	/* storage handed to sd_markdown_new by make_custom_renderer */
 29 | 	struct sd_callbacks callbacks;	/* filled by sdhtml_renderer or sdhtml_toc_renderer */
 30 | 	struct snudown_renderopt options;	/* passed as the parser's opaque pointer */
 31 | };
 32 | 
 33 | static int sundown_initialized[RENDERER_COUNT];	/* non-zero once the init function for that mode has run */
 34 | static struct snudown_renderer sundown[RENDERER_COUNT];	/* one renderer pair per mode, zeroed until initialized */
 35 | 
 36 | static char* html_element_whitelist[] = {"tr", "th", "td", "table", "tbody", "thead", "tfoot", "caption", NULL};	/* NULL-terminated */
 37 | static char* html_attr_whitelist[] = {"colspan", "rowspan", "cellspacing", "cellpadding", "scope", NULL};	/* NULL-terminated */
 38 | 
 39 | static struct module_state usertext_toc_state;
 40 | static struct module_state wiki_toc_state;
 41 | static struct module_state usertext_state;
 42 | static struct module_state wiki_state;
 43 | 
 44 | static const unsigned int snudown_default_md_flags =	/* parser extensions, shared by both modes */
 45 | 	MKDEXT_NO_INTRA_EMPHASIS |
 46 | 	MKDEXT_SUPERSCRIPT |
 47 | 	MKDEXT_AUTOLINK |
 48 | 	MKDEXT_STRIKETHROUGH |
 49 | 	MKDEXT_TABLES;
 50 | 
 51 | static const unsigned int snudown_default_render_flags =	/* HTML renderer flags for usertext */
 52 | 	HTML_SKIP_HTML |
 53 | 	HTML_SKIP_IMAGES |
 54 | 	HTML_SAFELINK |
 55 | 	HTML_ESCAPE |
 56 | 	HTML_USE_XHTML;
 57 | 
 58 | static const unsigned int snudown_wiki_render_flags =	/* wiki: no HTML_SKIP_IMAGES, adds HTML_ALLOW_ELEMENT_WHITELIST */
 59 | 	HTML_SKIP_HTML |
 60 | 	HTML_SAFELINK |
 61 | 	HTML_ALLOW_ELEMENT_WHITELIST |
 62 | 	HTML_ESCAPE |
 63 | 	HTML_USE_XHTML;
 64 | 
 65 | /* snudown_link_attr: appends the rel/target attributes selected in the snudown_renderopt behind `opaque` */
 66 | static void
 67 | snudown_link_attr(struct buf *ob, const struct buf *link, void *opaque)
 68 | {
 69 | 	const struct snudown_renderopt *opts = opaque;
 70 | 
 71 | 	if (opts->nofollow)
 72 | 		BUFPUTSL(ob, " rel=\"nofollow\"");
 73 | 	if (opts->target == NULL)
 74 | 		return;
 75 | 	BUFPUTSL(ob, " target=\"");
 76 | 	bufputs(ob, opts->target);
 77 | 	bufputc(ob, '\"');
 78 | }
 79 | 
 80 | /* make_custom_renderer: fill `state` through sdhtml_toc_renderer (when
 81 |  * `toc_renderer` is non-zero; `renderflags` is then unused) or through
 82 |  * sdhtml_renderer, install snudown's link-attribute callback and HTML
 83 |  * whitelists, and return a new parser bound to that state */
 84 | static struct sd_markdown* make_custom_renderer(struct module_state* state,
 85 | 												const unsigned int renderflags,
 86 | 												const unsigned int markdownflags,
 87 | 												int toc_renderer) {
 88 | 	struct sd_callbacks *callbacks = &state->callbacks;
 89 | 	/* `html` is the first member of snudown_renderopt, so its address
 90 | 	 * is also the address of the whole options struct */
 91 | 	struct html_renderopt *html = &state->options.html;
 92 | 
 93 | 	if (!toc_renderer)
 94 | 		sdhtml_renderer(callbacks, html, renderflags);
 95 | 	else
 96 | 		sdhtml_toc_renderer(callbacks, html);
 97 | 
 98 | 	html->html_attr_whitelist = html_attr_whitelist;
 99 | 	html->html_element_whitelist = html_element_whitelist;
100 | 	html->link_attributes = &snudown_link_attr;
101 | 
102 | 	/* 16 is max_nesting and 64 is max_table_cols */
103 | 	return sd_markdown_new(markdownflags, 16, 64, callbacks, &state->options);
104 | }
105 | 
106 | void init_default_renderer(void) {	/* one-time setup of the RENDERER_USERTEXT slot */
107 | 	if (sundown_initialized[RENDERER_USERTEXT]) return;	/* later calls are no-ops */
108 | 	sundown_initialized[RENDERER_USERTEXT] = 1;
109 | 	sundown[RENDERER_USERTEXT].main_renderer = make_custom_renderer(&usertext_state, snudown_default_render_flags, snudown_default_md_flags, 0);	/* 0: regular HTML renderer */
110 | 	sundown[RENDERER_USERTEXT].toc_renderer = make_custom_renderer(&usertext_toc_state, snudown_default_render_flags, snudown_default_md_flags, 1);	/* 1: table-of-contents renderer */
111 | 	sundown[RENDERER_USERTEXT].state = &usertext_state;
112 | 	sundown[RENDERER_USERTEXT].toc_state = &usertext_toc_state;
113 | }	/* NOTE(review): the results of make_custom_renderer are stored without a NULL check */
114 | 
115 | void init_wiki_renderer(void) {	/* one-time setup of the RENDERER_WIKI slot */
116 | 	if (sundown_initialized[RENDERER_WIKI]) return;	/* later calls are no-ops */
117 | 	sundown_initialized[RENDERER_WIKI] = 1;
118 | 	sundown[RENDERER_WIKI].main_renderer = make_custom_renderer(&wiki_state, snudown_wiki_render_flags, snudown_default_md_flags, 0);	/* 0: regular HTML renderer */
119 | 	sundown[RENDERER_WIKI].toc_renderer = make_custom_renderer(&wiki_toc_state, snudown_wiki_render_flags, snudown_default_md_flags, 1);	/* 1: table-of-contents renderer */
120 | 	sundown[RENDERER_WIKI].state = &wiki_state;
121 | 	sundown[RENDERER_WIKI].toc_state = &wiki_toc_state;
122 | }	/* NOTE(review): the results of make_custom_renderer are stored without a NULL check */
123 | 
124 | /* snudown_md: render `size` bytes of markdown; the size param is necessary
125 |  * because text may contain null.  Returns a malloc'ed NUL-terminated string,
126 |  * or NULL on a bad/uninitialized `renderer` or when memory runs out */
127 | const char*
128 | snudown_md(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int renderer, int enable_toc) {
129 | 	struct buf *ob;
130 | 	char* result_text;
131 | 	struct snudown_renderer _snudown;
132 | 	struct snudown_renderopt *options;
133 | 	unsigned int flags;
134 | 
135 | 	if (renderer < 0 || renderer >= RENDERER_COUNT)
136 | 		return NULL;
137 | 
138 | 	/* a slot its init_*_renderer never filled is all zeroes: refuse it */
139 | 	_snudown = sundown[renderer];
140 | 	if (!_snudown.state || !_snudown.main_renderer)
141 | 		return NULL;
142 | 	if (enable_toc && (!_snudown.toc_state || !_snudown.toc_renderer))
143 | 		return NULL;
144 | 
145 | 	/* Output buffer */
146 | 	ob = bufnew(128);
147 | 	if (!ob)
148 | 		return NULL;
149 | 
150 | 	options = &(_snudown.state->options);
151 | 	options->nofollow = nofollow;
152 | 	options->target = target;
153 | 
154 | 	/* saved so the HTML_TOC bit set below does not outlive this call */
155 | 	flags = options->html.flags;
156 | 
157 | 	if (enable_toc) {
158 | 		_snudown.toc_state->options.html.toc_id_prefix = toc_id_prefix;
159 | 		sd_markdown_render(ob, (uint8_t*) text, size, _snudown.toc_renderer);
160 | 		_snudown.toc_state->options.html.toc_id_prefix = NULL;
161 | 		options->html.flags |= HTML_TOC;
162 | 	}
163 | 
164 | 	/* do the magic */
165 | 	options->html.toc_id_prefix = toc_id_prefix;
166 | 	sd_markdown_render(ob, (uint8_t*) text, size, _snudown.main_renderer);
167 | 	options->html.toc_id_prefix = NULL;
168 | 	options->html.flags = flags;
169 | 
170 | 	/* make a null-terminated result string - the buffer isn't */
171 | 	result_text = (char*) malloc(ob->size + 1);
172 | 	if (result_text) {
173 | 		if (ob->data)
174 | 			memcpy(result_text, (char*) ob->data, ob->size);
175 | 		result_text[ob->size] = 0;
176 | 	}
177 | 	bufrelease(ob);
178 | 	return result_text;
179 | }
180 | 
181 | const char* default_renderer(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int enable_toc) {
182 | 	init_default_renderer();
183 | 	return snudown_md(text, size, nofollow, target, toc_id_prefix, RENDERER_USERTEXT, enable_toc);
184 | }
185 | 
186 | const char* wiki_renderer(char* text, size_t size, int nofollow, char* target, char* toc_id_prefix, int enable_toc) {
187 | 	init_wiki_renderer();
188 | 	return snudown_md(text, size, nofollow, target, toc_id_prefix, RENDERER_WIKI, enable_toc);
189 | }
190 | 


--------------------------------------------------------------------------------
/src/html_blocks.h:
--------------------------------------------------------------------------------
  1 | /* C code produced by gperf version 3.0.3 */
  2 | /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt  */
  3 | /* Computed positions: -k'1-2' */
  4 | 
  5 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
  6 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
  7 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
  8 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
  9 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
 10 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
 11 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
 12 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
 13 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
 14 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
 15 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
 16 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
 17 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
 18 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
 19 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
 20 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
 21 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
 22 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
 23 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
 24 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
 25 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
 26 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
 27 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
 28 | /* The character set is not based on ISO-646.  */
 29 | error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
 30 | #endif
 31 | 
 32 | /* maximum key range = 37, duplicates = 0 */
 33 | 
 34 | #ifndef GPERF_DOWNCASE
 35 | #define GPERF_DOWNCASE 1
 36 | static unsigned char gperf_downcase[256] =
 37 |   {
 38 |       0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
 39 |      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
 40 |      30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
 41 |      45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
 42 |      60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
 43 |     107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
 44 |     122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
 45 |     105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
 46 |     120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
 47 |     135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
 48 |     150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
 49 |     165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
 50 |     180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
 51 |     195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
 52 |     210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
 53 |     225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
 54 |     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
 55 |     255
 56 |   };
 57 | #endif
 58 | 
 59 | #ifndef GPERF_CASE_STRNCMP
 60 | #define GPERF_CASE_STRNCMP 1
 61 | static int
 62 | gperf_case_strncmp (s1, s2, n)
 63 |      register const char *s1;
 64 |      register const char *s2;
 65 |      register unsigned int n;
 66 | {
 67 |   for (; n > 0;)
 68 |     {
 69 |       unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
 70 |       unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
 71 |       if (c1 != 0 && c1 == c2)
 72 |         {
 73 |           n--;
 74 |           continue;
 75 |         }
 76 |       return (int)c1 - (int)c2;
 77 |     }
 78 |   return 0;
 79 | }
 80 | #endif
 81 | 
 82 | #ifdef __GNUC__
 83 | __inline
 84 | #else
 85 | #ifdef __cplusplus
 86 | inline
 87 | #endif
 88 | #endif
 89 | static unsigned int
 90 | hash_block_tag (str, len)
 91 |      register const char *str;
 92 |      register unsigned int len;
 93 | {
 94 |   static const unsigned char asso_values[] =
 95 |     {
 96 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 97 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 98 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 99 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
100 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
101 |        8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
102 |       38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
103 |        5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
104 |        0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
105 |       38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
106 |        0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
107 |       15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
108 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
109 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
110 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
111 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
112 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
113 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
114 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
115 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
116 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
117 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
118 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
119 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
120 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
121 |       38, 38, 38, 38, 38, 38, 38
122 |     };
123 |   register int hval = len;
124 | 
125 |   switch (hval)
126 |     {
127 |       default:
128 |         hval += asso_values[(unsigned char)str[1]+1];
129 |       /*FALLTHROUGH*/
130 |       case 1:
131 |         hval += asso_values[(unsigned char)str[0]];
132 |         break;
133 |     }
134 |   return hval;
135 | }
136 | 
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * Case-insensitive lookup of an HTML block tag name. `str` need not be
 * NUL-terminated; `len` gives its length. Returns the canonical lowercase
 * name from the keyword table, or 0 when the name is not a known block tag.
 *
 * NOTE(review): "span" sits at wordlist index 38, past MAX_HASH_VALUE (37),
 * and hash_block_tag("span", 4) evaluates to 57, so this function can never
 * return it. It looks hand-added without regenerating the hash - confirm
 * whether that is intentional.
 */
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  static const char * const wordlist[] =
    {
      "",             /*  0 */
      "p",            /*  1 */
      "dl",           /*  2 */
      "div",          /*  3 */
      "math",         /*  4 */
      "table",        /*  5 */
      "",             /*  6 */
      "ul",           /*  7 */
      "del",          /*  8 */
      "form",         /*  9 */
      "blockquote",   /* 10 */
      "figure",       /* 11 */
      "ol",           /* 12 */
      "fieldset",     /* 13 */
      "",             /* 14 */
      "h1",           /* 15 */
      "",             /* 16 */
      "h6",           /* 17 */
      "pre",          /* 18 */
      "", "",         /* 19-20 */
      "script",       /* 21 */
      "h5",           /* 22 */
      "noscript",     /* 23 */
      "",             /* 24 */
      "style",        /* 25 */
      "iframe",       /* 26 */
      "h4",           /* 27 */
      "ins",          /* 28 */
      "", "", "",     /* 29-31 */
      "h3",           /* 32 */
      "", "", "", "", /* 33-36 */
      "h2",           /* 37 */
      "span"          /* 38 */
    };

  const char *candidate;
  int key;

  if (len > MAX_WORD_LENGTH || len < MIN_WORD_LENGTH)
    return 0;

  key = hash_block_tag (str, len);
  if (key > MAX_HASH_VALUE || key < 0)
    return 0;

  candidate = wordlist[key];

  /* Cheap first-byte test (ignoring the ASCII case bit) before the full compare. */
  if ((((unsigned char)*str ^ (unsigned char)*candidate) & ~32) != 0)
    return 0;

  if (gperf_case_strncmp (str, candidate, len) != 0)
    return 0;

  /* The keyword must end exactly where the input does. */
  return candidate[len] == '\0' ? candidate : 0;
}
208 | 


--------------------------------------------------------------------------------
/html/html_smartypants.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Vicent Marti
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | #include "buffer.h"
 18 | #include "html.h"
 19 | 
 20 | #include <string.h>
 21 | #include <stdlib.h>
 22 | #include <stdio.h>
 23 | #include <ctype.h>
 24 | 
 25 | #if defined(_WIN32)
 26 | #define snprintf	_snprintf
 27 | #endif
 28 | 
 29 | struct smartypants_data {
 30 | 	int in_squote;
 31 | 	int in_dquote;
 32 | };
 33 | 
 34 | static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 35 | static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 36 | static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 37 | static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 38 | static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 39 | static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 40 | static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 41 | static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 42 | static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 43 | static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 44 | 
 45 | static size_t (*smartypants_cb_ptrs[])
 46 | 	(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
 47 | {
 48 | 	NULL,					/* 0 */
 49 | 	smartypants_cb__dash,	/* 1 */
 50 | 	smartypants_cb__parens,	/* 2 */
 51 | 	smartypants_cb__squote, /* 3 */
 52 | 	smartypants_cb__dquote, /* 4 */
 53 | 	smartypants_cb__amp,	/* 5 */
 54 | 	smartypants_cb__period,	/* 6 */
 55 | 	smartypants_cb__number,	/* 7 */
 56 | 	smartypants_cb__ltag,	/* 8 */
 57 | 	smartypants_cb__backtick, /* 9 */
 58 | 	smartypants_cb__escape, /* 10 */
 59 | };
 60 | 
 61 | static const uint8_t smartypants_cb_chars[] = {
 62 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 63 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 64 | 	0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
 65 | 	0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
 66 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 67 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
 68 | 	9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 69 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 70 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 71 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 72 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 73 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 74 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 75 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 76 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 77 | 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 78 | };
 79 | 
 80 | static inline int
 81 | word_boundary(uint8_t c)
 82 | {
 83 | 	return c == 0 || isspace(c) || ispunct(c);
 84 | }
 85 | 
 86 | static int
 87 | smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
 88 | {
 89 | 	char ent[8];
 90 | 
 91 | 	if (*is_open && !word_boundary(next_char))
 92 | 		return 0;
 93 | 
 94 | 	if (!(*is_open) && !word_boundary(previous_char))
 95 | 		return 0;
 96 | 
 97 | 	snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
 98 | 	*is_open = !(*is_open);
 99 | 	bufputs(ob, ent);
100 | 	return 1;
101 | }
102 | 
103 | static size_t
104 | smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
105 | {
106 | 	if (size >= 2) {
107 | 		uint8_t t1 = tolower(text[1]);
108 | 
109 | 		if (t1 == '\'') {
110 | 			if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
111 | 				return 1;
112 | 		}
113 | 
114 | 		if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
115 | 			(size == 3 || word_boundary(text[2]))) {
116 | 			BUFPUTSL(ob, "&rsquo;");
117 | 			return 0;
118 | 		}
119 | 
120 | 		if (size >= 3) {
121 | 			uint8_t t2 = tolower(text[2]);
122 | 
123 | 			if (((t1 == 'r' && t2 == 'e') ||
124 | 				(t1 == 'l' && t2 == 'l') ||
125 | 				(t1 == 'v' && t2 == 'e')) &&
126 | 				(size == 4 || word_boundary(text[3]))) {
127 | 				BUFPUTSL(ob, "&rsquo;");
128 | 				return 0;
129 | 			}
130 | 		}
131 | 	}
132 | 
133 | 	if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
134 | 		return 0;
135 | 
136 | 	bufputc(ob, text[0]);
137 | 	return 0;
138 | }
139 | 
/*
 * Callback for '(': turns "(c)", "(r)" and "(tm)" (letters matched
 * case-insensitively) into "&copy;", "&reg;" and "&trade;". Any other
 * parenthesis is copied through. Returns the number of extra bytes consumed.
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		const uint8_t first = tolower(text[1]);
		const uint8_t second = tolower(text[2]);
		const int closes_after_one = (second == ')');

		if (closes_after_one && first == 'c') {
			BUFPUTSL(ob, "&copy;");
			return 2;
		}

		if (closes_after_one && first == 'r') {
			BUFPUTSL(ob, "&reg;");
			return 2;
		}

		if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
166 | 
/*
 * Callback for '-': "---" becomes "&mdash;", "--" becomes "&ndash;", and a
 * single dash is copied through. Returns the number of extra bytes consumed.
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	/* Count up to three consecutive dashes starting at text[0]. */
	size_t dashes = 1;

	if (size >= 2 && text[1] == '-')
		dashes = (size >= 3 && text[2] == '-') ? 3 : 2;

	switch (dashes) {
	case 3:
		BUFPUTSL(ob, "&mdash;");
		return 2;

	case 2:
		BUFPUTSL(ob, "&ndash;");
		return 1;

	default:
		bufputc(ob, text[0]);
		return 0;
	}
}
183 | 
184 | static size_t
185 | smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
186 | {
187 | 	if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
188 | 		if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
189 | 			return 5;
190 | 	}
191 | 
192 | 	if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
193 | 		return 3;
194 | 
195 | 	bufputc(ob, '&');
196 | 	return 0;
197 | }
198 | 
/*
 * Callback for '.': "..." and ". . ." both become "&hellip;". A lone
 * period is copied through. Returns the number of extra bytes consumed.
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int tight = size >= 3 && text[1] == '.' && text[2] == '.';
	const int spaced = size >= 5 && text[1] == ' ' && text[2] == '.' &&
		text[3] == ' ' && text[4] == '.';

	if (tight) {
		BUFPUTSL(ob, "&hellip;");
		return 2;
	}

	if (spaced) {
		BUFPUTSL(ob, "&hellip;");
		return 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
215 | 
216 | static size_t
217 | smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
218 | {
219 | 	if (size >= 2 && text[1] == '`') {
220 | 		if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
221 | 			return 1;
222 | 	}
223 | 
224 | 	return 0;
225 | }
226 | 
/*
 * Callback for '1' and '3': turns the fractions 1/2, 1/4 and 3/4 into
 * "&frac12;", "&frac14;" and "&frac34;". The fraction must start at a word
 * boundary and be followed by a word boundary or the end of input; "1/4th"
 * and "3/4ths" are accepted as well. Anything else copies the digit
 * through. Returns the number of extra bytes consumed.
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3 && text[1] == '/' && word_boundary(previous_char)) {
		const uint8_t numerator = text[0];
		const uint8_t denominator = text[2];
		const int ends_here = (size == 3) || word_boundary(text[3]);

		if (numerator == '1' && denominator == '2' && ends_here) {
			BUFPUTSL(ob, "&frac12;");
			return 2;
		}

		if (numerator == '1' && denominator == '4' &&
			(ends_here ||
			 (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h'))) {
			BUFPUTSL(ob, "&frac14;");
			return 2;
		}

		if (numerator == '3' && denominator == '4' &&
			(ends_here ||
			 (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's'))) {
			BUFPUTSL(ob, "&frac34;");
			return 2;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
258 | 
259 | static size_t
260 | smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
261 | {
262 | 	if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
263 | 		BUFPUTSL(ob, "&quot;");
264 | 
265 | 	return 0;
266 | }
267 | 
268 | static size_t
269 | smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
270 | {
271 | 	static const char *skip_tags[] = {
272 | 	  "pre", "code", "var", "samp", "kbd", "math", "script", "style"
273 | 	};
274 | 	static const size_t skip_tags_count = 8;
275 | 
276 | 	size_t tag, i = 0;
277 | 
278 | 	while (i < size && text[i] != '>')
279 | 		i++;
280 | 
281 | 	for (tag = 0; tag < skip_tags_count; ++tag) {
282 | 		if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
283 | 			break;
284 | 	}
285 | 
286 | 	if (tag < skip_tags_count) {
287 | 		for (;;) {
288 | 			while (i < size && text[i] != '<')
289 | 				i++;
290 | 
291 | 			if (i == size)
292 | 				break;
293 | 
294 | 			if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
295 | 				break;
296 | 
297 | 			i++;
298 | 		}
299 | 
300 | 		while (i < size && text[i] != '>')
301 | 			i++;
302 | 	}
303 | 
304 | 	bufput(ob, text, i + 1);
305 | 	return i;
306 | }
307 | 
/*
 * Callback for '\\': a backslash followed by one of  \ " ' . - `  emits
 * that character literally (suppressing its smartypants handling) and
 * consumes it. Any other backslash is copied through, including one that is
 * the last byte of the input, which used to be dropped.
 *
 * Returns the number of extra bytes consumed beyond the backslash.
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* Trailing backslash: nothing to escape, keep it. */
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
329 | 
330 | #if 0
331 | static struct {
332 |     uint8_t c0;
333 |     const uint8_t *pattern;
334 |     const uint8_t *entity;
335 |     int skip;
336 | } smartypants_subs[] = {
337 |     { '\'', "'s>",      "&rsquo;",  0 },
338 |     { '\'', "'t>",      "&rsquo;",  0 },
339 |     { '\'', "'re>",     "&rsquo;",  0 },
340 |     { '\'', "'ll>",     "&rsquo;",  0 },
341 |     { '\'', "'ve>",     "&rsquo;",  0 },
342 |     { '\'', "'m>",      "&rsquo;",  0 },
343 |     { '\'', "'d>",      "&rsquo;",  0 },
344 |     { '-',  "--",       "&mdash;",  1 },
345 |     { '-',  "<->",      "&ndash;",  0 },
346 |     { '.',  "...",      "&hellip;", 2 },
347 |     { '.',  ". . .",    "&hellip;", 4 },
348 |     { '(',  "(c)",      "&copy;",   2 },
349 |     { '(',  "(r)",      "&reg;",    2 },
350 |     { '(',  "(tm)",     "&trade;",  3 },
351 |     { '3',  "<3/4>",    "&frac34;", 2 },
352 |     { '3',  "<3/4ths>", "&frac34;", 2 },
353 |     { '1',  "<1/2>",    "&frac12;", 2 },
354 |     { '1',  "<1/4>",    "&frac14;", 2 },
355 |     { '1',  "<1/4th>",  "&frac14;", 2 },
356 |     { '&',  "&#0;",      0,       3 },
357 | };
358 | #endif
359 | 
360 | void
361 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
362 | {
363 | 	size_t i;
364 | 	struct smartypants_data smrt = {0, 0};
365 | 
366 | 	if (!text)
367 | 		return;
368 | 
369 | 	bufgrow(ob, size);
370 | 
371 | 	for (i = 0; i < size; ++i) {
372 | 		size_t org;
373 | 		uint8_t action = 0;
374 | 
375 | 		org = i;
376 | 		while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
377 | 			i++;
378 | 
379 | 		if (i > org)
380 | 			bufput(ob, text + org, i - org);
381 | 
382 | 		if (i < size) {
383 | 			i += smartypants_cb_ptrs[(int)action]
384 | 				(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
385 | 		}
386 | 	}
387 | }
388 | 
389 | 
390 | 


--------------------------------------------------------------------------------
/src/autolink.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Vicent Marti
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | #include "buffer.h"
 18 | #include "autolink.h"
 19 | 
 20 | #include <string.h>
 21 | #include <stdlib.h>
 22 | #include <stdio.h>
 23 | #include <ctype.h>
 24 | 
 25 | #if defined(_WIN32)
 26 | #define strncasecmp	_strnicmp
 27 | #endif
 28 | 
 29 | int
 30 | sd_autolink_issafe(const uint8_t *link, size_t link_len)
 31 | {
 32 | 	static const size_t valid_uris_count = 14;
 33 | 	static const char *valid_uris[] = {
 34 | 		"http://", "https://", "ftp://", "mailto://",
 35 | 		"/", "git://", "steam://", "irc://", "news://", "mumble://",
 36 | 		"ssh://", "ircs://", "ts3server://", "#"
 37 | 	};
 38 | 
 39 | 	size_t i;
 40 | 
 41 | 	for (i = 0; i < valid_uris_count; ++i) {
 42 | 		size_t len = strlen(valid_uris[i]);
 43 | 
 44 | 		if (link_len > len &&
 45 | 			strncasecmp((char *)link, valid_uris[i], len) == 0 &&
 46 | 			(isalnum(link[len]) || link[len] == '#' || link[len] == '/' || link[len] == '?'))
 47 | 			return 1;
 48 | 	}
 49 | 
 50 | 	return 0;
 51 | }
 52 | 
 53 | static size_t
 54 | autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
 55 | {
 56 | 	uint8_t cclose, copen = 0;
 57 | 	size_t i;
 58 | 
 59 | 	for (i = 0; i < link_end; ++i)
 60 | 		if (data[i] == '<') {
 61 | 			link_end = i;
 62 | 			break;
 63 | 		}
 64 | 
 65 | 	while (link_end > 0) {
 66 | 		uint8_t c = data[link_end - 1];
 67 | 
 68 | 		if (c == 0)
 69 | 			break;
 70 | 
 71 | 		if (strchr("?!.,", c) != NULL)
 72 | 			link_end--;
 73 | 
 74 | 		else if (c == ';') {
 75 | 			size_t new_end = link_end - 2;
 76 | 
 77 | 			while (new_end > 0 && isalpha(data[new_end]))
 78 | 				new_end--;
 79 | 
 80 | 			if (new_end < link_end - 2 && data[new_end] == '&')
 81 | 				link_end = new_end;
 82 | 			else
 83 | 				link_end--;
 84 | 		}
 85 | 		else break;
 86 | 	}
 87 | 
 88 | 	if (link_end == 0)
 89 | 		return 0;
 90 | 
 91 | 	cclose = data[link_end - 1];
 92 | 
 93 | 	switch (cclose) {
 94 | 	case '"':	copen = '"'; break;
 95 | 	case '\'':	copen = '\''; break;
 96 | 	case ')':	copen = '('; break;
 97 | 	case ']':	copen = '['; break;
 98 | 	case '}':	copen = '{'; break;
 99 | 	}
100 | 
101 | 	if (copen != 0) {
102 | 		size_t closing = 0;
103 | 		size_t opening = 0;
104 | 		size_t i = 0;
105 | 
106 | 		/* Try to close the final punctuation sign in this same line;
107 | 		 * if we managed to close it outside of the URL, that means that it's
108 | 		 * not part of the URL. If it closes inside the URL, that means it
109 | 		 * is part of the URL.
110 | 		 *
111 | 		 * Examples:
112 | 		 *
113 | 		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
114 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric)
115 | 		 *
116 | 		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
117 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric)
118 | 		 *
119 | 		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
120 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric))
121 | 		 *
122 | 		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
123 | 		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
124 | 		 */
125 | 
126 | 		while (i < link_end) {
127 | 			if (data[i] == copen)
128 | 				opening++;
129 | 			else if (data[i] == cclose)
130 | 				closing++;
131 | 
132 | 			i++;
133 | 		}
134 | 
135 | 		if (closing != opening)
136 | 			link_end--;
137 | 	}
138 | 
139 | 	return link_end;
140 | }
141 | 
/*
 * Checks that `prefix_char` occurs on a word boundary just before `data`,
 * where `data` points to the character to search to the left of, and a word boundary
 * is (currently) a whitespace character, punctuation, or the start of the string.
 *
 * `max_rewind` is how many bytes before `data` may be inspected as plain
 * text; `max_lookbehind` is consulted only when `max_rewind` is exactly 1,
 * to detect an escaped slash to the left of the prefix.
 *
 * Returns the length of the prefix: 2 when `prefix_char` is itself preceded
 * by '/', 1 when it sits on a boundary, and 0 when there is no valid prefix.
 */
static int
check_reddit_autolink_prefix(
	const uint8_t* data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	char prefix_char
	)
{
	/* Make sure the byte just before `data` is the prefix, e.g. the `r` of `/?r/` */
	if (size < 2 || max_rewind < 1 || data[-1] != prefix_char)
		return 0;

	/* Not at the start of the buffer, no inlines to the immediate left of the `prefix_char` */
	if (max_rewind > 1) {
		/* Keep the byte unsigned: handing a negative value to ispunct() or
		 * isspace() is undefined, and plain char is signed on many
		 * platforms, so bytes >= 0x80 (UTF-8) used to trigger that. */
		const uint8_t boundary = data[-2];
		if (boundary == '/')
			return 2;
		/**
		 * Here's where our lack of unicode-awareness bites us. We don't correctly
		 * match punctuation / whitespace characters for the boundary, because we
		 * reject valid cases like "。r/example" (note the fullwidth period.)
		 *
		 * A better implementation might try to rewind over bytes with the 8th bit set, try
		 * to decode them to a valid codepoint, then do a unicode-aware check on the codepoint.
		 */
		else if (ispunct(boundary) || isspace(boundary))
			return 1;
		else
			return 0;
	} else if (max_lookbehind > 2) {
		/* There's an inline element just left of the `prefix_char`, is it an escaped forward
		 * slash? bail out so we correctly handle stuff like "\/r/foo". This will also correctly
		 * allow "\\/r/foo".
		 */
		if (data[-2] == '/' && data[-3] == '\\')
			return 0;
	}

	/* Must be a new-style shortlink with nothing relevant to the left of it. */
	return 1;
}
190 | 
/*
 * Measure the domain-like prefix of `data`.
 *
 * The first byte must be alphanumeric. The scan then continues over
 * alphanumerics, '-' and '.', stopping at the first other byte or at
 * index size - 1 (the final byte is never examined).
 *
 * Returns the index at which the scan stopped. When `allow_short` is zero
 * the domain must also contain at least one '.', otherwise 0 is returned.
 * An empty input yields 0.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* With size == 0 there is no data[0] to read, and `size - 1` below
	 * would wrap around. */
	if (size == 0 || !isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		if (data[i] == '.') np++;
		else if (!isalnum(data[i]) && data[i] != '-') break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with at
		 * least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* a valid domain needs to have at least a dot.
		 * that's as far as we get */
		return np ? i : 0;
	}
}
216 | 
/*
 * Try to recognise a bare "www." link starting at `data`.
 *
 * The link must begin at the start of the text or right after punctuation
 * or whitespace, start with "www." and contain a dotted domain. It extends
 * to the next whitespace and is then trimmed by autolink_delim().
 *
 * On success the link text is appended to `link`, *rewind_p is set to 0 and
 * the link length is returned; on failure 0 is returned. `flags` is unused.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	static const char www_prefix[] = "www.";
	size_t end;

	if (max_rewind > 0) {
		const uint8_t before = data[-1];

		if (!ispunct(before) && !isspace(before))
			return 0;
	}

	if (size < 4 || memcmp(data, www_prefix, strlen(www_prefix)) != 0)
		return 0;

	end = check_domain(data, size, 0);
	if (end == 0)
		return 0;

	/* Extend to the next whitespace byte. */
	for (; end < size; end++) {
		if (isspace(data[end]))
			break;
	}

	end = autolink_delim(data, end, max_rewind, size);
	if (end == 0)
		return 0;

	bufput(link, data, end);
	*rewind_p = 0;

	return (int)end;
}
252 | 
/*
 * Try to recognise an e-mail address around `data`, which points at the
 * byte that triggered the check.
 *
 * The local part is found by walking left (at most `max_rewind` bytes) over
 * alphanumerics and ". + - _". The scan to the right accepts alphanumerics,
 * '@', '-', '_' and any '.' that is not the last byte; it must see exactly
 * one '@' and at least one such '.'.
 *
 * On success the whole address is appended to `link`, *rewind_p receives
 * the number of bytes taken from the left of `data`, and the number of
 * bytes taken from `data` onwards is returned. Returns 0 on failure.
 * `flags` is unused.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind = 0;
	int at_signs = 0, dots = 0;

	/* Walk left over the local part. */
	while (rewind < max_rewind) {
		const uint8_t c = data[-rewind - 1];

		if (c == 0)
			break;

		if (!isalnum(c) && strchr(".+-_", c) == NULL)
			break;

		rewind++;
	}

	if (rewind == 0)
		return 0;

	/* Walk right over the '@' and the domain. */
	for (link_end = 0; link_end < size; ++link_end) {
		const uint8_t c = data[link_end];

		if (isalnum(c))
			continue;

		if (c == '@') {
			at_signs++;
			continue;
		}

		/* A dot only counts when something follows it. */
		if (c == '.' && link_end < size - 1) {
			dots++;
			continue;
		}

		if (c == '-' || c == '_')
			continue;

		break;
	}

	if (link_end < 2 || at_signs != 1 || dots == 0)
		return 0;

	link_end = autolink_delim(data, link_end, max_rewind, size);
	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
310 | 
311 | size_t
312 | sd_autolink__url(
313 | 	size_t *rewind_p,
314 | 	struct buf *link,
315 | 	uint8_t *data,
316 | 	size_t max_rewind,
317 | 	size_t size,
318 | 	unsigned int flags)
319 | {
320 | 	size_t link_end, rewind = 0, domain_len;
321 | 
322 | 	if (size < 4 || data[1] != '/' || data[2] != '/')
323 | 		return 0;
324 | 
325 | 	while (rewind < max_rewind && isalpha(data[-rewind - 1]))
326 | 		rewind++;
327 | 
328 | 	if (!sd_autolink_issafe(data - rewind, size + rewind))
329 | 		return 0;
330 | 
331 | 	link_end = strlen("://");
332 | 
333 | 	domain_len = check_domain(
334 | 		data + link_end,
335 | 		size - link_end,
336 | 		flags & SD_AUTOLINK_SHORT_DOMAINS);
337 | 
338 | 	if (domain_len == 0)
339 | 		return 0;
340 | 
341 | 	link_end += domain_len;
342 | 	while (link_end < size && !isspace(data[link_end]))
343 | 		link_end++;
344 | 
345 | 	link_end = autolink_delim(data, link_end, max_rewind, size);
346 | 
347 | 	if (link_end == 0)
348 | 		return 0;
349 | 
350 | 	bufput(link, data - rewind, link_end + rewind);
351 | 	*rewind_p = rewind;
352 | 
353 | 	return link_end;
354 | }
355 | 
/*
 * sd_autolink__subreddit: try to autolink an r/foo or /r/foo reference.
 *
 * rewind_p       - out: number of bytes before `data` that belong to the link.
 * link           - out: buffer that receives the link text on success.
 * data           - points at the '/' that follows the 'r'.
 * max_rewind     - forwarded to check_reddit_autolink_prefix().
 * max_lookbehind - forwarded to check_reddit_autolink_prefix().
 * size           - number of readable bytes starting at `data`.
 * no_slash       - out: set to 1 when the prefix check rewound exactly one
 *                  byte, 0 otherwise.
 *
 * Returns the number of bytes of `data` covered by the link, or 0 when
 * nothing was linked (the out parameters are left untouched in that case).
 */
size_t
sd_autolink__subreddit(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	/**
	 * This is meant to handle both r/foo and /r/foo style subreddit references.
	 * In a valid /?r/ link, `*data` will always point to the '/' after the first 'r'.
	 * In pseudo-regex, this matches something like:
	 *
	 * `(/|(?<=\b))r/(all-)?%subreddit%([-+]%subreddit%)*(/[\w\-/]*)?`
	 * where %subreddit% == `((t:)?\w{2,24}|reddit\.com)`
	 */
	size_t link_end;
	size_t rewind;
	int is_allminus = 0;

	rewind = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'r');
	if (!rewind)
		return 0;

	/* offset to the "meat" of the link */
	link_end = strlen("/");

	/* "all-" switches on '-' as an additional multireddit separator */
	if (size >= link_end + 4 && strncasecmp((char*)data + link_end, "all-", 4) == 0)
		is_allminus = 1;

	/* one iteration per subreddit name of a (multi)reddit */
	for (;;) {
		size_t start = link_end;
		size_t max_length = 24;

		/* special case: /r/reddit.com (only subreddit containing '.'). */
		if (size >= link_end + 10 && strncasecmp((char*)data + link_end, "reddit.com", 10) == 0) {
			link_end += 10;
			/* Make sure there are no trailing characters (don't do
			 * any autolinking for /r/reddit.commission) */
			max_length = 10;
		}

		/* If not a special case, verify it begins with (t:)?[A-Za-z0-9] */
		else {
			/* support autolinking to timereddits, /r/t:when (1 April 2012) */
			if (size > link_end + 2 && strncasecmp((char*)data + link_end, "t:", 2) == 0)
				link_end += 2;  /* Jump over the 't:' */

			/* the first character of a subreddit name must be a letter or
			 * digit. link_end can equal size here (e.g. the input ends
			 * right after a '+' separator), so check the bound before
			 * touching data[link_end]. */
			if (link_end >= size || !isalnum(data[link_end]))
				return 0;
			link_end += 1;
		}

		/* consume valid characters ([A-Za-z0-9_]) until we run out */
		while (link_end < size && (isalnum(data[link_end]) ||
							data[link_end] == '_'))
			link_end++;

		/* valid subreddit names are between 3 and 21 characters, with
		 * some subreddits having 2-character names. Don't bother with
		 * autolinking for anything outside this length range.
		 * (chksrname function in reddit/.../validator.py) */
		if (link_end - start < 2 || link_end - start > max_length)
			return 0;

		/* If we are linking to a multireddit, step over the separator
		 * and parse the next name; otherwise the name list is done */
		if (link_end < size &&
			(data[link_end] == '+' || (is_allminus && data[link_end] == '-')))
			link_end++;
		else
			break;
	}

	/* optional trailing path: /[A-Za-z0-9_/-]* */
	if (link_end < size && data[link_end] == '/') {
		while (link_end < size && (isalnum(data[link_end]) ||
									data[link_end] == '_' ||
									data[link_end] == '/' ||
									data[link_end] == '-'))
			link_end++;
	}

	/* make the link */
	bufput(link, data - rewind, link_end + rewind);

	*no_slash = (rewind == 1);
	*rewind_p = rewind;

	return link_end;
}
444 | 
/*
 * sd_autolink__username: try to autolink a u/name or /u/name reference.
 *
 * `data` points at the '/' that follows the 'u'. The first byte of the name
 * must be alphanumeric, '_' or '-'; later bytes may also be '/'.
 *
 * On success the reference is appended to `link`, `*rewind_p` receives the
 * number of bytes taken from before `data`, `*no_slash` is set to 1 when
 * exactly one byte was rewound (0 otherwise), and the number of bytes
 * consumed from `data` onwards is returned. Returns 0 when there is no match.
 */
size_t
sd_autolink__username(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t max_lookbehind,
	size_t size,
	int *no_slash
	)
{
	size_t end;
	size_t back;
	uint8_t first;

	if (size < 3)
		return 0;

	back = check_reddit_autolink_prefix(data, max_rewind, max_lookbehind, size, 'u');
	if (back == 0)
		return 0;

	/* skip the '/' that `data` points at */
	end = strlen("/");

	/* the first letter of a username must... well, be valid, we don't care otherwise */
	first = data[end];
	if (!(isalnum(first) || first == '_' || first == '-'))
		return 0;

	/* consume valid characters ([A-Za-z0-9_-/]) until we run out */
	for (end += 1; end < size; end++) {
		uint8_t ch = data[end];

		if (!isalnum(ch) && ch != '_' && ch != '/' && ch != '-')
			break;
	}

	/* make the link */
	bufput(link, data - back, end + back);

	*no_slash = (back == 1);
	*rewind_p = back;

	return end;
}
488 | 


--------------------------------------------------------------------------------
/test_snudown.js:
--------------------------------------------------------------------------------
  1 | import * as Snudown from 'snudown-js';
  2 | 
  3 | // http://ecmanaut.blogspot.ca/2006/07/encoding-decoding-utf8-in-javascript.html
  4 | function encodeUTF8(s) {
  5 |     return unescape(encodeURIComponent(s));
  6 | }
  7 | 
  8 | var cases = {
  9 |     '': '',
 10 |     'http://www.reddit.com':
 11 |         '<p><a href="http://www.reddit.com">http://www.reddit.com</a></p>\n',
 12 | 
 13 |     'http://www.reddit.com/a\x00b':
 14 |         '<p><a href="http://www.reddit.com/ab">http://www.reddit.com/ab</a></p>\n',
 15 | 
 16 |     'foo@example.com':
 17 |         '<p><a href="mailto:foo@example.com">foo@example.com</a></p>\n',
 18 | 
 19 |     '[foo](http://en.wikipedia.org/wiki/Link_(film\\))':
 20 |         '<p><a href="http://en.wikipedia.org/wiki/Link_(film)">foo</a></p>\n',
 21 | 
 22 |     '(http://tsfr.org)':
 23 |         '<p>(<a href="http://tsfr.org">http://tsfr.org</a>)</p>\n',
 24 | 
 25 |     '[A link with a /r/subreddit in it](/lol)':
 26 |         '<p><a href="/lol">A link with a /r/subreddit in it</a></p>\n',
 27 | 
 28 |     '[A link with a http://www.url.com in it](/lol)':
 29 |         '<p><a href="/lol">A link with a http://www.url.com in it</a></p>\n',
 30 | 
 31 |     '[Empty Link]()':
 32 |         '<p>[Empty Link]()</p>\n',
 33 | 
 34 |     'http://en.wikipedia.org/wiki/café_racer':
 35 |         '<p><a href="http://en.wikipedia.org/wiki/caf%C3%A9_racer">http://en.wikipedia.org/wiki/café_racer</a></p>\n',
 36 | 
 37 |     '#####################################################hi':
 38 |         '<h6>###############################################hi</h6>\n',
 39 | 
 40 |     '[foo](http://bar\nbar)':
 41 |         '<p><a href="http://bar%0Abar">foo</a></p>\n',
 42 | 
 43 |     '/r/test':
 44 |         '<p><a href="/r/test">/r/test</a></p>\n',
 45 | 
 46 |     'Words words /r/test words':
 47 |         '<p>Words words <a href="/r/test">/r/test</a> words</p>\n',
 48 | 
 49 |     '/r/':
 50 |         '<p>/r/</p>\n',
 51 | 
 52 |     'escaped \\/r/test':
 53 |         '<p>escaped /r/test</p>\n',
 54 | 
 55 |     'ampersands http://www.google.com?test&blah':
 56 |         '<p>ampersands <a href="http://www.google.com?test&amp;blah">http://www.google.com?test&amp;blah</a></p>\n',
 57 | 
 58 |     '[_regular_ link with nesting](/test)':
 59 |         '<p><a href="/test"><em>regular</em> link with nesting</a></p>\n',
 60 | 
 61 |     ' www.a.co?with&test':
 62 |         '<p><a href="http://www.a.co?with&amp;test">www.a.co?with&amp;test</a></p>\n',
 63 | 
 64 |     'Normal^superscript':
 65 |         '<p>Normal<sup>superscript</sup></p>\n',
 66 | 
 67 |     'Escape\\^superscript':
 68 |         '<p>Escape^superscript</p>\n',
 69 | 
 70 |     '~~normal strikethrough~~':
 71 |         '<p><del>normal strikethrough</del></p>\n',
 72 | 
 73 |     '\\~~escaped strikethrough~~':
 74 |         '<p>~~escaped strikethrough~~</p>\n',
 75 | 
 76 |     'anywhere\x03, you':
 77 |         '<p>anywhere, you</p>\n',
 78 | 
 79 |     '[Test](//test)':
 80 |         '<p><a href="//test">Test</a></p>\n',
 81 | 
 82 |     '[Test](//#test)':
 83 |         '<p><a href="//#test">Test</a></p>\n',
 84 | 
 85 |     '[Test](#test)':
 86 |         '<p><a href="#test">Test</a></p>\n',
 87 | 
 88 |     '[Test](git://github.com)':
 89 |         '<p><a href="git://github.com">Test</a></p>\n',
 90 | 
 91 |     '[Speculation](//?)':
 92 |         '<p><a href="//?">Speculation</a></p>\n',
 93 | 
 94 |     '/r/sr_with_underscores':
 95 |         '<p><a href="/r/sr_with_underscores">/r/sr_with_underscores</a></p>\n',
 96 | 
 97 |     '[Test](///#test)':
 98 |         '<p><a href="///#test">Test</a></p>\n',
 99 | 
100 |     '/r/multireddit+test+yay':
101 |         '<p><a href="/r/multireddit+test+yay">/r/multireddit+test+yay</a></p>\n',
102 | 
103 |     '<test>':
104 |         '<p>&lt;test&gt;</p>\n',
105 | 
106 |     'words_with_underscores':
107 |         '<p>words_with_underscores</p>\n',
108 | 
109 |     'words*with*asterisks':
110 |         '<p>words<em>with</em>asterisks</p>\n',
111 | 
112 |     '~test':
113 |         '<p>~test</p>\n',
114 | 
115 |     '/u/test':
116 |         '<p><a href="/u/test">/u/test</a></p>\n',
117 | 
118 |     '/u/test/m/test test':
119 |         '<p><a href="/u/test/m/test">/u/test/m/test</a> test</p>\n',
120 | 
121 |     '/U/nope':
122 |         '<p>/U/nope</p>\n',
123 | 
124 |     '/r/test/m/test test':
125 |         '<p><a href="/r/test/m/test">/r/test/m/test</a> test</p>\n',
126 | 
127 |     '/r/test/w/test test':
128 |         '<p><a href="/r/test/w/test">/r/test/w/test</a> test</p>\n',
129 | 
130 |     '/r/test/comments/test test':
131 |         '<p><a href="/r/test/comments/test">/r/test/comments/test</a> test</p>\n',
132 | 
133 |     '/u/test/commentscommentscommentscommentscommentscommentscomments/test test':
134 |         '<p><a href="/u/test/commentscommentscommentscommentscommentscommentscomments/test">/u/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
135 | 
136 |     'a /u/reddit':
137 |         '<p>a <a href="/u/reddit">/u/reddit</a></p>\n',
138 | 
139 |     'u/reddit':
140 |         '<p><a href="/u/reddit">u/reddit</a></p>\n',
141 | 
142 |     'a u/reddit':
143 |         '<p>a <a href="/u/reddit">u/reddit</a></p>\n',
144 | 
145 |     'a u/reddit/foobaz':
146 |         '<p>a <a href="/u/reddit/foobaz">u/reddit/foobaz</a></p>\n',
147 | 
148 |     'foo:u/reddit':
149 |         '<p>foo:<a href="/u/reddit">u/reddit</a></p>\n',
150 | 
151 |     'fuu/reddit':
152 |         '<p>fuu/reddit</p>\n',
153 | 
154 |     /*# Don't treat unicode punctuation as a word boundary for now
155 |     [encodeUTF8('a。u/reddit')]:
156 |         encodeUTF8('<p>a。u/reddit</p>\n'),*/
157 | 
158 |     '\\/u/me':
159 |         '<p>/u/me</p>\n',
160 | 
161 |     '\\\\/u/me':
162 |         '<p>\\<a href="/u/me">/u/me</a></p>\n',
163 | 
164 |     '\\u/me':
165 |         '<p>\\<a href="/u/me">u/me</a></p>\n',
166 | 
167 |     '\\\\u/me':
168 |         '<p>\\<a href="/u/me">u/me</a></p>\n',
169 | 
170 |     'u\\/me':
171 |         '<p>u/me</p>\n',
172 | 
173 |     '*u/me*':
174 |         '<p><em><a href="/u/me">u/me</a></em></p>\n',
175 | 
176 |     'foo^u/me':
177 |         '<p>foo<sup><a href="/u/me">u/me</a></sup></p>\n',
178 | 
179 |     '*foo*u/me':
180 |         '<p><em>foo</em><a href="/u/me">u/me</a></p>\n',
181 | 
182 |     'u/me':
183 |         '<p><a href="/u/me">u/me</a></p>\n',
184 | 
185 |     '/u/me':
186 |         '<p><a href="/u/me">/u/me</a></p>\n',
187 | 
188 |     'u/m':
189 |         '<p>u/m</p>\n',
190 | 
191 |     '/u/m':
192 |         '<p>/u/m</p>\n',
193 | 
194 |     '/f/oobar':
195 |         '<p>/f/oobar</p>\n',
196 | 
197 |     'f/oobar':
198 |         '<p>f/oobar</p>\n',
199 | 
200 |     '/r/test/commentscommentscommentscommentscommentscommentscomments/test test':
201 |         '<p><a href="/r/test/commentscommentscommentscommentscommentscommentscomments/test">/r/test/commentscommentscommentscommentscommentscommentscomments/test</a> test</p>\n',
202 | 
203 |     'blah \\':
204 |         '<p>blah \\</p>\n',
205 | 
206 |     '/r/whatever: fork':
207 |         '<p><a href="/r/whatever">/r/whatever</a>: fork</p>\n',
208 | 
209 |     '/r/t:timereddit':
210 |         '<p><a href="/r/t:timereddit">/r/t:timereddit</a></p>\n',
211 | 
212 |     '/r/reddit.com':
213 |         '<p><a href="/r/reddit.com">/r/reddit.com</a></p>\n',
214 | 
215 |     '/r/not.cool':
216 |         '<p><a href="/r/not">/r/not</a>.cool</p>\n',
217 | 
218 |     '/r/very+clever+multireddit+reddit.com+t:fork+yay':
219 |         '<p><a href="/r/very+clever+multireddit+reddit.com+t:fork+yay">/r/very+clever+multireddit+reddit.com+t:fork+yay</a></p>\n',
220 | 
221 |     '/r/t:heatdeathoftheuniverse':
222 |         '<p><a href="/r/t:heatdeathoftheuniverse">/r/t:heatdeathoftheuniverse</a></p>\n',
223 | 
224 |     '/r/all-minus-something':
225 |         '<p><a href="/r/all-minus-something">/r/all-minus-something</a></p>\n',
226 | 
227 |     '/r/notall-minus':
228 |         '<p><a href="/r/notall">/r/notall</a>-minus</p>\n',
229 | 
230 |     'a /r/reddit.com':
231 |         '<p>a <a href="/r/reddit.com">/r/reddit.com</a></p>\n',
232 | 
233 |     'a r/reddit.com':
234 |         '<p>a <a href="/r/reddit.com">r/reddit.com</a></p>\n',
235 | 
236 |     'foo:r/reddit.com':
237 |         '<p>foo:<a href="/r/reddit.com">r/reddit.com</a></p>\n',
238 | 
239 |     'foobar/reddit.com':
240 |         '<p>foobar/reddit.com</p>\n',
241 | 
242 |     /*[encodeUTF8('a。r/reddit.com')]:
243 |         encodeUTF8('<p>a。r/reddit.com</p>\n'),*/
244 | 
245 |     '/R/reddit.com':
246 |         '<p>/R/reddit.com</p>\n',
247 | 
248 |     '/r/irc://foo.bar/':
249 |         '<p><a href="/r/irc">/r/irc</a>://foo.bar/</p>\n',
250 | 
251 |     '/r/t:irc//foo.bar/':
252 |         '<p><a href="/r/t:irc//foo">/r/t:irc//foo</a>.bar/</p>\n',
253 | 
254 |     '/r/all-irc://foo.bar/':
255 |         '<p><a href="/r/all-irc">/r/all-irc</a>://foo.bar/</p>\n',
256 | 
257 |     '/r/foo+irc://foo.bar/':
258 |         '<p><a href="/r/foo+irc">/r/foo+irc</a>://foo.bar/</p>\n',
259 | 
260 |     '/r/www.example.com':
261 |         '<p><a href="/r/www">/r/www</a>.example.com</p>\n',
262 | 
263 |     '.http://reddit.com':
264 |         '<p>.<a href="http://reddit.com">http://reddit.com</a></p>\n',
265 | 
266 |     '[r://<http://reddit.com/>](/aa)':
267 |         '<p><a href="/aa">r://<a href="http://reddit.com/">http://reddit.com/</a></a></p>\n',
268 | 
269 |     '/u/http://www.reddit.com/user/reddit':
270 |         '<p><a href="/u/http">/u/http</a>://<a href="http://www.reddit.com/user/reddit">www.reddit.com/user/reddit</a></p>\n',
271 | 
272 |     'www.http://example.com/':
273 |         '<p><a href="http://www.http://example.com/">www.http://example.com/</a></p>\n',
274 | 
275 |     /*['|'.repeat(5) + '\n' + '-|'.repeat(5) + '\n|\n']:
276 |         '<table><thead>\n<tr>\n' + '<th></th>\n'.repeat(4) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="4" ></td>\n</tr>\n</tbody></table>\n',
277 | 
278 |     ['|'.repeat(2) + '\n' + '-|'.repeat(2) + '\n|\n']:
279 |         '<table><thead>\n<tr>\n' + '<th></th>\n'.repeat(1) + '</tr>\n</thead><tbody>\n<tr>\n<td></td>\n</tr>\n</tbody></table>\n',
280 | 
281 |     ['|'.repeat(65) + '\n' + '-|'.repeat(65) + '\n|\n']:
282 |         '<table><thead>\n<tr>\n' + '<th></th>\n'.repeat(64) + '</tr>\n</thead><tbody>\n<tr>\n<td colspan="64" ></td>\n</tr>\n</tbody></table>\n',
283 | 
284 |     ['|'.repeat(66) + '\n' + '-|'.repeat(66) + '\n|\n']:
285 |         '<p>' + '|'.repeat(66) + '\n' + '-|'.repeat(66) + '\n|' + '</p>\n',*/
286 | 
287 |     '&thetasym;':
288 |         '<p>&thetasym;</p>\n',
289 | 
290 |     '&foobar;':
291 |         '<p>&amp;foobar;</p>\n',
292 | 
293 |     '&nbsp':
294 |         '<p>&amp;nbsp</p>\n',
295 | 
296 |     '&#foobar;':
297 |         '<p>&amp;#foobar;</p>\n',
298 | 
299 |     '&#xfoobar;':
300 |         '<p>&amp;#xfoobar;</p>\n',
301 | 
302 |     '&#9999999999;':
303 |         '<p>&amp;#9999999999;</p>\n',
304 | 
305 |     '&#99;':
306 |         '<p>&#99;</p>\n',
307 | 
308 |     '&#x7E;':
309 |         '<p>&#x7E;</p>\n',
310 | 
311 |     '&#X7E;':
312 |         '<p>&#x7E;</p>\n',
313 | 
314 |     '&frac12;':
315 |         '<p>&frac12;</p>\n',
316 | 
317 |     'aaa&frac12;aaa':
318 |         '<p>aaa&frac12;aaa</p>\n',
319 | 
320 |     '&':
321 |         '<p>&amp;</p>\n',
322 | 
323 |     '&;':
324 |         '<p>&amp;;</p>\n',
325 | 
326 |     '&#;':
327 |         '<p>&amp;#;</p>\n',
328 | 
329 |     '&#;':
330 |         '<p>&amp;#;</p>\n',
331 | 
332 |     '&#x;':
333 |         '<p>&amp;#x;</p>\n',
334 |     '> quotey mcquoteface':
335 |         '<blockquote>\n<p>quotey mcquoteface</p>\n</blockquote>\n',
336 | 
337 |     '> quotey mcquoteface\nnew line of text what happens?':
338 |         '<blockquote>\n<p>quotey mcquoteface\nnew line of text what happens?</p>\n</blockquote>\n',
339 | 
340 |     '> quotey mcquoteface\n\ntwo new lines then text what happens?':
341 |         '<blockquote>\n<p>quotey mcquoteface</p>\n</blockquote>\n\n<p>two new lines then text what happens?</p>\n',
342 | 
343 |     '> quotey mcquoteface\n> more quotey':
344 |         '<blockquote>\n<p>quotey mcquoteface\nmore quotey</p>\n</blockquote>\n',
345 | 
346 |     '> quotey macquoteface\n\n> another quotey':
347 |         '<blockquote>\n<p>quotey macquoteface</p>\n\n<p>another quotey</p>\n</blockquote>\n',
348 | 
349 |     '>! spoily mcspoilerface':
350 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface</p>\n</blockquote>\n',
351 | 
352 |     '>! spoily mcspoilerface\nmore spoilage goes here':
353 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface\nmore spoilage goes here</p>\n</blockquote>\n',
354 | 
355 |     '>! spoily mcspoilerface > incorrect quote syntax':
356 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface &gt; incorrect quote syntax</p>\n</blockquote>\n',
357 | 
358 |     '>! spoily mcspoilerface\n\n':
359 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface</p>\n</blockquote>\n',
360 | 
361 |     '>! spoily mcspoilerface\n\nnormal text here':
362 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface</p>\n</blockquote>\n\n<p>normal text here</p>\n',
363 | 
364 |     '>! spoily mcspoilerface\n>! blockspoiler continuation':
365 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface\nblockspoiler continuation</p>\n</blockquote>\n',
366 | 
367 |     '>! spoily mcspoilerface\n> quotey mcquoteface':
368 |         '<blockquote class="md-spoiler-text">\n<p>spoily mcspoilerface</p>\n\n<blockquote>\n<p>quotey mcquoteface</p>\n</blockquote>\n</blockquote>\n',
369 | 
370 |     '>! spoiler p1\n>!\n>! spoiler p2\n>! spoiler p3':
371 |         '<blockquote class="md-spoiler-text">\n<p>spoiler p1</p>\n\n<p>spoiler p2\nspoiler p3</p>\n</blockquote>\n',
372 | 
373 |     '>>! spoiler p1\n>!\n>! spoiler p2\n>! spoiler p3':
374 |         '<blockquote>\n<blockquote class="md-spoiler-text">\n<p>spoiler p1</p>\n\n<p>spoiler p2\nspoiler p3</p>\n</blockquote>\n</blockquote>\n',
375 | 
376 |     '>>! spoiler p1\n>!\n>! spoiler p2\n\nnew text':
377 |         '<blockquote>\n<blockquote class="md-spoiler-text">\n<p>spoiler p1</p>\n\n<p>spoiler p2</p>\n</blockquote>\n</blockquote>\n\n<p>new text</p>\n',
378 | 
379 |     '>>! spoiler p1\n>!\n>! spoiler p2\n\n>! new blockspoiler':
380 |         '<blockquote>\n<blockquote class="md-spoiler-text">\n<p>spoiler p1</p>\n\n<p>spoiler p2</p>\n</blockquote>\n</blockquote>\n\n<blockquote class="md-spoiler-text">\n<p>new blockspoiler</p>\n</blockquote>\n',
381 | 
382 |     '! this is not a spoiler':
383 |         '<p>! this is not a spoiler</p>\n',
384 | 
385 |     '>!\nTesting':
386 |         '<blockquote class="md-spoiler-text">\n<p>Testing</p>\n</blockquote>\n',
387 | 
388 |     '>!\n\nTesting':
389 |         '<blockquote class="md-spoiler-text">\n</blockquote>\n\n<p>Testing</p>\n',
390 | 
391 |     '>!':
392 |         '<blockquote class="md-spoiler-text">\n</blockquote>\n',
393 |     '>!\n>!':
394 |         '<blockquote class="md-spoiler-text">\n</blockquote>\n',
395 |     '>':
396 |         '<blockquote>\n</blockquote>\n',
397 |     '> some quote goes here\n>':
398 |         '<blockquote>\n<p>some quote goes here</p>\n</blockquote>\n',
399 |     'This is an >!inline spoiler!< sentence.':
400 |         '<p>This is an <span class="md-spoiler-text">inline spoiler</span> sentence.</p>\n',
401 |     '>!Inline spoiler!< starting the sentence':
402 |         '<p><span class="md-spoiler-text">Inline spoiler</span> starting the sentence</p>\n',
403 |     'Inline >!spoiler with *emphasis*!< test':
404 |         '<p>Inline <span class="md-spoiler-text">spoiler with <em>emphasis</em></span> test</p>\n',
405 |     '>! This is an illegal blockspoiler >!with an inline spoiler!<':
406 |         '<p>&gt;! This is an illegal blockspoiler <span class="md-spoiler-text">with an inline spoiler</span></p>\n',
407 |     'This is an >!inline spoiler with some >!additional!< text!<':
408 |         '<p>This is an <span class="md-spoiler-text">inline spoiler with some &gt;!additional</span> text!&lt;</p>\n'
409 | };
410 | 
411 | // Older node versions don't support computed property names
412 | 
/**
 * Return `str` concatenated `n` times.
 *
 * Joining an array of n + 1 empty slots with `str` as the separator yields
 * exactly n copies of it.
 */
function repeat(str, n) {
    var slots = new Array(n + 1);
    return slots.join(str);
}
416 | 
417 | 
// Unicode punctuation is not treated as a word boundary, so neither of these
// inputs is expected to produce an autolink.
['a。u/reddit', 'a。r/reddit.com'].forEach(function (text) {
    cases[encodeUTF8(text)] = encodeUTF8('<p>' + text + '</p>\n');
});

// Tables: a header row of N pipes, a separator row and a body row holding a
// single pipe. Each entry is [N, expected body cell markup].
[
    [5, '<td colspan="4" ></td>\n'],
    [2, '<td></td>\n'],
    [65, '<td colspan="64" ></td>\n']
].forEach(function (spec) {
    var pipes = spec[0];
    var bodyCell = spec[1];
    var source = repeat('|', pipes) + '\n' + repeat('-|', pipes) + '\n|\n';

    cases[source] =
        '<table><thead>\n<tr>\n' +
        repeat('<th></th>\n', pipes - 1) +
        '</tr>\n</thead><tbody>\n<tr>\n' +
        bodyCell +
        '</tr>\n</tbody></table>\n';
});

// With 66 pipes the input is expected to come back as a plain paragraph.
{
    const tooWide = repeat('|', 66) + '\n' + repeat('-|', 66) + '\n|';
    cases[tooWide + '\n'] = '<p>' + tooWide + '</p>\n';
}
426 | 
/**
 * Generate the integers from `start` (inclusive) to `end` (exclusive).
 * When `end` is omitted (or null), the single argument is taken as the end
 * and counting starts at 0.
 */
function* xrange(start, end) {
    var lower = start;
    var upper = end;

    if (upper == undefined) {
        upper = lower;
        lower = 0;
    }

    var current = lower;
    while (current < upper) {
        yield current;
        current++;
    }
}
436 | 
/**
 * Generate every item of each given iterable, one iterable after another,
 * in argument order.
 */
function* chain(...iterables) {
    for (var index = 0; index < iterables.length; index++) {
        yield* iterables[index];
    }
}
442 | 
// Build one large test case containing the decimal and the hexadecimal
// numeric entity for every code point below 65550. Entities for the code
// points listed in ILLEGAL_NUMERIC_ENTS are expected back with their '&'
// escaped; all others are expected back unchanged.
var ILLEGAL_NUMERIC_ENTS = new Set([
    ...xrange(0, 9),
    ...xrange(11, 13),
    ...xrange(14, 32),
    ...xrange(55296, 57344),
    ...xrange(65534, 65536)
]);

var ent_test_key = '';
var ent_test_val = '';
for (let codePoint = 0; codePoint < 65550; codePoint++) {
    const entityPair = '&#' + codePoint + ';&#x' + codePoint.toString(16) + ';';

    ent_test_key += entityPair;
    ent_test_val += ILLEGAL_NUMERIC_ENTS.has(codePoint)
        ? entityPair.replace(/&/g, '&amp;')
        : entityPair;
}

cases[ent_test_key] = '<p>' + ent_test_val + '</p>\n';
465 | 
466 | var wiki_cases = {
467 |     '<table scope="foo"bar>':
468 |         '<p><table scope="foo"></p>\n',
469 | 
470 |     '<table scope="foo"bar colspan="2">':
471 |         '<p><table scope="foo" colspan="2"></p>\n',
472 | 
473 |     '<table scope="foo" colspan="2"bar>':
474 |         '<p><table scope="foo" colspan="2"></p>\n',
475 | 
476 |     '<table scope="foo">':
477 |         '<p><table scope="foo"></p>\n',
478 | 
479 |     '<table scop="foo">':
480 |         '<p><table></p>\n',
481 | 
482 |     '<table ff= scope="foo">':
483 |         '<p><table scope="foo"></p>\n',
484 | 
485 |     '<table colspan= scope="foo">':
486 |         '<p><table scope="foo"></p>\n',
487 | 
488 |     '<table scope=ff"foo">':
489 |         '<p><table scope="foo"></p>\n',
490 | 
491 |     '<table scope="foo" test="test">':
492 |         '<p><table scope="foo"></p>\n',
493 | 
494 |     '<table scope="foo" longervalue="testing test" scope="test">':
495 |         '<p><table scope="foo" scope="test"></p>\n',
496 | 
497 |     '<table scope=`"foo">':
498 |         '<p><table scope="foo"></p>\n',
499 | 
500 |     '<table scope="foo bar">':
501 |         '<p><table scope="foo bar"></p>\n',
502 | 
503 |     '<table scope=\'foo colspan="foo">':
504 |         '<p><table></p>\n',
505 | 
506 |     '<table scope=\'foo\' colspan="foo">':
507 |         '<p><table scope="foo" colspan="foo"></p>\n',
508 | 
509 |     '<table scope=>':
510 |         '<p><table></p>\n',
511 | 
512 |     '<table scope= colspan="test" scope=>':
513 |         '<p><table colspan="test"></p>\n',
514 | 
515 |     '<table colspan="\'test">':
516 |         '<p><table colspan="&#39;test"></p>\n',
517 | 
518 |     '<table scope="foo" colspan="2">':
519 |         '<p><table scope="foo" colspan="2"></p>\n',
520 | 
521 |     '<table scope="foo" colspan="2" ff="test">':
522 |         '<p><table scope="foo" colspan="2"></p>\n',
523 | 
524 |     '<table ff="test" scope="foo" colspan="2" colspan=>':
525 |         '<p><table scope="foo" colspan="2"></p>\n',
526 | 
527 |     ' <table colspan=\'\'\' a="" \' scope="foo">':
528 |         '<p><table scope="foo"></p>\n',
529 | };
530 | 
531 | var start = Date.now();
532 | 
/**
 * Call `fn` with the argument list `input` and compare the result with
 * `expected_output` using strict equality.
 *
 * Throws an Error describing the first argument, the expected value and the
 * actual value when they differ; returns undefined otherwise.
 */
function runTest(fn, input, expected_output) {
    var actual = fn.apply(null, input);

    if (actual === expected_output) {
        return;
    }

    var report =
        "TEST FAILED:" +
        "\n       input: " + input[0] +
        "\n    expected: " + expected_output +
        "\n      actual: " + actual;

    throw new Error(report);
}
543 | 
// Wiki-mode cases first, then the regular markdown cases.
Object.keys(wiki_cases).forEach(function (text) {
    runTest(Snudown.markdownWiki, [text], wiki_cases[text]);
});

Object.keys(cases).forEach(function (text) {
    runTest(Snudown.markdown, [text], cases[text]);
});

// Calls that pass an options object, or no usable text at all.
var option_cases = [
    {
        render: Snudown.markdown,
        args: ['/u/test', { nofollow: true, target: '_top' }],
        expected: '<p><a href="/u/test" rel="nofollow" target="_top">/u/test</a></p>\n'
    },
    {
        render: Snudown.markdownWiki,
        args: ['<table scope="foo">', { nofollow: null, target: null }],
        expected: '<p><table scope="foo"></p>\n'
    },
    {
        render: Snudown.markdown,
        args: ['<table scope="foo">', { nofollow: null, target: null }],
        expected: '<p>&lt;table scope=&quot;foo&quot;&gt;</p>\n'
    },
    {
        render: Snudown.markdown,
        args: ['###Test', { enableToc: true, tocIdPrefix: 'prefixed_' }],
        expected: '<div class="toc">\n<ul>\n<li>\n<a href="#prefixed_toc_0">Test</a>\n</li>\n</ul>\n</div>\n\n<h3 id="prefixed_toc_0">Test</h3>\n'
    },
    {
        // undefined text
        render: Snudown.markdown,
        args: [],
        expected: ''
    },
    {
        // null text
        render: Snudown.markdown,
        args: [null],
        expected: ''
    },
    {
        // undefined text
        render: Snudown.markdownWiki,
        args: [],
        expected: ''
    },
    {
        // null text
        render: Snudown.markdownWiki,
        args: [null],
        expected: ''
    },
    {
        // all named arguments
        render: Snudown.markdown,
        args: ['###Test\n<table scope="foo">\n/u/test', { nofollow: true, target: '_top', enableToc: true, tocIdPrefix: 'prefixed_' }],
        expected: '<div class="toc">\n<ul>\n<li>\n<a href="#prefixed_toc_0">Test</a>\n</li>\n</ul>\n</div>\n\n<h3 id="prefixed_toc_0">Test</h3>\n\n<p>&lt;table scope=&quot;foo&quot;&gt;\n<a href="/u/test" rel="nofollow" target="_top">/u/test</a></p>\n'
    },
    {
        // all named arguments
        render: Snudown.markdownWiki,
        args: ['###Test\n<table scope="foo">\n/u/test', { nofollow: true, target: '_top', enableToc: true, tocIdPrefix: 'prefixed_' }],
        expected: '<div class="toc">\n<ul>\n<li>\n<a href="#prefixed_toc_0">Test</a>\n</li>\n</ul>\n</div>\n\n<h3 id="prefixed_toc_0">Test</h3>\n\n<p><table scope="foo">\n<a href="/u/test" rel="nofollow" target="_top">/u/test</a></p>\n'
    }
];

for (const testCase of option_cases) {
    runTest(testCase.render, testCase.args, testCase.expected);
}

// Reaching this point means no runTest() call threw.
var elapsed = Date.now() - start;
console.log('Test Passed:', elapsed, 'ms');
604 | 


--------------------------------------------------------------------------------
/html/html.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2009, Natacha Porté
  3 |  * Copyright (c) 2011, Vicent Marti
  4 |  *
  5 |  * Permission to use, copy, modify, and distribute this software for any
  6 |  * purpose with or without fee is hereby granted, provided that the above
  7 |  * copyright notice and this permission notice appear in all copies.
  8 |  *
  9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16 |  */
 17 | 
 18 | #include "markdown.h"
 19 | #include "html.h"
 20 | 
 21 | #include <string.h>
 22 | #include <stdlib.h>
 23 | #include <stdio.h>
 24 | #include <ctype.h>
 25 | #include <stdbool.h>
 26 | 
 27 | #include "houdini.h"
 28 | 
 29 | #define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
 30 | 
 31 | int
 32 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
 33 | {
 34 | 	size_t i;
 35 | 	int closed = 0;
 36 | 
 37 | 	if (tag_size < 3 || tag_data[0] != '<')
 38 | 		return HTML_TAG_NONE;
 39 | 
 40 | 	i = 1;
 41 | 
 42 | 	if (tag_data[i] == '/') {
 43 | 		closed = 1;
 44 | 		i++;
 45 | 	}
 46 | 
 47 | 	for (; i < tag_size; ++i, ++tagname) {
 48 | 		if (*tagname == 0)
 49 | 			break;
 50 | 
 51 | 		if (tag_data[i] != *tagname)
 52 | 			return HTML_TAG_NONE;
 53 | 	}
 54 | 
 55 | 	if (i == tag_size)
 56 | 		return HTML_TAG_NONE;
 57 | 
 58 | 	if (isspace(tag_data[i]) || tag_data[i] == '>')
 59 | 		return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
 60 | 
 61 | 	return HTML_TAG_NONE;
 62 | }
 63 | 
 64 | static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length)
 65 | {
 66 | 	houdini_escape_html0(ob, source, length, 0);
 67 | }
 68 | 
 69 | static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length)
 70 | {
 71 | 	houdini_escape_href(ob, source, length);
 72 | }
 73 | 
 74 | /********************
 75 |  * GENERIC RENDERER *
 76 |  ********************/
 77 | static int
 78 | rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
 79 | {
 80 | 	struct html_renderopt *options = opaque;
 81 | 	uint8_t offset = 0;
 82 | 
 83 | 	if (!link || !link->size)
 84 | 		return 0;
 85 | 
 86 | 	if ((options->flags & HTML_SAFELINK) != 0 &&
 87 | 		!sd_autolink_issafe(link->data, link->size) &&
 88 | 		type != MKDA_EMAIL)
 89 | 		return 0;
 90 | 
 91 | 	BUFPUTSL(ob, "<a href=\"");
 92 | 	if (type == MKDA_EMAIL)
 93 | 		BUFPUTSL(ob, "mailto:");
 94 | 	escape_href(ob, link->data + offset, link->size - offset);
 95 | 
 96 | 	if (options->link_attributes) {
 97 | 		bufputc(ob, '\"');
 98 | 		options->link_attributes(ob, link, opaque);
 99 | 		bufputc(ob, '>');
100 | 	} else {
101 | 		BUFPUTSL(ob, "\">");
102 | 	}
103 | 
104 | 	/*
105 | 	 * Pretty printing: if we get an email address as
106 | 	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
107 | 	 * want to print the `mailto:` prefix
108 | 	 */
109 | 	if (bufprefix(link, "mailto:") == 0) {
110 | 		escape_html(ob, link->data + 7, link->size - 7);
111 | 	} else {
112 | 		escape_html(ob, link->data, link->size);
113 | 	}
114 | 
115 | 	BUFPUTSL(ob, "</a>");
116 | 
117 | 	return 1;
118 | }
119 | 
120 | static void
121 | rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
122 | {
123 | 	if (ob->size) bufputc(ob, '\n');
124 | 
125 | 	if (lang && lang->size) {
126 | 		size_t i, cls;
127 | 		BUFPUTSL(ob, "<pre><code class=\"");
128 | 
129 | 		for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
130 | 			while (i < lang->size && isspace(lang->data[i]))
131 | 				i++;
132 | 
133 | 			if (i < lang->size) {
134 | 				size_t org = i;
135 | 				while (i < lang->size && !isspace(lang->data[i]))
136 | 					i++;
137 | 
138 | 				if (lang->data[org] == '.')
139 | 					org++;
140 | 
141 | 				if (cls) bufputc(ob, ' ');
142 | 				escape_html(ob, lang->data + org, i - org);
143 | 			}
144 | 		}
145 | 
146 | 		BUFPUTSL(ob, "\">");
147 | 	} else
148 | 		BUFPUTSL(ob, "<pre><code>");
149 | 
150 | 	if (text)
151 | 		escape_html(ob, text->data, text->size);
152 | 
153 | 	BUFPUTSL(ob, "</code></pre>\n");
154 | }
155 | 
156 | static void
157 | rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
158 | {
159 | 	if (ob->size) bufputc(ob, '\n');
160 | 	BUFPUTSL(ob, "<blockquote>\n");
161 | 	if (text) bufput(ob, text->data, text->size);
162 | 	BUFPUTSL(ob, "</blockquote>\n");
163 | }
164 | 
165 | static void
166 | rndr_blockspoiler(struct buf *ob, const struct buf *text, void *opaque)
167 | {
168 | 	if (ob->size) bufputc(ob, '\n');
169 | 	BUFPUTSL(ob, "<blockquote class=\"md-spoiler-text\">\n");
170 | 	if (text) bufput(ob, text->data, text->size);
171 | 	BUFPUTSL(ob, "</blockquote>\n");
172 | }
173 | 
174 | static int
175 | rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
176 | {
177 | 	BUFPUTSL(ob, "<code>");
178 | 	if (text) escape_html(ob, text->data, text->size);
179 | 	BUFPUTSL(ob, "</code>");
180 | 	return 1;
181 | }
182 | 
183 | static int
184 | rndr_spoilerspan(struct buf *ob, const struct buf *text, void *opaque)
185 | {
186 |     if (!text || !text->size)
187 |         return 0;
188 | 
189 |     BUFPUTSL(ob, "<span class=\"md-spoiler-text\">");
190 |     bufput(ob, text->data, text->size);
191 |     BUFPUTSL(ob, "</span>");
192 | 
193 |     return 1;
194 | }
195 | 
196 | static int
197 | rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
198 | {
199 | 	if (!text || !text->size)
200 | 		return 0;
201 | 
202 | 	BUFPUTSL(ob, "<del>");
203 | 	bufput(ob, text->data, text->size);
204 | 	BUFPUTSL(ob, "</del>");
205 | 	return 1;
206 | }
207 | 
208 | static int
209 | rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
210 | {
211 | 	if (!text || !text->size)
212 | 		return 0;
213 | 
214 | 	BUFPUTSL(ob, "<strong>");
215 | 	bufput(ob, text->data, text->size);
216 | 	BUFPUTSL(ob, "</strong>");
217 | 
218 | 	return 1;
219 | }
220 | 
221 | static int
222 | rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
223 | {
224 | 	if (!text || !text->size) return 0;
225 | 	BUFPUTSL(ob, "<em>");
226 | 	if (text) bufput(ob, text->data, text->size);
227 | 	BUFPUTSL(ob, "</em>");
228 | 	return 1;
229 | }
230 | 
/*
 * Write a hard line break: "<br/>" in XHTML mode, "<br>" otherwise, each
 * followed by a newline. Always returns 1.
 */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
	struct html_renderopt *options = opaque;

	if (USE_XHTML(options)) {
		bufputs(ob, "<br/>\n");
	} else {
		bufputs(ob, "<br>\n");
	}

	return 1;
}
238 | 
239 | static void
240 | rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
241 | {
242 | 	struct html_renderopt *options = opaque;
243 | 
244 | 	if (ob->size)
245 | 		bufputc(ob, '\n');
246 | 
247 | 	if (options->flags & HTML_TOC) {
248 | 		BUFPUTSL(ob, "<h");
249 | 		bufputi(ob, level);
250 | 		BUFPUTSL(ob, " id=\"");
251 | 		if (options->toc_id_prefix) {
252 | 			bufputs(ob, options->toc_id_prefix);
253 | 		}
254 | 		BUFPUTSL(ob, "toc_");
255 | 		bufputi(ob, options->toc_data.header_count++);
256 | 		BUFPUTSL(ob, "\">");
257 | 	} else {
258 | 		BUFPUTSL(ob, "<h");
259 | 		bufputi(ob, level);
260 | 		BUFPUTSL(ob, ">");
261 | 	}
262 | 
263 | 	if (text) bufput(ob, text->data, text->size);
264 | 	BUFPUTSL(ob, "</h");
265 | 	bufputi(ob, level);
266 | 	BUFPUTSL(ob, ">\n");
267 | }
268 | 
269 | static int
270 | rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
271 | {
272 | 	struct html_renderopt *options = opaque;
273 | 
274 | 	if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
275 | 		return 0;
276 | 
277 | 	BUFPUTSL(ob, "<a href=\"");
278 | 
279 | 	if (link && link->size)
280 | 		escape_href(ob, link->data, link->size);
281 | 
282 | 	if (title && title->size) {
283 | 		BUFPUTSL(ob, "\" title=\"");
284 | 		escape_html(ob, title->data, title->size);
285 | 	}
286 | 
287 | 	if (options->link_attributes) {
288 | 		bufputc(ob, '\"');
289 | 		options->link_attributes(ob, link, opaque);
290 | 		bufputc(ob, '>');
291 | 	} else {
292 | 		BUFPUTSL(ob, "\">");
293 | 	}
294 | 
295 | 	if (content && content->size) bufput(ob, content->data, content->size);
296 | 	BUFPUTSL(ob, "</a>");
297 | 	return 1;
298 | }
299 | 
300 | static void
301 | rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
302 | {
303 | 	if (ob->size) bufputc(ob, '\n');
304 | 	bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
305 | 	if (text) bufput(ob, text->data, text->size);
306 | 	bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
307 | }
308 | 
309 | static void
310 | rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
311 | {
312 | 	BUFPUTSL(ob, "<li>");
313 | 	if (text) {
314 | 		size_t size = text->size;
315 | 		while (size && text->data[size - 1] == '\n')
316 | 			size--;
317 | 
318 | 		bufput(ob, text->data, size);
319 | 	}
320 | 	BUFPUTSL(ob, "</li>\n");
321 | }
322 | 
323 | static void
324 | rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
325 | {
326 | 	struct html_renderopt *options = opaque;
327 | 	size_t i = 0;
328 | 
329 | 	if (ob->size) bufputc(ob, '\n');
330 | 
331 | 	if (!text || !text->size)
332 | 		return;
333 | 
334 | 	while (i < text->size && isspace(text->data[i])) i++;
335 | 
336 | 	if (i == text->size)
337 | 		return;
338 | 
339 | 	BUFPUTSL(ob, "<p>");
340 | 	if (options->flags & HTML_HARD_WRAP) {
341 | 		size_t org;
342 | 		while (i < text->size) {
343 | 			org = i;
344 | 			while (i < text->size && text->data[i] != '\n')
345 | 				i++;
346 | 
347 | 			if (i > org)
348 | 				bufput(ob, text->data + org, i - org);
349 | 
350 | 			/*
351 | 			 * do not insert a line break if this newline
352 | 			 * is the last character on the paragraph
353 | 			 */
354 | 			if (i >= text->size - 1)
355 | 				break;
356 | 
357 | 			rndr_linebreak(ob, opaque);
358 | 			i++;
359 | 		}
360 | 	} else {
361 | 		bufput(ob, &text->data[i], text->size - i);
362 | 	}
363 | 	BUFPUTSL(ob, "</p>\n");
364 | }
365 | 
366 | static void
367 | rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
368 | {
369 | 	size_t org, sz;
370 | 	if (!text) return;
371 | 	sz = text->size;
372 | 	while (sz > 0 && text->data[sz - 1] == '\n') sz--;
373 | 	org = 0;
374 | 	while (org < sz && text->data[org] == '\n') org++;
375 | 	if (org >= sz) return;
376 | 	if (ob->size) bufputc(ob, '\n');
377 | 	bufput(ob, text->data + org, sz - org);
378 | 	bufputc(ob, '\n');
379 | }
380 | 
381 | static int
382 | rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
383 | {
384 | 	if (!text || !text->size) return 0;
385 | 	BUFPUTSL(ob, "<strong><em>");
386 | 	bufput(ob, text->data, text->size);
387 | 	BUFPUTSL(ob, "</em></strong>");
388 | 	return 1;
389 | }
390 | 
391 | static void
392 | rndr_hrule(struct buf *ob, void *opaque)
393 | {
394 | 	struct html_renderopt *options = opaque;
395 | 	if (ob->size) bufputc(ob, '\n');
396 | 	bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
397 | }
398 | 
399 | static int
400 | rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
401 | {
402 | 	struct html_renderopt *options = opaque;
403 | 	if (!link || !link->size) return 0;
404 | 
405 | 	BUFPUTSL(ob, "<img src=\"");
406 | 	escape_href(ob, link->data, link->size);
407 | 	BUFPUTSL(ob, "\" alt=\"");
408 | 
409 | 	if (alt && alt->size)
410 | 		escape_html(ob, alt->data, alt->size);
411 | 
412 | 	if (title && title->size) {
413 | 		BUFPUTSL(ob, "\" title=\"");
414 | 		escape_html(ob, title->data, title->size); }
415 | 
416 | 	bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
417 | 	return 1;
418 | }
419 | 
420 | static void
421 | rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
422 |              char* tagname, char** whitelist, int tagtype)
423 | {
424 |     size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
425 |     struct buf *attr;
426 |     struct buf *value;
427 |     char c;
428 | 
429 |     bufputc(ob, '<');
430 | 
431 |     if(tagtype == HTML_TAG_CLOSE) {
432 |         bufputc(ob, '/');
433 |         bufputs(ob, tagname);
434 |         bufputc(ob, '>');
435 |         return;
436 |     }
437 | 
438 |     bufputs(ob, tagname);
439 |     i = 1 + strlen(tagname);
440 | 
441 |     attr = bufnew(16);
442 |     value = bufnew(16);
443 | 
444 |     for(; i < text->size && !done; i++) {
445 |         c = text->data[i];
446 |         done = 0;
447 |         reset = 0;
448 |         done_attr = 0;
449 | 
450 |         switch(c) {
451 |             case '>':
452 |                 done = 1;
453 |                 break;
454 |             case '\'':
455 |             case '"':
456 |                 if(!seen_equals) {
457 |                     reset = 1;
458 |                 } else if(!in_str) {
459 |                     in_str = c;
460 |                 } else if(in_str == c) {
461 |                     in_str = 0;
462 |                     done_attr = 1;
463 |                 } else {
464 |                     bufputc(value, c);
465 |                 }
466 |                 break;
467 |             case ' ':
468 |                 if (in_str) {
469 |                     bufputc(value, ' ');
470 |                 } else {
471 |                     reset = 1;
472 |                 }
473 |                 break;
474 |             case '=':
475 |                 if(seen_equals) {
476 |                     reset = 1;
477 |                     break;
478 |                 }
479 |                 seen_equals = 1;
480 |                 break;
481 |             default:
482 |                 if(seen_equals && in_str || !seen_equals) {
483 |                     bufputc(seen_equals ? value : attr, c);
484 |                 }
485 |                 break;
486 |         }
487 | 
488 |         if(done_attr) {
489 |             int valid = 0;
490 |             for(z = 0; whitelist[z]; z++) {
491 |                 if(strlen(whitelist[z]) != attr->size) {
492 |                     continue;
493 |                 }
494 |                 for(x = 0; x < attr->size; x++) {
495 |                     if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
496 |                         break;
497 |                     }
498 |                 }
499 |                 if(x == attr->size) {
500 |                     valid = 1;
501 |                     break;
502 |                 }
503 |             }
504 |             if(valid && value->size && attr->size) {
505 |                 bufputc(ob, ' ');
506 |                 escape_html(ob, attr->data, attr->size);
507 |                 bufputs(ob, "=\"");
508 |                 escape_html(ob, value->data, value->size);
509 |                 bufputc(ob, '"');
510 |             }
511 |             reset = 1;
512 |         }
513 | 
514 |         if(reset) {
515 |             seen_equals = 0;
516 |             in_str = 0;
517 |             bufreset(attr);
518 |             bufreset(value);
519 |         }
520 |     }
521 | 
522 |     bufrelease(attr);
523 |     bufrelease(value);
524 | 
525 |     bufputc(ob, '>');
526 | }
527 | 
528 | static int
529 | rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
530 | {
531 |     struct html_renderopt *options = opaque;
532 |     char** whitelist = options->html_element_whitelist;
533 |     int i, tagtype;
534 | 
535 |     /* Items on the whitelist ignore all other flags and just output */
536 |     if (((options->flags & HTML_ALLOW_ELEMENT_WHITELIST) != 0) && whitelist) {
537 |         for (i = 0; whitelist[i]; i++) {
538 |             tagtype = sdhtml_is_tag(text->data, text->size, whitelist[i]);
539 |             if (tagtype != HTML_TAG_NONE) {
540 |                 rndr_html_tag(ob, text, opaque,
541 |                               whitelist[i],
542 |                               options->html_attr_whitelist,
543 |                               tagtype);
544 |                 return 1;
545 |             }
546 |         }
547 |     }
548 | 
549 |     /* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
550 |     * It doens't see if there are any valid tags, just escape all of them. */
551 |     if((options->flags & HTML_ESCAPE) != 0) {
552 |         escape_html(ob, text->data, text->size);
553 |         return 1;
554 |     }
555 | 
556 |     if ((options->flags & HTML_SKIP_HTML) != 0)
557 |         return 1;
558 | 
559 |     if ((options->flags & HTML_SKIP_STYLE) != 0 &&
560 |         sdhtml_is_tag(text->data, text->size, "style"))
561 |         return 1;
562 | 
563 |     if ((options->flags & HTML_SKIP_LINKS) != 0 &&
564 |         sdhtml_is_tag(text->data, text->size, "a"))
565 |         return 1;
566 | 
567 |     if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
568 |         sdhtml_is_tag(text->data, text->size, "img"))
569 |         return 1;
570 | 
571 |     bufput(ob, text->data, text->size);
572 |     return 1;
573 | }
574 | 
575 | static void
576 | rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
577 | {
578 | 	if (ob->size) bufputc(ob, '\n');
579 | 	BUFPUTSL(ob, "<table><thead>\n");
580 | 	if (header)
581 | 		bufput(ob, header->data, header->size);
582 | 	BUFPUTSL(ob, "</thead><tbody>\n");
583 | 	if (body)
584 | 		bufput(ob, body->data, body->size);
585 | 	BUFPUTSL(ob, "</tbody></table>\n");
586 | }
587 | 
588 | static void
589 | rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
590 | {
591 | 	BUFPUTSL(ob, "<tr>\n");
592 | 	if (text)
593 | 		bufput(ob, text->data, text->size);
594 | 	BUFPUTSL(ob, "</tr>\n");
595 | }
596 | 
597 | static void
598 | rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque, int col_span)
599 | {
600 | 	if (flags & MKD_TABLE_HEADER) {
601 | 		BUFPUTSL(ob, "<th");
602 | 	} else {
603 | 		BUFPUTSL(ob, "<td");
604 | 	}
605 | 
606 | 	if (col_span > 1) {
607 | 		BUFPUTSL(ob, " colspan=\"");
608 | 		bufputi(ob, col_span);
609 | 		BUFPUTSL(ob, "\" ");
610 | 	}
611 | 
612 | 	switch (flags & MKD_TABLE_ALIGNMASK) {
613 | 	case MKD_TABLE_ALIGN_CENTER:
614 | 		BUFPUTSL(ob, " align=\"center\">");
615 | 		break;
616 | 
617 | 	case MKD_TABLE_ALIGN_L:
618 | 		BUFPUTSL(ob, " align=\"left\">");
619 | 		break;
620 | 
621 | 	case MKD_TABLE_ALIGN_R:
622 | 		BUFPUTSL(ob, " align=\"right\">");
623 | 		break;
624 | 
625 | 	default:
626 | 		BUFPUTSL(ob, ">");
627 | 	}
628 | 
629 | 	if (text)
630 | 		bufput(ob, text->data, text->size);
631 | 
632 | 	if (flags & MKD_TABLE_HEADER) {
633 | 		BUFPUTSL(ob, "</th>\n");
634 | 	} else {
635 | 		BUFPUTSL(ob, "</td>\n");
636 | 	}
637 | }
638 | 
639 | static int
640 | rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
641 | {
642 | 	if (!text || !text->size) return 0;
643 | 	BUFPUTSL(ob, "<sup>");
644 | 	bufput(ob, text->data, text->size);
645 | 	BUFPUTSL(ob, "</sup>");
646 | 	return 1;
647 | }
648 | 
649 | static void
650 | rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
651 | {
652 | 	if (text)
653 | 		escape_html(ob, text->data, text->size);
654 | }
655 | 
656 | static void
657 | toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
658 | {
659 | 	struct html_renderopt *options = opaque;
660 | 
661 | 	/* set the level offset if this is the first header
662 | 	 * we're parsing for the document */
663 | 	if (options->toc_data.current_level == 0) {
664 | 		BUFPUTSL(ob, "<div class=\"toc\">\n");
665 | 		options->toc_data.level_offset = level - 1;
666 | 	}
667 | 	level -= options->toc_data.level_offset;
668 | 
669 | 	if (level > options->toc_data.current_level) {
670 | 		while (level > options->toc_data.current_level) {
671 | 			BUFPUTSL(ob, "<ul>\n<li>\n");
672 | 			options->toc_data.current_level++;
673 | 		}
674 | 	} else if (level < options->toc_data.current_level) {
675 | 		BUFPUTSL(ob, "</li>\n");
676 | 		while (level < options->toc_data.current_level) {
677 | 			BUFPUTSL(ob, "</ul>\n</li>\n");
678 | 			options->toc_data.current_level--;
679 | 		}
680 | 		BUFPUTSL(ob,"<li>\n");
681 | 	} else {
682 | 		BUFPUTSL(ob,"</li>\n<li>\n");
683 | 	}
684 | 
685 | 	BUFPUTSL(ob, "<a href=\"#");
686 | 
687 | 	if (options->toc_id_prefix) {
688 | 		bufputs(ob, options->toc_id_prefix);
689 | 	}
690 | 
691 | 	BUFPUTSL(ob, "toc_");
692 | 	bufputi(ob, options->toc_data.header_count++);
693 | 	BUFPUTSL(ob, "\">");
694 | 	if (text)
695 | 		escape_html(ob, text->data, text->size);
696 | 	BUFPUTSL(ob, "</a>\n");
697 | }
698 | 
699 | static int
700 | toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
701 | {
702 | 	if (content && content->size)
703 | 		bufput(ob, content->data, content->size);
704 | 	return 1;
705 | }
706 | 
707 | static void
708 | reset_toc(struct buf *ob, void *opaque)
709 | {
710 | 	struct html_renderopt *options = opaque;
711 | 
712 | 	memset(&(options->toc_data), 0, sizeof(options->toc_data));
713 | }
714 | 
715 | static void
716 | toc_finalize(struct buf *ob, void *opaque)
717 | {
718 | 	struct html_renderopt *options = opaque;
719 | 	bool has_toc = false;
720 | 	while (options->toc_data.current_level > 0) {
721 | 		BUFPUTSL(ob, "</li>\n</ul>\n");
722 | 		options->toc_data.current_level--;
723 | 		has_toc = true;
724 | 	}
725 | 	if(has_toc) {
726 | 		BUFPUTSL(ob, "</div>\n");
727 | 	}
728 | 	reset_toc(ob, opaque);
729 | }
730 | 
/*
 * Set up `callbacks` and `options` for rendering a table of contents.
 *
 * `options` is zeroed and its flags are set to HTML_TOC | HTML_SKIP_HTML.
 * `callbacks` is overwritten with a table in which only the header callback,
 * a few span-level callbacks and the last hook are set; every other slot is
 * NULL.
 */
void
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
{
	/* Positional initializer: the slots follow the member order of
	 * struct sd_callbacks, i.e. the same order as the fully populated
	 * table in sdhtml_renderer(). */
	static const struct sd_callbacks cb_default = {
		/* block-level slots: only the header slot (5th) is used */
		NULL,
		NULL,
		NULL,
		NULL,
		toc_header,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,

		/* span-level slots: inline formatting is kept, links are reduced
		 * to their content, the remaining slots are unset */
		NULL,
		rndr_codespan,
		rndr_spoilerspan,
		rndr_double_emphasis,
		rndr_emphasis,
		NULL,
		NULL,
		toc_link,
		NULL,
		rndr_triple_emphasis,
		rndr_strikethrough,
		rndr_superscript,

		/* same slots that hold NULL / rndr_normal_text in sdhtml_renderer() */
		NULL,
		NULL,

		/* same slots that hold NULL / reset_toc in sdhtml_renderer();
		 * presumably the document header/footer hooks, confirm against
		 * struct sd_callbacks */
		NULL,
		toc_finalize,
	};

	memset(options, 0x0, sizeof(struct html_renderopt));
	options->flags = HTML_TOC | HTML_SKIP_HTML;

	memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
}
773 | 
/*
 * Set up `callbacks` and `options` for regular HTML rendering.
 *
 * `options` is zeroed and its flags are set to `render_flags`, so any other
 * option field has to be assigned by the caller after this call.
 * `callbacks` receives the default table below; selected entries are then
 * cleared according to `render_flags`.
 */
void
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
{
	/* Positional initializer: the slots follow the member order of
	 * struct sd_callbacks. */
	static const struct sd_callbacks cb_default = {
		/* block-level callbacks */
		rndr_blockcode,
		rndr_blockquote,
		rndr_blockspoiler,
		rndr_raw_block,
		rndr_header,
		rndr_hrule,
		rndr_list,
		rndr_listitem,
		rndr_paragraph,
		rndr_table,
		rndr_tablerow,
		rndr_tablecell,

		/* span-level callbacks */
		rndr_autolink,
		rndr_codespan,
		rndr_spoilerspan,
		rndr_double_emphasis,
		rndr_emphasis,
		rndr_image,
		rndr_linebreak,
		rndr_link,
		rndr_raw_html,
		rndr_triple_emphasis,
		rndr_strikethrough,
		rndr_superscript,

		/* low-level callbacks; the first slot is left unset */
		NULL,
		rndr_normal_text,

		/* presumably the document header/footer hooks, confirm against
		 * struct sd_callbacks; the TOC state is reset in the last one */
		NULL,
		reset_toc,
	};

	/* Prepare the options pointer */
	memset(options, 0x0, sizeof(struct html_renderopt));
	options->flags = render_flags;

	/* Prepare the callbacks */
	memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));

	/* Clear the callbacks for constructs the flags ask to skip.
	 * NOTE(review): how the parser treats a NULL callback is not visible
	 * here; confirm. */
	if (render_flags & HTML_SKIP_IMAGES)
		callbacks->image = NULL;

	if (render_flags & HTML_SKIP_LINKS) {
		callbacks->link = NULL;
		callbacks->autolink = NULL;
	}

	if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
		callbacks->blockhtml = NULL;
}
829 | 


--------------------------------------------------------------------------------
/src/markdown.c:
--------------------------------------------------------------------------------
   1 | /* markdown.c - generic markdown parser */
   2 | 
   3 | /*
   4 |  * Copyright (c) 2009, Natacha Porté
   5 |  * Copyright (c) 2011, Vicent Marti
   6 |  *
   7 |  * Permission to use, copy, modify, and distribute this software for any
   8 |  * purpose with or without fee is hereby granted, provided that the above
   9 |  * copyright notice and this permission notice appear in all copies.
  10 |  *
  11 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 |  */
  19 | 
  20 | #include "markdown.h"
  21 | #include "stack.h"
  22 | 
  23 | #include <assert.h>
  24 | #include <string.h>
  25 | #include <ctype.h>
  26 | #include <stdio.h>
  27 | 
/* On Windows builds, alias strncasecmp to _strnicmp */
#if defined(_WIN32)
#define strncasecmp	_strnicmp
#endif

/* number of buckets in the link reference hash table (sd_markdown.refs) */
#define REF_TABLE_SIZE 8

/* indices into sd_markdown.work_bufs selecting one of the two buffer pools */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1

#define MKD_LI_END 8	/* internal list flag */

/* Settings that must be defined before the two headers below are included
 * (presumably gperf-generated lookup tables, confirm): the case-insensitive
 * comparison they use is routed to strncasecmp. */
#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
#define GPERF_DOWNCASE 1
#define GPERF_CASE_STRNCMP 1
#include "html_blocks.h"
#include "html_entities.h"
  44 | 
  45 | /***************
  46 |  * LOCAL TYPES *
  47 |  ***************/
  48 | 
/* link_ref: reference to a link */
/*   entries are kept in a hash table of singly linked bucket chains */
struct link_ref {
	/* hash of the reference name (see hash_link_ref); lookups compare
	 * this value only, the name itself is not stored */
	unsigned int id;

	/* link target and optional title; released with bufrelease() in
	 * free_link_refs */
	struct buf *link;
	struct buf *title;

	/* next entry in the same bucket */
	struct link_ref *next;
};
  58 | 
/* char_trigger: function pointer to render active chars */
/*   returns the number of chars taken care of */
/*   data is the pointer of the beginning of the span */
/*   size is the number of chars available starting at data */
/*   NOTE(review): an earlier revision of this comment described an `offset` */
/*   argument (number of valid chars before data); the signature now takes */
/*   max_rewind and max_lookbehind instead, presumably bounds on how far a */
/*   handler may look back before data. Confirm against the callers. */
struct sd_markdown;
typedef size_t
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);

/* forward declarations of the handlers referenced by markdown_char_ptrs */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size);
  79 | 
/* markdown_char_t: action codes for active characters */
/*   the enumerators are listed in the same order as the handlers in */
/*   markdown_char_ptrs, so the two must be kept in sync; */
/*   MD_CHAR_NONE (0) marks a character with no handler */
enum markdown_char_t {
	MD_CHAR_NONE = 0,
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITITY,	/* (sic) name kept as spelled, it may be referenced elsewhere */
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME,
	MD_CHAR_SUPERSCRIPT,
};
  95 | 
/* markdown_char_ptrs: handler table for active characters */
/*   entries are listed in the order of enum markdown_char_t (presumably the */
/*   table is indexed by those values, confirm at the call site); */
/*   slot 0 (MD_CHAR_NONE) has no handler */
static char_trigger markdown_char_ptrs[] = {
	NULL,
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_autolink_subreddit_or_username,
	&char_superscript,
};
 111 | 
/* render • structure containing one particular render */
struct sd_markdown {
	struct sd_callbacks	cb;	/* renderer callback table */
	void *opaque;			/* renderer-private pointer; presumably handed back to every callback, confirm */

	/* link reference hash table: REF_TABLE_SIZE bucket chains */
	struct link_ref *refs[REF_TABLE_SIZE];
	/* per-byte action code; 0 means the byte is not an active char */
	uint8_t active_char[256];
	/* work buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	struct stack work_bufs[2];
	unsigned int ext_flags;		/* presumably the enabled extension flags, confirm */
	/* parse_inline bails out once the buffers in use across both pools
	 * exceed this count */
	size_t max_nesting;
	size_t max_table_cols;		/* presumably a cap on table columns, confirm */
	int in_link_body;		/* presumably non-zero while link content is being parsed, confirm */
};
 125 | 
 126 | /***************************
 127 |  * HELPER FUNCTIONS *
 128 |  ***************************/
 129 | 
 130 | static inline struct buf *
 131 | rndr_newbuf(struct sd_markdown *rndr, int type)
 132 | {
 133 | 	static const size_t buf_size[2] = {256, 64};
 134 | 	struct buf *work = NULL;
 135 | 	struct stack *pool = &rndr->work_bufs[type];
 136 | 
 137 | 	if (pool->size < pool->asize &&
 138 | 		pool->item[pool->size] != NULL) {
 139 | 		work = pool->item[pool->size++];
 140 | 		work->size = 0;
 141 | 	} else {
 142 | 		work = bufnew(buf_size[type]);
 143 | 		stack_push(pool, work);
 144 | 	}
 145 | 
 146 | 	return work;
 147 | }
 148 | 
 149 | static inline void
 150 | rndr_popbuf(struct sd_markdown *rndr, int type)
 151 | {
 152 | 	rndr->work_bufs[type].size--;
 153 | }
 154 | 
 155 | static void
 156 | unscape_text(struct buf *ob, struct buf *src)
 157 | {
 158 | 	size_t i = 0, org;
 159 | 	while (i < src->size) {
 160 | 		org = i;
 161 | 		while (i < src->size && src->data[i] != '\\')
 162 | 			i++;
 163 | 
 164 | 		if (i > org)
 165 | 			bufput(ob, src->data + org, i - org);
 166 | 
 167 | 		if (i + 1 >= src->size)
 168 | 			break;
 169 | 
 170 | 		bufputc(ob, src->data[i + 1]);
 171 | 		i += 2;
 172 | 	}
 173 | }
 174 | 
 175 | static unsigned int
 176 | hash_link_ref(const uint8_t *link_ref, size_t length)
 177 | {
 178 | 	size_t i;
 179 | 	unsigned int hash = 0;
 180 | 
 181 | 	for (i = 0; i < length; ++i)
 182 | 		hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
 183 | 
 184 | 	return hash;
 185 | }
 186 | 
 187 | static struct link_ref *
 188 | add_link_ref(
 189 | 	struct link_ref **references,
 190 | 	const uint8_t *name, size_t name_size)
 191 | {
 192 | 	struct link_ref *ref = calloc(1, sizeof(struct link_ref));
 193 | 
 194 | 	if (!ref)
 195 | 		return NULL;
 196 | 
 197 | 	ref->id = hash_link_ref(name, name_size);
 198 | 	ref->next = references[ref->id % REF_TABLE_SIZE];
 199 | 
 200 | 	references[ref->id % REF_TABLE_SIZE] = ref;
 201 | 	return ref;
 202 | }
 203 | 
 204 | static struct link_ref *
 205 | find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
 206 | {
 207 | 	unsigned int hash = hash_link_ref(name, length);
 208 | 	struct link_ref *ref = NULL;
 209 | 
 210 | 	ref = references[hash % REF_TABLE_SIZE];
 211 | 
 212 | 	while (ref != NULL) {
 213 | 		if (ref->id == hash)
 214 | 			return ref;
 215 | 
 216 | 		ref = ref->next;
 217 | 	}
 218 | 
 219 | 	return NULL;
 220 | }
 221 | 
 222 | static void
 223 | free_link_refs(struct link_ref **references)
 224 | {
 225 | 	size_t i;
 226 | 
 227 | 	for (i = 0; i < REF_TABLE_SIZE; ++i) {
 228 | 		struct link_ref *r = references[i];
 229 | 		struct link_ref *next;
 230 | 
 231 | 		while (r) {
 232 | 			next = r->next;
 233 | 			bufrelease(r->link);
 234 | 			bufrelease(r->title);
 235 | 			free(r);
 236 | 			r = next;
 237 | 		}
 238 | 	}
 239 | }
 240 | 
 241 | /*
 242 |  * Check whether a char is a Markdown space.
 243 | 
 244 |  * Right now we only consider spaces the actual
 245 |  * space and a newline: tabs and carriage returns
 246 |  * are filtered out during the preprocessing phase.
 247 |  *
 248 |  * If we wanted to actually be UTF-8 compliant, we
 249 |  * should instead extract an Unicode codepoint from
 250 |  * this character and check for space properties.
 251 |  */
 252 | static inline int
 253 | _isspace(int c)
 254 | {
 255 | 	return c == ' ' || c == '\n';
 256 | }
 257 | 
 258 | /****************************
 259 |  * INLINE PARSING FUNCTIONS *
 260 |  ****************************/
 261 | 
 262 | /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 263 | /* this is less strict than the original markdown e-mail address matching */
 264 | static size_t
 265 | is_mail_autolink(uint8_t *data, size_t size)
 266 | {
 267 | 	size_t i = 0, nb = 0;
 268 | 
 269 | 	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 270 | 	for (i = 0; i < size; ++i) {
 271 | 		if (isalnum(data[i]))
 272 | 			continue;
 273 | 
 274 | 		switch (data[i]) {
 275 | 			case '@':
 276 | 				nb++;
 277 | 
 278 | 			case '-':
 279 | 			case '.':
 280 | 			case '_':
 281 | 				break;
 282 | 
 283 | 			case '>':
 284 | 				return (nb == 1) ? i + 1 : 0;
 285 | 
 286 | 			default:
 287 | 				return 0;
 288 | 		}
 289 | 	}
 290 | 
 291 | 	return 0;
 292 | }
 293 | 
 294 | /* tag_length • returns the length of the given tag, or 0 is it's not valid */
 295 | static size_t
 296 | tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
 297 | {
 298 | 	size_t i, j;
 299 | 
 300 | 	/* a valid tag can't be shorter than 3 chars */
 301 | 	if (size < 3) return 0;
 302 | 
 303 | 	/* begins with a '<' optionally followed by '/', followed by letter or number */
 304 | 	if (data[0] != '<') return 0;
 305 | 	i = (data[1] == '/') ? 2 : 1;
 306 | 
 307 | 	if (!isalnum(data[i]))
 308 | 		return 0;
 309 | 
 310 | 	/* scheme test */
 311 | 	*autolink = MKDA_NOT_AUTOLINK;
 312 | 
 313 | 	/* try to find the beginning of an URI */
 314 | 	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 315 | 		i++;
 316 | 
 317 | 	if (i > 1 && data[i] == '@') {
 318 | 		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 319 | 			*autolink = MKDA_EMAIL;
 320 | 			return i + j;
 321 | 		}
 322 | 	}
 323 | 
 324 | 	if (i > 2 && data[i] == ':') {
 325 | 		*autolink = MKDA_NORMAL;
 326 | 		i++;
 327 | 	}
 328 | 
 329 | 	/* completing autolink test: no whitespace or ' or " */
 330 | 	if (i >= size)
 331 | 		*autolink = MKDA_NOT_AUTOLINK;
 332 | 
 333 | 	else if (*autolink) {
 334 | 		j = i;
 335 | 
 336 | 		while (i < size) {
 337 | 			if (data[i] == '\\') i += 2;
 338 | 			else if (data[i] == '>' || data[i] == '\'' ||
 339 | 					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
 340 | 					break;
 341 | 			else i++;
 342 | 		}
 343 | 
 344 | 		if (i >= size) return 0;
 345 | 		if (i > j && data[i] == '>') return i + 1;
 346 | 		/* one of the forbidden chars has been found */
 347 | 		*autolink = MKDA_NOT_AUTOLINK;
 348 | 	}
 349 | 
 350 | 	/* looking for sometinhg looking like a tag end */
 351 | 	while (i < size && data[i] != '>') i++;
 352 | 	if (i >= size) return 0;
 353 | 	return i + 1;
 354 | }
 355 | 
 356 | /* parse_inline • parses inline markdown elements */
 357 | static void
 358 | parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
 359 | {
 360 | 	size_t i = 0, end = 0, last_special = 0;
 361 | 	uint8_t action = 0;
 362 | 	struct buf work = { 0, 0, 0, 0 };
 363 | 
 364 | 	if (rndr->work_bufs[BUFFER_SPAN].size +
 365 | 		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
 366 | 		return;
 367 | 
 368 | 	while (i < size) {
 369 | 		/* copying inactive chars into the output */
 370 | 		while (end < size && (action = rndr->active_char[data[end]]) == 0) {
 371 | 			end++;
 372 | 		}
 373 | 
 374 | 		if (rndr->cb.normal_text) {
 375 | 			work.data = data + i;
 376 | 			work.size = end - i;
 377 | 			rndr->cb.normal_text(ob, &work, rndr->opaque);
 378 | 		}
 379 | 		else
 380 | 			bufput(ob, data + i, end - i);
 381 | 
 382 | 		if (end >= size) break;
 383 | 		i = end;
 384 | 
 385 | 		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i - last_special, i, size - i);
 386 | 		if (!end) /* no action from the callback */
 387 | 			end = i + 1;
 388 | 		else {
 389 | 			i += end;
 390 | 			last_special = end = i;
 391 | 		}
 392 | 	}
 393 | }
 394 | 
 395 | /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 396 | static size_t
 397 | find_emph_char(uint8_t *data, size_t size, uint8_t c)
 398 | {
 399 | 	size_t i = 1;
 400 | 
 401 | 	while (i < size) {
 402 | 		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
 403 | 			i++;
 404 | 
 405 | 		if (i == size)
 406 | 			return 0;
 407 | 
 408 | 		if (i < size && c == '<' && data[i] == c && data[i-1] == '!')
 409 | 			return i;
 410 | 
 411 | 		if (data[i] == c)
 412 | 			return i;
 413 | 
 414 | 		/* not counting escaped chars */
 415 | 		if (i && data[i - 1] == '\\') {
 416 | 			i++; continue;
 417 | 		}
 418 | 
 419 | 		if (data[i] == '`') {
 420 | 			size_t span_nb = 0, bt;
 421 | 			size_t tmp_i = 0;
 422 | 
 423 | 			/* counting the number of opening backticks */
 424 | 			while (i < size && data[i] == '`') {
 425 | 				i++; span_nb++;
 426 | 			}
 427 | 
 428 | 			if (i >= size) return 0;
 429 | 
 430 | 			/* finding the matching closing sequence */
 431 | 			bt = 0;
 432 | 			while (i < size && bt < span_nb) {
 433 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 434 | 				if (data[i] == '`') bt++;
 435 | 				else bt = 0;
 436 | 				i++;
 437 | 			}
 438 | 
 439 | 			if (i >= size) return tmp_i;
 440 | 		}
 441 | 		/* skipping a link */
 442 | 		else if (data[i] == '[') {
 443 | 			size_t tmp_i = 0;
 444 | 			uint8_t cc;
 445 | 
 446 | 			i++;
 447 | 			while (i < size && data[i] != ']') {
 448 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 449 | 				i++;
 450 | 			}
 451 | 
 452 | 			i++;
 453 | 			while (i < size && (data[i] == ' ' || data[i] == '\n'))
 454 | 				i++;
 455 | 
 456 | 			if (i >= size)
 457 | 				return tmp_i;
 458 | 
 459 | 			switch (data[i]) {
 460 | 			case '[':
 461 | 				cc = ']'; break;
 462 | 
 463 | 			case '(':
 464 | 				cc = ')'; break;
 465 | 
 466 | 			default:
 467 | 				if (tmp_i)
 468 | 					return tmp_i;
 469 | 				else
 470 | 					continue;
 471 | 			}
 472 | 
 473 | 			i++;
 474 | 			while (i < size && data[i] != cc) {
 475 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 476 | 				i++;
 477 | 			}
 478 | 
 479 | 			if (i >= size)
 480 | 				return tmp_i;
 481 | 
 482 | 			i++;
 483 | 		}
 484 | 	}
 485 | 
 486 | 	return 0;
 487 | }
 488 | 
 489 | /* parse_emph1 • parsing single emphase */
 490 | /* closed by a symbol not preceded by whitespace and not followed by symbol */
 491 | static size_t
 492 | parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 493 | {
 494 | 	size_t i = 0, len;
 495 | 	struct buf *work = 0;
 496 | 	int r;
 497 | 
 498 | 	if (!rndr->cb.emphasis) return 0;
 499 | 
 500 | 	/* skipping one symbol if coming from emph3 */
 501 | 	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 502 | 
 503 | 	while (i < size) {
 504 | 		len = find_emph_char(data + i, size - i, c);
 505 | 		if (!len) return 0;
 506 | 		i += len;
 507 | 		if (i >= size) return 0;
 508 | 
 509 | 		if (data[i] == c && !_isspace(data[i - 1])) {
 510 | 			if ((rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) && (c == '_')) {
 511 | 				if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
 512 | 					continue;
 513 | 			}
 514 | 
 515 | 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 516 | 			parse_inline(work, rndr, data, i);
 517 | 			r = rndr->cb.emphasis(ob, work, rndr->opaque);
 518 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 519 | 			return r ? i + 1 : 0;
 520 | 		}
 521 | 	}
 522 | 
 523 | 	return 0;
 524 | }
 525 | 
 526 | /* parse_emph2 • parsing single emphase */
 527 | static size_t
 528 | parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 529 | {
 530 | 	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
 531 | 	size_t i = 0, len;
 532 | 	struct buf *work = 0;
 533 | 	int r;
 534 | 
 535 | 	render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
 536 | 
 537 | 	if (!render_method)
 538 | 		return 0;
 539 | 
 540 | 	while (i < size) {
 541 | 		len = find_emph_char(data + i, size - i, c);
 542 | 		if (!len) return 0;
 543 | 		i += len;
 544 | 
 545 | 		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
 546 | 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 547 | 			parse_inline(work, rndr, data, i);
 548 | 			r = render_method(ob, work, rndr->opaque);
 549 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 550 | 			return r ? i + 2 : 0;
 551 | 		}
 552 | 		i++;
 553 | 	}
 554 | 	return 0;
 555 | }
 556 | 
 557 | /* parse_emph3 • parsing single emphase */
 558 | /* finds the first closing tag, and delegates to the other emph */
 559 | static size_t
 560 | parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 561 | {
 562 | 	size_t i = 0, len;
 563 | 	int r;
 564 | 
 565 | 	while (i < size) {
 566 | 		len = find_emph_char(data + i, size - i, c);
 567 | 		if (!len) return 0;
 568 | 		i += len;
 569 | 
 570 | 		/* skip whitespace preceded symbols */
 571 | 		if (data[i] != c || _isspace(data[i - 1]))
 572 | 			continue;
 573 | 
 574 | 		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
 575 | 			/* triple symbol found */
 576 | 			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
 577 | 
 578 | 			parse_inline(work, rndr, data, i);
 579 | 			r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
 580 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 581 | 			return r ? i + 3 : 0;
 582 | 
 583 | 		} else if (i + 1 < size && data[i + 1] == c) {
 584 | 			/* double symbol found, handing over to emph1 */
 585 | 			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 586 | 			if (!len) return 0;
 587 | 			else return len - 2;
 588 | 
 589 | 		} else {
 590 | 			/* single symbol found, handing over to emph2 */
 591 | 			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 592 | 			if (!len) return 0;
 593 | 			else return len - 1;
 594 | 		}
 595 | 	}
 596 | 	return 0;
 597 | }
 598 | 
 599 | static size_t
 600 | parse_spoilerspan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
 601 | {
 602 | 	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
 603 | 	size_t len;
 604 | 	size_t i = 0;
 605 | 	struct buf *work = 0;
 606 | 	int r;
 607 | 
 608 | 	render_method = rndr->cb.spoilerspan;
 609 | 
 610 | 	if (!render_method) return 0;
 611 | 
 612 | 	while (i < size) {
 613 | 		len = find_emph_char(data + i, size - i, '<');
 614 | 		if (!len) return 0;
 615 | 		i += len;
 616 | 
 617 | 		if (i < size && data[i] == '<' && data[i - 1] == '!') {
 618 | 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 619 | 			parse_inline(work, rndr, data, i - 1);
 620 | 			r = render_method(ob, work, rndr->opaque);
 621 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 622 | 
 623 | 			if (!r) return 0;
 624 | 
 625 | 			return i + 1;
 626 | 		}
 627 | 		i++;
 628 | 	}
 629 | 	return 0;
 630 | }
 631 | 
 632 | /* char_emphasis • single and double emphasis parsing */
 633 | static size_t
 634 | char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 635 | {
 636 | 	uint8_t c = data[0];
 637 | 	size_t ret;
 638 | 
 639 | 	if (size > 3 && c == '>' && data[1] == '!') {
 640 | 		if(_isspace(data[2]) || (ret = parse_spoilerspan(ob, rndr, data + 2, size - 2)) == 0)
 641 | 			return 0;
 642 | 
 643 | 		return ret + 2;
 644 | 	}
 645 | 
 646 | 
 647 | 	if (size > 2 && data[1] != c) {
 648 | 		/* whitespace cannot follow an opening emphasis;
 649 | 		 * strikethrough only takes two characters '~~' */
 650 | 		if (c == '~' || c == '>' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 651 | 			return 0;
 652 | 
 653 | 		return ret + 1;
 654 | 	}
 655 | 
 656 | 
 657 | 	if (size > 3 && data[1] == c && data[2] != c) {
 658 | 		if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 659 | 			return 0;
 660 | 
 661 | 		return ret + 2;
 662 | 	}
 663 | 
 664 | 	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 665 | 		if (c == '~' || c == '>' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 666 | 			return 0;
 667 | 
 668 | 		return ret + 3;
 669 | 	}
 670 | 
 671 | 	return 0;
 672 | }
 673 | 
 674 | 
 675 | /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 676 | static size_t
 677 | char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 678 | {
 679 | 	if (max_rewind < 2 || data[-1] != ' ' || data[-2] != ' ')
 680 | 		return 0;
 681 | 
 682 | 	/* removing the last space from ob and rendering */
 683 | 	while (ob->size && ob->data[ob->size - 1] == ' ')
 684 | 		ob->size--;
 685 | 
 686 | 	return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
 687 | }
 688 | 
 689 | 
 690 | /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 691 | static size_t
 692 | char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 693 | {
 694 | 	size_t end, nb = 0, i, f_begin, f_end;
 695 | 
 696 | 	/* counting the number of backticks in the delimiter */
 697 | 	while (nb < size && data[nb] == '`')
 698 | 		nb++;
 699 | 
 700 | 	/* finding the next delimiter */
 701 | 	i = 0;
 702 | 	for (end = nb; end < size && i < nb; end++) {
 703 | 		if (data[end] == '`') i++;
 704 | 		else i = 0;
 705 | 	}
 706 | 
 707 | 	if (i < nb && end >= size)
 708 | 		return 0; /* no matching delimiter */
 709 | 
 710 | 	/* trimming outside whitespaces */
 711 | 	f_begin = nb;
 712 | 	while (f_begin < end && data[f_begin] == ' ')
 713 | 		f_begin++;
 714 | 
 715 | 	f_end = end - nb;
 716 | 	while (f_end > nb && data[f_end-1] == ' ')
 717 | 		f_end--;
 718 | 
 719 | 	/* real code span */
 720 | 	if (f_begin < f_end) {
 721 | 		struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
 722 | 		if (!rndr->cb.codespan(ob, &work, rndr->opaque))
 723 | 			end = 0;
 724 | 	} else {
 725 | 		if (!rndr->cb.codespan(ob, 0, rndr->opaque))
 726 | 			end = 0;
 727 | 	}
 728 | 
 729 | 	return end;
 730 | }
 731 | 
 732 | 
 733 | /* char_escape • '\\' backslash escape */
 734 | static size_t
 735 | char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 736 | {
 737 | 	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>/^~";
 738 | 	struct buf work = { 0, 0, 0, 0 };
 739 | 
 740 | 	if (size > 1) {
 741 | 		if (strchr(escape_chars, data[1]) == NULL)
 742 | 			return 0;
 743 | 
 744 | 		if (rndr->cb.normal_text) {
 745 | 			work.data = data + 1;
 746 | 			work.size = 1;
 747 | 			rndr->cb.normal_text(ob, &work, rndr->opaque);
 748 | 		}
 749 | 		else bufputc(ob, data[1]);
 750 | 	} else if (size == 1) {
 751 | 		bufputc(ob, data[0]);
 752 | 	}
 753 | 
 754 | 	return 2;
 755 | }
 756 | 
 757 | /* char_entity • '&' escaped when it doesn't belong to an entity */
 758 | static size_t
 759 | char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 760 | {
 761 | 	size_t end = 1;
 762 | 	size_t content_start;
 763 | 	size_t content_end;
 764 | 	struct buf work = { 0, 0, 0, 0 };
 765 | 	int numeric = 0;
 766 | 	int hex = 0;
 767 | 	int entity_base;
 768 | 	uint32_t entity_val;
 769 | 
 770 | 	if (end < size && data[end] == '#') {
 771 | 		numeric = 1;
 772 | 		end++;
 773 | 	}
 774 | 
 775 | 	if (end < size && numeric && tolower(data[end]) == 'x') {
 776 | 		hex = 1;
 777 | 		end++;
 778 | 	}
 779 | 
 780 | 	content_start = end;
 781 | 
 782 | 	while (end < size) {
 783 | 		const char c = data[end];
 784 | 		if (hex) {
 785 | 			if (!isxdigit(c)) break;
 786 | 		} else if (numeric) {
 787 | 			if (!isdigit(c)) break;
 788 | 		} else if (!isalnum(c)) {
 789 | 			break;
 790 | 		}
 791 | 		end++;
 792 | 	}
 793 | 
 794 | 	content_end = end;
 795 | 
 796 | 	if (end > content_start && end < size && data[end] == ';')
 797 | 		end++; /* well-formed entity */
 798 | 	else
 799 | 		return 0; /* not an entity */
 800 | 
 801 | 	/* way too long to be a valid numeric entity */
 802 | 	if (numeric && content_end - content_start > MAX_NUM_ENTITY_LEN)
 803 | 		return 0;
 804 | 
 805 | 	/* Validate the entity's contents */
 806 | 	if (numeric) {
 807 | 		if (hex)
 808 | 			entity_base = 16;
 809 | 		else
 810 | 			entity_base = 10;
 811 | 
 812 | 		// This is ok because  it'll stop once it hits the ';'
 813 | 		entity_val = strtol((char*)data + content_start, NULL, entity_base);
 814 | 		if (!is_valid_numeric_entity(entity_val))
 815 | 			return 0;
 816 | 	} else {
 817 | 		if (!is_allowed_named_entity((const char *)data, end))
 818 | 			return 0;
 819 | 	}
 820 | 
 821 | 	if (rndr->cb.entity) {
 822 | 		work.data = data;
 823 | 		work.size = end;
 824 | 		rndr->cb.entity(ob, &work, rndr->opaque);
 825 | 	} else {
 826 | 		/* Necessary so we can normalize `&#X3E;` to `&#x3E;` */
 827 | 		bufputc(ob, '&');
 828 | 		if (numeric)
 829 | 			bufputc(ob, '#');
 830 | 		if (hex)
 831 | 			bufputc(ob, 'x');
 832 | 		bufput(ob, data + content_start, end - content_start);
 833 | 	}
 834 | 
 835 | 	return end;
 836 | }
 837 | 
 838 | /* char_langle_tag • '<' when tags or autolinks are allowed */
 839 | static size_t
 840 | char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 841 | {
 842 | 	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 843 | 	size_t end = tag_length(data, size, &altype);
 844 | 	struct buf work = { data, end, 0, 0 };
 845 | 	int ret = 0;
 846 | 
 847 | 	if (end > 2) {
 848 | 		if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
 849 | 			struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 850 | 			work.data = data + 1;
 851 | 			work.size = end - 2;
 852 | 			unscape_text(u_link, &work);
 853 | 			ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
 854 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 855 | 		}
 856 | 		else if (rndr->cb.raw_html_tag)
 857 | 			ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
 858 | 	}
 859 | 
 860 | 	if (!ret) return 0;
 861 | 	else return end;
 862 | }
 863 | 
 864 | static size_t
 865 | char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 866 | {
 867 | 	struct buf *link, *link_url, *link_text;
 868 | 	size_t link_len, rewind;
 869 | 
 870 | 	if (!rndr->cb.link || rndr->in_link_body)
 871 | 		return 0;
 872 | 
 873 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 874 | 
 875 | 	if ((link_len = sd_autolink__www(&rewind, link, data, max_rewind, size, 0)) > 0) {
 876 | 		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 877 | 		BUFPUTSL(link_url, "http://");
 878 | 		bufput(link_url, link->data, link->size);
 879 | 
 880 | 		buftruncate(ob, ob->size - rewind);
 881 | 		if (rndr->cb.normal_text) {
 882 | 			link_text = rndr_newbuf(rndr, BUFFER_SPAN);
 883 | 			rndr->cb.normal_text(link_text, link, rndr->opaque);
 884 | 			rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
 885 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 886 | 		} else {
 887 | 			rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
 888 | 		}
 889 | 		rndr_popbuf(rndr, BUFFER_SPAN);
 890 | 	}
 891 | 
 892 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 893 | 	return link_len;
 894 | }
 895 | 
 896 | static size_t
 897 | char_autolink_subreddit_or_username(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 898 | {
 899 | 	struct buf *link, *link_text, *link_url;
 900 | 	size_t link_len, rewind;
 901 | 	int no_slash;
 902 | 
 903 | 	if (!rndr->cb.autolink || rndr->in_link_body)
 904 | 		return 0;
 905 | 
 906 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 907 | 
 908 | 	link_len = sd_autolink__subreddit(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash);
 909 | 	if (link_len == 0)
 910 | 		link_len = sd_autolink__username(&rewind, link, data, max_rewind, max_lookbehind, size, &no_slash);
 911 | 
 912 | 	/* Found either a user or subreddit link */
 913 | 	if (link_len > 0) {
 914 | 		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 915 | 		if (no_slash)
 916 | 			bufputc(link_url, '/');
 917 | 		bufput(link_url, link->data, link->size);
 918 | 
 919 | 		buftruncate(ob, ob->size - rewind);
 920 | 		if (rndr->cb.normal_text) {
 921 | 			link_text = rndr_newbuf(rndr, BUFFER_SPAN);
 922 | 			rndr->cb.normal_text(link_text, link, rndr->opaque);
 923 | 			rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
 924 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 925 | 		} else {
 926 | 			rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
 927 | 		}
 928 | 		rndr_popbuf(rndr, BUFFER_SPAN);
 929 | 	}
 930 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 931 | 
 932 | 	return link_len;
 933 | }
 934 | 
 935 | static size_t
 936 | char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 937 | {
 938 | 	struct buf *link;
 939 | 	size_t link_len, rewind;
 940 | 
 941 | 	if (!rndr->cb.autolink || rndr->in_link_body)
 942 | 		return 0;
 943 | 
 944 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 945 | 
 946 | 	if ((link_len = sd_autolink__email(&rewind, link, data, max_rewind, size, 0)) > 0) {
 947 | 		buftruncate(ob, ob->size - rewind);
 948 | 		rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
 949 | 	}
 950 | 
 951 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 952 | 	return link_len;
 953 | }
 954 | 
 955 | static size_t
 956 | char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 957 | {
 958 | 	struct buf *link;
 959 | 	size_t link_len, rewind;
 960 | 
 961 | 	if (!rndr->cb.autolink || rndr->in_link_body)
 962 | 		return 0;
 963 | 
 964 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 965 | 
 966 | 	if ((link_len = sd_autolink__url(&rewind, link, data, max_rewind, size, 0)) > 0) {
 967 | 		buftruncate(ob, ob->size - rewind);
 968 | 		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 969 | 	}
 970 | 
 971 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 972 | 	return link_len;
 973 | }
 974 | 
 975 | /* char_link • '[': parsing a link or an image */
 976 | static size_t
 977 | char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
 978 | {
 979 | 	int is_img = (max_rewind && data[-1] == '!'), level;
 980 | 	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
 981 | 	struct buf *content = 0;
 982 | 	struct buf *link = 0;
 983 | 	struct buf *title = 0;
 984 | 	struct buf *u_link = 0;
 985 | 	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
 986 | 	int text_has_nl = 0, ret = 0;
 987 | 	int in_title = 0, qtype = 0;
 988 | 
 989 | 	/* checking whether the correct renderer exists */
 990 | 	if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
 991 | 		goto cleanup;
 992 | 
 993 | 	/* looking for the matching closing bracket */
 994 | 	for (level = 1; i < size; i++) {
 995 | 		if (data[i] == '\n')
 996 | 			text_has_nl = 1;
 997 | 
 998 | 		else if (data[i - 1] == '\\')
 999 | 			continue;
1000 | 
1001 | 		else if (data[i] == '[')
1002 | 			level++;
1003 | 
1004 | 		else if (data[i] == ']') {
1005 | 			level--;
1006 | 			if (level <= 0)
1007 | 				break;
1008 | 		}
1009 | 	}
1010 | 
1011 | 	if (i >= size)
1012 | 		goto cleanup;
1013 | 
1014 | 	txt_e = i;
1015 | 	i++;
1016 | 
1017 | 	/* skip any amount of whitespace or newline */
1018 | 	/* (this is much more laxist than original markdown syntax) */
1019 | 	while (i < size && _isspace(data[i]))
1020 | 		i++;
1021 | 
1022 | 	/* inline style link */
1023 | 	if (i < size && data[i] == '(') {
1024 | 		/* skipping initial whitespace */
1025 | 		i++;
1026 | 
1027 | 		while (i < size && _isspace(data[i]))
1028 | 			i++;
1029 | 
1030 | 		link_b = i;
1031 | 
1032 | 		/* looking for link end: ' " ) */
1033 | 		while (i < size) {
1034 | 			if (data[i] == '\\') i += 2;
1035 | 			else if (data[i] == ')') break;
1036 | 			else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1037 | 			else i++;
1038 | 		}
1039 | 
1040 | 		if (i >= size) goto cleanup;
1041 | 		link_e = i;
1042 | 
1043 | 		/* looking for title end if present */
1044 | 		if (data[i] == '\'' || data[i] == '"') {
1045 | 			qtype = data[i];
1046 | 			in_title = 1;
1047 | 			i++;
1048 | 			title_b = i;
1049 | 
1050 | 			while (i < size) {
1051 | 				if (data[i] == '\\') i += 2;
1052 | 				else if (data[i] == qtype) {in_title = 0; i++;}
1053 | 				else if ((data[i] == ')') && !in_title) break;
1054 | 				else i++;
1055 | 			}
1056 | 
1057 | 			if (i >= size) goto cleanup;
1058 | 
1059 | 			/* skipping whitespaces after title */
1060 | 			title_e = i - 1;
1061 | 			while (title_e > title_b && _isspace(data[title_e]))
1062 | 				title_e--;
1063 | 
1064 | 			/* checking for closing quote presence */
1065 | 			if (data[title_e] != '\'' &&  data[title_e] != '"') {
1066 | 				title_b = title_e = 0;
1067 | 				link_e = i;
1068 | 			}
1069 | 		}
1070 | 
1071 | 		/* remove whitespace at the end of the link */
1072 | 		while (link_e > link_b && _isspace(data[link_e - 1]))
1073 | 			link_e--;
1074 | 
1075 | 		/* remove optional angle brackets around the link */
1076 | 		if (data[link_b] == '<') link_b++;
1077 | 		if (data[link_e - 1] == '>') link_e--;
1078 | 
1079 | 		/* building escaped link and title */
1080 | 		if (link_e > link_b) {
1081 | 			link = rndr_newbuf(rndr, BUFFER_SPAN);
1082 | 			bufput(link, data + link_b, link_e - link_b);
1083 | 		}
1084 | 
1085 | 		if (title_e > title_b) {
1086 | 			title = rndr_newbuf(rndr, BUFFER_SPAN);
1087 | 			bufput(title, data + title_b, title_e - title_b);
1088 | 		}
1089 | 
1090 | 		i++;
1091 | 	}
1092 | 
1093 | 	/* reference style link */
1094 | 	else if (i < size && data[i] == '[') {
1095 | 		struct buf id = { 0, 0, 0, 0 };
1096 | 		struct link_ref *lr;
1097 | 
1098 | 		/* looking for the id */
1099 | 		i++;
1100 | 		link_b = i;
1101 | 		while (i < size && data[i] != ']') i++;
1102 | 		if (i >= size) goto cleanup;
1103 | 		link_e = i;
1104 | 
1105 | 		/* finding the link_ref */
1106 | 		if (link_b == link_e) {
1107 | 			if (text_has_nl) {
1108 | 				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1109 | 				size_t j;
1110 | 
1111 | 				for (j = 1; j < txt_e; j++) {
1112 | 					if (data[j] != '\n')
1113 | 						bufputc(b, data[j]);
1114 | 					else if (data[j - 1] != ' ')
1115 | 						bufputc(b, ' ');
1116 | 				}
1117 | 
1118 | 				id.data = b->data;
1119 | 				id.size = b->size;
1120 | 			} else {
1121 | 				id.data = data + 1;
1122 | 				id.size = txt_e - 1;
1123 | 			}
1124 | 		} else {
1125 | 			id.data = data + link_b;
1126 | 			id.size = link_e - link_b;
1127 | 		}
1128 | 
1129 | 		lr = find_link_ref(rndr->refs, id.data, id.size);
1130 | 		if (!lr)
1131 | 			goto cleanup;
1132 | 
1133 | 		/* keeping link and title from link_ref */
1134 | 		link = lr->link;
1135 | 		title = lr->title;
1136 | 		i++;
1137 | 	}
1138 | 
1139 | 	/* shortcut reference style link */
1140 | 	else {
1141 | 		struct buf id = { 0, 0, 0, 0 };
1142 | 		struct link_ref *lr;
1143 | 
1144 | 		/* crafting the id */
1145 | 		if (text_has_nl) {
1146 | 			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1147 | 			size_t j;
1148 | 
1149 | 			for (j = 1; j < txt_e; j++) {
1150 | 				if (data[j] != '\n')
1151 | 					bufputc(b, data[j]);
1152 | 				else if (data[j - 1] != ' ')
1153 | 					bufputc(b, ' ');
1154 | 			}
1155 | 
1156 | 			id.data = b->data;
1157 | 			id.size = b->size;
1158 | 		} else {
1159 | 			id.data = data + 1;
1160 | 			id.size = txt_e - 1;
1161 | 		}
1162 | 
1163 | 		/* finding the link_ref */
1164 | 		lr = find_link_ref(rndr->refs, id.data, id.size);
1165 | 		if (!lr)
1166 | 			goto cleanup;
1167 | 
1168 | 		/* keeping link and title from link_ref */
1169 | 		link = lr->link;
1170 | 		title = lr->title;
1171 | 
1172 | 		/* rewinding the whitespace */
1173 | 		i = txt_e + 1;
1174 | 	}
1175 | 
1176 | 	/* building content: img alt is escaped, link content is parsed */
1177 | 	if (txt_e > 1) {
1178 | 		content = rndr_newbuf(rndr, BUFFER_SPAN);
1179 | 		if (is_img) {
1180 | 			bufput(content, data + 1, txt_e - 1);
1181 | 		} else {
1182 | 			/* disable autolinking when parsing inline the
1183 | 			 * content of a link */
1184 | 			rndr->in_link_body = 1;
1185 | 			parse_inline(content, rndr, data + 1, txt_e - 1);
1186 | 			rndr->in_link_body = 0;
1187 | 		}
1188 | 	}
1189 | 
1190 | 	if (link) {
1191 | 		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1192 | 		unscape_text(u_link, link);
1193 | 	} else {
1194 | 		goto cleanup;
1195 | 	}
1196 | 
1197 | 	/* calling the relevant rendering function */
1198 | 	if (is_img) {
1199 | 		if (ob->size && ob->data[ob->size - 1] == '!')
1200 | 			ob->size -= 1;
1201 | 
1202 | 		ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1203 | 	} else {
1204 | 		ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1205 | 	}
1206 | 
1207 | 	/* cleanup */
1208 | cleanup:
1209 | 	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1210 | 	return ret ? i : 0;
1211 | }
1212 | 
1213 | static size_t
1214 | char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t max_rewind, size_t max_lookbehind, size_t size)
1215 | {
1216 | 	size_t sup_start, sup_len;
1217 | 	struct buf *sup;
1218 | 
1219 | 	if (!rndr->cb.superscript)
1220 | 		return 0;
1221 | 
1222 | 	if (size < 2)
1223 | 		return 0;
1224 | 
1225 | 	if (data[1] == '(') {
1226 | 		sup_start = sup_len = 2;
1227 | 
1228 | 		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1229 | 			sup_len++;
1230 | 
1231 | 		if (sup_len == size)
1232 | 			return 0;
1233 | 	} else {
1234 | 		sup_start = sup_len = 1;
1235 | 
1236 | 		while (sup_len < size && !_isspace(data[sup_len]))
1237 | 			sup_len++;
1238 | 	}
1239 | 
1240 | 	if (sup_len - sup_start == 0)
1241 | 		return (sup_start == 2) ? 3 : 0;
1242 | 
1243 | 	sup = rndr_newbuf(rndr, BUFFER_SPAN);
1244 | 	parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1245 | 	rndr->cb.superscript(ob, sup, rndr->opaque);
1246 | 	rndr_popbuf(rndr, BUFFER_SPAN);
1247 | 
1248 | 	return (sup_start == 2) ? sup_len + 1 : sup_len;
1249 | }
1250 | 
1251 | /*********************************
1252 |  * BLOCK-LEVEL PARSING FUNCTIONS *
1253 |  *********************************/
1254 | 
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* A line is empty when it holds nothing but spaces before its newline (or
 * before the end of the buffer). The returned length counts one byte for the
 * line terminator even when the buffer ends without one. */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1267 | 
/* is_hrule • returns whether a line is a horizontal rule */
/* After at most three leading spaces, the line must consist solely of one
 * rule character ('*', '-' or '_') and spaces, with the rule character
 * occurring at least three times. Returns 1 for a rule, 0 otherwise. */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, count = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain for a rule to fit */
	if (pos + 2 >= size)
		return 0;

	/* looking at the hrule uint8_t */
	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the whole line must be the char or whitespace */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			count++;
		else if (data[pos] != ' ')
			return 0;
	}

	return count >= 3;
}
1298 | 
/* prefix_codefence • checks whether a line begins with a code fence */
/* A fence is a run of at least three '~' or three '`' after at most three
 * leading spaces. Returns the offset just past the fence run (leading spaces
 * included), or 0 when the line does not start with a fence. */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain for a fence to fit */
	if (pos + 2 >= size)
		return 0;

	/* looking at the fence uint8_t */
	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measuring the run of fence characters */
	for (; pos < size && data[pos] == fence; pos++)
		run++;

	return (run < 3) ? 0 : pos;
}
1329 | 
1330 | /* check if a line is a code fence; return its size if it is */
1331 | static size_t
1332 | is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1333 | {
1334 | 	size_t i = 0, syn_len = 0;
1335 | 	uint8_t *syn_start;
1336 | 
1337 | 	i = prefix_codefence(data, size);
1338 | 	if (i == 0)
1339 | 		return 0;
1340 | 
1341 | 	while (i < size && data[i] == ' ')
1342 | 		i++;
1343 | 
1344 | 	syn_start = data + i;
1345 | 
1346 | 	if (i < size && data[i] == '{') {
1347 | 		i++; syn_start++;
1348 | 
1349 | 		while (i < size && data[i] != '}' && data[i] != '\n') {
1350 | 			syn_len++; i++;
1351 | 		}
1352 | 
1353 | 		if (i == size || data[i] != '}')
1354 | 			return 0;
1355 | 
1356 | 		/* strip all whitespace at the beginning and the end
1357 | 		 * of the {} block */
1358 | 		while (syn_len > 0 && _isspace(syn_start[0])) {
1359 | 			syn_start++; syn_len--;
1360 | 		}
1361 | 
1362 | 		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1363 | 			syn_len--;
1364 | 
1365 | 		i++;
1366 | 	} else {
1367 | 		while (i < size && !_isspace(data[i])) {
1368 | 			syn_len++; i++;
1369 | 		}
1370 | 	}
1371 | 
1372 | 	if (syntax) {
1373 | 		syntax->data = syn_start;
1374 | 		syntax->size = syn_len;
1375 | 	}
1376 | 
1377 | 	while (i < size && data[i] != '\n') {
1378 | 		if (!_isspace(data[i]))
1379 | 			return 0;
1380 | 
1381 | 		i++;
1382 | 	}
1383 | 
1384 | 	return i + 1;
1385 | }
1386 | 
1387 | /* is_atxheader • returns whether the line is a hash-prefixed header */
1388 | static int
1389 | is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1390 | {
1391 | 	if (data[0] != '#')
1392 | 		return 0;
1393 | 
1394 | 	if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1395 | 		size_t level = 0;
1396 | 
1397 | 		while (level < size && level < 6 && data[level] == '#')
1398 | 			level++;
1399 | 
1400 | 		if (level < size && data[level] != ' ')
1401 | 			return 0;
1402 | 	}
1403 | 
1404 | 	return 1;
1405 | }
1406 | 
/* is_headerline • classifies a setext-style header underline.
 *
 * The line must consist of a run of '=' (level 1) or '-' (level 2),
 * optionally followed by spaces, and then end (newline or end of data).
 * The first byte is read unconditionally.
 *
 * Returns 1 or 2 for a valid underline, 0 otherwise. */
static int
is_headerline(uint8_t *data, size_t size)
{
	const uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	while (pos < size && data[pos] == mark)
		pos++;

	while (pos < size && data[pos] == ' ')
		pos++;

	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
1427 | 
/* is_next_headerline • tells whether the line AFTER the current one is a
 * setext header underline.  Returns is_headerline()'s result for that
 * line, or 0 when there is no following line. */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* locate the newline that ends the current line */
	for (next = 0; next < size; next++) {
		if (data[next] == '\n')
			break;
	}

	/* step past it; nothing to test if that reaches the end of the data */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1441 | 
/* prefix_quote • returns blockquote prefix length.
 *
 * Accepts up to three leading spaces followed by '>'.  The '>' must be
 * followed by at least one more byte, and that byte must not be '!'
 * (">!" is handled by prefix_blockspoiler).  One space after the '>' is
 * counted as part of the prefix.  Returns 0 if there is no quote prefix. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t indent = 0;

	while (indent < 3 && indent < size && data[indent] == ' ')
		indent++;

	/* need the '>' itself plus one byte of lookahead */
	if (indent + 1 >= size || data[indent] != '>')
		return 0;

	if (data[indent + 1] == '!')
		return 0;

	return (data[indent + 1] == ' ') ? indent + 2 : indent + 1;
}
1460 | 
/* prefix_blockspoiler • returns block spoiler (">!") prefix length.
 *
 * Accepts up to three leading spaces followed by ">!".  The line is
 * rejected when find_emph_char() locates a '<' in the text after the '>'
 * and the byte just before that '<' is '!', i.e. the line contains "!<"
 * (presumably the closing marker of an inline spoiler -- confirm against
 * the span parser).  One space after ">!" is counted as part of the
 * prefix.  Returns 0 if there is no block spoiler prefix. */
static size_t
prefix_blockspoiler(uint8_t *data, size_t size)
{
	size_t indent = 0, close_off;

	while (indent < 3 && indent < size && data[indent] == ' ')
		indent++;

	if (indent + 1 >= size || data[indent] != '>' || data[indent + 1] != '!')
		return 0;

	/* close_off is relative to data + indent + 1, so data[indent + close_off]
	 * is the byte immediately before the '<' that was found */
	close_off = find_emph_char(data + indent + 1, size - indent - 1, '<');
	if (close_off > 0 && indent + close_off < size && data[indent + close_off] == '!')
		return 0;

	if (indent + 2 < size && data[indent + 2] == ' ')
		return indent + 3;

	return indent + 2;
}
1482 | 
/* prefix_code • returns prefix length for block code: 4 when the line
 * starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size < 4)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1492 | 
/* prefix_oli • returns ordered list item prefix length.
 *
 * Accepts up to three leading spaces, one or more decimal digits, a '.'
 * and a space.  The line is not treated as a list item when the line
 * following it is a setext header underline.  Returns the offset just
 * past the ". " marker, or 0 if there is no such prefix. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is mandatory */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* "1. Title" underlined by === or --- is a header, not a list */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1517 | 
/* prefix_uli • returns unordered list item prefix length.
 *
 * Accepts up to three leading spaces, one of '*', '+' or '-', and a
 * space.  The line is not treated as a list item when the line following
 * it is a setext header underline.  Returns the offset just past the
 * marker and its space, or 0 if there is no such prefix. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* need the bullet and the space after it */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1538 | 
1539 | 
1540 | /* parse_block • parsing of one block, returning next uint8_t to parse */
1541 | static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1542 | 			uint8_t *data, size_t size);
1543 | 
1544 | 
1545 | /* parse_blockquote • handles parsing of a blockquote fragment */
1546 | static size_t
1547 | parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1548 | {
1549 | 	size_t beg, end = 0, pre, work_size = 0;
1550 | 	uint8_t *work_data = 0;
1551 | 	struct buf *out = 0;
1552 | 
1553 | 	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1554 | 	beg = 0;
1555 | 	while (beg < size) {
1556 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1557 | 
1558 | 		pre = prefix_quote(data + beg, end - beg);
1559 | 
1560 | 		if (pre)
1561 | 			beg += pre; /* skipping prefix */
1562 | 
1563 | 		/* empty line followed by non-quote line */
1564 | 		else if (is_empty(data + beg, end - beg) &&
1565 | 				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1566 | 				!is_empty(data + end, size - end))))
1567 | 			break;
1568 | 
1569 | 		if (beg < end) { /* copy into the in-place working buffer */
1570 | 			/* bufput(work, data + beg, end - beg); */
1571 | 			if (!work_data)
1572 | 				work_data = data + beg;
1573 | 			else if (data + beg != work_data + work_size)
1574 | 				memmove(work_data + work_size, data + beg, end - beg);
1575 | 			work_size += end - beg;
1576 | 		}
1577 | 		beg = end;
1578 | 	}
1579 | 
1580 | 	parse_block(out, rndr, work_data, work_size);
1581 | 	if (rndr->cb.blockquote)
1582 | 		rndr->cb.blockquote(ob, out, rndr->opaque);
1583 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1584 | 	return end;
1585 | }
1586 | 
1587 | /* parse_blockspoiler • handles parsing of a blockspoiler fragment */
1588 | static size_t
1589 | parse_blockspoiler(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1590 | {
1591 | 	size_t beg, end = 0, pre, work_size = 0;
1592 | 	uint8_t *work_data = 0;
1593 | 	struct buf *out = 0;
1594 | 
1595 | 	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1596 | 	beg = 0;
1597 | 	while (beg < size) {
1598 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1599 | 
1600 | 		pre = prefix_blockspoiler(data + beg, end - beg);
1601 | 
1602 | 		if (pre)
1603 | 			beg += pre; /* skipping prefix */
1604 | 
1605 | 		/* empty line followed by non-blockspoiler line */
1606 | 		else if (is_empty(data + beg, end - beg) &&
1607 | 				(end >= size || (prefix_blockspoiler(data + end, size - end) == 0 &&
1608 | 				!is_empty(data + end, size - end))))
1609 | 			break;
1610 | 
1611 | 		if (beg < end) { /* copy into the in-place working buffer */
1612 | 			/* bufput(work, data + beg, end - beg); */
1613 | 			if (!work_data)
1614 | 				work_data = data + beg;
1615 | 			else if (data + beg != work_data + work_size)
1616 | 				memmove(work_data + work_size, data + beg, end - beg);
1617 | 			work_size += end - beg;
1618 | 		}
1619 | 		beg = end;
1620 | 	}
1621 | 
1622 | 	parse_block(out, rndr, work_data, work_size);
1623 | 	if (rndr->cb.blockspoiler)
1624 | 		rndr->cb.blockspoiler(ob, out, rndr->opaque);
1625 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1626 | 	return end;
1627 | }
1628 | 
1629 | static size_t
1630 | parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1631 | 
1632 | /* parse_blockquote • handles parsing of a regular paragraph */
1633 | static size_t
1634 | parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1635 | {
1636 | 	size_t i = 0, end = 0;
1637 | 	int level = 0;
1638 | 	struct buf work = { data, 0, 0, 0 };
1639 | 
1640 | 	while (i < size) {
1641 | 		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1642 | 
1643 | 		if (prefix_quote(data + i, end - i) != 0) {
1644 | 			end = i;
1645 | 			break;
1646 | 		}
1647 | 
1648 | 		if (is_empty(data + i, size - i))
1649 | 			break;
1650 | 
1651 | 		if ((level = is_headerline(data + i, size - i)) != 0)
1652 | 			break;
1653 | 
1654 | 		if (is_atxheader(rndr, data + i, size - i) ||
1655 | 			is_hrule(data + i, size - i) ||
1656 | 			prefix_quote(data + i, size - i)) {
1657 | 			end = i;
1658 | 			break;
1659 | 		}
1660 | 
1661 | 		/*
1662 | 		 * Early termination of a paragraph with the same logic
1663 | 		 * as Markdown 1.0.0. If this logic is applied, the
1664 | 		 * Markdown 1.0.3 test suite won't pass cleanly
1665 | 		 *
1666 | 		 * :: If the first character in a new line is not a letter,
1667 | 		 * let's check to see if there's some kind of block starting
1668 | 		 * here
1669 | 		 */
1670 | 		if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
1671 | 			if (prefix_oli(data + i, size - i) ||
1672 | 				prefix_uli(data + i, size - i)) {
1673 | 				end = i;
1674 | 				break;
1675 | 			}
1676 | 
1677 | 			/* see if an html block starts here */
1678 | 			if (data[i] == '<' && rndr->cb.blockhtml &&
1679 | 				parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1680 | 				end = i;
1681 | 				break;
1682 | 			}
1683 | 
1684 | 			/* see if a code fence starts here */
1685 | 			if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1686 | 				is_codefence(data + i, size - i, NULL) != 0) {
1687 | 				end = i;
1688 | 				break;
1689 | 			}
1690 | 		}
1691 | 
1692 | 		i = end;
1693 | 	}
1694 | 
1695 | 	work.size = i;
1696 | 	while (work.size && data[work.size - 1] == '\n')
1697 | 		work.size--;
1698 | 
1699 | 	if (!level) {
1700 | 		struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1701 | 		parse_inline(tmp, rndr, work.data, work.size);
1702 | 		if (rndr->cb.paragraph)
1703 | 			rndr->cb.paragraph(ob, tmp, rndr->opaque);
1704 | 		rndr_popbuf(rndr, BUFFER_BLOCK);
1705 | 	} else {
1706 | 		struct buf *header_work;
1707 | 
1708 | 		if (work.size) {
1709 | 			size_t beg;
1710 | 			i = work.size;
1711 | 			work.size -= 1;
1712 | 
1713 | 			while (work.size && data[work.size] != '\n')
1714 | 				work.size -= 1;
1715 | 
1716 | 			beg = work.size + 1;
1717 | 			while (work.size && data[work.size - 1] == '\n')
1718 | 				work.size -= 1;
1719 | 
1720 | 			if (work.size > 0) {
1721 | 				struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1722 | 				parse_inline(tmp, rndr, work.data, work.size);
1723 | 
1724 | 				if (rndr->cb.paragraph)
1725 | 					rndr->cb.paragraph(ob, tmp, rndr->opaque);
1726 | 
1727 | 				rndr_popbuf(rndr, BUFFER_BLOCK);
1728 | 				work.data += beg;
1729 | 				work.size = i - beg;
1730 | 			}
1731 | 			else work.size = i;
1732 | 		}
1733 | 
1734 | 		header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1735 | 		parse_inline(header_work, rndr, work.data, work.size);
1736 | 
1737 | 		if (rndr->cb.header)
1738 | 			rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1739 | 
1740 | 		rndr_popbuf(rndr, BUFFER_SPAN);
1741 | 	}
1742 | 
1743 | 	return end;
1744 | }
1745 | 
1746 | /* parse_fencedcode • handles parsing of a block-level code fragment */
1747 | static size_t
1748 | parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1749 | {
1750 | 	size_t beg, end;
1751 | 	struct buf *work = 0;
1752 | 	struct buf lang = { 0, 0, 0, 0 };
1753 | 
1754 | 	beg = is_codefence(data, size, &lang);
1755 | 	if (beg == 0) return 0;
1756 | 
1757 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1758 | 
1759 | 	while (beg < size) {
1760 | 		size_t fence_end;
1761 | 		struct buf fence_trail = { 0, 0, 0, 0 };
1762 | 
1763 | 		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1764 | 		if (fence_end != 0 && fence_trail.size == 0) {
1765 | 			beg += fence_end;
1766 | 			break;
1767 | 		}
1768 | 
1769 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1770 | 
1771 | 		if (beg < end) {
1772 | 			/* verbatim copy to the working buffer,
1773 | 				escaping entities */
1774 | 			if (is_empty(data + beg, end - beg))
1775 | 				bufputc(work, '\n');
1776 | 			else bufput(work, data + beg, end - beg);
1777 | 		}
1778 | 		beg = end;
1779 | 	}
1780 | 
1781 | 	if (work->size && work->data[work->size - 1] != '\n')
1782 | 		bufputc(work, '\n');
1783 | 
1784 | 	if (rndr->cb.blockcode)
1785 | 		rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1786 | 
1787 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1788 | 	return beg;
1789 | }
1790 | 
1791 | static size_t
1792 | parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1793 | {
1794 | 	size_t beg, end, pre;
1795 | 	struct buf *work = 0;
1796 | 
1797 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1798 | 
1799 | 	beg = 0;
1800 | 	while (beg < size) {
1801 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1802 | 		pre = prefix_code(data + beg, end - beg);
1803 | 
1804 | 		if (pre)
1805 | 			beg += pre; /* skipping prefix */
1806 | 		else if (!is_empty(data + beg, end - beg))
1807 | 			/* non-empty non-prefixed line breaks the pre */
1808 | 			break;
1809 | 
1810 | 		if (beg < end) {
1811 | 			/* verbatim copy to the working buffer,
1812 | 				escaping entities */
1813 | 			if (is_empty(data + beg, end - beg))
1814 | 				bufputc(work, '\n');
1815 | 			else bufput(work, data + beg, end - beg);
1816 | 		}
1817 | 		beg = end;
1818 | 	}
1819 | 
1820 | 	while (work->size && work->data[work->size - 1] == '\n')
1821 | 		work->size -= 1;
1822 | 
1823 | 	bufputc(work, '\n');
1824 | 
1825 | 	if (rndr->cb.blockcode)
1826 | 		rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1827 | 
1828 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1829 | 	return beg;
1830 | }
1831 | 
1832 | /* parse_listitem • parsing of a single list item */
1833 | /*	assuming initial prefix is already removed */
1834 | static size_t
1835 | parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1836 | {
1837 | 	struct buf *work = 0, *inter = 0;
1838 | 	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1839 | 	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1840 | 
1841 | 	/* keeping track of the first indentation prefix */
1842 | 	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1843 | 		orgpre++;
1844 | 
1845 | 	beg = prefix_uli(data, size);
1846 | 	if (!beg)
1847 | 		beg = prefix_oli(data, size);
1848 | 
1849 | 	if (!beg)
1850 | 		return 0;
1851 | 
1852 | 	/* skipping to the beginning of the following line */
1853 | 	end = beg;
1854 | 	while (end < size && data[end - 1] != '\n')
1855 | 		end++;
1856 | 
1857 | 	/* getting working buffers */
1858 | 	work = rndr_newbuf(rndr, BUFFER_SPAN);
1859 | 	inter = rndr_newbuf(rndr, BUFFER_SPAN);
1860 | 
1861 | 	/* putting the first line into the working buffer */
1862 | 	bufput(work, data + beg, end - beg);
1863 | 	beg = end;
1864 | 
1865 | 	/* process the following lines */
1866 | 	while (beg < size) {
1867 | 		size_t has_next_uli = 0, has_next_oli = 0;
1868 | 
1869 | 		end++;
1870 | 
1871 | 		while (end < size && data[end - 1] != '\n')
1872 | 			end++;
1873 | 
1874 | 		/* process an empty line */
1875 | 		if (is_empty(data + beg, end - beg)) {
1876 | 			in_empty = 1;
1877 | 			beg = end;
1878 | 			continue;
1879 | 		}
1880 | 
1881 | 		/* calculating the indentation */
1882 | 		i = 0;
1883 | 		while (i < 4 && beg + i < end && data[beg + i] == ' ')
1884 | 			i++;
1885 | 
1886 | 		pre = i;
1887 | 
1888 | 		if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1889 | 			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1890 | 				in_fence = !in_fence;
1891 | 		}
1892 | 
1893 | 		/* Only check for new list items if we are **not** inside
1894 | 		 * a fenced code block */
1895 | 		if (!in_fence) {
1896 | 			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1897 | 			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1898 | 		}
1899 | 
1900 | 		/* checking for ul/ol switch */
1901 | 		if (in_empty && (
1902 | 			((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1903 | 			(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1904 | 			*flags |= MKD_LI_END;
1905 | 			break; /* the following item must have same list type */
1906 | 		}
1907 | 
1908 | 		/* checking for a new item */
1909 | 		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1910 | 			if (in_empty)
1911 | 				has_inside_empty = 1;
1912 | 
1913 | 			if (pre == orgpre) /* the following item must have */
1914 | 				break;             /* the same indentation */
1915 | 
1916 | 			if (!sublist)
1917 | 				sublist = work->size;
1918 | 		}
1919 | 		/* joining only indented stuff after empty lines;
1920 | 		 * note that now we only require 1 space of indentation
1921 | 		 * to continue a list */
1922 | 		else if (in_empty && pre == 0) {
1923 | 			*flags |= MKD_LI_END;
1924 | 			break;
1925 | 		}
1926 | 		else if (in_empty) {
1927 | 			bufputc(work, '\n');
1928 | 			has_inside_empty = 1;
1929 | 		}
1930 | 
1931 | 		in_empty = 0;
1932 | 
1933 | 		/* adding the line without prefix into the working buffer */
1934 | 		bufput(work, data + beg + i, end - beg - i);
1935 | 		beg = end;
1936 | 	}
1937 | 
1938 | 	/* render of li contents */
1939 | 	if (has_inside_empty)
1940 | 		*flags |= MKD_LI_BLOCK;
1941 | 
1942 | 	if (*flags & MKD_LI_BLOCK) {
1943 | 		/* intermediate render of block li */
1944 | 		if (sublist && sublist < work->size) {
1945 | 			parse_block(inter, rndr, work->data, sublist);
1946 | 			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1947 | 		}
1948 | 		else
1949 | 			parse_block(inter, rndr, work->data, work->size);
1950 | 	} else {
1951 | 		/* intermediate render of inline li */
1952 | 		if (sublist && sublist < work->size) {
1953 | 			parse_inline(inter, rndr, work->data, sublist);
1954 | 			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1955 | 		}
1956 | 		else
1957 | 			parse_inline(inter, rndr, work->data, work->size);
1958 | 	}
1959 | 
1960 | 	/* render of li itself */
1961 | 	if (rndr->cb.listitem)
1962 | 		rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1963 | 
1964 | 	rndr_popbuf(rndr, BUFFER_SPAN);
1965 | 	rndr_popbuf(rndr, BUFFER_SPAN);
1966 | 	return beg;
1967 | }
1968 | 
1969 | 
1970 | /* parse_list • parsing ordered or unordered list block */
1971 | static size_t
1972 | parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1973 | {
1974 | 	struct buf *work = 0;
1975 | 	size_t i = 0, j;
1976 | 
1977 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1978 | 
1979 | 	while (i < size) {
1980 | 		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1981 | 		i += j;
1982 | 
1983 | 		if (!j || (flags & MKD_LI_END))
1984 | 			break;
1985 | 	}
1986 | 
1987 | 	if (rndr->cb.list)
1988 | 		rndr->cb.list(ob, work, flags, rndr->opaque);
1989 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1990 | 	return i;
1991 | }
1992 | 
1993 | /* parse_atxheader • parsing of atx-style headers */
1994 | static size_t
1995 | parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1996 | {
1997 | 	size_t level = 0;
1998 | 	size_t i, end, skip;
1999 | 
2000 | 	while (level < size && level < 6 && data[level] == '#')
2001 | 		level++;
2002 | 
2003 | 	for (i = level; i < size && data[i] == ' '; i++);
2004 | 
2005 | 	for (end = i; end < size && data[end] != '\n'; end++);
2006 | 	skip = end;
2007 | 
2008 | 	while (end && data[end - 1] == '#')
2009 | 		end--;
2010 | 
2011 | 	while (end && data[end - 1] == ' ')
2012 | 		end--;
2013 | 
2014 | 	if (end > i) {
2015 | 		struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
2016 | 
2017 | 		parse_inline(work, rndr, data + i, end - i);
2018 | 
2019 | 		if (rndr->cb.header)
2020 | 			rndr->cb.header(ob, work, (int)level, rndr->opaque);
2021 | 
2022 | 		rndr_popbuf(rndr, BUFFER_SPAN);
2023 | 	}
2024 | 
2025 | 	return skip;
2026 | }
2027 | 
2028 | 
/* htmlblock_end_tag • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
/*	`data` is expected to point at the '<' of a candidate closing tag: */
/*	the tag name is compared (case-insensitively) from data + 2 and must */
/*	be followed directly by '>'.  The rest of that line must be blank; */
/*	one further blank line is included in the length when present. */
/*	returns the length on match, 0 otherwise (`rndr` is not used) */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t after = tag_len + 3; /* "</" + name + ">" */
	size_t blank = 0;

	/* checking if tag is a match */
	if (after >= size)
		return 0;

	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* checking white lines */
	if (after < size) {
		blank = is_empty(data + after, size - after);
		if (blank == 0)
			return 0; /* non-blank after tag */
	}
	after += blank;

	blank = (after < size) ? is_empty(data + after, size - after) : 0;

	return after + blank;
}
2060 | 
/* htmlblock_end • searches `data` for the closing tag of an HTML block.
 *
 * Scans for "</" pairs and tests each candidate with htmlblock_end_tag().
 * When `start_of_line` is non-zero, candidates found after the first
 * line are accepted only if they directly follow a newline.
 *
 * Returns the length of the block up to and including the closing tag
 * and the blank line(s) after it, or 0 when no closing tag was found. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	const size_t name_len = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;

	while (pos < size) {
		size_t matched;

		/* advance until data[pos - 1], data[pos] is "</", counting
		 * the newlines passed on the way */
		for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough room left for "</tag>" */
		if (pos + 2 + name_len >= size)
			break;

		matched = htmlblock_end_tag(curtag, name_len, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
2101 | 
2102 | 
2103 | /* parse_htmlblock • parsing of inline HTML block */
2104 | static size_t
2105 | parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
2106 | {
2107 | 	size_t i, j = 0, tag_end;
2108 | 	const char *curtag = NULL;
2109 | 	struct buf work = { data, 0, 0, 0 };
2110 | 
2111 | 	/* identification of the opening tag */
2112 | 	if (size < 2 || data[0] != '<')
2113 | 		return 0;
2114 | 
2115 | 	i = 1;
2116 | 	while (i < size && data[i] != '>' && data[i] != ' ')
2117 | 		i++;
2118 | 
2119 | 	if (i < size)
2120 | 		curtag = find_block_tag((char *)data + 1, (int)i - 1);
2121 | 
2122 | 	/* handling of special cases */
2123 | 	if (!curtag) {
2124 | 
2125 | 		/* HTML comment, laxist form */
2126 | 		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2127 | 			i = 5;
2128 | 
2129 | 			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2130 | 				i++;
2131 | 
2132 | 			i++;
2133 | 
2134 | 			if (i < size)
2135 | 				j = is_empty(data + i, size - i);
2136 | 
2137 | 			if (j) {
2138 | 				work.size = i + j;
2139 | 				if (do_render && rndr->cb.blockhtml)
2140 | 					rndr->cb.blockhtml(ob, &work, rndr->opaque);
2141 | 				return work.size;
2142 | 			}
2143 | 		}
2144 | 
2145 | 		/* HR, which is the only self-closing block tag considered */
2146 | 		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2147 | 			i = 3;
2148 | 			while (i < size && data[i] != '>')
2149 | 				i++;
2150 | 
2151 | 			if (i + 1 < size) {
2152 | 				i++;
2153 | 				j = is_empty(data + i, size - i);
2154 | 				if (j) {
2155 | 					work.size = i + j;
2156 | 					if (do_render && rndr->cb.blockhtml)
2157 | 						rndr->cb.blockhtml(ob, &work, rndr->opaque);
2158 | 					return work.size;
2159 | 				}
2160 | 			}
2161 | 		}
2162 | 
2163 | 		/* no special case recognised */
2164 | 		return 0;
2165 | 	}
2166 | 
2167 | 	/* looking for an unindented matching closing tag */
2168 | 	/*	followed by a blank line */
2169 | 	tag_end = htmlblock_end(curtag, rndr, data, size, 1);
2170 | 
2171 | 	/* if not found, trying a second pass looking for indented match */
2172 | 	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
2173 | 	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
2174 | 		tag_end = htmlblock_end(curtag, rndr, data, size, 0);
2175 | 	}
2176 | 
2177 | 	if (!tag_end)
2178 | 		return 0;
2179 | 
2180 | 	/* the end of the block has been found */
2181 | 	work.size = tag_end;
2182 | 	if (do_render && rndr->cb.blockhtml)
2183 | 		rndr->cb.blockhtml(ob, &work, rndr->opaque);
2184 | 
2185 | 	return tag_end;
2186 | }
2187 | 
2188 | static void
2189 | parse_table_row(
2190 | 	struct buf *ob,
2191 | 	struct sd_markdown *rndr,
2192 | 	uint8_t *data,
2193 | 	size_t size,
2194 | 	size_t columns,
2195 | 	int *col_data,
2196 | 	int header_flag)
2197 | {
2198 | 	size_t i = 0, col, cols_left;
2199 | 	struct buf *row_work = 0;
2200 | 
2201 | 	if (!rndr->cb.table_cell || !rndr->cb.table_row)
2202 | 		return;
2203 | 
2204 | 	row_work = rndr_newbuf(rndr, BUFFER_SPAN);
2205 | 
2206 | 	if (i < size && data[i] == '|')
2207 | 		i++;
2208 | 
2209 | 	for (col = 0; col < columns && i < size; ++col) {
2210 | 		size_t cell_start, cell_end;
2211 | 		struct buf *cell_work;
2212 | 
2213 | 		cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2214 | 
2215 | 		while (i < size && _isspace(data[i]))
2216 | 			i++;
2217 | 
2218 | 		cell_start = i;
2219 | 
2220 | 		while (i < size && data[i] != '|')
2221 | 			i++;
2222 | 
2223 | 		cell_end = i - 1;
2224 | 
2225 | 		while (cell_end > cell_start && _isspace(data[cell_end]))
2226 | 			cell_end--;
2227 | 
2228 | 		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2229 | 		rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque, 0);
2230 | 
2231 | 		rndr_popbuf(rndr, BUFFER_SPAN);
2232 | 		i++;
2233 | 	}
2234 | 
2235 | 	cols_left = columns - col;
2236 | 	if (cols_left > 0) {
2237 | 		struct buf empty_cell = { 0, 0, 0, 0 };
2238 | 		rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque, cols_left);
2239 | 	}
2240 | 
2241 | 	rndr->cb.table_row(ob, row_work, rndr->opaque);
2242 | 
2243 | 	rndr_popbuf(rndr, BUFFER_SPAN);
2244 | }
2245 | 
2246 | static size_t
2247 | parse_table_header(
2248 | 	struct buf *ob,
2249 | 	struct sd_markdown *rndr,
2250 | 	uint8_t *data,
2251 | 	size_t size,
2252 | 	size_t *columns,
2253 | 	int **column_data)
2254 | {
2255 | 	int pipes;
2256 | 	size_t i = 0, col, header_end, under_end;
2257 | 
2258 | 	pipes = 0;
2259 | 	while (i < size && data[i] != '\n')
2260 | 		if (data[i++] == '|')
2261 | 			pipes++;
2262 | 
2263 | 	if (i == size || pipes == 0)
2264 | 		return 0;
2265 | 
2266 | 	header_end = i;
2267 | 
2268 | 	while (header_end > 0 && _isspace(data[header_end - 1]))
2269 | 		header_end--;
2270 | 
2271 | 	if (data[0] == '|')
2272 | 		pipes--;
2273 | 
2274 | 	if (header_end && data[header_end - 1] == '|')
2275 | 		pipes--;
2276 | 
2277 | 	if (pipes + 1 > rndr->max_table_cols)
2278 | 		return 0;
2279 | 
2280 | 	*columns = pipes + 1;
2281 | 	*column_data = calloc(*columns, sizeof(int));
2282 | 
2283 | 	/* Parse the header underline */
2284 | 	i++;
2285 | 	if (i < size && data[i] == '|')
2286 | 		i++;
2287 | 
2288 | 	under_end = i;
2289 | 	while (under_end < size && data[under_end] != '\n')
2290 | 		under_end++;
2291 | 
2292 | 	for (col = 0; col < *columns && i < under_end; ++col) {
2293 | 		size_t dashes = 0;
2294 | 
2295 | 		while (i < under_end && data[i] == ' ')
2296 | 			i++;
2297 | 
2298 | 		if (data[i] == ':') {
2299 | 			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2300 | 			dashes++;
2301 | 		}
2302 | 
2303 | 		while (i < under_end && data[i] == '-') {
2304 | 			i++; dashes++;
2305 | 		}
2306 | 
2307 | 		if (i < under_end && data[i] == ':') {
2308 | 			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2309 | 			dashes++;
2310 | 		}
2311 | 
2312 | 		while (i < under_end && data[i] == ' ')
2313 | 			i++;
2314 | 
2315 | 		if (i < under_end && data[i] != '|')
2316 | 			break;
2317 | 
2318 | 		if (dashes < 1)
2319 | 			break;
2320 | 
2321 | 		i++;
2322 | 	}
2323 | 
2324 | 	if (col < *columns)
2325 | 		return 0;
2326 | 
2327 | 	parse_table_row(
2328 | 		ob, rndr, data,
2329 | 		header_end,
2330 | 		*columns,
2331 | 		*column_data,
2332 | 		MKD_TABLE_HEADER
2333 | 	);
2334 | 
2335 | 	return under_end + 1;
2336 | }
2337 | 
2338 | static size_t
2339 | parse_table(
2340 | 	struct buf *ob,
2341 | 	struct sd_markdown *rndr,
2342 | 	uint8_t *data,
2343 | 	size_t size)
2344 | {
2345 | 	size_t i;
2346 | 
2347 | 	struct buf *header_work = 0;
2348 | 	struct buf *body_work = 0;
2349 | 
2350 | 	size_t columns;
2351 | 	int *col_data = NULL;
2352 | 
2353 | 	header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2354 | 	body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2355 | 
2356 | 	i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2357 | 	if (i > 0) {
2358 | 
2359 | 		while (i < size) {
2360 | 			size_t row_start;
2361 | 			int pipes = 0;
2362 | 
2363 | 			row_start = i;
2364 | 
2365 | 			while (i < size && data[i] != '\n')
2366 | 				if (data[i++] == '|')
2367 | 					pipes++;
2368 | 
2369 | 			if (pipes == 0 || i == size) {
2370 | 				i = row_start;
2371 | 				break;
2372 | 			}
2373 | 
2374 | 			parse_table_row(
2375 | 				body_work,
2376 | 				rndr,
2377 | 				data + row_start,
2378 | 				i - row_start,
2379 | 				columns,
2380 | 				col_data, 0
2381 | 			);
2382 | 
2383 | 			i++;
2384 | 		}
2385 | 
2386 | 		if (rndr->cb.table)
2387 | 			rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2388 | 	}
2389 | 
2390 | 	free(col_data);
2391 | 	rndr_popbuf(rndr, BUFFER_SPAN);
2392 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
2393 | 	return i;
2394 | }
2395 | 
2396 | /* parse_block • parsing of one block, returning next uint8_t to parse */
2397 | static void
2398 | parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2399 | {
2400 | 	size_t beg, end, i;
2401 | 	uint8_t *txt_data;
2402 | 	beg = 0;
2403 | 
2404 | 	if (rndr->work_bufs[BUFFER_SPAN].size +
2405 | 		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2406 | 		return;
2407 | 
2408 | 	while (beg < size) {
2409 | 		txt_data = data + beg;
2410 | 		end = size - beg;
2411 | 
2412 | 		if (is_atxheader(rndr, txt_data, end))
2413 | 			beg += parse_atxheader(ob, rndr, txt_data, end);
2414 | 
2415 | 		else if (data[beg] == '<' && rndr->cb.blockhtml &&
2416 | 				(i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2417 | 			beg += i;
2418 | 
2419 | 		else if ((i = is_empty(txt_data, end)) != 0)
2420 | 			beg += i;
2421 | 
2422 | 		else if (is_hrule(txt_data, end)) {
2423 | 			if (rndr->cb.hrule)
2424 | 				rndr->cb.hrule(ob, rndr->opaque);
2425 | 
2426 | 			while (beg < size && data[beg] != '\n')
2427 | 				beg++;
2428 | 
2429 | 			beg++;
2430 | 		}
2431 | 
2432 | 		else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2433 | 			(i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2434 | 			beg += i;
2435 | 
2436 | 		else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2437 | 			(i = parse_table(ob, rndr, txt_data, end)) != 0)
2438 | 			beg += i;
2439 | 
2440 | 		else if (prefix_quote(txt_data, end))
2441 | 			beg += parse_blockquote(ob, rndr, txt_data, end);
2442 | 
2443 | 		else if (prefix_blockspoiler(txt_data, end))
2444 | 			beg += parse_blockspoiler(ob, rndr, txt_data, end);
2445 | 
2446 | 		else if (prefix_code(txt_data, end))
2447 | 			beg += parse_blockcode(ob, rndr, txt_data, end);
2448 | 
2449 | 		else if (prefix_uli(txt_data, end))
2450 | 			beg += parse_list(ob, rndr, txt_data, end, 0);
2451 | 
2452 | 		else if (prefix_oli(txt_data, end))
2453 | 			beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2454 | 
2455 | 		else
2456 | 			beg += parse_paragraph(ob, rndr, txt_data, end);
2457 | 	}
2458 | }
2459 | 
2460 | 
2461 | 
2462 | /*********************
2463 |  * REFERENCE PARSING *
2464 |  *********************/
2465 | 
2466 | /* is_ref • returns whether a line is a reference or not */
2467 | static int
2468 | is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2469 | {
2470 | /*	int n; */
2471 | 	size_t i = 0;
2472 | 	size_t id_offset, id_end;
2473 | 	size_t link_offset, link_end;
2474 | 	size_t title_offset, title_end;
2475 | 	size_t line_end;
2476 | 
2477 | 	/* up to 3 optional leading spaces */
2478 | 	if (beg + 3 >= end) return 0;
2479 | 	if (data[beg] == ' ') { i = 1;
2480 | 	if (data[beg + 1] == ' ') { i = 2;
2481 | 	if (data[beg + 2] == ' ') { i = 3;
2482 | 	if (data[beg + 3] == ' ') return 0; } } }
2483 | 	i += beg;
2484 | 
2485 | 	/* id part: anything but a newline between brackets */
2486 | 	if (data[i] != '[') return 0;
2487 | 	i++;
2488 | 	id_offset = i;
2489 | 	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2490 | 		i++;
2491 | 	if (i >= end || data[i] != ']') return 0;
2492 | 	id_end = i;
2493 | 
2494 | 	/* spacer: colon (space | tab)* newline? (space | tab)* */
2495 | 	i++;
2496 | 	if (i >= end || data[i] != ':') return 0;
2497 | 	i++;
2498 | 	while (i < end && data[i] == ' ') i++;
2499 | 	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2500 | 		i++;
2501 | 		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2502 | 	while (i < end && data[i] == ' ') i++;
2503 | 	if (i >= end) return 0;
2504 | 
2505 | 	/* link: whitespace-free sequence, optionally between angle brackets */
2506 | 	if (data[i] == '<')
2507 | 		i++;
2508 | 
2509 | 	link_offset = i;
2510 | 
2511 | 	while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2512 | 		i++;
2513 | 
2514 | 	if (data[i - 1] == '>') link_end = i - 1;
2515 | 	else link_end = i;
2516 | 
2517 | 	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2518 | 	while (i < end && data[i] == ' ') i++;
2519 | 	if (i < end && data[i] != '\n' && data[i] != '\r'
2520 | 			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
2521 | 		return 0;
2522 | 	line_end = 0;
2523 | 	/* computing end-of-line */
2524 | 	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2525 | 	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2526 | 		line_end = i + 1;
2527 | 
2528 | 	/* optional (space|tab)* spacer after a newline */
2529 | 	if (line_end) {
2530 | 		i = line_end + 1;
2531 | 		while (i < end && data[i] == ' ') i++; }
2532 | 
2533 | 	/* optional title: any non-newline sequence enclosed in '"()
2534 | 					alone on its line */
2535 | 	title_offset = title_end = 0;
2536 | 	if (i + 1 < end
2537 | 	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2538 | 		i++;
2539 | 		title_offset = i;
2540 | 		/* looking for EOL */
2541 | 		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2542 | 		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2543 | 			title_end = i + 1;
2544 | 		else	title_end = i;
2545 | 		/* stepping back */
2546 | 		i -= 1;
2547 | 		while (i > title_offset && data[i] == ' ')
2548 | 			i -= 1;
2549 | 		if (i > title_offset
2550 | 		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2551 | 			line_end = title_end;
2552 | 			title_end = i; } }
2553 | 
2554 | 	if (!line_end || link_end == link_offset)
2555 | 		return 0; /* garbage after the link empty link */
2556 | 
2557 | 	/* a valid ref has been found, filling-in return structures */
2558 | 	if (last)
2559 | 		*last = line_end;
2560 | 
2561 | 	if (refs) {
2562 | 		struct link_ref *ref;
2563 | 
2564 | 		ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2565 | 		if (!ref)
2566 | 			return 0;
2567 | 
2568 | 		ref->link = bufnew(link_end - link_offset);
2569 | 		bufput(ref->link, data + link_offset, link_end - link_offset);
2570 | 
2571 | 		if (title_end > title_offset) {
2572 | 			ref->title = bufnew(title_end - title_offset);
2573 | 			bufput(ref->title, data + title_offset, title_end - title_offset);
2574 | 		}
2575 | 	}
2576 | 
2577 | 	return 1;
2578 | }
2579 | 
/* expand_tabs • copies `line` into `ob`, replacing each tab with spaces
 *
 * Every '\t' becomes one to four spaces, enough to bring the running column
 * counter up to the next multiple of 4. The counter advances by one for each
 * byte copied and for each space emitted. */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0;		/* read position within `line` */
	size_t column = 0;	/* bytes/spaces accounted for so far */

	for (;;) {
		const size_t run_start = pos;

		/* gather the longest run of non-tab bytes */
		for (; pos < size && line[pos] != '\t'; pos++)
			column++;

		/* flush that run verbatim */
		if (pos != run_start)
			bufput(ob, line + run_start, pos - run_start);

		/* input exhausted: nothing left to expand */
		if (pos >= size)
			return;

		/* line[pos] is a tab: pad with at least one space up to the
		 * next multiple of 4 */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4 != 0);

		/* skip the tab itself */
		pos++;
	}
}
2604 | 
2605 | /**********************
2606 |  * EXPORTED FUNCTIONS *
2607 |  **********************/
2608 | 
2609 | struct sd_markdown *
2610 | sd_markdown_new(
2611 | 	unsigned int extensions,
2612 | 	size_t max_nesting,
2613 | 	size_t max_table_cols,
2614 | 	const struct sd_callbacks *callbacks,
2615 | 	void *opaque)
2616 | {
2617 | 	struct sd_markdown *md = NULL;
2618 | 
2619 | 	assert(max_nesting > 0 && max_table_cols > 0 && callbacks);
2620 | 
2621 | 	md = malloc(sizeof(struct sd_markdown));
2622 | 	if (!md)
2623 | 		return NULL;
2624 | 
2625 | 	memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2626 | 
2627 | 	stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2628 | 	stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2629 | 
2630 | 	memset(md->active_char, 0x0, 256);
2631 | 
2632 | 	if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2633 | 		md->active_char['*'] = MD_CHAR_EMPHASIS;
2634 | 		md->active_char['_'] = MD_CHAR_EMPHASIS;
2635 | 		md->active_char['>'] = MD_CHAR_EMPHASIS;
2636 | 		if (extensions & MKDEXT_STRIKETHROUGH)
2637 | 			md->active_char['~'] = MD_CHAR_EMPHASIS;
2638 | 	}
2639 | 
2640 | 	if (md->cb.codespan)
2641 | 		md->active_char['`'] = MD_CHAR_CODESPAN;
2642 | 
2643 | 	if (md->cb.linebreak)
2644 | 		md->active_char['\n'] = MD_CHAR_LINEBREAK;
2645 | 
2646 | 	if (md->cb.image || md->cb.link)
2647 | 		md->active_char['['] = MD_CHAR_LINK;
2648 | 
2649 | 	md->active_char['<'] = MD_CHAR_LANGLE;
2650 | 	md->active_char['\\'] = MD_CHAR_ESCAPE;
2651 | 	md->active_char['&'] = MD_CHAR_ENTITITY;
2652 | 
2653 | 	if (extensions & MKDEXT_AUTOLINK) {
2654 | 		if (!(extensions & MKDEXT_NO_EMAIL_AUTOLINK))
2655 | 			md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2656 | 		md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2657 | 		md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2658 | 		md->active_char['/'] = MD_CHAR_AUTOLINK_SUBREDDIT_OR_USERNAME;
2659 | 	}
2660 | 
2661 | 	if (extensions & MKDEXT_SUPERSCRIPT)
2662 | 		md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2663 | 
2664 | 	/* Extension data */
2665 | 	md->ext_flags = extensions;
2666 | 	md->opaque = opaque;
2667 | 	md->max_nesting = max_nesting;
2668 | 	md->max_table_cols = max_table_cols;
2669 | 	md->in_link_body = 0;
2670 | 
2671 | 	return md;
2672 | }
2673 | 
2674 | void
2675 | sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2676 | {
2677 | #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2678 | 	static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2679 | 
2680 | 	struct buf *text;
2681 | 	size_t beg, end;
2682 | 
2683 | 	text = bufnew(64);
2684 | 	if (!text)
2685 | 		return;
2686 | 
2687 | 	/* Preallocate enough space for our buffer to avoid expanding while copying */
2688 | 	bufgrow(text, doc_size);
2689 | 
2690 | 	/* reset the references table */
2691 | 	memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2692 | 
2693 | 	/* first pass: looking for references, copying everything else */
2694 | 	beg = 0;
2695 | 
2696 | 	/* Skip a possible UTF-8 BOM, even though the Unicode standard
2697 | 	 * discourages having these in UTF-8 documents */
2698 | 	if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2699 | 		beg += 3;
2700 | 
2701 | 	while (beg < doc_size) /* iterating over lines */
2702 | 		if (is_ref(document, beg, doc_size, &end, md->refs))
2703 | 			beg = end;
2704 | 		else { /* skipping to the next line */
2705 | 			end = beg;
2706 | 			while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2707 | 				end++;
2708 | 
2709 | 			/* adding the line body if present */
2710 | 			if (end > beg)
2711 | 				expand_tabs(text, document + beg, end - beg);
2712 | 
2713 | 			while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2714 | 				/* add one \n per newline */
2715 | 				if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2716 | 					bufputc(text, '\n');
2717 | 				end++;
2718 | 			}
2719 | 
2720 | 			beg = end;
2721 | 		}
2722 | 
2723 | 	/* pre-grow the output buffer to minimize allocations */
2724 | 	bufgrow(ob, MARKDOWN_GROW(text->size));
2725 | 
2726 | 	/* second pass: actual rendering */
2727 | 	if (md->cb.doc_header)
2728 | 		md->cb.doc_header(ob, md->opaque);
2729 | 
2730 | 	if (text->size) {
2731 | 		/* adding a final newline if not already present */
2732 | 		if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
2733 | 			bufputc(text, '\n');
2734 | 
2735 | 		parse_block(ob, md, text->data, text->size);
2736 | 	}
2737 | 
2738 | 	if (md->cb.doc_footer)
2739 | 		md->cb.doc_footer(ob, md->opaque);
2740 | 
2741 | 	/* clean-up */
2742 | 	bufrelease(text);
2743 | 	free_link_refs(md->refs);
2744 | 
2745 | 	assert(md->work_bufs[BUFFER_SPAN].size == 0);
2746 | 	assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2747 | }
2748 | 
2749 | void
2750 | sd_markdown_free(struct sd_markdown *md)
2751 | {
2752 | 	size_t i;
2753 | 
2754 | 	for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2755 | 		bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2756 | 
2757 | 	for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2758 | 		bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2759 | 
2760 | 	stack_free(&md->work_bufs[BUFFER_SPAN]);
2761 | 	stack_free(&md->work_bufs[BUFFER_BLOCK]);
2762 | 
2763 | 	free(md);
2764 | }
2765 | 
2766 | void
2767 | sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2768 | {
2769 | 	*ver_major = SUNDOWN_VER_MAJOR;
2770 | 	*ver_minor = SUNDOWN_VER_MINOR;
2771 | 	*ver_revision = SUNDOWN_VER_REVISION;
2772 | }
2773 | 
2774 | /* vim: set filetype=c: */
2775 | 


--------------------------------------------------------------------------------