├── .gitignore
├── html_block_names.txt
├── sundown.def
├── CONTRIBUTING.md
├── src
    ├── stack.h
    ├── stack.c
    ├── autolink.h
    ├── buffer.h
    ├── markdown.h
    ├── buffer.c
    ├── autolink.c
    ├── html_blocks.h
    └── markdown.c
├── Makefile.win
├── html
    ├── houdini.h
    ├── html.h
    ├── houdini_html_e.c
    ├── houdini_href_e.c
    ├── html_smartypants.c
    └── html.c
├── examples
    ├── smartypants.c
    └── sundown.c
├── Makefile
└── README.markdown


/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | libsundown.so*
3 | sundown
4 | smartypants
5 | *.exe
6 | 


--------------------------------------------------------------------------------
/html_block_names.txt:
--------------------------------------------------------------------------------
 1 | ##
 2 | p
 3 | dl
 4 | h1
 5 | h2
 6 | h3
 7 | h4
 8 | h5
 9 | h6
10 | ol
11 | ul
12 | del
13 | div
14 | ins
15 | pre
16 | form
17 | math
18 | table
19 | figure
20 | iframe
21 | script
22 | style
23 | fieldset
24 | noscript
25 | blockquote
26 | 


--------------------------------------------------------------------------------
/sundown.def:
--------------------------------------------------------------------------------
 1 | LIBRARY SUNDOWN
 2 | EXPORTS
 3 | 	sdhtml_renderer
 4 | 	sdhtml_toc_renderer
 5 | 	sdhtml_smartypants
 6 | 	bufgrow
 7 | 	bufnew
 8 | 	bufcstr
 9 | 	bufprefix
10 | 	bufput 
11 | 	bufputs
12 | 	bufputc
13 | 	bufrelease
14 | 	bufreset
15 | 	bufslurp
16 | 	bufprintf
17 | 	sd_markdown_new
18 | 	sd_markdown_render
19 | 	sd_markdown_free
20 | 	sd_version


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Contributing to Sundown
 2 | =======================
 3 | 
 4 | Do not.
 5 | 
 6 | Unfortunately, Sundown is currently frozen as we're working with the Reddit, StackOverflow and Meteor developers to design a formal Markdown standard and parser that will supersede Sundown in all these websites (and in GitHub, of course). Our goal is to deprecate Sundown altogether before the end of the year.
 7 | 
 8 | The new parser will be smaller, faster, safer and most importantly, more consistent.
 9 | 
10 | Please stay tuned.


--------------------------------------------------------------------------------
/src/stack.h:
--------------------------------------------------------------------------------
 1 | #ifndef STACK_H__
 2 | #define STACK_H__
 3 | 
 4 | #include <stdlib.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
/* struct stack: growable array of opaque pointers used as a LIFO stack */
struct stack {
	void **item;	/* slot array; item[size - 1] is the top of the stack */
	size_t size;	/* number of slots currently in use */
	size_t asize;	/* number of slots allocated in `item` */
};

/* stack_free: frees the slot array and zeroes the fields; the struct itself
 * and the pointed-to items are not freed. A NULL stack is ignored. */
void stack_free(struct stack *);
/* stack_grow: makes room for at least the given number of slots; returns 0
 * on success (or when already large enough) and -1 when realloc fails */
int stack_grow(struct stack *, size_t);
/* stack_init: resets the fields and allocates the given number of slots
 * (8 when 0 is passed); returns the result of stack_grow */
int stack_init(struct stack *, size_t);

/* stack_push: stores a pointer on top of the stack, growing it when needed;
 * returns 0 on success and -1 when growing fails */
int stack_push(struct stack *, void *);

/* stack_pop: removes and returns the top pointer, or NULL when empty */
void *stack_pop(struct stack *);
/* stack_top: returns the top pointer without removing it, or NULL when empty */
void *stack_top(struct stack *);
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/Makefile.win:
--------------------------------------------------------------------------------
 1 | 
 2 | CFLAGS=/O2 /EHsc /I"src/" /I"examples"/ /I"html"/
 3 | CC=cl
 4 | 
 5 | SUNDOWN_SRC=\
 6 | 	src\markdown.obj \
 7 | 	src\stack.obj \
 8 | 	src\buffer.obj \
 9 | 	src\autolink.obj \
10 | 	html\html.obj \
11 | 	html\html_smartypants.obj \
12 | 	html\houdini_html_e.obj \
13 | 	html\houdini_href_e.obj
14 | 
15 | all: sundown.dll sundown.exe
16 | 
17 | sundown.dll: $(SUNDOWN_SRC) sundown.def
18 | 	$(CC) $(SUNDOWN_SRC) sundown.def /link /DLL $(LDFLAGS) /out:$@
19 | 
20 | sundown.exe: examples\sundown.obj $(SUNDOWN_SRC)
21 | 	$(CC) examples\sundown.obj $(SUNDOWN_SRC) /link $(LDFLAGS) /out:$@
22 | 
23 | # housekeeping
24 | clean:
25 | 	del $(SUNDOWN_SRC)
26 | 	del sundown.dll sundown.exe
27 | 	del sundown.exp sundown.lib
28 | 
29 | # generic object compilations
30 | 
31 | .c.obj:
32 | 	$(CC) $(CFLAGS) /c $< /Fo$@
33 | 
34 | 


--------------------------------------------------------------------------------
/src/stack.c:
--------------------------------------------------------------------------------
 1 | #include "stack.h"
 2 | #include <string.h>
 3 | 
 4 | int
 5 | stack_grow(struct stack *st, size_t new_size)
 6 | {
 7 | 	void **new_st;
 8 | 
 9 | 	if (st->asize >= new_size)
10 | 		return 0;
11 | 
12 | 	new_st = realloc(st->item, new_size * sizeof(void *));
13 | 	if (new_st == NULL)
14 | 		return -1;
15 | 
16 | 	memset(new_st + st->asize, 0x0,
17 | 		(new_size - st->asize) * sizeof(void *));
18 | 
19 | 	st->item = new_st;
20 | 	st->asize = new_size;
21 | 
22 | 	if (st->size > new_size)
23 | 		st->size = new_size;
24 | 
25 | 	return 0;
26 | }
27 | 
28 | void
29 | stack_free(struct stack *st)
30 | {
31 | 	if (!st)
32 | 		return;
33 | 
34 | 	free(st->item);
35 | 
36 | 	st->item = NULL;
37 | 	st->size = 0;
38 | 	st->asize = 0;
39 | }
40 | 
41 | int
42 | stack_init(struct stack *st, size_t initial_size)
43 | {
44 | 	st->item = NULL;
45 | 	st->size = 0;
46 | 	st->asize = 0;
47 | 
48 | 	if (!initial_size)
49 | 		initial_size = 8;
50 | 
51 | 	return stack_grow(st, initial_size);
52 | }
53 | 
54 | void *
55 | stack_pop(struct stack *st)
56 | {
57 | 	if (!st->size)
58 | 		return NULL;
59 | 
60 | 	return st->item[--st->size];
61 | }
62 | 
63 | int
64 | stack_push(struct stack *st, void *item)
65 | {
66 | 	if (stack_grow(st, st->size * 2) < 0)
67 | 		return -1;
68 | 
69 | 	st->item[st->size++] = item;
70 | 	return 0;
71 | }
72 | 
73 | void *
74 | stack_top(struct stack *st)
75 | {
76 | 	if (!st->size)
77 | 		return NULL;
78 | 
79 | 	return st->item[st->size - 1];
80 | }
81 | 
82 | 


--------------------------------------------------------------------------------
/html/houdini.h:
--------------------------------------------------------------------------------
 1 | #ifndef HOUDINI_H__
 2 | #define HOUDINI_H__
 3 | 
 4 | #include "buffer.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
#ifdef HOUDINI_USE_LOCALE
/* locale-aware variants; the including file must provide <ctype.h> */
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * Both macros evaluate their argument more than once, so do not pass an
 * expression with side effects. _isxdigit is written with range checks
 * rather than strchr(): strchr() also matches the string terminator, which
 * made a NUL byte count as a hex digit, and it required <string.h>.
 * */
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#	define _isxdigit(c) \
	(_isdigit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
#endif
20 | 
21 | extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
22 | extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
23 | extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
24 | extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
25 | extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
26 | extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
27 | extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
28 | extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
29 | extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
30 | extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
31 | extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
32 | 
33 | #ifdef __cplusplus
34 | }
35 | #endif
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/src/autolink.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #ifndef UPSKIRT_AUTOLINK_H
18 | #define UPSKIRT_AUTOLINK_H
19 | 
20 | #include "buffer.h"
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
/* Autolink option bits. NOTE(review): presumably OR-ed into the `flags`
 * argument of the sd_autolink__* functions declared below -- confirm
 * against autolink.c. */
enum {
	SD_AUTOLINK_SHORT_DOMAINS = (1 << 0),
};
29 | 
30 | int
31 | sd_autolink_issafe(const uint8_t *link, size_t link_len);
32 | 
33 | size_t
34 | sd_autolink__www(size_t *rewind_p, struct buf *link,
35 | 	uint8_t *data, size_t offset, size_t size, unsigned int flags);
36 | 
37 | size_t
38 | sd_autolink__email(size_t *rewind_p, struct buf *link,
39 | 	uint8_t *data, size_t offset, size_t size, unsigned int flags);
40 | 
41 | size_t
42 | sd_autolink__url(size_t *rewind_p, struct buf *link,
43 | 	uint8_t *data, size_t offset, size_t size, unsigned int flags);
44 | 
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 | 
49 | #endif
50 | 
51 | /* vim: set filetype=c: */
52 | 


--------------------------------------------------------------------------------
/examples/smartypants.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #include "markdown.h"
18 | #include "html.h"
19 | #include "buffer.h"
20 | 
21 | #include <errno.h>
22 | #include <getopt.h>
23 | #include <stdio.h>
24 | #include <stdlib.h>
25 | #include <string.h>
26 | 
27 | #define READ_UNIT 1024
28 | #define OUTPUT_UNIT 64
29 | 
30 | int
31 | main(int argc, char **argv)
32 | {
33 | 	struct buf *ib, *ob;
34 | 	size_t ret;
35 | 	FILE *in = stdin;
36 | 
37 | 	/* opening the file if given from the command line */
38 | 	if (argc > 1) {
39 | 		in = fopen(argv[1], "r");
40 | 		if (!in) {
41 | 			fprintf(stderr, "Unable to open input file \"%s\": %s\n", argv[0], strerror(errno));
42 | 			return 1;
43 | 		}
44 | 	}
45 | 
46 | 	/* reading everything */
47 | 	ib = bufnew(READ_UNIT);
48 | 	bufgrow(ib, READ_UNIT);
49 | 	while ((ret = fread(ib->data + ib->size, 1, ib->asize - ib->size, in)) > 0) {
50 | 		ib->size += ret;
51 | 		bufgrow(ib, ib->size + READ_UNIT);
52 | 	}
53 | 
54 | 	if (in != stdin)
55 | 		fclose(in);
56 | 
57 | 	/* performing markdown parsing */
58 | 	ob = bufnew(OUTPUT_UNIT);
59 | 
60 | 	sdhtml_smartypants(ob, ib->data, ib->size);
61 | 
62 | 	/* writing the result to stdout */
63 | 	(void)fwrite(ob->data, 1, ob->size, stdout);
64 | 
65 | 	/* cleanup */
66 | 	bufrelease(ib);
67 | 	bufrelease(ob);
68 | 
69 | 	return 0;
70 | }
71 | 
72 | /* vim: set filetype=c: */
73 | 


--------------------------------------------------------------------------------
/html/html.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #ifndef UPSKIRT_HTML_H
18 | #define UPSKIRT_HTML_H
19 | 
20 | #include "markdown.h"
21 | #include "buffer.h"
22 | #include <stdlib.h>
23 | 
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 | 
/*
 * html_renderopt: state and options of the HTML renderer. A pointer to it
 * is taken by sdhtml_renderer() and sdhtml_toc_renderer().
 */
struct html_renderopt {
	/* table-of-contents bookkeeping. NOTE(review): the exact meaning of
	 * each counter is defined by html.c -- confirm there */
	struct {
		int header_count;
		int current_level;
		int level_offset;
	} toc_data;

	/* presumably a combination of html_render_mode bits -- TODO confirm */
	unsigned int flags;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
40 | 
/*
 * html_render_mode: renderer option bits. Every value is a distinct power
 * of two, so several can be OR-ed together. NOTE(review): presumably these
 * are the values accepted by the `render_flags` argument of
 * sdhtml_renderer() -- confirm against html.c.
 */
typedef enum {
	HTML_SKIP_HTML = (1 << 0),
	HTML_SKIP_STYLE = (1 << 1),
	HTML_SKIP_IMAGES = (1 << 2),
	HTML_SKIP_LINKS = (1 << 3),
	HTML_EXPAND_TABS = (1 << 4),
	HTML_SAFELINK = (1 << 5),
	HTML_TOC = (1 << 6),
	HTML_HARD_WRAP = (1 << 7),
	HTML_USE_XHTML = (1 << 8),
	HTML_ESCAPE = (1 << 9),
} html_render_mode;
53 | 
/*
 * html_tag: tag classification. NOTE(review): presumably the return value
 * of sdhtml_is_tag() (declared as int) -- confirm against html.c.
 */
typedef enum {
	HTML_TAG_NONE = 0,	/* zero, so it tests false in a condition */
	HTML_TAG_OPEN,
	HTML_TAG_CLOSE,
} html_tag;
59 | 
60 | int
61 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
62 | 
63 | extern void
64 | sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
65 | 
66 | extern void
67 | sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
68 | 
69 | extern void
70 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
71 | 
72 | #ifdef __cplusplus
73 | }
74 | #endif
75 | 
76 | #endif
77 | 
78 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile
 2 | 
 3 | # Copyright (c) 2009, Natacha Porté
 4 | #
 5 | # Permission to use, copy, modify, and distribute this software for any
 6 | # purpose with or without fee is hereby granted, provided that the above
 7 | # copyright notice and this permission notice appear in all copies.
 8 | #
 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 | 
17 | DEPDIR=depends
18 | 
 19 | # "Machine-dependent" options
20 | #MFLAGS=-fPIC
21 | 
22 | CFLAGS=-c -g -O3 -fPIC -Wall -Werror -Wsign-compare -Isrc -Ihtml
23 | LDFLAGS=-g -O3 -Wall -Werror 
24 | CC=gcc
25 | 
26 | 
27 | SUNDOWN_SRC=\
28 | 	src/markdown.o \
29 | 	src/stack.o \
30 | 	src/buffer.o \
31 | 	src/autolink.o \
32 | 	html/html.o \
33 | 	html/html_smartypants.o \
34 | 	html/houdini_html_e.o \
35 | 	html/houdini_href_e.o
36 | 
37 | all:		libsundown.so sundown smartypants html_blocks
38 | 
39 | .PHONY:		all clean
40 | 
41 | # libraries
42 | 
43 | libsundown.so:	libsundown.so.1
44 | 	ln -f -s $^ $@
45 | 
46 | libsundown.so.1: $(SUNDOWN_SRC)
47 | 	$(CC) $(LDFLAGS) -shared -Wl $^ -o $@
48 | 
49 | # executables
50 | 
51 | sundown:	examples/sundown.o $(SUNDOWN_SRC)
52 | 	$(CC) $(LDFLAGS) $^ -o $@
53 | 
54 | smartypants: examples/smartypants.o $(SUNDOWN_SRC)
55 | 	$(CC) $(LDFLAGS) $^ -o $@
56 | 
57 | # perfect hashing
58 | html_blocks: src/html_blocks.h
59 | 
60 | src/html_blocks.h: html_block_names.txt
61 | 	gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case $^ > $@
62 | 
63 | 
64 | # housekeeping
65 | clean:
66 | 	rm -f src/*.o html/*.o examples/*.o
67 | 	rm -f libsundown.so libsundown.so.1 sundown smartypants
68 | 	rm -f sundown.exe smartypants.exe
69 | 	rm -rf $(DEPDIR)
70 | 
71 | 
72 | # dependencies
73 | 
74 | include $(wildcard $(DEPDIR)/*.d)
75 | 
76 | 
77 | # generic object compilations
78 | 
79 | %.o:	src/%.c examples/%.c html/%.c
80 | 	@mkdir -p $(DEPDIR)
81 | 	@$(CC) -MM $< > $(DEPDIR)/$*.d
82 | 	$(CC) $(CFLAGS) -o $@ $<
83 | 
84 | 


--------------------------------------------------------------------------------
/html/houdini_html_e.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | #include "houdini.h"
 6 | 
 7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
 8 | 
 9 | /**
10 |  * According to the OWASP rules:
11 |  *
12 |  * & --> &amp;
13 |  * < --> &lt;
14 |  * > --> &gt;
15 |  * " --> &quot;
 16 |  * ' --> &#39;      (same character as &#x27;) &apos; is not recommended
 17 |  * / --> &#47;      (same character as &#x2F;) forward slash is included as it helps end an HTML entity
18 |  *
19 |  */
/*
 * Lookup table indexed by byte value. An entry of 0 means the byte is
 * copied verbatim; any other entry is the index of its replacement string
 * in HTML_ESCAPES. Entries not listed below are implicitly 0.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	['"']  = 1,
	['&']  = 2,
	['\''] = 3,
	['/']  = 4,
	['<']  = 5,
	['>']  = 6,
};
38 | 
/* Replacement strings, indexed by the values stored in HTML_ESCAPE_TABLE */
static const char *HTML_ESCAPES[] = {
	"",		/* 0: placeholder, the byte needs no escaping */
	"&quot;",	/* 1: double quote */
	"&amp;",	/* 2: ampersand */
	"&#39;",	/* 3: single quote */
	"&#47;",	/* 4: forward slash */
	"&lt;",		/* 5: less-than */
	"&gt;"		/* 6: greater-than */
};
48 | 
49 | void
50 | houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
51 | {
52 | 	size_t i = 0, org, esc = 0;
53 | 
54 | 	bufgrow(ob, ESCAPE_GROW_FACTOR(size));
55 | 
56 | 	while (i < size) {
57 | 		org = i;
58 | 		while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
59 | 			i++;
60 | 
61 | 		if (i > org)
62 | 			bufput(ob, src + org, i - org);
63 | 
64 | 		/* escaping */
65 | 		if (i >= size)
66 | 			break;
67 | 
68 | 		/* The forward slash is only escaped in secure mode */
69 | 		if (src[i] == '/' && !secure) {
70 | 			bufputc(ob, '/');
71 | 		} else {
72 | 			bufputs(ob, HTML_ESCAPES[esc]);
73 | 		}
74 | 
75 | 		i++;
76 | 	}
77 | }
78 | 
/*
 * houdini_escape_html: HTML-escapes `src` into `ob` in secure mode, i.e.
 * with the forward slash escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
84 | 
85 | 


--------------------------------------------------------------------------------
/examples/sundown.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011, Vicent Marti
 3 |  *
 4 |  * Permission to use, copy, modify, and distribute this software for any
 5 |  * purpose with or without fee is hereby granted, provided that the above
 6 |  * copyright notice and this permission notice appear in all copies.
 7 |  *
 8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 |  */
16 | 
17 | #include "markdown.h"
18 | #include "html.h"
19 | #include "buffer.h"
20 | 
21 | #include <errno.h>
22 | #include <stdio.h>
23 | #include <stdlib.h>
24 | #include <string.h>
25 | 
26 | #define READ_UNIT 1024
27 | #define OUTPUT_UNIT 64
28 | 
29 | /* main • main function, interfacing STDIO with the parser */
30 | int
31 | main(int argc, char **argv)
32 | {
33 | 	struct buf *ib, *ob;
34 | 	int ret;
35 | 	FILE *in = stdin;
36 | 
37 | 	struct sd_callbacks callbacks;
38 | 	struct html_renderopt options;
39 | 	struct sd_markdown *markdown;
40 | 
41 | 	/* opening the file if given from the command line */
42 | 	if (argc > 1) {
43 | 		in = fopen(argv[1], "r");
44 | 		if (!in) {
45 | 			fprintf(stderr,"Unable to open input file \"%s\": %s\n", argv[1], strerror(errno));
46 | 			return 1;
47 | 		}
48 | 	}
49 | 
50 | 	/* reading everything */
51 | 	ib = bufnew(READ_UNIT);
52 | 	bufgrow(ib, READ_UNIT);
53 | 	while ((ret = fread(ib->data + ib->size, 1, ib->asize - ib->size, in)) > 0) {
54 | 		ib->size += ret;
55 | 		bufgrow(ib, ib->size + READ_UNIT);
56 | 	}
57 | 
58 | 	if (in != stdin)
59 | 		fclose(in);
60 | 
61 | 	/* performing markdown parsing */
62 | 	ob = bufnew(OUTPUT_UNIT);
63 | 
64 | 	sdhtml_renderer(&callbacks, &options, 0);
65 | 	markdown = sd_markdown_new(0, 16, &callbacks, &options);
66 | 
67 | 	sd_markdown_render(ob, ib->data, ib->size, markdown);
68 | 	sd_markdown_free(markdown);
69 | 
70 | 	/* writing the result to stdout */
71 | 	ret = fwrite(ob->data, 1, ob->size, stdout);
72 | 
73 | 	/* cleanup */
74 | 	bufrelease(ib);
75 | 	bufrelease(ob);
76 | 
77 | 	return (ret < 0) ? -1 : 0;
78 | }
79 | 
80 | /* vim: set filetype=c: */
81 | 


--------------------------------------------------------------------------------
/src/buffer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2008, Natacha Porté
 3 |  * Copyright (c) 2011, Vicent Martí
 4 |  *
 5 |  * Permission to use, copy, modify, and distribute this software for any
 6 |  * purpose with or without fee is hereby granted, provided that the above
 7 |  * copyright notice and this permission notice appear in all copies.
 8 |  *
 9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 |  */
17 | 
18 | #ifndef BUFFER_H__
19 | #define BUFFER_H__
20 | 
21 | #include <stddef.h>
22 | #include <stdarg.h>
23 | #include <stdint.h>
24 | 
25 | #ifdef __cplusplus
26 | extern "C" {
27 | #endif
28 | 
29 | #if defined(_MSC_VER)
30 | #define __attribute__(x)
31 | #define inline
32 | #endif
33 | 
/* buferror_t: status codes of the buffer API */
typedef enum {
	BUF_OK = 0,
	BUF_ENOMEM = -1,
} buferror_t;

/* struct buf: character array buffer */
struct buf {
	uint8_t *data;	/* actual character data */
	size_t size;	/* size of the string */
	size_t asize;	/* allocated size (0 = volatile buffer) */
	size_t unit;	/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a read-only buffer wrapping a string literal.
 * `size` excludes the terminating NUL, `asize` includes it. The initializer
 * has exactly one value per member of struct buf.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer (asize = 0) wrapping an
 * existing NUL-terminated string. Uses strlen(), so the including file
 * must provide <string.h>.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }

/* BUFPUTSL: optimized bufputs of a string literal (length known at compile time) */
#define BUFPUTSL(output, literal) \
	bufput((output), (literal), sizeof(literal) - 1)
58 | 
59 | /* bufgrow: increasing the allocated size to the given value */
60 | int bufgrow(struct buf *, size_t);
61 | 
62 | /* bufnew: allocation of a new buffer */
63 | struct buf *bufnew(size_t) __attribute__ ((malloc));
64 | 
 65 | /* bufcstr: NUL-termination of the string array (making a C-string) */
66 | const char *bufcstr(struct buf *);
67 | 
68 | /* bufprefix: compare the beginning of a buffer with a string */
69 | int bufprefix(const struct buf *buf, const char *prefix);
70 | 
71 | /* bufput: appends raw data to a buffer */
72 | void bufput(struct buf *, const void *, size_t);
73 | 
74 | /* bufputs: appends a NUL-terminated string to a buffer */
75 | void bufputs(struct buf *, const char *);
76 | 
77 | /* bufputc: appends a single char to a buffer */
78 | void bufputc(struct buf *, int);
79 | 
80 | /* bufrelease: decrease the reference count and free the buffer if needed */
81 | void bufrelease(struct buf *);
82 | 
83 | /* bufreset: frees internal data of the buffer */
84 | void bufreset(struct buf *);
85 | 
86 | /* bufslurp: removes a given number of bytes from the head of the array */
87 | void bufslurp(struct buf *, size_t);
88 | 
89 | /* bufprintf: formatted printing to a buffer */
90 | void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
91 | 
92 | #ifdef __cplusplus
93 | }
94 | #endif
95 | 
96 | #endif
97 | 


--------------------------------------------------------------------------------
/html/houdini_href_e.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | 
  5 | #include "houdini.h"
  6 | 
  7 | #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
  8 | 
  9 | /*
 10 |  * The following characters will not be escaped:
 11 |  *
 12 |  *		-_.+!*'(),%#@?=;:/,+&$ alphanum
 13 |  *
 14 |  * Note that this character set is the addition of:
 15 |  *
 16 |  *	- The characters which are safe to be in an URL
 17 |  *	- The characters which are *not* safe to be in
 18 |  *	an URL because they are RESERVED characters.
 19 |  *
  20 |  * We assume (lazily) that any RESERVED char that
 21 |  * appears inside an URL is actually meant to
 22 |  * have its native function (i.e. as an URL 
 23 |  * component/separator) and hence needs no escaping.
 24 |  *
  25 |  * There are two exceptions: the characters & (amp)
 26 |  * and ' (single quote) do not appear in the table.
 27 |  * They are meant to appear in the URL as components,
 28 |  * yet they require special HTML-entity escaping
 29 |  * to generate valid HTML markup.
 30 |  *
 31 |  * All other characters will be escaped to %XX.
 32 |  *
 33 |  */
/*
 * HREF_SAFE: lookup table indexed by byte value. A non-zero entry means
 * houdini_escape_href copies the byte verbatim; a zero entry means the byte
 * is rewritten (as an HTML entity for & and ', as %XX otherwise).
 */
static const char HREF_SAFE[] = {
	/* 0x00-0x0F: control characters, never safe */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10-0x1F: control characters, never safe */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20-0x2F: punctuation; space, double quote, & and ' are not safe */
	0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30-0x3F: digits and : ; = ? are safe; < and > are not */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40-0x4F: @ and A-O */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50-0x5F: P-Z and _ are safe; the four bytes between Z and _ are not */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60-0x6F: backtick is not safe; a-o are */
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70-0x7F: p-z are safe; the remaining five bytes are not */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80-0xFF: non-ASCII bytes, never safe */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
 52 | 
 53 | void
 54 | houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
 55 | {
 56 | 	static const char hex_chars[] = "0123456789ABCDEF";
 57 | 	size_t  i = 0, org;
 58 | 	char hex_str[3];
 59 | 
 60 | 	bufgrow(ob, ESCAPE_GROW_FACTOR(size));
 61 | 	hex_str[0] = '%';
 62 | 
 63 | 	while (i < size) {
 64 | 		org = i;
 65 | 		while (i < size && HREF_SAFE[src[i]] != 0)
 66 | 			i++;
 67 | 
 68 | 		if (i > org)
 69 | 			bufput(ob, src + org, i - org);
 70 | 
 71 | 		/* escaping */
 72 | 		if (i >= size)
 73 | 			break;
 74 | 
 75 | 		switch (src[i]) {
 76 | 		/* amp appears all the time in URLs, but needs
 77 | 		 * HTML-entity escaping to be inside an href */
 78 | 		case '&': 
 79 | 			BUFPUTSL(ob, "&amp;");
 80 | 			break;
 81 | 
 82 | 		/* the single quote is a valid URL character
 83 | 		 * according to the standard; it needs HTML
 84 | 		 * entity escaping too */
 85 | 		case '\'':
 86 | 			BUFPUTSL(ob, "&#x27;");
 87 | 			break;
 88 | 		
 89 | 		/* the space can be escaped to %20 or a plus
 90 | 		 * sign. we're going with the generic escape
 91 | 		 * for now. the plus thing is more commonly seen
 92 | 		 * when building GET strings */
 93 | #if 0
 94 | 		case ' ':
 95 | 			bufputc(ob, '+');
 96 | 			break;
 97 | #endif
 98 | 
 99 | 		/* every other character goes with a %XX escaping */
100 | 		default:
101 | 			hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102 | 			hex_str[2] = hex_chars[src[i] & 0xF];
103 | 			bufput(ob, hex_str, 3);
104 | 		}
105 | 
106 | 		i++;
107 | 	}
108 | }
109 | 


--------------------------------------------------------------------------------
/src/markdown.h:
--------------------------------------------------------------------------------
  1 | /* markdown.h - generic markdown parser */
  2 | 
  3 | /*
  4 |  * Copyright (c) 2009, Natacha Porté
  5 |  *
  6 |  * Permission to use, copy, modify, and distribute this software for any
  7 |  * purpose with or without fee is hereby granted, provided that the above
  8 |  * copyright notice and this permission notice appear in all copies.
  9 |  *
 10 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 11 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 12 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 13 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 14 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 15 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 16 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 17 |  */
 18 | 
 19 | #ifndef UPSKIRT_MARKDOWN_H
 20 | #define UPSKIRT_MARKDOWN_H
 21 | 
 22 | #include "buffer.h"
 23 | #include "autolink.h"
 24 | 
 25 | #ifdef __cplusplus
 26 | extern "C" {
 27 | #endif
 28 | 
 29 | #define SUNDOWN_VERSION "1.16.0"
 30 | #define SUNDOWN_VER_MAJOR 1
 31 | #define SUNDOWN_VER_MINOR 16
 32 | #define SUNDOWN_VER_REVISION 0
 33 | 
 34 | /********************
 35 |  * TYPE DEFINITIONS *
 36 |  ********************/
 37 | 
/* mkd_autolink - type of autolink */
enum mkd_autolink {
	MKDA_NOT_AUTOLINK,	/* used internally when it is not an autolink */
	MKDA_NORMAL,		/* normal http/https/ftp/mailto/etc link */
	MKDA_EMAIL,			/* e-mail link without explicit mailto: */
};
 44 | 
/*
 * mkd_tableflags - table flag values. The two low bits hold the alignment
 * and are extracted with MKD_TABLE_ALIGNMASK; MKD_TABLE_HEADER is a
 * separate bit. NOTE(review): presumably these are the values of the
 * `flags` argument of the table_cell callback -- confirm against markdown.c.
 */
enum mkd_tableflags {
	MKD_TABLE_ALIGN_L = 1,
	MKD_TABLE_ALIGN_R = 2,
	MKD_TABLE_ALIGN_CENTER = 3,	/* equals ALIGN_L | ALIGN_R */
	MKD_TABLE_ALIGNMASK = 3,	/* mask covering both alignment bits */
	MKD_TABLE_HEADER = 4
};
 52 | 
/*
 * mkd_extensions - parser extension bits. Every value is a distinct power
 * of two, so several can be OR-ed together; bit 5 is not assigned.
 * NOTE(review): presumably passed to sd_markdown_new() -- confirm against
 * markdown.c.
 */
enum mkd_extensions {
	MKDEXT_NO_INTRA_EMPHASIS = (1 << 0),
	MKDEXT_TABLES = (1 << 1),
	MKDEXT_FENCED_CODE = (1 << 2),
	MKDEXT_AUTOLINK = (1 << 3),
	MKDEXT_STRIKETHROUGH = (1 << 4),
	MKDEXT_SPACE_HEADERS = (1 << 6),
	MKDEXT_SUPERSCRIPT = (1 << 7),
	MKDEXT_LAX_SPACING = (1 << 8),
};
 63 | 
 64 | /* sd_callbacks - functions for rendering parsed data */
 65 | struct sd_callbacks {
 66 | 	/* block level callbacks - NULL skips the block */
 67 | 	void (*blockcode)(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque);
 68 | 	void (*blockquote)(struct buf *ob, const struct buf *text, void *opaque);
 69 | 	void (*blockhtml)(struct buf *ob,const  struct buf *text, void *opaque);
 70 | 	void (*header)(struct buf *ob, const struct buf *text, int level, void *opaque);
 71 | 	void (*hrule)(struct buf *ob, void *opaque);
 72 | 	void (*list)(struct buf *ob, const struct buf *text, int flags, void *opaque);
 73 | 	void (*listitem)(struct buf *ob, const struct buf *text, int flags, void *opaque);
 74 | 	void (*paragraph)(struct buf *ob, const struct buf *text, void *opaque);
 75 | 	void (*table)(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque);
 76 | 	void (*table_row)(struct buf *ob, const struct buf *text, void *opaque);
 77 | 	void (*table_cell)(struct buf *ob, const struct buf *text, int flags, void *opaque);
 78 | 
 79 | 
 80 | 	/* span level callbacks - NULL or return 0 prints the span verbatim */
 81 | 	int (*autolink)(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque);
 82 | 	int (*codespan)(struct buf *ob, const struct buf *text, void *opaque);
 83 | 	int (*double_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 84 | 	int (*emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 85 | 	int (*image)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque);
 86 | 	int (*linebreak)(struct buf *ob, void *opaque);
 87 | 	int (*link)(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque);
 88 | 	int (*raw_html_tag)(struct buf *ob, const struct buf *tag, void *opaque);
 89 | 	int (*triple_emphasis)(struct buf *ob, const struct buf *text, void *opaque);
 90 | 	int (*strikethrough)(struct buf *ob, const struct buf *text, void *opaque);
 91 | 	int (*superscript)(struct buf *ob, const struct buf *text, void *opaque);
 92 | 
 93 | 	/* low level callbacks - NULL copies input directly into the output */
 94 | 	void (*entity)(struct buf *ob, const struct buf *entity, void *opaque);
 95 | 	void (*normal_text)(struct buf *ob, const struct buf *text, void *opaque);
 96 | 
 97 | 	/* header and footer */
 98 | 	void (*doc_header)(struct buf *ob, void *opaque);
 99 | 	void (*doc_footer)(struct buf *ob, void *opaque);
100 | };
101 | 
102 | struct sd_markdown;
103 | 
104 | /*********
105 |  * FLAGS *
106 |  *********/
107 | 
/* list/listitem flags: bit values for the `flags` argument of those callbacks */
#define MKD_LIST_ORDERED	0x1
#define MKD_LI_BLOCK		0x2	/* <li> containing block data */
111 | 
112 | /**********************
113 |  * EXPORTED FUNCTIONS *
114 |  **********************/
115 | 
/*
 * sd_markdown_new: returns a pointer to the opaque `struct sd_markdown`.
 * NOTE(review): the implementation lives outside this header; `extensions`
 * presumably takes MKDEXT_* bits and `opaque` is presumably what the
 * callbacks later receive - confirm in markdown.c.
 */
struct sd_markdown *sd_markdown_new(unsigned int extensions, size_t max_nesting,
	const struct sd_callbacks *callbacks, void *opaque);

/*
 * sd_markdown_render: takes `doc_size` bytes at `document`, an output buffer
 * `ob` and a `struct sd_markdown` obtained from sd_markdown_new().
 */
void sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size,
	struct sd_markdown *md);

/* sd_markdown_free: counterpart of sd_markdown_new() */
void sd_markdown_free(struct sd_markdown *md);

/*
 * sd_version: version query with three int out-parameters.
 * NOTE(review): presumably filled with the SUNDOWN_VER_* values - confirm.
 */
void sd_version(int *major, int *minor, int *revision);
131 | 
132 | #ifdef __cplusplus
133 | }
134 | #endif
135 | 
136 | #endif
137 | 
138 | /* vim: set filetype=c: */
139 | 


--------------------------------------------------------------------------------
/src/buffer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2008, Natacha Porté
  3 |  * Copyright (c) 2011, Vicent Martí
  4 |  *
  5 |  * Permission to use, copy, modify, and distribute this software for any
  6 |  * purpose with or without fee is hereby granted, provided that the above
  7 |  * copyright notice and this permission notice appear in all copies.
  8 |  *
  9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16 |  */
 17 | 
 18 | #define BUFFER_MAX_ALLOC_SIZE (1024 * 1024 * 16) //16mb
 19 | 
 20 | #include "buffer.h"
 21 | 
 22 | #include <stdio.h>
 23 | #include <stdlib.h>
 24 | #include <string.h>
 25 | #include <assert.h>
 26 | 
 27 | /* MSVC compat */
 28 | #if defined(_MSC_VER)
 29 | #	define _buf_vsnprintf _vsnprintf
 30 | #else
 31 | #	define _buf_vsnprintf vsnprintf
 32 | #endif
 33 | 
 34 | int
 35 | bufprefix(const struct buf *buf, const char *prefix)
 36 | {
 37 | 	size_t i;
 38 | 	assert(buf && buf->unit);
 39 | 
 40 | 	for (i = 0; i < buf->size; ++i) {
 41 | 		if (prefix[i] == 0)
 42 | 			return 0;
 43 | 
 44 | 		if (buf->data[i] != prefix[i])
 45 | 			return buf->data[i] - prefix[i];
 46 | 	}
 47 | 
 48 | 	return 0;
 49 | }
 50 | 
 51 | /* bufgrow: increasing the allocated size to the given value */
 52 | int
 53 | bufgrow(struct buf *buf, size_t neosz)
 54 | {
 55 | 	size_t neoasz;
 56 | 	void *neodata;
 57 | 
 58 | 	assert(buf && buf->unit);
 59 | 
 60 | 	if (neosz > BUFFER_MAX_ALLOC_SIZE)
 61 | 		return BUF_ENOMEM;
 62 | 
 63 | 	if (buf->asize >= neosz)
 64 | 		return BUF_OK;
 65 | 
 66 | 	neoasz = buf->asize + buf->unit;
 67 | 	while (neoasz < neosz)
 68 | 		neoasz += buf->unit;
 69 | 
 70 | 	neodata = realloc(buf->data, neoasz);
 71 | 	if (!neodata)
 72 | 		return BUF_ENOMEM;
 73 | 
 74 | 	buf->data = neodata;
 75 | 	buf->asize = neoasz;
 76 | 	return BUF_OK;
 77 | }
 78 | 
 79 | 
 80 | /* bufnew: allocation of a new buffer */
 81 | struct buf *
 82 | bufnew(size_t unit)
 83 | {
 84 | 	struct buf *ret;
 85 | 	ret = malloc(sizeof (struct buf));
 86 | 
 87 | 	if (ret) {
 88 | 		ret->data = 0;
 89 | 		ret->size = ret->asize = 0;
 90 | 		ret->unit = unit;
 91 | 	}
 92 | 	return ret;
 93 | }
 94 | 
 95 | /* bufnullterm: NULL-termination of the string array */
 96 | const char *
 97 | bufcstr(struct buf *buf)
 98 | {
 99 | 	assert(buf && buf->unit);
100 | 
101 | 	if (buf->size < buf->asize && buf->data[buf->size] == 0)
102 | 		return (char *)buf->data;
103 | 
104 | 	if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1) == 0) {
105 | 		buf->data[buf->size] = 0;
106 | 		return (char *)buf->data;
107 | 	}
108 | 
109 | 	return NULL;
110 | }
111 | 
112 | /* bufprintf: formatted printing to a buffer */
113 | void
114 | bufprintf(struct buf *buf, const char *fmt, ...)
115 | {
116 | 	va_list ap;
117 | 	int n;
118 | 
119 | 	assert(buf && buf->unit);
120 | 
121 | 	if (buf->size >= buf->asize && bufgrow(buf, buf->size + 1) < 0)
122 | 		return;
123 | 	
124 | 	va_start(ap, fmt);
125 | 	n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
126 | 	va_end(ap);
127 | 
128 | 	if (n < 0) {
129 | #ifdef _MSC_VER
130 | 		va_start(ap, fmt);
131 | 		n = _vscprintf(fmt, ap);
132 | 		va_end(ap);
133 | #else
134 | 		return;
135 | #endif
136 | 	}
137 | 
138 | 	if ((size_t)n >= buf->asize - buf->size) {
139 | 		if (bufgrow(buf, buf->size + n + 1) < 0)
140 | 			return;
141 | 
142 | 		va_start(ap, fmt);
143 | 		n = _buf_vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
144 | 		va_end(ap);
145 | 	}
146 | 
147 | 	if (n < 0)
148 | 		return;
149 | 
150 | 	buf->size += n;
151 | }
152 | 
153 | /* bufput: appends raw data to a buffer */
154 | void
155 | bufput(struct buf *buf, const void *data, size_t len)
156 | {
157 | 	assert(buf && buf->unit);
158 | 
159 | 	if (buf->size + len > buf->asize && bufgrow(buf, buf->size + len) < 0)
160 | 		return;
161 | 
162 | 	memcpy(buf->data + buf->size, data, len);
163 | 	buf->size += len;
164 | }
165 | 
/* bufputs: append a NUL-terminated string (without its terminator) to a buffer */
void
bufputs(struct buf *buf, const char *str)
{
	const size_t length = strlen(str);

	bufput(buf, str, length);
}
172 | 
173 | 
174 | /* bufputc: appends a single uint8_t to a buffer */
175 | void
176 | bufputc(struct buf *buf, int c)
177 | {
178 | 	assert(buf && buf->unit);
179 | 
180 | 	if (buf->size + 1 > buf->asize && bufgrow(buf, buf->size + 1) < 0)
181 | 		return;
182 | 
183 | 	buf->data[buf->size] = c;
184 | 	buf->size += 1;
185 | }
186 | 
187 | /* bufrelease: decrease the reference count and free the buffer if needed */
188 | void
189 | bufrelease(struct buf *buf)
190 | {
191 | 	if (!buf)
192 | 		return;
193 | 
194 | 	free(buf->data);
195 | 	free(buf);
196 | }
197 | 
198 | 
199 | /* bufreset: frees internal data of the buffer */
200 | void
201 | bufreset(struct buf *buf)
202 | {
203 | 	if (!buf)
204 | 		return;
205 | 
206 | 	free(buf->data);
207 | 	buf->data = NULL;
208 | 	buf->size = buf->asize = 0;
209 | }
210 | 
211 | /* bufslurp: removes a given number of bytes from the head of the array */
212 | void
213 | bufslurp(struct buf *buf, size_t len)
214 | {
215 | 	assert(buf && buf->unit);
216 | 
217 | 	if (len >= buf->size) {
218 | 		buf->size = 0;
219 | 		return;
220 | 	}
221 | 
222 | 	buf->size -= len;
223 | 	memmove(buf->data, buf->data + len, buf->size);
224 | }
225 | 
226 | 


--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
  1 | Sundown
  2 | =======
  3 | 
  4 | `Sundown` is a Markdown parser based on the original code of the
  5 | [Upskirt library](http://fossil.instinctive.eu/libupskirt/index) by Natacha Porté.
  6 | 
  7 | Features
  8 | --------
  9 | 
 10 | *	**Fully standards compliant**
 11 | 
 12 | 	`Sundown` passes out of the box the official Markdown v1.0.0 and v1.0.3
 13 | 	test suites, and has been extensively tested with additional corner cases
 14 | 	to make sure its output is as sane as possible at all times.
 15 | 
 16 | *	**Massive extension support**
 17 | 
 18 | 	`Sundown` has optional support for several (unofficial) Markdown extensions,
 19 | 	such as non-strict emphasis, fenced code blocks, tables, autolinks,
 20 | 	strikethrough and more.
 21 | 
 22 | *	**UTF-8 aware**
 23 | 
 24 | 	`Sundown` is fully UTF-8 aware, both when parsing the source document and when
 25 | 	generating the resulting (X)HTML code.
 26 | 
  27 | *	**Tested & ready to be used in production**
 28 | 
 29 | 	`Sundown` has been extensively security audited, and includes protection against
 30 | 	all possible DOS attacks (stack overflows, out of memory situations, malformed
 31 | 	Markdown syntax...) and against client attacks through malicious embedded HTML.
 32 | 
 33 | 	We've worked very hard to make `Sundown` never crash or run out of memory
 34 | 	under *any* input. `Sundown` renders all the Markdown content in GitHub and so
 35 | 	far hasn't crashed a single time.
 36 | 
 37 | *	**Customizable renderers**
 38 | 
 39 | 	`Sundown` is not stuck with XHTML output: the Markdown parser of the library
 40 | 	is decoupled from the renderer, so it's trivial to extend the library with
 41 | 	custom renderers. A fully functional (X)HTML renderer is included.
 42 | 
 43 | *	**Optimized for speed**
 44 | 
 45 | 	`Sundown` is written in C, with a special emphasis on performance. When wrapped
  46 | 	in a dynamic language such as Python or Ruby, it has been shown to be up to 40
 47 | 	times faster than other native alternatives.
 48 | 
 49 | *	**Zero-dependency**
 50 | 
  51 | 	`Sundown` is a zero-dependency library composed of 4 `.c` files and their headers.
 52 | 	No dependencies, no bullshit. Only standard C99 that builds everywhere.
 53 | 
 54 | Credits
 55 | -------
 56 | 
 57 | `Sundown` is based on the original Upskirt parser by Natacha Porté, with many additions
 58 | by Vicent Marti (@vmg) and contributions from the following authors:
 59 | 
 60 | 	Ben Noordhuis, Bruno Michel, Joseph Koshy, Krzysztof Kowalczyk, Samuel Bronson,
 61 | 	Shuhei Tanuma
 62 | 
 63 | Bindings
 64 | --------
 65 | 
 66 | `Sundown` is available from other programming languages thanks to these bindings developed
 67 | by our awesome contributors.
 68 | 
 69 | - [Redcarpet](https://github.com/vmg/redcarpet) (Ruby)
 70 | - [RobotSkirt](https://github.com/benmills/robotskirt) (Node.js)
 71 | - [Misaka](https://github.com/FSX/misaka) (Python)
 72 | - [ffi-sundown](https://github.com/postmodern/ffi-sundown) (Ruby FFI)
 73 | - [Sundown HS](https://github.com/bitonic/sundown) (Haskell)
 74 | - [Goskirt](https://github.com/madari/goskirt) (Go)
 75 | - [Upskirt.go](https://github.com/buu700/upskirt.go) (Go)
 76 | - [MoonShine](https://github.com/brandonc/moonshine) (.NET)
 77 | - [PHP-Sundown](https://github.com/chobie/php-sundown) (PHP)
 78 | - [Sundown.net](https://github.com/txdv/sundown.net) (.NET)
 79 | 
 80 | Help us
 81 | -------
 82 | 
 83 | `Sundown` is all about security. If you find a (potential) security vulnerability in the
 84 | library, or a way to make it crash through malicious input, please report it to us,
 85 | either directly via email or by opening an Issue on GitHub, and help make the web safer
 86 | for everybody.
 87 | 
 88 | Unicode character handling
 89 | --------------------------
 90 | 
 91 | Given that the Markdown spec makes no provision for Unicode character handling, `Sundown`
 92 | takes a conservative approach towards deciding which extended characters trigger Markdown
 93 | features:
 94 | 
  95 | *	Punctuation characters above the U+007F codepoint are not handled as punctuation.
  96 | 	They are considered as normal, in-word characters for word-boundary checks.
  97 | 
  98 | *	Whitespace characters above the U+007F codepoint are not considered as
  99 | 	whitespace. They are considered as normal, in-word characters for word-boundary checks.
100 | 
101 | Install
102 | -------
103 | 
 104 | There is nothing to install. `Sundown` is composed of 4 `.c` files (`markdown.c`,
 105 | `buffer.c`, `autolink.c` and `stack.c`), so just throw them in your project. Zero-dependency
 106 | means zero-dependency. You might want to include `html/html.c` if you want to use the
107 | included XHTML renderer, or write your own renderer. Either way, it's all fun and joy.
108 | 
109 | If you are hardcore, you can use the included `Makefile` to build `Sundown` into a dynamic
110 | library, or to build the sample `sundown` executable, which is just a commandline
111 | Markdown to XHTML parser. (If gcc gives you grief about `-fPIC`, e.g. with MinGW, try
112 | `make MFLAGS=` instead of just `make`.)
113 | 
114 | License
115 | -------
116 | 
117 | Permission to use, copy, modify, and distribute this software for any
118 | purpose with or without fee is hereby granted, provided that the above
119 | copyright notice and this permission notice appear in all copies.
120 | 
121 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
122 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
123 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
124 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
125 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
126 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
127 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
128 | 
129 | <!-- Local Variables: -->
130 | <!-- fill-column: 89 -->
131 | <!-- End: -->
132 | 


--------------------------------------------------------------------------------
/src/autolink.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Vicent Marti
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | #include "buffer.h"
 18 | #include "autolink.h"
 19 | 
 20 | #include <string.h>
 21 | #include <stdlib.h>
 22 | #include <stdio.h>
 23 | #include <ctype.h>
 24 | 
 25 | #if defined(_WIN32)
 26 | #define strncasecmp	_strnicmp
 27 | #endif
 28 | 
 29 | int
 30 | sd_autolink_issafe(const uint8_t *link, size_t link_len)
 31 | {
 32 | 	static const size_t valid_uris_count = 5;
 33 | 	static const char *valid_uris[] = {
 34 | 		"/", "http://", "https://", "ftp://", "mailto:"
 35 | 	};
 36 | 
 37 | 	size_t i;
 38 | 
 39 | 	for (i = 0; i < valid_uris_count; ++i) {
 40 | 		size_t len = strlen(valid_uris[i]);
 41 | 
 42 | 		if (link_len > len &&
 43 | 			strncasecmp((char *)link, valid_uris[i], len) == 0 &&
 44 | 			isalnum(link[len]))
 45 | 			return 1;
 46 | 	}
 47 | 
 48 | 	return 0;
 49 | }
 50 | 
 51 | static size_t
 52 | autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
 53 | {
 54 | 	uint8_t cclose, copen = 0;
 55 | 	size_t i;
 56 | 
 57 | 	for (i = 0; i < link_end; ++i)
 58 | 		if (data[i] == '<') {
 59 | 			link_end = i;
 60 | 			break;
 61 | 		}
 62 | 
 63 | 	while (link_end > 0) {
 64 | 		if (strchr("?!.,", data[link_end - 1]) != NULL)
 65 | 			link_end--;
 66 | 
 67 | 		else if (data[link_end - 1] == ';') {
 68 | 			size_t new_end = link_end - 2;
 69 | 
 70 | 			while (new_end > 0 && isalpha(data[new_end]))
 71 | 				new_end--;
 72 | 
 73 | 			if (new_end < link_end - 2 && data[new_end] == '&')
 74 | 				link_end = new_end;
 75 | 			else
 76 | 				link_end--;
 77 | 		}
 78 | 		else break;
 79 | 	}
 80 | 
 81 | 	if (link_end == 0)
 82 | 		return 0;
 83 | 
 84 | 	cclose = data[link_end - 1];
 85 | 
 86 | 	switch (cclose) {
 87 | 	case '"':	copen = '"'; break;
 88 | 	case '\'':	copen = '\''; break;
 89 | 	case ')':	copen = '('; break;
 90 | 	case ']':	copen = '['; break;
 91 | 	case '}':	copen = '{'; break;
 92 | 	}
 93 | 
 94 | 	if (copen != 0) {
 95 | 		size_t closing = 0;
 96 | 		size_t opening = 0;
 97 | 		size_t i = 0;
 98 | 
 99 | 		/* Try to close the final punctuation sign in this same line;
100 | 		 * if we managed to close it outside of the URL, that means that it's
101 | 		 * not part of the URL. If it closes inside the URL, that means it
102 | 		 * is part of the URL.
103 | 		 *
104 | 		 * Examples:
105 | 		 *
106 | 		 *	foo http://www.pokemon.com/Pikachu_(Electric) bar
107 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric)
108 | 		 *
109 | 		 *	foo (http://www.pokemon.com/Pikachu_(Electric)) bar
110 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric)
111 | 		 *
112 | 		 *	foo http://www.pokemon.com/Pikachu_(Electric)) bar
113 | 		 *		=> http://www.pokemon.com/Pikachu_(Electric))
114 | 		 *
115 | 		 *	(foo http://www.pokemon.com/Pikachu_(Electric)) bar
116 | 		 *		=> foo http://www.pokemon.com/Pikachu_(Electric)
117 | 		 */
118 | 
119 | 		while (i < link_end) {
120 | 			if (data[i] == copen)
121 | 				opening++;
122 | 			else if (data[i] == cclose)
123 | 				closing++;
124 | 
125 | 			i++;
126 | 		}
127 | 
128 | 		if (closing != opening)
129 | 			link_end--;
130 | 	}
131 | 
132 | 	return link_end;
133 | }
134 | 
/*
 * check_domain: measure the run of domain-name characters starting at `data`.
 *
 * The first byte must be alphanumeric. Bytes 1 .. size-2 are then scanned
 * and accepted while they are alphanumeric, '-' or '.'; the final byte of
 * the input is never examined.
 *
 * Returns the index at which the scan stopped. When `allow_short` is 0 the
 * run must contain at least one '.', otherwise 0 is returned. An empty input
 * (size == 0) or a non-alphanumeric first byte yields 0.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
	size_t i, np = 0;

	/* size == 0 would make `size - 1` wrap around and the scan run past
	 * the input */
	if (size == 0 || !isalnum(data[0]))
		return 0;

	for (i = 1; i < size - 1; ++i) {
		if (data[i] == '.') np++;
		else if (!isalnum(data[i]) && data[i] != '-') break;
	}

	if (allow_short) {
		/* We don't need a valid domain in the strict sense (with at
		 * least one dot); so just make sure it's composed of valid
		 * domain characters and return the length of the valid
		 * sequence. */
		return i;
	} else {
		/* a valid domain needs to have at least a dot.
		 * that's as far as we get */
		return np ? i : 0;
	}
}
160 | 
/*
 * sd_autolink__www: try to recognise a "www." autolink that starts at `data`.
 *
 * Requirements: when `max_rewind` is non-zero the byte before `data` must be
 * punctuation or whitespace; at least 4 bytes must be available and start
 * with "www."; the domain must contain a dot (check_domain, strict mode).
 * The link then runs up to the next whitespace byte or the end of the input
 * and is trimmed by autolink_delim().
 *
 * On success the link text is appended to `link`, *rewind_p is set to 0 and
 * the link length is returned. On failure 0 is returned and neither `link`
 * nor *rewind_p is touched. `flags` is not used.
 */
size_t
sd_autolink__www(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end;

	(void)flags;

	if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
		return 0;

	if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
		return 0;

	link_end = check_domain(data, size, 0);

	if (link_end == 0)
		return 0;

	while (link_end < size && !isspace(data[link_end]))
		link_end++;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data, link_end);
	*rewind_p = 0;

	/* returned as size_t, like the e-mail and URL variants; the former
	 * (int) cast could truncate very long links */
	return link_end;
}
196 | 
/*
 * sd_autolink__email: try to recognise an e-mail autolink around `data`.
 * NOTE(review): `data` is presumably positioned on the '@' - confirm against
 * the caller; the code itself only requires exactly one '@' in the scanned
 * run.
 *
 * Backwards, up to `max_rewind` bytes before `data` are taken while they are
 * alphanumeric or one of ".+-_"; at least one such byte is required.
 * Forwards, bytes are taken while they are alphanumeric, '@', '-', '_' or a
 * '.' that is not the last byte of the input. The forward run must be at
 * least 2 bytes long, contain exactly one '@' and at least one '.', and end
 * with a letter. The result is trimmed by autolink_delim().
 *
 * On success the whole address is appended to `link`, *rewind_p receives the
 * number of bytes taken before `data`, and the number of bytes taken from
 * `data` onwards is returned. On failure 0 is returned. `flags` is not used.
 */
size_t
sd_autolink__email(
	size_t *rewind_p,
	struct buf *link,
	uint8_t *data,
	size_t max_rewind,
	size_t size,
	unsigned int flags)
{
	size_t link_end, rewind;
	int nb = 0, np = 0;

	(void)flags;

	for (rewind = 0; rewind < max_rewind; ++rewind) {
		const uint8_t c = *(data - rewind - 1);

		if (isalnum(c))
			continue;

		/* c != 0: strchr() would otherwise match the terminator of the
		 * set and accept a NUL byte as part of the address */
		if (c != 0 && strchr(".+-_", c) != NULL)
			continue;

		break;
	}

	if (rewind == 0)
		return 0;

	for (link_end = 0; link_end < size; ++link_end) {
		const uint8_t c = data[link_end];

		if (isalnum(c))
			continue;

		if (c == '@')
			nb++;
		else if (c == '.' && link_end < size - 1)
			np++;
		else if (c != '-' && c != '_')
			break;
	}

	if (link_end < 2 || nb != 1 || np == 0 ||
		!isalpha(data[link_end - 1]))
		return 0;

	link_end = autolink_delim(data, link_end, max_rewind, size);

	if (link_end == 0)
		return 0;

	bufput(link, data - rewind, link_end + rewind);
	*rewind_p = rewind;

	return link_end;
}
252 | 
253 | size_t
254 | sd_autolink__url(
255 | 	size_t *rewind_p,
256 | 	struct buf *link,
257 | 	uint8_t *data,
258 | 	size_t max_rewind,
259 | 	size_t size,
260 | 	unsigned int flags)
261 | {
262 | 	size_t link_end, rewind = 0, domain_len;
263 | 
264 | 	if (size < 4 || data[1] != '/' || data[2] != '/')
265 | 		return 0;
266 | 
267 | 	while (rewind < max_rewind && isalpha(data[-rewind - 1]))
268 | 		rewind++;
269 | 
270 | 	if (!sd_autolink_issafe(data - rewind, size + rewind))
271 | 		return 0;
272 | 
273 | 	link_end = strlen("://");
274 | 
275 | 	domain_len = check_domain(
276 | 		data + link_end,
277 | 		size - link_end,
278 | 		flags & SD_AUTOLINK_SHORT_DOMAINS);
279 | 
280 | 	if (domain_len == 0)
281 | 		return 0;
282 | 
283 | 	link_end += domain_len;
284 | 	while (link_end < size && !isspace(data[link_end]))
285 | 		link_end++;
286 | 
287 | 	link_end = autolink_delim(data, link_end, max_rewind, size);
288 | 
289 | 	if (link_end == 0)
290 | 		return 0;
291 | 
292 | 	bufput(link, data - rewind, link_end + rewind);
293 | 	*rewind_p = rewind;
294 | 
295 | 	return link_end;
296 | }
297 | 
298 | 


--------------------------------------------------------------------------------
/src/html_blocks.h:
--------------------------------------------------------------------------------
  1 | /* C code produced by gperf version 3.0.3 */
  2 | /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt  */
  3 | /* Computed positions: -k'1-2' */
  4 | 
  5 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
  6 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
  7 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
  8 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
  9 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
 10 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
 11 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
 12 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
 13 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
 14 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
 15 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
 16 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
 17 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
 18 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
 19 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
 20 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
 21 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
 22 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
 23 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
 24 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
 25 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
 26 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
 27 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
 28 | /* The character set is not based on ISO-646.  */
 29 | error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
 30 | #endif
 31 | 
 32 | /* maximum key range = 37, duplicates = 0 */
 33 | 
 34 | #ifndef GPERF_DOWNCASE
 35 | #define GPERF_DOWNCASE 1
 36 | static unsigned char gperf_downcase[256] =
 37 |   {
 38 |       0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
 39 |      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
 40 |      30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
 41 |      45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
 42 |      60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
 43 |     107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
 44 |     122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
 45 |     105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
 46 |     120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
 47 |     135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
 48 |     150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
 49 |     165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
 50 |     180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
 51 |     195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
 52 |     210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
 53 |     225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
 54 |     240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
 55 |     255
 56 |   };
 57 | #endif
 58 | 
 59 | #ifndef GPERF_CASE_STRNCMP
 60 | #define GPERF_CASE_STRNCMP 1
 61 | static int
 62 | gperf_case_strncmp (s1, s2, n)
 63 |      register const char *s1;
 64 |      register const char *s2;
 65 |      register unsigned int n;
 66 | {
 67 |   for (; n > 0;)
 68 |     {
 69 |       unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
 70 |       unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
 71 |       if (c1 != 0 && c1 == c2)
 72 |         {
 73 |           n--;
 74 |           continue;
 75 |         }
 76 |       return (int)c1 - (int)c2;
 77 |     }
 78 |   return 0;
 79 | }
 80 | #endif
 81 | 
 82 | #ifdef __GNUC__
 83 | __inline
 84 | #else
 85 | #ifdef __cplusplus
 86 | inline
 87 | #endif
 88 | #endif
 89 | static unsigned int
 90 | hash_block_tag (str, len)
 91 |      register const char *str;
 92 |      register unsigned int len;
 93 | {
 94 |   static const unsigned char asso_values[] =
 95 |     {
 96 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 97 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 98 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
 99 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
100 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
101 |        8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
102 |       38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
103 |        5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
104 |        0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
105 |       38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
106 |        0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
107 |       15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
108 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
109 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
110 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
111 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
112 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
113 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
114 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
115 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
116 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
117 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
118 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
119 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
120 |       38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
121 |       38, 38, 38, 38, 38, 38, 38
122 |     };
123 |   register int hval = len;
124 | 
125 |   switch (hval)
126 |     {
127 |       default:
128 |         hval += asso_values[(unsigned char)str[1]+1];
129 |       /*FALLTHROUGH*/
130 |       case 1:
131 |         hval += asso_values[(unsigned char)str[0]];
132 |         break;
133 |     }
134 |   return hval;
135 | }
136 | 
/*
 * Look up an HTML block tag name, ignoring ASCII case.
 * `str` holds `len` bytes and need not be NUL-terminated. Returns the
 * matching entry of the keyword list (a lowercase, NUL-terminated string)
 * or 0 when `str` is not one of the known block tags.
 *
 * Written with a prototype instead of the K&R parameter list gperf 3.0.3
 * emitted: old-style definitions are no longer part of C23.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* cheap first-byte test (case bit masked out) before the full compare */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
207 | 


--------------------------------------------------------------------------------
/html/html_smartypants.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2011, Vicent Marti
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | #include "buffer.h"
 18 | #include "html.h"
 19 | 
 20 | #include <string.h>
 21 | #include <stdlib.h>
 22 | #include <stdio.h>
 23 | #include <ctype.h>
 24 | 
/* On _WIN32 builds, route snprintf() calls in this file to _snprintf(). */
#if defined(_WIN32)
#define snprintf	_snprintf		
#endif
 28 | 
/*
 * Quote state shared by the callbacks during one sdhtml_smartypants() run.
 * Each flag is flipped by smartypants_quotes() every time it emits a quote
 * entity, so it tells whether the next quote should open or close.
 */
struct smartypants_data {
	int in_squote;	/* non-zero while a single quote is open */
	int in_dquote;	/* non-zero while a double quote is open */
};
 33 | 
/*
 * Forward declarations of the per-character callbacks. They all share one
 * signature: `text` points at the trigger character, `size` is the number of
 * bytes left starting at `text`, and `previous_char` is the byte before it
 * (0 at the start of the input). The return value is the number of extra
 * input bytes consumed after the trigger character.
 */
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
 44 | 
/*
 * Dispatch table: the action number stored in smartypants_cb_chars selects
 * the callback at the same index. Slot 0 is never called because action 0
 * means "plain character".
 */
static size_t (*smartypants_cb_ptrs[])
	(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
	NULL,					/* 0 */
	smartypants_cb__dash,	/* 1 */
	smartypants_cb__parens,	/* 2 */
	smartypants_cb__squote, /* 3 */
	smartypants_cb__dquote, /* 4 */
	smartypants_cb__amp,	/* 5 */
	smartypants_cb__period,	/* 6 */
	smartypants_cb__number,	/* 7 */
	smartypants_cb__ltag,	/* 8 */
	smartypants_cb__backtick, /* 9 */
	smartypants_cb__escape, /* 10 */
};
 60 | 
/*
 * Byte value -> action number (index into smartypants_cb_ptrs), 16 entries
 * per row. Non-zero entries:
 *   '"' (34) = 4   '&' (38) = 5   '\'' (39) = 3   '(' (40) = 2
 *   '-' (45) = 1   '.' (46) = 6   '1' (49) = 7    '3' (51) = 7
 *   '<' (60) = 8   '\\' (92) = 10  '`' (96) = 9
 * Every other byte maps to 0 and is copied through unchanged.
 */
static const uint8_t smartypants_cb_chars[] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
	0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
	9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
 79 | 
 80 | static inline int
 81 | word_boundary(uint8_t c)
 82 | {
 83 | 	return c == 0 || isspace(c) || ispunct(c);
 84 | }
 85 | 
 86 | static int
 87 | smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
 88 | {
 89 | 	char ent[8];
 90 | 
 91 | 	if (*is_open && !word_boundary(next_char))
 92 | 		return 0;
 93 | 
 94 | 	if (!(*is_open) && !word_boundary(previous_char))
 95 | 		return 0;
 96 | 
 97 | 	snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
 98 | 	*is_open = !(*is_open);
 99 | 	bufputs(ob, ent);
100 | 	return 1;
101 | }
102 | 
103 | static size_t
104 | smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
105 | {
106 | 	if (size >= 2) {
107 | 		uint8_t t1 = tolower(text[1]);
108 | 
109 | 		if (t1 == '\'') {
110 | 			if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
111 | 				return 1;
112 | 		}
113 | 
114 | 		if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
115 | 			(size == 3 || word_boundary(text[2]))) {
116 | 			BUFPUTSL(ob, "&rsquo;");
117 | 			return 0;
118 | 		}
119 | 
120 | 		if (size >= 3) {
121 | 			uint8_t t2 = tolower(text[2]);
122 | 
123 | 			if (((t1 == 'r' && t2 == 'e') ||
124 | 				(t1 == 'l' && t2 == 'l') ||
125 | 				(t1 == 'v' && t2 == 'e')) &&
126 | 				(size == 4 || word_boundary(text[3]))) {
127 | 				BUFPUTSL(ob, "&rsquo;");
128 | 				return 0;
129 | 			}
130 | 		}
131 | 	}
132 | 
133 | 	if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
134 | 		return 0;
135 | 
136 | 	bufputc(ob, text[0]);
137 | 	return 0;
138 | }
139 | 
/*
 * smartypants_cb__parens: turn "(c)", "(r)" and "(tm)" (any letter case)
 * into &copy;, &reg; and &trade;. Any other '(' is copied through.
 * Returns the number of extra bytes consumed after the '('.
 */
static size_t
smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		const uint8_t first = tolower(text[1]);
		const uint8_t second = tolower(text[2]);

		if (second == ')') {
			if (first == 'c') {
				BUFPUTSL(ob, "&copy;");
				return 2;
			}

			if (first == 'r') {
				BUFPUTSL(ob, "&reg;");
				return 2;
			}
		} else if (first == 't' && second == 'm' && size >= 4 && text[3] == ')') {
			BUFPUTSL(ob, "&trade;");
			return 3;
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
166 | 
/*
 * smartypants_cb__dash: "---" becomes &mdash;, "--" becomes &ndash;,
 * a lone '-' is copied through.
 * Returns the number of extra bytes consumed after the first dash.
 */
static size_t
smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int two_dashes = size >= 2 && text[1] == '-';

	if (two_dashes && size >= 3 && text[2] == '-') {
		BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (two_dashes) {
		BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	bufputc(ob, text[0]);
	return 0;
}
183 | 
184 | static size_t
185 | smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
186 | {
187 | 	if (size >= 6 && memcmp(text, "&quot;", 6) == 0) {
188 | 		if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
189 | 			return 5;
190 | 	}
191 | 
192 | 	if (size >= 4 && memcmp(text, "&#0;", 4) == 0)
193 | 		return 3;
194 | 
195 | 	bufputc(ob, '&');
196 | 	return 0;
197 | }
198 | 
/*
 * smartypants_cb__period: "..." and ". . ." become &hellip;,
 * any other '.' is copied through.
 * Returns the number of extra bytes consumed after the first period.
 */
static size_t
smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int tight = size >= 3 && text[1] == '.' && text[2] == '.';
	const int spaced = size >= 5 &&
		text[1] == ' ' && text[2] == '.' &&
		text[3] == ' ' && text[4] == '.';

	if (tight || spaced) {
		BUFPUTSL(ob, "&hellip;");
		return tight ? 2 : 4;
	}

	bufputc(ob, text[0]);
	return 0;
}
215 | 
216 | static size_t
217 | smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
218 | {
219 | 	if (size >= 2 && text[1] == '`') {
220 | 		if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
221 | 			return 1;
222 | 	}
223 | 
224 | 	return 0;
225 | }
226 | 
/*
 * smartypants_cb__number: convert the fractions 1/2, 1/4 and 3/4 to
 * &frac12;, &frac14; and &frac34;.
 *
 * The fraction must start at a word boundary and be followed by the end of
 * the input or a word boundary; "1/4th" and "3/4ths" are accepted as well
 * (only the three fraction characters are consumed). Any other digit is
 * copied through.
 * Returns the number of extra bytes consumed after the first digit.
 */
static size_t
smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3 && text[1] == '/') {
		/* true when nothing word-like follows the three fraction characters */
		const int ends_here = size == 3 || word_boundary(text[3]);
		const uint8_t num = text[0];
		const uint8_t den = text[2];

		if (num == '1' && den == '2') {
			if (ends_here) {
				BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		} else if (num == '1' && den == '4') {
			if (ends_here ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		} else if (num == '3' && den == '4') {
			if (ends_here ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	bufputc(ob, text[0]);
	return 0;
}
258 | 
259 | static size_t
260 | smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
261 | {
262 | 	if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
263 | 		BUFPUTSL(ob, "&quot;");
264 | 
265 | 	return 0;
266 | }
267 | 
268 | static size_t
269 | smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
270 | {
271 | 	static const char *skip_tags[] = {
272 | 	  "pre", "code", "var", "samp", "kbd", "math", "script", "style"
273 | 	};
274 | 	static const size_t skip_tags_count = 8;
275 | 
276 | 	size_t tag, i = 0;
277 | 
278 | 	while (i < size && text[i] != '>')
279 | 		i++;
280 | 
281 | 	for (tag = 0; tag < skip_tags_count; ++tag) {
282 | 		if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN)
283 | 			break;
284 | 	}
285 | 
286 | 	if (tag < skip_tags_count) {
287 | 		for (;;) {
288 | 			while (i < size && text[i] != '<')
289 | 				i++;
290 | 
291 | 			if (i == size)
292 | 				break;
293 | 
294 | 			if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE)
295 | 				break;
296 | 
297 | 			i++;
298 | 		}
299 | 
300 | 		while (i < size && text[i] != '>')
301 | 			i++;
302 | 	}
303 | 
304 | 	bufput(ob, text, i + 1);
305 | 	return i;
306 | }
307 | 
/*
 * smartypants_cb__escape: handle a backslash.
 *
 * A backslash followed by one of \ " ' . - ` is removed and the escaped
 * character is written literally, bypassing its callback. In every other
 * case, including a backslash that is the last byte of the input, the
 * backslash itself is written so that no input character is lost.
 * Returns the number of extra bytes consumed after the backslash.
 */
static size_t
smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size < 2) {
		/* nothing left to escape: keep the backslash */
		bufputc(ob, '\\');
		return 0;
	}

	switch (text[1]) {
	case '\\':
	case '"':
	case '\'':
	case '.':
	case '-':
	case '`':
		bufputc(ob, text[1]);
		return 1;

	default:
		bufputc(ob, '\\');
		return 0;
	}
}
329 | 
/*
 * Disabled table (compiled out by the `#if 0`): one row per substitution,
 * giving the trigger character, a pattern, the replacement entity and a
 * skip count. Nothing in the compiled code reads it. Note that it is not
 * in sync with the callbacks above: its "--" row yields &mdash;, whereas
 * smartypants_cb__dash emits &ndash; for two dashes.
 */
#if 0
static struct {
    uint8_t c0;
    const uint8_t *pattern;
    const uint8_t *entity;
    int skip;
} smartypants_subs[] = {
    { '\'', "'s>",      "&rsquo;",  0 },
    { '\'', "'t>",      "&rsquo;",  0 },
    { '\'', "'re>",     "&rsquo;",  0 },
    { '\'', "'ll>",     "&rsquo;",  0 },
    { '\'', "'ve>",     "&rsquo;",  0 },
    { '\'', "'m>",      "&rsquo;",  0 },
    { '\'', "'d>",      "&rsquo;",  0 },
    { '-',  "--",       "&mdash;",  1 },
    { '-',  "<->",      "&ndash;",  0 },
    { '.',  "...",      "&hellip;", 2 },
    { '.',  ". . .",    "&hellip;", 4 },
    { '(',  "(c)",      "&copy;",   2 },
    { '(',  "(r)",      "&reg;",    2 },
    { '(',  "(tm)",     "&trade;",  3 },
    { '3',  "<3/4>",    "&frac34;", 2 },
    { '3',  "<3/4ths>", "&frac34;", 2 },
    { '1',  "<1/2>",    "&frac12;", 2 },
    { '1',  "<1/4>",    "&frac14;", 2 },
    { '1',  "<1/4th>",  "&frac14;", 2 },
    { '&',  "&#0;",      0,       3 },
};
#endif
359 | 
360 | void
361 | sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size)
362 | {
363 | 	size_t i;
364 | 	struct smartypants_data smrt = {0, 0};
365 | 
366 | 	if (!text)
367 | 		return;
368 | 
369 | 	bufgrow(ob, size);
370 | 
371 | 	for (i = 0; i < size; ++i) {
372 | 		size_t org;
373 | 		uint8_t action = 0;
374 | 
375 | 		org = i;
376 | 		while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
377 | 			i++;
378 | 
379 | 		if (i > org)
380 | 			bufput(ob, text + org, i - org);
381 | 
382 | 		if (i < size) {
383 | 			i += smartypants_cb_ptrs[(int)action]
384 | 				(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
385 | 		}
386 | 	}
387 | }
388 | 
389 | 
390 | 


--------------------------------------------------------------------------------
/html/html.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2009, Natacha Porté
  3 |  * Copyright (c) 2011, Vicent Marti
  4 |  *
  5 |  * Permission to use, copy, modify, and distribute this software for any
  6 |  * purpose with or without fee is hereby granted, provided that the above
  7 |  * copyright notice and this permission notice appear in all copies.
  8 |  *
  9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16 |  */
 17 | 
 18 | #include "markdown.h"
 19 | #include "html.h"
 20 | 
 21 | #include <string.h>
 22 | #include <stdlib.h>
 23 | #include <stdio.h>
 24 | #include <ctype.h>
 25 | 
 26 | #include "houdini.h"
 27 | 
 28 | #define USE_XHTML(opt) (opt->flags & HTML_USE_XHTML)
 29 | 
 30 | int
 31 | sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname)
 32 | {
 33 | 	size_t i;
 34 | 	int closed = 0;
 35 | 
 36 | 	if (tag_size < 3 || tag_data[0] != '<')
 37 | 		return HTML_TAG_NONE;
 38 | 
 39 | 	i = 1;
 40 | 
 41 | 	if (tag_data[i] == '/') {
 42 | 		closed = 1;
 43 | 		i++;
 44 | 	}
 45 | 
 46 | 	for (; i < tag_size; ++i, ++tagname) {
 47 | 		if (*tagname == 0)
 48 | 			break;
 49 | 
 50 | 		if (tag_data[i] != *tagname)
 51 | 			return HTML_TAG_NONE;
 52 | 	}
 53 | 
 54 | 	if (i == tag_size)
 55 | 		return HTML_TAG_NONE;
 56 | 
 57 | 	if (isspace(tag_data[i]) || tag_data[i] == '>')
 58 | 		return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
 59 | 
 60 | 	return HTML_TAG_NONE;
 61 | }
 62 | 
 63 | static inline void escape_html(struct buf *ob, const uint8_t *source, size_t length)
 64 | {
 65 | 	houdini_escape_html0(ob, source, length, 0);
 66 | }
 67 | 
 68 | static inline void escape_href(struct buf *ob, const uint8_t *source, size_t length)
 69 | {
 70 | 	houdini_escape_href(ob, source, length);
 71 | }
 72 | 
 73 | /********************
 74 |  * GENERIC RENDERER *
 75 |  ********************/
 76 | static int
 77 | rndr_autolink(struct buf *ob, const struct buf *link, enum mkd_autolink type, void *opaque)
 78 | {
 79 | 	struct html_renderopt *options = opaque;
 80 | 
 81 | 	if (!link || !link->size)
 82 | 		return 0;
 83 | 
 84 | 	if ((options->flags & HTML_SAFELINK) != 0 &&
 85 | 		!sd_autolink_issafe(link->data, link->size) &&
 86 | 		type != MKDA_EMAIL)
 87 | 		return 0;
 88 | 
 89 | 	BUFPUTSL(ob, "<a href=\"");
 90 | 	if (type == MKDA_EMAIL)
 91 | 		BUFPUTSL(ob, "mailto:");
 92 | 	escape_href(ob, link->data, link->size);
 93 | 
 94 | 	if (options->link_attributes) {
 95 | 		bufputc(ob, '\"');
 96 | 		options->link_attributes(ob, link, opaque);
 97 | 		bufputc(ob, '>');
 98 | 	} else {
 99 | 		BUFPUTSL(ob, "\">");
100 | 	}
101 | 
102 | 	/*
103 | 	 * Pretty printing: if we get an email address as
104 | 	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
105 | 	 * want to print the `mailto:` prefix
106 | 	 */
107 | 	if (bufprefix(link, "mailto:") == 0) {
108 | 		escape_html(ob, link->data + 7, link->size - 7);
109 | 	} else {
110 | 		escape_html(ob, link->data, link->size);
111 | 	}
112 | 
113 | 	BUFPUTSL(ob, "</a>");
114 | 
115 | 	return 1;
116 | }
117 | 
118 | static void
119 | rndr_blockcode(struct buf *ob, const struct buf *text, const struct buf *lang, void *opaque)
120 | {
121 | 	if (ob->size) bufputc(ob, '\n');
122 | 
123 | 	if (lang && lang->size) {
124 | 		size_t i, cls;
125 | 		BUFPUTSL(ob, "<pre><code class=\"");
126 | 
127 | 		for (i = 0, cls = 0; i < lang->size; ++i, ++cls) {
128 | 			while (i < lang->size && isspace(lang->data[i]))
129 | 				i++;
130 | 
131 | 			if (i < lang->size) {
132 | 				size_t org = i;
133 | 				while (i < lang->size && !isspace(lang->data[i]))
134 | 					i++;
135 | 
136 | 				if (lang->data[org] == '.')
137 | 					org++;
138 | 
139 | 				if (cls) bufputc(ob, ' ');
140 | 				escape_html(ob, lang->data + org, i - org);
141 | 			}
142 | 		}
143 | 
144 | 		BUFPUTSL(ob, "\">");
145 | 	} else
146 | 		BUFPUTSL(ob, "<pre><code>");
147 | 
148 | 	if (text)
149 | 		escape_html(ob, text->data, text->size);
150 | 
151 | 	BUFPUTSL(ob, "</code></pre>\n");
152 | }
153 | 
154 | static void
155 | rndr_blockquote(struct buf *ob, const struct buf *text, void *opaque)
156 | {
157 | 	if (ob->size) bufputc(ob, '\n');
158 | 	BUFPUTSL(ob, "<blockquote>\n");
159 | 	if (text) bufput(ob, text->data, text->size);
160 | 	BUFPUTSL(ob, "</blockquote>\n");
161 | }
162 | 
163 | static int
164 | rndr_codespan(struct buf *ob, const struct buf *text, void *opaque)
165 | {
166 | 	BUFPUTSL(ob, "<code>");
167 | 	if (text) escape_html(ob, text->data, text->size);
168 | 	BUFPUTSL(ob, "</code>");
169 | 	return 1;
170 | }
171 | 
172 | static int
173 | rndr_strikethrough(struct buf *ob, const struct buf *text, void *opaque)
174 | {
175 | 	if (!text || !text->size)
176 | 		return 0;
177 | 
178 | 	BUFPUTSL(ob, "<del>");
179 | 	bufput(ob, text->data, text->size);
180 | 	BUFPUTSL(ob, "</del>");
181 | 	return 1;
182 | }
183 | 
184 | static int
185 | rndr_double_emphasis(struct buf *ob, const struct buf *text, void *opaque)
186 | {
187 | 	if (!text || !text->size)
188 | 		return 0;
189 | 
190 | 	BUFPUTSL(ob, "<strong>");
191 | 	bufput(ob, text->data, text->size);
192 | 	BUFPUTSL(ob, "</strong>");
193 | 
194 | 	return 1;
195 | }
196 | 
197 | static int
198 | rndr_emphasis(struct buf *ob, const struct buf *text, void *opaque)
199 | {
200 | 	if (!text || !text->size) return 0;
201 | 	BUFPUTSL(ob, "<em>");
202 | 	if (text) bufput(ob, text->data, text->size);
203 | 	BUFPUTSL(ob, "</em>");
204 | 	return 1;
205 | }
206 | 
/* rndr_linebreak: write a line break, self-closed ("<br/>") when the
 * options request XHTML. Always returns 1. */
static int
rndr_linebreak(struct buf *ob, void *opaque)
{
	struct html_renderopt *options = opaque;

	if (USE_XHTML(options))
		bufputs(ob, "<br/>\n");
	else
		bufputs(ob, "<br>\n");

	return 1;
}
214 | 
215 | static void
216 | rndr_header(struct buf *ob, const struct buf *text, int level, void *opaque)
217 | {
218 | 	struct html_renderopt *options = opaque;
219 | 
220 | 	if (ob->size)
221 | 		bufputc(ob, '\n');
222 | 
223 | 	if (options->flags & HTML_TOC)
224 | 		bufprintf(ob, "<h%d id=\"toc_%d\">", level, options->toc_data.header_count++);
225 | 	else
226 | 		bufprintf(ob, "<h%d>", level);
227 | 
228 | 	if (text) bufput(ob, text->data, text->size);
229 | 	bufprintf(ob, "</h%d>\n", level);
230 | }
231 | 
232 | static int
233 | rndr_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
234 | {
235 | 	struct html_renderopt *options = opaque;
236 | 
237 | 	if (link != NULL && (options->flags & HTML_SAFELINK) != 0 && !sd_autolink_issafe(link->data, link->size))
238 | 		return 0;
239 | 
240 | 	BUFPUTSL(ob, "<a href=\"");
241 | 
242 | 	if (link && link->size)
243 | 		escape_href(ob, link->data, link->size);
244 | 
245 | 	if (title && title->size) {
246 | 		BUFPUTSL(ob, "\" title=\"");
247 | 		escape_html(ob, title->data, title->size);
248 | 	}
249 | 
250 | 	if (options->link_attributes) {
251 | 		bufputc(ob, '\"');
252 | 		options->link_attributes(ob, link, opaque);
253 | 		bufputc(ob, '>');
254 | 	} else {
255 | 		BUFPUTSL(ob, "\">");
256 | 	}
257 | 
258 | 	if (content && content->size) bufput(ob, content->data, content->size);
259 | 	BUFPUTSL(ob, "</a>");
260 | 	return 1;
261 | }
262 | 
263 | static void
264 | rndr_list(struct buf *ob, const struct buf *text, int flags, void *opaque)
265 | {
266 | 	if (ob->size) bufputc(ob, '\n');
267 | 	bufput(ob, flags & MKD_LIST_ORDERED ? "<ol>\n" : "<ul>\n", 5);
268 | 	if (text) bufput(ob, text->data, text->size);
269 | 	bufput(ob, flags & MKD_LIST_ORDERED ? "</ol>\n" : "</ul>\n", 6);
270 | }
271 | 
272 | static void
273 | rndr_listitem(struct buf *ob, const struct buf *text, int flags, void *opaque)
274 | {
275 | 	BUFPUTSL(ob, "<li>");
276 | 	if (text) {
277 | 		size_t size = text->size;
278 | 		while (size && text->data[size - 1] == '\n')
279 | 			size--;
280 | 
281 | 		bufput(ob, text->data, size);
282 | 	}
283 | 	BUFPUTSL(ob, "</li>\n");
284 | }
285 | 
286 | static void
287 | rndr_paragraph(struct buf *ob, const struct buf *text, void *opaque)
288 | {
289 | 	struct html_renderopt *options = opaque;
290 | 	size_t i = 0;
291 | 
292 | 	if (ob->size) bufputc(ob, '\n');
293 | 
294 | 	if (!text || !text->size)
295 | 		return;
296 | 
297 | 	while (i < text->size && isspace(text->data[i])) i++;
298 | 
299 | 	if (i == text->size)
300 | 		return;
301 | 
302 | 	BUFPUTSL(ob, "<p>");
303 | 	if (options->flags & HTML_HARD_WRAP) {
304 | 		size_t org;
305 | 		while (i < text->size) {
306 | 			org = i;
307 | 			while (i < text->size && text->data[i] != '\n')
308 | 				i++;
309 | 
310 | 			if (i > org)
311 | 				bufput(ob, text->data + org, i - org);
312 | 
313 | 			/*
314 | 			 * do not insert a line break if this newline
315 | 			 * is the last character on the paragraph
316 | 			 */
317 | 			if (i >= text->size - 1)
318 | 				break;
319 | 
320 | 			rndr_linebreak(ob, opaque);
321 | 			i++;
322 | 		}
323 | 	} else {
324 | 		bufput(ob, &text->data[i], text->size - i);
325 | 	}
326 | 	BUFPUTSL(ob, "</p>\n");
327 | }
328 | 
329 | static void
330 | rndr_raw_block(struct buf *ob, const struct buf *text, void *opaque)
331 | {
332 | 	size_t org, sz;
333 | 	if (!text) return;
334 | 	sz = text->size;
335 | 	while (sz > 0 && text->data[sz - 1] == '\n') sz--;
336 | 	org = 0;
337 | 	while (org < sz && text->data[org] == '\n') org++;
338 | 	if (org >= sz) return;
339 | 	if (ob->size) bufputc(ob, '\n');
340 | 	bufput(ob, text->data + org, sz - org);
341 | 	bufputc(ob, '\n');
342 | }
343 | 
344 | static int
345 | rndr_triple_emphasis(struct buf *ob, const struct buf *text, void *opaque)
346 | {
347 | 	if (!text || !text->size) return 0;
348 | 	BUFPUTSL(ob, "<strong><em>");
349 | 	bufput(ob, text->data, text->size);
350 | 	BUFPUTSL(ob, "</em></strong>");
351 | 	return 1;
352 | }
353 | 
354 | static void
355 | rndr_hrule(struct buf *ob, void *opaque)
356 | {
357 | 	struct html_renderopt *options = opaque;
358 | 	if (ob->size) bufputc(ob, '\n');
359 | 	bufputs(ob, USE_XHTML(options) ? "<hr/>\n" : "<hr>\n");
360 | }
361 | 
362 | static int
363 | rndr_image(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *alt, void *opaque)
364 | {
365 | 	struct html_renderopt *options = opaque;
366 | 	if (!link || !link->size) return 0;
367 | 
368 | 	BUFPUTSL(ob, "<img src=\"");
369 | 	escape_href(ob, link->data, link->size);
370 | 	BUFPUTSL(ob, "\" alt=\"");
371 | 
372 | 	if (alt && alt->size)
373 | 		escape_html(ob, alt->data, alt->size);
374 | 
375 | 	if (title && title->size) {
376 | 		BUFPUTSL(ob, "\" title=\"");
377 | 		escape_html(ob, title->data, title->size); }
378 | 
379 | 	bufputs(ob, USE_XHTML(options) ? "\"/>" : "\">");
380 | 	return 1;
381 | }
382 | 
383 | static int
384 | rndr_raw_html(struct buf *ob, const struct buf *text, void *opaque)
385 | {
386 | 	struct html_renderopt *options = opaque;
387 | 
388 | 	/* HTML_ESCAPE overrides SKIP_HTML, SKIP_STYLE, SKIP_LINKS and SKIP_IMAGES
389 | 	* It doens't see if there are any valid tags, just escape all of them. */
390 | 	if((options->flags & HTML_ESCAPE) != 0) {
391 | 		escape_html(ob, text->data, text->size);
392 | 		return 1;
393 | 	}
394 | 
395 | 	if ((options->flags & HTML_SKIP_HTML) != 0)
396 | 		return 1;
397 | 
398 | 	if ((options->flags & HTML_SKIP_STYLE) != 0 &&
399 | 		sdhtml_is_tag(text->data, text->size, "style"))
400 | 		return 1;
401 | 
402 | 	if ((options->flags & HTML_SKIP_LINKS) != 0 &&
403 | 		sdhtml_is_tag(text->data, text->size, "a"))
404 | 		return 1;
405 | 
406 | 	if ((options->flags & HTML_SKIP_IMAGES) != 0 &&
407 | 		sdhtml_is_tag(text->data, text->size, "img"))
408 | 		return 1;
409 | 
410 | 	bufput(ob, text->data, text->size);
411 | 	return 1;
412 | }
413 | 
414 | static void
415 | rndr_table(struct buf *ob, const struct buf *header, const struct buf *body, void *opaque)
416 | {
417 | 	if (ob->size) bufputc(ob, '\n');
418 | 	BUFPUTSL(ob, "<table><thead>\n");
419 | 	if (header)
420 | 		bufput(ob, header->data, header->size);
421 | 	BUFPUTSL(ob, "</thead><tbody>\n");
422 | 	if (body)
423 | 		bufput(ob, body->data, body->size);
424 | 	BUFPUTSL(ob, "</tbody></table>\n");
425 | }
426 | 
427 | static void
428 | rndr_tablerow(struct buf *ob, const struct buf *text, void *opaque)
429 | {
430 | 	BUFPUTSL(ob, "<tr>\n");
431 | 	if (text)
432 | 		bufput(ob, text->data, text->size);
433 | 	BUFPUTSL(ob, "</tr>\n");
434 | }
435 | 
436 | static void
437 | rndr_tablecell(struct buf *ob, const struct buf *text, int flags, void *opaque)
438 | {
439 | 	if (flags & MKD_TABLE_HEADER) {
440 | 		BUFPUTSL(ob, "<th");
441 | 	} else {
442 | 		BUFPUTSL(ob, "<td");
443 | 	}
444 | 
445 | 	switch (flags & MKD_TABLE_ALIGNMASK) {
446 | 	case MKD_TABLE_ALIGN_CENTER:
447 | 		BUFPUTSL(ob, " align=\"center\">");
448 | 		break;
449 | 
450 | 	case MKD_TABLE_ALIGN_L:
451 | 		BUFPUTSL(ob, " align=\"left\">");
452 | 		break;
453 | 
454 | 	case MKD_TABLE_ALIGN_R:
455 | 		BUFPUTSL(ob, " align=\"right\">");
456 | 		break;
457 | 
458 | 	default:
459 | 		BUFPUTSL(ob, ">");
460 | 	}
461 | 
462 | 	if (text)
463 | 		bufput(ob, text->data, text->size);
464 | 
465 | 	if (flags & MKD_TABLE_HEADER) {
466 | 		BUFPUTSL(ob, "</th>\n");
467 | 	} else {
468 | 		BUFPUTSL(ob, "</td>\n");
469 | 	}
470 | }
471 | 
472 | static int
473 | rndr_superscript(struct buf *ob, const struct buf *text, void *opaque)
474 | {
475 | 	if (!text || !text->size) return 0;
476 | 	BUFPUTSL(ob, "<sup>");
477 | 	bufput(ob, text->data, text->size);
478 | 	BUFPUTSL(ob, "</sup>");
479 | 	return 1;
480 | }
481 | 
482 | static void
483 | rndr_normal_text(struct buf *ob, const struct buf *text, void *opaque)
484 | {
485 | 	if (text)
486 | 		escape_html(ob, text->data, text->size);
487 | }
488 | 
489 | static void
490 | toc_header(struct buf *ob, const struct buf *text, int level, void *opaque)
491 | {
492 | 	struct html_renderopt *options = opaque;
493 | 
494 | 	/* set the level offset if this is the first header
495 | 	 * we're parsing for the document */
496 | 	if (options->toc_data.current_level == 0) {
497 | 		options->toc_data.level_offset = level - 1;
498 | 	}
499 | 	level -= options->toc_data.level_offset;
500 | 
501 | 	if (level > options->toc_data.current_level) {
502 | 		while (level > options->toc_data.current_level) {
503 | 			BUFPUTSL(ob, "<ul>\n<li>\n");
504 | 			options->toc_data.current_level++;
505 | 		}
506 | 	} else if (level < options->toc_data.current_level) {
507 | 		BUFPUTSL(ob, "</li>\n");
508 | 		while (level < options->toc_data.current_level) {
509 | 			BUFPUTSL(ob, "</ul>\n</li>\n");
510 | 			options->toc_data.current_level--;
511 | 		}
512 | 		BUFPUTSL(ob,"<li>\n");
513 | 	} else {
514 | 		BUFPUTSL(ob,"</li>\n<li>\n");
515 | 	}
516 | 
517 | 	bufprintf(ob, "<a href=\"#toc_%d\">", options->toc_data.header_count++);
518 | 	if (text)
519 | 		escape_html(ob, text->data, text->size);
520 | 	BUFPUTSL(ob, "</a>\n");
521 | }
522 | 
523 | static int
524 | toc_link(struct buf *ob, const struct buf *link, const struct buf *title, const struct buf *content, void *opaque)
525 | {
526 | 	if (content && content->size)
527 | 		bufput(ob, content->data, content->size);
528 | 	return 1;
529 | }
530 | 
531 | static void
532 | toc_finalize(struct buf *ob, void *opaque)
533 | {
534 | 	struct html_renderopt *options = opaque;
535 | 
536 | 	while (options->toc_data.current_level > 0) {
537 | 		BUFPUTSL(ob, "</li>\n</ul>\n");
538 | 		options->toc_data.current_level--;
539 | 	}
540 | }
541 | 
542 | void
543 | sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options)
544 | {
545 | 	static const struct sd_callbacks cb_default = {
546 | 		NULL,
547 | 		NULL,
548 | 		NULL,
549 | 		toc_header,
550 | 		NULL,
551 | 		NULL,
552 | 		NULL,
553 | 		NULL,
554 | 		NULL,
555 | 		NULL,
556 | 		NULL,
557 | 
558 | 		NULL,
559 | 		rndr_codespan,
560 | 		rndr_double_emphasis,
561 | 		rndr_emphasis,
562 | 		NULL,
563 | 		NULL,
564 | 		toc_link,
565 | 		NULL,
566 | 		rndr_triple_emphasis,
567 | 		rndr_strikethrough,
568 | 		rndr_superscript,
569 | 
570 | 		NULL,
571 | 		NULL,
572 | 
573 | 		NULL,
574 | 		toc_finalize,
575 | 	};
576 | 
577 | 	memset(options, 0x0, sizeof(struct html_renderopt));
578 | 	options->flags = HTML_TOC;
579 | 
580 | 	memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
581 | }
582 | 
583 | void
584 | sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options, unsigned int render_flags)
585 | {
586 | 	static const struct sd_callbacks cb_default = {
587 | 		rndr_blockcode,
588 | 		rndr_blockquote,
589 | 		rndr_raw_block,
590 | 		rndr_header,
591 | 		rndr_hrule,
592 | 		rndr_list,
593 | 		rndr_listitem,
594 | 		rndr_paragraph,
595 | 		rndr_table,
596 | 		rndr_tablerow,
597 | 		rndr_tablecell,
598 | 
599 | 		rndr_autolink,
600 | 		rndr_codespan,
601 | 		rndr_double_emphasis,
602 | 		rndr_emphasis,
603 | 		rndr_image,
604 | 		rndr_linebreak,
605 | 		rndr_link,
606 | 		rndr_raw_html,
607 | 		rndr_triple_emphasis,
608 | 		rndr_strikethrough,
609 | 		rndr_superscript,
610 | 
611 | 		NULL,
612 | 		rndr_normal_text,
613 | 
614 | 		NULL,
615 | 		NULL,
616 | 	};
617 | 
618 | 	/* Prepare the options pointer */
619 | 	memset(options, 0x0, sizeof(struct html_renderopt));
620 | 	options->flags = render_flags;
621 | 
622 | 	/* Prepare the callbacks */
623 | 	memcpy(callbacks, &cb_default, sizeof(struct sd_callbacks));
624 | 
625 | 	if (render_flags & HTML_SKIP_IMAGES)
626 | 		callbacks->image = NULL;
627 | 
628 | 	if (render_flags & HTML_SKIP_LINKS) {
629 | 		callbacks->link = NULL;
630 | 		callbacks->autolink = NULL;
631 | 	}
632 | 
633 | 	if (render_flags & HTML_SKIP_HTML || render_flags & HTML_ESCAPE)
634 | 		callbacks->blockhtml = NULL;
635 | }
636 | 


--------------------------------------------------------------------------------
/src/markdown.c:
--------------------------------------------------------------------------------
   1 | /* markdown.c - generic markdown parser */
   2 | 
   3 | /*
   4 |  * Copyright (c) 2009, Natacha Porté
   5 |  * Copyright (c) 2011, Vicent Marti
   6 |  *
   7 |  * Permission to use, copy, modify, and distribute this software for any
   8 |  * purpose with or without fee is hereby granted, provided that the above
   9 |  * copyright notice and this permission notice appear in all copies.
  10 |  *
  11 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  12 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  14 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18 |  */
  19 | 
  20 | #include "markdown.h"
  21 | #include "stack.h"
  22 | 
  23 | #include <assert.h>
  24 | #include <string.h>
  25 | #include <ctype.h>
  26 | #include <stdio.h>
  27 | 
  28 | #if defined(_WIN32)
  29 | #define strncasecmp	_strnicmp
  30 | #endif
  31 | 
  32 | #define REF_TABLE_SIZE 8
  33 | 
  34 | #define BUFFER_BLOCK 0
  35 | #define BUFFER_SPAN 1
  36 | 
  37 | #define MKD_LI_END 8	/* internal list flag */
  38 | 
  39 | #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
  40 | #define GPERF_DOWNCASE 1
  41 | #define GPERF_CASE_STRNCMP 1
  42 | #include "html_blocks.h"
  43 | 
  44 | /***************
  45 |  * LOCAL TYPES *
  46 |  ***************/
  47 | 
/* link_ref: reference to a link */
/*   entries live in a small hash table and are chained per bucket via `next` */
struct link_ref {
	unsigned int id;	/* hash of the reference name, see hash_link_ref() */

	struct buf *link;	/* link target; released by free_link_refs() */
	struct buf *title;	/* link title; released by free_link_refs() */

	struct link_ref *next;	/* next entry in the same bucket, NULL at the end */
};
  57 | 
/* char_trigger: function pointer to render active chars */
/*   returns the number of chars taken care of */
/*   (0 means "no action": parse_inline then emits the char as plain text) */
/*   data is the pointer of the beginning of the span */
/*   offset is the number of valid chars before data */
/*   size is the number of chars available starting at data */
struct sd_markdown;
typedef size_t
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);

/* forward declarations of the handlers referenced by markdown_char_ptrs */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
  77 | 
/* markdown_char_t: action codes stored per byte in sd_markdown.active_char */
/*   each value is also the index of its handler in markdown_char_ptrs, */
/*   so the two lists must be kept in the same order */
enum markdown_char_t {
	MD_CHAR_NONE = 0,	/* inactive char: copied through as normal text */
	MD_CHAR_EMPHASIS,
	MD_CHAR_CODESPAN,
	MD_CHAR_LINEBREAK,
	MD_CHAR_LINK,
	MD_CHAR_LANGLE,
	MD_CHAR_ESCAPE,
	MD_CHAR_ENTITITY,	/* (sic) spelling kept: the name may be used elsewhere in the file */
	MD_CHAR_AUTOLINK_URL,
	MD_CHAR_AUTOLINK_EMAIL,
	MD_CHAR_AUTOLINK_WWW,
	MD_CHAR_SUPERSCRIPT,
};
  92 | 
/* markdown_char_ptrs: dispatch table indexed by enum markdown_char_t */
/*   parse_inline looks up the handler with the action code of the active char */
static char_trigger markdown_char_ptrs[] = {
	NULL,			/* MD_CHAR_NONE: no handler */
	&char_emphasis,
	&char_codespan,
	&char_linebreak,
	&char_link,
	&char_langle_tag,
	&char_escape,
	&char_entity,
	&char_autolink_url,
	&char_autolink_email,
	&char_autolink_www,
	&char_superscript,
};
 107 | 
/* render • structure containing one particular render */
struct sd_markdown {
	struct sd_callbacks	cb;	/* renderer callbacks; a NULL entry disables that element */
	void *opaque;			/* passed unchanged as last argument to every callback */

	struct link_ref *refs[REF_TABLE_SIZE];	/* hash table of link references (chained buckets) */
	uint8_t active_char[256];	/* per-byte action code (enum markdown_char_t), 0 = inactive */
	struct stack work_bufs[2];	/* work buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
	unsigned int ext_flags;		/* MKDEXT_* extension flags */
	size_t max_nesting;		/* parse_inline stops once more work buffers than this are in use */
	int in_link_body;		/* non-zero while parsing link text: autolink handlers bail out */
};
 120 | 
 121 | /***************************
 122 |  * HELPER FUNCTIONS *
 123 |  ***************************/
 124 | 
 125 | static inline struct buf *
 126 | rndr_newbuf(struct sd_markdown *rndr, int type)
 127 | {
 128 | 	static const size_t buf_size[2] = {256, 64};
 129 | 	struct buf *work = NULL;
 130 | 	struct stack *pool = &rndr->work_bufs[type];
 131 | 
 132 | 	if (pool->size < pool->asize &&
 133 | 		pool->item[pool->size] != NULL) {
 134 | 		work = pool->item[pool->size++];
 135 | 		work->size = 0;
 136 | 	} else {
 137 | 		work = bufnew(buf_size[type]);
 138 | 		stack_push(pool, work);
 139 | 	}
 140 | 
 141 | 	return work;
 142 | }
 143 | 
 144 | static inline void
 145 | rndr_popbuf(struct sd_markdown *rndr, int type)
 146 | {
 147 | 	rndr->work_bufs[type].size--;
 148 | }
 149 | 
 150 | static void
 151 | unscape_text(struct buf *ob, struct buf *src)
 152 | {
 153 | 	size_t i = 0, org;
 154 | 	while (i < src->size) {
 155 | 		org = i;
 156 | 		while (i < src->size && src->data[i] != '\\')
 157 | 			i++;
 158 | 
 159 | 		if (i > org)
 160 | 			bufput(ob, src->data + org, i - org);
 161 | 
 162 | 		if (i + 1 >= src->size)
 163 | 			break;
 164 | 
 165 | 		bufputc(ob, src->data[i + 1]);
 166 | 		i += 2;
 167 | 	}
 168 | }
 169 | 
 170 | static unsigned int
 171 | hash_link_ref(const uint8_t *link_ref, size_t length)
 172 | {
 173 | 	size_t i;
 174 | 	unsigned int hash = 0;
 175 | 
 176 | 	for (i = 0; i < length; ++i)
 177 | 		hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
 178 | 
 179 | 	return hash;
 180 | }
 181 | 
 182 | static struct link_ref *
 183 | add_link_ref(
 184 | 	struct link_ref **references,
 185 | 	const uint8_t *name, size_t name_size)
 186 | {
 187 | 	struct link_ref *ref = calloc(1, sizeof(struct link_ref));
 188 | 
 189 | 	if (!ref)
 190 | 		return NULL;
 191 | 
 192 | 	ref->id = hash_link_ref(name, name_size);
 193 | 	ref->next = references[ref->id % REF_TABLE_SIZE];
 194 | 
 195 | 	references[ref->id % REF_TABLE_SIZE] = ref;
 196 | 	return ref;
 197 | }
 198 | 
 199 | static struct link_ref *
 200 | find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
 201 | {
 202 | 	unsigned int hash = hash_link_ref(name, length);
 203 | 	struct link_ref *ref = NULL;
 204 | 
 205 | 	ref = references[hash % REF_TABLE_SIZE];
 206 | 
 207 | 	while (ref != NULL) {
 208 | 		if (ref->id == hash)
 209 | 			return ref;
 210 | 
 211 | 		ref = ref->next;
 212 | 	}
 213 | 
 214 | 	return NULL;
 215 | }
 216 | 
 217 | static void
 218 | free_link_refs(struct link_ref **references)
 219 | {
 220 | 	size_t i;
 221 | 
 222 | 	for (i = 0; i < REF_TABLE_SIZE; ++i) {
 223 | 		struct link_ref *r = references[i];
 224 | 		struct link_ref *next;
 225 | 
 226 | 		while (r) {
 227 | 			next = r->next;
 228 | 			bufrelease(r->link);
 229 | 			bufrelease(r->title);
 230 | 			free(r);
 231 | 			r = next;
 232 | 		}
 233 | 	}
 234 | }
 235 | 
 236 | /*
 237 |  * Check whether a char is a Markdown space.
 238 | 
 239 |  * Right now we only consider spaces the actual
 240 |  * space and a newline: tabs and carriage returns
 241 |  * are filtered out during the preprocessing phase.
 242 |  *
 243 |  * If we wanted to actually be UTF-8 compliant, we
 244 |  * should instead extract an Unicode codepoint from
 245 |  * this character and check for space properties.
 246 |  */
 247 | static inline int
 248 | _isspace(int c)
 249 | {
 250 | 	return c == ' ' || c == '\n';
 251 | }
 252 | 
 253 | /****************************
 254 |  * INLINE PARSING FUNCTIONS *
 255 |  ****************************/
 256 | 
 257 | /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
 258 | /* this is less strict than the original markdown e-mail address matching */
 259 | static size_t
 260 | is_mail_autolink(uint8_t *data, size_t size)
 261 | {
 262 | 	size_t i = 0, nb = 0;
 263 | 
 264 | 	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
 265 | 	for (i = 0; i < size; ++i) {
 266 | 		if (isalnum(data[i]))
 267 | 			continue;
 268 | 
 269 | 		switch (data[i]) {
 270 | 			case '@':
 271 | 				nb++;
 272 | 
 273 | 			case '-':
 274 | 			case '.':
 275 | 			case '_':
 276 | 				break;
 277 | 
 278 | 			case '>':
 279 | 				return (nb == 1) ? i + 1 : 0;
 280 | 
 281 | 			default:
 282 | 				return 0;
 283 | 		}
 284 | 	}
 285 | 
 286 | 	return 0;
 287 | }
 288 | 
 289 | /* tag_length • returns the length of the given tag, or 0 is it's not valid */
 290 | static size_t
 291 | tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
 292 | {
 293 | 	size_t i, j;
 294 | 
 295 | 	/* a valid tag can't be shorter than 3 chars */
 296 | 	if (size < 3) return 0;
 297 | 
 298 | 	/* begins with a '<' optionally followed by '/', followed by letter or number */
 299 | 	if (data[0] != '<') return 0;
 300 | 	i = (data[1] == '/') ? 2 : 1;
 301 | 
 302 | 	if (!isalnum(data[i]))
 303 | 		return 0;
 304 | 
 305 | 	/* scheme test */
 306 | 	*autolink = MKDA_NOT_AUTOLINK;
 307 | 
 308 | 	/* try to find the beginning of an URI */
 309 | 	while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
 310 | 		i++;
 311 | 
 312 | 	if (i > 1 && data[i] == '@') {
 313 | 		if ((j = is_mail_autolink(data + i, size - i)) != 0) {
 314 | 			*autolink = MKDA_EMAIL;
 315 | 			return i + j;
 316 | 		}
 317 | 	}
 318 | 
 319 | 	if (i > 2 && data[i] == ':') {
 320 | 		*autolink = MKDA_NORMAL;
 321 | 		i++;
 322 | 	}
 323 | 
 324 | 	/* completing autolink test: no whitespace or ' or " */
 325 | 	if (i >= size)
 326 | 		*autolink = MKDA_NOT_AUTOLINK;
 327 | 
 328 | 	else if (*autolink) {
 329 | 		j = i;
 330 | 
 331 | 		while (i < size) {
 332 | 			if (data[i] == '\\') i += 2;
 333 | 			else if (data[i] == '>' || data[i] == '\'' ||
 334 | 					data[i] == '"' || data[i] == ' ' || data[i] == '\n')
 335 | 					break;
 336 | 			else i++;
 337 | 		}
 338 | 
 339 | 		if (i >= size) return 0;
 340 | 		if (i > j && data[i] == '>') return i + 1;
 341 | 		/* one of the forbidden chars has been found */
 342 | 		*autolink = MKDA_NOT_AUTOLINK;
 343 | 	}
 344 | 
 345 | 	/* looking for sometinhg looking like a tag end */
 346 | 	while (i < size && data[i] != '>') i++;
 347 | 	if (i >= size) return 0;
 348 | 	return i + 1;
 349 | }
 350 | 
 351 | /* parse_inline • parses inline markdown elements */
 352 | static void
 353 | parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
 354 | {
 355 | 	size_t i = 0, end = 0;
 356 | 	uint8_t action = 0;
 357 | 	struct buf work = { 0, 0, 0, 0 };
 358 | 
 359 | 	if (rndr->work_bufs[BUFFER_SPAN].size +
 360 | 		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
 361 | 		return;
 362 | 
 363 | 	while (i < size) {
 364 | 		/* copying inactive chars into the output */
 365 | 		while (end < size && (action = rndr->active_char[data[end]]) == 0) {
 366 | 			end++;
 367 | 		}
 368 | 
 369 | 		if (rndr->cb.normal_text) {
 370 | 			work.data = data + i;
 371 | 			work.size = end - i;
 372 | 			rndr->cb.normal_text(ob, &work, rndr->opaque);
 373 | 		}
 374 | 		else
 375 | 			bufput(ob, data + i, end - i);
 376 | 
 377 | 		if (end >= size) break;
 378 | 		i = end;
 379 | 
 380 | 		end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
 381 | 		if (!end) /* no action from the callback */
 382 | 			end = i + 1;
 383 | 		else {
 384 | 			i += end;
 385 | 			end = i;
 386 | 		}
 387 | 	}
 388 | }
 389 | 
 390 | /* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
 391 | static size_t
 392 | find_emph_char(uint8_t *data, size_t size, uint8_t c)
 393 | {
 394 | 	size_t i = 1;
 395 | 
 396 | 	while (i < size) {
 397 | 		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
 398 | 			i++;
 399 | 
 400 | 		if (i == size)
 401 | 			return 0;
 402 | 
 403 | 		if (data[i] == c)
 404 | 			return i;
 405 | 
 406 | 		/* not counting escaped chars */
 407 | 		if (i && data[i - 1] == '\\') {
 408 | 			i++; continue;
 409 | 		}
 410 | 
 411 | 		if (data[i] == '`') {
 412 | 			size_t span_nb = 0, bt;
 413 | 			size_t tmp_i = 0;
 414 | 
 415 | 			/* counting the number of opening backticks */
 416 | 			while (i < size && data[i] == '`') {
 417 | 				i++; span_nb++;
 418 | 			}
 419 | 
 420 | 			if (i >= size) return 0;
 421 | 
 422 | 			/* finding the matching closing sequence */
 423 | 			bt = 0;
 424 | 			while (i < size && bt < span_nb) {
 425 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 426 | 				if (data[i] == '`') bt++;
 427 | 				else bt = 0;
 428 | 				i++;
 429 | 			}
 430 | 
 431 | 			if (i >= size) return tmp_i;
 432 | 		}
 433 | 		/* skipping a link */
 434 | 		else if (data[i] == '[') {
 435 | 			size_t tmp_i = 0;
 436 | 			uint8_t cc;
 437 | 
 438 | 			i++;
 439 | 			while (i < size && data[i] != ']') {
 440 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 441 | 				i++;
 442 | 			}
 443 | 
 444 | 			i++;
 445 | 			while (i < size && (data[i] == ' ' || data[i] == '\n'))
 446 | 				i++;
 447 | 
 448 | 			if (i >= size)
 449 | 				return tmp_i;
 450 | 
 451 | 			switch (data[i]) {
 452 | 			case '[':
 453 | 				cc = ']'; break;
 454 | 
 455 | 			case '(':
 456 | 				cc = ')'; break;
 457 | 
 458 | 			default:
 459 | 				if (tmp_i)
 460 | 					return tmp_i;
 461 | 				else
 462 | 					continue;
 463 | 			}
 464 | 
 465 | 			i++;
 466 | 			while (i < size && data[i] != cc) {
 467 | 				if (!tmp_i && data[i] == c) tmp_i = i;
 468 | 				i++;
 469 | 			}
 470 | 
 471 | 			if (i >= size)
 472 | 				return tmp_i;
 473 | 
 474 | 			i++;
 475 | 		}
 476 | 	}
 477 | 
 478 | 	return 0;
 479 | }
 480 | 
 481 | /* parse_emph1 • parsing single emphase */
 482 | /* closed by a symbol not preceded by whitespace and not followed by symbol */
 483 | static size_t
 484 | parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 485 | {
 486 | 	size_t i = 0, len;
 487 | 	struct buf *work = 0;
 488 | 	int r;
 489 | 
 490 | 	if (!rndr->cb.emphasis) return 0;
 491 | 
 492 | 	/* skipping one symbol if coming from emph3 */
 493 | 	if (size > 1 && data[0] == c && data[1] == c) i = 1;
 494 | 
 495 | 	while (i < size) {
 496 | 		len = find_emph_char(data + i, size - i, c);
 497 | 		if (!len) return 0;
 498 | 		i += len;
 499 | 		if (i >= size) return 0;
 500 | 
 501 | 		if (data[i] == c && !_isspace(data[i - 1])) {
 502 | 
 503 | 			if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
 504 | 				if (i + 1 < size && isalnum(data[i + 1]))
 505 | 					continue;
 506 | 			}
 507 | 
 508 | 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 509 | 			parse_inline(work, rndr, data, i);
 510 | 			r = rndr->cb.emphasis(ob, work, rndr->opaque);
 511 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 512 | 			return r ? i + 1 : 0;
 513 | 		}
 514 | 	}
 515 | 
 516 | 	return 0;
 517 | }
 518 | 
 519 | /* parse_emph2 • parsing single emphase */
 520 | static size_t
 521 | parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 522 | {
 523 | 	int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
 524 | 	size_t i = 0, len;
 525 | 	struct buf *work = 0;
 526 | 	int r;
 527 | 
 528 | 	render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
 529 | 
 530 | 	if (!render_method)
 531 | 		return 0;
 532 | 
 533 | 	while (i < size) {
 534 | 		len = find_emph_char(data + i, size - i, c);
 535 | 		if (!len) return 0;
 536 | 		i += len;
 537 | 
 538 | 		if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
 539 | 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 540 | 			parse_inline(work, rndr, data, i);
 541 | 			r = render_method(ob, work, rndr->opaque);
 542 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 543 | 			return r ? i + 2 : 0;
 544 | 		}
 545 | 		i++;
 546 | 	}
 547 | 	return 0;
 548 | }
 549 | 
 550 | /* parse_emph3 • parsing single emphase */
 551 | /* finds the first closing tag, and delegates to the other emph */
 552 | static size_t
 553 | parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
 554 | {
 555 | 	size_t i = 0, len;
 556 | 	int r;
 557 | 
 558 | 	while (i < size) {
 559 | 		len = find_emph_char(data + i, size - i, c);
 560 | 		if (!len) return 0;
 561 | 		i += len;
 562 | 
 563 | 		/* skip whitespace preceded symbols */
 564 | 		if (data[i] != c || _isspace(data[i - 1]))
 565 | 			continue;
 566 | 
 567 | 		if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
 568 | 			/* triple symbol found */
 569 | 			struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
 570 | 
 571 | 			parse_inline(work, rndr, data, i);
 572 | 			r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
 573 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 574 | 			return r ? i + 3 : 0;
 575 | 
 576 | 		} else if (i + 1 < size && data[i + 1] == c) {
 577 | 			/* double symbol found, handing over to emph1 */
 578 | 			len = parse_emph1(ob, rndr, data - 2, size + 2, c);
 579 | 			if (!len) return 0;
 580 | 			else return len - 2;
 581 | 
 582 | 		} else {
 583 | 			/* single symbol found, handing over to emph2 */
 584 | 			len = parse_emph2(ob, rndr, data - 1, size + 1, c);
 585 | 			if (!len) return 0;
 586 | 			else return len - 1;
 587 | 		}
 588 | 	}
 589 | 	return 0;
 590 | }
 591 | 
 592 | /* char_emphasis • single and double emphasis parsing */
 593 | static size_t
 594 | char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 595 | {
 596 | 	uint8_t c = data[0];
 597 | 	size_t ret;
 598 | 
 599 | 	if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
 600 | 		if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>')
 601 | 			return 0;
 602 | 	}
 603 | 
 604 | 	if (size > 2 && data[1] != c) {
 605 | 		/* whitespace cannot follow an opening emphasis;
 606 | 		 * strikethrough only takes two characters '~~' */
 607 | 		if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 608 | 			return 0;
 609 | 
 610 | 		return ret + 1;
 611 | 	}
 612 | 
 613 | 	if (size > 3 && data[1] == c && data[2] != c) {
 614 | 		if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
 615 | 			return 0;
 616 | 
 617 | 		return ret + 2;
 618 | 	}
 619 | 
 620 | 	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
 621 | 		if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
 622 | 			return 0;
 623 | 
 624 | 		return ret + 3;
 625 | 	}
 626 | 
 627 | 	return 0;
 628 | }
 629 | 
 630 | 
 631 | /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
 632 | static size_t
 633 | char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 634 | {
 635 | 	if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
 636 | 		return 0;
 637 | 
 638 | 	/* removing the last space from ob and rendering */
 639 | 	while (ob->size && ob->data[ob->size - 1] == ' ')
 640 | 		ob->size--;
 641 | 
 642 | 	return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
 643 | }
 644 | 
 645 | 
 646 | /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
 647 | static size_t
 648 | char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 649 | {
 650 | 	size_t end, nb = 0, i, f_begin, f_end;
 651 | 
 652 | 	/* counting the number of backticks in the delimiter */
 653 | 	while (nb < size && data[nb] == '`')
 654 | 		nb++;
 655 | 
 656 | 	/* finding the next delimiter */
 657 | 	i = 0;
 658 | 	for (end = nb; end < size && i < nb; end++) {
 659 | 		if (data[end] == '`') i++;
 660 | 		else i = 0;
 661 | 	}
 662 | 
 663 | 	if (i < nb && end >= size)
 664 | 		return 0; /* no matching delimiter */
 665 | 
 666 | 	/* trimming outside whitespaces */
 667 | 	f_begin = nb;
 668 | 	while (f_begin < end && data[f_begin] == ' ')
 669 | 		f_begin++;
 670 | 
 671 | 	f_end = end - nb;
 672 | 	while (f_end > nb && data[f_end-1] == ' ')
 673 | 		f_end--;
 674 | 
 675 | 	/* real code span */
 676 | 	if (f_begin < f_end) {
 677 | 		struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
 678 | 		if (!rndr->cb.codespan(ob, &work, rndr->opaque))
 679 | 			end = 0;
 680 | 	} else {
 681 | 		if (!rndr->cb.codespan(ob, 0, rndr->opaque))
 682 | 			end = 0;
 683 | 	}
 684 | 
 685 | 	return end;
 686 | }
 687 | 
 688 | 
 689 | /* char_escape • '\\' backslash escape */
 690 | static size_t
 691 | char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 692 | {
 693 | 	static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
 694 | 	struct buf work = { 0, 0, 0, 0 };
 695 | 
 696 | 	if (size > 1) {
 697 | 		if (strchr(escape_chars, data[1]) == NULL)
 698 | 			return 0;
 699 | 
 700 | 		if (rndr->cb.normal_text) {
 701 | 			work.data = data + 1;
 702 | 			work.size = 1;
 703 | 			rndr->cb.normal_text(ob, &work, rndr->opaque);
 704 | 		}
 705 | 		else bufputc(ob, data[1]);
 706 | 	} else if (size == 1) {
 707 | 		bufputc(ob, data[0]);
 708 | 	}
 709 | 
 710 | 	return 2;
 711 | }
 712 | 
 713 | /* char_entity • '&' escaped when it doesn't belong to an entity */
 714 | /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
 715 | static size_t
 716 | char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 717 | {
 718 | 	size_t end = 1;
 719 | 	struct buf work = { 0, 0, 0, 0 };
 720 | 
 721 | 	if (end < size && data[end] == '#')
 722 | 		end++;
 723 | 
 724 | 	while (end < size && isalnum(data[end]))
 725 | 		end++;
 726 | 
 727 | 	if (end < size && data[end] == ';')
 728 | 		end++; /* real entity */
 729 | 	else
 730 | 		return 0; /* lone '&' */
 731 | 
 732 | 	if (rndr->cb.entity) {
 733 | 		work.data = data;
 734 | 		work.size = end;
 735 | 		rndr->cb.entity(ob, &work, rndr->opaque);
 736 | 	}
 737 | 	else bufput(ob, data, end);
 738 | 
 739 | 	return end;
 740 | }
 741 | 
 742 | /* char_langle_tag • '<' when tags or autolinks are allowed */
 743 | static size_t
 744 | char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 745 | {
 746 | 	enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
 747 | 	size_t end = tag_length(data, size, &altype);
 748 | 	struct buf work = { data, end, 0, 0 };
 749 | 	int ret = 0;
 750 | 
 751 | 	if (end > 2) {
 752 | 		if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
 753 | 			struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
 754 | 			work.data = data + 1;
 755 | 			work.size = end - 2;
 756 | 			unscape_text(u_link, &work);
 757 | 			ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
 758 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 759 | 		}
 760 | 		else if (rndr->cb.raw_html_tag)
 761 | 			ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
 762 | 	}
 763 | 
 764 | 	if (!ret) return 0;
 765 | 	else return end;
 766 | }
 767 | 
 768 | static size_t
 769 | char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 770 | {
 771 | 	struct buf *link, *link_url, *link_text;
 772 | 	size_t link_len, rewind;
 773 | 
 774 | 	if (!rndr->cb.link || rndr->in_link_body)
 775 | 		return 0;
 776 | 
 777 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 778 | 
 779 | 	if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
 780 | 		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 781 | 		BUFPUTSL(link_url, "http://");
 782 | 		bufput(link_url, link->data, link->size);
 783 | 
 784 | 		ob->size -= rewind;
 785 | 		if (rndr->cb.normal_text) {
 786 | 			link_text = rndr_newbuf(rndr, BUFFER_SPAN);
 787 | 			rndr->cb.normal_text(link_text, link, rndr->opaque);
 788 | 			rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
 789 | 			rndr_popbuf(rndr, BUFFER_SPAN);
 790 | 		} else {
 791 | 			rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
 792 | 		}
 793 | 		rndr_popbuf(rndr, BUFFER_SPAN);
 794 | 	}
 795 | 
 796 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 797 | 	return link_len;
 798 | }
 799 | 
 800 | static size_t
 801 | char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 802 | {
 803 | 	struct buf *link;
 804 | 	size_t link_len, rewind;
 805 | 
 806 | 	if (!rndr->cb.autolink || rndr->in_link_body)
 807 | 		return 0;
 808 | 
 809 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 810 | 
 811 | 	if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
 812 | 		ob->size -= rewind;
 813 | 		rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
 814 | 	}
 815 | 
 816 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 817 | 	return link_len;
 818 | }
 819 | 
 820 | static size_t
 821 | char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 822 | {
 823 | 	struct buf *link;
 824 | 	size_t link_len, rewind;
 825 | 
 826 | 	if (!rndr->cb.autolink || rndr->in_link_body)
 827 | 		return 0;
 828 | 
 829 | 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 830 | 
 831 | 	if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
 832 | 		ob->size -= rewind;
 833 | 		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 834 | 	}
 835 | 
 836 | 	rndr_popbuf(rndr, BUFFER_SPAN);
 837 | 	return link_len;
 838 | }
 839 | 
/* char_link • '[': parsing a link or an image */
/*   handles three forms: inline "[text](url "title")", reference */
/*   "[text][id]" (an empty id falls back to the text) and shortcut "[text]" */
/*   a '!' right before the '[' turns the construct into an image */
/*   returns the number of chars consumed from data, or 0 when nothing */
/*   was rendered (missing callback, malformed link, unknown reference, */
/*   or the callback itself returned 0) */
static size_t
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
	int is_img = (offset && data[-1] == '!'), level;
	size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
	struct buf *content = 0;
	struct buf *link = 0;
	struct buf *title = 0;
	struct buf *u_link = 0;
	/* span pool depth on entry: restored in one go at cleanup */
	size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
	int text_has_nl = 0, ret = 0;
	int in_title = 0, qtype = 0;

	/* checking whether the correct renderer exists */
	if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
		goto cleanup;

	/* looking for the matching closing bracket */
	/* (nested brackets are balanced through level; a bracket right */
	/* after a backslash is not counted) */
	for (level = 1; i < size; i++) {
		if (data[i] == '\n')
			text_has_nl = 1;

		else if (data[i - 1] == '\\')
			continue;

		else if (data[i] == '[')
			level++;

		else if (data[i] == ']') {
			level--;
			if (level <= 0)
				break;
		}
	}

	if (i >= size)
		goto cleanup;

	/* txt_e: index of the ']' closing the link text */
	txt_e = i;
	i++;

	/* skip any amount of whitespace or newline */
	/* (this is much more laxist than original markdown syntax) */
	while (i < size && _isspace(data[i]))
		i++;

	/* inline style link */
	if (i < size && data[i] == '(') {
		/* skipping initial whitespace */
		i++;

		while (i < size && _isspace(data[i]))
			i++;

		link_b = i;

		/* looking for link end: ' " ) */
		/* (a quote only counts when preceded by whitespace) */
		while (i < size) {
			if (data[i] == '\\') i += 2;
			else if (data[i] == ')') break;
			else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
			else i++;
		}

		if (i >= size) goto cleanup;
		link_e = i;

		/* looking for title end if present */
		if (data[i] == '\'' || data[i] == '"') {
			qtype = data[i];
			in_title = 1;
			i++;
			title_b = i;

			/* a ')' only ends the link once the title quote was closed */
			while (i < size) {
				if (data[i] == '\\') i += 2;
				else if (data[i] == qtype) {in_title = 0; i++;}
				else if ((data[i] == ')') && !in_title) break;
				else i++;
			}

			if (i >= size) goto cleanup;

			/* skipping whitespaces after title */
			title_e = i - 1;
			while (title_e > title_b && _isspace(data[title_e]))
				title_e--;

			/* checking for closing quote presence */
			/* (without one there is no title: it all belongs to the link) */
			if (data[title_e] != '\'' &&  data[title_e] != '"') {
				title_b = title_e = 0;
				link_e = i;
			}
		}

		/* remove whitespace at the end of the link */
		while (link_e > link_b && _isspace(data[link_e - 1]))
			link_e--;

		/* remove optional angle brackets around the link */
		if (data[link_b] == '<') link_b++;
		if (data[link_e - 1] == '>') link_e--;

		/* building escaped link and title */
		if (link_e > link_b) {
			link = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(link, data + link_b, link_e - link_b);
		}

		if (title_e > title_b) {
			title = rndr_newbuf(rndr, BUFFER_SPAN);
			bufput(title, data + title_b, title_e - title_b);
		}

		/* step over the closing ')' */
		i++;
	}

	/* reference style link */
	else if (i < size && data[i] == '[') {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* looking for the id */
		i++;
		link_b = i;
		while (i < size && data[i] != ']') i++;
		if (i >= size) goto cleanup;
		link_e = i;

		/* finding the link_ref */
		if (link_b == link_e) {
			/* empty id "[]": the link text itself is the id */
			if (text_has_nl) {
				struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
				size_t j;

				/* each newline becomes a space, unless one already precedes it */
				for (j = 1; j < txt_e; j++) {
					if (data[j] != '\n')
						bufputc(b, data[j]);
					else if (data[j - 1] != ' ')
						bufputc(b, ' ');
				}

				id.data = b->data;
				id.size = b->size;
			} else {
				id.data = data + 1;
				id.size = txt_e - 1;
			}
		} else {
			id.data = data + link_b;
			id.size = link_e - link_b;
		}

		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;
		/* step over the closing ']' of the id */
		i++;
	}

	/* shortcut reference style link */
	else {
		struct buf id = { 0, 0, 0, 0 };
		struct link_ref *lr;

		/* crafting the id */
		if (text_has_nl) {
			struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
			size_t j;

			/* each newline becomes a space, unless one already precedes it */
			for (j = 1; j < txt_e; j++) {
				if (data[j] != '\n')
					bufputc(b, data[j]);
				else if (data[j - 1] != ' ')
					bufputc(b, ' ');
			}

			id.data = b->data;
			id.size = b->size;
		} else {
			id.data = data + 1;
			id.size = txt_e - 1;
		}

		/* finding the link_ref */
		lr = find_link_ref(rndr->refs, id.data, id.size);
		if (!lr)
			goto cleanup;

		/* keeping link and title from link_ref */
		link = lr->link;
		title = lr->title;

		/* rewinding the whitespace */
		/* (only "[text]" is consumed, not what was skipped after it) */
		i = txt_e + 1;
	}

	/* building content: img alt is escaped, link content is parsed */
	if (txt_e > 1) {
		content = rndr_newbuf(rndr, BUFFER_SPAN);
		if (is_img) {
			bufput(content, data + 1, txt_e - 1);
		} else {
			/* disable autolinking when parsing inline the
			 * content of a link */
			rndr->in_link_body = 1;
			parse_inline(content, rndr, data + 1, txt_e - 1);
			rndr->in_link_body = 0;
		}
	}

	/* the callbacks receive the link with backslash escapes removed */
	if (link) {
		u_link = rndr_newbuf(rndr, BUFFER_SPAN);
		unscape_text(u_link, link);
	}

	/* calling the relevant rendering function */
	if (is_img) {
		/* the '!' was already copied to the output as plain text: drop it */
		if (ob->size && ob->data[ob->size - 1] == '!')
			ob->size -= 1;

		ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
	} else {
		ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
	}

	/* cleanup */
	/* (gives back every span buffer taken above by resetting the pool depth) */
cleanup:
	rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
	return ret ? i : 0;
}
1075 | 
1076 | static size_t
1077 | char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1078 | {
1079 | 	size_t sup_start, sup_len;
1080 | 	struct buf *sup;
1081 | 
1082 | 	if (!rndr->cb.superscript)
1083 | 		return 0;
1084 | 
1085 | 	if (size < 2)
1086 | 		return 0;
1087 | 
1088 | 	if (data[1] == '(') {
1089 | 		sup_start = sup_len = 2;
1090 | 
1091 | 		while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1092 | 			sup_len++;
1093 | 
1094 | 		if (sup_len == size)
1095 | 			return 0;
1096 | 	} else {
1097 | 		sup_start = sup_len = 1;
1098 | 
1099 | 		while (sup_len < size && !_isspace(data[sup_len]))
1100 | 			sup_len++;
1101 | 	}
1102 | 
1103 | 	if (sup_len - sup_start == 0)
1104 | 		return (sup_start == 2) ? 3 : 0;
1105 | 
1106 | 	sup = rndr_newbuf(rndr, BUFFER_SPAN);
1107 | 	parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1108 | 	rndr->cb.superscript(ob, sup, rndr->opaque);
1109 | 	rndr_popbuf(rndr, BUFFER_SPAN);
1110 | 
1111 | 	return (sup_start == 2) ? sup_len + 1 : sup_len;
1112 | }
1113 | 
1114 | /*********************************
1115 |  * BLOCK-LEVEL PARSING FUNCTIONS *
1116 |  *********************************/
1117 | 
/* is_empty • tells whether the first line of data holds nothing but
 * spaces; returns the line length (newline included) when it does,
 * 0 otherwise */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		uint8_t ch = data[pos];

		if (ch == '\n')
			break;

		if (ch != ' ')
			return 0;

		pos++;
	}

	/* one byte is always counted for the line terminator, even when
	 * the input ran out before a newline was seen */
	return pos + 1;
}
1130 | 
/* is_hrule • returns 1 when the first line of data is a horizontal rule:
 * at most three leading spaces, then three or more of the same marker
 * ('*', '-' or '_'), with only spaces allowed in between; 0 otherwise */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	/* a rule needs at least three bytes */
	if (size < 3)
		return 0;

	/* step over up to three spaces of indentation */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	if (pos + 2 >= size)
		return 0;

	/* the first non-space byte selects the rule character */
	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the rest of the line may only hold that character or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1161 | 
/* prefix_codefence • checks whether data begins with a code fence: up to
 * three spaces followed by a run of at least three '~' or three '`'.
 * Returns the offset just past the fence run (indentation included),
 * or 0 when there is no fence */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run_start;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* step over up to three spaces of indentation */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	if (pos + 2 >= size)
		return 0;

	/* the fence character is either a tilde or a backtick */
	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measure the uninterrupted run of fence characters */
	run_start = pos;
	while (pos < size && data[pos] == fence)
		pos++;

	if (pos - run_start < 3)
		return 0;

	return pos;
}
1192 | 
1193 | /* check if a line is a code fence; return its size if it is */
1194 | static size_t
1195 | is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1196 | {
1197 | 	size_t i = 0, syn_len = 0;
1198 | 	uint8_t *syn_start;
1199 | 
1200 | 	i = prefix_codefence(data, size);
1201 | 	if (i == 0)
1202 | 		return 0;
1203 | 
1204 | 	while (i < size && data[i] == ' ')
1205 | 		i++;
1206 | 
1207 | 	syn_start = data + i;
1208 | 
1209 | 	if (i < size && data[i] == '{') {
1210 | 		i++; syn_start++;
1211 | 
1212 | 		while (i < size && data[i] != '}' && data[i] != '\n') {
1213 | 			syn_len++; i++;
1214 | 		}
1215 | 
1216 | 		if (i == size || data[i] != '}')
1217 | 			return 0;
1218 | 
1219 | 		/* strip all whitespace at the beginning and the end
1220 | 		 * of the {} block */
1221 | 		while (syn_len > 0 && _isspace(syn_start[0])) {
1222 | 			syn_start++; syn_len--;
1223 | 		}
1224 | 
1225 | 		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1226 | 			syn_len--;
1227 | 
1228 | 		i++;
1229 | 	} else {
1230 | 		while (i < size && !_isspace(data[i])) {
1231 | 			syn_len++; i++;
1232 | 		}
1233 | 	}
1234 | 
1235 | 	if (syntax) {
1236 | 		syntax->data = syn_start;
1237 | 		syntax->size = syn_len;
1238 | 	}
1239 | 
1240 | 	while (i < size && data[i] != '\n') {
1241 | 		if (!_isspace(data[i]))
1242 | 			return 0;
1243 | 
1244 | 		i++;
1245 | 	}
1246 | 
1247 | 	return i + 1;
1248 | }
1249 | 
1250 | /* is_atxheader • returns whether the line is a hash-prefixed header */
1251 | static int
1252 | is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1253 | {
1254 | 	if (data[0] != '#')
1255 | 		return 0;
1256 | 
1257 | 	if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1258 | 		size_t level = 0;
1259 | 
1260 | 		while (level < size && level < 6 && data[level] == '#')
1261 | 			level++;
1262 | 
1263 | 		if (level < size && data[level] != ' ')
1264 | 			return 0;
1265 | 	}
1266 | 
1267 | 	return 1;
1268 | }
1269 | 
/* is_headerline • recognises a setext-style header underline: a run of
 * '=' (returns 1) or of '-' (returns 2), optionally followed by spaces,
 * up to the end of the line or of the input; returns 0 for anything else */
static int
is_headerline(uint8_t *data, size_t size)
{
	uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	/* the first byte decides which header level is being tested */
	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* rest of the underline run */
	while (pos < size && data[pos] == mark)
		pos++;

	/* trailing spaces are tolerated */
	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything left before the newline disqualifies the line */
	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
1290 | 
/* is_next_headerline • applies is_headerline to the line that follows the
 * first line of data; returns 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t pos;

	/* find the newline that ends the current line */
	for (pos = 0; pos < size && data[pos] != '\n'; pos++)
		/* scanning */;

	/* step past it; nothing left means no next line */
	pos++;
	if (pos >= size)
		return 0;

	return is_headerline(data + pos, size - pos);
}
1304 | 
/* prefix_quote • returns the length of a blockquote prefix (up to three
 * spaces, a '>' and one optional space), or 0 when the line has none */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* the marker itself, plus a single space after it if present */
	pos++;
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1323 | 
/* prefix_code • returns the block code prefix length: 4 when the line
 * opens with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1333 | 
/* prefix_oli • returns the length of an ordered list item prefix (up to
 * three spaces, one or more digits, a dot and a space), or 0 when the
 * line is not an ordered list item */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* a line that is underlined is a setext header, not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1358 | 
/* prefix_uli • returns the length of an unordered list item prefix (up
 * to three spaces, one of '*', '+' or '-', and a space), or 0 when the
 * line is not an unordered list item */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* room is needed for the bullet and the space after it */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	/* a line that is underlined is a setext header, not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1379 | 
1380 | 
/* parse_block • parses the blocks found in data[0..size) and renders them
 * into ob; it returns nothing. Declared ahead of its definition because
 * the block parsers below (blockquote, list item) call it recursively */
static void parse_block(struct buf *ob, struct sd_markdown *rndr,
			uint8_t *data, size_t size);
1384 | 
1385 | 
1386 | /* parse_blockquote • handles parsing of a blockquote fragment */
1387 | static size_t
1388 | parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1389 | {
1390 | 	size_t beg, end = 0, pre, work_size = 0;
1391 | 	uint8_t *work_data = 0;
1392 | 	struct buf *out = 0;
1393 | 
1394 | 	out = rndr_newbuf(rndr, BUFFER_BLOCK);
1395 | 	beg = 0;
1396 | 	while (beg < size) {
1397 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1398 | 
1399 | 		pre = prefix_quote(data + beg, end - beg);
1400 | 
1401 | 		if (pre)
1402 | 			beg += pre; /* skipping prefix */
1403 | 
1404 | 		/* empty line followed by non-quote line */
1405 | 		else if (is_empty(data + beg, end - beg) &&
1406 | 				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
1407 | 				!is_empty(data + end, size - end))))
1408 | 			break;
1409 | 
1410 | 		if (beg < end) { /* copy into the in-place working buffer */
1411 | 			/* bufput(work, data + beg, end - beg); */
1412 | 			if (!work_data)
1413 | 				work_data = data + beg;
1414 | 			else if (data + beg != work_data + work_size)
1415 | 				memmove(work_data + work_size, data + beg, end - beg);
1416 | 			work_size += end - beg;
1417 | 		}
1418 | 		beg = end;
1419 | 	}
1420 | 
1421 | 	parse_block(out, rndr, work_data, work_size);
1422 | 	if (rndr->cb.blockquote)
1423 | 		rndr->cb.blockquote(ob, out, rndr->opaque);
1424 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1425 | 	return end;
1426 | }
1427 | 
/* parse_htmlblock • forward declaration: parse_paragraph calls it (with
 * do_render set to 0) before the definition appears further down */
static size_t
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1430 | 
/* parse_paragraph • handles parsing of a regular paragraph.
 *
 * Lines are gathered until a blank line, a setext header underline, or
 * the start of another block (atx header, hrule, blockquote; with
 * MKDEXT_LAX_SPACING also list items, HTML blocks and code fences).
 *
 * When the paragraph is ended by a setext underline, the last gathered
 * line is rendered through the header callback and any lines before it
 * through the paragraph callback; otherwise everything gathered goes to
 * the paragraph callback.
 *
 * Returns the offset at which block parsing should resume. */
static size_t
parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
{
	size_t i = 0, end = 0;
	int level = 0;
	struct buf work = { data, 0, 0, 0 };

	while (i < size) {
		/* end: one past the newline of the line starting at i (or size) */
		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;

		/* a blank line ends the paragraph; it is consumed (end is past it) */
		if (is_empty(data + i, size - i))
			break;

		/* a setext underline ends the paragraph and is consumed as well;
		 * level remembers which kind of header it announces */
		if ((level = is_headerline(data + i, size - i)) != 0)
			break;

		/* another block starts on this line: stop before it so that
		 * the caller parses it next */
		if (is_atxheader(rndr, data + i, size - i) ||
			is_hrule(data + i, size - i) ||
			prefix_quote(data + i, size - i)) {
			end = i;
			break;
		}

		/*
		 * Early termination of a paragraph with the same logic
		 * as Markdown 1.0.0. If this logic is applied, the
		 * Markdown 1.0.3 test suite won't pass cleanly
		 *
		 * :: If the first character in a new line is not a letter,
		 * let's check to see if there's some kind of block starting
		 * here
		 */
		if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
			if (prefix_oli(data + i, size - i) ||
				prefix_uli(data + i, size - i)) {
				end = i;
				break;
			}

			/* see if an html block starts here */
			if (data[i] == '<' && rndr->cb.blockhtml &&
				parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
				end = i;
				break;
			}

			/* see if a code fence starts here */
			if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
				is_codefence(data + i, size - i, NULL) != 0) {
				end = i;
				break;
			}
		}

		i = end;
	}

	/* the paragraph text is data[0..i), minus trailing newlines */
	work.size = i;
	while (work.size && data[work.size - 1] == '\n')
		work.size--;

	if (!level) {
		/* plain paragraph */
		struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
		parse_inline(tmp, rndr, work.data, work.size);
		if (rndr->cb.paragraph)
			rndr->cb.paragraph(ob, tmp, rndr->opaque);
		rndr_popbuf(rndr, BUFFER_BLOCK);
	} else {
		/* setext header: only the last gathered line is the header text */
		struct buf *header_work;

		if (work.size) {
			size_t beg;
			i = work.size; /* remember the end of the gathered text */
			work.size -= 1;

			/* walk back to the newline that precedes the last line */
			while (work.size && data[work.size] != '\n')
				work.size -= 1;

			/* beg: first byte of the last line */
			beg = work.size + 1;
			while (work.size && data[work.size - 1] == '\n')
				work.size -= 1;

			if (work.size > 0) {
				/* lines before the header text form a paragraph
				 * of their own, rendered first */
				struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
				parse_inline(tmp, rndr, work.data, work.size);

				if (rndr->cb.paragraph)
					rndr->cb.paragraph(ob, tmp, rndr->opaque);

				rndr_popbuf(rndr, BUFFER_BLOCK);
				work.data += beg;
				work.size = i - beg;
			}
			/* single line: the whole gathered text is the header */
			else work.size = i;
		}

		header_work = rndr_newbuf(rndr, BUFFER_SPAN);
		parse_inline(header_work, rndr, work.data, work.size);

		if (rndr->cb.header)
			rndr->cb.header(ob, header_work, (int)level, rndr->opaque);

		rndr_popbuf(rndr, BUFFER_SPAN);
	}

	return end;
}
1539 | 
1540 | /* parse_fencedcode • handles parsing of a block-level code fragment */
1541 | static size_t
1542 | parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1543 | {
1544 | 	size_t beg, end;
1545 | 	struct buf *work = 0;
1546 | 	struct buf lang = { 0, 0, 0, 0 };
1547 | 
1548 | 	beg = is_codefence(data, size, &lang);
1549 | 	if (beg == 0) return 0;
1550 | 
1551 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1552 | 
1553 | 	while (beg < size) {
1554 | 		size_t fence_end;
1555 | 		struct buf fence_trail = { 0, 0, 0, 0 };
1556 | 
1557 | 		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1558 | 		if (fence_end != 0 && fence_trail.size == 0) {
1559 | 			beg += fence_end;
1560 | 			break;
1561 | 		}
1562 | 
1563 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1564 | 
1565 | 		if (beg < end) {
1566 | 			/* verbatim copy to the working buffer,
1567 | 				escaping entities */
1568 | 			if (is_empty(data + beg, end - beg))
1569 | 				bufputc(work, '\n');
1570 | 			else bufput(work, data + beg, end - beg);
1571 | 		}
1572 | 		beg = end;
1573 | 	}
1574 | 
1575 | 	if (work->size && work->data[work->size - 1] != '\n')
1576 | 		bufputc(work, '\n');
1577 | 
1578 | 	if (rndr->cb.blockcode)
1579 | 		rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1580 | 
1581 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1582 | 	return beg;
1583 | }
1584 | 
1585 | static size_t
1586 | parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1587 | {
1588 | 	size_t beg, end, pre;
1589 | 	struct buf *work = 0;
1590 | 
1591 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1592 | 
1593 | 	beg = 0;
1594 | 	while (beg < size) {
1595 | 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1596 | 		pre = prefix_code(data + beg, end - beg);
1597 | 
1598 | 		if (pre)
1599 | 			beg += pre; /* skipping prefix */
1600 | 		else if (!is_empty(data + beg, end - beg))
1601 | 			/* non-empty non-prefixed line breaks the pre */
1602 | 			break;
1603 | 
1604 | 		if (beg < end) {
1605 | 			/* verbatim copy to the working buffer,
1606 | 				escaping entities */
1607 | 			if (is_empty(data + beg, end - beg))
1608 | 				bufputc(work, '\n');
1609 | 			else bufput(work, data + beg, end - beg);
1610 | 		}
1611 | 		beg = end;
1612 | 	}
1613 | 
1614 | 	while (work->size && work->data[work->size - 1] == '\n')
1615 | 		work->size -= 1;
1616 | 
1617 | 	bufputc(work, '\n');
1618 | 
1619 | 	if (rndr->cb.blockcode)
1620 | 		rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1621 | 
1622 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1623 | 	return beg;
1624 | }
1625 | 
/* parse_listitem • parsing of a single list item.
 *
 * data must start with an ordered or unordered item prefix, otherwise 0
 * is returned. The item's first line and its continuation lines are
 * gathered (with up to four spaces of indentation removed per line),
 * rendered, and passed to the listitem callback.
 *
 * *flags is read for MKD_LIST_ORDERED and MKD_LI_BLOCK and may be updated:
 * MKD_LI_END is set when the enclosing list ends with this item,
 * MKD_LI_BLOCK when the item contains a blank line and must therefore be
 * rendered as blocks.
 *
 * Returns the offset of the first byte after the item. */
static size_t
parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
{
	struct buf *work = 0, *inter = 0;
	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
	int in_empty = 0, has_inside_empty = 0, in_fence = 0;

	/* keeping track of the first indentation prefix */
	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
		orgpre++;

	/* length of the item marker, trying the unordered form first */
	beg = prefix_uli(data, size);
	if (!beg)
		beg = prefix_oli(data, size);

	if (!beg)
		return 0;

	/* skipping to the beginning of the following line */
	end = beg;
	while (end < size && data[end - 1] != '\n')
		end++;

	/* getting working buffers: work gathers the raw item text,
	 * inter receives its rendered form */
	work = rndr_newbuf(rndr, BUFFER_SPAN);
	inter = rndr_newbuf(rndr, BUFFER_SPAN);

	/* putting the first line into the working buffer */
	bufput(work, data + beg, end - beg);
	beg = end;

	/* process the following lines */
	while (beg < size) {
		size_t has_next_uli = 0, has_next_oli = 0;

		/* end: one past the newline of the line starting at beg */
		end++;

		while (end < size && data[end - 1] != '\n')
			end++;

		/* process an empty line: only remembered for now, what to do
		 * with it depends on the line that follows */
		if (is_empty(data + beg, end - beg)) {
			in_empty = 1;
			beg = end;
			continue;
		}

		/* calculating the indentation (at most four spaces) */
		i = 0;
		while (i < 4 && beg + i < end && data[beg + i] == ' ')
			i++;

		pre = i;

		/* every fence line toggles the "inside fenced code" state */
		if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
				in_fence = !in_fence;
		}

		/* Only check for new list items if we are **not** inside
		 * a fenced code block */
		if (!in_fence) {
			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
		}

		/* checking for ul/ol switch */
		if (in_empty && (
			((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
			(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
			*flags |= MKD_LI_END;
			break; /* the following item must have same list type */
		}

		/* checking for a new item */
		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
			if (in_empty)
				has_inside_empty = 1;

			if (pre == orgpre) /* the following item must have */
				break;             /* the same indentation */

			/* differently indented item: start of a nested list;
			 * remember where it begins inside work */
			if (!sublist)
				sublist = work->size;
		}
		/* joining only indented stuff after empty lines;
		 * note that now we only require 1 space of indentation
		 * to continue a list */
		else if (in_empty && pre == 0) {
			*flags |= MKD_LI_END;
			break;
		}
		else if (in_empty) {
			/* indented text after a blank line stays in the item;
			 * the blank line is kept as a single newline */
			bufputc(work, '\n');
			has_inside_empty = 1;
		}

		in_empty = 0;

		/* adding the line without prefix into the working buffer */
		bufput(work, data + beg + i, end - beg - i);
		beg = end;
	}

	/* render of li contents */
	if (has_inside_empty)
		*flags |= MKD_LI_BLOCK;

	if (*flags & MKD_LI_BLOCK) {
		/* intermediate render of block li: the text before the nested
		 * list and the nested list itself are parsed separately */
		if (sublist && sublist < work->size) {
			parse_block(inter, rndr, work->data, sublist);
			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
		}
		else
			parse_block(inter, rndr, work->data, work->size);
	} else {
		/* intermediate render of inline li: only the nested list,
		 * if any, is parsed as blocks */
		if (sublist && sublist < work->size) {
			parse_inline(inter, rndr, work->data, sublist);
			parse_block(inter, rndr, work->data + sublist, work->size - sublist);
		}
		else
			parse_inline(inter, rndr, work->data, work->size);
	}

	/* render of li itself */
	if (rndr->cb.listitem)
		rndr->cb.listitem(ob, inter, *flags, rndr->opaque);

	/* releasing inter and work */
	rndr_popbuf(rndr, BUFFER_SPAN);
	rndr_popbuf(rndr, BUFFER_SPAN);
	return beg;
}
1762 | 
1763 | 
1764 | /* parse_list • parsing ordered or unordered list block */
1765 | static size_t
1766 | parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1767 | {
1768 | 	struct buf *work = 0;
1769 | 	size_t i = 0, j;
1770 | 
1771 | 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
1772 | 
1773 | 	while (i < size) {
1774 | 		j = parse_listitem(work, rndr, data + i, size - i, &flags);
1775 | 		i += j;
1776 | 
1777 | 		if (!j || (flags & MKD_LI_END))
1778 | 			break;
1779 | 	}
1780 | 
1781 | 	if (rndr->cb.list)
1782 | 		rndr->cb.list(ob, work, flags, rndr->opaque);
1783 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
1784 | 	return i;
1785 | }
1786 | 
1787 | /* parse_atxheader • parsing of atx-style headers */
1788 | static size_t
1789 | parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1790 | {
1791 | 	size_t level = 0;
1792 | 	size_t i, end, skip;
1793 | 
1794 | 	while (level < size && level < 6 && data[level] == '#')
1795 | 		level++;
1796 | 
1797 | 	for (i = level; i < size && data[i] == ' '; i++);
1798 | 
1799 | 	for (end = i; end < size && data[end] != '\n'; end++);
1800 | 	skip = end;
1801 | 
1802 | 	while (end && data[end - 1] == '#')
1803 | 		end--;
1804 | 
1805 | 	while (end && data[end - 1] == ' ')
1806 | 		end--;
1807 | 
1808 | 	if (end > i) {
1809 | 		struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1810 | 
1811 | 		parse_inline(work, rndr, data + i, end - i);
1812 | 
1813 | 		if (rndr->cb.header)
1814 | 			rndr->cb.header(ob, work, (int)level, rndr->opaque);
1815 | 
1816 | 		rndr_popbuf(rndr, BUFFER_SPAN);
1817 | 	}
1818 | 
1819 | 	return skip;
1820 | }
1821 | 
1822 | 
/* htmlblock_end_tag • checks for the end of an HTML block at data, which
 * is expected to point at "</": the closing tag (compared without regard
 * to case) must be followed by nothing but spaces up to the end of its
 * line; one further blank line is included in the match when present.
 * Returns the length of the match, 0 otherwise. rndr is not used. */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* "</" + tag + ">" must fit with at least one byte to spare */
	if (tag_len + 3 >= size)
		return 0;

	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	pos = tag_len + 3;

	/* the remainder of the closing tag's line has to be blank */
	blank = 0;
	if (pos < size) {
		blank = is_empty(data + pos, size - pos);
		if (blank == 0)
			return 0; /* non-blank after tag */
	}
	pos += blank;

	/* swallow one following blank line, if there is one */
	blank = (pos < size) ? is_empty(data + pos, size - pos) : 0;

	return pos + blank;
}
1854 | 
/* htmlblock_end • searches data for the closing tag of the HTML block
 * opened by curtag. With start_of_line set, closing tags found after the
 * first line only count when they directly follow a newline.
 * Returns the length of the block up to and including its end, or 0 when
 * no suitable closing tag is found. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	const size_t name_len = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;

	while (pos < size) {
		size_t matched;

		/* move on to the next "</" pair, counting the newlines
		 * crossed on the way */
		for (pos++; pos < size; pos++) {
			if (data[pos - 1] == '<' && data[pos] == '/')
				break;

			if (data[pos] == '\n')
				newlines++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough input left to hold the closing tag */
		if (pos + 2 + name_len >= size)
			break;

		/* pos sits on the '/', the candidate starts one byte earlier */
		matched = htmlblock_end_tag(curtag, name_len, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
1895 | 
1896 | 
/* parse_htmlblock • parsing of a block of raw HTML.
 *
 * Recognises, at the start of data: a block-level tag known to
 * find_block_tag together with its matching closing tag, an HTML comment
 * followed by a blank line, or an <hr> tag followed by a blank line.
 *
 * When do_render is non-zero and a blockhtml callback is set, the block
 * is passed to that callback; with do_render at 0 the function only
 * measures. Returns the length of the block, or 0 when data does not
 * start with a recognised HTML block. */
static size_t
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
{
	size_t i, j = 0, tag_end;
	const char *curtag = NULL;
	struct buf work = { data, 0, 0, 0 };

	/* identification of the opening tag */
	if (size < 2 || data[0] != '<')
		return 0;

	/* the tag name runs up to the first '>' or space */
	i = 1;
	while (i < size && data[i] != '>' && data[i] != ' ')
		i++;

	if (i < size)
		curtag = find_block_tag((char *)data + 1, (int)i - 1);

	/* handling of special cases */
	if (!curtag) {

		/* HTML comment, laxist form */
		if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
			i = 5;

			/* look for the closing "-->" */
			while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
				i++;

			i++;

			/* the comment only counts when the rest of its line is blank */
			if (i < size)
				j = is_empty(data + i, size - i);

			if (j) {
				work.size = i + j;
				if (do_render && rndr->cb.blockhtml)
					rndr->cb.blockhtml(ob, &work, rndr->opaque);
				return work.size;
			}
		}

		/* HR, which is the only self-closing block tag considered */
		if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
			i = 3;
			while (i < size && data[i] != '>')
				i++;

			if (i + 1 < size) {
				i++;
				/* as above, the rest of the line must be blank */
				j = is_empty(data + i, size - i);
				if (j) {
					work.size = i + j;
					if (do_render && rndr->cb.blockhtml)
						rndr->cb.blockhtml(ob, &work, rndr->opaque);
					return work.size;
				}
			}
		}

		/* no special case recognised */
		return 0;
	}

	/* looking for an unindented matching closing tag */
	/*	followed by a blank line */
	tag_end = htmlblock_end(curtag, rndr, data, size, 1);

	/* if not found, trying a second pass looking for indented match */
	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
		tag_end = htmlblock_end(curtag, rndr, data, size, 0);
	}

	if (!tag_end)
		return 0;

	/* the end of the block has been found */
	work.size = tag_end;
	if (do_render && rndr->cb.blockhtml)
		rndr->cb.blockhtml(ob, &work, rndr->opaque);

	return tag_end;
}
1981 | 
1982 | static void
1983 | parse_table_row(
1984 | 	struct buf *ob,
1985 | 	struct sd_markdown *rndr,
1986 | 	uint8_t *data,
1987 | 	size_t size,
1988 | 	size_t columns,
1989 | 	int *col_data,
1990 | 	int header_flag)
1991 | {
1992 | 	size_t i = 0, col;
1993 | 	struct buf *row_work = 0;
1994 | 
1995 | 	if (!rndr->cb.table_cell || !rndr->cb.table_row)
1996 | 		return;
1997 | 
1998 | 	row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1999 | 
2000 | 	if (i < size && data[i] == '|')
2001 | 		i++;
2002 | 
2003 | 	for (col = 0; col < columns && i < size; ++col) {
2004 | 		size_t cell_start, cell_end;
2005 | 		struct buf *cell_work;
2006 | 
2007 | 		cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2008 | 
2009 | 		while (i < size && _isspace(data[i]))
2010 | 			i++;
2011 | 
2012 | 		cell_start = i;
2013 | 
2014 | 		while (i < size && data[i] != '|')
2015 | 			i++;
2016 | 
2017 | 		cell_end = i - 1;
2018 | 
2019 | 		while (cell_end > cell_start && _isspace(data[cell_end]))
2020 | 			cell_end--;
2021 | 
2022 | 		parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2023 | 		rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2024 | 
2025 | 		rndr_popbuf(rndr, BUFFER_SPAN);
2026 | 		i++;
2027 | 	}
2028 | 
2029 | 	for (; col < columns; ++col) {
2030 | 		struct buf empty_cell = { 0, 0, 0, 0 };
2031 | 		rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2032 | 	}
2033 | 
2034 | 	rndr->cb.table_row(ob, row_work, rndr->opaque);
2035 | 
2036 | 	rndr_popbuf(rndr, BUFFER_SPAN);
2037 | }
2038 | 
2039 | static size_t
2040 | parse_table_header(
2041 | 	struct buf *ob,
2042 | 	struct sd_markdown *rndr,
2043 | 	uint8_t *data,
2044 | 	size_t size,
2045 | 	size_t *columns,
2046 | 	int **column_data)
2047 | {
2048 | 	int pipes;
2049 | 	size_t i = 0, col, header_end, under_end;
2050 | 
2051 | 	pipes = 0;
2052 | 	while (i < size && data[i] != '\n')
2053 | 		if (data[i++] == '|')
2054 | 			pipes++;
2055 | 
2056 | 	if (i == size || pipes == 0)
2057 | 		return 0;
2058 | 
2059 | 	header_end = i;
2060 | 
2061 | 	while (header_end > 0 && _isspace(data[header_end - 1]))
2062 | 		header_end--;
2063 | 
2064 | 	if (data[0] == '|')
2065 | 		pipes--;
2066 | 
2067 | 	if (header_end && data[header_end - 1] == '|')
2068 | 		pipes--;
2069 | 
2070 | 	*columns = pipes + 1;
2071 | 	*column_data = calloc(*columns, sizeof(int));
2072 | 
2073 | 	/* Parse the header underline */
2074 | 	i++;
2075 | 	if (i < size && data[i] == '|')
2076 | 		i++;
2077 | 
2078 | 	under_end = i;
2079 | 	while (under_end < size && data[under_end] != '\n')
2080 | 		under_end++;
2081 | 
2082 | 	for (col = 0; col < *columns && i < under_end; ++col) {
2083 | 		size_t dashes = 0;
2084 | 
2085 | 		while (i < under_end && data[i] == ' ')
2086 | 			i++;
2087 | 
2088 | 		if (data[i] == ':') {
2089 | 			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2090 | 			dashes++;
2091 | 		}
2092 | 
2093 | 		while (i < under_end && data[i] == '-') {
2094 | 			i++; dashes++;
2095 | 		}
2096 | 
2097 | 		if (i < under_end && data[i] == ':') {
2098 | 			i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2099 | 			dashes++;
2100 | 		}
2101 | 
2102 | 		while (i < under_end && data[i] == ' ')
2103 | 			i++;
2104 | 
2105 | 		if (i < under_end && data[i] != '|')
2106 | 			break;
2107 | 
2108 | 		if (dashes < 3)
2109 | 			break;
2110 | 
2111 | 		i++;
2112 | 	}
2113 | 
2114 | 	if (col < *columns)
2115 | 		return 0;
2116 | 
2117 | 	parse_table_row(
2118 | 		ob, rndr, data,
2119 | 		header_end,
2120 | 		*columns,
2121 | 		*column_data,
2122 | 		MKD_TABLE_HEADER
2123 | 	);
2124 | 
2125 | 	return under_end + 1;
2126 | }
2127 | 
2128 | static size_t
2129 | parse_table(
2130 | 	struct buf *ob,
2131 | 	struct sd_markdown *rndr,
2132 | 	uint8_t *data,
2133 | 	size_t size)
2134 | {
2135 | 	size_t i;
2136 | 
2137 | 	struct buf *header_work = 0;
2138 | 	struct buf *body_work = 0;
2139 | 
2140 | 	size_t columns;
2141 | 	int *col_data = NULL;
2142 | 
2143 | 	header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2144 | 	body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2145 | 
2146 | 	i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2147 | 	if (i > 0) {
2148 | 
2149 | 		while (i < size) {
2150 | 			size_t row_start;
2151 | 			int pipes = 0;
2152 | 
2153 | 			row_start = i;
2154 | 
2155 | 			while (i < size && data[i] != '\n')
2156 | 				if (data[i++] == '|')
2157 | 					pipes++;
2158 | 
2159 | 			if (pipes == 0 || i == size) {
2160 | 				i = row_start;
2161 | 				break;
2162 | 			}
2163 | 
2164 | 			parse_table_row(
2165 | 				body_work,
2166 | 				rndr,
2167 | 				data + row_start,
2168 | 				i - row_start,
2169 | 				columns,
2170 | 				col_data, 0
2171 | 			);
2172 | 
2173 | 			i++;
2174 | 		}
2175 | 
2176 | 		if (rndr->cb.table)
2177 | 			rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2178 | 	}
2179 | 
2180 | 	free(col_data);
2181 | 	rndr_popbuf(rndr, BUFFER_SPAN);
2182 | 	rndr_popbuf(rndr, BUFFER_BLOCK);
2183 | 	return i;
2184 | }
2185 | 
2186 | /* parse_block • parsing of one block, returning next uint8_t to parse */
2187 | static void
2188 | parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2189 | {
2190 | 	size_t beg, end, i;
2191 | 	uint8_t *txt_data;
2192 | 	beg = 0;
2193 | 
2194 | 	if (rndr->work_bufs[BUFFER_SPAN].size +
2195 | 		rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2196 | 		return;
2197 | 
2198 | 	while (beg < size) {
2199 | 		txt_data = data + beg;
2200 | 		end = size - beg;
2201 | 
2202 | 		if (is_atxheader(rndr, txt_data, end))
2203 | 			beg += parse_atxheader(ob, rndr, txt_data, end);
2204 | 
2205 | 		else if (data[beg] == '<' && rndr->cb.blockhtml &&
2206 | 				(i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2207 | 			beg += i;
2208 | 
2209 | 		else if ((i = is_empty(txt_data, end)) != 0)
2210 | 			beg += i;
2211 | 
2212 | 		else if (is_hrule(txt_data, end)) {
2213 | 			if (rndr->cb.hrule)
2214 | 				rndr->cb.hrule(ob, rndr->opaque);
2215 | 
2216 | 			while (beg < size && data[beg] != '\n')
2217 | 				beg++;
2218 | 
2219 | 			beg++;
2220 | 		}
2221 | 
2222 | 		else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2223 | 			(i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2224 | 			beg += i;
2225 | 
2226 | 		else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2227 | 			(i = parse_table(ob, rndr, txt_data, end)) != 0)
2228 | 			beg += i;
2229 | 
2230 | 		else if (prefix_quote(txt_data, end))
2231 | 			beg += parse_blockquote(ob, rndr, txt_data, end);
2232 | 
2233 | 		else if (prefix_code(txt_data, end))
2234 | 			beg += parse_blockcode(ob, rndr, txt_data, end);
2235 | 
2236 | 		else if (prefix_uli(txt_data, end))
2237 | 			beg += parse_list(ob, rndr, txt_data, end, 0);
2238 | 
2239 | 		else if (prefix_oli(txt_data, end))
2240 | 			beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2241 | 
2242 | 		else
2243 | 			beg += parse_paragraph(ob, rndr, txt_data, end);
2244 | 	}
2245 | }
2246 | 
2247 | 
2248 | 
2249 | /*********************
2250 |  * REFERENCE PARSING *
2251 |  *********************/
2252 | 
2253 | /* is_ref • returns whether a line is a reference or not */
2254 | static int
2255 | is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2256 | {
2257 | /*	int n; */
2258 | 	size_t i = 0;
2259 | 	size_t id_offset, id_end;
2260 | 	size_t link_offset, link_end;
2261 | 	size_t title_offset, title_end;
2262 | 	size_t line_end;
2263 | 
2264 | 	/* up to 3 optional leading spaces */
2265 | 	if (beg + 3 >= end) return 0;
2266 | 	if (data[beg] == ' ') { i = 1;
2267 | 	if (data[beg + 1] == ' ') { i = 2;
2268 | 	if (data[beg + 2] == ' ') { i = 3;
2269 | 	if (data[beg + 3] == ' ') return 0; } } }
2270 | 	i += beg;
2271 | 
2272 | 	/* id part: anything but a newline between brackets */
2273 | 	if (data[i] != '[') return 0;
2274 | 	i++;
2275 | 	id_offset = i;
2276 | 	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2277 | 		i++;
2278 | 	if (i >= end || data[i] != ']') return 0;
2279 | 	id_end = i;
2280 | 
2281 | 	/* spacer: colon (space | tab)* newline? (space | tab)* */
2282 | 	i++;
2283 | 	if (i >= end || data[i] != ':') return 0;
2284 | 	i++;
2285 | 	while (i < end && data[i] == ' ') i++;
2286 | 	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2287 | 		i++;
2288 | 		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2289 | 	while (i < end && data[i] == ' ') i++;
2290 | 	if (i >= end) return 0;
2291 | 
2292 | 	/* link: whitespace-free sequence, optionally between angle brackets */
2293 | 	if (data[i] == '<')
2294 | 		i++;
2295 | 
2296 | 	link_offset = i;
2297 | 
2298 | 	while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2299 | 		i++;
2300 | 
2301 | 	if (data[i - 1] == '>') link_end = i - 1;
2302 | 	else link_end = i;
2303 | 
2304 | 	/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2305 | 	while (i < end && data[i] == ' ') i++;
2306 | 	if (i < end && data[i] != '\n' && data[i] != '\r'
2307 | 			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
2308 | 		return 0;
2309 | 	line_end = 0;
2310 | 	/* computing end-of-line */
2311 | 	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2312 | 	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2313 | 		line_end = i + 1;
2314 | 
2315 | 	/* optional (space|tab)* spacer after a newline */
2316 | 	if (line_end) {
2317 | 		i = line_end + 1;
2318 | 		while (i < end && data[i] == ' ') i++; }
2319 | 
2320 | 	/* optional title: any non-newline sequence enclosed in '"()
2321 | 					alone on its line */
2322 | 	title_offset = title_end = 0;
2323 | 	if (i + 1 < end
2324 | 	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2325 | 		i++;
2326 | 		title_offset = i;
2327 | 		/* looking for EOL */
2328 | 		while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2329 | 		if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2330 | 			title_end = i + 1;
2331 | 		else	title_end = i;
2332 | 		/* stepping back */
2333 | 		i -= 1;
2334 | 		while (i > title_offset && data[i] == ' ')
2335 | 			i -= 1;
2336 | 		if (i > title_offset
2337 | 		&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2338 | 			line_end = title_end;
2339 | 			title_end = i; } }
2340 | 
2341 | 	if (!line_end || link_end == link_offset)
2342 | 		return 0; /* garbage after the link empty link */
2343 | 
2344 | 	/* a valid ref has been found, filling-in return structures */
2345 | 	if (last)
2346 | 		*last = line_end;
2347 | 
2348 | 	if (refs) {
2349 | 		struct link_ref *ref;
2350 | 
2351 | 		ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2352 | 		if (!ref)
2353 | 			return 0;
2354 | 
2355 | 		ref->link = bufnew(link_end - link_offset);
2356 | 		bufput(ref->link, data + link_offset, link_end - link_offset);
2357 | 
2358 | 		if (title_end > title_offset) {
2359 | 			ref->title = bufnew(title_end - title_offset);
2360 | 			bufput(ref->title, data + title_offset, title_end - title_offset);
2361 | 		}
2362 | 	}
2363 | 
2364 | 	return 1;
2365 | }
2366 | 
/*
 * expand_tabs • copies `line` into `ob`, turning tabs into spaces
 *
 * Every tab is replaced by one to four spaces, so that the number of bytes
 * emitted by this call reaches the next multiple of 4. All other bytes are
 * copied unchanged.
 */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0;		/* read offset into line */
	size_t column = 0;	/* bytes emitted so far */

	while (pos < size) {
		size_t run = 0;

		/* measure the stretch of ordinary bytes before the next tab */
		while (pos + run < size && line[pos + run] != '\t')
			run++;

		if (run > 0) {
			bufput(ob, line + pos, run);
			pos += run;
			column += run;
		}

		if (pos >= size)
			break;

		/* line[pos] is a tab: pad to the next stop, at least one space */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4 != 0);

		pos++;
	}
}
2391 | 
2392 | /**********************
2393 |  * EXPORTED FUNCTIONS *
2394 |  **********************/
2395 | 
2396 | struct sd_markdown *
2397 | sd_markdown_new(
2398 | 	unsigned int extensions,
2399 | 	size_t max_nesting,
2400 | 	const struct sd_callbacks *callbacks,
2401 | 	void *opaque)
2402 | {
2403 | 	struct sd_markdown *md = NULL;
2404 | 
2405 | 	assert(max_nesting > 0 && callbacks);
2406 | 
2407 | 	md = malloc(sizeof(struct sd_markdown));
2408 | 	if (!md)
2409 | 		return NULL;
2410 | 
2411 | 	memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2412 | 
2413 | 	stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2414 | 	stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2415 | 
2416 | 	memset(md->active_char, 0x0, 256);
2417 | 
2418 | 	if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2419 | 		md->active_char['*'] = MD_CHAR_EMPHASIS;
2420 | 		md->active_char['_'] = MD_CHAR_EMPHASIS;
2421 | 		if (extensions & MKDEXT_STRIKETHROUGH)
2422 | 			md->active_char['~'] = MD_CHAR_EMPHASIS;
2423 | 	}
2424 | 
2425 | 	if (md->cb.codespan)
2426 | 		md->active_char['`'] = MD_CHAR_CODESPAN;
2427 | 
2428 | 	if (md->cb.linebreak)
2429 | 		md->active_char['\n'] = MD_CHAR_LINEBREAK;
2430 | 
2431 | 	if (md->cb.image || md->cb.link)
2432 | 		md->active_char['['] = MD_CHAR_LINK;
2433 | 
2434 | 	md->active_char['<'] = MD_CHAR_LANGLE;
2435 | 	md->active_char['\\'] = MD_CHAR_ESCAPE;
2436 | 	md->active_char['&'] = MD_CHAR_ENTITITY;
2437 | 
2438 | 	if (extensions & MKDEXT_AUTOLINK) {
2439 | 		md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2440 | 		md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2441 | 		md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2442 | 	}
2443 | 
2444 | 	if (extensions & MKDEXT_SUPERSCRIPT)
2445 | 		md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2446 | 
2447 | 	/* Extension data */
2448 | 	md->ext_flags = extensions;
2449 | 	md->opaque = opaque;
2450 | 	md->max_nesting = max_nesting;
2451 | 	md->in_link_body = 0;
2452 | 
2453 | 	return md;
2454 | }
2455 | 
2456 | void
2457 | sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2458 | {
2459 | #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2460 | 	static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2461 | 
2462 | 	struct buf *text;
2463 | 	size_t beg, end;
2464 | 
2465 | 	text = bufnew(64);
2466 | 	if (!text)
2467 | 		return;
2468 | 
2469 | 	/* Preallocate enough space for our buffer to avoid expanding while copying */
2470 | 	bufgrow(text, doc_size);
2471 | 
2472 | 	/* reset the references table */
2473 | 	memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2474 | 
2475 | 	/* first pass: looking for references, copying everything else */
2476 | 	beg = 0;
2477 | 
2478 | 	/* Skip a possible UTF-8 BOM, even though the Unicode standard
2479 | 	 * discourages having these in UTF-8 documents */
2480 | 	if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2481 | 		beg += 3;
2482 | 
2483 | 	while (beg < doc_size) /* iterating over lines */
2484 | 		if (is_ref(document, beg, doc_size, &end, md->refs))
2485 | 			beg = end;
2486 | 		else { /* skipping to the next line */
2487 | 			end = beg;
2488 | 			while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2489 | 				end++;
2490 | 
2491 | 			/* adding the line body if present */
2492 | 			if (end > beg)
2493 | 				expand_tabs(text, document + beg, end - beg);
2494 | 
2495 | 			while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2496 | 				/* add one \n per newline */
2497 | 				if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2498 | 					bufputc(text, '\n');
2499 | 				end++;
2500 | 			}
2501 | 
2502 | 			beg = end;
2503 | 		}
2504 | 
2505 | 	/* pre-grow the output buffer to minimize allocations */
2506 | 	bufgrow(ob, MARKDOWN_GROW(text->size));
2507 | 
2508 | 	/* second pass: actual rendering */
2509 | 	if (md->cb.doc_header)
2510 | 		md->cb.doc_header(ob, md->opaque);
2511 | 
2512 | 	if (text->size) {
2513 | 		/* adding a final newline if not already present */
2514 | 		if (text->data[text->size - 1] != '\n' &&  text->data[text->size - 1] != '\r')
2515 | 			bufputc(text, '\n');
2516 | 
2517 | 		parse_block(ob, md, text->data, text->size);
2518 | 	}
2519 | 
2520 | 	if (md->cb.doc_footer)
2521 | 		md->cb.doc_footer(ob, md->opaque);
2522 | 
2523 | 	/* clean-up */
2524 | 	bufrelease(text);
2525 | 	free_link_refs(md->refs);
2526 | 
2527 | 	assert(md->work_bufs[BUFFER_SPAN].size == 0);
2528 | 	assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2529 | }
2530 | 
2531 | void
2532 | sd_markdown_free(struct sd_markdown *md)
2533 | {
2534 | 	size_t i;
2535 | 
2536 | 	for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2537 | 		bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2538 | 
2539 | 	for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2540 | 		bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2541 | 
2542 | 	stack_free(&md->work_bufs[BUFFER_SPAN]);
2543 | 	stack_free(&md->work_bufs[BUFFER_BLOCK]);
2544 | 
2545 | 	free(md);
2546 | }
2547 | 
2548 | void
2549 | sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2550 | {
2551 | 	*ver_major = SUNDOWN_VER_MAJOR;
2552 | 	*ver_minor = SUNDOWN_VER_MINOR;
2553 | 	*ver_revision = SUNDOWN_VER_REVISION;
2554 | }
2555 | 
2556 | /* vim: set filetype=c: */
2557 | 


--------------------------------------------------------------------------------