├── test_data └── test_file.md ├── .gitattributes ├── cmark_version.h ├── .gitignore ├── iterator.h ├── cmark_ctype.h ├── .travis.yml ├── inlines.h ├── utf8.h ├── parser.h ├── README.md ├── cmark_export.h ├── references.h ├── cmark.c ├── iter.go ├── render.h ├── houdini.h ├── config.h ├── cmark_ctype.c ├── node.h ├── houdini_html_e.c ├── buffer.h ├── scanners.h ├── chunk.h ├── commonmark.go ├── houdini_href_e.c ├── iterator.c ├── references.c ├── houdini_html_u.c ├── xml.c ├── man.c ├── LICENSE ├── render.c ├── buffer.c ├── scanners.re ├── html.c ├── utf8.c ├── commonmark_test.go ├── latex.c ├── node.go ├── commonmark.c ├── node.c └── cmark.h /test_data/test_file.md: -------------------------------------------------------------------------------- 1 | Test File 2 | ========= 3 | 4 | Description 5 | ----------- 6 | 7 | This is just a test file. 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.c linguist-vendored=true 2 | *.h linguist-vendored=true 3 | *.inc linguist-vendored=true 4 | *.gperf linguist-vendored=true 5 | *.re linguist-vendored=true 6 | -------------------------------------------------------------------------------- /cmark_version.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_VERSION_H 2 | #define CMARK_VERSION_H 3 | 4 | #define CMARK_VERSION ((0 << 16) | (26 << 8) | 1) 5 | #define CMARK_VERSION_STRING "0.26.1" 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | *.cgo1.go 14 | *.cgo2.c 15 | _cgo_defun.c 
16 | _cgo_gotypes.go 17 | _cgo_export.* 18 | _testmain.go 19 | *.exe 20 | *.test 21 | *.prof 22 | 23 | #Misc junk 24 | *.swp 25 | 26 | .idea/ 27 | *.iml 28 | -------------------------------------------------------------------------------- /iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_ITERATOR_H 2 | #define CMARK_ITERATOR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "cmark.h" 9 | #include "memory.h" 10 | 11 | typedef struct { 12 | cmark_event_type ev_type; 13 | cmark_node *node; 14 | } cmark_iter_state; 15 | 16 | struct cmark_iter { 17 | cmark_mem *mem; 18 | cmark_node *root; 19 | cmark_iter_state cur; 20 | cmark_iter_state next; 21 | }; 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /cmark_ctype.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CMARK_CTYPE_H 2 | #define CMARK_CMARK_CTYPE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /** Locale-independent versions of functions from ctype.h. 9 | * We want cmark to behave the same no matter what the system locale. 
10 | */ 11 | 12 | int cmark_isspace(char c); 13 | 14 | int cmark_ispunct(char c); 15 | 16 | int cmark_isalnum(char c); 17 | 18 | int cmark_isdigit(char c); 19 | 20 | int cmark_isalpha(char c); 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis build settings for go-commonmark 2 | language: go 3 | go: 4 | - 1.5 5 | - 1.6 6 | before_install: 7 | - echo "yes" | sudo add-apt-repository ppa:kalakris/cmake 8 | - sudo apt-get update -qq 9 | - sudo apt-get install gcc 10 | - sudo apt-get install build-essential 11 | - sudo apt-get install python3 12 | - sudo apt-get install re2c 13 | - sudo apt-get install pandoc 14 | - sudo apt-get install texlive-latex-recommended 15 | - cd $HOME/gopath/src/github.com/rhinoman/go-commonmark 16 | script: 17 | - go test -v ./... 18 | -------------------------------------------------------------------------------- /inlines.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_INLINES_H 2 | #define CMARK_INLINES_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); 9 | cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); 10 | 11 | void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, 12 | cmark_reference_map *refmap, int options); 13 | 14 | bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, 15 | cmark_reference_map *refmap); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /utf8.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_UTF8_H 2 | #define CMARK_UTF8_H 3 | 4 | #include 5 | #include "buffer.h" 6 | 7 | #ifdef 
__cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, 12 | bufsize_t len); 13 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); 14 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); 15 | void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, 16 | bufsize_t size); 17 | int cmark_utf8proc_is_space(int32_t uc); 18 | int cmark_utf8proc_is_punctuation(int32_t uc); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /parser.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_AST_H 2 | #define CMARK_AST_H 3 | 4 | #include 5 | #include "node.h" 6 | #include "buffer.h" 7 | #include "memory.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | #define MAX_LINK_LABEL_LENGTH 1000 14 | 15 | struct cmark_parser { 16 | struct cmark_mem *mem; 17 | struct cmark_reference_map *refmap; 18 | struct cmark_node *root; 19 | struct cmark_node *current; 20 | int line_number; 21 | bufsize_t offset; 22 | bufsize_t column; 23 | bufsize_t first_nonspace; 24 | bufsize_t first_nonspace_column; 25 | int indent; 26 | bool blank; 27 | bool partially_consumed_tab; 28 | cmark_strbuf curline; 29 | bufsize_t last_line_length; 30 | cmark_strbuf linebuf; 31 | int options; 32 | bool last_buffer_ended_with_cr; 33 | }; 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | go-commonmark 2 | ======= 3 | 4 | 5 | [![Build Status](https://travis-ci.org/rhinoman/go-commonmark.svg?branch=master)](https://travis-ci.org/rhinoman/go-commonmark) 6 | 7 | Description 8 | ----------- 9 | 10 | go-commonmark is a [Go](http://golang.org) (golang) 
wrapper for the [CommonMark](http://commonmark.org/) C library. 11 | 12 | 13 | Installation 14 | ------------ 15 | 16 | ``` 17 | go get github.com/rhinoman/go-commonmark 18 | ``` 19 | 20 | **Note:** The [cmark](https://github.com/jgm/cmark) C reference implementation has been folded into this repository, so there is no need to install it separately. It will be built automagically by cgo. 21 | 22 | Documentation 23 | ------------- 24 | 25 | See the Godoc: http://godoc.org/github.com/rhinoman/go-commonmark 26 | 27 | 28 | Example Usage 29 | ------------- 30 | If all you need is to convert CommonMark text to HTML, just do this (the second argument is the cmark options bitmask; 0 selects the defaults): 31 | 32 | ```go 33 | 34 | import "github.com/rhinoman/go-commonmark" 35 | 36 | ... 37 | 38 | htmlText := commonmark.Md2Html(mdText, 0) 39 | 40 | ``` 41 | -------------------------------------------------------------------------------- /cmark_export.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CMARK_EXPORT_H 3 | #define CMARK_EXPORT_H 4 | 5 | #ifdef CMARK_STATIC_DEFINE 6 | # define CMARK_EXPORT 7 | # define CMARK_NO_EXPORT 8 | #else 9 | # ifndef CMARK_EXPORT 10 | # ifdef libcmark_EXPORTS 11 | /* We are building this library */ 12 | # define CMARK_EXPORT __attribute__((visibility("default"))) 13 | # else 14 | /* We are using this library */ 15 | # define CMARK_EXPORT __attribute__((visibility("default"))) 16 | # endif 17 | # endif 18 | 19 | # ifndef CMARK_NO_EXPORT 20 | # define CMARK_NO_EXPORT __attribute__((visibility("hidden"))) 21 | # endif 22 | #endif 23 | 24 | #ifndef CMARK_DEPRECATED 25 | # define CMARK_DEPRECATED __attribute__ ((__deprecated__)) 26 | # define CMARK_DEPRECATED_EXPORT CMARK_EXPORT __attribute__ ((__deprecated__)) 27 | # define CMARK_DEPRECATED_NO_EXPORT CMARK_NO_EXPORT __attribute__ ((__deprecated__)) 28 | #endif 29 | 30 | #define DEFINE_NO_DEPRECATED 0 31 | #if DEFINE_NO_DEPRECATED 32 | # define CMARK_NO_DEPRECATED 33 | #endif 34 | 35 | #endif 36 | 
-------------------------------------------------------------------------------- /references.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_REFERENCES_H 2 | #define CMARK_REFERENCES_H 3 | 4 | #include "memory.h" 5 | #include "chunk.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #define REFMAP_SIZE 16 12 | 13 | struct cmark_reference { 14 | struct cmark_reference *next; 15 | unsigned char *label; 16 | cmark_chunk url; 17 | cmark_chunk title; 18 | unsigned int hash; 19 | }; 20 | 21 | typedef struct cmark_reference cmark_reference; 22 | 23 | struct cmark_reference_map { 24 | cmark_mem *mem; 25 | cmark_reference *table[REFMAP_SIZE]; 26 | }; 27 | 28 | typedef struct cmark_reference_map cmark_reference_map; 29 | 30 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem); 31 | void cmark_reference_map_free(cmark_reference_map *map); 32 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 33 | cmark_chunk *label); 34 | extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 35 | cmark_chunk *url, cmark_chunk *title); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /cmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "node.h" 5 | #include "houdini.h" 6 | #include "cmark.h" 7 | #include "buffer.h" 8 | 9 | int cmark_version() { return CMARK_VERSION; } 10 | 11 | const char *cmark_version_string() { return CMARK_VERSION_STRING; } 12 | 13 | static void *xcalloc(size_t nmem, size_t size) { 14 | void *ptr = calloc(nmem, size); 15 | if (!ptr) { 16 | fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n"); 17 | abort(); 18 | } 19 | return ptr; 20 | } 21 | 22 | static void *xrealloc(void *ptr, size_t size) { 23 | void *new_ptr = realloc(ptr, size); 24 | if 
(!new_ptr) { 25 | fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n"); 26 | abort(); 27 | } 28 | return new_ptr; 29 | } 30 | 31 | cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; 32 | 33 | char *cmark_markdown_to_html(const char *text, size_t len, int options) { 34 | cmark_node *doc; 35 | char *result; 36 | 37 | doc = cmark_parse_document(text, len, options); 38 | 39 | result = cmark_render_html(doc, options); 40 | cmark_node_free(doc); 41 | 42 | return result; 43 | } 44 | -------------------------------------------------------------------------------- /iter.go: -------------------------------------------------------------------------------- 1 | package commonmark 2 | 3 | /* 4 | #include 5 | #include "cmark.h" 6 | */ 7 | import "C" 8 | import ( 9 | "runtime" 10 | ) 11 | 12 | type CMarkEvent int 13 | 14 | const ( 15 | CMARK_EVENT_NONE CMarkEvent = iota 16 | CMARK_EVENT_DONE 17 | CMARK_EVENT_ENTER 18 | CMARK_EVENT_EXIT 19 | ) 20 | 21 | //Wraps a cmark_iter 22 | type CMarkIter struct { 23 | iter *C.cmark_iter 24 | } 25 | 26 | //Creates a new iterator starting with the given node. 27 | func NewCMarkIter(node *CMarkNode) *CMarkIter { 28 | iter := &CMarkIter{ 29 | iter: C.cmark_iter_new(node.node), 30 | } 31 | runtime.SetFinalizer(iter, (*CMarkIter).Free) 32 | return iter 33 | } 34 | 35 | //Returns the event type for the next node 36 | func (iter *CMarkIter) Next() CMarkEvent { 37 | ne := C.cmark_iter_next(iter.iter) 38 | return CMarkEvent(ne) 39 | } 40 | 41 | //Returns the next node in the sequence 42 | func (iter *CMarkIter) GetNode() *CMarkNode { 43 | return &CMarkNode{ 44 | node: C.cmark_iter_get_node(iter.iter), 45 | } 46 | 47 | } 48 | 49 | //Reset the iterator so the current node is 'current' and the 50 | //event type is 'event'. 
Use this to resume after 51 | //destructively modifying the tree structure 52 | func (iter *CMarkIter) Reset(current *CMarkNode, event CMarkEvent) { 53 | C.cmark_iter_reset(iter.iter, current.node, C.cmark_event_type(event)) 54 | } 55 | 56 | //Frees an iterator 57 | func (iter *CMarkIter) Free() { 58 | if iter.iter != nil { 59 | C.cmark_iter_free(iter.iter) 60 | } 61 | iter.iter = nil 62 | } 63 | -------------------------------------------------------------------------------- /render.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_RENDER_H 2 | #define CMARK_RENDER_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include "buffer.h" 10 | #include "chunk.h" 11 | #include "memory.h" 12 | 13 | typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping; 14 | 15 | struct cmark_renderer { 16 | cmark_mem *mem; 17 | cmark_strbuf *buffer; 18 | cmark_strbuf *prefix; 19 | int column; 20 | int width; 21 | int need_cr; 22 | bufsize_t last_breakable; 23 | bool begin_line; 24 | bool begin_content; 25 | bool no_linebreaks; 26 | bool in_tight_list_item; 27 | void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char); 28 | void (*cr)(struct cmark_renderer *); 29 | void (*blankline)(struct cmark_renderer *); 30 | void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping); 31 | }; 32 | 33 | typedef struct cmark_renderer cmark_renderer; 34 | 35 | void cmark_render_ascii(cmark_renderer *renderer, const char *s); 36 | 37 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); 38 | 39 | char *cmark_render(cmark_node *root, int options, int width, 40 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 41 | unsigned char), 42 | int (*render_node)(cmark_renderer *renderer, 43 | cmark_node *node, 44 | cmark_event_type ev_type, int options)); 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | 50 | #endif 51 | 
-------------------------------------------------------------------------------- /houdini.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_HOUDINI_H 2 | #define CMARK_HOUDINI_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include "config.h" 10 | #include "buffer.h" 11 | 12 | #ifdef HAVE___BUILTIN_EXPECT 13 | #define likely(x) __builtin_expect((x), 1) 14 | #define unlikely(x) __builtin_expect((x), 0) 15 | #else 16 | #define likely(x) (x) 17 | #define unlikely(x) (x) 18 | #endif 19 | 20 | #ifdef HOUDINI_USE_LOCALE 21 | #define _isxdigit(c) isxdigit(c) 22 | #define _isdigit(c) isdigit(c) 23 | #else 24 | /* 25 | * Helper _isdigit methods -- do not trust the current locale 26 | * */ 27 | #define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) 28 | #define _isdigit(c) ((c) >= '0' && (c) <= '9') 29 | #endif 30 | 31 | #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10) 32 | #define HOUDINI_UNESCAPED_SIZE(x) (x) 33 | 34 | extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 35 | bufsize_t size); 36 | extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, 37 | bufsize_t size); 38 | extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, 39 | bufsize_t size, int secure); 40 | extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 41 | bufsize_t size); 42 | extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 43 | bufsize_t size); 44 | extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, 45 | bufsize_t size); 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CONFIG_H 2 | #define CMARK_CONFIG_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define 
HAVE_STDBOOL_H 9 | 10 | #ifdef HAVE_STDBOOL_H 11 | #include 12 | #elif !defined(__cplusplus) 13 | typedef char bool; 14 | #endif 15 | 16 | #define HAVE___BUILTIN_EXPECT 17 | 18 | #define HAVE___ATTRIBUTE__ 19 | 20 | #ifdef HAVE___ATTRIBUTE__ 21 | #define CMARK_ATTRIBUTE(list) __attribute__ (list) 22 | #else 23 | #define CMARK_ATTRIBUTE(list) 24 | #endif 25 | 26 | #ifndef CMARK_INLINE 27 | #if defined(_MSC_VER) && !defined(__cplusplus) 28 | #define CMARK_INLINE __inline 29 | #else 30 | #define CMARK_INLINE inline 31 | #endif 32 | #endif 33 | 34 | /* snprintf and vsnprintf fallbacks for MSVC before 2015, 35 | due to Valentin Milea http://stackoverflow.com/questions/2915672/ 36 | */ 37 | 38 | #if defined(_MSC_VER) && _MSC_VER < 1900 39 | 40 | #include 41 | #include 42 | 43 | #define snprintf c99_snprintf 44 | #define vsnprintf c99_vsnprintf 45 | 46 | CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap) 47 | { 48 | int count = -1; 49 | 50 | if (size != 0) 51 | count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap); 52 | if (count == -1) 53 | count = _vscprintf(format, ap); 54 | 55 | return count; 56 | } 57 | 58 | CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...) 
59 | { 60 | int count; 61 | va_list ap; 62 | 63 | va_start(ap, format); 64 | count = c99_vsnprintf(outBuf, size, format, ap); 65 | va_end(ap); 66 | 67 | return count; 68 | } 69 | 70 | #endif 71 | 72 | #ifdef __cplusplus 73 | } 74 | #endif 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /cmark_ctype.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "cmark_ctype.h" 4 | 5 | /** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other 6 | */ 7 | static const uint8_t cmark_ctype_class[256] = { 8 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 9 | /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 10 | /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11 | /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12 | /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 13 | /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14 | /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 15 | /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 16 | /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, 17 | /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 | /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 | /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 | /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 | /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 | /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 25 | 26 | /** 27 | * Returns 1 if c is a "whitespace" character as defined by the spec. 28 | */ 29 | int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; } 30 | 31 | /** 32 | * Returns 1 if c is an ascii punctuation character. 
33 | */ 34 | int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; } 35 | 36 | int cmark_isalnum(char c) { 37 | uint8_t result; 38 | result = cmark_ctype_class[(uint8_t)c]; 39 | return (result == 3 || result == 4); 40 | } 41 | 42 | int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; } 43 | 44 | int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; } 45 | -------------------------------------------------------------------------------- /node.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_NODE_H 2 | #define CMARK_NODE_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #include "cmark.h" 12 | #include "buffer.h" 13 | #include "chunk.h" 14 | 15 | typedef struct { 16 | cmark_list_type list_type; 17 | int marker_offset; 18 | int padding; 19 | int start; 20 | cmark_delim_type delimiter; 21 | unsigned char bullet_char; 22 | bool tight; 23 | } cmark_list; 24 | 25 | typedef struct { 26 | cmark_chunk info; 27 | cmark_chunk literal; 28 | uint8_t fence_length; 29 | uint8_t fence_offset; 30 | unsigned char fence_char; 31 | int8_t fenced; 32 | } cmark_code; 33 | 34 | typedef struct { 35 | int level; 36 | bool setext; 37 | } cmark_heading; 38 | 39 | typedef struct { 40 | cmark_chunk url; 41 | cmark_chunk title; 42 | } cmark_link; 43 | 44 | typedef struct { 45 | cmark_chunk on_enter; 46 | cmark_chunk on_exit; 47 | } cmark_custom; 48 | 49 | enum cmark_node__internal_flags { 50 | CMARK_NODE__OPEN = (1 << 0), 51 | CMARK_NODE__LAST_LINE_BLANK = (1 << 1), 52 | }; 53 | 54 | struct cmark_node { 55 | cmark_strbuf content; 56 | 57 | struct cmark_node *next; 58 | struct cmark_node *prev; 59 | struct cmark_node *parent; 60 | struct cmark_node *first_child; 61 | struct cmark_node *last_child; 62 | 63 | void *user_data; 64 | 65 | int start_line; 66 | int start_column; 67 | int end_line; 68 | int end_column; 69 | uint16_t type; 70 | uint16_t 
flags; 71 | 72 | union { 73 | cmark_chunk literal; 74 | cmark_list list; 75 | cmark_code code; 76 | cmark_heading heading; 77 | cmark_link link; 78 | cmark_custom custom; 79 | int html_block_type; 80 | } as; 81 | }; 82 | 83 | static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) { 84 | return node->content.mem; 85 | } 86 | CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); 87 | 88 | #ifdef __cplusplus 89 | } 90 | #endif 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /houdini_html_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | /** 8 | * According to the OWASP rules: 9 | * 10 | * & --> &amp; 11 | * < --> &lt; 12 | * > --> &gt; 13 | * " --> &quot; 14 | * ' --> &#x27; &apos; is not recommended 15 | * / --> &#x2F; forward slash is included as it helps end an HTML entity 16 | * 17 | */ 18 | static const char HTML_ESCAPE_TABLE[] = { 19 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, 21 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30 | }; 31 | 32 | static const char *HTML_ESCAPES[] = {"", """, "&", "'", 33 | "/", "<", ">"}; 34 | 35 | int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, 
bufsize_t size, 36 | int secure) { 37 | bufsize_t i = 0, org, esc = 0; 38 | 39 | while (i < size) { 40 | org = i; 41 | while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) 42 | i++; 43 | 44 | if (i > org) 45 | cmark_strbuf_put(ob, src + org, i - org); 46 | 47 | /* escaping */ 48 | if (unlikely(i >= size)) 49 | break; 50 | 51 | /* The forward slash is only escaped in secure mode */ 52 | if ((src[i] == '/' || src[i] == '\'') && !secure) { 53 | cmark_strbuf_putc(ob, src[i]); 54 | } else { 55 | cmark_strbuf_puts(ob, HTML_ESCAPES[esc]); 56 | } 57 | 58 | i++; 59 | } 60 | 61 | return 1; 62 | } 63 | 64 | int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { 65 | return houdini_escape_html0(ob, src, size, 1); 66 | } 67 | -------------------------------------------------------------------------------- /buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_BUFFER_H 2 | #define CMARK_BUFFER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "config.h" 10 | #include "cmark.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | typedef int32_t bufsize_t; 17 | 18 | typedef struct { 19 | cmark_mem *mem; 20 | unsigned char *ptr; 21 | bufsize_t asize, size; 22 | } cmark_strbuf; 23 | 24 | extern unsigned char cmark_strbuf__initbuf[]; 25 | 26 | #define CMARK_BUF_INIT(mem) \ 27 | { mem, cmark_strbuf__initbuf, 0, 0 } 28 | 29 | /** 30 | * Initialize a cmark_strbuf structure. 31 | * 32 | * For the cases where CMARK_BUF_INIT cannot be used to do static 33 | * initialization. 34 | */ 35 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 36 | bufsize_t initial_size); 37 | 38 | /** 39 | * Grow the buffer to hold at least `target_size` bytes. 
40 | */ 41 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); 42 | 43 | void cmark_strbuf_free(cmark_strbuf *buf); 44 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); 45 | 46 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); 47 | 48 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); 49 | 50 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); 51 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, 52 | const cmark_strbuf *buf); 53 | 54 | static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { 55 | return (char *)buf->ptr; 56 | } 57 | 58 | #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) 59 | 60 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 61 | bufsize_t len); 62 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); 63 | void cmark_strbuf_putc(cmark_strbuf *buf, int c); 64 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 65 | bufsize_t len); 66 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); 67 | void cmark_strbuf_clear(cmark_strbuf *buf); 68 | 69 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); 70 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); 71 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); 72 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); 73 | void cmark_strbuf_rtrim(cmark_strbuf *buf); 74 | void cmark_strbuf_trim(cmark_strbuf *buf); 75 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); 76 | void cmark_strbuf_unescape(cmark_strbuf *s); 77 | 78 | #ifdef __cplusplus 79 | } 80 | #endif 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /scanners.h: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "chunk.h" 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | bufsize_t 
_scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, 9 | bufsize_t offset); 10 | bufsize_t _scan_scheme(const unsigned char *p); 11 | bufsize_t _scan_autolink_uri(const unsigned char *p); 12 | bufsize_t _scan_autolink_email(const unsigned char *p); 13 | bufsize_t _scan_html_tag(const unsigned char *p); 14 | bufsize_t _scan_html_block_start(const unsigned char *p); 15 | bufsize_t _scan_html_block_start_7(const unsigned char *p); 16 | bufsize_t _scan_html_block_end_1(const unsigned char *p); 17 | bufsize_t _scan_html_block_end_2(const unsigned char *p); 18 | bufsize_t _scan_html_block_end_3(const unsigned char *p); 19 | bufsize_t _scan_html_block_end_4(const unsigned char *p); 20 | bufsize_t _scan_html_block_end_5(const unsigned char *p); 21 | bufsize_t _scan_link_title(const unsigned char *p); 22 | bufsize_t _scan_spacechars(const unsigned char *p); 23 | bufsize_t _scan_atx_heading_start(const unsigned char *p); 24 | bufsize_t _scan_setext_heading_line(const unsigned char *p); 25 | bufsize_t _scan_thematic_break(const unsigned char *p); 26 | bufsize_t _scan_open_code_fence(const unsigned char *p); 27 | bufsize_t _scan_close_code_fence(const unsigned char *p); 28 | bufsize_t _scan_entity(const unsigned char *p); 29 | bufsize_t _scan_dangerous_url(const unsigned char *p); 30 | 31 | #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) 32 | #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) 33 | #define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n) 34 | #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) 35 | #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) 36 | #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) 37 | #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) 38 | #define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n) 39 | #define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n) 40 
| #define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n) 41 | #define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n) 42 | #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n) 43 | #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n) 44 | #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n) 45 | #define scan_setext_heading_line(c, n) \ 46 | _scan_at(&_scan_setext_heading_line, c, n) 47 | #define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n) 48 | #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n) 49 | #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n) 50 | #define scan_entity(c, n) _scan_at(&_scan_entity, c, n) 51 | #define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n) 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | -------------------------------------------------------------------------------- /chunk.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_CHUNK_H 2 | #define CMARK_CHUNK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "cmark.h" 8 | #include "buffer.h" 9 | #include "memory.h" 10 | #include "cmark_ctype.h" 11 | 12 | #define CMARK_CHUNK_EMPTY \ 13 | { NULL, 0, 0 } 14 | 15 | typedef struct { 16 | unsigned char *data; 17 | bufsize_t len; 18 | bufsize_t alloc; // also implies a NULL-terminated string 19 | } cmark_chunk; 20 | 21 | static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) { 22 | if (c->alloc) 23 | mem->free(c->data); 24 | 25 | c->data = NULL; 26 | c->alloc = 0; 27 | c->len = 0; 28 | } 29 | 30 | static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) { 31 | assert(!c->alloc); 32 | 33 | while (c->len && cmark_isspace(c->data[0])) { 34 | c->data++; 35 | c->len--; 36 | } 37 | } 38 | 39 | static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) { 40 | assert(!c->alloc); 41 | 42 | while (c->len > 
0) { 43 | if (!cmark_isspace(c->data[c->len - 1])) 44 | break; 45 | 46 | c->len--; 47 | } 48 | } 49 | 50 | static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) { 51 | cmark_chunk_ltrim(c); 52 | cmark_chunk_rtrim(c); 53 | } 54 | 55 | static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, 56 | bufsize_t offset) { 57 | const unsigned char *p = 58 | (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); 59 | return p ? (bufsize_t)(p - ch->data) : ch->len; 60 | } 61 | 62 | static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem, 63 | cmark_chunk *c) { 64 | unsigned char *str; 65 | 66 | if (c->alloc) { 67 | return (char *)c->data; 68 | } 69 | str = (unsigned char *)mem->calloc(c->len + 1, 1); 70 | if (c->len > 0) { 71 | memcpy(str, c->data, c->len); 72 | } 73 | str[c->len] = 0; 74 | c->data = str; 75 | c->alloc = 1; 76 | 77 | return (char *)str; 78 | } 79 | 80 | static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c, 81 | const char *str) { 82 | unsigned char *old = c->alloc ? c->data : NULL; 83 | if (str == NULL) { 84 | c->len = 0; 85 | c->data = NULL; 86 | c->alloc = 0; 87 | } else { 88 | c->len = (bufsize_t)strlen(str); 89 | c->data = (unsigned char *)mem->calloc(c->len + 1, 1); 90 | c->alloc = 1; 91 | memcpy(c->data, str, c->len + 1); 92 | } 93 | if (old != NULL) { 94 | mem->free(old); 95 | } 96 | } 97 | 98 | static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) { 99 | bufsize_t len = data ? 
(bufsize_t)strlen(data) : 0; 100 | cmark_chunk c = {(unsigned char *)data, len, 0}; 101 | return c; 102 | } 103 | 104 | static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, 105 | bufsize_t pos, bufsize_t len) { 106 | cmark_chunk c = {ch->data + pos, len, 0}; 107 | return c; 108 | } 109 | 110 | static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) { 111 | cmark_chunk c; 112 | 113 | c.len = buf->size; 114 | c.data = cmark_strbuf_detach(buf); 115 | c.alloc = 1; 116 | 117 | return c; 118 | } 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /commonmark.go: -------------------------------------------------------------------------------- 1 | //Package commonmark provides a Go wrapper for the CommonMark C Library 2 | package commonmark 3 | 4 | /* 5 | #cgo CFLAGS: -std=gnu99 6 | #include 7 | #include 8 | #include "cmark.h" 9 | */ 10 | import "C" 11 | import ( 12 | "errors" 13 | "runtime" 14 | "strings" 15 | "unsafe" 16 | ) 17 | 18 | // Converts Markdo--, er, CommonMark text to Html. 19 | // Parameter mdtext contains CommonMark text. 20 | // The return value is the HTML string 21 | func Md2Html(mdtext string, options int) string { 22 | //The call to cmark will barf if the input string doesn't end with a newline 23 | if !strings.HasSuffix(mdtext, "\n") { 24 | mdtext += "\n" 25 | } 26 | mdCstr := C.CString(mdtext) 27 | strLen := C.size_t(len(mdtext)) 28 | defer C.free(unsafe.Pointer(mdCstr)) 29 | htmlString := C.cmark_markdown_to_html(mdCstr, strLen, C.int(options)) 30 | defer C.free(unsafe.Pointer(htmlString)) 31 | return C.GoString(htmlString) 32 | } 33 | 34 | //Wraps the cmark_doc_parser 35 | type CMarkParser struct { 36 | parser *C.struct_cmark_parser 37 | } 38 | 39 | // Returns a new CMark Parser. 40 | // You must call Free() on this thing when you're done with it! 41 | // Please.
42 | func NewCmarkParser(options int) *CMarkParser { 43 | p := &CMarkParser{ 44 | parser: C.cmark_parser_new(C.int(options)), 45 | } 46 | runtime.SetFinalizer(p, (*CMarkParser).Free) 47 | return p 48 | } 49 | 50 | // Process some text 51 | func (cmp *CMarkParser) Feed(text string) { 52 | s := len(text) 53 | cstr := C.CString(text) 54 | defer C.free(unsafe.Pointer(cstr)) 55 | C.cmark_parser_feed(cmp.parser, cstr, C.size_t(s)) 56 | } 57 | 58 | // Finish parsing and generate a document 59 | // You must call Free() on the document when you're done with it! 60 | func (cmp *CMarkParser) Finish() *CMarkNode { 61 | n := &CMarkNode{ 62 | node: C.cmark_parser_finish(cmp.parser), 63 | } 64 | runtime.SetFinalizer(n, (*CMarkNode).Free) 65 | return n 66 | } 67 | 68 | // Cleanup the parser 69 | // Once you call Free on this, you can't use it anymore 70 | func (cmp *CMarkParser) Free() { 71 | if cmp.parser != nil { 72 | C.cmark_parser_free(cmp.parser) 73 | } 74 | cmp.parser = nil 75 | } 76 | 77 | // Generates a document directly from a string 78 | func ParseDocument(buffer string, options int) *CMarkNode { 79 | if !strings.HasSuffix(buffer, "\n") { 80 | buffer += "\n" 81 | } 82 | Cstr := C.CString(buffer) 83 | Clen := C.size_t(len(buffer)) 84 | defer C.free(unsafe.Pointer(Cstr)) 85 | n := &CMarkNode{ 86 | node: C.cmark_parse_document(Cstr, Clen, C.int(options)), 87 | } 88 | runtime.SetFinalizer(n, (*CMarkNode).Free) 89 | return n 90 | } 91 | 92 | // Parses a file and returns a CMarkNode 93 | // Returns an error if the file can't be opened 94 | func ParseFile(filename string, options int) (*CMarkNode, error) { 95 | fname := C.CString(filename) 96 | access := C.CString("r") 97 | defer C.free(unsafe.Pointer(fname)) 98 | defer C.free(unsafe.Pointer(access)) 99 | file := C.fopen(fname, access) 100 | if file == nil { 101 | return nil, errors.New("Unable to open file with name: " + filename) 102 | } 103 | defer C.fclose(file) 104 | n := &CMarkNode{ 105 | node: C.cmark_parse_file(file, 
C.int(options)), 106 | } 107 | runtime.SetFinalizer(n, (*CMarkNode).Free) 108 | return n, nil 109 | } 110 | 111 | //Version information 112 | func CMarkVersion() int { 113 | return int(C.cmark_version()) 114 | } 115 | -------------------------------------------------------------------------------- /houdini_href_e.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "houdini.h" 6 | 7 | /* 8 | * The following characters will not be escaped: 9 | * 10 | * -_.+!*'(),%#@?=;:/,+&$ alphanum 11 | * 12 | * Note that this character set is the addition of: 13 | * 14 | * - The characters which are safe to be in an URL 15 | * - The characters which are *not* safe to be in 16 | * an URL because they are RESERVED characters. 17 | * 18 | * We assume (lazily) that any RESERVED char that 19 | * appears inside an URL is actually meant to 20 | * have its native function (i.e. as an URL 21 | * component/separator) and hence needs no escaping. 22 | * 23 | * There are two exceptions: the characters & (amp) 24 | * and ' (single quote) do not appear in the table. 25 | * They are meant to appear in the URL as components, 26 | * yet they require special HTML-entity escaping 27 | * to generate valid HTML markup. 28 | * 29 | * All other characters will be escaped to %XX.
30 | * 31 | */ 32 | static const char HREF_SAFE[] = { 33 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 37 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 | }; 45 | 46 | int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { 47 | static const uint8_t hex_chars[] = "0123456789ABCDEF"; 48 | bufsize_t i = 0, org; 49 | uint8_t hex_str[3]; 50 | 51 | hex_str[0] = '%'; 52 | 53 | while (i < size) { 54 | org = i; 55 | while (i < size && HREF_SAFE[src[i]] != 0) 56 | i++; 57 | 58 | if (likely(i > org)) 59 | cmark_strbuf_put(ob, src + org, i - org); 60 | 61 | /* escaping */ 62 | if (i >= size) 63 | break; 64 | 65 | switch (src[i]) { 66 | /* amp appears all the time in URLs, but needs 67 | * HTML-entity escaping to be inside an href */ 68 | case '&': 69 | cmark_strbuf_puts(ob, "&amp;"); 70 | break; 71 | 72 | /* the single quote is a valid URL character 73 | * according to the standard; it needs HTML 74 | * entity escaping too */ 75 | case '\'': 76 | cmark_strbuf_puts(ob, "&#x27;"); 77 | break; 78 | 79 | /* the space can be escaped to %20 or a plus 80 | * sign. we're going with the generic escape 81 | * for now.
the plus thing is more commonly seen 82 | * when building GET strings */ 83 | #if 0 84 | case ' ': 85 | cmark_strbuf_putc(ob, '+'); 86 | break; 87 | #endif 88 | 89 | /* every other character goes with a %XX escaping */ 90 | default: 91 | hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; 92 | hex_str[2] = hex_chars[src[i] & 0xF]; 93 | cmark_strbuf_put(ob, hex_str, 3); 94 | } 95 | 96 | i++; 97 | } 98 | 99 | return 1; 100 | } 101 | -------------------------------------------------------------------------------- /iterator.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "config.h" 5 | #include "node.h" 6 | #include "cmark.h" 7 | #include "iterator.h" 8 | 9 | static const int S_leaf_mask = 10 | (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) | 11 | (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | 12 | (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | 13 | (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); 14 | 15 | cmark_iter *cmark_iter_new(cmark_node *root) { 16 | if (root == NULL) { 17 | return NULL; 18 | } 19 | cmark_mem *mem = root->content.mem; 20 | cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter)); 21 | iter->mem = mem; 22 | iter->root = root; 23 | iter->cur.ev_type = CMARK_EVENT_NONE; 24 | iter->cur.node = NULL; 25 | iter->next.ev_type = CMARK_EVENT_ENTER; 26 | iter->next.node = root; 27 | return iter; 28 | } 29 | 30 | void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } 31 | 32 | static bool S_is_leaf(cmark_node *node) { 33 | return ((1 << node->type) & S_leaf_mask) != 0; 34 | } 35 | 36 | cmark_event_type cmark_iter_next(cmark_iter *iter) { 37 | cmark_event_type ev_type = iter->next.ev_type; 38 | cmark_node *node = iter->next.node; 39 | 40 | iter->cur.ev_type = ev_type; 41 | iter->cur.node = node; 42 | 43 | if (ev_type == CMARK_EVENT_DONE) { 44 | return ev_type; 45 | } 46 | 47 | /* roll forward to next item, setting both 
fields */ 48 | if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { 49 | if (node->first_child == NULL) { 50 | /* stay on this node but exit */ 51 | iter->next.ev_type = CMARK_EVENT_EXIT; 52 | } else { 53 | iter->next.ev_type = CMARK_EVENT_ENTER; 54 | iter->next.node = node->first_child; 55 | } 56 | } else if (node == iter->root) { 57 | /* don't move past root */ 58 | iter->next.ev_type = CMARK_EVENT_DONE; 59 | iter->next.node = NULL; 60 | } else if (node->next) { 61 | iter->next.ev_type = CMARK_EVENT_ENTER; 62 | iter->next.node = node->next; 63 | } else if (node->parent) { 64 | iter->next.ev_type = CMARK_EVENT_EXIT; 65 | iter->next.node = node->parent; 66 | } else { 67 | assert(false); 68 | iter->next.ev_type = CMARK_EVENT_DONE; 69 | iter->next.node = NULL; 70 | } 71 | 72 | return ev_type; 73 | } 74 | 75 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current, 76 | cmark_event_type event_type) { 77 | iter->next.ev_type = event_type; 78 | iter->next.node = current; 79 | cmark_iter_next(iter); 80 | } 81 | 82 | cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; } 83 | 84 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) { 85 | return iter->cur.ev_type; 86 | } 87 | 88 | cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; } 89 | 90 | void cmark_consolidate_text_nodes(cmark_node *root) { 91 | if (root == NULL) { 92 | return; 93 | } 94 | cmark_iter *iter = cmark_iter_new(root); 95 | cmark_strbuf buf = CMARK_BUF_INIT(iter->mem); 96 | cmark_event_type ev_type; 97 | cmark_node *cur, *tmp, *next; 98 | 99 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 100 | cur = cmark_iter_get_node(iter); 101 | if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT && 102 | cur->next && cur->next->type == CMARK_NODE_TEXT) { 103 | cmark_strbuf_clear(&buf); 104 | cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len); 105 | tmp = cur->next; 106 | while (tmp && tmp->type == CMARK_NODE_TEXT) 
{ 107 | cmark_iter_next(iter); // advance pointer 108 | cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); 109 | next = tmp->next; 110 | cmark_node_free(tmp); 111 | tmp = next; 112 | } 113 | cmark_chunk_free(iter->mem, &cur->as.literal); 114 | cur->as.literal = cmark_chunk_buf_detach(&buf); 115 | } 116 | } 117 | 118 | cmark_strbuf_free(&buf); 119 | cmark_iter_free(iter); 120 | } 121 | -------------------------------------------------------------------------------- /references.c: -------------------------------------------------------------------------------- 1 | #include "cmark.h" 2 | #include "utf8.h" 3 | #include "parser.h" 4 | #include "references.h" 5 | #include "inlines.h" 6 | #include "chunk.h" 7 | 8 | static unsigned int refhash(const unsigned char *link_ref) { 9 | unsigned int hash = 0; 10 | 11 | while (*link_ref) 12 | hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; 13 | 14 | return hash; 15 | } 16 | 17 | static void reference_free(cmark_reference_map *map, cmark_reference *ref) { 18 | cmark_mem *mem = map->mem; 19 | if (ref != NULL) { 20 | mem->free(ref->label); 21 | cmark_chunk_free(mem, &ref->url); 22 | cmark_chunk_free(mem, &ref->title); 23 | mem->free(ref); 24 | } 25 | } 26 | 27 | // normalize reference: collapse internal whitespace to single space, 28 | // remove leading/trailing whitespace, case fold 29 | // Return NULL if the reference name is actually empty (i.e. 
composed 30 | // solely from whitespace) 31 | static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { 32 | cmark_strbuf normalized = CMARK_BUF_INIT(mem); 33 | unsigned char *result; 34 | 35 | if (ref == NULL) 36 | return NULL; 37 | 38 | if (ref->len == 0) 39 | return NULL; 40 | 41 | cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); 42 | cmark_strbuf_trim(&normalized); 43 | cmark_strbuf_normalize_whitespace(&normalized); 44 | 45 | result = cmark_strbuf_detach(&normalized); 46 | assert(result); 47 | 48 | if (result[0] == '\0') { 49 | mem->free(result); 50 | return NULL; 51 | } 52 | 53 | return result; 54 | } 55 | 56 | static void add_reference(cmark_reference_map *map, cmark_reference *ref) { 57 | cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE]; 58 | 59 | while (t) { 60 | if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) { 61 | reference_free(map, ref); 62 | return; 63 | } 64 | 65 | t = t->next; 66 | } 67 | 68 | map->table[ref->hash % REFMAP_SIZE] = ref; 69 | } 70 | 71 | void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, 72 | cmark_chunk *url, cmark_chunk *title) { 73 | cmark_reference *ref; 74 | unsigned char *reflabel = normalize_reference(map->mem, label); 75 | 76 | /* empty reference name, or composed from only whitespace */ 77 | if (reflabel == NULL) 78 | return; 79 | 80 | ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); 81 | ref->label = reflabel; 82 | ref->hash = refhash(ref->label); 83 | ref->url = cmark_clean_url(map->mem, url); 84 | ref->title = cmark_clean_title(map->mem, title); 85 | ref->next = NULL; 86 | 87 | add_reference(map, ref); 88 | } 89 | 90 | // Returns reference if refmap contains a reference with matching 91 | // label, otherwise NULL. 
92 | cmark_reference *cmark_reference_lookup(cmark_reference_map *map, 93 | cmark_chunk *label) { 94 | cmark_reference *ref = NULL; 95 | unsigned char *norm; 96 | unsigned int hash; 97 | 98 | if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) 99 | return NULL; 100 | 101 | if (map == NULL) 102 | return NULL; 103 | 104 | norm = normalize_reference(map->mem, label); 105 | if (norm == NULL) 106 | return NULL; 107 | 108 | hash = refhash(norm); 109 | ref = map->table[hash % REFMAP_SIZE]; 110 | 111 | while (ref) { 112 | if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm)) 113 | break; 114 | ref = ref->next; 115 | } 116 | 117 | map->mem->free(norm); 118 | return ref; 119 | } 120 | 121 | void cmark_reference_map_free(cmark_reference_map *map) { 122 | unsigned int i; 123 | 124 | if (map == NULL) 125 | return; 126 | 127 | for (i = 0; i < REFMAP_SIZE; ++i) { 128 | cmark_reference *ref = map->table[i]; 129 | cmark_reference *next; 130 | 131 | while (ref) { 132 | next = ref->next; 133 | reference_free(map, ref); 134 | ref = next; 135 | } 136 | } 137 | 138 | map->mem->free(map); 139 | } 140 | 141 | cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) { 142 | cmark_reference_map *map = 143 | (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map)); 144 | map->mem = mem; 145 | return map; 146 | } 147 | -------------------------------------------------------------------------------- /houdini_html_u.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "buffer.h" 6 | #include "houdini.h" 7 | #include "utf8.h" 8 | #include "entities.inc" 9 | 10 | /* Binary tree lookup code for entities added by JGM */ 11 | 12 | static const unsigned char *S_lookup(int i, int low, int hi, 13 | const unsigned char *s, int len) { 14 | int j; 15 | int cmp = 16 | strncmp((const char *)s, (const char *)cmark_entities[i].entity, len); 17 | if (cmp == 0 && cmark_entities[i].entity[len] 
== 0) { 18 | return (const unsigned char *)cmark_entities[i].bytes; 19 | } else if (cmp <= 0 && i > low) { 20 | j = i - ((i - low) / 2); 21 | if (j == i) 22 | j -= 1; 23 | return S_lookup(j, low, i - 1, s, len); 24 | } else if (cmp > 0 && i < hi) { 25 | j = i + ((hi - i) / 2); 26 | if (j == i) 27 | j += 1; 28 | return S_lookup(j, i + 1, hi, s, len); 29 | } else { 30 | return NULL; 31 | } 32 | } 33 | 34 | static const unsigned char *S_lookup_entity(const unsigned char *s, int len) { 35 | return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len); 36 | } 37 | 38 | bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, 39 | bufsize_t size) { 40 | bufsize_t i = 0; 41 | 42 | if (size >= 3 && src[0] == '#') { 43 | int codepoint = 0; 44 | int num_digits = 0; 45 | 46 | if (_isdigit(src[1])) { 47 | for (i = 1; i < size && _isdigit(src[i]); ++i) { 48 | codepoint = (codepoint * 10) + (src[i] - '0'); 49 | 50 | if (codepoint >= 0x110000) { 51 | // Keep counting digits but 52 | // avoid integer overflow. 53 | codepoint = 0x110000; 54 | } 55 | } 56 | 57 | num_digits = i - 1; 58 | } 59 | 60 | else if (src[1] == 'x' || src[1] == 'X') { 61 | for (i = 2; i < size && _isxdigit(src[i]); ++i) { 62 | codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); 63 | 64 | if (codepoint >= 0x110000) { 65 | // Keep counting digits but 66 | // avoid integer overflow. 
67 | codepoint = 0x110000; 68 | } 69 | } 70 | 71 | num_digits = i - 2; 72 | } 73 | 74 | if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') { 75 | if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) || 76 | codepoint >= 0x110000) { 77 | codepoint = 0xFFFD; 78 | } 79 | cmark_utf8proc_encode_char(codepoint, ob); 80 | return i + 1; 81 | } 82 | } 83 | 84 | else { 85 | if (size > CMARK_ENTITY_MAX_LENGTH) 86 | size = CMARK_ENTITY_MAX_LENGTH; 87 | 88 | for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) { 89 | if (src[i] == ' ') 90 | break; 91 | 92 | if (src[i] == ';') { 93 | const unsigned char *entity = S_lookup_entity(src, i); 94 | 95 | if (entity != NULL) { 96 | cmark_strbuf_puts(ob, (const char *)entity); 97 | return i + 1; 98 | } 99 | 100 | break; 101 | } 102 | } 103 | } 104 | 105 | return 0; 106 | } 107 | 108 | int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, 109 | bufsize_t size) { 110 | bufsize_t i = 0, org, ent; 111 | 112 | while (i < size) { 113 | org = i; 114 | while (i < size && src[i] != '&') 115 | i++; 116 | 117 | if (likely(i > org)) { 118 | if (unlikely(org == 0)) { 119 | if (i >= size) 120 | return 0; 121 | 122 | cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); 123 | } 124 | 125 | cmark_strbuf_put(ob, src + org, i - org); 126 | } 127 | 128 | /* escaping */ 129 | if (i >= size) 130 | break; 131 | 132 | i++; 133 | 134 | ent = houdini_unescape_ent(ob, src + i, size - i); 135 | i += ent; 136 | 137 | /* not really an entity */ 138 | if (ent == 0) 139 | cmark_strbuf_putc(ob, '&'); 140 | } 141 | 142 | return 1; 143 | } 144 | 145 | void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, 146 | bufsize_t size) { 147 | if (!houdini_unescape_html(ob, src, size)) 148 | cmark_strbuf_put(ob, src, size); 149 | } 150 | -------------------------------------------------------------------------------- /xml.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 
| #include 4 | #include 5 | 6 | #include "config.h" 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "houdini.h" 11 | 12 | #define BUFFER_SIZE 100 13 | 14 | // Functions to convert cmark_nodes to XML strings. 15 | 16 | static void escape_xml(cmark_strbuf *dest, const unsigned char *source, 17 | bufsize_t length) { 18 | houdini_escape_html0(dest, source, length, 0); 19 | } 20 | 21 | struct render_state { 22 | cmark_strbuf *xml; 23 | int indent; 24 | }; 25 | 26 | static CMARK_INLINE void indent(struct render_state *state) { 27 | int i; 28 | for (i = 0; i < state->indent; i++) { 29 | cmark_strbuf_putc(state->xml, ' '); 30 | } 31 | } 32 | 33 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, 34 | struct render_state *state, int options) { 35 | cmark_strbuf *xml = state->xml; 36 | bool literal = false; 37 | cmark_delim_type delim; 38 | bool entering = (ev_type == CMARK_EVENT_ENTER); 39 | char buffer[BUFFER_SIZE]; 40 | 41 | if (entering) { 42 | indent(state); 43 | cmark_strbuf_putc(xml, '<'); 44 | cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 45 | 46 | if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { 47 | snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", 48 | node->start_line, node->start_column, node->end_line, 49 | node->end_column); 50 | cmark_strbuf_puts(xml, buffer); 51 | } 52 | 53 | literal = false; 54 | 55 | switch (node->type) { 56 | case CMARK_NODE_DOCUMENT: 57 | cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); 58 | break; 59 | case CMARK_NODE_TEXT: 60 | case CMARK_NODE_CODE: 61 | case CMARK_NODE_HTML_BLOCK: 62 | case CMARK_NODE_HTML_INLINE: 63 | cmark_strbuf_puts(xml, ">"); 64 | escape_xml(xml, node->as.literal.data, node->as.literal.len); 65 | cmark_strbuf_puts(xml, "as.heading.level); 95 | cmark_strbuf_puts(xml, buffer); 96 | break; 97 | case CMARK_NODE_CODE_BLOCK: 98 | if (node->as.code.info.len > 0) { 99 | cmark_strbuf_puts(xml, " info=\""); 100 | 
escape_xml(xml, node->as.code.info.data, node->as.code.info.len); 101 | cmark_strbuf_putc(xml, '"'); 102 | } 103 | cmark_strbuf_puts(xml, ">"); 104 | escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len); 105 | cmark_strbuf_puts(xml, "as.custom.on_enter.data, 113 | node->as.custom.on_enter.len); 114 | cmark_strbuf_putc(xml, '"'); 115 | cmark_strbuf_puts(xml, " on_exit=\""); 116 | escape_xml(xml, node->as.custom.on_exit.data, 117 | node->as.custom.on_exit.len); 118 | cmark_strbuf_putc(xml, '"'); 119 | break; 120 | case CMARK_NODE_LINK: 121 | case CMARK_NODE_IMAGE: 122 | cmark_strbuf_puts(xml, " destination=\""); 123 | escape_xml(xml, node->as.link.url.data, node->as.link.url.len); 124 | cmark_strbuf_putc(xml, '"'); 125 | cmark_strbuf_puts(xml, " title=\""); 126 | escape_xml(xml, node->as.link.title.data, node->as.link.title.len); 127 | cmark_strbuf_putc(xml, '"'); 128 | break; 129 | default: 130 | break; 131 | } 132 | if (node->first_child) { 133 | state->indent += 2; 134 | } else if (!literal) { 135 | cmark_strbuf_puts(xml, " /"); 136 | } 137 | cmark_strbuf_puts(xml, ">\n"); 138 | 139 | } else if (node->first_child) { 140 | state->indent -= 2; 141 | indent(state); 142 | cmark_strbuf_puts(xml, "\n"); 145 | } 146 | 147 | return 1; 148 | } 149 | 150 | char *cmark_render_xml(cmark_node *root, int options) { 151 | char *result; 152 | cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root)); 153 | cmark_event_type ev_type; 154 | cmark_node *cur; 155 | struct render_state state = {&xml, 0}; 156 | 157 | cmark_iter *iter = cmark_iter_new(root); 158 | 159 | cmark_strbuf_puts(state.xml, "\n"); 160 | cmark_strbuf_puts(state.xml, 161 | "\n"); 162 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 163 | cur = cmark_iter_get_node(iter); 164 | S_render_node(cur, ev_type, &state, options); 165 | } 166 | result = (char *)cmark_strbuf_detach(&xml); 167 | 168 | cmark_iter_free(iter); 169 | return result; 170 | } 171 | 
-------------------------------------------------------------------------------- /man.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "config.h" 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "utf8.h" 11 | #include "render.h" 12 | 13 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 14 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 15 | #define CR() renderer->cr(renderer) 16 | #define BLANKLINE() renderer->blankline(renderer) 17 | #define LIST_NUMBER_SIZE 20 18 | 19 | // Functions to convert cmark_nodes to groff man strings. 20 | static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, 21 | unsigned char nextc) { 22 | (void)(nextc); 23 | 24 | if (escape == LITERAL) { 25 | cmark_render_code_point(renderer, c); 26 | return; 27 | } 28 | 29 | switch (c) { 30 | case 46: 31 | if (renderer->begin_line) { 32 | cmark_render_ascii(renderer, "\\&."); 33 | } else { 34 | cmark_render_code_point(renderer, c); 35 | } 36 | break; 37 | case 39: 38 | if (renderer->begin_line) { 39 | cmark_render_ascii(renderer, "\\&'"); 40 | } else { 41 | cmark_render_code_point(renderer, c); 42 | } 43 | break; 44 | case 45: 45 | cmark_render_ascii(renderer, "\\-"); 46 | break; 47 | case 92: 48 | cmark_render_ascii(renderer, "\\e"); 49 | break; 50 | case 8216: // left single quote 51 | cmark_render_ascii(renderer, "\\[oq]"); 52 | break; 53 | case 8217: // right single quote 54 | cmark_render_ascii(renderer, "\\[cq]"); 55 | break; 56 | case 8220: // left double quote 57 | cmark_render_ascii(renderer, "\\[lq]"); 58 | break; 59 | case 8221: // right double quote 60 | cmark_render_ascii(renderer, "\\[rq]"); 61 | break; 62 | case 8212: // em dash 63 | cmark_render_ascii(renderer, "\\[em]"); 64 | break; 65 | case 8211: // en dash 66 | cmark_render_ascii(renderer, "\\[en]"); 67 | break; 68 | default: 69 | 
cmark_render_code_point(renderer, c); 70 | } 71 | } 72 | 73 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 74 | cmark_event_type ev_type, int options) { 75 | cmark_node *tmp; 76 | int list_number; 77 | bool entering = (ev_type == CMARK_EVENT_ENTER); 78 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); 79 | 80 | // avoid unused parameter error: 81 | (void)(options); 82 | 83 | switch (node->type) { 84 | case CMARK_NODE_DOCUMENT: 85 | break; 86 | 87 | case CMARK_NODE_BLOCK_QUOTE: 88 | if (entering) { 89 | CR(); 90 | LIT(".RS"); 91 | CR(); 92 | } else { 93 | CR(); 94 | LIT(".RE"); 95 | CR(); 96 | } 97 | break; 98 | 99 | case CMARK_NODE_LIST: 100 | break; 101 | 102 | case CMARK_NODE_ITEM: 103 | if (entering) { 104 | CR(); 105 | LIT(".IP "); 106 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 107 | LIT("\\[bu] 2"); 108 | } else { 109 | list_number = cmark_node_get_list_start(node->parent); 110 | tmp = node; 111 | while (tmp->prev) { 112 | tmp = tmp->prev; 113 | list_number += 1; 114 | } 115 | char list_number_s[LIST_NUMBER_SIZE]; 116 | snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); 117 | LIT(list_number_s); 118 | } 119 | CR(); 120 | } else { 121 | CR(); 122 | } 123 | break; 124 | 125 | case CMARK_NODE_HEADING: 126 | if (entering) { 127 | CR(); 128 | LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS"); 129 | CR(); 130 | } else { 131 | CR(); 132 | } 133 | break; 134 | 135 | case CMARK_NODE_CODE_BLOCK: 136 | CR(); 137 | LIT(".IP\n.nf\n\\f[C]\n"); 138 | OUT(cmark_node_get_literal(node), false, NORMAL); 139 | CR(); 140 | LIT("\\f[]\n.fi"); 141 | CR(); 142 | break; 143 | 144 | case CMARK_NODE_HTML_BLOCK: 145 | break; 146 | 147 | case CMARK_NODE_CUSTOM_BLOCK: 148 | CR(); 149 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 150 | false, LITERAL); 151 | CR(); 152 | break; 153 | 154 | case CMARK_NODE_THEMATIC_BREAK: 155 | CR(); 156 | LIT(".PP\n * * * * *"); 157 | CR(); 158 | break; 159 | 160 | case CMARK_NODE_PARAGRAPH: 161 | if (entering) { 162 | // no blank line if first paragraph in list: 163 | if (node->parent && node->parent->type == CMARK_NODE_ITEM && 164 | node->prev == NULL) { 165 | // no blank line or .PP 166 | } else { 167 | CR(); 168 | LIT(".PP"); 169 | CR(); 170 | } 171 | } else { 172 | CR(); 173 | } 174 | break; 175 | 176 | case CMARK_NODE_TEXT: 177 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 178 | break; 179 | 180 | case CMARK_NODE_LINEBREAK: 181 | LIT(".PD 0\n.P\n.PD"); 182 | CR(); 183 | break; 184 | 185 | case CMARK_NODE_SOFTBREAK: 186 | if (options & CMARK_OPT_HARDBREAKS) { 187 | LIT(".PD 0\n.P\n.PD"); 188 | CR(); 189 | } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { 190 | CR(); 191 | } else { 192 | OUT(" ", allow_wrap, LITERAL); 193 | } 194 | break; 195 | 196 | case CMARK_NODE_CODE: 197 | LIT("\\f[C]"); 198 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 199 | LIT("\\f[]"); 200 | break; 201 | 202 | case CMARK_NODE_HTML_INLINE: 203 | break; 204 | 205 | case CMARK_NODE_CUSTOM_INLINE: 206 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 207 | false, LITERAL); 208 | break; 209 | 210 | case CMARK_NODE_STRONG: 211 | if (entering) { 212 | LIT("\\f[B]"); 213 | } else { 214 | LIT("\\f[]"); 215 | } 216 | break; 217 | 218 | case CMARK_NODE_EMPH: 219 | if (entering) { 220 | LIT("\\f[I]"); 221 | } else { 222 | LIT("\\f[]"); 223 | } 224 | break; 225 | 226 | case CMARK_NODE_LINK: 227 | if (!entering) { 228 | LIT(" ("); 229 | OUT(cmark_node_get_url(node), allow_wrap, URL); 230 | LIT(")"); 231 | } 232 | break; 233 | 234 | case CMARK_NODE_IMAGE: 235 | if (entering) { 236 | LIT("[IMAGE: "); 237 | } else { 238 | LIT("]"); 239 | } 240 | break; 241 | 242 | default: 243 | assert(false); 244 | break; 245 | } 246 | 247 | return 1; 248 | } 249 | 250 | char *cmark_render_man(cmark_node *root, int options, int width) { 251 | return cmark_render(root, options, width, S_outc, S_render_node); 252 | } 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | go-commonmark 2 | 3 | Copyright (c) 2014 James Adam 4 | 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided 16 | with the distribution. 17 | 18 | * Neither the name of James Adam nor the names of other 19 | contributors may be used to endorse or promote products derived 20 | from this software without specific prior written permission. 
21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | 34 | ------- 35 | 36 | cmark 37 | 38 | Copyright (c) 2014, John MacFarlane 39 | 40 | All rights reserved. 41 | 42 | Redistribution and use in source and binary forms, with or without 43 | modification, are permitted provided that the following conditions are met: 44 | 45 | * Redistributions of source code must retain the above copyright 46 | notice, this list of conditions and the following disclaimer. 47 | 48 | * Redistributions in binary form must reproduce the above 49 | copyright notice, this list of conditions and the following 50 | disclaimer in the documentation and/or other materials provided 51 | with the distribution. 52 | 53 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 54 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 55 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 56 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 57 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 58 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 59 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 60 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 61 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 62 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 63 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 | 65 | ----- 66 | 67 | houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c, 68 | html_unescape.gperf, html_unescape.h 69 | 70 | derive from https://github.com/vmg/houdini (with some modifications) 71 | 72 | Copyright (C) 2012 Vicent Martí 73 | 74 | Permission is hereby granted, free of charge, to any person obtaining a copy of 75 | this software and associated documentation files (the "Software"), to deal in 76 | the Software without restriction, including without limitation the rights to 77 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 78 | of the Software, and to permit persons to whom the Software is furnished to do 79 | so, subject to the following conditions: 80 | 81 | The above copyright notice and this permission notice shall be included in all 82 | copies or substantial portions of the Software. 83 | 84 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 85 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 86 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 87 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 88 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 89 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 90 | SOFTWARE. 
91 | 92 | ----- 93 | 94 | buffer.h, buffer.c, chunk.h 95 | 96 | are derived from code (C) 2012 Github, Inc. 97 | 98 | Permission is hereby granted, free of charge, to any person obtaining a copy of 99 | this software and associated documentation files (the "Software"), to deal in 100 | the Software without restriction, including without limitation the rights to 101 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 102 | of the Software, and to permit persons to whom the Software is furnished to do 103 | so, subject to the following conditions: 104 | 105 | The above copyright notice and this permission notice shall be included in all 106 | copies or substantial portions of the Software. 107 | 108 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 109 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 110 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 111 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 112 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 113 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 114 | SOFTWARE. 115 | 116 | ----- 117 | 118 | utf8.c and utf8.h 119 | 120 | are derived from utf8proc 121 | (<http://www.public-software-group.org/utf8proc>), 122 | (C) 2009 Public Software Group e. V., Berlin, Germany.
123 | 124 | Permission is hereby granted, free of charge, to any person obtaining a 125 | copy of this software and associated documentation files (the "Software"), 126 | to deal in the Software without restriction, including without limitation 127 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 128 | and/or sell copies of the Software, and to permit persons to whom the 129 | Software is furnished to do so, subject to the following conditions: 130 | 131 | The above copyright notice and this permission notice shall be included in 132 | all copies or substantial portions of the Software. 133 | 134 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 135 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 136 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 137 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 138 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 139 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 140 | DEALINGS IN THE SOFTWARE. 
141 | 142 | -------------------------------------------------------------------------------- /render.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "buffer.h" 3 | #include "chunk.h" 4 | #include "cmark.h" 5 | #include "utf8.h" 6 | #include "render.h" 7 | #include "node.h" 8 | 9 | static CMARK_INLINE void S_cr(cmark_renderer *renderer) { 10 | if (renderer->need_cr < 1) { 11 | renderer->need_cr = 1; 12 | } 13 | } 14 | 15 | static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { 16 | if (renderer->need_cr < 2) { 17 | renderer->need_cr = 2; 18 | } 19 | } 20 | 21 | static void S_out(cmark_renderer *renderer, const char *source, bool wrap, 22 | cmark_escaping escape) { 23 | int length = strlen(source); 24 | unsigned char nextc; 25 | int32_t c; 26 | int i = 0; 27 | int last_nonspace; 28 | int len; 29 | cmark_chunk remainder = cmark_chunk_literal(""); 30 | int k = renderer->buffer->size - 1; 31 | 32 | wrap = wrap && !renderer->no_linebreaks; 33 | 34 | if (renderer->in_tight_list_item && renderer->need_cr > 1) { 35 | renderer->need_cr = 1; 36 | } 37 | while (renderer->need_cr) { 38 | if (k < 0 || renderer->buffer->ptr[k] == '\n') { 39 | k -= 1; 40 | } else { 41 | cmark_strbuf_putc(renderer->buffer, '\n'); 42 | if (renderer->need_cr > 1) { 43 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 44 | renderer->prefix->size); 45 | } 46 | } 47 | renderer->column = 0; 48 | renderer->begin_line = true; 49 | renderer->begin_content = true; 50 | renderer->need_cr -= 1; 51 | } 52 | 53 | while (i < length) { 54 | if (renderer->begin_line) { 55 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 56 | renderer->prefix->size); 57 | // note: this assumes prefix is ascii: 58 | renderer->column = renderer->prefix->size; 59 | } 60 | 61 | len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); 62 | if (len == -1) { // error condition 63 | return; // return without rendering rest of 
string 64 | } 65 | nextc = source[i + len]; 66 | if (c == 32 && wrap) { 67 | if (!renderer->begin_line) { 68 | last_nonspace = renderer->buffer->size; 69 | cmark_strbuf_putc(renderer->buffer, ' '); 70 | renderer->column += 1; 71 | renderer->begin_line = false; 72 | renderer->begin_content = false; 73 | // skip following spaces 74 | while (source[i + 1] == ' ') { 75 | i++; 76 | } 77 | // We don't allow breaks that make a digit the first character 78 | // because this causes problems with commonmark output. 79 | if (!cmark_isdigit(source[i + 1])) { 80 | renderer->last_breakable = last_nonspace; 81 | } 82 | } 83 | 84 | } else if (c == 10) { 85 | cmark_strbuf_putc(renderer->buffer, '\n'); 86 | renderer->column = 0; 87 | renderer->begin_line = true; 88 | renderer->begin_content = true; 89 | renderer->last_breakable = 0; 90 | } else if (escape == LITERAL) { 91 | cmark_render_code_point(renderer, c); 92 | renderer->begin_line = false; 93 | // we don't set 'begin_content' to false til we've 94 | // finished parsing a digit. 
Reason: in commonmark 95 | // we need to escape a potential list marker after 96 | // a digit: 97 | renderer->begin_content = 98 | renderer->begin_content && cmark_isdigit(c) == 1; 99 | } else { 100 | (renderer->outc)(renderer, escape, c, nextc); 101 | renderer->begin_line = false; 102 | renderer->begin_content = 103 | renderer->begin_content && cmark_isdigit(c) == 1; 104 | } 105 | 106 | // If adding the character went beyond width, look for an 107 | // earlier place where the line could be broken: 108 | if (renderer->width > 0 && renderer->column > renderer->width && 109 | !renderer->begin_line && renderer->last_breakable > 0) { 110 | 111 | // copy from last_breakable to remainder 112 | cmark_chunk_set_cstr(renderer->mem, &remainder, 113 | (char *)renderer->buffer->ptr + 114 | renderer->last_breakable + 1); 115 | // truncate at last_breakable 116 | cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); 117 | // add newline, prefix, and remainder 118 | cmark_strbuf_putc(renderer->buffer, '\n'); 119 | cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, 120 | renderer->prefix->size); 121 | cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len); 122 | renderer->column = renderer->prefix->size + remainder.len; 123 | cmark_chunk_free(renderer->mem, &remainder); 124 | renderer->last_breakable = 0; 125 | renderer->begin_line = false; 126 | renderer->begin_content = false; 127 | } 128 | 129 | i += len; 130 | } 131 | } 132 | 133 | // Assumes no newlines, assumes ascii content: 134 | void cmark_render_ascii(cmark_renderer *renderer, const char *s) { 135 | int origsize = renderer->buffer->size; 136 | cmark_strbuf_puts(renderer->buffer, s); 137 | renderer->column += renderer->buffer->size - origsize; 138 | } 139 | 140 | void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { 141 | cmark_utf8proc_encode_char(c, renderer->buffer); 142 | renderer->column += 1; 143 | } 144 | 145 | char *cmark_render(cmark_node *root, int options, int 
width, 146 | void (*outc)(cmark_renderer *, cmark_escaping, int32_t, 147 | unsigned char), 148 | int (*render_node)(cmark_renderer *renderer, 149 | cmark_node *node, 150 | cmark_event_type ev_type, int options)) { 151 | cmark_mem *mem = cmark_node_mem(root); 152 | cmark_strbuf pref = CMARK_BUF_INIT(mem); 153 | cmark_strbuf buf = CMARK_BUF_INIT(mem); 154 | cmark_node *cur; 155 | cmark_event_type ev_type; 156 | char *result; 157 | cmark_iter *iter = cmark_iter_new(root); 158 | 159 | cmark_renderer renderer = {mem, &buf, &pref, 0, width, 160 | 0, 0, true, true, false, 161 | false, outc, S_cr, S_blankline, S_out}; 162 | 163 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 164 | cur = cmark_iter_get_node(iter); 165 | if (!render_node(&renderer, cur, ev_type, options)) { 166 | // a false value causes us to skip processing 167 | // the node's contents. this is used for 168 | // autolinks. 169 | cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); 170 | } 171 | } 172 | 173 | // ensure final newline 174 | if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { 175 | cmark_strbuf_putc(renderer.buffer, '\n'); 176 | } 177 | 178 | result = (char *)cmark_strbuf_detach(renderer.buffer); 179 | 180 | cmark_iter_free(iter); 181 | cmark_strbuf_free(renderer.prefix); 182 | cmark_strbuf_free(renderer.buffer); 183 | 184 | return result; 185 | } 186 | -------------------------------------------------------------------------------- /buffer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "config.h" 11 | #include "cmark_ctype.h" 12 | #include "buffer.h" 13 | #include "memory.h" 14 | 15 | /* Used as default value for cmark_strbuf->ptr so that people can always 16 | * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. 
17 | */ 18 | unsigned char cmark_strbuf__initbuf[1]; 19 | 20 | #ifndef MIN 21 | #define MIN(x, y) ((x < y) ? x : y) 22 | #endif 23 | 24 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, 25 | bufsize_t initial_size) { 26 | buf->mem = mem; 27 | buf->asize = 0; 28 | buf->size = 0; 29 | buf->ptr = cmark_strbuf__initbuf; 30 | 31 | if (initial_size > 0) 32 | cmark_strbuf_grow(buf, initial_size); 33 | } 34 | 35 | static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { 36 | cmark_strbuf_grow(buf, buf->size + add); 37 | } 38 | 39 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { 40 | assert(target_size > 0); 41 | 42 | if (target_size < buf->asize) 43 | return; 44 | 45 | if (target_size > (bufsize_t)(INT32_MAX / 2)) { 46 | fprintf(stderr, 47 | "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n", 48 | (INT32_MAX / 2)); 49 | abort(); 50 | } 51 | 52 | /* Oversize the buffer by 50% to guarantee amortized linear time 53 | * complexity on append operations. */ 54 | bufsize_t new_size = target_size + target_size / 2; 55 | new_size += 1; 56 | new_size = (new_size + 7) & ~7; 57 | 58 | buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? 
buf->ptr : NULL, 59 | new_size); 60 | buf->asize = new_size; 61 | } 62 | 63 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } 64 | 65 | void cmark_strbuf_free(cmark_strbuf *buf) { 66 | if (!buf) 67 | return; 68 | 69 | if (buf->ptr != cmark_strbuf__initbuf) 70 | buf->mem->free(buf->ptr); 71 | 72 | cmark_strbuf_init(buf->mem, buf, 0); 73 | } 74 | 75 | void cmark_strbuf_clear(cmark_strbuf *buf) { 76 | buf->size = 0; 77 | 78 | if (buf->asize > 0) 79 | buf->ptr[0] = '\0'; 80 | } 81 | 82 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, 83 | bufsize_t len) { 84 | if (len <= 0 || data == NULL) { 85 | cmark_strbuf_clear(buf); 86 | } else { 87 | if (data != buf->ptr) { 88 | if (len >= buf->asize) 89 | cmark_strbuf_grow(buf, len); 90 | memmove(buf->ptr, data, len); 91 | } 92 | buf->size = len; 93 | buf->ptr[buf->size] = '\0'; 94 | } 95 | } 96 | 97 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { 98 | cmark_strbuf_set(buf, (const unsigned char *)string, 99 | string ? 
strlen(string) : 0); 100 | } 101 | 102 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) { 103 | S_strbuf_grow_by(buf, 1); 104 | buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); 105 | buf->ptr[buf->size] = '\0'; 106 | } 107 | 108 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, 109 | bufsize_t len) { 110 | if (len <= 0) 111 | return; 112 | 113 | S_strbuf_grow_by(buf, len); 114 | memmove(buf->ptr + buf->size, data, len); 115 | buf->size += len; 116 | buf->ptr[buf->size] = '\0'; 117 | } 118 | 119 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { 120 | cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); 121 | } 122 | 123 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, 124 | const cmark_strbuf *buf) { 125 | bufsize_t copylen; 126 | 127 | assert(buf); 128 | if (!data || datasize <= 0) 129 | return; 130 | 131 | data[0] = '\0'; 132 | 133 | if (buf->size == 0 || buf->asize <= 0) 134 | return; 135 | 136 | copylen = buf->size; 137 | if (copylen > datasize - 1) 138 | copylen = datasize - 1; 139 | memmove(data, buf->ptr, copylen); 140 | data[copylen] = '\0'; 141 | } 142 | 143 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { 144 | cmark_strbuf t = *buf_a; 145 | *buf_a = *buf_b; 146 | *buf_b = t; 147 | } 148 | 149 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { 150 | unsigned char *data = buf->ptr; 151 | 152 | if (buf->asize == 0) { 153 | /* return an empty string */ 154 | return (unsigned char *)buf->mem->calloc(1, 1); 155 | } 156 | 157 | cmark_strbuf_init(buf->mem, buf, 0); 158 | return data; 159 | } 160 | 161 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { 162 | int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); 163 | return (result != 0) ? result 164 | : (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; 165 | } 166 | 167 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { 168 | if (pos >= buf->size) 169 | return -1; 170 | if (pos < 0) 171 | pos = 0; 172 | 173 | const unsigned char *p = 174 | (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); 175 | if (!p) 176 | return -1; 177 | 178 | return (bufsize_t)(p - (const unsigned char *)buf->ptr); 179 | } 180 | 181 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { 182 | if (pos < 0 || buf->size == 0) 183 | return -1; 184 | if (pos >= buf->size) 185 | pos = buf->size - 1; 186 | 187 | bufsize_t i; 188 | for (i = pos; i >= 0; i--) { 189 | if (buf->ptr[i] == (unsigned char)c) 190 | return i; 191 | } 192 | 193 | return -1; 194 | } 195 | 196 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { 197 | if (len < 0) 198 | len = 0; 199 | 200 | if (len < buf->size) { 201 | buf->size = len; 202 | buf->ptr[buf->size] = '\0'; 203 | } 204 | } 205 | 206 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { 207 | if (n > 0) { 208 | if (n > buf->size) 209 | n = buf->size; 210 | buf->size = buf->size - n; 211 | if (buf->size) 212 | memmove(buf->ptr, buf->ptr + n, buf->size); 213 | 214 | buf->ptr[buf->size] = '\0'; 215 | } 216 | } 217 | 218 | void cmark_strbuf_rtrim(cmark_strbuf *buf) { 219 | if (!buf->size) 220 | return; 221 | 222 | while (buf->size > 0) { 223 | if (!cmark_isspace(buf->ptr[buf->size - 1])) 224 | break; 225 | 226 | buf->size--; 227 | } 228 | 229 | buf->ptr[buf->size] = '\0'; 230 | } 231 | 232 | void cmark_strbuf_trim(cmark_strbuf *buf) { 233 | bufsize_t i = 0; 234 | 235 | if (!buf->size) 236 | return; 237 | 238 | while (i < buf->size && cmark_isspace(buf->ptr[i])) 239 | i++; 240 | 241 | cmark_strbuf_drop(buf, i); 242 | 243 | cmark_strbuf_rtrim(buf); 244 | } 245 | 246 | // Destructively modify string, collapsing consecutive 247 | // space and newline characters into a single space. 
248 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { 249 | bool last_char_was_space = false; 250 | bufsize_t r, w; 251 | 252 | for (r = 0, w = 0; r < s->size; ++r) { 253 | if (cmark_isspace(s->ptr[r])) { 254 | if (!last_char_was_space) { 255 | s->ptr[w++] = ' '; 256 | last_char_was_space = true; 257 | } 258 | } else { 259 | s->ptr[w++] = s->ptr[r]; 260 | last_char_was_space = false; 261 | } 262 | } 263 | 264 | cmark_strbuf_truncate(s, w); 265 | } 266 | 267 | // Destructively unescape a string: remove backslashes before punctuation chars. 268 | extern void cmark_strbuf_unescape(cmark_strbuf *buf) { 269 | bufsize_t r, w; 270 | 271 | for (r = 0, w = 0; r < buf->size; ++r) { 272 | if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) 273 | r++; 274 | 275 | buf->ptr[w++] = buf->ptr[r]; 276 | } 277 | 278 | cmark_strbuf_truncate(buf, w); 279 | } 280 | -------------------------------------------------------------------------------- /scanners.re: -------------------------------------------------------------------------------- 1 | #include 2 | #include "chunk.h" 3 | #include "scanners.h" 4 | 5 | bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) 6 | { 7 | bufsize_t res; 8 | unsigned char *ptr = (unsigned char *)c->data; 9 | 10 | if (ptr == NULL || offset > c->len) { 11 | return 0; 12 | } else { 13 | unsigned char lim = ptr[c->len]; 14 | 15 | ptr[c->len] = '\0'; 16 | res = scanner(ptr + offset); 17 | ptr[c->len] = lim; 18 | } 19 | 20 | return res; 21 | } 22 | 23 | /*!re2c 24 | re2c:define:YYCTYPE = "unsigned char"; 25 | re2c:define:YYCURSOR = p; 26 | re2c:define:YYMARKER = marker; 27 | re2c:define:YYCTXMARKER = marker; 28 | re2c:yyfill:enable = 0; 29 | 30 | wordchar = [^\x00-\x20]; 31 | 32 | spacechar = [ \t\v\f\r\n]; 33 | 34 | reg_char = [^\\()\x00-\x20]; 35 | 36 | escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-]; 37 | 38 | tagname = [A-Za-z][A-Za-z0-9-]*; 39 | 40 | blocktagname = 
'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'meta'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; 41 | 42 | attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; 43 | 44 | unquotedvalue = [^"'=<>`\x00]+; 45 | singlequotedvalue = ['][^'\x00]*[']; 46 | doublequotedvalue = ["][^"\x00]*["]; 47 | 48 | attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue; 49 | 50 | attributevaluespec = spacechar* [=] spacechar* attributevalue; 51 | 52 | attribute = spacechar+ attributename attributevaluespec?; 53 | 54 | opentag = tagname attribute* spacechar* [/]? [>]; 55 | closetag = [/] tagname spacechar* [>]; 56 | 57 | htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->"); 58 | 59 | processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>"; 60 | 61 | declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">"; 62 | 63 | cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>"; 64 | 65 | htmltag = opentag | closetag | htmlcomment | processinginstruction | 66 | declaration | cdata; 67 | 68 | in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)]; 69 | 70 | in_double_quotes = ["] (escaped_char|[^"\x00])* ["]; 71 | in_single_quotes = ['] (escaped_char|[^'\x00])* [']; 72 | in_parens = [(] (escaped_char|[^)\x00])* [)]; 73 | 74 | scheme = [A-Za-z][A-Za-z0-9.+-]{1,31}; 75 | */ 76 | 77 | // Try to match a scheme including colon. 
78 | bufsize_t _scan_scheme(const unsigned char *p) 79 | { 80 | const unsigned char *marker = NULL; 81 | const unsigned char *start = p; 82 | /*!re2c 83 | scheme [:] { return (bufsize_t)(p - start); } 84 | * { return 0; } 85 | */ 86 | } 87 | 88 | // Try to match URI autolink after first <, returning number of chars matched. 89 | bufsize_t _scan_autolink_uri(const unsigned char *p) 90 | { 91 | const unsigned char *marker = NULL; 92 | const unsigned char *start = p; 93 | /*!re2c 94 | scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } 95 | * { return 0; } 96 | */ 97 | } 98 | 99 | // Try to match email autolink after first <, returning num of chars matched. 100 | bufsize_t _scan_autolink_email(const unsigned char *p) 101 | { 102 | const unsigned char *marker = NULL; 103 | const unsigned char *start = p; 104 | /*!re2c 105 | [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ 106 | [@] 107 | [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? 108 | ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* 109 | [>] { return (bufsize_t)(p - start); } 110 | * { return 0; } 111 | */ 112 | } 113 | 114 | // Try to match an HTML tag after first <, returning num of chars matched. 115 | bufsize_t _scan_html_tag(const unsigned char *p) 116 | { 117 | const unsigned char *marker = NULL; 118 | const unsigned char *start = p; 119 | /*!re2c 120 | htmltag { return (bufsize_t)(p - start); } 121 | * { return 0; } 122 | */ 123 | } 124 | 125 | // Try to match an HTML block tag start line, returning 126 | // an integer code for the type of block (1-6, matching the spec). 127 | // #7 is handled by a separate function, below. 
128 | bufsize_t _scan_html_block_start(const unsigned char *p) 129 | { 130 | const unsigned char *marker = NULL; 131 | /*!re2c 132 | [<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; } 133 | '' { return (bufsize_t)(p - start); } 171 | * { return 0; } 172 | */ 173 | } 174 | 175 | // Try to match an HTML block end line of type 3 176 | bufsize_t _scan_html_block_end_3(const unsigned char *p) 177 | { 178 | const unsigned char *marker = NULL; 179 | const unsigned char *start = p; 180 | /*!re2c 181 | [^\n\x00]* '?>' { return (bufsize_t)(p - start); } 182 | * { return 0; } 183 | */ 184 | } 185 | 186 | // Try to match an HTML block end line of type 4 187 | bufsize_t _scan_html_block_end_4(const unsigned char *p) 188 | { 189 | const unsigned char *marker = NULL; 190 | const unsigned char *start = p; 191 | /*!re2c 192 | [^\n\x00]* '>' { return (bufsize_t)(p - start); } 193 | * { return 0; } 194 | */ 195 | } 196 | 197 | // Try to match an HTML block end line of type 5 198 | bufsize_t _scan_html_block_end_5(const unsigned char *p) 199 | { 200 | const unsigned char *marker = NULL; 201 | const unsigned char *start = p; 202 | /*!re2c 203 | [^\n\x00]* ']]>' { return (bufsize_t)(p - start); } 204 | * { return 0; } 205 | */ 206 | } 207 | 208 | // Try to match a link title (in single quotes, in double quotes, or 209 | // in parentheses), returning number of chars matched. Allow one 210 | // level of internal nesting (quotes within quotes). 211 | bufsize_t _scan_link_title(const unsigned char *p) 212 | { 213 | const unsigned char *marker = NULL; 214 | const unsigned char *start = p; 215 | /*!re2c 216 | ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } 217 | ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } 218 | [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); } 219 | * { return 0; } 220 | */ 221 | } 222 | 223 | // Match space characters, including newlines. 
224 | bufsize_t _scan_spacechars(const unsigned char *p) 225 | { 226 | const unsigned char *start = p; \ 227 | /*!re2c 228 | [ \t\v\f\r\n]+ { return (bufsize_t)(p - start); } 229 | * { return 0; } 230 | */ 231 | } 232 | 233 | // Match ATX heading start. 234 | bufsize_t _scan_atx_heading_start(const unsigned char *p) 235 | { 236 | const unsigned char *marker = NULL; 237 | const unsigned char *start = p; 238 | /*!re2c 239 | [#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); } 240 | * { return 0; } 241 | */ 242 | } 243 | 244 | // Match setext heading line. Return 1 for level-1 heading, 245 | // 2 for level-2, 0 for no match. 246 | bufsize_t _scan_setext_heading_line(const unsigned char *p) 247 | { 248 | const unsigned char *marker = NULL; 249 | /*!re2c 250 | [=]+ [ \t]* [\r\n] { return 1; } 251 | [-]+ [ \t]* [\r\n] { return 2; } 252 | * { return 0; } 253 | */ 254 | } 255 | 256 | // Scan a thematic break line: "...three or more hyphens, asterisks, 257 | // or underscores on a line by themselves. If you wish, you may use 258 | // spaces between the hyphens or asterisks." 259 | bufsize_t _scan_thematic_break(const unsigned char *p) 260 | { 261 | const unsigned char *marker = NULL; 262 | const unsigned char *start = p; 263 | /*!re2c 264 | ([*][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } 265 | ([_][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } 266 | ([-][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } 267 | * { return 0; } 268 | */ 269 | } 270 | 271 | // Scan an opening code fence. 272 | bufsize_t _scan_open_code_fence(const unsigned char *p) 273 | { 274 | const unsigned char *marker = NULL; 275 | const unsigned char *start = p; 276 | /*!re2c 277 | [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } 278 | [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } 279 | * { return 0; } 280 | */ 281 | } 282 | 283 | // Scan a closing code fence with length at least len. 
284 | bufsize_t _scan_close_code_fence(const unsigned char *p) 285 | { 286 | const unsigned char *marker = NULL; 287 | const unsigned char *start = p; 288 | /*!re2c 289 | [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } 290 | [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } 291 | * { return 0; } 292 | */ 293 | } 294 | 295 | // Scans an entity. 296 | // Returns number of chars matched. 297 | bufsize_t _scan_entity(const unsigned char *p) 298 | { 299 | const unsigned char *marker = NULL; 300 | const unsigned char *start = p; 301 | /*!re2c 302 | [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] 303 | { return (bufsize_t)(p - start); } 304 | * { return 0; } 305 | */ 306 | } 307 | 308 | // Returns positive value if a URL begins in a way that is potentially 309 | // dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0. 310 | bufsize_t _scan_dangerous_url(const unsigned char *p) 311 | { 312 | const unsigned char *marker = NULL; 313 | const unsigned char *start = p; 314 | /*!re2c 315 | 'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; } 316 | 'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); } 317 | * { return 0; } 318 | */ 319 | } 320 | 321 | -------------------------------------------------------------------------------- /html.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "cmark_ctype.h" 6 | #include "config.h" 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "houdini.h" 11 | #include "scanners.h" 12 | 13 | #define BUFFER_SIZE 100 14 | 15 | // Functions to convert cmark_nodes to HTML strings. 
16 | 17 | static void escape_html(cmark_strbuf *dest, const unsigned char *source, 18 | bufsize_t length) { 19 | houdini_escape_html0(dest, source, length, 0); 20 | } 21 | 22 | static CMARK_INLINE void cr(cmark_strbuf *html) { 23 | if (html->size && html->ptr[html->size - 1] != '\n') 24 | cmark_strbuf_putc(html, '\n'); 25 | } 26 | 27 | struct render_state { 28 | cmark_strbuf *html; 29 | cmark_node *plain; 30 | }; 31 | 32 | static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, 33 | int options) { 34 | char buffer[BUFFER_SIZE]; 35 | if (CMARK_OPT_SOURCEPOS & options) { 36 | snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", 37 | cmark_node_get_start_line(node), cmark_node_get_start_column(node), 38 | cmark_node_get_end_line(node), cmark_node_get_end_column(node)); 39 | cmark_strbuf_puts(html, buffer); 40 | } 41 | } 42 | 43 | static int S_render_node(cmark_node *node, cmark_event_type ev_type, 44 | struct render_state *state, int options) { 45 | cmark_node *parent; 46 | cmark_node *grandparent; 47 | cmark_strbuf *html = state->html; 48 | char start_heading[] = "plain == node) { // back at original node 56 | state->plain = NULL; 57 | } 58 | 59 | if (state->plain != NULL) { 60 | switch (node->type) { 61 | case CMARK_NODE_TEXT: 62 | case CMARK_NODE_CODE: 63 | case CMARK_NODE_HTML_INLINE: 64 | escape_html(html, node->as.literal.data, node->as.literal.len); 65 | break; 66 | 67 | case CMARK_NODE_LINEBREAK: 68 | case CMARK_NODE_SOFTBREAK: 69 | cmark_strbuf_putc(html, ' '); 70 | break; 71 | 72 | default: 73 | break; 74 | } 75 | return 1; 76 | } 77 | 78 | switch (node->type) { 79 | case CMARK_NODE_DOCUMENT: 80 | break; 81 | 82 | case CMARK_NODE_BLOCK_QUOTE: 83 | if (entering) { 84 | cr(html); 85 | cmark_strbuf_puts(html, "\n"); 88 | } else { 89 | cr(html); 90 | cmark_strbuf_puts(html, "\n"); 91 | } 92 | break; 93 | 94 | case CMARK_NODE_LIST: { 95 | cmark_list_type list_type = node->as.list.list_type; 96 | int start = node->as.list.start; 97 | 98 | 
if (entering) { 99 | cr(html); 100 | if (list_type == CMARK_BULLET_LIST) { 101 | cmark_strbuf_puts(html, "\n"); 104 | } else if (start == 1) { 105 | cmark_strbuf_puts(html, "\n"); 108 | } else { 109 | snprintf(buffer, BUFFER_SIZE, "
    \n"); 113 | } 114 | } else { 115 | cmark_strbuf_puts(html, 116 | list_type == CMARK_BULLET_LIST ? "\n" : "
\n"); 117 | } 118 | break; 119 | } 120 | 121 | case CMARK_NODE_ITEM: 122 | if (entering) { 123 | cr(html); 124 | cmark_strbuf_puts(html, "'); 127 | } else { 128 | cmark_strbuf_puts(html, "\n"); 129 | } 130 | break; 131 | 132 | case CMARK_NODE_HEADING: 133 | if (entering) { 134 | cr(html); 135 | start_heading[2] = (char)('0' + node->as.heading.level); 136 | cmark_strbuf_puts(html, start_heading); 137 | S_render_sourcepos(node, html, options); 138 | cmark_strbuf_putc(html, '>'); 139 | } else { 140 | end_heading[3] = (char)('0' + node->as.heading.level); 141 | cmark_strbuf_puts(html, end_heading); 142 | cmark_strbuf_puts(html, ">\n"); 143 | } 144 | break; 145 | 146 | case CMARK_NODE_CODE_BLOCK: 147 | cr(html); 148 | 149 | if (node->as.code.info.len == 0) { 150 | cmark_strbuf_puts(html, ""); 153 | } else { 154 | bufsize_t first_tag = 0; 155 | while (first_tag < node->as.code.info.len && 156 | !cmark_isspace(node->as.code.info.data[first_tag])) { 157 | first_tag += 1; 158 | } 159 | 160 | cmark_strbuf_puts(html, "as.code.info.data, first_tag); 164 | cmark_strbuf_puts(html, "\">"); 165 | } 166 | 167 | escape_html(html, node->as.code.literal.data, node->as.code.literal.len); 168 | cmark_strbuf_puts(html, "\n"); 169 | break; 170 | 171 | case CMARK_NODE_HTML_BLOCK: 172 | cr(html); 173 | if (options & CMARK_OPT_SAFE) { 174 | cmark_strbuf_puts(html, ""); 175 | } else { 176 | cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); 177 | } 178 | cr(html); 179 | break; 180 | 181 | case CMARK_NODE_CUSTOM_BLOCK: 182 | cr(html); 183 | if (entering) { 184 | cmark_strbuf_put(html, node->as.custom.on_enter.data, 185 | node->as.custom.on_enter.len); 186 | } else { 187 | cmark_strbuf_put(html, node->as.custom.on_exit.data, 188 | node->as.custom.on_exit.len); 189 | } 190 | cr(html); 191 | break; 192 | 193 | case CMARK_NODE_THEMATIC_BREAK: 194 | cr(html); 195 | cmark_strbuf_puts(html, "\n"); 198 | break; 199 | 200 | case CMARK_NODE_PARAGRAPH: 201 | parent = 
cmark_node_parent(node); 202 | grandparent = cmark_node_parent(parent); 203 | if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) { 204 | tight = grandparent->as.list.tight; 205 | } else { 206 | tight = false; 207 | } 208 | if (!tight) { 209 | if (entering) { 210 | cr(html); 211 | cmark_strbuf_puts(html, "'); 214 | } else { 215 | cmark_strbuf_puts(html, "

\n"); 216 | } 217 | } 218 | break; 219 | 220 | case CMARK_NODE_TEXT: 221 | escape_html(html, node->as.literal.data, node->as.literal.len); 222 | break; 223 | 224 | case CMARK_NODE_LINEBREAK: 225 | cmark_strbuf_puts(html, "
\n"); 226 | break; 227 | 228 | case CMARK_NODE_SOFTBREAK: 229 | if (options & CMARK_OPT_HARDBREAKS) { 230 | cmark_strbuf_puts(html, "
\n"); 231 | } else if (options & CMARK_OPT_NOBREAKS) { 232 | cmark_strbuf_putc(html, ' '); 233 | } else { 234 | cmark_strbuf_putc(html, '\n'); 235 | } 236 | break; 237 | 238 | case CMARK_NODE_CODE: 239 | cmark_strbuf_puts(html, ""); 240 | escape_html(html, node->as.literal.data, node->as.literal.len); 241 | cmark_strbuf_puts(html, ""); 242 | break; 243 | 244 | case CMARK_NODE_HTML_INLINE: 245 | if (options & CMARK_OPT_SAFE) { 246 | cmark_strbuf_puts(html, ""); 247 | } else { 248 | cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); 249 | } 250 | break; 251 | 252 | case CMARK_NODE_CUSTOM_INLINE: 253 | if (entering) { 254 | cmark_strbuf_put(html, node->as.custom.on_enter.data, 255 | node->as.custom.on_enter.len); 256 | } else { 257 | cmark_strbuf_put(html, node->as.custom.on_exit.data, 258 | node->as.custom.on_exit.len); 259 | } 260 | break; 261 | 262 | case CMARK_NODE_STRONG: 263 | if (entering) { 264 | cmark_strbuf_puts(html, ""); 265 | } else { 266 | cmark_strbuf_puts(html, ""); 267 | } 268 | break; 269 | 270 | case CMARK_NODE_EMPH: 271 | if (entering) { 272 | cmark_strbuf_puts(html, ""); 273 | } else { 274 | cmark_strbuf_puts(html, ""); 275 | } 276 | break; 277 | 278 | case CMARK_NODE_LINK: 279 | if (entering) { 280 | cmark_strbuf_puts(html, "as.link.url, 0))) { 283 | houdini_escape_href(html, node->as.link.url.data, 284 | node->as.link.url.len); 285 | } 286 | if (node->as.link.title.len) { 287 | cmark_strbuf_puts(html, "\" title=\""); 288 | escape_html(html, node->as.link.title.data, node->as.link.title.len); 289 | } 290 | cmark_strbuf_puts(html, "\">"); 291 | } else { 292 | cmark_strbuf_puts(html, ""); 293 | } 294 | break; 295 | 296 | case CMARK_NODE_IMAGE: 297 | if (entering) { 298 | cmark_strbuf_puts(html, "as.link.url, 0))) { 301 | houdini_escape_href(html, node->as.link.url.data, 302 | node->as.link.url.len); 303 | } 304 | cmark_strbuf_puts(html, "\" alt=\""); 305 | state->plain = node; 306 | } else { 307 | if (node->as.link.title.len) { 
308 | cmark_strbuf_puts(html, "\" title=\""); 309 | escape_html(html, node->as.link.title.data, node->as.link.title.len); 310 | } 311 | 312 | cmark_strbuf_puts(html, "\" />"); 313 | } 314 | break; 315 | 316 | default: 317 | assert(false); 318 | break; 319 | } 320 | 321 | // cmark_strbuf_putc(html, 'x'); 322 | return 1; 323 | } 324 | 325 | char *cmark_render_html(cmark_node *root, int options) { 326 | char *result; 327 | cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root)); 328 | cmark_event_type ev_type; 329 | cmark_node *cur; 330 | struct render_state state = {&html, NULL}; 331 | cmark_iter *iter = cmark_iter_new(root); 332 | 333 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 334 | cur = cmark_iter_get_node(iter); 335 | S_render_node(cur, ev_type, &state, options); 336 | } 337 | result = (char *)cmark_strbuf_detach(&html); 338 | 339 | cmark_iter_free(iter); 340 | return result; 341 | } 342 | -------------------------------------------------------------------------------- /utf8.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cmark_ctype.h" 6 | #include "utf8.h" 7 | 8 | static const int8_t utf8proc_utf8class[256] = { 9 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 10 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 13 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 18 | 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 19 | 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0}; 20 | 21 | static void encode_unknown(cmark_strbuf *buf) { 22 | static const uint8_t repl[] = {239, 191, 189}; 23 | cmark_strbuf_put(buf, repl, 3); 24 | } 25 | 26 | static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { 27 | int length, i; 28 | 29 | if (!str_len) 30 | return 0; 31 | 32 | length = utf8proc_utf8class[str[0]]; 33 | 34 | if (!length) 35 | return -1; 36 | 37 | if (str_len >= 0 && (bufsize_t)length > str_len) 38 | return -str_len; 39 | 40 | for (i = 1; i < length; i++) { 41 | if ((str[i] & 0xC0) != 0x80) 42 | return -i; 43 | } 44 | 45 | return length; 46 | } 47 | 48 | // Validate a single UTF-8 character according to RFC 3629. 49 | static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { 50 | int length = utf8proc_utf8class[str[0]]; 51 | 52 | if (!length) 53 | return -1; 54 | 55 | if ((bufsize_t)length > str_len) 56 | return -str_len; 57 | 58 | switch (length) { 59 | case 2: 60 | if ((str[1] & 0xC0) != 0x80) 61 | return -1; 62 | if (str[0] < 0xC2) { 63 | // Overlong 64 | return -length; 65 | } 66 | break; 67 | 68 | case 3: 69 | if ((str[1] & 0xC0) != 0x80) 70 | return -1; 71 | if ((str[2] & 0xC0) != 0x80) 72 | return -2; 73 | if (str[0] == 0xE0) { 74 | if (str[1] < 0xA0) { 75 | // Overlong 76 | return -length; 77 | } 78 | } else if (str[0] == 0xED) { 79 | if (str[1] >= 0xA0) { 80 | // Surrogate 81 | return -length; 82 | } 83 | } 84 | break; 85 | 86 | case 4: 87 | if ((str[1] & 0xC0) != 0x80) 88 | return -1; 89 | if ((str[2] & 0xC0) != 0x80) 90 | return -2; 91 | if ((str[3] & 0xC0) != 0x80) 92 | return -3; 93 | if (str[0] == 0xF0) { 94 | if (str[1] < 0x90) { 95 | // Overlong 96 | return -length; 97 | } 98 | } else if (str[0] >= 0xF4) { 99 | if (str[0] > 0xF4 || str[1] >= 0x90) { 100 | // Above 0x10FFFF 101 | return -length; 102 | } 103 | } 104 | break; 105 | } 106 | 107 | return length; 108 | } 109 | 110 | void cmark_utf8proc_check(cmark_strbuf *ob, const 
uint8_t *line, 111 | bufsize_t size) { 112 | bufsize_t i = 0; 113 | 114 | while (i < size) { 115 | bufsize_t org = i; 116 | int charlen = 0; 117 | 118 | while (i < size) { 119 | if (line[i] < 0x80 && line[i] != 0) { 120 | i++; 121 | } else if (line[i] >= 0x80) { 122 | charlen = utf8proc_valid(line + i, size - i); 123 | if (charlen < 0) { 124 | charlen = -charlen; 125 | break; 126 | } 127 | i += charlen; 128 | } else if (line[i] == 0) { 129 | // ASCII NUL is technically valid but rejected 130 | // for security reasons. 131 | charlen = 1; 132 | break; 133 | } 134 | } 135 | 136 | if (i > org) { 137 | cmark_strbuf_put(ob, line + org, i - org); 138 | } 139 | 140 | if (i >= size) { 141 | break; 142 | } else { 143 | // Invalid UTF-8 144 | encode_unknown(ob); 145 | i += charlen; 146 | } 147 | } 148 | } 149 | 150 | int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, 151 | int32_t *dst) { 152 | int length; 153 | int32_t uc = -1; 154 | 155 | *dst = -1; 156 | length = utf8proc_charlen(str, str_len); 157 | if (length < 0) 158 | return -1; 159 | 160 | switch (length) { 161 | case 1: 162 | uc = str[0]; 163 | break; 164 | case 2: 165 | uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); 166 | if (uc < 0x80) 167 | uc = -1; 168 | break; 169 | case 3: 170 | uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F); 171 | if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) 172 | uc = -1; 173 | break; 174 | case 4: 175 | uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) + 176 | ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); 177 | if (uc < 0x10000 || uc >= 0x110000) 178 | uc = -1; 179 | break; 180 | } 181 | 182 | if (uc < 0) 183 | return -1; 184 | 185 | *dst = uc; 186 | return length; 187 | } 188 | 189 | void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { 190 | uint8_t dst[4]; 191 | bufsize_t len = 0; 192 | 193 | assert(uc >= 0); 194 | 195 | if (uc < 0x80) { 196 | dst[0] = (uint8_t)(uc); 197 | len = 1; 198 | } else if (uc < 0x800) { 199 | dst[0] = 
(uint8_t)(0xC0 + (uc >> 6)); 200 | dst[1] = 0x80 + (uc & 0x3F); 201 | len = 2; 202 | } else if (uc == 0xFFFF) { 203 | dst[0] = 0xFF; 204 | len = 1; 205 | } else if (uc == 0xFFFE) { 206 | dst[0] = 0xFE; 207 | len = 1; 208 | } else if (uc < 0x10000) { 209 | dst[0] = (uint8_t)(0xE0 + (uc >> 12)); 210 | dst[1] = 0x80 + ((uc >> 6) & 0x3F); 211 | dst[2] = 0x80 + (uc & 0x3F); 212 | len = 3; 213 | } else if (uc < 0x110000) { 214 | dst[0] = (uint8_t)(0xF0 + (uc >> 18)); 215 | dst[1] = 0x80 + ((uc >> 12) & 0x3F); 216 | dst[2] = 0x80 + ((uc >> 6) & 0x3F); 217 | dst[3] = 0x80 + (uc & 0x3F); 218 | len = 4; 219 | } else { 220 | encode_unknown(buf); 221 | return; 222 | } 223 | 224 | cmark_strbuf_put(buf, dst, len); 225 | } 226 | 227 | void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, 228 | bufsize_t len) { 229 | int32_t c; 230 | 231 | #define bufpush(x) cmark_utf8proc_encode_char(x, dest) 232 | 233 | while (len > 0) { 234 | bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c); 235 | 236 | if (char_len >= 0) { 237 | #include "case_fold_switch.inc" 238 | } else { 239 | encode_unknown(dest); 240 | char_len = -char_len; 241 | } 242 | 243 | str += char_len; 244 | len -= char_len; 245 | } 246 | } 247 | 248 | // matches anything in the Zs class, plus LF, CR, TAB, FF. 249 | int cmark_utf8proc_is_space(int32_t uc) { 250 | return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 || 251 | uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 || 252 | uc == 8287 || uc == 12288); 253 | } 254 | 255 | // matches anything in the P[cdefios] classes. 
/*
 * cmark_utf8proc_is_punctuation: returns 1 when uc is a punctuation code
 * point and 0 otherwise.
 *
 * Code points below 128 are delegated to cmark_ispunct(); everything else
 * is matched against the hard-coded list of single values and inclusive
 * ranges below, which is meant to cover the Unicode general categories
 * Pc, Pd, Pe, Pf, Pi, Po and Ps.
 *
 * NOTE(review): a negative uc also satisfies `uc < 128` and is cast to
 * char before reaching cmark_ispunct() -- confirm callers never pass one.
 */
256 | int cmark_utf8proc_is_punctuation(int32_t uc) { 257 | return ( 258 | (uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 || 259 | uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 || 260 | uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 || 261 | uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 || 262 | uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 || 263 | uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 || 264 | (uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) || 265 | (uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 || 266 | uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 || 267 | uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) || 268 | uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 || 269 | (uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 || 270 | (uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) || 271 | uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 || 272 | (uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 || 273 | (uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) || 274 | (uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 || 275 | uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) || 276 | (uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) || 277 | (uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 || 278 | (uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) || 279 | (uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) || 280 | (uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 || 281 | uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 || 282 | (uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 || 283 | (uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) || 284 | (uc >= 10712 && uc <= 10715) || uc 
== 10748 || uc == 10749 || 285 | (uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 || 286 | uc == 11632 || (uc >= 11776 && uc <= 11822) || 287 | (uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) || 288 | (uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) || 289 | uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 || 290 | uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 || 291 | uc == 42622 || (uc >= 42738 && uc <= 42743) || 292 | (uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 || 293 | (uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 || 294 | uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 || 295 | uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 || 296 | uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 || 297 | uc == 64831 || (uc >= 65040 && uc <= 65049) || 298 | (uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) || 299 | uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 || 300 | (uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) || 301 | (uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 || 302 | uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) || 303 | uc == 65343 || uc == 65371 || uc == 65373 || 304 | (uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) || 305 | uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 || 306 | uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 || 307 | (uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) || 308 | (uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) || 309 | uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) || 310 | (uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 || 311 | (uc >= 70085 && uc <= 70088) || uc == 70093 || 312 | (uc >= 70200 && uc <= 70205) || uc == 70854 || 313 | (uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) || 314 | (uc >= 74864 && uc <= 74868) || 
uc == 92782 || uc == 92783 || 315 | uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 || 316 | uc == 113823); 317 | } 318 | -------------------------------------------------------------------------------- /commonmark_test.go: -------------------------------------------------------------------------------- 1 | package commonmark_test 2 | 3 | import ( 4 | "github.com/rhinoman/go-commonmark" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestMd2Html(t *testing.T) { 10 | htmlText := commonmark.Md2Html("Boo\n===", 0) 11 | if htmlText != "

Boo

\n" { 12 | t.Errorf("Html text is not as expected :(") 13 | } 14 | t.Logf("Html Text: %v", htmlText) 15 | } 16 | 17 | func TestCMarkVersion(t *testing.T) { 18 | version := commonmark.CMarkVersion() 19 | t.Logf("\nVersion: %v", version) 20 | } 21 | 22 | func TestCMarkParser(t *testing.T) { 23 | parser := commonmark.NewCmarkParser(commonmark.CMARK_OPT_DEFAULT) 24 | if parser == nil { 25 | t.Error("Parser is nil!") 26 | } 27 | parser.Feed("Boo\n") 28 | parser.Feed("===\n") 29 | document := parser.Finish() 30 | if document == nil { 31 | t.Error("Document is nil!") 32 | } 33 | //Call it twice to make sure it doesn't crash :) 34 | parser.Free() 35 | parser.Free() 36 | htmlText := document.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 37 | if htmlText != "

Boo

\n" { 38 | t.Error("Html text is not as expected :(") 39 | } 40 | t.Logf("Html Text: %v", htmlText) 41 | document.RenderXML(commonmark.CMARK_OPT_DEFAULT) 42 | document.Free() 43 | 44 | document2 := commonmark.ParseDocument("Foobar\n------", 0) 45 | htmlText = document2.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 46 | document2.RenderXML(commonmark.CMARK_OPT_DEFAULT) 47 | if htmlText != "

Foobar

\n" { 48 | t.Error("Html text 2 is not as expected :(") 49 | } 50 | t.Logf("Html Text2: %v", htmlText) 51 | document2.Free() 52 | document2.Free() 53 | } 54 | 55 | func TestParseFile(t *testing.T) { 56 | node, err := commonmark.ParseFile("test_data/test_file.md", 0) 57 | if err != nil { 58 | t.Error(err) 59 | } 60 | if node == nil { 61 | t.Error(err) 62 | } 63 | htmlText := node.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 64 | if htmlText != "

Test File

\n

Description

\n

This is just a test file.

\n" { 65 | t.Error("Html text is not as expected :(") 66 | } 67 | t.Logf("Html Text: %v", htmlText) 68 | node.Free() 69 | //try to parse a non-existent file 70 | eNode, err := commonmark.ParseFile("notafile.md", 0) 71 | if err == nil { 72 | t.Errorf("Should have been an error!") 73 | } 74 | t.Logf("error string: %v", err.Error()) 75 | if eNode != nil { 76 | t.Errorf("Node should be nil!") 77 | } 78 | } 79 | 80 | func TestCMarkNodeOps(t *testing.T) { 81 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 82 | if root == nil { 83 | t.Error("Root is nil!") 84 | } 85 | if root.GetNodeType() != commonmark.CMARK_NODE_DOCUMENT { 86 | t.Error("Root is wrong type!") 87 | } 88 | if root.GetNodeTypeString() != "document" { 89 | t.Error("Root is wrong type string!") 90 | } 91 | header1 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_HEADING) 92 | if header1.GetNodeType() != commonmark.CMARK_NODE_HEADING { 93 | t.Error("header1 is wrong type!") 94 | } 95 | header1.SetHeaderLevel(1) 96 | if header1.SetLiteral("boo") != false { 97 | t.Error("SetLiteral should return false for header node") 98 | } 99 | header1str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 100 | header1str.SetLiteral("I'm the main header!") 101 | if header1str.GetLiteral() != "I'm the main header!" 
{ 102 | t.Error("header1str content is wrong!") 103 | } 104 | header1.AppendChild(header1str) 105 | header2 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_HEADING) 106 | header2str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 107 | if header2str.SetLiteral("Another header!") == false { 108 | t.Error("SetLiteral returned false for valid input") 109 | } 110 | header2.AppendChild(header2str) 111 | header2.SetHeaderLevel(2) 112 | if root.PrependChild(header1) == false { 113 | t.Error("Couldn't prepend header to root") 114 | } 115 | root.AppendChild(header2) 116 | //Replace a Node 117 | header3 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_HEADING) 118 | header3str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 119 | header3.AppendChild(header3str) 120 | header3.SetHeaderLevel(2) 121 | if header3str.SetLiteral("Replacement header!") == false { 122 | t.Error("SetLiteral returned false for valid input") 123 | } 124 | if header3.Replace(header2) == false { 125 | t.Error("Couldn't Replace Node!") 126 | } 127 | //Custom nodes 128 | custom := commonmark.NewCMarkNode(commonmark.CMARK_NODE_CUSTOM_BLOCK) 129 | custom.SetOnEnter("ENTER") 130 | custom.SetOnExit("EXIT") 131 | if custom.GetOnEnter() != "ENTER" { 132 | t.Errorf("OnEnter not set correctly: %v", custom.GetOnEnter()) 133 | } 134 | if custom.GetOnExit() != "EXIT" { 135 | t.Errorf("OnExit not set correctly: %v", custom.GetOnExit()) 136 | } 137 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 138 | 139 | htmlStr := root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 140 | if htmlStr != "

I'm the main header!

\n

Replacement header!

\n" { 141 | t.Error("htmlStr is wrong!") 142 | } 143 | t.Logf("Html Text: %v", htmlStr) 144 | //Replace again 145 | if header2.Replace(header3) == false { 146 | t.Error("replaced node was freed prematurely.") 147 | } 148 | //Rearrange... 149 | header1.InsertBefore(header2) 150 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 151 | htmlStr = root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 152 | if htmlStr != "

Another header!

\n

I'm the main header!

\n" { 153 | t.Error("htmlStr is wrong!") 154 | } 155 | t.Logf("Html Text: %v", htmlStr) 156 | //removing something 157 | header2.Unlink() 158 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 159 | htmlStr = root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 160 | if htmlStr != "

I'm the main header!

\n" { 161 | t.Error("htmlStr is wrong!") 162 | } 163 | latexStr := root.RenderLatex(commonmark.CMARK_OPT_DEFAULT, 80) 164 | t.Logf("\nLatex: %v", latexStr) 165 | manStr := root.RenderMan(commonmark.CMARK_OPT_DEFAULT, 80) 166 | t.Logf("\nMAN: %v", manStr) 167 | cmStr := root.RenderCMark(commonmark.CMARK_OPT_DEFAULT, 0) 168 | t.Logf("\nCMARK: %v", cmStr) 169 | root.ConsolidateTextNodes() 170 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 171 | root.SetNodeUserData("STRING!") 172 | x := root.GetNodeUserData() 173 | t.Logf("X: %v", x) 174 | if x != "STRING!"{ 175 | t.Error("NodeUserData is wrong!") 176 | } 177 | //header2.Free() 178 | root.Free() 179 | } 180 | 181 | func TestCMarkLists(t *testing.T) { 182 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 183 | list := commonmark.NewCMarkNode(commonmark.CMARK_NODE_LIST) 184 | list.SetListType(commonmark.CMARK_ORDERED_LIST) 185 | listItem1 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_ITEM) 186 | listItem2 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_ITEM) 187 | li1para := commonmark.NewCMarkNode(commonmark.CMARK_NODE_PARAGRAPH) 188 | li1str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 189 | li1str.SetLiteral("List Item 1") 190 | li1para.AppendChild(li1str) 191 | if listItem1.AppendChild(li1para) == false { 192 | t.Error("Couldn't append paragraph to list item") 193 | } 194 | list.AppendChild(listItem1) 195 | list.AppendChild(listItem2) 196 | list.SetListTight(true) 197 | root.AppendChild(list) 198 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 199 | htmlString := root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 200 | if htmlString != "
    \n
  1. List Item 1
  2. \n
  3. \n
\n" { 201 | t.Error("htmlString is wrong!") 202 | } 203 | t.Logf("\nHtmlString: \n%v", htmlString) 204 | t.Logf("\nList start: %v", list.GetListStart()) 205 | t.Logf("\nList tight: %v", list.GetListTight()) 206 | root.Free() 207 | } 208 | 209 | func TestCMarkCodeBlocks(t *testing.T) { 210 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 211 | cb := commonmark.NewCMarkNode(commonmark.CMARK_NODE_CODE_BLOCK) 212 | cb.SetLiteral("int main(){\n return 0;\n }") 213 | cb.SetFenceInfo("c") 214 | if cb.GetFenceInfo() != "c" { 215 | t.Error("Fence info isn't c") 216 | } 217 | if cb.GetLiteral() != "int main(){\n return 0;\n }" { 218 | t.Error("Code has changed somehow") 219 | } 220 | if root.AppendChild(cb) == false { 221 | t.Error("Couldn't append code block to document") 222 | } 223 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 224 | htmlString := root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 225 | t.Logf("\nHtml String: %v\n", htmlString) 226 | if htmlString != "
int main(){\n return 0;\n }
\n" { 227 | t.Error("htmlString isn't right!") 228 | } 229 | root.Free() 230 | } 231 | 232 | func TestCMarkUrls(t *testing.T) { 233 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 234 | para := commonmark.NewCMarkNode(commonmark.CMARK_NODE_PARAGRAPH) 235 | link := commonmark.NewCMarkNode(commonmark.CMARK_NODE_LINK) 236 | root.AppendChild(para) 237 | if para.AppendChild(link) == false { 238 | t.Error("Couldn't append link node to paragraph!") 239 | } 240 | if link.SetUrl("http://duckduckgo.com") == false { 241 | t.Error("Couldn't set URL!!!") 242 | } 243 | if link.GetUrl() != "http://duckduckgo.com" { 244 | t.Error("Url doesn't match") 245 | } 246 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 247 | htmlString := root.RenderHtml(commonmark.CMARK_OPT_DEFAULT) 248 | t.Logf("\nHtml String: %v\n", htmlString) 249 | if htmlString != "

\n" { 250 | t.Error("htmlString isn't right!") 251 | } 252 | root.Free() 253 | } 254 | 255 | func TestCMarkIter(t *testing.T) { 256 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 257 | list := commonmark.NewCMarkNode(commonmark.CMARK_NODE_LIST) 258 | list.SetListType(commonmark.CMARK_ORDERED_LIST) 259 | listItem1 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_ITEM) 260 | listItem2 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_ITEM) 261 | li1para := commonmark.NewCMarkNode(commonmark.CMARK_NODE_PARAGRAPH) 262 | li1str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 263 | li1str.SetLiteral("List Item 1") 264 | li1para.AppendChild(li1str) 265 | if listItem1.AppendChild(li1para) == false { 266 | t.Error("Couldn't append paragraph to list item") 267 | } 268 | list.AppendChild(listItem1) 269 | list.AppendChild(listItem2) 270 | list.SetListTight(true) 271 | root.AppendChild(list) 272 | t.Logf("\nXML: %v", root.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 273 | iter := commonmark.NewCMarkIter(root) 274 | for { 275 | ne := iter.Next() 276 | t.Logf("NodeEvent: %v", ne) 277 | iNode := iter.GetNode() 278 | if iNode == nil { 279 | t.Error("iter node was nil!") 280 | } 281 | if ne == commonmark.CMARK_EVENT_DONE { 282 | break 283 | } 284 | 285 | } 286 | iter.Reset(listItem2, commonmark.CMARK_EVENT_DONE) 287 | iter.Free() 288 | root.Free() 289 | } 290 | 291 | func createTree() *commonmark.CMarkNode { 292 | root := commonmark.NewCMarkNode(commonmark.CMARK_NODE_DOCUMENT) 293 | header1 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_HEADING) 294 | header2 := commonmark.NewCMarkNode(commonmark.CMARK_NODE_HEADING) 295 | header1str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 296 | header2str := commonmark.NewCMarkNode(commonmark.CMARK_NODE_TEXT) 297 | header1str.SetLiteral("Header 1!") 298 | header2str.SetLiteral("Header 2!") 299 | root.AppendChild(header1) 300 | root.AppendChild(header2) 301 | header1.AppendChild(header1str) 302 | 
header2.AppendChild(header2str) 303 | return root 304 | 305 | } 306 | 307 | //Checking mem management functions 308 | func TestMem(t *testing.T) { 309 | tree := createTree() 310 | time.Sleep(3 * time.Second) 311 | t.Logf("\nXML: %v", tree.RenderXML(commonmark.CMARK_OPT_DEFAULT)) 312 | iter := commonmark.NewCMarkIter(tree) 313 | i := 1 314 | for { 315 | ne := iter.Next() 316 | t.Logf("NodeEvent: %v", ne) 317 | if ne == commonmark.CMARK_EVENT_DONE { 318 | break 319 | } 320 | i += 1 321 | } 322 | if i < 9 { 323 | t.Errorf("Lost some nodes somewhere: %v", i) 324 | } 325 | tree.Free() 326 | } 327 | -------------------------------------------------------------------------------- /latex.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "config.h" 7 | #include "cmark.h" 8 | #include "node.h" 9 | #include "buffer.h" 10 | #include "utf8.h" 11 | #include "scanners.h" 12 | #include "render.h" 13 | 14 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 15 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 16 | #define CR() renderer->cr(renderer) 17 | #define BLANKLINE() renderer->blankline(renderer) 18 | #define LIST_NUMBER_STRING_SIZE 20 19 | 20 | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, 21 | int32_t c, unsigned char nextc) { 22 | if (escape == LITERAL) { 23 | cmark_render_code_point(renderer, c); 24 | return; 25 | } 26 | 27 | switch (c) { 28 | case 123: // '{' 29 | case 125: // '}' 30 | case 35: // '#' 31 | case 37: // '%' 32 | case 38: // '&' 33 | cmark_render_ascii(renderer, "\\"); 34 | cmark_render_code_point(renderer, c); 35 | break; 36 | case 36: // '$' 37 | case 95: // '_' 38 | if (escape == NORMAL) { 39 | cmark_render_ascii(renderer, "\\"); 40 | } 41 | cmark_render_code_point(renderer, c); 42 | break; 43 | case 45: // '-' 44 | if (nextc == 45) { // prevent ligature 45 | cmark_render_ascii(renderer, 
"-{}"); 46 | } else { 47 | cmark_render_ascii(renderer, "-"); 48 | } 49 | break; 50 | case 126: // '~' 51 | if (escape == NORMAL) { 52 | cmark_render_ascii(renderer, "\\textasciitilde{}"); 53 | } else { 54 | cmark_render_code_point(renderer, c); 55 | } 56 | break; 57 | case 94: // '^' 58 | cmark_render_ascii(renderer, "\\^{}"); 59 | break; 60 | case 92: // '\\' 61 | if (escape == URL) { 62 | // / acts as path sep even on windows: 63 | cmark_render_ascii(renderer, "/"); 64 | } else { 65 | cmark_render_ascii(renderer, "\\textbackslash{}"); 66 | } 67 | break; 68 | case 124: // '|' 69 | cmark_render_ascii(renderer, "\\textbar{}"); 70 | break; 71 | case 60: // '<' 72 | cmark_render_ascii(renderer, "\\textless{}"); 73 | break; 74 | case 62: // '>' 75 | cmark_render_ascii(renderer, "\\textgreater{}"); 76 | break; 77 | case 91: // '[' 78 | case 93: // ']' 79 | cmark_render_ascii(renderer, "{"); 80 | cmark_render_code_point(renderer, c); 81 | cmark_render_ascii(renderer, "}"); 82 | break; 83 | case 34: // '"' 84 | cmark_render_ascii(renderer, "\\textquotedbl{}"); 85 | // requires \usepackage[T1]{fontenc} 86 | break; 87 | case 39: // '\'' 88 | cmark_render_ascii(renderer, "\\textquotesingle{}"); 89 | // requires \usepackage{textcomp} 90 | break; 91 | case 160: // nbsp 92 | cmark_render_ascii(renderer, "~"); 93 | break; 94 | case 8230: // hellip 95 | cmark_render_ascii(renderer, "\\ldots{}"); 96 | break; 97 | case 8216: // lsquo 98 | if (escape == NORMAL) { 99 | cmark_render_ascii(renderer, "`"); 100 | } else { 101 | cmark_render_code_point(renderer, c); 102 | } 103 | break; 104 | case 8217: // rsquo 105 | if (escape == NORMAL) { 106 | cmark_render_ascii(renderer, "\'"); 107 | } else { 108 | cmark_render_code_point(renderer, c); 109 | } 110 | break; 111 | case 8220: // ldquo 112 | if (escape == NORMAL) { 113 | cmark_render_ascii(renderer, "``"); 114 | } else { 115 | cmark_render_code_point(renderer, c); 116 | } 117 | break; 118 | case 8221: // rdquo 119 | if (escape == 
NORMAL) { 120 | cmark_render_ascii(renderer, "''"); 121 | } else { 122 | cmark_render_code_point(renderer, c); 123 | } 124 | break; 125 | case 8212: // emdash 126 | if (escape == NORMAL) { 127 | cmark_render_ascii(renderer, "---"); 128 | } else { 129 | cmark_render_code_point(renderer, c); 130 | } 131 | break; 132 | case 8211: // endash 133 | if (escape == NORMAL) { 134 | cmark_render_ascii(renderer, "--"); 135 | } else { 136 | cmark_render_code_point(renderer, c); 137 | } 138 | break; 139 | default: 140 | cmark_render_code_point(renderer, c); 141 | } 142 | } 143 | 144 | typedef enum { 145 | NO_LINK, 146 | URL_AUTOLINK, 147 | EMAIL_AUTOLINK, 148 | NORMAL_LINK, 149 | INTERNAL_LINK 150 | } link_type; 151 | 152 | static link_type get_link_type(cmark_node *node) { 153 | size_t title_len, url_len; 154 | cmark_node *link_text; 155 | char *realurl; 156 | int realurllen; 157 | bool isemail = false; 158 | 159 | if (node->type != CMARK_NODE_LINK) { 160 | return NO_LINK; 161 | } 162 | 163 | const char *url = cmark_node_get_url(node); 164 | cmark_chunk url_chunk = cmark_chunk_literal(url); 165 | 166 | if (url && *url == '#') { 167 | return INTERNAL_LINK; 168 | } 169 | 170 | url_len = strlen(url); 171 | if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { 172 | return NO_LINK; 173 | } 174 | 175 | const char *title = cmark_node_get_title(node); 176 | title_len = strlen(title); 177 | // if it has a title, we can't treat it as an autolink: 178 | if (title_len == 0) { 179 | 180 | link_text = node->first_child; 181 | cmark_consolidate_text_nodes(link_text); 182 | 183 | if (!link_text) 184 | return NO_LINK; 185 | 186 | realurl = (char *)url; 187 | realurllen = (int)url_len; 188 | if (strncmp(realurl, "mailto:", 7) == 0) { 189 | realurl += 7; 190 | realurllen -= 7; 191 | isemail = true; 192 | } 193 | if (realurllen == link_text->as.literal.len && 194 | strncmp(realurl, (char *)link_text->as.literal.data, 195 | link_text->as.literal.len) == 0) { 196 | if (isemail) { 197 | return 
EMAIL_AUTOLINK; 198 | } else { 199 | return URL_AUTOLINK; 200 | } 201 | } 202 | } 203 | 204 | return NORMAL_LINK; 205 | } 206 | 207 | static int S_get_enumlevel(cmark_node *node) { 208 | int enumlevel = 0; 209 | cmark_node *tmp = node; 210 | while (tmp) { 211 | if (tmp->type == CMARK_NODE_LIST && 212 | cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { 213 | enumlevel++; 214 | } 215 | tmp = tmp->parent; 216 | } 217 | return enumlevel; 218 | } 219 | 220 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 221 | cmark_event_type ev_type, int options) { 222 | int list_number; 223 | int enumlevel; 224 | char list_number_string[LIST_NUMBER_STRING_SIZE]; 225 | bool entering = (ev_type == CMARK_EVENT_ENTER); 226 | cmark_list_type list_type; 227 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); 228 | 229 | // avoid warning about unused parameter: 230 | (void)(options); 231 | 232 | switch (node->type) { 233 | case CMARK_NODE_DOCUMENT: 234 | break; 235 | 236 | case CMARK_NODE_BLOCK_QUOTE: 237 | if (entering) { 238 | LIT("\\begin{quote}"); 239 | CR(); 240 | } else { 241 | LIT("\\end{quote}"); 242 | BLANKLINE(); 243 | } 244 | break; 245 | 246 | case CMARK_NODE_LIST: 247 | list_type = cmark_node_get_list_type(node); 248 | if (entering) { 249 | LIT("\\begin{"); 250 | LIT(list_type == CMARK_ORDERED_LIST ? 
"enumerate" : "itemize"); 251 | LIT("}"); 252 | CR(); 253 | list_number = cmark_node_get_list_start(node); 254 | if (list_number > 1) { 255 | enumlevel = S_get_enumlevel(node); 256 | // latex normally supports only five levels 257 | if (enumlevel >= 1 && enumlevel <= 5) { 258 | snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d", 259 | list_number); 260 | LIT("\\setcounter{enum"); 261 | switch(enumlevel) { 262 | case 1: LIT("i"); break; 263 | case 2: LIT("ii"); break; 264 | case 3: LIT("iii"); break; 265 | case 4: LIT("iv"); break; 266 | case 5: LIT("v"); break; 267 | default: LIT("i"); break; 268 | } 269 | LIT("}{"); 270 | OUT(list_number_string, false, NORMAL); 271 | LIT("}"); 272 | } 273 | CR(); 274 | } 275 | } else { 276 | LIT("\\end{"); 277 | LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); 278 | LIT("}"); 279 | BLANKLINE(); 280 | } 281 | break; 282 | 283 | case CMARK_NODE_ITEM: 284 | if (entering) { 285 | LIT("\\item "); 286 | } else { 287 | CR(); 288 | } 289 | break; 290 | 291 | case CMARK_NODE_HEADING: 292 | if (entering) { 293 | switch (cmark_node_get_heading_level(node)) { 294 | case 1: 295 | LIT("\\section"); 296 | break; 297 | case 2: 298 | LIT("\\subsection"); 299 | break; 300 | case 3: 301 | LIT("\\subsubsection"); 302 | break; 303 | case 4: 304 | LIT("\\paragraph"); 305 | break; 306 | case 5: 307 | LIT("\\subparagraph"); 308 | break; 309 | } 310 | LIT("{"); 311 | } else { 312 | LIT("}"); 313 | BLANKLINE(); 314 | } 315 | break; 316 | 317 | case CMARK_NODE_CODE_BLOCK: 318 | CR(); 319 | LIT("\\begin{verbatim}"); 320 | CR(); 321 | OUT(cmark_node_get_literal(node), false, LITERAL); 322 | CR(); 323 | LIT("\\end{verbatim}"); 324 | BLANKLINE(); 325 | break; 326 | 327 | case CMARK_NODE_HTML_BLOCK: 328 | break; 329 | 330 | case CMARK_NODE_CUSTOM_BLOCK: 331 | CR(); 332 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 333 | false, LITERAL); 334 | CR(); 335 | break; 336 | 337 | case CMARK_NODE_THEMATIC_BREAK: 338 | BLANKLINE(); 339 | LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"); 340 | BLANKLINE(); 341 | break; 342 | 343 | case CMARK_NODE_PARAGRAPH: 344 | if (!entering) { 345 | BLANKLINE(); 346 | } 347 | break; 348 | 349 | case CMARK_NODE_TEXT: 350 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 351 | break; 352 | 353 | case CMARK_NODE_LINEBREAK: 354 | LIT("\\\\"); 355 | CR(); 356 | break; 357 | 358 | case CMARK_NODE_SOFTBREAK: 359 | if (options & CMARK_OPT_HARDBREAKS) { 360 | LIT("\\\\"); 361 | CR(); 362 | } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { 363 | CR(); 364 | } else { 365 | OUT(" ", allow_wrap, NORMAL); 366 | } 367 | break; 368 | 369 | case CMARK_NODE_CODE: 370 | LIT("\\texttt{"); 371 | OUT(cmark_node_get_literal(node), false, NORMAL); 372 | LIT("}"); 373 | break; 374 | 375 | case CMARK_NODE_HTML_INLINE: 376 | break; 377 | 378 | case CMARK_NODE_CUSTOM_INLINE: 379 | OUT(entering ? 
cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 380 | false, LITERAL); 381 | break; 382 | 383 | case CMARK_NODE_STRONG: 384 | if (entering) { 385 | LIT("\\textbf{"); 386 | } else { 387 | LIT("}"); 388 | } 389 | break; 390 | 391 | case CMARK_NODE_EMPH: 392 | if (entering) { 393 | LIT("\\emph{"); 394 | } else { 395 | LIT("}"); 396 | } 397 | break; 398 | 399 | case CMARK_NODE_LINK: 400 | if (entering) { 401 | const char *url = cmark_node_get_url(node); 402 | // requires \usepackage{hyperref} 403 | switch (get_link_type(node)) { 404 | case URL_AUTOLINK: 405 | LIT("\\url{"); 406 | OUT(url, false, URL); 407 | LIT("}"); 408 | return 0; // Don't process further nodes to avoid double-rendering artefacts 409 | case EMAIL_AUTOLINK: 410 | LIT("\\href{"); 411 | OUT(url, false, URL); 412 | LIT("}\\nolinkurl{"); 413 | break; 414 | case NORMAL_LINK: 415 | LIT("\\href{"); 416 | OUT(url, false, URL); 417 | LIT("}{"); 418 | break; 419 | case INTERNAL_LINK: 420 | LIT("\\protect\\hyperlink{"); 421 | OUT(url + 1, false, URL); 422 | LIT("}{"); 423 | break; 424 | case NO_LINK: 425 | LIT("{"); // error? 
426 | } 427 | } else { 428 | LIT("}"); 429 | } 430 | 431 | break; 432 | 433 | case CMARK_NODE_IMAGE: 434 | if (entering) { 435 | LIT("\\protect\\includegraphics{"); 436 | // requires \include{graphicx} 437 | OUT(cmark_node_get_url(node), false, URL); 438 | LIT("}"); 439 | return 0; 440 | } 441 | break; 442 | 443 | default: 444 | assert(false); 445 | break; 446 | } 447 | 448 | return 1; 449 | } 450 | 451 | char *cmark_render_latex(cmark_node *root, int options, int width) { 452 | return cmark_render(root, options, width, outc, S_render_node); 453 | } 454 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package commonmark 2 | 3 | /* 4 | #include 5 | #include 6 | #include "cmark.h" 7 | */ 8 | import "C" 9 | import ( 10 | "unsafe" 11 | ) 12 | 13 | //Maps to a cmark_node_type enum in cmark.h 14 | type NodeType int 15 | 16 | const ( 17 | //Error Status 18 | CMARK_NODE_NONE NodeType = iota 19 | 20 | //Block 21 | CMARK_NODE_DOCUMENT 22 | CMARK_NODE_BLOCK_QUOTE 23 | CMARK_NODE_LIST 24 | CMARK_NODE_ITEM 25 | CMARK_NODE_CODE_BLOCK 26 | CMARK_NODE_HTML_BLOCK 27 | CMARK_NODE_CUSTOM_BLOCK 28 | CMARK_NODE_PARAGRAPH 29 | CMARK_NODE_HEADING 30 | CMARK_NODE_THEMATIC_BREAK 31 | 32 | //Inline 33 | CMARK_NODE_TEXT 34 | CMARK_NODE_SOFTBREAK 35 | CMARK_NODE_LINEBREAK 36 | CMARK_NODE_CODE 37 | CMARK_NODE_HTML_INLINE 38 | CMARK_NODE_CUSTOM_INLINE 39 | CMARK_NODE_EMPH 40 | CMARK_NODE_STRONG 41 | CMARK_NODE_LINK 42 | CMARK_NODE_IMAGE 43 | //Block 44 | CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT 45 | CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK 46 | //Inline 47 | CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT 48 | CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE 49 | ) 50 | 51 | //Maps to a cmark_list_type in cmark.h 52 | type ListType int 53 | 54 | const ( 55 | CMARK_NO_LIST ListType = iota 56 | CMARK_BULLET_LIST 57 | CMARK_ORDERED_LIST 58 | ) 59 | 60 | type DelimType 
int 61 | 62 | const ( 63 | CMARK_NO_DELIM = iota 64 | CMARK_PERIOD_DELIM 65 | CMARK_PAREN_DELIM 66 | ) 67 | 68 | //CMark writer options for render functions 69 | const CMARK_OPT_DEFAULT = 0 70 | const CMARK_OPT_SOURCEPOS = 1 71 | const CMARK_OPT_HARDBREAKS = 2 72 | const CMARK_OPT_NORMALIZE = 4 73 | const CMARK_OPT_SMART = 8 74 | const CMARK_OPT_VALIDATE_UTF8 = 16 75 | const CMARK_OPT_SAFE = 32 76 | 77 | //converts C int return codes to True/False :) 78 | func success(code C.int) bool { 79 | if int(code) > 0 { 80 | return true 81 | } else { 82 | return false 83 | } 84 | } 85 | 86 | //Wraps the cmark_node. 87 | //CommonMark nodes are represented as Trees in memory. 88 | type CMarkNode struct { 89 | node *C.struct_cmark_node 90 | parent *CMarkNode 91 | } 92 | 93 | //Creates a new node of the specified type 94 | func NewCMarkNode(nt NodeType) *CMarkNode { 95 | n := &CMarkNode{ 96 | node: C.cmark_node_new(C.cmark_node_type(nt)), 97 | } 98 | return n 99 | } 100 | 101 | // print structure as XML 102 | func (node *CMarkNode) RenderXML(options int) string { 103 | result := C.cmark_render_xml(node.node, C.int(options)) 104 | defer C.free(unsafe.Pointer(result)) 105 | return C.GoString(result) 106 | } 107 | 108 | // Renders the document as HTML. 109 | // Returns an HTML string. 110 | func (node *CMarkNode) RenderHtml(options int) string { 111 | result := C.cmark_render_html(node.node, C.int(options)) 112 | defer C.free(unsafe.Pointer(result)) 113 | return C.GoString(result) 114 | } 115 | 116 | // Renders the document as a groff man page, 117 | // without the header 118 | func (node *CMarkNode) RenderMan(options int, width int) string { 119 | result := C.cmark_render_man(node.node, C.int(options), C.int(width)) 120 | defer C.free(unsafe.Pointer(result)) 121 | return C.GoString(result) 122 | } 123 | 124 | // Renders node tree as commonmark text. 
125 | func (node *CMarkNode) RenderCMark(options int, width int) string { 126 | result := C.cmark_render_commonmark(node.node, C.int(options), C.int(width)) 127 | defer C.free(unsafe.Pointer(result)) 128 | return C.GoString(result) 129 | } 130 | 131 | // Renders node tree as a LaTeX document 132 | func (node *CMarkNode) RenderLatex(options int, width int) string { 133 | result := C.cmark_render_latex(node.node, C.int(options), C.int(width)) 134 | defer C.free(unsafe.Pointer(result)) 135 | return C.GoString(result) 136 | } 137 | 138 | // Cleanup a node, including any children. 139 | // Unlinks a node from the tree and frees it. 140 | func (node *CMarkNode) Free() { 141 | if node.node != nil { 142 | C.cmark_node_free(node.node) 143 | } 144 | node.node = nil 145 | } 146 | 147 | //Node traversal functions 148 | 149 | //Get next node 150 | func (node *CMarkNode) Next() *CMarkNode { 151 | return &CMarkNode{ 152 | node: C.cmark_node_next(node.node), 153 | } 154 | } 155 | 156 | //Get previous node 157 | func (node *CMarkNode) Previous() *CMarkNode { 158 | return &CMarkNode{ 159 | node: C.cmark_node_previous(node.node), 160 | } 161 | } 162 | 163 | //Get parent node 164 | func (node *CMarkNode) Parent() *CMarkNode { 165 | return &CMarkNode{ 166 | node: C.cmark_node_parent(node.node), 167 | } 168 | } 169 | 170 | //Get first child node 171 | func (node *CMarkNode) FirstChild() *CMarkNode { 172 | return &CMarkNode{ 173 | node: C.cmark_node_first_child(node.node), 174 | } 175 | } 176 | 177 | //Get last child node 178 | func (node *CMarkNode) LastChild() *CMarkNode { 179 | return &CMarkNode{ 180 | node: C.cmark_node_last_child(node.node), 181 | } 182 | } 183 | 184 | //Accessor functions 185 | 186 | //Sets arbitrary user data for node 187 | func (node *CMarkNode) SetNodeUserData(userData string) bool { 188 | cstr := C.CString(userData) 189 | res := C.cmark_node_set_user_data(node.node, unsafe.Pointer(&cstr)) 190 | return success(res) 191 | } 192 | 193 | //Returns the user data of 
the node as an 194 | //unsafe.Pointer. Hope you know what you're doing. 195 | func (node *CMarkNode) GetNodeUserData() string { 196 | data := C.cmark_node_get_user_data(node.node) 197 | return C.GoString((*C.char)(*(*unsafe.Pointer)(data))) 198 | } 199 | 200 | //Get the node type 201 | func (node *CMarkNode) GetNodeType() NodeType { 202 | nt := C.cmark_node_get_type(node.node) 203 | return NodeType(nt) 204 | } 205 | 206 | //Get the node type as a string 207 | func (node *CMarkNode) GetNodeTypeString() string { 208 | cstr := C.cmark_node_get_type_string(node.node) 209 | return C.GoString(cstr) 210 | } 211 | 212 | //Get the node's string content 213 | func (node *CMarkNode) GetLiteral() string { 214 | cstr := C.cmark_node_get_literal(node.node) 215 | return C.GoString(cstr) 216 | } 217 | 218 | //Set the node's string content 219 | func (node *CMarkNode) SetLiteral(content string) bool { 220 | cstr := C.CString(content) 221 | defer C.free(unsafe.Pointer(cstr)) 222 | return success(C.cmark_node_set_literal(node.node, cstr)) 223 | } 224 | 225 | //Get a Header node's level 226 | func (node *CMarkNode) GetHeaderLevel() int { 227 | level := C.cmark_node_get_header_level(node.node) 228 | return int(level) 229 | } 230 | 231 | //Set a Header node's level (1,2, etc.) 
232 | func (node *CMarkNode) SetHeaderLevel(level int) bool { 233 | return success(C.cmark_node_set_header_level(node.node, C.int(level))) 234 | } 235 | 236 | //Get a List node's list type 237 | func (node *CMarkNode) GetListType() ListType { 238 | lt := C.cmark_node_get_list_type(node.node) 239 | return ListType(lt) 240 | } 241 | 242 | //Set a List node's list type 243 | func (node *CMarkNode) SetListType(lt ListType) bool { 244 | return success(C.cmark_node_set_list_type(node.node, C.cmark_list_type(lt))) 245 | } 246 | 247 | //Returns the list delimiter type of node, or CMARK_NO_DELIM if node is not a list 248 | func (node *CMarkNode) GetListDelim() DelimType { 249 | dt := C.cmark_node_get_list_delim(node.node) 250 | return DelimType(dt) 251 | } 252 | 253 | //Sets the list delimeter type of the node, returns true on success 254 | func (node *CMarkNode) SetListDelim(dt DelimType) bool { 255 | return success(C.cmark_node_set_list_delim(node.node, C.cmark_delim_type(dt))) 256 | } 257 | 258 | //Get a list's start 259 | func (node *CMarkNode) GetListStart() int { 260 | ls := C.cmark_node_get_list_start(node.node) 261 | return int(ls) 262 | } 263 | 264 | //Set a list's start 265 | func (node *CMarkNode) SetListStart(start int) bool { 266 | return success(C.cmark_node_set_list_start(node.node, C.int(start))) 267 | } 268 | 269 | //Get list 'tight' 270 | func (node *CMarkNode) GetListTight() bool { 271 | return success(C.cmark_node_get_list_tight(node.node)) 272 | } 273 | 274 | //Set list 'tight' 275 | func (node *CMarkNode) SetListTight(isTight bool) bool { 276 | ti := 0 277 | if isTight == true { 278 | ti = 1 279 | } 280 | return success(C.cmark_node_set_list_tight(node.node, C.int(ti))) 281 | } 282 | 283 | //Get Fence info 284 | func (node *CMarkNode) GetFenceInfo() string { 285 | cstr := C.cmark_node_get_fence_info(node.node) 286 | return C.GoString(cstr) 287 | } 288 | 289 | //Set Fence info 290 | func (node *CMarkNode) SetFenceInfo(fenceInfo string) bool { 291 | cstr 
:= C.CString(fenceInfo) 292 | defer C.free(unsafe.Pointer(cstr)) 293 | return success(C.cmark_node_set_fence_info(node.node, cstr)) 294 | } 295 | 296 | //Get a node's url 297 | func (node *CMarkNode) GetUrl() string { 298 | cstr := C.cmark_node_get_url(node.node) 299 | return C.GoString(cstr) 300 | } 301 | 302 | //Set a node's url 303 | func (node *CMarkNode) SetUrl(url string) bool { 304 | cstr := C.CString(url) 305 | defer C.free(unsafe.Pointer(cstr)) 306 | return success(C.cmark_node_set_url(node.node, cstr)) 307 | } 308 | 309 | //Set a node's title 310 | func (node *CMarkNode) SetTitle(title string) bool { 311 | cstr := C.CString(title) 312 | defer C.free(unsafe.Pointer(cstr)) 313 | return success(C.cmark_node_set_title(node.node, cstr)) 314 | } 315 | 316 | //Get a node's title 317 | func (node *CMarkNode) GetTitle() string { 318 | cstr := C.cmark_node_get_title(node.node) 319 | return C.GoString(cstr) 320 | } 321 | 322 | //Returns the literal "on enter" text for a custom node, or an empty 323 | //string if no on_enter is set 324 | func (node *CMarkNode) GetOnEnter() string { 325 | cstr := C.cmark_node_get_on_enter(node.node) 326 | return C.GoString(cstr) 327 | } 328 | 329 | //Sets the literal text to render "on enter" for a custom node. 330 | //Any children of the node will be rendered after this text. 331 | //Returns true on success and false on failure 332 | func (node *CMarkNode) SetOnEnter(onEnter string) bool { 333 | cstr := C.CString(onEnter) 334 | defer C.free(unsafe.Pointer(cstr)) 335 | return success(C.cmark_node_set_on_enter(node.node, cstr)) 336 | } 337 | 338 | //Returns the literal "on exit" text for a custom node, or an empty 339 | //string if no on_exit is set 340 | func (node *CMarkNode) GetOnExit() string { 341 | cstr := C.cmark_node_get_on_exit(node.node) 342 | return C.GoString(cstr) 343 | } 344 | 345 | //Sets the literal text to render "on exit" for a custom node. 346 | //Any children of the node will be rendered before this text. 
347 | //Returns true on success and false on failure 348 | func (node *CMarkNode) SetOnExit(onExit string) bool { 349 | cstr := C.CString(onExit) 350 | defer C.free(unsafe.Pointer(cstr)) 351 | return success(C.cmark_node_set_on_exit(node.node, cstr)) 352 | } 353 | 354 | //Returns the line on which 'node' begins 355 | func (node *CMarkNode) GetStartLine() int { 356 | return int(C.cmark_node_get_start_line(node.node)) 357 | } 358 | 359 | //Returns the column at which 'node' begins 360 | func (node *CMarkNode) GetStartColumn() int { 361 | return int(C.cmark_node_get_start_column(node.node)) 362 | } 363 | 364 | //Returns the line on which 'node' ends 365 | func (node *CMarkNode) GetEndLine() int { 366 | return int(C.cmark_node_get_end_line(node.node)) 367 | } 368 | 369 | //Returns the column at which 'node' ends 370 | func (node *CMarkNode) GetEndColumn() int { 371 | return int(C.cmark_node_get_end_column(node.node)) 372 | } 373 | 374 | // Tree manipulation functions 375 | 376 | //Unlink a node from the tree 377 | func (node *CMarkNode) Unlink() { 378 | C.cmark_node_unlink(node.node) 379 | } 380 | 381 | // InsertBefore can cause a panic quite readily :) 382 | // Hint: Both nodes had better already be in the 'tree' 383 | // Insert a node before another 'sibling' node 384 | func (node *CMarkNode) InsertBefore(sibling *CMarkNode) bool { 385 | return success(C.cmark_node_insert_before(node.node, sibling.node)) 386 | } 387 | 388 | // InsertAfter can cause a panic quite readily :) 389 | // Hint: Both nodes had better already be in the 'tree' 390 | //Insert a node after another 'sibling' node 391 | func (node *CMarkNode) InsertAfter(sibling *CMarkNode) bool { 392 | return success(C.cmark_node_insert_after(node.node, sibling.node)) 393 | } 394 | 395 | // Replaces 'oldNode' with 'newNode' and unlinks 'oldnode' (but does 396 | // not free its memory). 397 | // Returns true on success, false on failure. 
398 | func (newNode *CMarkNode) Replace(oldNode *CMarkNode) bool { 399 | return success(C.cmark_node_replace(oldNode.node, newNode.node)) 400 | } 401 | 402 | //Prepend a child node 403 | func (node *CMarkNode) PrependChild(child *CMarkNode) bool { 404 | return success(C.cmark_node_prepend_child(node.node, child.node)) 405 | } 406 | 407 | //Append a child node 408 | func (node *CMarkNode) AppendChild(child *CMarkNode) bool { 409 | return success(C.cmark_node_append_child(node.node, child.node)) 410 | } 411 | 412 | //Consolidates adjacent text nodes. 413 | func (node *CMarkNode) ConsolidateTextNodes() { 414 | C.cmark_consolidate_text_nodes(node.node) 415 | } 416 | -------------------------------------------------------------------------------- /commonmark.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "config.h" 8 | #include "cmark.h" 9 | #include "node.h" 10 | #include "buffer.h" 11 | #include "utf8.h" 12 | #include "scanners.h" 13 | #include "render.h" 14 | 15 | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 16 | #define LIT(s) renderer->out(renderer, s, false, LITERAL) 17 | #define CR() renderer->cr(renderer) 18 | #define BLANKLINE() renderer->blankline(renderer) 19 | #define ENCODED_SIZE 20 20 | #define LISTMARKER_SIZE 20 21 | 22 | // Functions to convert cmark_nodes to commonmark strings. 
23 | 24 | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, 25 | int32_t c, unsigned char nextc) { 26 | bool needs_escaping = false; 27 | bool follows_digit = 28 | renderer->buffer->size > 0 && 29 | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); 30 | char encoded[ENCODED_SIZE]; 31 | 32 | needs_escaping = 33 | c < 0x80 && escape != LITERAL && 34 | ((escape == NORMAL && 35 | (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || 36 | c == '>' || c == '\\' || c == '`' || c == '!' || 37 | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || 38 | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && 39 | // begin_content doesn't get set to false til we've passed digits 40 | // at the beginning of line, so... 41 | !follows_digit) || 42 | (renderer->begin_content && (c == '.' || c == ')') && follows_digit && 43 | (nextc == 0 || cmark_isspace(nextc))))) || 44 | (escape == URL && 45 | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || 46 | c == ')' || c == '(')) || 47 | (escape == TITLE && 48 | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); 49 | 50 | if (needs_escaping) { 51 | if (cmark_isspace(c)) { 52 | // use percent encoding for spaces 53 | snprintf(encoded, ENCODED_SIZE, "%%%2x", c); 54 | cmark_strbuf_puts(renderer->buffer, encoded); 55 | renderer->column += 3; 56 | } else { 57 | cmark_render_ascii(renderer, "\\"); 58 | cmark_render_code_point(renderer, c); 59 | } 60 | } else { 61 | cmark_render_code_point(renderer, c); 62 | } 63 | } 64 | 65 | static int longest_backtick_sequence(const char *code) { 66 | int longest = 0; 67 | int current = 0; 68 | size_t i = 0; 69 | size_t code_len = strlen(code); 70 | while (i <= code_len) { 71 | if (code[i] == '`') { 72 | current++; 73 | } else { 74 | if (current > longest) { 75 | longest = current; 76 | } 77 | current = 0; 78 | } 79 | i++; 80 | } 81 | return longest; 82 | } 83 | 84 | static int 
shortest_unused_backtick_sequence(const char *code) { 85 | // note: if the shortest sequence is >= 32, this returns 32 86 | // so as not to overflow the bit array. 87 | uint32_t used = 1; 88 | int current = 0; 89 | size_t i = 0; 90 | size_t code_len = strlen(code); 91 | while (i <= code_len) { 92 | if (code[i] == '`') { 93 | current++; 94 | } else { 95 | if (current > 0 && current < 32) { 96 | used |= (1U << current); 97 | } 98 | current = 0; 99 | } 100 | i++; 101 | } 102 | // return number of first bit that is 0: 103 | i = 0; 104 | while (i < 32 && used & 1) { 105 | used = used >> 1; 106 | i++; 107 | } 108 | return (int)i; 109 | } 110 | 111 | static bool is_autolink(cmark_node *node) { 112 | cmark_chunk *title; 113 | cmark_chunk *url; 114 | cmark_node *link_text; 115 | char *realurl; 116 | int realurllen; 117 | 118 | if (node->type != CMARK_NODE_LINK) { 119 | return false; 120 | } 121 | 122 | url = &node->as.link.url; 123 | if (url->len == 0 || scan_scheme(url, 0) == 0) { 124 | return false; 125 | } 126 | 127 | title = &node->as.link.title; 128 | // if it has a title, we can't treat it as an autolink: 129 | if (title->len > 0) { 130 | return false; 131 | } 132 | 133 | link_text = node->first_child; 134 | if (link_text == NULL) { 135 | return false; 136 | } 137 | cmark_consolidate_text_nodes(link_text); 138 | realurl = (char *)url->data; 139 | realurllen = url->len; 140 | if (strncmp(realurl, "mailto:", 7) == 0) { 141 | realurl += 7; 142 | realurllen -= 7; 143 | } 144 | return (realurllen == link_text->as.literal.len && 145 | strncmp(realurl, (char *)link_text->as.literal.data, 146 | link_text->as.literal.len) == 0); 147 | } 148 | 149 | // if node is a block node, returns node. 150 | // otherwise returns first block-level node that is an ancestor of node. 151 | // if there is no block-level ancestor, returns NULL. 
152 | static cmark_node *get_containing_block(cmark_node *node) { 153 | while (node) { 154 | if (node->type >= CMARK_NODE_FIRST_BLOCK && 155 | node->type <= CMARK_NODE_LAST_BLOCK) { 156 | return node; 157 | } else { 158 | node = node->parent; 159 | } 160 | } 161 | return NULL; 162 | } 163 | 164 | static int S_render_node(cmark_renderer *renderer, cmark_node *node, 165 | cmark_event_type ev_type, int options) { 166 | cmark_node *tmp; 167 | int list_number; 168 | cmark_delim_type list_delim; 169 | int numticks; 170 | int i; 171 | bool entering = (ev_type == CMARK_EVENT_ENTER); 172 | const char *info, *code, *title; 173 | size_t info_len, code_len; 174 | char listmarker[LISTMARKER_SIZE]; 175 | char *emph_delim; 176 | bool first_in_list_item; 177 | bufsize_t marker_width; 178 | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && 179 | !(CMARK_OPT_HARDBREAKS & options); 180 | 181 | // Don't adjust tight list status til we've started the list. 182 | // Otherwise we loose the blank line between a paragraph and 183 | // a following list. 
184 | if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { 185 | tmp = get_containing_block(node); 186 | renderer->in_tight_list_item = 187 | tmp && // tmp might be NULL if there is no containing block 188 | ((tmp->type == CMARK_NODE_ITEM && 189 | cmark_node_get_list_tight(tmp->parent)) || 190 | (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && 191 | cmark_node_get_list_tight(tmp->parent->parent))); 192 | } 193 | 194 | switch (node->type) { 195 | case CMARK_NODE_DOCUMENT: 196 | break; 197 | 198 | case CMARK_NODE_BLOCK_QUOTE: 199 | if (entering) { 200 | LIT("> "); 201 | renderer->begin_content = true; 202 | cmark_strbuf_puts(renderer->prefix, "> "); 203 | } else { 204 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); 205 | BLANKLINE(); 206 | } 207 | break; 208 | 209 | case CMARK_NODE_LIST: 210 | if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK || 211 | node->next->type == CMARK_NODE_LIST)) { 212 | // this ensures that a following indented code block or list will be 213 | // inteprereted correctly. 214 | CR(); 215 | LIT(""); 216 | BLANKLINE(); 217 | } 218 | break; 219 | 220 | case CMARK_NODE_ITEM: 221 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 222 | marker_width = 4; 223 | } else { 224 | list_number = cmark_node_get_list_start(node->parent); 225 | list_delim = cmark_node_get_list_delim(node->parent); 226 | tmp = node; 227 | while (tmp->prev) { 228 | tmp = tmp->prev; 229 | list_number += 1; 230 | } 231 | // we ensure a width of at least 4 so 232 | // we get nice transition from single digits 233 | // to double 234 | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, 235 | list_delim == CMARK_PAREN_DELIM ? ")" : ".", 236 | list_number < 10 ? 
" " : " "); 237 | marker_width = strlen(listmarker); 238 | } 239 | if (entering) { 240 | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 241 | LIT(" - "); 242 | renderer->begin_content = true; 243 | } else { 244 | LIT(listmarker); 245 | renderer->begin_content = true; 246 | } 247 | for (i = marker_width; i--;) { 248 | cmark_strbuf_putc(renderer->prefix, ' '); 249 | } 250 | } else { 251 | cmark_strbuf_truncate(renderer->prefix, 252 | renderer->prefix->size - marker_width); 253 | CR(); 254 | } 255 | break; 256 | 257 | case CMARK_NODE_HEADING: 258 | if (entering) { 259 | for (i = cmark_node_get_heading_level(node); i > 0; i--) { 260 | LIT("#"); 261 | } 262 | LIT(" "); 263 | renderer->begin_content = true; 264 | renderer->no_linebreaks = true; 265 | } else { 266 | renderer->no_linebreaks = false; 267 | BLANKLINE(); 268 | } 269 | break; 270 | 271 | case CMARK_NODE_CODE_BLOCK: 272 | first_in_list_item = node->prev == NULL && node->parent && 273 | node->parent->type == CMARK_NODE_ITEM; 274 | 275 | if (!first_in_list_item) { 276 | BLANKLINE(); 277 | } 278 | info = cmark_node_get_fence_info(node); 279 | info_len = strlen(info); 280 | code = cmark_node_get_literal(node); 281 | code_len = strlen(code); 282 | // use indented form if no info, and code doesn't 283 | // begin or end with a blank line, and code isn't 284 | // first thing in a list item 285 | if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) && 286 | !(cmark_isspace(code[code_len - 1]) && 287 | cmark_isspace(code[code_len - 2]))) && 288 | !first_in_list_item) { 289 | LIT(" "); 290 | cmark_strbuf_puts(renderer->prefix, " "); 291 | OUT(cmark_node_get_literal(node), false, LITERAL); 292 | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4); 293 | } else { 294 | numticks = longest_backtick_sequence(code) + 1; 295 | if (numticks < 3) { 296 | numticks = 3; 297 | } 298 | for (i = 0; i < numticks; i++) { 299 | LIT("`"); 300 | } 301 | LIT(" "); 302 | OUT(info, false, LITERAL); 
303 | CR(); 304 | OUT(cmark_node_get_literal(node), false, LITERAL); 305 | CR(); 306 | for (i = 0; i < numticks; i++) { 307 | LIT("`"); 308 | } 309 | } 310 | BLANKLINE(); 311 | break; 312 | 313 | case CMARK_NODE_HTML_BLOCK: 314 | BLANKLINE(); 315 | OUT(cmark_node_get_literal(node), false, LITERAL); 316 | BLANKLINE(); 317 | break; 318 | 319 | case CMARK_NODE_CUSTOM_BLOCK: 320 | BLANKLINE(); 321 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 322 | false, LITERAL); 323 | BLANKLINE(); 324 | break; 325 | 326 | case CMARK_NODE_THEMATIC_BREAK: 327 | BLANKLINE(); 328 | LIT("-----"); 329 | BLANKLINE(); 330 | break; 331 | 332 | case CMARK_NODE_PARAGRAPH: 333 | if (!entering) { 334 | BLANKLINE(); 335 | } 336 | break; 337 | 338 | case CMARK_NODE_TEXT: 339 | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 340 | break; 341 | 342 | case CMARK_NODE_LINEBREAK: 343 | if (!(CMARK_OPT_HARDBREAKS & options)) { 344 | LIT(" "); 345 | } 346 | CR(); 347 | break; 348 | 349 | case CMARK_NODE_SOFTBREAK: 350 | if (CMARK_OPT_HARDBREAKS & options) { 351 | LIT(" "); 352 | CR(); 353 | } else if (!renderer->no_linebreaks && renderer->width == 0 && 354 | !(CMARK_OPT_HARDBREAKS & options) && 355 | !(CMARK_OPT_NOBREAKS & options)) { 356 | CR(); 357 | } else { 358 | OUT(" ", allow_wrap, LITERAL); 359 | } 360 | break; 361 | 362 | case CMARK_NODE_CODE: 363 | code = cmark_node_get_literal(node); 364 | code_len = strlen(code); 365 | numticks = shortest_unused_backtick_sequence(code); 366 | for (i = 0; i < numticks; i++) { 367 | LIT("`"); 368 | } 369 | if (code_len == 0 || code[0] == '`') { 370 | LIT(" "); 371 | } 372 | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); 373 | if (code_len == 0 || code[code_len - 1] == '`') { 374 | LIT(" "); 375 | } 376 | for (i = 0; i < numticks; i++) { 377 | LIT("`"); 378 | } 379 | break; 380 | 381 | case CMARK_NODE_HTML_INLINE: 382 | OUT(cmark_node_get_literal(node), false, LITERAL); 383 | break; 384 | 385 | case 
CMARK_NODE_CUSTOM_INLINE: 386 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 387 | false, LITERAL); 388 | break; 389 | 390 | case CMARK_NODE_STRONG: 391 | if (entering) { 392 | LIT("**"); 393 | } else { 394 | LIT("**"); 395 | } 396 | break; 397 | 398 | case CMARK_NODE_EMPH: 399 | // If we have EMPH(EMPH(x)), we need to use *_x_* 400 | // because **x** is STRONG(x): 401 | if (node->parent && node->parent->type == CMARK_NODE_EMPH && 402 | node->next == NULL && node->prev == NULL) { 403 | emph_delim = "_"; 404 | } else { 405 | emph_delim = "*"; 406 | } 407 | if (entering) { 408 | LIT(emph_delim); 409 | } else { 410 | LIT(emph_delim); 411 | } 412 | break; 413 | 414 | case CMARK_NODE_LINK: 415 | if (is_autolink(node)) { 416 | if (entering) { 417 | LIT("<"); 418 | if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { 419 | LIT((const char *)cmark_node_get_url(node) + 7); 420 | } else { 421 | LIT((const char *)cmark_node_get_url(node)); 422 | } 423 | LIT(">"); 424 | // return signal to skip contents of node... 
425 | return 0; 426 | } 427 | } else { 428 | if (entering) { 429 | LIT("["); 430 | } else { 431 | LIT("]("); 432 | OUT(cmark_node_get_url(node), false, URL); 433 | title = cmark_node_get_title(node); 434 | if (strlen(title) > 0) { 435 | LIT(" \""); 436 | OUT(title, false, TITLE); 437 | LIT("\""); 438 | } 439 | LIT(")"); 440 | } 441 | } 442 | break; 443 | 444 | case CMARK_NODE_IMAGE: 445 | if (entering) { 446 | LIT("!["); 447 | } else { 448 | LIT("]("); 449 | OUT(cmark_node_get_url(node), false, URL); 450 | title = cmark_node_get_title(node); 451 | if (strlen(title) > 0) { 452 | OUT(" \"", allow_wrap, LITERAL); 453 | OUT(title, false, TITLE); 454 | LIT("\""); 455 | } 456 | LIT(")"); 457 | } 458 | break; 459 | 460 | default: 461 | assert(false); 462 | break; 463 | } 464 | 465 | return 1; 466 | } 467 | 468 | char *cmark_render_commonmark(cmark_node *root, int options, int width) { 469 | if (options & CMARK_OPT_HARDBREAKS) { 470 | // disable breaking on width, since it has 471 | // a different meaning with OPT_HARDBREAKS 472 | width = 0; 473 | } 474 | return cmark_render(root, options, width, outc, S_render_node); 475 | } 476 | -------------------------------------------------------------------------------- /node.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "config.h" 5 | #include "node.h" 6 | 7 | static void S_node_unlink(cmark_node *node); 8 | 9 | #define NODE_MEM(node) cmark_node_mem(node) 10 | 11 | static CMARK_INLINE bool S_is_block(cmark_node *node) { 12 | if (node == NULL) { 13 | return false; 14 | } 15 | return node->type >= CMARK_NODE_FIRST_BLOCK && 16 | node->type <= CMARK_NODE_LAST_BLOCK; 17 | } 18 | 19 | static CMARK_INLINE bool S_is_inline(cmark_node *node) { 20 | if (node == NULL) { 21 | return false; 22 | } 23 | return node->type >= CMARK_NODE_FIRST_INLINE && 24 | node->type <= CMARK_NODE_LAST_INLINE; 25 | } 26 | 27 | static bool S_can_contain(cmark_node *node, cmark_node 
*child) { 28 | cmark_node *cur; 29 | 30 | if (node == NULL || child == NULL) { 31 | return false; 32 | } 33 | 34 | // Verify that child is not an ancestor of node or equal to node. 35 | cur = node; 36 | do { 37 | if (cur == child) { 38 | return false; 39 | } 40 | cur = cur->parent; 41 | } while (cur != NULL); 42 | 43 | if (child->type == CMARK_NODE_DOCUMENT) { 44 | return false; 45 | } 46 | 47 | switch (node->type) { 48 | case CMARK_NODE_DOCUMENT: 49 | case CMARK_NODE_BLOCK_QUOTE: 50 | case CMARK_NODE_ITEM: 51 | return S_is_block(child) && child->type != CMARK_NODE_ITEM; 52 | 53 | case CMARK_NODE_LIST: 54 | return child->type == CMARK_NODE_ITEM; 55 | 56 | case CMARK_NODE_CUSTOM_BLOCK: 57 | return true; 58 | 59 | case CMARK_NODE_PARAGRAPH: 60 | case CMARK_NODE_HEADING: 61 | case CMARK_NODE_EMPH: 62 | case CMARK_NODE_STRONG: 63 | case CMARK_NODE_LINK: 64 | case CMARK_NODE_IMAGE: 65 | case CMARK_NODE_CUSTOM_INLINE: 66 | return S_is_inline(child); 67 | 68 | default: 69 | break; 70 | } 71 | 72 | return false; 73 | } 74 | 75 | cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { 76 | cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); 77 | cmark_strbuf_init(mem, &node->content, 0); 78 | node->type = (uint16_t)type; 79 | 80 | switch (node->type) { 81 | case CMARK_NODE_HEADING: 82 | node->as.heading.level = 1; 83 | break; 84 | 85 | case CMARK_NODE_LIST: { 86 | cmark_list *list = &node->as.list; 87 | list->list_type = CMARK_BULLET_LIST; 88 | list->start = 0; 89 | list->tight = false; 90 | break; 91 | } 92 | 93 | default: 94 | break; 95 | } 96 | 97 | return node; 98 | } 99 | 100 | cmark_node *cmark_node_new(cmark_node_type type) { 101 | extern cmark_mem DEFAULT_MEM_ALLOCATOR; 102 | return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); 103 | } 104 | 105 | // Free a cmark_node list and any children. 
106 | static void S_free_nodes(cmark_node *e) { 107 | cmark_node *next; 108 | while (e != NULL) { 109 | cmark_strbuf_free(&e->content); 110 | switch (e->type) { 111 | case CMARK_NODE_CODE_BLOCK: 112 | cmark_chunk_free(NODE_MEM(e), &e->as.code.info); 113 | cmark_chunk_free(NODE_MEM(e), &e->as.code.literal); 114 | break; 115 | case CMARK_NODE_TEXT: 116 | case CMARK_NODE_HTML_INLINE: 117 | case CMARK_NODE_CODE: 118 | case CMARK_NODE_HTML_BLOCK: 119 | cmark_chunk_free(NODE_MEM(e), &e->as.literal); 120 | break; 121 | case CMARK_NODE_LINK: 122 | case CMARK_NODE_IMAGE: 123 | cmark_chunk_free(NODE_MEM(e), &e->as.link.url); 124 | cmark_chunk_free(NODE_MEM(e), &e->as.link.title); 125 | break; 126 | case CMARK_NODE_CUSTOM_BLOCK: 127 | case CMARK_NODE_CUSTOM_INLINE: 128 | cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter); 129 | cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit); 130 | break; 131 | default: 132 | break; 133 | } 134 | if (e->last_child) { 135 | // Splice children into list 136 | e->last_child->next = e->next; 137 | e->next = e->first_child; 138 | } 139 | next = e->next; 140 | NODE_MEM(e)->free(e); 141 | e = next; 142 | } 143 | } 144 | 145 | void cmark_node_free(cmark_node *node) { 146 | S_node_unlink(node); 147 | node->next = NULL; 148 | S_free_nodes(node); 149 | } 150 | 151 | cmark_node_type cmark_node_get_type(cmark_node *node) { 152 | if (node == NULL) { 153 | return CMARK_NODE_NONE; 154 | } else { 155 | return (cmark_node_type)node->type; 156 | } 157 | } 158 | 159 | const char *cmark_node_get_type_string(cmark_node *node) { 160 | if (node == NULL) { 161 | return "NONE"; 162 | } 163 | 164 | switch (node->type) { 165 | case CMARK_NODE_NONE: 166 | return "none"; 167 | case CMARK_NODE_DOCUMENT: 168 | return "document"; 169 | case CMARK_NODE_BLOCK_QUOTE: 170 | return "block_quote"; 171 | case CMARK_NODE_LIST: 172 | return "list"; 173 | case CMARK_NODE_ITEM: 174 | return "item"; 175 | case CMARK_NODE_CODE_BLOCK: 176 | return "code_block"; 177 | case 
CMARK_NODE_HTML_BLOCK: 178 | return "html_block"; 179 | case CMARK_NODE_CUSTOM_BLOCK: 180 | return "custom_block"; 181 | case CMARK_NODE_PARAGRAPH: 182 | return "paragraph"; 183 | case CMARK_NODE_HEADING: 184 | return "heading"; 185 | case CMARK_NODE_THEMATIC_BREAK: 186 | return "thematic_break"; 187 | case CMARK_NODE_TEXT: 188 | return "text"; 189 | case CMARK_NODE_SOFTBREAK: 190 | return "softbreak"; 191 | case CMARK_NODE_LINEBREAK: 192 | return "linebreak"; 193 | case CMARK_NODE_CODE: 194 | return "code"; 195 | case CMARK_NODE_HTML_INLINE: 196 | return "html_inline"; 197 | case CMARK_NODE_CUSTOM_INLINE: 198 | return "custom_inline"; 199 | case CMARK_NODE_EMPH: 200 | return "emph"; 201 | case CMARK_NODE_STRONG: 202 | return "strong"; 203 | case CMARK_NODE_LINK: 204 | return "link"; 205 | case CMARK_NODE_IMAGE: 206 | return "image"; 207 | } 208 | 209 | return ""; 210 | } 211 | 212 | cmark_node *cmark_node_next(cmark_node *node) { 213 | if (node == NULL) { 214 | return NULL; 215 | } else { 216 | return node->next; 217 | } 218 | } 219 | 220 | cmark_node *cmark_node_previous(cmark_node *node) { 221 | if (node == NULL) { 222 | return NULL; 223 | } else { 224 | return node->prev; 225 | } 226 | } 227 | 228 | cmark_node *cmark_node_parent(cmark_node *node) { 229 | if (node == NULL) { 230 | return NULL; 231 | } else { 232 | return node->parent; 233 | } 234 | } 235 | 236 | cmark_node *cmark_node_first_child(cmark_node *node) { 237 | if (node == NULL) { 238 | return NULL; 239 | } else { 240 | return node->first_child; 241 | } 242 | } 243 | 244 | cmark_node *cmark_node_last_child(cmark_node *node) { 245 | if (node == NULL) { 246 | return NULL; 247 | } else { 248 | return node->last_child; 249 | } 250 | } 251 | 252 | void *cmark_node_get_user_data(cmark_node *node) { 253 | if (node == NULL) { 254 | return NULL; 255 | } else { 256 | return node->user_data; 257 | } 258 | } 259 | 260 | int cmark_node_set_user_data(cmark_node *node, void *user_data) { 261 | if (node == NULL) { 
262 | return 0; 263 | } 264 | node->user_data = user_data; 265 | return 1; 266 | } 267 | 268 | const char *cmark_node_get_literal(cmark_node *node) { 269 | if (node == NULL) { 270 | return NULL; 271 | } 272 | 273 | switch (node->type) { 274 | case CMARK_NODE_HTML_BLOCK: 275 | case CMARK_NODE_TEXT: 276 | case CMARK_NODE_HTML_INLINE: 277 | case CMARK_NODE_CODE: 278 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal); 279 | 280 | case CMARK_NODE_CODE_BLOCK: 281 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.literal); 282 | 283 | default: 284 | break; 285 | } 286 | 287 | return NULL; 288 | } 289 | 290 | int cmark_node_set_literal(cmark_node *node, const char *content) { 291 | if (node == NULL) { 292 | return 0; 293 | } 294 | 295 | switch (node->type) { 296 | case CMARK_NODE_HTML_BLOCK: 297 | case CMARK_NODE_TEXT: 298 | case CMARK_NODE_HTML_INLINE: 299 | case CMARK_NODE_CODE: 300 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.literal, content); 301 | return 1; 302 | 303 | case CMARK_NODE_CODE_BLOCK: 304 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.literal, content); 305 | return 1; 306 | 307 | default: 308 | break; 309 | } 310 | 311 | return 0; 312 | } 313 | 314 | int cmark_node_get_heading_level(cmark_node *node) { 315 | if (node == NULL) { 316 | return 0; 317 | } 318 | 319 | switch (node->type) { 320 | case CMARK_NODE_HEADING: 321 | return node->as.heading.level; 322 | 323 | default: 324 | break; 325 | } 326 | 327 | return 0; 328 | } 329 | 330 | int cmark_node_set_heading_level(cmark_node *node, int level) { 331 | if (node == NULL || level < 1 || level > 6) { 332 | return 0; 333 | } 334 | 335 | switch (node->type) { 336 | case CMARK_NODE_HEADING: 337 | node->as.heading.level = level; 338 | return 1; 339 | 340 | default: 341 | break; 342 | } 343 | 344 | return 0; 345 | } 346 | 347 | cmark_list_type cmark_node_get_list_type(cmark_node *node) { 348 | if (node == NULL) { 349 | return CMARK_NO_LIST; 350 | } 351 | 352 | if (node->type == 
CMARK_NODE_LIST) { 353 | return node->as.list.list_type; 354 | } else { 355 | return CMARK_NO_LIST; 356 | } 357 | } 358 | 359 | int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { 360 | if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) { 361 | return 0; 362 | } 363 | 364 | if (node == NULL) { 365 | return 0; 366 | } 367 | 368 | if (node->type == CMARK_NODE_LIST) { 369 | node->as.list.list_type = type; 370 | return 1; 371 | } else { 372 | return 0; 373 | } 374 | } 375 | 376 | cmark_delim_type cmark_node_get_list_delim(cmark_node *node) { 377 | if (node == NULL) { 378 | return CMARK_NO_DELIM; 379 | } 380 | 381 | if (node->type == CMARK_NODE_LIST) { 382 | return node->as.list.delimiter; 383 | } else { 384 | return CMARK_NO_DELIM; 385 | } 386 | } 387 | 388 | int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) { 389 | if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) { 390 | return 0; 391 | } 392 | 393 | if (node == NULL) { 394 | return 0; 395 | } 396 | 397 | if (node->type == CMARK_NODE_LIST) { 398 | node->as.list.delimiter = delim; 399 | return 1; 400 | } else { 401 | return 0; 402 | } 403 | } 404 | 405 | int cmark_node_get_list_start(cmark_node *node) { 406 | if (node == NULL) { 407 | return 0; 408 | } 409 | 410 | if (node->type == CMARK_NODE_LIST) { 411 | return node->as.list.start; 412 | } else { 413 | return 0; 414 | } 415 | } 416 | 417 | int cmark_node_set_list_start(cmark_node *node, int start) { 418 | if (node == NULL || start < 0) { 419 | return 0; 420 | } 421 | 422 | if (node->type == CMARK_NODE_LIST) { 423 | node->as.list.start = start; 424 | return 1; 425 | } else { 426 | return 0; 427 | } 428 | } 429 | 430 | int cmark_node_get_list_tight(cmark_node *node) { 431 | if (node == NULL) { 432 | return 0; 433 | } 434 | 435 | if (node->type == CMARK_NODE_LIST) { 436 | return node->as.list.tight; 437 | } else { 438 | return 0; 439 | } 440 | } 441 | 442 | int cmark_node_set_list_tight(cmark_node 
*node, int tight) { 443 | if (node == NULL) { 444 | return 0; 445 | } 446 | 447 | if (node->type == CMARK_NODE_LIST) { 448 | node->as.list.tight = tight == 1; 449 | return 1; 450 | } else { 451 | return 0; 452 | } 453 | } 454 | 455 | const char *cmark_node_get_fence_info(cmark_node *node) { 456 | if (node == NULL) { 457 | return NULL; 458 | } 459 | 460 | if (node->type == CMARK_NODE_CODE_BLOCK) { 461 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info); 462 | } else { 463 | return NULL; 464 | } 465 | } 466 | 467 | int cmark_node_set_fence_info(cmark_node *node, const char *info) { 468 | if (node == NULL) { 469 | return 0; 470 | } 471 | 472 | if (node->type == CMARK_NODE_CODE_BLOCK) { 473 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info); 474 | return 1; 475 | } else { 476 | return 0; 477 | } 478 | } 479 | 480 | const char *cmark_node_get_url(cmark_node *node) { 481 | if (node == NULL) { 482 | return NULL; 483 | } 484 | 485 | switch (node->type) { 486 | case CMARK_NODE_LINK: 487 | case CMARK_NODE_IMAGE: 488 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url); 489 | default: 490 | break; 491 | } 492 | 493 | return NULL; 494 | } 495 | 496 | int cmark_node_set_url(cmark_node *node, const char *url) { 497 | if (node == NULL) { 498 | return 0; 499 | } 500 | 501 | switch (node->type) { 502 | case CMARK_NODE_LINK: 503 | case CMARK_NODE_IMAGE: 504 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url); 505 | return 1; 506 | default: 507 | break; 508 | } 509 | 510 | return 0; 511 | } 512 | 513 | const char *cmark_node_get_title(cmark_node *node) { 514 | if (node == NULL) { 515 | return NULL; 516 | } 517 | 518 | switch (node->type) { 519 | case CMARK_NODE_LINK: 520 | case CMARK_NODE_IMAGE: 521 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title); 522 | default: 523 | break; 524 | } 525 | 526 | return NULL; 527 | } 528 | 529 | int cmark_node_set_title(cmark_node *node, const char *title) { 530 | if (node == NULL) 
{ 531 | return 0; 532 | } 533 | 534 | switch (node->type) { 535 | case CMARK_NODE_LINK: 536 | case CMARK_NODE_IMAGE: 537 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title); 538 | return 1; 539 | default: 540 | break; 541 | } 542 | 543 | return 0; 544 | } 545 | 546 | const char *cmark_node_get_on_enter(cmark_node *node) { 547 | if (node == NULL) { 548 | return NULL; 549 | } 550 | 551 | switch (node->type) { 552 | case CMARK_NODE_CUSTOM_INLINE: 553 | case CMARK_NODE_CUSTOM_BLOCK: 554 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_enter); 555 | default: 556 | break; 557 | } 558 | 559 | return NULL; 560 | } 561 | 562 | int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) { 563 | if (node == NULL) { 564 | return 0; 565 | } 566 | 567 | switch (node->type) { 568 | case CMARK_NODE_CUSTOM_INLINE: 569 | case CMARK_NODE_CUSTOM_BLOCK: 570 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_enter, on_enter); 571 | return 1; 572 | default: 573 | break; 574 | } 575 | 576 | return 0; 577 | } 578 | 579 | const char *cmark_node_get_on_exit(cmark_node *node) { 580 | if (node == NULL) { 581 | return NULL; 582 | } 583 | 584 | switch (node->type) { 585 | case CMARK_NODE_CUSTOM_INLINE: 586 | case CMARK_NODE_CUSTOM_BLOCK: 587 | return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_exit); 588 | default: 589 | break; 590 | } 591 | 592 | return NULL; 593 | } 594 | 595 | int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { 596 | if (node == NULL) { 597 | return 0; 598 | } 599 | 600 | switch (node->type) { 601 | case CMARK_NODE_CUSTOM_INLINE: 602 | case CMARK_NODE_CUSTOM_BLOCK: 603 | cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_exit, on_exit); 604 | return 1; 605 | default: 606 | break; 607 | } 608 | 609 | return 0; 610 | } 611 | 612 | int cmark_node_get_start_line(cmark_node *node) { 613 | if (node == NULL) { 614 | return 0; 615 | } 616 | return node->start_line; 617 | } 618 | 619 | int 
cmark_node_get_start_column(cmark_node *node) { 620 | if (node == NULL) { 621 | return 0; 622 | } 623 | return node->start_column; 624 | } 625 | 626 | int cmark_node_get_end_line(cmark_node *node) { 627 | if (node == NULL) { 628 | return 0; 629 | } 630 | return node->end_line; 631 | } 632 | 633 | int cmark_node_get_end_column(cmark_node *node) { 634 | if (node == NULL) { 635 | return 0; 636 | } 637 | return node->end_column; 638 | } 639 | 640 | // Unlink a node without adjusting its next, prev, and parent pointers. 641 | static void S_node_unlink(cmark_node *node) { 642 | if (node == NULL) { 643 | return; 644 | } 645 | 646 | if (node->prev) { 647 | node->prev->next = node->next; 648 | } 649 | if (node->next) { 650 | node->next->prev = node->prev; 651 | } 652 | 653 | // Adjust first_child and last_child of parent. 654 | cmark_node *parent = node->parent; 655 | if (parent) { 656 | if (parent->first_child == node) { 657 | parent->first_child = node->next; 658 | } 659 | if (parent->last_child == node) { 660 | parent->last_child = node->prev; 661 | } 662 | } 663 | } 664 | 665 | void cmark_node_unlink(cmark_node *node) { 666 | S_node_unlink(node); 667 | 668 | node->next = NULL; 669 | node->prev = NULL; 670 | node->parent = NULL; 671 | } 672 | 673 | int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) { 674 | if (node == NULL || sibling == NULL) { 675 | return 0; 676 | } 677 | 678 | if (!node->parent || !S_can_contain(node->parent, sibling)) { 679 | return 0; 680 | } 681 | 682 | S_node_unlink(sibling); 683 | 684 | cmark_node *old_prev = node->prev; 685 | 686 | // Insert 'sibling' between 'old_prev' and 'node'. 687 | if (old_prev) { 688 | old_prev->next = sibling; 689 | } 690 | sibling->prev = old_prev; 691 | sibling->next = node; 692 | node->prev = sibling; 693 | 694 | // Set new parent. 695 | cmark_node *parent = node->parent; 696 | sibling->parent = parent; 697 | 698 | // Adjust first_child of parent if inserted as first child. 
699 | if (parent && !old_prev) { 700 | parent->first_child = sibling; 701 | } 702 | 703 | return 1; 704 | } 705 | 706 | int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) { 707 | if (node == NULL || sibling == NULL) { 708 | return 0; 709 | } 710 | 711 | if (!node->parent || !S_can_contain(node->parent, sibling)) { 712 | return 0; 713 | } 714 | 715 | S_node_unlink(sibling); 716 | 717 | cmark_node *old_next = node->next; 718 | 719 | // Insert 'sibling' between 'node' and 'old_next'. 720 | if (old_next) { 721 | old_next->prev = sibling; 722 | } 723 | sibling->next = old_next; 724 | sibling->prev = node; 725 | node->next = sibling; 726 | 727 | // Set new parent. 728 | cmark_node *parent = node->parent; 729 | sibling->parent = parent; 730 | 731 | // Adjust last_child of parent if inserted as last child. 732 | if (parent && !old_next) { 733 | parent->last_child = sibling; 734 | } 735 | 736 | return 1; 737 | } 738 | 739 | int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) { 740 | if (!cmark_node_insert_before(oldnode, newnode)) { 741 | return 0; 742 | } 743 | cmark_node_unlink(oldnode); 744 | return 1; 745 | } 746 | 747 | int cmark_node_prepend_child(cmark_node *node, cmark_node *child) { 748 | if (!S_can_contain(node, child)) { 749 | return 0; 750 | } 751 | 752 | S_node_unlink(child); 753 | 754 | cmark_node *old_first_child = node->first_child; 755 | 756 | child->next = old_first_child; 757 | child->prev = NULL; 758 | child->parent = node; 759 | node->first_child = child; 760 | 761 | if (old_first_child) { 762 | old_first_child->prev = child; 763 | } else { 764 | // Also set last_child if node previously had no children. 
765 | node->last_child = child; 766 | } 767 | 768 | return 1; 769 | } 770 | 771 | int cmark_node_append_child(cmark_node *node, cmark_node *child) { 772 | if (!S_can_contain(node, child)) { 773 | return 0; 774 | } 775 | 776 | S_node_unlink(child); 777 | 778 | cmark_node *old_last_child = node->last_child; 779 | 780 | child->next = NULL; 781 | child->prev = old_last_child; 782 | child->parent = node; 783 | node->last_child = child; 784 | 785 | if (old_last_child) { 786 | old_last_child->next = child; 787 | } else { 788 | // Also set first_child if node previously had no children. 789 | node->first_child = child; 790 | } 791 | 792 | return 1; 793 | } 794 | 795 | static void S_print_error(FILE *out, cmark_node *node, const char *elem) { 796 | if (out == NULL) { 797 | return; 798 | } 799 | fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, 800 | cmark_node_get_type_string(node), node->start_line, 801 | node->start_column); 802 | } 803 | 804 | int cmark_node_check(cmark_node *node, FILE *out) { 805 | cmark_node *cur; 806 | int errors = 0; 807 | 808 | if (!node) { 809 | return 0; 810 | } 811 | 812 | cur = node; 813 | for (;;) { 814 | if (cur->first_child) { 815 | if (cur->first_child->prev != NULL) { 816 | S_print_error(out, cur->first_child, "prev"); 817 | cur->first_child->prev = NULL; 818 | ++errors; 819 | } 820 | if (cur->first_child->parent != cur) { 821 | S_print_error(out, cur->first_child, "parent"); 822 | cur->first_child->parent = cur; 823 | ++errors; 824 | } 825 | cur = cur->first_child; 826 | continue; 827 | } 828 | 829 | next_sibling: 830 | if (cur == node) { 831 | break; 832 | } 833 | if (cur->next) { 834 | if (cur->next->prev != cur) { 835 | S_print_error(out, cur->next, "prev"); 836 | cur->next->prev = cur; 837 | ++errors; 838 | } 839 | if (cur->next->parent != cur->parent) { 840 | S_print_error(out, cur->next, "parent"); 841 | cur->next->parent = cur->parent; 842 | ++errors; 843 | } 844 | cur = cur->next; 845 | continue; 846 | } 847 | 848 | if 
(cur->parent->last_child != cur) { 849 | S_print_error(out, cur->parent, "last_child"); 850 | cur->parent->last_child = cur; 851 | ++errors; 852 | } 853 | cur = cur->parent; 854 | goto next_sibling; 855 | } 856 | 857 | return errors; 858 | } 859 | -------------------------------------------------------------------------------- /cmark.h: -------------------------------------------------------------------------------- 1 | #ifndef CMARK_H 2 | #define CMARK_H 3 | 4 | #include 5 | #include "cmark_export.h" 6 | #include "cmark_version.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | /** # NAME 13 | * 14 | * **cmark** - CommonMark parsing, manipulating, and rendering 15 | */ 16 | 17 | /** # DESCRIPTION 18 | * 19 | * ## Simple Interface 20 | */ 21 | 22 | /** Convert 'text' (assumed to be a UTF-8 encoded string with length 23 | * 'len') from CommonMark Markdown to HTML, returning a null-terminated, 24 | * UTF-8-encoded string. It is the caller's responsibility 25 | * to free the returned buffer. 
26 | */ 27 | CMARK_EXPORT 28 | char *cmark_markdown_to_html(const char *text, size_t len, int options); 29 | 30 | /** ## Node Structure 31 | */ 32 | 33 | typedef enum { 34 | /* Error status */ 35 | CMARK_NODE_NONE, 36 | 37 | /* Block */ 38 | CMARK_NODE_DOCUMENT, 39 | CMARK_NODE_BLOCK_QUOTE, 40 | CMARK_NODE_LIST, 41 | CMARK_NODE_ITEM, 42 | CMARK_NODE_CODE_BLOCK, 43 | CMARK_NODE_HTML_BLOCK, 44 | CMARK_NODE_CUSTOM_BLOCK, 45 | CMARK_NODE_PARAGRAPH, 46 | CMARK_NODE_HEADING, 47 | CMARK_NODE_THEMATIC_BREAK, 48 | 49 | CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, 50 | CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, 51 | 52 | /* Inline */ 53 | CMARK_NODE_TEXT, 54 | CMARK_NODE_SOFTBREAK, 55 | CMARK_NODE_LINEBREAK, 56 | CMARK_NODE_CODE, 57 | CMARK_NODE_HTML_INLINE, 58 | CMARK_NODE_CUSTOM_INLINE, 59 | CMARK_NODE_EMPH, 60 | CMARK_NODE_STRONG, 61 | CMARK_NODE_LINK, 62 | CMARK_NODE_IMAGE, 63 | 64 | CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, 65 | CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, 66 | } cmark_node_type; 67 | 68 | /* For backwards compatibility: */ 69 | #define CMARK_NODE_HEADER CMARK_NODE_HEADING 70 | #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK 71 | #define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK 72 | #define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE 73 | 74 | typedef enum { 75 | CMARK_NO_LIST, 76 | CMARK_BULLET_LIST, 77 | CMARK_ORDERED_LIST 78 | } cmark_list_type; 79 | 80 | typedef enum { 81 | CMARK_NO_DELIM, 82 | CMARK_PERIOD_DELIM, 83 | CMARK_PAREN_DELIM 84 | } cmark_delim_type; 85 | 86 | typedef struct cmark_node cmark_node; 87 | typedef struct cmark_parser cmark_parser; 88 | typedef struct cmark_iter cmark_iter; 89 | 90 | /** 91 | * ## Custom memory allocator support 92 | */ 93 | 94 | /** Defines the memory allocation functions to be used by CMark 95 | * when parsing and allocating a document tree 96 | */ 97 | typedef struct cmark_mem { 98 | void *(*calloc)(size_t, size_t); 99 | void *(*realloc)(void *, size_t); 100 | void (*free)(void *); 101 | } 
cmark_mem; 102 | 103 | /** 104 | * ## Creating and Destroying Nodes 105 | */ 106 | 107 | /** Creates a new node of type 'type'. Note that the node may have 108 | * other required properties, which it is the caller's responsibility 109 | * to assign. 110 | */ 111 | CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type); 112 | 113 | /** Same as `cmark_node_new`, but explicitly listing the memory 114 | * allocator used to allocate the node. Note: be sure to use the same 115 | * allocator for every node in a tree, or bad things can happen. 116 | */ 117 | CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type, 118 | cmark_mem *mem); 119 | 120 | /** Frees the memory allocated for a node and any children. 121 | */ 122 | CMARK_EXPORT void cmark_node_free(cmark_node *node); 123 | 124 | /** 125 | * ## Tree Traversal 126 | */ 127 | 128 | /** Returns the next node in the sequence after 'node', or NULL if 129 | * there is none. 130 | */ 131 | CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node); 132 | 133 | /** Returns the previous node in the sequence before 'node', or NULL if 134 | * there is none. 135 | */ 136 | CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node); 137 | 138 | /** Returns the parent of 'node', or NULL if there is none. 139 | */ 140 | CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node); 141 | 142 | /** Returns the first child of 'node', or NULL if 'node' has no children. 143 | */ 144 | CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node); 145 | 146 | /** Returns the last child of 'node', or NULL if 'node' has no children. 147 | */ 148 | CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node); 149 | 150 | /** 151 | * ## Iterator 152 | * 153 | * An iterator will walk through a tree of nodes, starting from a root 154 | * node, returning one node at a time, together with information about 155 | * whether the node is being entered or exited. 
The iterator will 156 | * first descend to a child node, if there is one. When there is no 157 | * child, the iterator will go to the next sibling. When there is no 158 | * next sibling, the iterator will return to the parent (but with 159 | * a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will 160 | * return `CMARK_EVENT_DONE` when it reaches the root node again. 161 | * One natural application is an HTML renderer, where an `ENTER` event 162 | * outputs an open tag and an `EXIT` event outputs a close tag. 163 | * An iterator might also be used to transform an AST in some systematic 164 | * way, for example, turning all level-3 headings into regular paragraphs. 165 | * 166 | * void 167 | * usage_example(cmark_node *root) { 168 | * cmark_event_type ev_type; 169 | * cmark_iter *iter = cmark_iter_new(root); 170 | * 171 | * while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 172 | * cmark_node *cur = cmark_iter_get_node(iter); 173 | * // Do something with `cur` and `ev_type` 174 | * } 175 | * 176 | * cmark_iter_free(iter); 177 | * } 178 | * 179 | * Iterators will never return `EXIT` events for leaf nodes, which are nodes 180 | * of type: 181 | * 182 | * * CMARK_NODE_HTML_BLOCK 183 | * * CMARK_NODE_THEMATIC_BREAK 184 | * * CMARK_NODE_CODE_BLOCK 185 | * * CMARK_NODE_TEXT 186 | * * CMARK_NODE_SOFTBREAK 187 | * * CMARK_NODE_LINEBREAK 188 | * * CMARK_NODE_CODE 189 | * * CMARK_NODE_HTML_INLINE 190 | * 191 | * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for 192 | * leaf nodes. 193 | */ 194 | 195 | typedef enum { 196 | CMARK_EVENT_NONE, 197 | CMARK_EVENT_DONE, 198 | CMARK_EVENT_ENTER, 199 | CMARK_EVENT_EXIT 200 | } cmark_event_type; 201 | 202 | /** Creates a new iterator starting at 'root'. The current node and event 203 | * type are undefined until 'cmark_iter_next' is called for the first time. 204 | * The memory allocated for the iterator should be released using 205 | * 'cmark_iter_free' when it is no longer needed. 
206 | */ 207 | CMARK_EXPORT 208 | cmark_iter *cmark_iter_new(cmark_node *root); 209 | 210 | /** Frees the memory allocated for an iterator. 211 | */ 212 | CMARK_EXPORT 213 | void cmark_iter_free(cmark_iter *iter); 214 | 215 | /** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`, 216 | * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`). 217 | */ 218 | CMARK_EXPORT 219 | cmark_event_type cmark_iter_next(cmark_iter *iter); 220 | 221 | /** Returns the current node. 222 | */ 223 | CMARK_EXPORT 224 | cmark_node *cmark_iter_get_node(cmark_iter *iter); 225 | 226 | /** Returns the current event type. 227 | */ 228 | CMARK_EXPORT 229 | cmark_event_type cmark_iter_get_event_type(cmark_iter *iter); 230 | 231 | /** Returns the root node. 232 | */ 233 | CMARK_EXPORT 234 | cmark_node *cmark_iter_get_root(cmark_iter *iter); 235 | 236 | /** Resets the iterator so that the current node is 'current' and 237 | * the event type is 'event_type'. The new current node must be a 238 | * descendant of the root node or the root node itself. 239 | */ 240 | CMARK_EXPORT 241 | void cmark_iter_reset(cmark_iter *iter, cmark_node *current, 242 | cmark_event_type event_type); 243 | 244 | /** 245 | * ## Accessors 246 | */ 247 | 248 | /** Returns the user data of 'node'. 249 | */ 250 | CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); 251 | 252 | /** Sets arbitrary user data for 'node'. Returns 1 on success, 253 | * 0 on failure. 254 | */ 255 | CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); 256 | 257 | /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. 258 | */ 259 | CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); 260 | 261 | /** Like 'cmark_node_get_type', but returns a string representation 262 | of the type, or `""`. 263 | */ 264 | CMARK_EXPORT 265 | const char *cmark_node_get_type_string(cmark_node *node); 266 | 267 | /** Returns the string contents of 'node', or an empty 268 | string if none is set. 
Returns NULL if called on a 269 | node that does not have string content. 270 | */ 271 | CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node); 272 | 273 | /** Sets the string contents of 'node'. Returns 1 on success, 274 | * 0 on failure. 275 | */ 276 | CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content); 277 | 278 | /** Returns the heading level of 'node', or 0 if 'node' is not a heading. 279 | */ 280 | CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node); 281 | 282 | /* For backwards compatibility */ 283 | #define cmark_node_get_header_level cmark_node_get_heading_level 284 | #define cmark_node_set_header_level cmark_node_set_heading_level 285 | 286 | /** Sets the heading level of 'node', returning 1 on success and 0 on error. 287 | */ 288 | CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level); 289 | 290 | /** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node' 291 | * is not a list. 292 | */ 293 | CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); 294 | 295 | /** Sets the list type of 'node', returning 1 on success and 0 on error. 296 | */ 297 | CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, 298 | cmark_list_type type); 299 | 300 | /** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node' 301 | * is not a list. 302 | */ 303 | CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node); 304 | 305 | /** Sets the list delimiter type of 'node', returning 1 on success and 0 306 | * on error. 307 | */ 308 | CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node, 309 | cmark_delim_type delim); 310 | 311 | /** Returns starting number of 'node', if it is an ordered list, otherwise 0. 312 | */ 313 | CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node); 314 | 315 | /** Sets starting number of 'node', if it is an ordered list. Returns 1 316 | * on success, 0 on failure. 
317 | */ 318 | CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); 319 | 320 | /** Returns 1 if 'node' is a tight list, 0 otherwise. 321 | */ 322 | CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node); 323 | 324 | /** Sets the "tightness" of a list. Returns 1 on success, 0 on failure. 325 | */ 326 | CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); 327 | 328 | /** Returns the info string from a fenced code block. 329 | */ 330 | CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); 331 | 332 | /** Sets the info string in a fenced code block, returning 1 on 333 | * success and 0 on failure. 334 | */ 335 | CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); 336 | 337 | /** Returns the URL of a link or image 'node', or an empty string 338 | if no URL is set. Returns NULL if called on a node that is 339 | not a link or image. 340 | */ 341 | CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node); 342 | 343 | /** Sets the URL of a link or image 'node'. Returns 1 on success, 344 | * 0 on failure. 345 | */ 346 | CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); 347 | 348 | /** Returns the title of a link or image 'node', or an empty 349 | string if no title is set. Returns NULL if called on a node 350 | that is not a link or image. 351 | */ 352 | CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node); 353 | 354 | /** Sets the title of a link or image 'node'. Returns 1 on success, 355 | * 0 on failure. 356 | */ 357 | CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); 358 | 359 | /** Returns the literal "on enter" text for a custom 'node', or 360 | an empty string if no on_enter is set. Returns NULL if called 361 | on a non-custom node. 362 | */ 363 | CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node); 364 | 365 | /** Sets the literal text to render "on enter" for a custom 'node'. 
366 | Any children of the node will be rendered after this text. 367 | Returns 1 on success, 0 on failure. 368 | */ 369 | CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node, 370 | const char *on_enter); 371 | 372 | /** Returns the literal "on exit" text for a custom 'node', or 373 | an empty string if no on_exit is set. Returns NULL if 374 | called on a non-custom node. 375 | */ 376 | CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node); 377 | 378 | /** Sets the literal text to render "on exit" for a custom 'node'. 379 | Any children of the node will be rendered before this text. 380 | Returns 1 on success, 0 on failure. 381 | */ 382 | CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit); 383 | 384 | /** Returns the line on which 'node' begins. 385 | */ 386 | CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node); 387 | 388 | /** Returns the column at which 'node' begins. 389 | */ 390 | CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node); 391 | 392 | /** Returns the line on which 'node' ends. 393 | */ 394 | CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); 395 | 396 | /** Returns the column at which 'node' ends. 397 | */ 398 | CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node); 399 | 400 | /** 401 | * ## Tree Manipulation 402 | */ 403 | 404 | /** Unlinks a 'node', removing it from the tree, but not freeing its 405 | * memory. (Use 'cmark_node_free' for that.) 406 | */ 407 | CMARK_EXPORT void cmark_node_unlink(cmark_node *node); 408 | 409 | /** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure. 410 | */ 411 | CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, 412 | cmark_node *sibling); 413 | 414 | /** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure. 
415 | */ 416 | CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); 417 | 418 | /** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does 419 | * not free its memory). 420 | * Returns 1 on success, 0 on failure. 421 | */ 422 | CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode); 423 | 424 | /** Adds 'child' to the beginning of the children of 'node'. 425 | * Returns 1 on success, 0 on failure. 426 | */ 427 | CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); 428 | 429 | /** Adds 'child' to the end of the children of 'node'. 430 | * Returns 1 on success, 0 on failure. 431 | */ 432 | CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); 433 | 434 | /** Consolidates adjacent text nodes. 435 | */ 436 | CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); 437 | 438 | /** 439 | * ## Parsing 440 | * 441 | * Simple interface: 442 | * 443 | * cmark_node *document = cmark_parse_document("Hello *world*", 13, 444 | * CMARK_OPT_DEFAULT); 445 | * 446 | * Streaming interface: 447 | * 448 | * cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); 449 | * FILE *fp = fopen("myfile.md", "rb"); 450 | * while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { 451 | * cmark_parser_feed(parser, buffer, bytes); 452 | * if (bytes < sizeof(buffer)) { 453 | * break; 454 | * } 455 | * } 456 | * document = cmark_parser_finish(parser); 457 | * cmark_parser_free(parser); 458 | */ 459 | 460 | /** Creates a new parser object. 461 | */ 462 | CMARK_EXPORT 463 | cmark_parser *cmark_parser_new(int options); 464 | 465 | /** Creates a new parser object with the given memory allocator. 466 | */ 467 | CMARK_EXPORT 468 | cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem); 469 | 470 | /** Frees memory allocated for a parser object. 
471 | */ 472 | CMARK_EXPORT 473 | void cmark_parser_free(cmark_parser *parser); 474 | 475 | /** Feeds a string of length 'len' to 'parser'. 476 | */ 477 | CMARK_EXPORT 478 | void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); 479 | 480 | /** Finish parsing and return a pointer to a tree of nodes. 481 | */ 482 | CMARK_EXPORT 483 | cmark_node *cmark_parser_finish(cmark_parser *parser); 484 | 485 | /** Parse a CommonMark document in 'buffer' of length 'len'. 486 | * Returns a pointer to a tree of nodes. The memory allocated for 487 | * the node tree should be released using 'cmark_node_free' 488 | * when it is no longer needed. 489 | */ 490 | CMARK_EXPORT 491 | cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); 492 | 493 | /** Parse a CommonMark document in file 'f', returning a pointer to 494 | * a tree of nodes. The memory allocated for the node tree should be 495 | * released using 'cmark_node_free' when it is no longer needed. 496 | */ 497 | CMARK_EXPORT 498 | cmark_node *cmark_parse_file(FILE *f, int options); 499 | 500 | /** 501 | * ## Rendering 502 | */ 503 | 504 | /** Render a 'node' tree as XML. It is the caller's responsibility 505 | * to free the returned buffer. 506 | */ 507 | CMARK_EXPORT 508 | char *cmark_render_xml(cmark_node *root, int options); 509 | 510 | /** Render a 'node' tree as an HTML fragment. It is up to the user 511 | * to add an appropriate header and footer. It is the caller's 512 | * responsibility to free the returned buffer. 513 | */ 514 | CMARK_EXPORT 515 | char *cmark_render_html(cmark_node *root, int options); 516 | 517 | /** Render a 'node' tree as a groff man page, without the header. 518 | * It is the caller's responsibility to free the returned buffer. 519 | */ 520 | CMARK_EXPORT 521 | char *cmark_render_man(cmark_node *root, int options, int width); 522 | 523 | /** Render a 'node' tree as a commonmark document. 
524 | * It is the caller's responsibility to free the returned buffer. 525 | */ 526 | CMARK_EXPORT 527 | char *cmark_render_commonmark(cmark_node *root, int options, int width); 528 | 529 | /** Render a 'node' tree as a LaTeX document. 530 | * It is the caller's responsibility to free the returned buffer. 531 | */ 532 | CMARK_EXPORT 533 | char *cmark_render_latex(cmark_node *root, int options, int width); 534 | 535 | /** 536 | * ## Options 537 | */ 538 | 539 | /** Default options. 540 | */ 541 | #define CMARK_OPT_DEFAULT 0 542 | 543 | /** 544 | * ### Options affecting rendering 545 | */ 546 | 547 | /** Include a `data-sourcepos` attribute on all block elements. 548 | */ 549 | #define CMARK_OPT_SOURCEPOS (1 << 1) 550 | 551 | /** Render `softbreak` elements as hard line breaks. 552 | */ 553 | #define CMARK_OPT_HARDBREAKS (1 << 2) 554 | 555 | /** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`, 556 | * `file:`, and `data:`, except for `image/png`, `image/gif`, 557 | * `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced 558 | * by a placeholder HTML comment. Unsafe links are replaced by 559 | * empty strings. 560 | */ 561 | #define CMARK_OPT_SAFE (1 << 3) 562 | 563 | /** Render `softbreak` elements as spaces. 564 | */ 565 | #define CMARK_OPT_NOBREAKS (1 << 4) 566 | 567 | /** 568 | * ### Options affecting parsing 569 | */ 570 | 571 | /** Legacy option (no effect). 572 | */ 573 | #define CMARK_OPT_NORMALIZE (1 << 8) 574 | 575 | /** Validate UTF-8 in the input before parsing, replacing illegal 576 | * sequences with the replacement character U+FFFD. 577 | */ 578 | #define CMARK_OPT_VALIDATE_UTF8 (1 << 9) 579 | 580 | /** Convert straight quotes to curly, --- to em dashes, -- to en dashes. 581 | */ 582 | #define CMARK_OPT_SMART (1 << 10) 583 | 584 | /** 585 | * ## Version information 586 | */ 587 | 588 | /** The library version as integer for runtime checks. Also available as 589 | * macro CMARK_VERSION for compile time checks. 
590 | * 591 | * * Bits 16-23 contain the major version. 592 | * * Bits 8-15 contain the minor version. 593 | * * Bits 0-7 contain the patchlevel. 594 | * 595 | * In hexadecimal format, the number 0x010203 represents version 1.2.3. 596 | */ 597 | CMARK_EXPORT 598 | int cmark_version(void); 599 | 600 | /** The library version string for runtime checks. Also available as 601 | * macro CMARK_VERSION_STRING for compile time checks. 602 | */ 603 | CMARK_EXPORT 604 | const char *cmark_version_string(void); 605 | 606 | /** # AUTHORS 607 | * 608 | * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. 609 | */ 610 | 611 | #ifndef CMARK_NO_SHORT_NAMES 612 | #define NODE_DOCUMENT CMARK_NODE_DOCUMENT 613 | #define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE 614 | #define NODE_LIST CMARK_NODE_LIST 615 | #define NODE_ITEM CMARK_NODE_ITEM 616 | #define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK 617 | #define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK 618 | #define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK 619 | #define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH 620 | #define NODE_HEADING CMARK_NODE_HEADING 621 | #define NODE_HEADER CMARK_NODE_HEADER 622 | #define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK 623 | #define NODE_HRULE CMARK_NODE_HRULE 624 | #define NODE_TEXT CMARK_NODE_TEXT 625 | #define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK 626 | #define NODE_LINEBREAK CMARK_NODE_LINEBREAK 627 | #define NODE_CODE CMARK_NODE_CODE 628 | #define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE 629 | #define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE 630 | #define NODE_EMPH CMARK_NODE_EMPH 631 | #define NODE_STRONG CMARK_NODE_STRONG 632 | #define NODE_LINK CMARK_NODE_LINK 633 | #define NODE_IMAGE CMARK_NODE_IMAGE 634 | #define BULLET_LIST CMARK_BULLET_LIST 635 | #define ORDERED_LIST CMARK_ORDERED_LIST 636 | #define PERIOD_DELIM CMARK_PERIOD_DELIM 637 | #define PAREN_DELIM CMARK_PAREN_DELIM 638 | #endif 639 | 640 | #ifdef __cplusplus 641 | } 642 | #endif 643 | 644 | #endif 645 | 
--------------------------------------------------------------------------------