├── .gitignore ├── BSDmakefile ├── GNUmakefile ├── LICENSE ├── README ├── README.md ├── array.c ├── array.h ├── benchmark.c ├── buffer.c ├── buffer.h ├── expanded_markdown.sh ├── make-amal ├── markdown.c ├── markdown.h ├── mkd2html.1 ├── mkd2html.c ├── mkd2latex.1 ├── mkd2latex.c ├── mkd2man.1 ├── mkd2man.c ├── renderers.c ├── renderers.h ├── soldout_array.3 ├── soldout_buffer.3 ├── soldout_markdown.3 └── soldout_renderers.3 /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.o 3 | *.so 4 | *.so.* 5 | 6 | amalgamation/ 7 | depends/ 8 | mkd2html 9 | mkd2latex 10 | mkd2man 11 | -------------------------------------------------------------------------------- /BSDmakefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | # Copyright (c) 2009, Natacha Porté 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | 17 | DEPDIR = depends 18 | ALLDEPS = $(DEPDIR)/all 19 | 20 | AR ?= ar 21 | CC ?= cc 22 | CFLAGS ?= -g -O3 -Wall -Werror 23 | LDFLAGS ?= 24 | 25 | all: libsoldout.a libsoldout.so mkd2html mkd2latex mkd2man 26 | 27 | .PHONY: all amal clean 28 | 29 | 30 | # amalgamation 31 | amal: 32 | @./make-amal 33 | 34 | 35 | # libraries 36 | 37 | libsoldout.a: markdown.o array.o buffer.o renderers.o 38 | $(AR) rs $(.TARGET) $(.ALLSRC) 39 | 40 | libsoldout.so: libsoldout.so.1 41 | ln -s $(.ALLSRC) $(.TARGET) 42 | 43 | libsoldout.so.1: markdown.o array.o buffer.o renderers.o 44 | $(CC) $(LDFLAGS) -shared -Wl,-soname=$(.TARGET) \ 45 | $(.ALLSRC) -o $(.TARGET) 46 | 47 | 48 | # executables 49 | 50 | mkd2html: mkd2html.o libsoldout.so 51 | $(CC) $(LDFLAGS) $(.ALLSRC) -o $(.TARGET) 52 | 53 | mkd2latex: mkd2latex.o libsoldout.so 54 | $(CC) $(LDFLAGS) $(.ALLSRC) -o $(.TARGET) 55 | 56 | mkd2man: mkd2man.o libsoldout.so 57 | $(CC) $(LDFLAGS) $(.ALLSRC) -o $(.TARGET) 58 | 59 | 60 | # Housekeeping 61 | 62 | GNUmakefile: BSDmakefile 63 | @sed -e 's/^\(all:.*\)GNUmakefile /\1/' \ 64 | -e 's/\(rm .*\)GNUmakefile /\1/' \ 65 | -e '/^GNUmakefile:/,/^$$/d' \ 66 | -e 's/\$$(\.ALLSRC)/$$^/g' \ 67 | -e 's/\$$(\.IMPSRC)/$$ $(.TARGET) 74 | 75 | benchmark: benchmark.o libsoldout.so 76 | $(CC) $(LDFLAGS) $(.ALLSRC) -o $(.TARGET) 77 | 78 | clean: 79 | rm -f *.o 80 | rm -f libsoldout.a libsoldout.so libsoldout.so.* 81 | rm -f mkd2html mkd2latex mkd2man benchmark 82 | rm -rf $(DEPDIR) 83 | 84 | 85 | # dependencies 86 | 87 | .sinclude "$(ALLDEPS)" 88 | 89 | 90 | # generic object compilations 91 | 92 | .c.o: 93 | @mkdir -p $(DEPDIR) 94 | @touch $(ALLDEPS) 95 | @$(CC) -MM $(.IMPSRC) > $(DEPDIR)/$(.PREFIX).d 96 | @grep -q "$(.PREFIX).d" $(ALLDEPS) \ 97 | || echo ".include \"$(.PREFIX).d\"" >> $(ALLDEPS) 98 | $(CC) $(CFLAGS) -std=c99 -fPIC -c -o $(.TARGET) $(.IMPSRC) 99 | -------------------------------------------------------------------------------- /GNUmakefile: 
-------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | # Copyright (c) 2009, Natacha Porté 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | DEPDIR = depends 18 | ALLDEPS = $(DEPDIR)/all 19 | 20 | AR ?= ar 21 | CC ?= cc 22 | CFLAGS ?= -g -O3 -Wall -Werror 23 | LDFLAGS ?= 24 | 25 | all: libsoldout.a libsoldout.so mkd2html mkd2latex mkd2man 26 | 27 | .PHONY: all amal clean 28 | 29 | 30 | # amalgamation 31 | amal: 32 | @./make-amal 33 | 34 | 35 | # libraries 36 | 37 | libsoldout.a: markdown.o array.o buffer.o renderers.o 38 | $(AR) rs $@ $^ 39 | 40 | libsoldout.so: libsoldout.so.1 41 | ln -s $^ $@ 42 | 43 | libsoldout.so.1: markdown.o array.o buffer.o renderers.o 44 | $(CC) $(LDFLAGS) -shared -Wl,-soname=$@ \ 45 | $^ -o $@ 46 | 47 | 48 | # executables 49 | 50 | mkd2html: mkd2html.o libsoldout.so 51 | $(CC) $(LDFLAGS) $^ -o $@ 52 | 53 | mkd2latex: mkd2latex.o libsoldout.so 54 | $(CC) $(LDFLAGS) $^ -o $@ 55 | 56 | mkd2man: mkd2man.o libsoldout.so 57 | $(CC) $(LDFLAGS) $^ -o $@ 58 | 59 | 60 | # Housekeeping 61 | 62 | benchmark: benchmark.o libsoldout.so 63 | $(CC) $(LDFLAGS) $^ -o $@ 64 | 65 | clean: 66 | rm -f *.o 67 | rm -f libsoldout.a libsoldout.so libsoldout.so.* 68 | rm -f mkd2html mkd2latex mkd2man 
benchmark 69 | rm -rf $(DEPDIR) 70 | 71 | 72 | # dependencies 73 | 74 | -include "$(ALLDEPS)" 75 | 76 | 77 | # generic object compilations 78 | 79 | .c.o: 80 | @mkdir -p $(DEPDIR) 81 | @touch $(ALLDEPS) 82 | @$(CC) -MM $< > $(DEPDIR)/$*.d 83 | @grep -q "$*.d" $(ALLDEPS) \ 84 | || echo "include \"$*.d\"" >> $(ALLDEPS) 85 | $(CC) $(CFLAGS) -std=c99 -fPIC -c -o $@ $< 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, Natacha Porté 2 | 3 | Permission to use, copy, modify, and distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Libsoldout 2 | ========== 3 | 4 | Overview: 5 | 6 | 1. Introduction 7 | 2. Usage 8 | 3. Internals 9 | 10 | 11 | 12 | Introduction 13 | ------------ 14 | 15 | For some projects of mine, I wanted a lightweight C library that can parse 16 | John Gruber's [markdown](http://daringfireball.net/projects/markdown/) 17 | format into whatever I want, and that is easily extensible. 
18 | 19 | The only C implementations of markdown that I know of are [Discount] 20 | (http://www.pell.portland.or.us/~orc/Code/markdown/) and [PEG-markdown] 21 | (http://github.com/jgm/peg-markdown/tree/master). Discount seemed a little 22 | bit too integrated and focused on HTML output for my taste, and 23 | PEG-markdown seemed to have a lot of dependencies and stuff. So I wrote my 24 | own. 25 | 26 | I like to keep things simple, so I wrote a function that performs *only* 27 | markdown parsing: no file reading or writing, no (X)HTML considerations, 28 | etc. The actual output is performed by a set of dedicated callback 29 | functions, called here a renderer. Some example renderers are provided, but 30 | you are free to use your own to output in any format you like. 31 | 32 | This callback mechanism makes libsoldout so flexible that it does not need 33 | any flag or external information besides input text and renderer to 34 | operate. 35 | 36 | 37 | 38 | Usage 39 | ----- 40 | 41 | ### Library function call 42 | 43 | The only exported function in libsoldout is `markdown()`: 44 | 45 | void markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndr); 46 | 47 | - `ob` is the output buffer, where the renderer will append data, 48 | - `ib` is the input buffer, where the markdown text should be stored prior 49 | to the `markdown()` call, 50 | - `rndr` is a pointer to the renderer structure. 51 | 52 | How to use these structures is explained in the following sections. 53 | 54 | 55 | ### Buffers: struct buf 56 | 57 | I use `struct buf` extensively in input and output buffers. The initial 58 | idea was constructing a Pascal-string-like structure, to be able to store 59 | both text and binary data. Hence the members `data`, a char pointer to the 60 | buffer data, and `size` containing the data length. 
61 | 62 | When using a `struct buf` as an output buffer, it is useful to pre-allocate 63 | the memory area before filling it, so I added an `asize` member containing 64 | the allocated size of the memory pointed to by `data`. 65 | 66 | When accumulating data in a growing memory area, there is a trade-off 67 | between memory usage and speed: the more bytes are added each time, the 68 | less `realloc()` is called, which means potentially less `memcpy()` to a new 69 | zone, so faster code, but more memory being allocated for nothing. To 70 | set the trade-off on a case-by-case basis, there is a `unit` member in the 71 | structure: when more memory is needed, `asize` is augmented by a multiple 72 | of `unit`. So the larger `unit`, the more memory is allocated at once, the 73 | less `realloc()` is called. 74 | 75 | To further improve code efficiency by removing unneeded memcpy, I added a 76 | reference count to the structure: the `ref` member. 77 | 78 | Buffers are created using `bufnew()` whose only argument is the value for 79 | `unit`. `bufrelease()` decreases the reference count of a buffer, and frees 80 | it when this count is zero. `bufset()` is used to set a `struct buf` 81 | pointer to point to the given buffer, increasing reference count and 82 | dealing with special cases like volatile buffers. 83 | 84 | Usually data from `struct buf` are read through direct access of its 85 | members `data` and `size`. One interesting trick which might not be widely 86 | known is how to printf a buffer (or any kind of non-zero-terminated 87 | string) that doesn't contain any zero, using the `%.*s` conversion. 
For example: 88 | 89 | printf("Buffer string: \"%.*s\"\n", (int)buf->size, buf->data); 90 | 91 | In case you really need a zero-terminated string, you can call 92 | `bufnullterm()` which appends a zero character without changing `size`, 93 | hence the buffer being virtually the same (and will no longer be 94 | zero-terminated after the following data append) but `data` can be used as 95 | a regular C string. 96 | 97 | The most common functions to append data into buffers are: 98 | 99 | - `bufprintf()` which behaves like any \*printf function, 100 | - `bufput()` which is similar to `memcpy()`, 101 | - `bufputs()` which appends a zero-terminated string to a buffer, 102 | - `BUFPUTSL()` which is a macro to replace `bufputs()` when using string 103 | literals, because then the data size is known at compile-time, this 104 | saves a call to `strlen()`, 105 | - `bufputc()` for single-character appends. 106 | 107 | Modification of existing data in a buffer is also performed through direct 108 | access of structure members. 109 | 110 | This covers the basics to handle my `struct buf`, but there might still be 111 | some interesting stuff to be learned from the header. 112 | 113 | 114 | ### Renderer: struct mkd_renderer 115 | 116 | Libsoldout only performs the parsing of markdown input, the construction of 117 | the output is left to a *renderer*, which is a set of callback functions 118 | called when markdown elements are encountered. Pointers to these functions 119 | are gathered into a `struct mkd_renderer` along with some renderer-related 120 | data. 
I think the struct declaration is pretty obvious: 121 | 122 | struct mkd_renderer { 123 | /* document level callbacks */ 124 | void (*prolog)(struct buf *ob, void *opaque); 125 | void (*epilog)(struct buf *ob, void *opaque); 126 | 127 | /* block level callbacks - NULL skips the block */ 128 | void (*blockcode)(struct buf *ob, struct buf *text, void *opaque); 129 | void (*blockquote)(struct buf *ob, struct buf *text, void *opaque); 130 | void (*blockhtml)(struct buf *ob, struct buf *text, void *opaque); 131 | void (*header)(struct buf *ob, struct buf *text, 132 | int level, void *opaque); 133 | void (*hrule)(struct buf *ob, void *opaque); 134 | void (*list)(struct buf *ob, struct buf *text, int flags, void *opaque); 135 | void (*listitem)(struct buf *ob, struct buf *text, 136 | int flags, void *opaque); 137 | void (*paragraph)(struct buf *ob, struct buf *text, void *opaque); 138 | void (*table)(struct buf *ob, struct buf *head_row, struct buf *rows, 139 | void *opaque); 140 | void (*table_cell)(struct buf *ob, struct buf *text, int flags, 141 | void *opaque); 142 | void (*table_row)(struct buf *ob, struct buf *cells, int flags, 143 | void *opaque); 144 | 145 | /* span level callbacks - NULL or return 0 prints the span verbatim */ 146 | int (*autolink)(struct buf *ob, struct buf *link, 147 | enum mkd_autolink type, void *opaque); 148 | int (*codespan)(struct buf *ob, struct buf *text, void *opaque); 149 | int (*double_emphasis)(struct buf *ob, struct buf *text, 150 | char c, void *opaque); 151 | int (*emphasis)(struct buf *ob, struct buf *text, char c,void*opaque); 152 | int (*image)(struct buf *ob, struct buf *link, struct buf *title, 153 | struct buf *alt, void *opaque); 154 | int (*linebreak)(struct buf *ob, void *opaque); 155 | int (*link)(struct buf *ob, struct buf *link, struct buf *title, 156 | struct buf *content, void *opaque); 157 | int (*raw_html_tag)(struct buf *ob, struct buf *tag, void *opaque); 158 | int (*triple_emphasis)(struct buf *ob, struct buf 
*text, 159 | char c, void *opaque); 160 | 161 | /* low level callbacks - NULL copies input directly into the output */ 162 | void (*entity)(struct buf *ob, struct buf *entity, void *opaque); 163 | void (*normal_text)(struct buf *ob, struct buf *text, void *opaque); 164 | 165 | /* renderer data */ 166 | int max_work_stack; /* prevent arbitrary deep recursion */ 167 | const char *emph_chars; /* chars that trigger emphasis rendering */ 168 | void *opaque; /* opaque data send to every rendering callback */ 169 | }; 170 | 171 | The first argument of a renderer function is always the output buffer, 172 | where the function is supposed to write its output. It's not necessarily 173 | related to the output buffer given to `markdown()` because in some cases 174 | rendering into a temporary buffer is needed. 175 | 176 | The last argument of a renderer function is always an opaque pointer, which 177 | is equal to the `opaque` member of `struct mkd_renderer`. The name 178 | "opaque" might not be well-chosen, but it means a pointer *opaque for the 179 | parser, **not** for the renderer*. It means that my parser passes around 180 | blindly the pointer which contains data you know about, in case you need to 181 | store an internal state or whatever. I have not found anything to put in 182 | this pointer in my example renderers, so it is set to NULL in the structure 183 | and the callbacks don't use it. 184 | 185 | `emph_chars` is a zero-terminated string which contains the set of 186 | characters that trigger emphasis. In regular markdown, emphasis is only 187 | triggered by '\_' and '\*', but in some extensions it might be useful to 188 | add other characters to this list. For example in my extension to handle 189 | `<ins>` and `<del>` spans, delimited respectively by "++" and "--", I have 190 | added '+' and '-' to `emph_chars`. The character that triggered the 191 | emphasis is then passed to `emphasis`, `double_emphasis` and 192 | `triple_emphasis` through the parameter `c`. 
193 | 194 | Function pointers in `struct mkd_renderer` can be NULL, but this has a 195 | different meaning depending on whether the callback is block-level or span-level. A null 196 | block-level callback will make the corresponding block disappear from the 197 | output, as if the callback was an empty function. A null span-level 198 | callback will cause the corresponding element to be treated as normal 199 | characters, copied verbatim to the output. 200 | 201 | So for example, to disable links and images (e.g. because you consider them 202 | as dangerous), just put a null pointer in `rndr.link` and `rndr.image` and 203 | the bracketed stuff will be present as-is in the output, while a null 204 | pointer in `header` will remove all header-looking blocks. If you want an 205 | otherwise standard markdown-to-XHTML conversion, you can take the example 206 | `mkd_xhtml` struct, copy it into your own `struct mkd_renderer` and then 207 | assign NULL to `link` and `image` members. 208 | 209 | Moreover, span-level callbacks return an integer, which tells whether the 210 | renderer accepts to render the item (non-zero return value) or whether it 211 | should be copied verbatim (zero return value). This allows you to only 212 | accept some specific inputs. For example, my extension for `<ins>` and 213 | `<del>` spans requires *exactly* two '-' or '+' as delimiters, so when `emphasis` 214 | and `triple_emphasis` are called with '-' or '+', they return 0. 215 | 216 | Special care should be taken when writing `autolink`, `link` and `image` 217 | callbacks, because the arguments `link`, `title` and `alt` are unsanitized 218 | data taken directly from the input file. It is up to the renderer to escape 219 | whatever needs escaping to prevent bad things from happening. To help you 220 | write renderers, the function `lus_attr_escape()` escapes all problematic 221 | characters in (X)HTML: `'<'`, `'>'`, `'&'` and `'"'`. 
222 | 223 | The `normal_text` callback should also perform whatever escape is needed to 224 | have the output looking like the input data. 225 | 226 | 227 | ### libsoldout extension: PHP-Markdown-like tables 228 | 229 | Tables are one of the few extensions that are quite difficult and/or hacky 230 | to implement using a vanilla Markdown parser and a renderer. Thus support 231 | has been introduced into the parser, using dedicated callbacks: 232 | 233 | - `table_cell`, which is called with the span-level contents of the cell; 234 | - `table_row`, which is called with data returned by `table_cell`; 235 | - `table`, which is called with data returned by `table_row`. 236 | 237 | The input format to describe tables is taken from PHP-Markdown, and looks 238 | like this: 239 | 240 | header 1 | header 2 | header 3 | header 4 241 | ------------|:-------------:|--------------:|:-------------- 242 | first line | centered | right-aligned | left-aligned 243 | second line | centered |: centered :| left-aligned 244 | third line |: left-aligned | right-aligned | right-aligned : 245 | column-separator | don't need | to be | aligned in the source 246 | | extra spectators | are allowed | at both ends | of the line | 247 | | correct number of cell per row is not enforced | 248 | | pipe characters can be embedded in cell text by escaping it: \| | 249 | 250 | Each row of the input text is a single row in the output, except the header 251 | rule, which is purely syntactic. 252 | 253 | Each cell in a row is delimited by a pipe (`|`) character. Optionally, a 254 | pipe character can also be present at the beginning and/or at the end of 255 | the line. Column separators don't have to be aligned in the input, but it 256 | makes the input more readable. 257 | 258 | There is no check of "squareness" of the table: `table_cell` is called once 259 | for each cell provided in the input, which can be a number of times 260 | different from one row to the other. 
If the output *has* to respect a given 261 | number of cells per row, it's up to the renderer to enforce it, using state 262 | transmitted through the `opaque` pointer. 263 | 264 | The header rule is a line containing only horizontal blanks (space and 265 | tab), dashes (`-`), colons (`:`) and separators. Moreover, it *must* be the 266 | second line of the table. In case such a header rule is detected, the first 267 | line of the table is considered as a header, and passed as the `head_row` 268 | argument to the `table` callback. Moreover `table_row` and `table_cell` are 269 | called for that specific row with the `MKD_CELL_HEAD` flag. 270 | 271 | Alignment is defined on a per-cell basis, and specified by a colon (`:`) at 272 | the very beginning of the input span (i.e. directly after the `|` 273 | separator, or as the first character on the line) and/or at the very end of 274 | it (i.e. directly before the separator, or as the last character on the 275 | line). A cell with such a leading colon only is left-aligned 276 | (`MKD_CELL_ALIGN_LEFT`), one with a trailing colon only is right-aligned 277 | (`MKD_CELL_ALIGN_RIGHT`), and one with both is centered 278 | (`MKD_CELL_ALIGN_CENTER`). 279 | 280 | A column-wise default alignment can be specified with the same syntax on 281 | the header rule. 282 | 283 | 284 | ### Renderer examples 285 | 286 | While libsoldout is designed to perform only the parsing of markdown files, 287 | and to let you provide the renderer callbacks, a few renderers have been 288 | included, both to illustrate how to write a set of renderer functions and 289 | to allow anybody who does not need special extensions to use libsoldout 290 | without hassle. 291 | 292 | All the examples provided here come with two flavors, `_html` producing 293 | HTML code (self-closing tags are rendered like this: `<hr>`), and `_xhtml` 294 | producing XHTML code (self-closing tags like `<hr />`). 295 | 296 | #### Standard markdown renderer 297 | 298 | `mkd_html` and `mkd_xhtml` implement standard Markdown to (X)HTML 299 | translation without any extension. 300 | 301 | #### Discount-ish renderer 302 | 303 | `discount_html` and `discount_xhtml` implement on top of the standard 304 | markdown *some* of the extensions found in Discount. 305 | 306 | Actually, all Discount extensions that are not provided here cannot be 307 | easily implemented in libsoldout without touching the parsing code, 308 | hence they do not belong strictly to the renderer realm. However some 309 | (maybe all, not sure about tables) extensions can be implemented fairly 310 | easily with libsoldout by using both a dedicated renderer and some 311 | preprocessing to make the extension look like something closer to the 312 | original markdown syntax. 313 | 314 | Here is a list of all extensions included in these renderers: 315 | 316 | - image size specification, by appending " =(width)x(height)" to the link, 317 | - pseudo-protocols in links: 318 | * abbr:_description_ for `<abbr title="description">...</abbr>` 319 | * class:_name_ for `<span class="name">...</span>` 320 | * id:_name_ for `<a id="name">...</a>` 321 | * raw:_text_ for verbatim unprocessed _text_ inclusion 322 | - class blocks: blockquotes beginning with %_class_% will be rendered as a 323 | `div` of the given class(es). 324 | 325 | #### Natasha's own extensions 326 | 327 | `nat_html` and `nat_xhtml` implement on top of Discount extensions some 328 | things that I need to convert losslessly my existing HTML into extended 329 | markdown. 330 | 331 | Here is a list of these extensions : 332 | 333 | - id attribute for headers, using the syntax _id_#_Header text_ 334 | - class attribute for paragraphs, by putting class name(s) between 335 | parentheses at the very beginning of the paragraph 336 | - `<ins>` and `<del>` spans, using respectively `++` and `--` as 337 | delimiters (with emphasis-like restrictions, i.e. 
an opening delimiter 338 | cannot be followed by a whitespace, and a closing delimiter cannot be 339 | preceded by a whitespace). 340 | - plain `<span>` without attribute, using emphasis-like delimiter `|` 341 | 342 | Here is an example using all of them: 343 | 344 | ###atx_id#ID was chosen to look nice in atx-style headers ### 345 | 346 | setext_id#Though it will also work in setext-style headers 347 | ---------------------------------------------------------- 348 | 349 | Here is a paragraph with --deleted-- and ++inserted++ text. 350 | 351 | I use CSS rules to render poetry and other verses, using a plain 352 | `<span>` for each verse, and enclosing each group of verses in 353 | a `<p class="verse">`. Here is how it would look like: 354 | 355 | (verse)|And on the pedestal these words appear:| 356 | |"My name is Ozymandias, king of kings:| 357 | |Look on my works, ye Mighty, and despair!"| 358 | 359 | 360 | Internals 361 | --------- 362 | 363 | Here I explain the structure of `markdown.c`, and how this parser works. I 364 | use a logical order, which is roughly chronological, which means going 365 | roughly from the bottom of the file to the top. 366 | 367 | 368 | ### markdown() 369 | 370 | The markdown function is divided into four parts: setup of the `struct 371 | render`, first pass on the input, actual parsing, and clean-up. 372 | 373 | #### render structure 374 | 375 | A `struct render` is passed around most of the functions, and it contains 376 | all the information specific to the render. 377 | 378 | `make` is a copy of the `struct mkd_renderer` given to `markdown()`. The 379 | rendering callbacks are actually called from there. 380 | 381 | `refs` is a dynamic sorted array of link references (`struct link_ref`). It 382 | is filled from the input file during the first pass. A link reference is a 383 | structure of three buffers, `id`, `link` and `title`, whose functions are 384 | straightforward. 385 | 386 | `work` is a dynamic array of working buffers. Short-lived working buffers are 387 | needed throughout the parser, and doing a lot of `malloc()` and `free()` is 388 | quite inefficient. Instead, when a working buffer is allocated, it is kept 389 | in this array to be reused next time a working buffer is needed. 390 | 391 | `active_char` is a C array of function pointers, used for span-level 392 | parsing: a null pointer is assigned to all inactive characters, and a 393 | specialized callback is stored for active characters. This initialization 394 | is the bulk of the first part, because characters should only be marked 395 | active when the rendering callback pointer is non-null. 
396 | 397 | #### First pass on the input 398 | 399 | During the first pass on the input, newlines are normalized and reference 400 | lines taken out of the input, and stored into `rndr.refs`. 401 | 402 | It makes use of the helper function `is_ref()`, which parses the given 403 | line, checking whether it matches the reference syntax. Offsets of the 404 | reference components are kept while progressing in the line, and on the 405 | first syntax error 0 is returned and the line is considered as an input 406 | line. 407 | 408 | When all the tests are passed, a new `struct link_ref` is created and 409 | sorted into `rndr.refs`. 410 | 411 | #### Second pass 412 | 413 | `markdown()` does not do much here: the result of the first pass is fed to 414 | `parse_block()` which fills the output buffer `ob`. 415 | 416 | #### Clean-up 417 | 418 | References allocated during the first pass, and working buffers allocated 419 | during the second pass are freed there, before returning. 420 | 421 | 422 | ### Block-level parsing 423 | 424 | The core of block-level parsing is the function `parse_block()`, which 425 | runs over the whole input (on the first call, the input is the output of 426 | the first pass, but `parse_block()` can be called recursively for blocks 427 | inside blocks, e.g. for blockquotes). 428 | 429 | The kind of block at the beginning of the input is determined using the 430 | `prefix_*` functions, then the correct `parse_*` function is called 431 | for the current block. All specialized `parse_*` functions return a 432 | `size_t` which is the size of the current block. This lets `parse_block()` 433 | know where to start looking for the following block. 434 | 435 | Some blocks are easy to handle, for example blocks of code: the 436 | `parse_blockcode()` function only scans the input, accumulating lines in a 437 | working buffer after stripping the blockcode prefix, and stopping at the 438 | first non-empty non-blockcode-prefixed line. 
It then calls the rendering 439 | function for block codes and returns. 440 | 441 | Other blocks are more complicated, like paragraphs which can actually be 442 | setext-style headers, or list items, which require a special subparse to 443 | follow Markdown rules where sublist creation is more lax than list 444 | creation. 445 | 446 | Most block functions call `parse_inline()` for span-level parsing, before 447 | handing the result to the block renderer callback. 448 | 449 | #### HTML block parsing 450 | 451 | Of interest is the `parse_htmlblock()` function: according to the Markdown 452 | webpage, HTML blocks must be delimited by unindented block-level tags, 453 | with the opening tag being preceded by a blank line, and the closing tag 454 | being followed by a blank line. 455 | 456 | When looking at the reference implementation, `Markdown.pl`, it appeared 457 | that when this doesn't find a match, a more lax syntax is tried, where 458 | the closing tag can be indented, it only has to be at the end of line and 459 | followed by a blank line. 460 | 461 | But when looking at the test suite, it appeared that a single line 462 | `<div>foo</div>` surrounded by blank lines should be recognized as a 463 | block, regardless of the "matching" unindented closing tag at the end of 464 | the document. This meant that only the lax approach should be used. 465 | 466 | This is why the first pass is commented out with a `#if 0`. If you want a strict 467 | HTML block parsing, as described on the webpage, you should instead comment out 468 | the second pass. Keeping both first and second passes yields the same 469 | behaviour as `Markdown.pl` v1.0.1. 470 | 471 | I have to admit I do not really care that much about these differences, as 472 | I do not intend to use personally any inline HTML, because I will either 473 | parse unsafe input, then inline HTML is too dangerous, or my own input, 474 | but I use Markdown when I'm not confident in my HTML correctness, so it 475 | would be useless to include HTML in my input. However I am aware this 476 | feature can matter for some people, and any patch or suggestion to "fix" 477 | this behaviour will be welcome. 478 | 479 | 480 | ### Span-level parsing 481 | 482 | The core of span-level parsing is the function `parse_inline()`, which is 483 | pretty different from `parse_block()`. It is based around the 484 | `active_char[]` vector table in the render structure. 485 | 486 | The main loop is composed of two parts : first the next active character is 487 | looked for. The string of inactive characters is directly handed over to 488 | the `normal_text` rendering callback. 489 | 490 | When a character is active, its corresponding entry in the `active_char[]` 491 | is a pointer to one of the `char_*` functions. Most of these functions do a 492 | pretty straightforward work in handling their role. 493 | 494 | The most complicated of these functions is `char_link`, which responds to 495 | `'['`. 
This is because of the many possibilities offered by markdown to use 496 | this character : it can either be a part of a link or an image, and then it 497 | can be inline or reference style or a shortcut reference style. 498 | 499 | Emphasis is another interesting piece of code, in that when encountering an 500 | emphasis character, it first looks whether it is single or double or triple 501 | emphasis, and then goes forward looking for a match. 502 | 503 | 504 | ### Proof that recursion depth is bounded by `max_work_stack` 505 | 506 | The core of the code here is that when entering the functions 507 | `parse_inline()` and `parse_block()`, if the current size of the working 508 | buffer stack (`rndr->work`) is above `max_work_stack`, the parsing is 509 | short-circuited and the input is appended as-is. 510 | 511 | Let's prove now that this actually works, i.e. that it does put an upper 512 | bound on the nested function call depth. 513 | 514 | **Step 1**: there is no function calling itself directly in `markdown.c`. 515 | This is quite easy to check, though a bit tedious. This proves that a stack 516 | overflow involves a recursion cycle of at least two functions. 517 | 518 | **Step 2**: most of the functions in `markdown.c` are declared by their 519 | definition, which means that these functions can only call functions 520 | appearing before them in the source file. This provides a strict hierarchy, 521 | which prevents any multiple-function recursion cycle. So only exceptions to 522 | the hierarchy are left to check. 523 | 524 | **Step 3**: there are only 3 functions that break the above-mentioned 525 | hierarchy: 526 | 527 | * `markdown()`, which is declared through the inclusion of `markdown.h` 528 | at the very beginning of `markdown.c`. However an easy text search shows 529 | that it's actually never called here, which obviously prevents it from 530 | being part of a recursion cycle. 
531 | * `parse_block()`, which is declared at the beginning of the block-level 532 | section, but defined at the end. 533 | * `parse_inline()`, which uses function pointers to dispatch active 534 | character handling towards `char_*` functions below. 535 | 536 | So at this point I have proved that any recursion cycle *always* involves 537 | `parse_block()` or `parse_inline()`. So checking a depth-indicator only in 538 | these functions is enough to prevent recursion cycles. 539 | 540 | **Step 4**: `rndr->work.size` is a good depth-indicator, because all calls 541 | to `parse_block()` or `parse_inline()` happen after at least one working 542 | buffer allocation. This is again a bit tedious to check: 543 | 544 | * `parse_block()` is called in `markdown()`, which is irrelevant, and in 545 | `parse_blockquote()` and `parse_listitem()`, which allocate respectively 546 | one and two working buffers at the very beginning of the function; 547 | * `parse_inline()` is called in `parse_emph1()`, `parse_emph2()`, 548 | `parse_emph3()`, `char_link()`, `parse_paragraph()` (twice), and each time 549 | it's called right after allocating a new working buffer; and in 550 | `parse_listitem()` which allocates two working buffers at the very 551 | beginning of the function. 552 | 553 | Therefore, `rndr->work.size` will always increase between calls of 554 | `parse_block()` or `parse_inline()`, which in turn proves that putting an 555 | upper bound on `rndr->work.size` prevents arbitrarily deep recursions, and 556 | therefore stack overflows when the upper bound is well chosen. 557 | 558 | 559 | ### Utility functions 560 | 561 | Throughout the parsing the need for a working buffer frequently arises. A 562 | naive approach is to allocate a working buffer each time one is needed, and 563 | release it afterwards. However it leads to a lot of allocations, 564 | deallocations and reallocations (when the buffer grows), which costs a lot 565 | of time.
566 | 567 | So I added a `work` dynamic array pointer, with a special meaning given to the 568 | `size` and `asize` members: in this array, the first `size` members are 569 | active working buffers that are still in use, and the remaining members up 570 | to `asize` are allocated but no longer used working buffers. 571 | 572 | When a function needs a working buffer, it first compares `size` to `asize`. 573 | When they are equal, it means there is no available working buffer, and a 574 | new one is created and appended (`push`ed) to the array. Otherwise it 575 | increases `size` and takes the already-allocated buffer as its working 576 | buffer, resetting its size. 577 | 578 | When the working buffer is no longer needed, the `size` of the array is 579 | just decreased, meaning the buffer is still allocated but ready to be taken 580 | by the next function in need. 581 | 582 | When the parsing is over, every working buffer should be marked as ready to 583 | be reused, hence the assertion of `size` being zero in `markdown()`. The 584 | buffers in the array are finally freed. 585 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Note**: this project is considered mature software, so there isn't much 2 | going on in terms of code changes. However it's still actively maintained, 3 | and I will consider bug reports or feature requests usually in a matter 4 | of days, or at worst within a few weeks. 5 | 6 | The reference repository is based on [fossil][] and available at 7 | . 8 | 9 | [fossil]: http://www.fossil-scm.org/index.html/doc/trunk/www/index.wiki 10 | 11 | 12 | Libsoldout 13 | ========== 14 | 15 | Overview: 16 | 17 | 1. Introduction 18 | 2. Usage 19 | 3.
Internals 20 | 21 | 22 | 23 | Introduction 24 | ------------ 25 | 26 | For some projects of mine, I wanted a lightweight C library that can parse 27 | John Gruber's [markdown](http://daringfireball.net/projects/markdown/) 28 | format into whatever I want, and that is easily extensible. 29 | 30 | The only C implementations of markdown that I know of are [Discount] 31 | (http://www.pell.portland.or.us/~orc/Code/markdown/) and [PEG-markdown] 32 | (http://github.com/jgm/peg-markdown/tree/master). Discount seemed a little 33 | bit too integrated and focused on HTML output for my taste, and 34 | PEG-markdown seemed to have a lot of dependencies and stuff. So I wrote my 35 | own. 36 | 37 | I like to keep things simple, so I wrote a function that performs *only* 38 | markdown parsing: no file reading or writing, no (X)HTML considerations, 39 | etc. The actual output is performed by a set of dedicated callback 40 | functions, called here a renderer. Some example renderers are provided, but 41 | you are free to use your own to output in any format you like. 42 | 43 | This callback mechanism makes libsoldout so flexible that it does not need 44 | any flag or external information besides input text and renderer to 45 | operate. 46 | 47 | 48 | 49 | Usage 50 | ----- 51 | 52 | ### Library function call 53 | 54 | The only exported function in libsoldout is `markdown()`: 55 | 56 | void markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndr); 57 | 58 | - `ob` is the output buffer, where the renderer will append data, 59 | - `ib` is the input buffer, where the markdown text should be stored prior 60 | to the `markdown()` call, 61 | - `rndr` is a pointer to the renderer structure. 62 | 63 | How to use these structures is explained in the following sections. 64 | 65 | 66 | ### Buffers: struct buf 67 | 68 | I use `struct buf` extensively in input and output buffers.
The initial 69 | idea was constructing a Pascal-string like structure, to be able to store 70 | both text and binary data. Hence the members `data`, a char pointer to the 71 | buffer data, and `size` containing the data length. 72 | 73 | When using a `struct buf` as an output buffer, it is useful to pre-allocate 74 | the memory area before filling it, so I added an `asize` member containing 75 | the allocated size of the memory pointed to by `data`. 76 | 77 | When accumulating data in a growing memory area, there is a trade-off 78 | between memory usage and speed: the more bytes are added each time, the 79 | less `realloc()` is called, which means potentially less `memcpy()` to a new 80 | zone, so faster code, but more memory being allocated for nothing. To 81 | set the trade-off on a case-by-case basis, there is a `unit` member in the 82 | structure: when more memory is needed, `asize` is augmented by a multiple 83 | of `unit`. So the larger `unit`, the more memory is allocated at once, the 84 | less `realloc()` is called. 85 | 86 | To further improve code efficiency by removing unneeded memcpy, I added a 87 | reference count to the structure: the `ref` member. 88 | 89 | Buffers are created using `bufnew()` whose only argument is the value for 90 | `unit`. `bufrelease()` decreases the reference count of a buffer, and frees 91 | it when this count is zero. `bufset()` is used to set a `struct buf` 92 | pointer to point to the given buffer, increasing reference count and 93 | dealing with special cases like volatile buffers. 94 | 95 | Usually data from `struct buf` are read through direct access of its 96 | members `data` and `size`. One interesting trick which might not be widely 97 | known is how to printf a buffer (or any kind of non-zero-terminated 98 | string) that doesn't contain any zero, using the `%.*s` format.
For example: 99 | 100 | printf("Buffer string: \"%.*s\"\n", (int)buf->size, buf->data); 101 | 102 | In case you really need a zero-terminated string, you can call 103 | `bufnullterm()` which appends a zero character without changing `size`, 104 | hence the buffer being virtually the same (and will no longer be 105 | zero-terminated after the following data append) but `data` can be used as 106 | a regular C string. 107 | 108 | The most common functions to append data into buffers are: 109 | 110 | - `bufprintf()` which behaves like any \*printf function, 111 | - `bufput()` which is similar to `memcpy()`, 112 | - `bufputs()` which appends a zero-terminated string to a buffer, 113 | - `BUFPUTSL()` which is a macro to replace `bufputs()` when using string 114 | literals, because then the data size is known at compile-time, this 115 | saves a call to `strlen()`, 116 | - `bufputc()` for single-character appends. 117 | 118 | Modification of existing data in a buffer is also performed through direct 119 | access of structure members. 120 | 121 | This covers the basics to handle my `struct buf`, but there might still be 122 | some interesting stuff to be learned from the header. 123 | 124 | 125 | ### Renderer: struct mkd_renderer 126 | 127 | Libsoldout only performs the parsing of markdown input, the construction of 128 | the output is left to a *renderer*, which is a set of callback functions 129 | called when markdown elements are encountered. Pointers to these functions 130 | are gathered into a `struct mkd_renderer` along with some renderer-related 131 | data. 
I think the struct declaration is pretty obvious: 132 | 133 | struct mkd_renderer { 134 | /* document level callbacks */ 135 | void (*prolog)(struct buf *ob, void *opaque); 136 | void (*epilog)(struct buf *ob, void *opaque); 137 | 138 | /* block level callbacks - NULL skips the block */ 139 | void (*blockcode)(struct buf *ob, struct buf *text, void *opaque); 140 | void (*blockquote)(struct buf *ob, struct buf *text, void *opaque); 141 | void (*blockhtml)(struct buf *ob, struct buf *text, void *opaque); 142 | void (*header)(struct buf *ob, struct buf *text, 143 | int level, void *opaque); 144 | void (*hrule)(struct buf *ob, void *opaque); 145 | void (*list)(struct buf *ob, struct buf *text, int flags, void *opaque); 146 | void (*listitem)(struct buf *ob, struct buf *text, 147 | int flags, void *opaque); 148 | void (*paragraph)(struct buf *ob, struct buf *text, void *opaque); 149 | void (*table)(struct buf *ob, struct buf *head_row, struct buf *rows, 150 | void *opaque); 151 | void (*table_cell)(struct buf *ob, struct buf *text, int flags, 152 | void *opaque); 153 | void (*table_row)(struct buf *ob, struct buf *cells, int flags, 154 | void *opaque); 155 | 156 | /* span level callbacks - NULL or return 0 prints the span verbatim */ 157 | int (*autolink)(struct buf *ob, struct buf *link, 158 | enum mkd_autolink type, void *opaque); 159 | int (*codespan)(struct buf *ob, struct buf *text, void *opaque); 160 | int (*double_emphasis)(struct buf *ob, struct buf *text, 161 | char c, void *opaque); 162 | int (*emphasis)(struct buf *ob, struct buf *text, char c,void*opaque); 163 | int (*image)(struct buf *ob, struct buf *link, struct buf *title, 164 | struct buf *alt, void *opaque); 165 | int (*linebreak)(struct buf *ob, void *opaque); 166 | int (*link)(struct buf *ob, struct buf *link, struct buf *title, 167 | struct buf *content, void *opaque); 168 | int (*raw_html_tag)(struct buf *ob, struct buf *tag, void *opaque); 169 | int (*triple_emphasis)(struct buf *ob, struct buf 
*text, 170 | char c, void *opaque); 171 | 172 | /* low level callbacks - NULL copies input directly into the output */ 173 | void (*entity)(struct buf *ob, struct buf *entity, void *opaque); 174 | void (*normal_text)(struct buf *ob, struct buf *text, void *opaque); 175 | 176 | /* renderer data */ 177 | int max_work_stack; /* prevent arbitrary deep recursion */ 178 | const char *emph_chars; /* chars that trigger emphasis rendering */ 179 | void *opaque; /* opaque data send to every rendering callback */ 180 | }; 181 | 182 | The first argument of a renderer function is always the output buffer, 183 | where the function is supposed to write its output. It's not necessarily 184 | related to the output buffer given to `markdown()` because in some cases 185 | rendering into a temporary buffer is needed. 186 | 187 | The last argument of a renderer function is always an opaque pointer, which 188 | is equal to the `opaque` member of `struct mkd_renderer`. The name 189 | "opaque" might not be well-chosen, but it means a pointer *opaque for the 190 | parser, **not** for the renderer*. It means that my parser passes around 191 | blindly the pointer which contains data you know about, in case you need to 192 | store an internal state or whatever. I have not found anything to put in 193 | this pointer in my example renderers, so it is set to NULL in the structure 194 | and the callbacks don't use it. 195 | 196 | `emph_chars` is a zero-terminated string which contains the set of 197 | characters that trigger emphasis. In regular markdown, emphasis is only 198 | triggered by '\_' and '\*', but in some extensions it might be useful to 199 | add other characters to this list. For example in my extension to handle 200 | `<ins>` and `<del>` spans, delimited respectively by "++" and "--", I have 201 | added '+' and '-' to `emph_chars`. The character that triggered the 202 | emphasis is then passed to `emphasis`, `double_emphasis` and 203 | `triple_emphasis` through the parameter `c`.
204 | 205 | Function pointers in `struct mkd_renderer` can be NULL, but it has a 206 | different meaning depending on whether the callback is block-level or span-level. A null 207 | block-level callback will make the corresponding block disappear from the 208 | output, as if the callback was an empty function. A null span-level 209 | callback will cause the corresponding element to be treated as normal 210 | characters, copied verbatim to the output. 211 | 212 | So for example, to disable links and images (e.g. because you consider them 213 | as dangerous), just put a null pointer in `rndr.link` and `rndr.image` and 214 | the bracketed stuff will be present as-is in the output, while a null 215 | pointer in `header` will remove all header-looking blocks. If you want an 216 | otherwise standard markdown-to-XHTML conversion, you can take the example 217 | `mkd_xhtml` struct, copy it into your own `struct mkd_renderer` and then 218 | assign NULL to `link` and `image` members. 219 | 220 | Moreover, span-level callbacks return an integer, which tells whether the 221 | renderer accepts to render the item (non-zero return value) or whether it 222 | should be copied verbatim (zero return value). This allows you to only 223 | accept some specific inputs. For example, my extension for `<ins>` and 224 | `<del>` spans asks *exactly* two '-' or '+' as delimiters: when `emphasis` 225 | and `triple_emphasis` are called with '-' or '+', they return 0. 226 | 227 | Special care should be taken when writing `autolink`, `link` and `image` 228 | callbacks, because the arguments `link`, `title` and `alt` are unsanitized 229 | data taken directly from the input file. It is up to the renderer to escape 230 | whatever needs escaping to prevent bad things from happening. To help you 231 | write renderers, the function `lus_attr_escape()` escapes all problematic 232 | characters in (X)HTML: `'<'`, `'>'`, `'&'` and `'"'`.
233 | 234 | The `normal_text` callback should also perform whatever escape is needed to 235 | have the output looking like the input data. 236 | 237 | 238 | ### libsoldout extension: PHP-Markdown-like tables 239 | 240 | Tables are one of the few extensions that are quite difficult and/or hacky 241 | to implement using a vanilla Markdown parser and a renderer. Thus support 242 | has been introduced into the parser, using dedicated callbacks: 243 | 244 | - `table_cell`, which is called with the span-level contents of the cell; 245 | - `table_row`, which is called with data returned by `table_cell`; 246 | - `table`, which is called with data returned by `table_row`. 247 | 248 | The input format to describe tables is taken from PHP-Markdown, and looks 249 | like this: 250 | 251 | header 1 | header 2 | header 3 | header 4 252 | ------------|:-------------:|--------------:|:-------------- 253 | first line | centered | right-aligned | left-aligned 254 | second line | centered |: centered :| left-aligned 255 | third line |: left-aligned | right-aligned | right-aligned : 256 | column-separator | don't need | to be | aligned in the source 257 | | extra spectators | are allowed | at both ends | of the line | 258 | | correct number of cell per row is not enforced | 259 | | pipe characters can be embedded in cell text by escaping it: \| | 260 | 261 | Each row of the input text is a single row in the output, except the header 262 | rule, which is purely syntactic. 263 | 264 | Each cell in a row is delimited by a pipe (`|`) character. Optionally, a 265 | pipe character can also be present at the beginning and/or at the end of 266 | the line. Column separators don't have to be aligned in the input, but it 267 | makes the input more readable. 268 | 269 | There is no check of "squareness" of the table: `table_cell` is called once 270 | for each cell provided in the input, which can be a number of times 271 | different from one row to the other.
If the output *has* to respect a given 272 | number of cells per row, it's up to the renderer to enforce it, using state 273 | transmitted through the `opaque` pointer. 274 | 275 | The header rule is a line containing only horizontal blanks (space and 276 | tab), dashes (`-`), colons (`:`) and separators. Moreover, it *must* be the 277 | second line of the table. In case such a header rule is detected, the first 278 | line of the table is considered as a header, and passed as the `head_row` 279 | argument to the `table` callback. Moreover `table_row` and `table_cell` are 280 | called for that specific row with the `MKD_CELL_HEAD` flag. 281 | 282 | Alignment is defined on a per-cell basis, and specified by a colon (`:`) at 283 | the very beginning of the input span (i.e. directly after the `|` 284 | separator, or as the first character on the line) and/or at the very end of 285 | it (i.e. directly before the separator, or as the last character on the 286 | line). A cell with such a leading colon only is left-aligned 287 | (`MKD_CELL_ALIGN_LEFT`), one with a trailing colon only is right-aligned 288 | (`MKD_CELL_ALIGN_RIGHT`), and one with both is centered 289 | (`MKD_CELL_ALIGN_CENTER`). 290 | 291 | A column-wise default alignment can be specified with the same syntax on 292 | the header rule. 293 | 294 | 295 | ### Renderer examples 296 | 297 | While libsoldout is designed to perform only the parsing of markdown files, 298 | and to let you provide the renderer callbacks, a few renderers have been 299 | included, both to illustrate how to write a set of renderer functions and 300 | to allow anybody who does not need special extensions to use libsoldout 301 | without hassle. 302 | 303 | All the examples provided here come with two flavors, `_html` producing 304 | HTML code (self-closing tags are rendered like this: `
`), and `_xhtml` 305 | producing XHTML code (self-closing tags like `
`). 306 | 307 | #### Standard markdown renderer 308 | 309 | `mkd_html` and `mkd_xhtml` implement standard Markdown to (X)HTML 310 | translation without any extension. 311 | 312 | #### Discount-ish renderer 313 | 314 | `discount_html` and `discount_xhtml` implement on top of the standard 315 | markdown *some* of the extensions found in Discount. 316 | 317 | Actually, all Discount extensions that are not provided here cannot be 318 | easily implemented in libsoldout without touching to the parsing code, 319 | hence they do not belong strictly to the renderer realm. However some 320 | (maybe all, not sure about tables) extensions can be implemented fairly 321 | easily with libsoldout by using both a dedicated renderer and some 322 | preprocessing to make the extension look like something closer to the 323 | original markdown syntax. 324 | 325 | Here is a list of all extensions included in these renderers: 326 | 327 | - image size specification, by appending " =(width)x(height)" to the link, 328 | - pseudo-protocols in links: 329 | * abbr:_description_ for `...` 330 | * class:_name_ for `...` 331 | * id:_name_ for `...` 332 | * raw:_text_ for verbatim unprocessed _text_ inclusion 333 | - class blocks: blockquotes beginning with %_class_% will be rendered as a 334 | `div` of the given class(es). 335 | 336 | #### Natasha's own extensions 337 | 338 | `nat_html` and `nat_xhtml` implement on top of Discount extensions some 339 | things that I need to convert losslessly my existing HTML into extended 340 | markdown. 341 | 342 | Here is a list of these extensions : 343 | 344 | - id attribute for headers, using the syntax _id_#_Header text_ 345 | - class attribute for paragraphs, by putting class name(s) between 346 | parenthesis at the very beginning of the paragraph 347 | - `` and `` spans, using respectively `++` and `--` as 348 | delimiters (with emphasis-like restrictions, i.e. 
an opening delimiter 349 | cannot be followed by a whitespace, and a closing delimiter cannot be 350 | preceded by a whitespace). 351 | - plain `` without attribute, using emphasis-like delimiter `|` 352 | 353 | Follows an example use of all of them: 354 | 355 | ###atx_id#ID was chosen to look nice in atx-style headers ### 356 | 357 | setext_id#Though it will also work in setext-style headers 358 | ---------------------------------------------------------- 359 | 360 | Here is a paragraph with --deleted-- and ++inserted++ text. 361 | 362 | I use CSS rules to render poetry and other verses, using a plain 363 | `` for each verse, and enclosing each group of verses in 364 | a `

`. Here is how it would look: 365 | 366 | (verse)|And on the pedestal these words appear:| 367 | |"My name is Ozymandias, king of kings:| 368 | |Look on my works, ye Mighty, and despair!"| 369 | 370 | 371 | Internals 372 | --------- 373 | 374 | Here I explain the structure of `markdown.c`, and how this parser works. I 375 | use a logical order, which is roughly chronological, which means going 376 | roughly from the bottom of the file to the top. 377 | 378 | 379 | ### markdown() 380 | 381 | The markdown function is divided into four parts: setup of the `struct 382 | render`, first pass on the input, actual parsing, and clean-up. 383 | 384 | #### render structure 385 | 386 | A `struct render` is passed around most of the functions, and it contains 387 | all the information specific to the render. 388 | 389 | `make` is a copy of the `struct mkd_renderer` given to `markdown()`. The 390 | rendering callbacks are actually called from there. 391 | 392 | `refs` is a dynamic sorted array of link references (`struct link_ref`). It 393 | is filled from the input file during the first pass. A link reference is a 394 | structure of three buffers, `id`, `link` and `title`, whose functions are 395 | straightforward. 396 | 397 | `work` is a dynamic array of working buffers. Short-lived working buffers are 398 | needed throughout the parser, and doing a lot of `malloc()` and `free()` is 399 | quite inefficient. Instead, when a working buffer is allocated, it is kept 400 | in this array to be reused next time a working buffer is needed. 401 | 402 | `active_char` is a C array of function pointers, used for span-level 403 | parsing: a null pointer is assigned to all inactive characters, and a 404 | specialized callback is stored for active characters. This initialization 405 | is the bulk of the first part, because characters should only be marked 406 | active when the rendering callback pointer is non-null.
407 | 408 | #### First pass on the input 409 | 410 | During the first pass on the input, newlines are normalized and reference 411 | lines taken out of the input, and stored into `rndr.refs`. 412 | 413 | It makes use of the helper function `is_ref()`, which parses the given 414 | line, checking whether it matches the reference syntax. Offsets of the 415 | reference components are kept while progressing in the line, and on the 416 | first syntax error 0 is returned and the line is considered as an input 417 | line. 418 | 419 | When all the tests are passed, a new `struct link_ref` is created and 420 | sorted into `rndr.refs`. 421 | 422 | #### Second pass 423 | 424 | `markdown()` does not do much here, the result of the first pass is fed to 425 | `parse_block()` which fills the output buffer `ob`. 426 | 427 | #### Clean-up 428 | 429 | References allocated during the first pass, and working buffers allocated 430 | during the second pass are freed there, before returning. 431 | 432 | 433 | ### Block-level parsing 434 | 435 | The core of block-level parsing is the function `parse_block()`, which 436 | runs over the whole input (on the first call, the input is the output of 437 | the first pass, but `parse_block()` can be called recursively for blocks 438 | inside blocks, e.g. for blockquotes). 439 | 440 | The kind of block at the beginning of the input is determined using the 441 | `prefix_*` functions, then the correct `parse_` function is called 442 | for the current block. All specialized `parse_` functions return a 443 | `size_t` which is the size of the current block. This lets `parse_block()` 444 | know where to start looking for the following block. 445 | 446 | Some blocks are easy to handle, for example blocks of code: the 447 | `parse_blockcode()` function only scans the input, accumulating lines in a 448 | working buffer after stripping the blockcode prefix, and stopping at the 449 | first non-empty non-blockcode-prefixed line.
It then calls the rendering 450 | function for block codes and returns. 451 | 452 | Other blocks are more complicated, like paragraphs who can actually be 453 | setext-style headers, or list items, which require a special subparse to 454 | follow Markdown rules where sublist creation is more laxist than list 455 | creation. 456 | 457 | Most block functions call `parse_inline()` for span-level parsing, before 458 | handing the result to the block renderer callback. 459 | 460 | #### HTML block parsing 461 | 462 | Of interest is the `parse_htmlblock()` function: according to Markdown 463 | webpage, HTML blocks must be delimited by unindented block-level tags, 464 | with the opening tag being preceded by a blank line, and the closing tag 465 | being followed by a blank line. 466 | 467 | When looking at the reference implementation, `Markdown.pl`, it appeared 468 | that when this doesn't find a match, a more laxist syntax is tried, where 469 | the closing tag can be indented, it only has to be at the end of line and 470 | followed by a blank line. 471 | 472 | But when looking at the test suite, it appeared that a single line 473 | `

foo
` surrounded by blank lines should be recognized as a 474 | block, regardless of the "matching" unindented closing tag at the end of 475 | the document. This meant that only the most lax approach should be used. 476 | 477 | This is why the first pass is commented out with a `#if 0`. If you want strict 478 | HTML block parsing, as described on the webpage, you should instead comment 479 | out the second pass. Keeping both first and second passes yields the same 480 | behaviour as `Markdown.pl` v1.0.1. 481 | 482 | I have to admit I do not really care that much about these differences, as 483 | I do not intend to personally use any inline HTML, because I will either 484 | parse unsafe input, then inline HTML is too dangerous, or my own input, 485 | but I use Markdown when I'm not confident in my HTML correctness, so it 486 | would be useless to include HTML in my input. However I am aware this 487 | feature can matter for some people, and any patch or suggestion to "fix" 488 | this behaviour will be welcome. 489 | 490 | 491 | ### Span-level parsing 492 | 493 | The core of span-level parsing is the function `parse_inline()`, which is 494 | pretty different from `parse_block()`. It is based around the 495 | `active_char[]` vector table in the render structure. 496 | 497 | The main loop is composed of two parts: first the next active character is 498 | looked for. The string of inactive characters is directly handed over to 499 | the `normal_text` rendering callback. 500 | 501 | When a character is active, its corresponding entry in the `active_char[]` 502 | is a pointer to one of the `char_*` functions. Most of these functions do 503 | pretty straightforward work in handling their role. 504 | 505 | The most complicated of these functions is `char_link`, which responds to 506 | `'['`.
This is because of the many possibilities offered by markdown to use 507 | this character: it can either be a part of a link or an image, and then it 508 | can be inline or reference style or a shortcut reference style. 509 | 510 | Emphasis is another interesting piece of code, in that when encountering an 511 | emphasis character, it first looks whether it is single or double or triple 512 | emphasis, and then goes forward looking for a match. 513 | 514 | 515 | ### Proof that recursion depth is bounded by `max_work_stack` 516 | 517 | The core of the code here is that when entering the functions 518 | `parse_inline()` and `parse_block()`, if the current size of the working 519 | buffer stack (`rndr->work`) is above `max_work_stack`, the parsing is 520 | short-circuited and the input is appended as-is. 521 | 522 | Let's prove now that this actually works, i.e. that it does put an upper 523 | bound on the nested function call depth. 524 | 525 | **Step 1**: there is no function calling itself directly in `markdown.c`. 526 | This is quite easy to check, though a bit tedious. This proves that a stack 527 | overflow involves a recursion cycle of at least two functions. 528 | 529 | **Step 2**: most of the functions in `markdown.c` are declared by their 530 | definition, which means that these functions can only call functions 531 | appearing before them in the source file. This provides a strict hierarchy, 532 | which prevents any multiple-function recursion cycle. So only exceptions to 533 | the hierarchy are left to check. 534 | 535 | **Step 3**: there are only 3 functions that break the above-mentioned 536 | hierarchy: 537 | 538 | * `markdown()`, which is declared through the inclusion of `markdown.h` 539 | at the very beginning of `markdown.c`. However an easy text search shows 540 | that it's actually never called here, which obviously prevents it from 541 | being part of a recursion cycle.
542 | * `parse_block()`, which is declared at the beginning of the block-level 543 | section, but defined at the end. 544 | * `parse_inline()`, which uses function pointers to dispatch active 545 | character handling towards `char_*` functions below. 546 | 547 | So at this point I have proved that any recursion cycle *always* involves 548 | `parse_block()` or `parse_inline()`. So checking a depth-indicator only in 549 | these functions is enough to prevent recursion cycles. 550 | 551 | **Step 4**: `rndr->work.size` is a good depth-indicator, because all calls 552 | to `parse_block()` or `parse_inline()` happen after at least one working 553 | buffer allocation. This is again a bit tedious to check: 554 | 555 | * `parse_block()` is called in `markdown()`, which is irrelevant, and in 556 | `parse_blockquote()` and `parse_listitem()`, which allocate respectively 557 | one and two working buffers at the very beginning of the function; 558 | * `parse_inline()` is called in `parse_emph1()`, `parse_emph2()`, 559 | `parse_emph3()`, `char_link()`, `parse_paragraph()` (twice), and each time 560 | it's called right after allocating a new working buffer; and in 561 | `parse_listitem()` which allocates two working buffers at the very 562 | beginning of the function. 563 | 564 | Therefore, `rndr->work.size` will always increase between calls of 565 | `parse_block()` or `parse_inline()`, which in turn proves that putting an 566 | upper bound on `rndr->work.size` prevents arbitrarily deep recursions, and 567 | therefore stack overflows when the upper bound is well chosen. 568 | 569 | 570 | ### Utility functions 571 | 572 | Throughout the parsing the need for a working buffer frequently arises. A 573 | naive approach is to allocate a working buffer each time one is needed, and 574 | release it afterwards. However it leads to a lot of allocations, 575 | deallocations and reallocations (when the buffer grows), which costs a lot 576 | of time.
577 | 578 | So I added a `work` dynamic array pointer, with a special meaning for the 579 | `size` and `asize` members: in this array, the first `size` members are 580 | active working buffers that are still in use, and the remaining members up 581 | to `asize` are allocated but no longer used working buffers. 582 | 583 | When a function needs a working buffer, it first compares `size` to `asize`. 584 | When they are equal, it means there is no available working buffer, and a 585 | new one is created and appended (`push`ed) to the array. Otherwise it 586 | increases `size` and takes the already-allocated buffer as its working 587 | buffer, resetting its size. 588 | 589 | When the working buffer is no longer needed, the `size` of the array is 590 | just decreased, meaning the buffer is still allocated but ready to be taken 591 | by the next function in need. 592 | 593 | When the parsing is over, every working buffer should be marked as ready to 594 | be reused, hence the assertion of `size` being zero in `markdown()`. The 595 | buffers in the array are finally freed. 596 | -------------------------------------------------------------------------------- /array.c: -------------------------------------------------------------------------------- 1 | /* array.c - automatic dynamic array for pointers */ 2 | 3 | /* 4 | * Copyright (c) 2008, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | */ 18 | 19 | #include "array.h" 20 | 21 | #include 22 | 23 | 24 | /*************************** 25 | * STATIC HELPER FUNCTIONS * 26 | ***************************/ 27 | 28 | /* arr_realloc • realloc memory of a struct array */ 29 | static int 30 | arr_realloc(struct array* arr, int neosz) { 31 | void* neo; 32 | neo = realloc(arr->base, neosz * arr->unit); 33 | if (neo == 0) return 0; 34 | arr->base = neo; 35 | arr->asize = neosz; 36 | if (arr->size > neosz) arr->size = neosz; 37 | return 1; } 38 | 39 | 40 | /* parr_realloc • realloc memory of a struct parray */ 41 | static int 42 | parr_realloc(struct parray* arr, int neosz) { 43 | void* neo; 44 | neo = realloc(arr->item, neosz * sizeof (void*)); 45 | if (neo == 0) return 0; 46 | arr->item = neo; 47 | arr->asize = neosz; 48 | if (arr->size > neosz) arr->size = neosz; 49 | return 1; } 50 | 51 | 52 | 53 | /*************************** 54 | * GENERIC ARRAY FUNCTIONS * 55 | ***************************/ 56 | 57 | /* arr_adjust • shrink the allocated memory to fit exactly the needs */ 58 | int 59 | arr_adjust(struct array *arr) { 60 | return arr_realloc(arr, arr->size); } 61 | 62 | 63 | /* arr_free • frees the structure contents (buf NOT the struct itself) */ 64 | void 65 | arr_free(struct array *arr) { 66 | if (!arr) return; 67 | free(arr->base); 68 | arr->base = 0; 69 | arr->size = arr->asize = 0; } 70 | 71 | 72 | /* arr_grow • increases the array size to fit the given number of elements */ 73 | int 74 | arr_grow(struct array *arr, int need) { 75 | if (arr->asize >= need) return 1; 76 | else return arr_realloc(arr, need); } 77 | 78 | 79 | /* arr_init • initialization of the 
contents of the struct */ 80 | void 81 | arr_init(struct array *arr, size_t unit) { 82 | arr->base = 0; 83 | arr->size = arr->asize = 0; 84 | arr->unit = unit; } 85 | 86 | 87 | /* arr_insert • inserting nb elements before the nth one */ 88 | int 89 | arr_insert(struct array *arr, int nb, int n) { 90 | char *src, *dst; 91 | size_t len; 92 | if (!arr || nb <= 0 || n < 0 93 | || !arr_grow(arr, arr->size + nb)) 94 | return 0; 95 | if (n < arr->size) { 96 | src = arr->base; 97 | src += n * arr->unit; 98 | dst = src + nb * arr->unit; 99 | len = (arr->size - n) * arr->unit; 100 | memmove(dst, src, len); } 101 | arr->size += nb; 102 | return 1; } 103 | 104 | 105 | /* arr_item • returns a pointer to the n-th element */ 106 | void * 107 | arr_item(struct array *arr, int no) { 108 | char *ptr; 109 | if (!arr || no < 0 || no >= arr->size) return 0; 110 | ptr = arr->base; 111 | ptr += no * arr->unit; 112 | return ptr; } 113 | 114 | 115 | /* arr_newitem • returns the index of a new element appended to the array */ 116 | int 117 | arr_newitem(struct array *arr) { 118 | if (!arr_grow(arr, arr->size + 1)) return -1; 119 | arr->size += 1; 120 | return arr->size - 1; } 121 | 122 | 123 | /* arr_remove • removes the n-th elements of the array */ 124 | void 125 | arr_remove(struct array *arr, int idx) { 126 | if (!arr || idx < 0 || idx >= arr->size) return; 127 | arr->size -= 1; 128 | if (idx < arr->size) { 129 | char *dst = arr->base; 130 | char *src; 131 | dst += idx * arr->unit; 132 | src = dst + arr->unit; 133 | memmove(dst, src, (arr->size - idx) * arr->unit); } } 134 | 135 | 136 | /* arr_sorted_find • O(log n) search in a sorted array, returning entry */ 137 | void * 138 | arr_sorted_find(struct array *arr, void *key, array_cmp_fn cmp) { 139 | int mi, ma, cu, ret; 140 | char *ptr = arr->base; 141 | mi = -1; 142 | ma = arr->size; 143 | while (mi < ma - 1) { 144 | cu = mi + (ma - mi) / 2; 145 | ret = cmp(key, ptr + cu * arr->unit); 146 | if (ret == 0) return ptr + cu * arr->unit; 
147 | else if (ret < 0) ma = cu; 148 | else /* if (ret > 0) */ mi = cu; } 149 | return 0; } 150 | 151 | 152 | /* arr_sorted_find_i • O(log n) search in a sorted array, 153 | * returning index of the smallest element larger than the key */ 154 | int 155 | arr_sorted_find_i(struct array *arr, void *key, array_cmp_fn cmp) { 156 | int mi, ma, cu, ret; 157 | char *ptr = arr->base; 158 | mi = -1; 159 | ma = arr->size; 160 | while (mi < ma - 1) { 161 | cu = mi + (ma - mi) / 2; 162 | ret = cmp(key, ptr + cu * arr->unit); 163 | if (ret == 0) { 164 | while (cu < arr->size && ret == 0) { 165 | cu += 1; 166 | ret = cmp(key, ptr + cu * arr->unit); } 167 | return cu; } 168 | else if (ret < 0) ma = cu; 169 | else /* if (ret > 0) */ mi = cu; } 170 | return ma; } 171 | 172 | 173 | 174 | /*************************** 175 | * POINTER ARRAY FUNCTIONS * 176 | ***************************/ 177 | 178 | /* parr_adjust • shrinks the allocated memory to fit exactly the needs */ 179 | int 180 | parr_adjust(struct parray* arr) { 181 | return parr_realloc (arr, arr->size); } 182 | 183 | 184 | /* parr_free • frees the structure contents (buf NOT the struct itself) */ 185 | void 186 | parr_free(struct parray *arr) { 187 | if (!arr) return; 188 | free (arr->item); 189 | arr->item = 0; 190 | arr->size = 0; 191 | arr->asize = 0; } 192 | 193 | 194 | /* parr_grow • increases the array size to fit the given number of elements */ 195 | int 196 | parr_grow(struct parray *arr, int need) { 197 | if (arr->asize >= need) return 1; 198 | else return parr_realloc (arr, need); } 199 | 200 | 201 | /* parr_init • initialization of the struct (which is equivalent to zero) */ 202 | void 203 | parr_init(struct parray *arr) { 204 | arr->item = 0; 205 | arr->size = 0; 206 | arr->asize = 0; } 207 | 208 | 209 | /* parr_insert • inserting nb elements before the nth one */ 210 | int 211 | parr_insert(struct parray *parr, int nb, int n) { 212 | char *src, *dst; 213 | size_t len, i; 214 | if (!parr || nb <= 0 || n < 0 215 | 
|| !parr_grow(parr, parr->size + nb)) 216 | return 0; 217 | if (n < parr->size) { 218 | src = (void *)parr->item; 219 | src += n * sizeof (void *); 220 | dst = src + nb * sizeof (void *); 221 | len = (parr->size - n) * sizeof (void *); 222 | memmove(dst, src, len); 223 | for (i = 0; i < nb; ++i) 224 | parr->item[n + i] = 0; } 225 | parr->size += nb; 226 | return 1; } 227 | 228 | 229 | /* parr_pop • pops the last item of the array and returns it */ 230 | void * 231 | parr_pop(struct parray *arr) { 232 | if (arr->size <= 0) return 0; 233 | arr->size -= 1; 234 | return arr->item[arr->size]; } 235 | 236 | 237 | /* parr_push • pushes a pointer at the end of the array (= append) */ 238 | int 239 | parr_push(struct parray *arr, void *i) { 240 | if (!parr_grow(arr, arr->size + 1)) return 0; 241 | arr->item[arr->size] = i; 242 | arr->size += 1; 243 | return 1; } 244 | 245 | 246 | /* parr_remove • removes the n-th element of the array and returns it */ 247 | void * 248 | parr_remove(struct parray *arr, int idx) { 249 | void* ret; 250 | int i; 251 | if (!arr || idx < 0 || idx >= arr->size) return 0; 252 | ret = arr->item[idx]; 253 | for (i = idx+1; i < arr->size; ++i) 254 | arr->item[i - 1] = arr->item[i]; 255 | arr->size -= 1; 256 | return ret; } 257 | 258 | 259 | /* parr_sorted_find • O(log n) search in a sorted array, returning entry */ 260 | void * 261 | parr_sorted_find(struct parray *arr, void *key, array_cmp_fn cmp) { 262 | int mi, ma, cu, ret; 263 | mi = -1; 264 | ma = arr->size; 265 | while (mi < ma - 1) { 266 | cu = mi + (ma - mi) / 2; 267 | ret = cmp(key, arr->item[cu]); 268 | if (ret == 0) return arr->item[cu]; 269 | else if (ret < 0) ma = cu; 270 | else /* if (ret > 0) */ mi = cu; } 271 | return 0; } 272 | 273 | 274 | /* parr_sorted_find_i • O(log n) search in a sorted array, 275 | * returning index of the smallest element larger than the key */ 276 | int 277 | parr_sorted_find_i(struct parray *arr, void *key, array_cmp_fn cmp) { 278 | int mi, ma, cu, ret; 279 | 
mi = -1; 280 | ma = arr->size; 281 | while (mi < ma - 1) { 282 | cu = mi + (ma - mi) / 2; 283 | ret = cmp(key, arr->item[cu]); 284 | if (ret == 0) { 285 | while (cu < arr->size && ret == 0) { 286 | cu += 1; 287 | ret = cmp(key, arr->item[cu]); } 288 | return cu; } 289 | else if (ret < 0) ma = cu; 290 | else /* if (ret > 0) */ mi = cu; } 291 | return ma; } 292 | 293 | 294 | /* parr_top • returns the top the stack (i.e. the last element of the array) */ 295 | void * 296 | parr_top(struct parray *arr) { 297 | if (arr == 0 || arr->size <= 0) return 0; 298 | else return arr->item[arr->size - 1]; } 299 | 300 | /* vim: set filetype=c: */ 301 | -------------------------------------------------------------------------------- /array.h: -------------------------------------------------------------------------------- 1 | /* array.h - automatic dynamic array for pointers */ 2 | 3 | /* 4 | * Copyright (c) 2008, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #ifndef LITHIUM_ARRAY_H 20 | #define LITHIUM_ARRAY_H 21 | 22 | #include 23 | 24 | 25 | /******************** 26 | * TYPE DEFINITIONS * 27 | ********************/ 28 | 29 | /* struct array • generic linear array */ 30 | struct array { 31 | void* base; 32 | int size; 33 | int asize; 34 | size_t unit; }; 35 | 36 | 37 | /* struct parray • array of pointers */ 38 | struct parray { 39 | void ** item; 40 | int size; 41 | int asize; }; 42 | 43 | 44 | /* array_cmp_fn • comparison functions for sorted arrays */ 45 | typedef int (*array_cmp_fn)(void *key, void *array_entry); 46 | 47 | 48 | 49 | /*************************** 50 | * GENERIC ARRAY FUNCTIONS * 51 | ***************************/ 52 | 53 | /* arr_adjust • shrink the allocated memory to fit exactly the needs */ 54 | int 55 | arr_adjust(struct array *); 56 | 57 | /* arr_free • frees the structure contents (buf NOT the struct itself) */ 58 | void 59 | arr_free(struct array *); 60 | 61 | /* arr_grow • increases the array size to fit the given number of elements */ 62 | int 63 | arr_grow(struct array *, int); 64 | 65 | /* arr_init • initialization of the contents of the struct */ 66 | void 67 | arr_init(struct array *, size_t); 68 | 69 | /* arr_insert • inserting elements nb before the nth one */ 70 | int 71 | arr_insert(struct array *, int nb, int n); 72 | 73 | /* arr_item • returns a pointer to the n-th element */ 74 | void * 75 | arr_item(struct array *, int); 76 | 77 | /* arr_newitem • returns the index of a new element appended to the array */ 78 | int 79 | arr_newitem(struct array *); 80 | 81 | /* arr_remove • removes the n-th elements of the array */ 82 | void 83 | arr_remove(struct array *, int); 84 | 85 | /* arr_sorted_find • O(log n) search in a sorted array, returning entry */ 86 | /* equivalent to bsearch(key, arr->base, arr->size, arr->unit, cmp) */ 87 | void * 88 | arr_sorted_find(struct array *, void *key, array_cmp_fn cmp); 89 | 90 | /* arr_sorted_find_i • O(log n) search in a sorted 
array, 91 | * returning index of the smallest element larger than the key */ 92 | int 93 | arr_sorted_find_i(struct array *, void *key, array_cmp_fn cmp); 94 | 95 | 96 | /*************************** 97 | * POINTER ARRAY FUNCTIONS * 98 | ***************************/ 99 | 100 | /* parr_adjust • shrinks the allocated memory to fit exactly the needs */ 101 | int 102 | parr_adjust(struct parray *); 103 | 104 | /* parr_free • frees the structure contents (buf NOT the struct itself) */ 105 | void 106 | parr_free(struct parray *); 107 | 108 | /* parr_grow • increases the array size to fit the given number of elements */ 109 | int 110 | parr_grow(struct parray *, int); 111 | 112 | /* parr_init • initialization of the struct (which is equivalent to zero) */ 113 | void 114 | parr_init(struct parray *); 115 | 116 | /* parr_insert • inserting nb elements before the nth one */ 117 | int 118 | parr_insert(struct parray *, int nb, int n); 119 | 120 | /* parr_pop • pops the last item of the array and returns it */ 121 | void * 122 | parr_pop(struct parray *); 123 | 124 | /* parr_push • pushes a pointer at the end of the array (= append) */ 125 | int 126 | parr_push(struct parray *, void *); 127 | 128 | /* parr_remove • removes the n-th element of the array and returns it */ 129 | void * 130 | parr_remove(struct parray *, int); 131 | 132 | /* parr_sorted_find • O(log n) search in a sorted array, returning entry */ 133 | void * 134 | parr_sorted_find(struct parray *, void *key, array_cmp_fn cmp); 135 | 136 | /* parr_sorted_find_i • O(log n) search in a sorted array, 137 | * returning index of the smallest element larger than the key */ 138 | int 139 | parr_sorted_find_i(struct parray *, void *key, array_cmp_fn cmp); 140 | 141 | /* parr_top • returns the top the stack (i.e. 
the last element of the array) */ 142 | void * 143 | parr_top(struct parray *); 144 | 145 | 146 | #endif /* ndef LITHIUM_ARRAY_H */ 147 | 148 | /* vim: set filetype=c: */ 149 | -------------------------------------------------------------------------------- /benchmark.c: -------------------------------------------------------------------------------- 1 | /* benchmark.c - main function for markdown module benchmarking */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | */ 18 | 19 | #include "markdown.h" 20 | #include "renderers.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #define READ_UNIT 1024 28 | #define OUTPUT_UNIT 64 29 | 30 | 31 | /* markdown_file • performs markdown transformation on FILE* */ 32 | static void 33 | benchmark(FILE *in, int nb) { 34 | struct buf *ib, *ob; 35 | size_t ret, i, n; 36 | if (!in) return; 37 | n = (nb <= 1) ? 
1 : nb; 38 | 39 | /* reading everything */ 40 | ib = bufnew(READ_UNIT); 41 | bufgrow(ib, READ_UNIT); 42 | while ((ret = fread(ib->data + ib->size, 1, 43 | ib->asize - ib->size, in)) > 0) { 44 | ib->size += ret; 45 | bufgrow(ib, ib->size + READ_UNIT); } 46 | 47 | /* performing markdown parsing */ 48 | for (i = 0; i < n; i += 1) { 49 | ob = bufnew(OUTPUT_UNIT); 50 | ob->size = 0; 51 | markdown(ob, ib, &mkd_xhtml); 52 | bufrelease(ob); } 53 | 54 | /* cleanup */ 55 | bufrelease(ib); } 56 | 57 | 58 | 59 | /* main • main function, interfacing STDIO with the parser */ 60 | int 61 | main(int argc, char **argv) { 62 | int nb = 1, i, j, f, files = 0; 63 | FILE *in = 0; 64 | 65 | /* looking for a count number */ 66 | if (argc > 1) { 67 | for (i = 1; i < argc; i += 1) 68 | if (argv[i][0] == '-' && argv[i][1] == '-') 69 | nb = atoi(argv[i] + 2); 70 | else files += 1; 71 | if (nb < 1) { 72 | fprintf(stderr, "Usage: %s [--] " 73 | "[file] [file] ...\n", argv[0]); 74 | return 2; } } 75 | 76 | /* if no file is given, using stdin as the only file */ 77 | if (files <= 0) { 78 | in = stdin; 79 | files = 1; } 80 | 81 | /* performing the markdown */ 82 | f = 0; 83 | for (j = 0; j < files; j += 1) { 84 | if (in != stdin) { 85 | f += 1; 86 | while (f < argc 87 | && argv[f][0] == '-' && argv[f][1] == '-') 88 | f += 1; 89 | if (f >= argc) break; 90 | in = fopen(argv[f], "r"); 91 | if (!in) { 92 | fprintf(stderr, "Unable to open \"%s\": %s\n", 93 | argv[f], strerror(errno)); 94 | continue; } } 95 | benchmark(in, nb); 96 | if (in != stdin) fclose(in); } 97 | 98 | #ifdef BUFFER_STATS 99 | /* memory checks */ 100 | if (buffer_stat_nb) 101 | fprintf(stderr, "Warning: %ld buffers still active\n", 102 | buffer_stat_nb); 103 | if (buffer_stat_alloc_bytes) 104 | fprintf(stderr, "Warning: %zu bytes still allocated\n", 105 | buffer_stat_alloc_bytes); 106 | #endif 107 | return 0; } 108 | 109 | /* vim: set filetype=c: */ 110 | 
-------------------------------------------------------------------------------- /buffer.c: -------------------------------------------------------------------------------- 1 | /* buffer.c - automatic buffer structure */ 2 | 3 | /* 4 | * Copyright (c) 2008, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | */ 18 | 19 | #include "buffer.h" 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | 26 | /******************** 27 | * GLOBAL VARIABLES * 28 | ********************/ 29 | 30 | /* 31 | * COMPILE TIME OPTIONS 32 | * 33 | * BUFFER_STATS • if defined, stats are kept about memory usage 34 | */ 35 | 36 | #ifdef BUFFER_STATS 37 | long buffer_stat_nb = 0; 38 | size_t buffer_stat_alloc_bytes = 0; 39 | #endif 40 | 41 | 42 | /*************************** 43 | * STATIC HELPER FUNCTIONS * 44 | ***************************/ 45 | 46 | /* lower • returns the lower-case variant of the input char */ 47 | static char 48 | lower(char c) { 49 | return (c >= 'A' && c <= 'Z') ? 
(c - 'A' + 'a') : c; } 50 | 51 | 52 | 53 | /******************** 54 | * BUFFER FUNCTIONS * 55 | ********************/ 56 | 57 | /* bufcasecmp • case-insensitive buffer comparison */ 58 | int 59 | bufcasecmp(const struct buf *a, const struct buf *b) { 60 | size_t i = 0; 61 | size_t cmplen; 62 | if (a == b) return 0; 63 | if (!a) return -1; else if (!b) return 1; 64 | cmplen = (a->size < b->size) ? a->size : b->size; 65 | while (i < cmplen && lower(a->data[i]) == lower(b->data[i])) ++i; 66 | if (i < a->size) { 67 | if (i < b->size) return lower(a->data[i]) - lower(b->data[i]); 68 | else return 1; } 69 | else { if (i < b->size) return -1; 70 | else return 0; } } 71 | 72 | 73 | /* bufcmp • case-sensitive buffer comparison */ 74 | int 75 | bufcmp(const struct buf *a, const struct buf *b) { 76 | size_t i = 0; 77 | size_t cmplen; 78 | if (a == b) return 0; 79 | if (!a) return -1; else if (!b) return 1; 80 | cmplen = (a->size < b->size) ? a->size : b->size; 81 | while (i < cmplen && a->data[i] == b->data[i]) ++i; 82 | if (i < a->size) { 83 | if (i < b->size) return a->data[i] - b->data[i]; 84 | else return 1; } 85 | else { if (i < b->size) return -1; 86 | else return 0; } } 87 | 88 | 89 | /* bufcmps • case-sensitive comparison of a string to a buffer */ 90 | int 91 | bufcmps(const struct buf *a, const char *b) { 92 | const size_t len = strlen(b); 93 | size_t cmplen = len; 94 | int r; 95 | if (!a || !a->size) return b ? 
0 : -1; 96 | if (len < a->size) cmplen = a->size; 97 | r = strncmp(a->data, b, cmplen); 98 | if (r) return r; 99 | else if (a->size == len) return 0; 100 | else if (a->size < len) return -1; 101 | else return 1; } 102 | 103 | 104 | /* bufdup • buffer duplication */ 105 | struct buf * 106 | bufdup(const struct buf *src, size_t dupunit) { 107 | size_t blocks; 108 | struct buf *ret; 109 | if (src == 0) return 0; 110 | ret = malloc(sizeof (struct buf)); 111 | if (ret == 0) return 0; 112 | ret->unit = dupunit; 113 | ret->size = src->size; 114 | ret->ref = 1; 115 | if (!src->size) { 116 | ret->asize = 0; 117 | ret->data = 0; 118 | return ret; } 119 | blocks = (src->size + dupunit - 1) / dupunit; 120 | ret->asize = blocks * dupunit; 121 | ret->data = malloc(ret->asize); 122 | if (ret->data == 0) { 123 | free(ret); 124 | return 0; } 125 | memcpy(ret->data, src->data, src->size); 126 | #ifdef BUFFER_STATS 127 | buffer_stat_nb += 1; 128 | buffer_stat_alloc_bytes += ret->asize; 129 | #endif 130 | return ret; } 131 | 132 | 133 | /* bufgrow • increasing the allocated size to the given value */ 134 | int 135 | bufgrow(struct buf *buf, size_t neosz) { 136 | size_t neoasz; 137 | void *neodata; 138 | if (!buf || !buf->unit) return 0; 139 | if (buf->asize >= neosz) return 1; 140 | neoasz = buf->asize + buf->unit; 141 | while (neoasz < neosz) neoasz += buf->unit; 142 | neodata = realloc(buf->data, neoasz); 143 | if (!neodata) return 0; 144 | #ifdef BUFFER_STATS 145 | buffer_stat_alloc_bytes += (neoasz - buf->asize); 146 | #endif 147 | buf->data = neodata; 148 | buf->asize = neoasz; 149 | return 1; } 150 | 151 | 152 | /* bufnew • allocation of a new buffer */ 153 | struct buf * 154 | bufnew(size_t unit) { 155 | struct buf *ret; 156 | ret = malloc(sizeof (struct buf)); 157 | if (ret) { 158 | #ifdef BUFFER_STATS 159 | buffer_stat_nb += 1; 160 | #endif 161 | ret->data = 0; 162 | ret->size = ret->asize = 0; 163 | ret->ref = 1; 164 | ret->unit = unit; } 165 | return ret; } 166 | 167 | 168 
| /* bufnullterm • NUL-termination of the string array (making a C-string) */ 169 | void 170 | bufnullterm(struct buf *buf) { 171 | if (!buf || !buf->unit) return; 172 | if (buf->size + 1 <= buf->asize || bufgrow(buf, buf->size + 1)) 173 | buf->data[buf->size] = 0; } 174 | 175 | 176 | /* bufprintf • formatted printing to a buffer */ 177 | void 178 | bufprintf(struct buf *buf, const char *fmt, ...) { 179 | va_list ap; 180 | if (!buf || !buf->unit) return; 181 | va_start(ap, fmt); 182 | vbufprintf(buf, fmt, ap); 183 | va_end(ap); } 184 | 185 | 186 | /* bufput • appends raw data to a buffer */ 187 | void 188 | bufput(struct buf *buf, const void *data, size_t len) { 189 | if (!buf) return; 190 | if (buf->size + len > buf->asize && !bufgrow(buf, buf->size + len)) 191 | return; 192 | memcpy(buf->data + buf->size, data, len); 193 | buf->size += len; } 194 | 195 | 196 | /* bufputs • appends a NUL-terminated string to a buffer */ 197 | void 198 | bufputs(struct buf *buf, const char *str) { 199 | bufput(buf, str, strlen (str)); } 200 | 201 | 202 | /* bufputc • appends a single char to a buffer */ 203 | void 204 | bufputc(struct buf *buf, char c) { 205 | if (!buf) return; 206 | if (buf->size + 1 > buf->asize && !bufgrow(buf, buf->size + 1)) 207 | return; 208 | buf->data[buf->size] = c; 209 | buf->size += 1; } 210 | 211 | 212 | /* bufrelease • decrease the reference count and free the buffer if needed */ 213 | void 214 | bufrelease(struct buf *buf) { 215 | if (!buf || !buf->unit) return; 216 | buf->ref -= 1; 217 | if (buf->ref == 0) { 218 | #ifdef BUFFER_STATS 219 | buffer_stat_nb -= 1; 220 | buffer_stat_alloc_bytes -= buf->asize; 221 | #endif 222 | free(buf->data); 223 | free(buf); } } 224 | 225 | 226 | /* bufreset • frees internal data of the buffer */ 227 | void 228 | bufreset(struct buf *buf) { 229 | if (!buf || !buf->unit || !buf->asize) return; 230 | #ifdef BUFFER_STATS 231 | buffer_stat_alloc_bytes -= buf->asize; 232 | #endif 233 | free(buf->data); 234 | buf->data = 0; 
235 | buf->size = buf->asize = 0; } 236 | 237 | 238 | /* bufset • safely assigns a buffer to another */ 239 | void 240 | bufset(struct buf **dest, struct buf *src) { 241 | if (src) { 242 | if (!src->asize) src = bufdup(src, 1); 243 | else src->ref += 1; } 244 | bufrelease(*dest); 245 | *dest = src; } 246 | 247 | 248 | /* bufslurp • removes a given number of bytes from the head of the array */ 249 | void 250 | bufslurp(struct buf *buf, size_t len) { 251 | if (!buf || !buf->unit || !len) return; 252 | if (len >= buf->size) { 253 | buf->size = 0; 254 | return; } 255 | buf->size -= len; 256 | memmove(buf->data, buf->data + len, buf->size); } 257 | 258 | 259 | /* buftoi • converts the numbers at the beginning of the buf into an int */ 260 | int 261 | buftoi(struct buf *buf, size_t offset_i, size_t *offset_o) { 262 | int r = 0, neg = 0; 263 | size_t i = offset_i; 264 | if (!buf || !buf->size) return 0; 265 | if (buf->data[i] == '+') i += 1; 266 | else if (buf->data[i] == '-') { 267 | neg = 1; 268 | i += 1; } 269 | while (i < buf->size && buf->data[i] >= '0' && buf->data[i] <= '9') { 270 | r = (r * 10) + buf->data[i] - '0'; 271 | i += 1; } 272 | if (offset_o) *offset_o = i; 273 | return neg ? 
-r : r; } 274 | 275 | 276 | 277 | /* vbufprintf • stdarg variant of formatted printing into a buffer */ 278 | void 279 | vbufprintf(struct buf *buf, const char *fmt, va_list ap) { 280 | int n; 281 | va_list ap_save; 282 | if (buf == 0 283 | || (buf->size >= buf->asize && !bufgrow (buf, buf->size + 1))) 284 | return; 285 | va_copy(ap_save, ap); 286 | n = vsnprintf(buf->data + buf->size, buf->asize - buf->size, fmt, ap); 287 | if (n >= buf->asize - buf->size) { 288 | if (buf->size + n + 1 > buf->asize 289 | && !bufgrow (buf, buf->size + n + 1)) 290 | return; 291 | n = vsnprintf (buf->data + buf->size, 292 | buf->asize - buf->size, fmt, ap_save); } 293 | va_end(ap_save); 294 | if (n < 0) return; 295 | buf->size += n; } 296 | 297 | /* vim: set filetype=c: */ 298 | -------------------------------------------------------------------------------- /buffer.h: -------------------------------------------------------------------------------- 1 | /* buffer.h - automatic buffer structure */ 2 | 3 | /* 4 | * Copyright (c) 2008, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #ifndef LITHIUM_BUFFER_H 20 | #define LITHIUM_BUFFER_H 21 | 22 | #include 23 | #include 24 | 25 | 26 | /******************** 27 | * TYPE DEFINITIONS * 28 | ********************/ 29 | 30 | /* struct buf • character array buffer */ 31 | struct buf { 32 | char * data; /* actual character data */ 33 | size_t size; /* size of the string */ 34 | size_t asize; /* allocated size (0 = volatile buffer) */ 35 | size_t unit; /* reallocation unit size (0 = read-only buffer) */ 36 | int ref; }; /* reference count */ 37 | 38 | 39 | 40 | /********** 41 | * MACROS * 42 | **********/ 43 | 44 | /* CONST_BUF • global buffer from a string literal */ 45 | #define CONST_BUF(name, string) \ 46 | static struct buf name = { string, sizeof string -1, sizeof string } 47 | 48 | 49 | /* VOLATILE_BUF • macro for creating a volatile buffer on the stack */ 50 | #define VOLATILE_BUF(name, strname) \ 51 | struct buf name = { strname, strlen(strname) } 52 | 53 | 54 | /* BUFPUTSL • optimized bufputs of a string literal */ 55 | #define BUFPUTSL(output, literal) \ 56 | bufput(output, literal, sizeof literal - 1) 57 | 58 | 59 | /*********************** 60 | * FUNCTION ATTRIBUTES * 61 | ***********************/ 62 | 63 | /* BUF_ALLOCATOR • the function returns a completely new pointer */ 64 | #ifdef __GNUC__ 65 | #define BUF_ALLOCATOR \ 66 | __attribute__ ((malloc)) 67 | #else 68 | #define BUF_ALLOCATOR 69 | #endif 70 | 71 | 72 | /* BUF_PRINTF_LIKE • marks the function as behaving like printf */ 73 | #ifdef __GNUC__ 74 | #define BUF_PRINTF_LIKE(format_index, first_variadic_index) \ 75 | __attribute__ ((format (printf, format_index, first_variadic_index))) 76 | #else 77 | #define BUF_PRINTF_LIKE(format_index, first_variadic_index) 78 | #endif 79 | 80 | 81 | /******************** 82 | * BUFFER FUNCTIONS * 83 | ********************/ 84 | 85 | /* bufcasecmp • case-insensitive buffer comparison */ 86 | int 87 | bufcasecmp(const struct buf *, const struct buf *); 88 | 89 | /* bufcmp • 
case-sensitive buffer comparison */ 90 | int 91 | bufcmp(const struct buf *, const struct buf *); 92 | 93 | /* bufcmps • case-sensitive comparison of a string to a buffer */ 94 | int 95 | bufcmps(const struct buf *, const char *); 96 | 97 | /* bufdup • buffer duplication */ 98 | struct buf * 99 | bufdup(const struct buf *, size_t) 100 | BUF_ALLOCATOR; 101 | 102 | /* bufgrow • increasing the allocated size to the given value */ 103 | int 104 | bufgrow(struct buf *, size_t); 105 | 106 | /* bufnew • allocation of a new buffer */ 107 | struct buf * 108 | bufnew(size_t) 109 | BUF_ALLOCATOR; 110 | 111 | /* bufnullterm • NUL-termination of the string array (making a C-string) */ 112 | void 113 | bufnullterm(struct buf *); 114 | 115 | /* bufprintf • formatted printing to a buffer */ 116 | void 117 | bufprintf(struct buf *, const char *, ...) 118 | BUF_PRINTF_LIKE(2, 3); 119 | 120 | /* bufput • appends raw data to a buffer */ 121 | void 122 | bufput(struct buf *, const void*, size_t); 123 | 124 | /* bufputs • appends a NUL-terminated string to a buffer */ 125 | void 126 | bufputs(struct buf *, const char*); 127 | 128 | /* bufputc • appends a single char to a buffer */ 129 | void 130 | bufputc(struct buf *, char); 131 | 132 | /* bufrelease • decrease the reference count and free the buffer if needed */ 133 | void 134 | bufrelease(struct buf *); 135 | 136 | /* bufreset • frees internal data of the buffer */ 137 | void 138 | bufreset(struct buf *); 139 | 140 | /* bufset • safely assigns a buffer to another */ 141 | void 142 | bufset(struct buf **, struct buf *); 143 | 144 | /* bufslurp • removes a given number of bytes from the head of the array */ 145 | void 146 | bufslurp(struct buf *, size_t); 147 | 148 | /* buftoi • converts the numbers at the beginning of the buf into an int */ 149 | int 150 | buftoi(struct buf *, size_t, size_t *); 151 | 152 | /* vbufprintf • stdarg variant of formatted printing into a buffer */ 153 | void 154 | vbufprintf(struct buf *, const char*, 
va_list); 155 | 156 | 157 | /******************** 158 | * GLOBAL VARIABLES * 159 | ********************/ 160 | 161 | #ifdef BUFFER_STATS 162 | 163 | extern long buffer_stat_nb; 164 | extern size_t buffer_stat_alloc_bytes; 165 | 166 | #endif /* def BUFFER_STATS */ 167 | 168 | 169 | #endif /* ndef LITHIUM_BUFFER_H */ 170 | 171 | /* vim: set filetype=c: */ 172 | -------------------------------------------------------------------------------- /expanded_markdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (c) 2009, Natacha Porté 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | # This script is used for markdown validation: 18 | # The original Markdown.pl expand tabs into spaces, which is something 19 | # I don't want to do, and it cannot be easily fixed in the renderer, 20 | # so has to be preprocessed. expand(1) is used for that preprocessing, 21 | # feeding tab-free data into my markdown. 
22 | 23 | expand -t 4 "$@" | $(dirname $0)/lace --markdown --xhtml 24 | -------------------------------------------------------------------------------- /make-amal: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # make-amal, generate the libsoldout amalgamation 3 | # inspired by make-bootstrap-jim (https://github.com/msteveb/jimtcl/blob/master/make-bootstrap-jim) 4 | # 5 | # Copyright (c) 2016 Svyatoslav Mishyn 6 | # 7 | # Permission to use, copy, modify, and/or distribute this software for 8 | # any purpose with or without fee is hereby granted, provided that the 9 | # above copyright notice and this permission notice appear in all 10 | # copies. 11 | # 12 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL 13 | # WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED 14 | # WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE 15 | # AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL 16 | # DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR 17 | # PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 18 | # TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 19 | # PERFORMANCE OF THIS SOFTWARE. 20 | 21 | outputsource() { 22 | sed -e '/#include.*\".*\"/d' \ 23 | -e 's/\/\*.*\*\///' \ 24 | -e '/^[ ]*\/\*/,/\*\//d' \ 25 | -e 's/[ ]*$//' $1 26 | } 27 | 28 | header="/* Amalgamation version of libsoldout. 
See https://github.com/faelys/libsoldout */" 29 | license=`sed -n -e '3,17p' markdown.c` 30 | 31 | mkdir -p amalgamation 32 | 33 | 34 | # soldout.h 35 | 36 | echo "$header" > amalgamation/soldout.h 37 | exec >> amalgamation/soldout.h 38 | 39 | echo 40 | echo "$license" 41 | echo 42 | echo '#ifndef SOLDOUT_H' 43 | echo '#define SOLDOUT_H' 44 | echo 45 | 46 | for f in array.h buffer.h markdown.h renderers.h; do 47 | outputsource $f 48 | done 49 | 50 | echo 51 | echo '#endif /* SOLDOUT_H */' 52 | 53 | 54 | # soldout.c 55 | 56 | echo "$header" > amalgamation/soldout.c 57 | exec >> amalgamation/soldout.c 58 | 59 | echo 60 | echo "$license" 61 | echo 62 | echo '#include "soldout.h"' 63 | echo 64 | 65 | for f in array.c buffer.c markdown.c renderers.c; do 66 | outputsource $f 67 | done 68 | -------------------------------------------------------------------------------- /markdown.h: -------------------------------------------------------------------------------- 1 | /* markdown.h - generic markdown parser */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #ifndef LITHIUM_MARKDOWN_H 20 | #define LITHIUM_MARKDOWN_H 21 | 22 | #include "buffer.h" 23 | 24 | 25 | /******************** 26 | * TYPE DEFINITIONS * 27 | ********************/ 28 | 29 | /* mkd_autolink • type of autolink */ 30 | enum mkd_autolink { 31 | MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink*/ 32 | MKDA_NORMAL, /* normal http/https/ftp link */ 33 | MKDA_EXPLICIT_EMAIL, /* e-mail link with explicit mailto: */ 34 | MKDA_IMPLICIT_EMAIL /* e-mail link without mailto: */ 35 | }; 36 | 37 | /* mkd_renderer • functions for rendering parsed data */ 38 | struct mkd_renderer { 39 | /* document level callbacks */ 40 | void (*prolog)(struct buf *ob, void *opaque); 41 | void (*epilog)(struct buf *ob, void *opaque); 42 | 43 | /* block level callbacks - NULL skips the block */ 44 | void (*blockcode)(struct buf *ob, struct buf *text, void *opaque); 45 | void (*blockquote)(struct buf *ob, struct buf *text, void *opaque); 46 | void (*blockhtml)(struct buf *ob, struct buf *text, void *opaque); 47 | void (*header)(struct buf *ob, struct buf *text, 48 | int level, void *opaque); 49 | void (*hrule)(struct buf *ob, void *opaque); 50 | void (*list)(struct buf *ob, struct buf *text, int flags, void *opaque); 51 | void (*listitem)(struct buf *ob, struct buf *text, 52 | int flags, void *opaque); 53 | void (*paragraph)(struct buf *ob, struct buf *text, void *opaque); 54 | void (*table)(struct buf *ob, struct buf *head_row, struct buf *rows, 55 | void *opaque); 56 | void (*table_cell)(struct buf *ob, struct buf *text, int flags, 57 | void *opaque); 58 | void (*table_row)(struct buf *ob, struct buf *cells, int flags, 59 | void *opaque); 60 | 61 | /* span level callbacks - NULL or return 0 prints the span verbatim */ 62 | int (*autolink)(struct buf *ob, struct buf *link, 63 | enum mkd_autolink type, void *opaque); 64 | int (*codespan)(struct buf *ob, struct buf *text, void *opaque); 65 | int (*double_emphasis)(struct buf *ob, struct buf *text, 66 | 
char c, void *opaque); 67 | int (*emphasis)(struct buf *ob, struct buf *text, char c,void*opaque); 68 | int (*image)(struct buf *ob, struct buf *link, struct buf *title, 69 | struct buf *alt, void *opaque); 70 | int (*linebreak)(struct buf *ob, void *opaque); 71 | int (*link)(struct buf *ob, struct buf *link, struct buf *title, 72 | struct buf *content, void *opaque); 73 | int (*raw_html_tag)(struct buf *ob, struct buf *tag, void *opaque); 74 | int (*triple_emphasis)(struct buf *ob, struct buf *text, 75 | char c, void *opaque); 76 | 77 | /* low level callbacks - NULL copies input directly into the output */ 78 | void (*entity)(struct buf *ob, struct buf *entity, void *opaque); 79 | void (*normal_text)(struct buf *ob, struct buf *text, void *opaque); 80 | 81 | /* renderer data */ 82 | int max_work_stack; /* prevent arbitrary deep recursion, cf README */ 83 | const char *emph_chars; /* chars that trigger emphasis rendering */ 84 | void *opaque; /* opaque data send to every rendering callback */ 85 | }; 86 | 87 | 88 | 89 | /********* 90 | * FLAGS * 91 | *********/ 92 | 93 | /* list/listitem flags */ 94 | #define MKD_LIST_ORDERED 1 95 | #define MKD_LI_BLOCK 2 /*
  • containing block data */ 96 | 97 | /* table cell flags */ 98 | #define MKD_CELL_ALIGN_DEFAULT 0 99 | #define MKD_CELL_ALIGN_LEFT 1 100 | #define MKD_CELL_ALIGN_RIGHT 2 101 | #define MKD_CELL_ALIGN_CENTER 3 /* LEFT | RIGHT */ 102 | #define MKD_CELL_ALIGN_MASK 3 103 | #define MKD_CELL_HEAD 4 104 | 105 | 106 | 107 | /********************** 108 | * EXPORTED FUNCTIONS * 109 | **********************/ 110 | 111 | /* markdown • parses the input buffer and renders it into the output buffer */ 112 | void 113 | markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndr); 114 | 115 | 116 | #endif /* ndef LITHIUM_MARKDOWN_H */ 117 | 118 | /* vim: set filetype=c: */ 119 | -------------------------------------------------------------------------------- /mkd2html.1: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
.\"
.Dd September 12, 2016
.Dt MKD2HTML 1
.Os
.Sh NAME
.Nm mkd2html
.Nd convert a markdown document into (X)HTML
.Sh SYNOPSIS
.Nm
.Op Fl dHhmnx
.Op Ar file
.Sh DESCRIPTION
The
.Nm
utility reads
.Ar file
and generates (X)HTML output from markdown source.
If unspecified,
.Ar file
is taken to be standard input.
.Pp
By default,
.Nm
implies
.Fl H
and
.Fl m
options.
.Pp
The options are as follows:
.Bl -tag -width Ds
.It Fl d , Fl Fl discount
enable Discount extensions and PHP-Markdown-like tables:
.Bl -bullet -width 1m
.It
image size specification, by appending
.Do =(width)x(height) Dc to the link
.It
pseudo-protocols in links:
.Bl -bullet -width 1m
.It
.Do abbr:description Dc for <abbr title="description">...</abbr>
.It
.Do class:name Dc for <span class="name">...</span>
.It
.Do id:name Dc for <a id="name">...</a>
.It
.Do raw:text Dc for verbatim unprocessed text inclusion
.El
.It
class blocks: blockquotes beginning with
.Do %class% Dc will be rendered as a div of the given class(es)
.El
.It Fl H , Fl Fl html
output HTML (self-closing tags like: <br>).
.It Fl h , Fl Fl help
display help text.
.It Fl m , Fl Fl markdown
disable all extensions and use strict markdown syntax.
.It Fl n , Fl Fl natext
enable Discount extensions and Natasha's own extensions:
.Bl -bullet -width 1m
.It
id attribute for headers, using the syntax
.Dq id#Header text
.It
class attribute for paragraphs, by putting class name(s)
between parentheses at the very beginning of the paragraph
.It
<ins> and <del> spans, using respectively
.Do ++ Dc and Do -- Dc
as delimiters (with emphasis-like restrictions,
i.e. an opening delimiter cannot be followed by a whitespace,
and a closing delimiter cannot be preceded by a whitespace)
.It
plain <span> without attribute, using emphasis-like delimiter
.Sq |
.El
.It Fl x , Fl Fl xhtml
output XHTML (self-closing tags like: <br />).
.El
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr mkd2latex 1 ,
.Xr mkd2man 1
.Sh AUTHORS
.An -nosplit
.Nm
was written by
.An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu .
Manual page was originally written by
.An Massimo Manghi Aq Mt mxmanghi@apache.org ,
and rewritten to mdoc format by
.An Svyatoslav Mishyn Aq Mt juef@openmailbox.org .

--------------------------------------------------------------------------------
/mkd2html.c:
--------------------------------------------------------------------------------
/* mkd2html.c - main function for markdown module testing */

/*
 * Copyright (c) 2009, Natacha Porté
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 | */ 18 | 19 | #include "markdown.h" 20 | #include "renderers.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #define READ_UNIT 1024 29 | #define OUTPUT_UNIT 64 30 | 31 | 32 | /* usage • print the option list */ 33 | static void 34 | usage(FILE *out, const char *name) { 35 | fprintf(out, "Usage: %s [-h | -x] [-d | -m | -n] [input-file]\n\n", 36 | name); 37 | fprintf(out, "\t-d, --discount\n" 38 | "\t\tEnable some Discount extensions (image size specification,\n" 39 | "\t\tclass blocks and 'abbr:', 'class:', 'id:' and 'raw:'\n" 40 | "\t\tpseudo-protocols)\n" 41 | "\t-H, --html\n" 42 | "\t\tOutput HTML-style self-closing tags (e.g.
    )\n" 43 | "\t-h, --help\n" 44 | "\t\tDisplay this help text and exit without further processing\n" 45 | "\t-m, --markdown\n" 46 | "\t\tDisable all extensions and use strict markdown syntax\n" 47 | "\t-n, --natext\n" 48 | "\t\tEnable support Discount extensions and Natasha's own\n" 49 | "\t\textensions (id header attribute, class paragraph attribute,\n" 50 | "\t\t'ins' and 'del' elements, and plain span elements)\n" 51 | "\t-x, --xhtml\n" 52 | "\t\tOutput XHTML-style self-closing tags (e.g.
    )\n"); } 53 | 54 | 55 | 56 | /* main • main function, interfacing STDIO with the parser */ 57 | int 58 | main(int argc, char **argv) { 59 | struct buf *ib, *ob; 60 | size_t ret; 61 | FILE *in = stdin; 62 | const struct mkd_renderer *hrndr, *xrndr; 63 | const struct mkd_renderer **prndr; 64 | int ch, argerr, help; 65 | struct option longopts[] = { 66 | { "discount", no_argument, 0, 'd' }, 67 | { "html", no_argument, 0, 'H' }, 68 | { "help", no_argument, 0, 'h' }, 69 | { "markdown", no_argument, 0, 'm' }, 70 | { "natext", no_argument, 0, 'n' }, 71 | { "xhtml", no_argument, 0, 'x' }, 72 | { 0, 0, 0, 0 } }; 73 | 74 | /* default options: strict markdown input, HTML output */ 75 | hrndr = &mkd_html; 76 | xrndr = &mkd_xhtml; 77 | prndr = &hrndr; 78 | 79 | /* argument parsing */ 80 | argerr = help = 0; 81 | while (!argerr && 82 | (ch = getopt_long(argc, argv, "dHhmnx", longopts, 0)) != -1) 83 | switch (ch) { 84 | case 'd': /* discount extension */ 85 | hrndr = &discount_html; 86 | xrndr = &discount_xhtml; 87 | break; 88 | case 'H': /* HTML output */ 89 | prndr = &hrndr; 90 | break; 91 | case 'h': /* display help */ 92 | argerr = help = 1; 93 | break; 94 | case 'm': /* strict markdown */ 95 | hrndr = &mkd_html; 96 | xrndr = &mkd_xhtml; 97 | break; 98 | case 'n': /* Discount + Natasha's extensions */ 99 | hrndr = &nat_html; 100 | xrndr = &nat_xhtml; 101 | break; 102 | case 'x': /* XHTML output */ 103 | prndr = &xrndr; 104 | break; 105 | default: 106 | argerr = 1; } 107 | if (argerr) { 108 | usage(help ? stdout : stderr, argv[0]); 109 | return help ? 
EXIT_SUCCESS : EXIT_FAILURE; } 110 | argc -= optind; 111 | argv += optind; 112 | 113 | /* opening the file if given from the command line */ 114 | if (argc > 0) { 115 | in = fopen(argv[0], "r"); 116 | if (!in) { 117 | fprintf(stderr,"Unable to open input file \"%s\": %s\n", 118 | argv[0], strerror(errno)); 119 | return 1; } } 120 | 121 | /* reading everything */ 122 | ib = bufnew(READ_UNIT); 123 | bufgrow(ib, READ_UNIT); 124 | while ((ret = fread(ib->data + ib->size, 1, 125 | ib->asize - ib->size, in)) > 0) { 126 | ib->size += ret; 127 | bufgrow(ib, ib->size + READ_UNIT); } 128 | if (in != stdin) fclose(in); 129 | 130 | /* performing markdown parsing */ 131 | ob = bufnew(OUTPUT_UNIT); 132 | markdown(ob, ib, *prndr); 133 | 134 | /* writing the result to stdout */ 135 | ret = fwrite(ob->data, 1, ob->size, stdout); 136 | if (ret < ob->size) 137 | fprintf(stderr, "Warning: only %zu output byte written, " 138 | "out of %zu\n", 139 | ret, 140 | ob->size); 141 | 142 | /* cleanup */ 143 | bufrelease(ib); 144 | bufrelease(ob); 145 | 146 | #ifdef BUFFER_STATS 147 | /* memory checks */ 148 | if (buffer_stat_nb) 149 | fprintf(stderr, "Warning: %ld buffers still active\n", 150 | buffer_stat_nb); 151 | if (buffer_stat_alloc_bytes) 152 | fprintf(stderr, "Warning: %zu bytes still allocated\n", 153 | buffer_stat_alloc_bytes); 154 | #endif 155 | return 0; } 156 | 157 | /* vim: set filetype=c: */ 158 | -------------------------------------------------------------------------------- /mkd2latex.1: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 
.\"
.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
.Dd March 1, 2016
.Dt MKD2LATEX 1
.Os
.Sh NAME
.Nm mkd2latex
.Nd convert a markdown document into LaTeX
.Sh SYNOPSIS
.Nm
.Op Ar file
.Sh DESCRIPTION
The
.Nm
utility reads
.Ar file
and generates LaTeX output from markdown source.
If unspecified,
.Ar file
is taken to be standard input.
.Sh EXIT STATUS
.Ex -std
.Sh SEE ALSO
.Xr mkd2html 1 ,
.Xr mkd2man 1
.Sh AUTHORS
.An -nosplit
.Nm
was written by
.An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu ,
.An Baptiste Daroussin Aq Mt bapt@freebsd.org
and
.An Michael Huang .
Manual page was originally written by
.An Massimo Manghi Aq Mt mxmanghi@apache.org ,
and rewritten to mdoc format by
.An Svyatoslav Mishyn Aq Mt juef@openmailbox.org .

--------------------------------------------------------------------------------
/mkd2latex.c:
--------------------------------------------------------------------------------
/* mkd2latex.c - LaTeX-formatted output from markdown text */

/*
 * Copyright (c) 2009, Baptiste Daroussin, Natacha Porté, and Michael Huang
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 | */ 18 | 19 | /* 20 | * Links require the hyperref package, and images require the graphicx 21 | * package. 22 | */ 23 | 24 | #include "markdown.h" 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #define READ_UNIT 1024 32 | #define OUTPUT_UNIT 64 33 | 34 | /********************* 35 | * ENTITY CONVERSION * 36 | *********************/ 37 | 38 | struct str_pair { 39 | const char *entity; 40 | const char *latex; }; 41 | 42 | static struct str_pair entity_latex[] = { 43 | { "Æ", "\\AE{}" }, 44 | { "Á", "\\'A" }, 45 | { "Â", "\\^A" }, 46 | { "À", "\\`A" }, 47 | { "Å", "\\AA{}" }, 48 | { "Ã", "\\~A" }, 49 | { "Ä", "\\\"A" }, 50 | { "Ç", "\\c{C}" }, 51 | // { "Ð", "\\DH{}" }, // not available in OT1 52 | { "É", "\\'E" }, 53 | { "Ê", "\\^E" }, 54 | { "È", "\\`E" }, 55 | { "Ë", "\\\"E" }, 56 | { "Í", "\\'I" }, 57 | { "Î", "\\^I" }, 58 | { "Ì", "\\`I" }, 59 | { "Ï", "\\\"I" }, 60 | { "Ñ", "\\~N" }, 61 | { "Ó", "\\'O" }, 62 | { "Ô", "\\^O" }, 63 | { "Ò", "\\`O" }, 64 | { "Ø", "\\O{}" }, 65 | { "Õ", "\\~O" }, 66 | { "Ö", "\\\"O" }, 67 | // { "Þ", "\\TH{}" }, // not available in OT1 68 | { "Ú", "\\'U" }, 69 | { "Û", "\\^U" }, 70 | { "Ù", "\\`U" }, 71 | { "Ü", "\\\"U" }, 72 | { "&Ygrave;", "\\`Y" }, 73 | { "á", "\\'a" }, 74 | { "â", "\\^a" }, 75 | // { "´", "\\textasciiacute{}" }, // requires textcomp 76 | { "æ", "\\ae{}" }, 77 | { "à", "\\`a" }, 78 | { "&", "\\&" }, 79 | { "'", "'" }, 80 | { "å", "\\aa{}" }, 81 | { "ã", 
"\\~a" }, 82 | { "ä", "\\\"a" }, 83 | // { "¦", "\\textbrokenbar{}" }, // requires textcomp 84 | { "ç", "\\c{c}" }, 85 | { "¸", "\\c{}" }, 86 | // { "¢", "\\textcent{}" }, // requires textcomp 87 | { "©", "\\copyright{}" }, 88 | // { "¤", "\\textcurrency{}" }, // requires textcomp 89 | // { "°", "\\textdegree{}" }, // requires textcomp 90 | // { "÷", "\\textdiv{}" }, // requires textcomp 91 | { "é", "\\'e" }, 92 | { "ê", "\\^e" }, 93 | { "è", "\\`e" }, 94 | // { "ð", "\\dh{}" }. // not available in OT1 95 | { "ë", "\\\"e" }, 96 | // { "½", "\\textonehalf{}" }, // requires textcomp 97 | // { "¼", "\\textonequarter{}" }, // requires textcomp 98 | // { "¾", "\\textthreequarter{}" }, // requires textcomp 99 | { ">", "$>$" }, 100 | { "í", "\\'\\i{}" }, 101 | { "î", "\\^\\i{}" }, 102 | { "¡", "\\textexclamdown{}" }, 103 | { "ì", "\\`\\i{}" }, 104 | { "¿", "\\" }, 105 | { "ï", "\\\"\\i{}" }, 106 | // { "«", "\\guillemotleft{}" }, // not available in OT1 107 | { "<", "$<$" }, 108 | // { "¯", "\\textasciimacaron{}" }, // requires textcomp 109 | // { "µ", "\\textmu{}"}, // requires textcomp 110 | { "·", "\\textperiodcentered{}" }, 111 | { " ", "~" }, 112 | // { "¬", "\\textlnot{}" }, // requires textcomp 113 | { "ñ", "\\~n" }, 114 | { "ó", "\\'o" }, 115 | { "ô", "\\^o" }, 116 | { "ò", "\\`o" }, 117 | { "ª", "\\textordfeminine{}" }, 118 | { "º", "\\textordmasculine{}" }, 119 | { "ø", "\\o{}" }, 120 | { "õ", "\\~o" }, 121 | { "ö", "\\\"o" }, 122 | { "¶", "\\P{}" }, 123 | // { "±", "\\textpm{}" }, // requires textcomp 124 | { "£", "\\textsterling{}" }, 125 | { """, "\"" }, 126 | // { "»", "\\guillemotright{}" }, // not available in OT1 127 | { "®", "\\textregistered{}" }, 128 | { "§", "\\S{}" }, 129 | { "­", "\\-" }, 130 | // { "¹", "\\textonesuperior{}" }, // requires textcomp 131 | // { "²", "\\texttwosuperior{}" }, // requires textcomp 132 | // { "³", "\\textthreesuperior{}" }, // requires textcomp 133 | { "ß", "\\ss{}" }, 134 | // { "þ", "\\th{}" }, // not available in OT1 
135 | // { "×", "\\texttimes{}" }, // requires textcomp 136 | { "ú", "\\'u" }, 137 | { "û", "\\^u" }, 138 | { "ù", "\\`u" }, 139 | // { "¨", "\\textasciidieresis{}" }, // requires textcomp 140 | { "ü", "\\\"u" }, 141 | { "ý", "\\'y" }, 142 | // { "¥", "\\textyen{}" }, // requires textcomp 143 | { "ÿ", "\\\"y" }, 144 | }; 145 | 146 | static int cmp_entity(const void *key, const void *element) { 147 | const struct str_pair *pair = element; 148 | const struct buf *entity = key; 149 | return bufcmps(entity, pair->entity); } 150 | 151 | static const char *entity2latex(const struct buf *entity) { 152 | const struct str_pair *pair; 153 | pair = bsearch(entity, entity_latex, 154 | sizeof entity_latex / sizeof *entity_latex, 155 | sizeof *entity_latex, 156 | &cmp_entity); 157 | return pair ? pair->latex : 0; } 158 | 159 | 160 | 161 | /****************************** 162 | * MARKDOWN TO LATEX RENDERER * 163 | ******************************/ 164 | 165 | static void 166 | latex_text_escape(struct buf *ob, char *src, size_t size) { 167 | size_t i = 0, org; 168 | while (i < size) { 169 | /* copying directly unescaped characters */ 170 | org = i; 171 | while (i < size && src[i] != '&' && src[i] != '%' 172 | && src[i] != '$' && src[i] != '#' && src[i] != '_' 173 | && src[i] != '{' && src[i] != '}' && src[i] != '~' 174 | && src[i] != '^' && src[i] != '\\' && src[i] != '<' 175 | && src[i] != '>') 176 | i += 1; 177 | if (i > org) bufput(ob, src + org, i - org); 178 | 179 | /* escaping */ 180 | if (i >= size) break; 181 | else if (src[i] == '&') BUFPUTSL(ob, "\\&"); 182 | else if (src[i] == '%') BUFPUTSL(ob, "\\%"); 183 | else if (src[i] == '$') BUFPUTSL(ob, "\\$"); 184 | else if (src[i] == '#') BUFPUTSL(ob, "\\#"); 185 | else if (src[i] == '_') BUFPUTSL(ob, "\\_"); 186 | else if (src[i] == '{') BUFPUTSL(ob, "\\{"); 187 | else if (src[i] == '}') BUFPUTSL(ob, "\\}"); 188 | else if (src[i] == '<') BUFPUTSL(ob, "$<$"); 189 | else if (src[i] == '>') BUFPUTSL(ob, "$<$"); 190 | else if 
(src[i] == '~') BUFPUTSL(ob, "\\textasciitilde{}"); 191 | else if (src[i] == '^') BUFPUTSL(ob, "\\textasciicircum{}"); 192 | else if (src[i] == '\\') BUFPUTSL(ob, "\\textbackslash{}"); 193 | i += 1; } } 194 | 195 | static void 196 | latex_prolog(struct buf *ob, void *opaque) { 197 | BUFPUTSL(ob, 198 | "\\documentclass{article}\n" 199 | "\\usepackage{hyperref}\n" 200 | "\\usepackage{graphicx}\n" 201 | "\\begin{document}\n"); } 202 | 203 | static void 204 | latex_epilog(struct buf *ob, void *opaque) { 205 | BUFPUTSL(ob, "\n\\end{document}\n"); } 206 | 207 | static int 208 | latex_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type, 209 | void *opaque) { 210 | if (!link || !link->size) return 0; 211 | BUFPUTSL(ob, "\\href{"); 212 | if (type == MKDA_IMPLICIT_EMAIL) BUFPUTSL(ob, "mailto:"); 213 | bufput(ob, link->data, link->size); 214 | BUFPUTSL(ob, "}{"); 215 | if (type == MKDA_EXPLICIT_EMAIL && link->size > 7) 216 | latex_text_escape(ob, link->data + 7, link->size - 7); 217 | else latex_text_escape(ob, link->data, link->size); 218 | BUFPUTSL(ob, "}"); 219 | return 1; } 220 | 221 | static int 222 | latex_link(struct buf *ob, struct buf *link, struct buf *title, 223 | struct buf *content, void *opaque) { 224 | BUFPUTSL(ob, "\\href{"); 225 | if (link && link->size) bufput(ob, link->data, link->size); 226 | BUFPUTSL(ob, "}{"); 227 | if (content && content->size) 228 | bufput(ob, content->data, content->size); 229 | BUFPUTSL(ob, "}"); 230 | return 1; } 231 | 232 | static int 233 | latex_image(struct buf *ob, struct buf *link, struct buf *title, 234 | struct buf *alt, void *opaque) { 235 | if (!link || !link->size) return 0; 236 | BUFPUTSL(ob, "\\includegraphics{"); 237 | bufput(ob, link->data, link->size); 238 | BUFPUTSL(ob, "}"); 239 | return 1; } 240 | 241 | static void 242 | latex_blockcode(struct buf *ob, struct buf *text, void *opaque) { 243 | if (ob->size) bufputc(ob, '\n'); 244 | BUFPUTSL(ob, "\\begin{verbatim}\n"); 245 | if (text) bufput(ob, 
text->data, text->size); 246 | BUFPUTSL(ob, "\\end{verbatim}\n"); } 247 | 248 | static void 249 | latex_blockquote(struct buf *ob, struct buf *text, void *opaque) { 250 | if (ob->size) bufputc(ob, '\n'); 251 | BUFPUTSL(ob, "\\begin{quote}\n"); 252 | if (text) bufput(ob, text->data, text->size); 253 | BUFPUTSL(ob, "\\end{quote}\n"); } 254 | 255 | static int 256 | latex_codespan(struct buf *ob, struct buf *text, void *opaque) { 257 | BUFPUTSL(ob, "\\texttt{"); 258 | if (text) latex_text_escape(ob, text->data, text->size); 259 | BUFPUTSL(ob, "}"); 260 | return 1; } 261 | 262 | static void 263 | latex_header(struct buf *ob, struct buf *text, int level, void *opaque) { 264 | if (ob->size) bufputc(ob, '\n'); 265 | switch(level) { 266 | case 1: 267 | BUFPUTSL(ob,"\\section{"); 268 | break; 269 | case 2: 270 | BUFPUTSL(ob, "\\subsection{"); 271 | break; 272 | case 3: 273 | BUFPUTSL(ob, "\\subsubsection{"); 274 | break; 275 | default: 276 | fprintf(stderr, "Warning: ignoring header level %d\n", 277 | level); 278 | } 279 | if (text) bufput(ob, text->data, text->size); 280 | if (level >= 1 && level <= 3) BUFPUTSL(ob, "}\n"); 281 | } 282 | 283 | static int 284 | latex_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 285 | if (!text || !text->size) return 0; 286 | BUFPUTSL(ob, "\\textbf{"); 287 | bufput(ob, text->data, text->size); 288 | BUFPUTSL(ob, "}"); 289 | return 1; } 290 | 291 | static int 292 | latex_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 293 | if (!text || !text->size) return 0; 294 | BUFPUTSL(ob, "\\emph{"); 295 | if (text) bufput(ob, text->data, text->size); 296 | BUFPUTSL(ob, "}"); 297 | return 1; } 298 | 299 | static int 300 | latex_linebreak(struct buf *ob, void *opaque) { 301 | BUFPUTSL(ob, "\\\\"); 302 | return 1; } 303 | 304 | static void 305 | latex_paragraph(struct buf *ob, struct buf *text, void *opaque) { 306 | if (ob->size) bufputc(ob, '\n'); 307 | if (text) bufput(ob, text->data, text->size); 308 | 
BUFPUTSL(ob, "\n"); } 309 | 310 | static void 311 | latex_list(struct buf *ob, struct buf *text, int flags, void *opaque) { 312 | if (ob->size) bufputc(ob, '\n'); 313 | if (flags & MKD_LIST_ORDERED) 314 | BUFPUTSL(ob, "\\begin{enumerate}\n"); 315 | else 316 | BUFPUTSL(ob, "\\begin{itemize}\n"); 317 | if (text) bufput(ob, text->data, text->size); 318 | if (flags & MKD_LIST_ORDERED) 319 | BUFPUTSL(ob, "\\end{enumerate}\n"); 320 | else 321 | BUFPUTSL(ob, "\\end{itemize}\n"); } 322 | 323 | static void 324 | latex_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) { 325 | BUFPUTSL(ob, "\\item "); 326 | if (text) { 327 | while (text->size && text->data[text->size - 1] == '\n') 328 | text->size -= 1; 329 | bufput(ob, text->data, text->size); } 330 | BUFPUTSL(ob, "\n"); } 331 | 332 | static void 333 | latex_hrule(struct buf *ob, void *opaque) { 334 | if (ob->size) bufputc(ob, '\n'); 335 | BUFPUTSL(ob, "\\hrule"); } 336 | 337 | static int 338 | latex_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 339 | if (!text || !text->size) return 0; 340 | BUFPUTSL(ob, "\\textbf{\\emph{"); 341 | bufput(ob, text->data, text->size); 342 | BUFPUTSL(ob, "}}"); 343 | return 1; } 344 | 345 | static void 346 | latex_entity(struct buf *ob, struct buf *entity, void *opaque) { 347 | const char *rendered = entity2latex(entity); 348 | if (rendered) 349 | bufputs(ob, rendered); 350 | else { 351 | BUFPUTSL(ob, "\\texttt{"); 352 | bufput(ob, entity->data, entity->size); 353 | BUFPUTSL(ob, "}"); } } 354 | 355 | static void 356 | latex_normal_text(struct buf *ob, struct buf *text, void *opaque) { 357 | if (text) latex_text_escape(ob, text->data, text->size); } 358 | 359 | 360 | /* renderer structure */ 361 | static struct mkd_renderer to_latex = { 362 | /* document-level callbacks */ 363 | latex_prolog, 364 | latex_epilog, 365 | 366 | /* block-level callbacks */ 367 | latex_blockcode, 368 | latex_blockquote, 369 | latex_blockcode, 370 | latex_header, 371 | 
latex_hrule, 372 | latex_list, 373 | latex_listitem, 374 | latex_paragraph, 375 | NULL, 376 | NULL, 377 | NULL, 378 | 379 | /* span-level callbacks */ 380 | latex_autolink, 381 | latex_codespan, 382 | latex_double_emphasis, 383 | latex_emphasis, 384 | latex_image, 385 | latex_linebreak, 386 | latex_link, 387 | latex_codespan, 388 | latex_triple_emphasis, 389 | 390 | /* low-level callbacks */ 391 | latex_entity, 392 | latex_normal_text, 393 | 394 | /* renderer data */ 395 | 64, 396 | "*_", 397 | NULL }; 398 | 399 | 400 | 401 | /***************** 402 | * MAIN FUNCTION * 403 | *****************/ 404 | 405 | /* main • main function, interfacing STDIO with the parser */ 406 | int 407 | main(int argc, char **argv) { 408 | struct buf *ib, *ob; 409 | size_t ret; 410 | FILE *in = stdin; 411 | 412 | /* opening the file if given from the command line */ 413 | if (argc > 1) { 414 | in = fopen(argv[1], "r"); 415 | if (!in) { 416 | fprintf(stderr,"Unable to open input file \"%s\": %s\n", 417 | argv[1], strerror(errno)); 418 | return 1; } } 419 | 420 | /* reading everything */ 421 | ib = bufnew(READ_UNIT); 422 | bufgrow(ib, READ_UNIT); 423 | while ((ret = fread(ib->data + ib->size, 1, 424 | ib->asize - ib->size, in)) > 0) { 425 | ib->size += ret; 426 | bufgrow(ib, ib->size + READ_UNIT); } 427 | if (in != stdin) fclose(in); 428 | 429 | /* performing markdown to LaTeX */ 430 | ob = bufnew(OUTPUT_UNIT); 431 | markdown(ob, ib, &to_latex); 432 | 433 | /* writing the result to stdout */ 434 | ret = fwrite(ob->data, 1, ob->size, stdout); 435 | if (ret < ob->size) 436 | fprintf(stderr, "Warning: only %zu output byte written, " 437 | "out of %zu\n", 438 | ret, 439 | ob->size); 440 | 441 | /* cleanup */ 442 | bufrelease(ib); 443 | bufrelease(ob); 444 | return 0; } 445 | 446 | /* vim: set filetype=c: */ 447 | -------------------------------------------------------------------------------- /mkd2man.1: -------------------------------------------------------------------------------- 1 | .\" 2 
| .\" Copyright (c) 2009 - 2016 Natacha Porté 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | .\" 16 | .Dd March 30, 2016 17 | .Dt MKD2MAN 1 18 | .Os 19 | .Sh NAME 20 | .Nm mkd2man 21 | .Nd convert a markdown document into an mdoc manual page 22 | .Sh SYNOPSIS 23 | .Nm 24 | .Op Fl h 25 | .Op Fl d Ar date 26 | .Op Fl s Ar section 27 | .Op Fl t Ar title 28 | .Op Ar file 29 | .Sh DESCRIPTION 30 | .Nm 31 | utility reads 32 | .Ar file 33 | and generates 34 | .Xr mdoc 7 35 | input from markdown source. 36 | If unspecified, 37 | .Ar file 38 | is taken to be standard input. 39 | .Pp 40 | The options are as follows: 41 | .Bl -tag -width Ds 42 | .It Fl d , Fl Fl date 43 | set the document date 44 | .Pq Sq \&Dd 45 | to 46 | .Ar date 47 | .Po 48 | preferably in 49 | .Qq Month Day, Year 50 | format 51 | .Pc . 52 | If unspecified, 53 | .Nm 54 | uses the file modification date or current date 55 | if reading is from standard input or if 56 | .Xr stat 2 57 | fails. 58 | .It Fl h , Fl Fl help 59 | display help text. 60 | .It Fl s , Fl Fl section 61 | set the document section 62 | .Pq Sq \&Dt 63 | to 64 | .Ar section . 65 | If unspecified, 66 | .Nm 67 | uses 68 | .Ar 1 . 
69 | .It Fl t , Fl Fl title 70 | set the document title 71 | .Pq Sq \&Dt 72 | to 73 | .Ar title . 74 | If unspecified, 75 | .Nm 76 | uses the suffix-stripped filename part of 77 | .Ar file . 78 | When reading is from stdin the title must be specified. 79 | .El 80 | .Sh EXIT STATUS 81 | .Ex -std 82 | .Sh SEE ALSO 83 | .Xr mkd2html 1 , 84 | .Xr mkd2latex 1 , 85 | .Xr mdoc 7 86 | .Sh AUTHORS 87 | .An -nosplit 88 | .Nm 89 | was written by 90 | .An Natacha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu 91 | and 92 | .An Baptiste Daroussin Aq Mt bapt@freebsd.org . 93 | Manual page was originally written by 94 | .An Massimo Manghi Aq Mt mxmanghi@apache.org , 95 | and rewritten to mdoc format by 96 | .An Svyatoslav Mishyn Aq Mt juef@openmailbox.org . 97 | -------------------------------------------------------------------------------- /mkd2man.c: -------------------------------------------------------------------------------- 1 | /* mkd2man.c - man-page-formatted output from markdown text */ 2 | 3 | /* 4 | * Copyright (c) 2009-2015, Baptiste Daroussin and Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #include "markdown.h" 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #define READ_UNIT 1024 32 | #define OUTPUT_UNIT 64 33 | 34 | 35 | /**************************** 36 | * MARKDOWN TO MAN RENDERER * 37 | ****************************/ 38 | 39 | /* usage • print the option list */ 40 | 41 | static void 42 | usage(FILE *out, const char *name) { 43 | fprintf(out, "Usage: %s [-h] [-d ] [-s
    ] " 44 | "[ -t ] [input-file]\n\n", name); 45 | fprintf(out, "\t-d, --date\n" 46 | "\t\tSet the date of the manpage (default: now),\n" 47 | "\t-h, --help\n" 48 | "\t\tDisplay this help text and exit without further processing\n" 49 | "\t-s, --section\n" 50 | "\t\tSet the section of the manpage (default: 1)\n" 51 | "\t-t, --title\n" 52 | "\t\tSet the title of the manpage (default: filename)\n"); } 53 | 54 | struct metadata { 55 | char *title; 56 | char *date; 57 | int section; 58 | }; 59 | 60 | static void 61 | man_text_escape(struct buf *ob, char *src, size_t size) { 62 | size_t i = 0, org; 63 | while (i < size) { 64 | /* copying directly unescaped characters */ 65 | org = i; 66 | while (i < size && src[i] != '-') 67 | i += 1; 68 | if (i > org) bufput(ob, src + org, i - org); 69 | 70 | /* escaping */ 71 | if (i >= size) break; 72 | else if (src[i] == '-') BUFPUTSL(ob, "\\-"); 73 | i += 1; } } 74 | 75 | static void 76 | man_prolog(struct buf *ob, void *opaque) { 77 | struct metadata *m = (struct metadata *)opaque; 78 | bufprintf(ob, 79 | ".\\\" Generated by mkd2man\n" 80 | ".Dd %s\n" 81 | ".Dt %s %d\n" 82 | ".Os", 83 | m->date, 84 | m->title, 85 | m->section 86 | ); } 87 | 88 | static void 89 | man_epilog(struct buf *ob, void *opaque) { 90 | BUFPUTSL(ob, "\n"); } 91 | 92 | static void 93 | man_blockcode(struct buf *ob, struct buf *text, void *opaque) { 94 | if (ob->size) bufputc(ob, '\n'); 95 | BUFPUTSL(ob, ".Bd -literal\n"); 96 | if (text) man_text_escape(ob, text->data, text->size); 97 | BUFPUTSL(ob, ".Ed"); } 98 | 99 | static void 100 | man_blockquote(struct buf *ob, struct buf *text, void *opaque) { 101 | if (ob->size) bufputc(ob, '\n'); 102 | BUFPUTSL(ob, ".Eo\n"); 103 | if (text) man_text_escape(ob, text->data, text->size); 104 | BUFPUTSL(ob, "\n.Ec"); } 105 | 106 | static int 107 | man_codespan(struct buf *ob, struct buf *text, void *opaque) { 108 | if (ob->size) bufputc(ob, '\n'); 109 | BUFPUTSL(ob, ".Bd -literal\n"); 110 | if (text) man_text_escape(ob, 
text->data, text->size); 111 | BUFPUTSL(ob, ".Ed"); 112 | return 1; } 113 | 114 | static void 115 | man_header(struct buf *ob, struct buf *text, int level, void *opaque) { 116 | if (ob->size) bufputc(ob, '\n'); 117 | switch(level) { 118 | case 1: 119 | BUFPUTSL(ob,".Sh "); 120 | break; 121 | case 2: 122 | BUFPUTSL(ob, ".Ss "); 123 | break; 124 | case 3: 125 | BUFPUTSL(ob, ".Pp\n.Em "); 126 | break; 127 | } 128 | if (text) bufput(ob, text->data, text->size); 129 | } 130 | 131 | static int 132 | man_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 133 | if (!text || !text->size) return 0; 134 | BUFPUTSL(ob, "\\fB"); 135 | bufput(ob, text->data, text->size); 136 | BUFPUTSL(ob, "\\fP"); 137 | return 1; } 138 | 139 | static int 140 | man_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 141 | if (!text || !text->size) return 0; 142 | BUFPUTSL(ob, "\\fI"); 143 | if (text) bufput(ob, text->data, text->size); 144 | BUFPUTSL(ob, "\\fP"); 145 | return 1; } 146 | 147 | static int 148 | man_linebreak(struct buf *ob, void *opaque) { 149 | BUFPUTSL(ob, ".br"); 150 | return 1; } 151 | 152 | static void 153 | man_paragraph(struct buf *ob, struct buf *text, void *opaque) { 154 | if (ob->size) bufputc(ob, '\n'); 155 | BUFPUTSL(ob, ".Pp\n"); 156 | if (text) bufput(ob, text->data, text->size); } 157 | 158 | static void 159 | man_list(struct buf *ob, struct buf *text, int flags, void *opaque) { 160 | if (ob->size) bufputc(ob, '\n'); 161 | if (flags & MKD_LIST_ORDERED) 162 | BUFPUTSL(ob,".Bl -enum\n"); 163 | else 164 | BUFPUTSL(ob,".Bl -bullet\n"); 165 | if (text) bufput(ob, text->data, text->size); 166 | BUFPUTSL(ob, ".El"); } 167 | 168 | static void 169 | man_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) { 170 | BUFPUTSL(ob, ".It\n"); 171 | if (text) { 172 | while (text->size && text->data[text->size - 1] == '\n') 173 | text->size -= 1; 174 | bufput(ob, text->data, text->size); } 175 | BUFPUTSL(ob, "\n"); } 176 | 177 | 
static void 178 | man_normal_text(struct buf *ob, struct buf *text, void *opaque) { 179 | if (text) man_text_escape(ob, text->data, text->size); } 180 | 181 | 182 | /* renderer structure */ 183 | static struct mkd_renderer to_man = { 184 | /* document-level callbacks */ 185 | man_prolog, 186 | man_epilog, 187 | 188 | /* block-level callbacks */ 189 | man_blockcode, 190 | man_blockquote, 191 | NULL, 192 | man_header, 193 | NULL, 194 | man_list, 195 | man_listitem, 196 | man_paragraph, 197 | NULL, 198 | NULL, 199 | NULL, 200 | 201 | /* span-level callbacks */ 202 | NULL, 203 | man_codespan, 204 | man_double_emphasis, 205 | man_emphasis, 206 | NULL, 207 | man_linebreak, 208 | NULL, 209 | NULL, 210 | NULL, 211 | 212 | /* low-level callbacks */ 213 | NULL, 214 | man_normal_text, 215 | 216 | /* renderer data */ 217 | 64, 218 | "*_", 219 | NULL }; 220 | 221 | 222 | 223 | /***************** 224 | * MAIN FUNCTION * 225 | *****************/ 226 | 227 | /* main • main function, interfacing STDIO with the parser */ 228 | int 229 | main(int argc, char **argv) { 230 | struct buf *ib, *ob; 231 | size_t ret; 232 | size_t i; 233 | FILE *in = stdin; 234 | int ch, argerr, help; 235 | char *tmp; 236 | char datebuf[64]; 237 | time_t ttm; 238 | struct tm *tm; 239 | struct stat st; 240 | struct metadata man_metadata; 241 | 242 | struct option longopts[] = { 243 | { "date", required_argument, 0, 'd' }, 244 | { "help", no_argument, 0, 'h' }, 245 | { "section", required_argument, 0, 's' }, 246 | { "title", required_argument, 0, 't' }, 247 | { 0, 0, 0, 0} 248 | }; 249 | 250 | man_metadata.section = 1; 251 | man_metadata.title = NULL; 252 | man_metadata.date = NULL; 253 | /* opening the file if given from the command line */ 254 | argerr = help = 0; 255 | while (!argerr && 256 | (ch = getopt_long(argc, argv, "d:hs:t:", longopts, 0)) != -1) 257 | switch (ch) { 258 | case 'd': 259 | man_metadata.date = optarg; 260 | break; 261 | case 'h': 262 | argerr = help = 1; 263 | break; 264 | case 's': 
265 | if (strlen(optarg) != 1 && 266 | strspn(optarg, "123456789") != 1) { 267 | argerr = 1; 268 | break; } 269 | man_metadata.section = (int)strtol(optarg, 270 | (char **)NULL, 10); 271 | break; 272 | case 't': 273 | man_metadata.title = optarg; 274 | break; 275 | default: 276 | argerr = 1; } 277 | if (argerr) { 278 | usage(help ? stdout : stderr, argv[0]); 279 | return help ? EXIT_SUCCESS : EXIT_FAILURE; 280 | } 281 | 282 | argc -= optind; 283 | argv += optind; 284 | 285 | if (argc > 0) { 286 | in = fopen(argv[0], "r"); 287 | if (!in) { 288 | fprintf(stderr,"Unable to open input file \"%s\": %s\n", 289 | argv[0], strerror(errno)); 290 | return EXIT_FAILURE; } } 291 | 292 | if (!man_metadata.date) { 293 | if (in == stdin || stat(argv[0], &st) == -1) { 294 | ttm = time(NULL); 295 | tm = localtime(&ttm); } 296 | else 297 | tm = localtime(&st.st_mtime); 298 | strftime(datebuf, sizeof(datebuf), "%B %d, %Y", tm); 299 | man_metadata.date = datebuf; 300 | } 301 | 302 | if (in == stdin && !man_metadata.title) { 303 | fprintf(stderr, "When reading from stdin the title should be " 304 | "specified is expected\n"); 305 | return EXIT_FAILURE; } 306 | 307 | if (!man_metadata.title) { 308 | tmp = strrchr(argv[0], '/'); 309 | man_metadata.title = strrchr(argv[0], '/'); 310 | if (!tmp) 311 | tmp = argv[0]; 312 | else 313 | tmp++; 314 | man_metadata.title = tmp; 315 | tmp = strrchr(man_metadata.title, '.'); 316 | if (tmp) 317 | *tmp = '\0'; } 318 | 319 | /* Ensure the title is uppercase */ 320 | for (i = 0; i < strlen(man_metadata.title); i++) 321 | man_metadata.title[i] = toupper(man_metadata.title[i]); 322 | 323 | /* reading everything */ 324 | ib = bufnew(READ_UNIT); 325 | bufgrow(ib, READ_UNIT); 326 | while ((ret = fread(ib->data + ib->size, 1, 327 | ib->asize - ib->size, in)) > 0) { 328 | ib->size += ret; 329 | bufgrow(ib, ib->size + READ_UNIT); } 330 | if (in != stdin) fclose(in); 331 | 332 | to_man.opaque = &man_metadata; 333 | /* performing markdown to man */ 334 | ob = 
bufnew(OUTPUT_UNIT); 335 | markdown(ob, ib, &to_man); 336 | 337 | /* writing the result to stdout */ 338 | ret = fwrite(ob->data, 1, ob->size, stdout); 339 | if (ret < ob->size) 340 | fprintf(stderr, "Warning: only %zu output byte written, " 341 | "out of %zu\n", 342 | ret, 343 | ob->size); 344 | 345 | /* cleanup */ 346 | bufrelease(ib); 347 | bufrelease(ob); 348 | return EXIT_SUCCESS; } 349 | 350 | /* vim: set filetype=c: */ 351 | -------------------------------------------------------------------------------- /renderers.c: -------------------------------------------------------------------------------- 1 | /* renderers.h - example markdown renderers */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #include "renderers.h" 20 | 21 | #include <strings.h> 22 | 23 | 
/*****************************
 * EXPORTED HELPER FUNCTIONS *
 *****************************/

/* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"' */
/*	ob is the output buffer, src/size the bytes to copy; runs of */
/*	ordinary characters are appended in a single bufput call */
void
lus_attr_escape(struct buf *ob, const char *src, size_t size) {
	size_t i = 0, org;
	while (i < size) {
		/* copying directly unescaped characters */
		org = i;
		while (i < size && src[i] != '<' && src[i] != '>'
		&& src[i] != '&' && src[i] != '"')
			i += 1;
		if (i > org) bufput(ob, src + org, i - org);

		/* escaping: the character is replaced by its named entity */
		if (i >= size) break;
		else if (src[i] == '<') BUFPUTSL(ob, "&lt;");
		else if (src[i] == '>') BUFPUTSL(ob, "&gt;");
		else if (src[i] == '&') BUFPUTSL(ob, "&amp;");
		else if (src[i] == '"') BUFPUTSL(ob, "&quot;");
		i += 1; } }


/* lus_body_escape • copy the buffer entity-escaping '<', '>' and '&' */
/*	same as lus_attr_escape except that '"' is copied unchanged */
void
lus_body_escape(struct buf *ob, const char *src, size_t size) {
	size_t i = 0, org;
	while (i < size) {
		/* copying directly unescaped characters */
		org = i;
		while (i < size && src[i] != '<' && src[i] != '>'
		&& src[i] != '&')
			i += 1;
		if (i > org) bufput(ob, src + org, i - org);

		/* escaping: the character is replaced by its named entity */
		if (i >= size) break;
		else if (src[i] == '<') BUFPUTSL(ob, "&lt;");
		else if (src[i] == '>') BUFPUTSL(ob, "&gt;");
		else if (src[i] == '&') BUFPUTSL(ob, "&amp;");
		i += 1; } }
67 | 68 | 69 | 70 | /******************** 71 | * GENERIC RENDERER * 72 | ********************/ 73 | 74 | static int 75 | rndr_autolink(struct buf *ob, struct buf *link, enum mkd_autolink type, 76 | void *opaque) { 77 | if (!link || !link->size) return 0; 78 | BUFPUTSL(ob, "<a href=\""); 79 | if (type == MKDA_IMPLICIT_EMAIL) BUFPUTSL(ob, "mailto:"); 80 | lus_attr_escape(ob, link->data, link->size); 81 | BUFPUTSL(ob, "\">"); 82 | if (type == MKDA_EXPLICIT_EMAIL && link->size > 
7) 83 | lus_body_escape(ob, link->data + 7, link->size - 7); 84 | else lus_body_escape(ob, link->data, link->size); 85 | BUFPUTSL(ob, "</a>"); 86 | return 1; } 87 | 88 | static void 89 | rndr_blockcode(struct buf *ob, struct buf *text, void *opaque) { 90 | if (ob->size) bufputc(ob, '\n'); 91 | BUFPUTSL(ob, "<pre><code>"); 92 | if (text) lus_body_escape(ob, text->data, text->size); 93 | BUFPUTSL(ob, "</code></pre>\n"); } 94 | 95 | static void 96 | rndr_blockquote(struct buf *ob, struct buf *text, void *opaque) { 97 | if (ob->size) bufputc(ob, '\n'); 98 | BUFPUTSL(ob, "<blockquote>\n"); 99 | if (text) bufput(ob, text->data, text->size); 100 | BUFPUTSL(ob, "</blockquote>\n"); } 101 | 102 | static int 103 | rndr_codespan(struct buf *ob, struct buf *text, void *opaque) { 104 | BUFPUTSL(ob, "<code>"); 105 | if (text) lus_body_escape(ob, text->data, text->size); 106 | BUFPUTSL(ob, "</code>"); 107 | return 1; } 108 | 109 | static int 110 | rndr_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 111 | if (!text || !text->size) return 0; 112 | BUFPUTSL(ob, "<strong>"); 113 | bufput(ob, text->data, text->size); 114 | BUFPUTSL(ob, "</strong>"); 115 | return 1; } 116 | 117 | static int 118 | rndr_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 119 | if (!text || !text->size) return 0; 120 | BUFPUTSL(ob, "<em>"); 121 | if (text) bufput(ob, text->data, text->size); 122 | BUFPUTSL(ob, "</em>"); 123 | return 1; } 124 | 125 | static void 126 | rndr_header(struct buf *ob, struct buf *text, int level, void *opaque) { 127 | if (ob->size) bufputc(ob, '\n'); 128 | bufprintf(ob, "<h%d>", level); 129 | if (text) bufput(ob, text->data, text->size); 130 | bufprintf(ob, "</h%d>\n", level); } 131 | 132 | static int 133 | rndr_link(struct buf *ob, struct buf *link, struct buf *title, 134 | struct buf *content, void *opaque) { 135 | BUFPUTSL(ob, "<a href=\""); 136 | if (link && link->size) lus_attr_escape(ob, link->data, link->size); 137 | if (title && 
title->size) { 138 | BUFPUTSL(ob, "\" title=\""); 139 | lus_attr_escape(ob, title->data, title->size); } 140 | BUFPUTSL(ob, "\">"); 141 | if (content && content->size) bufput(ob, content->data, content->size); 142 | BUFPUTSL(ob, "</a>"); 143 | return 1; } 144 | 145 | static void 146 | rndr_list(struct buf *ob, struct buf *text, int flags, void *opaque) { 147 | if (ob->size) bufputc(ob, '\n'); 148 | bufput(ob, (flags & MKD_LIST_ORDERED) ? "<ol>\n" : "<ul>\n", 5); 149 | if (text) bufput(ob, text->data, text->size); 150 | bufput(ob, (flags & MKD_LIST_ORDERED) ? "</ol>\n" : "</ul>\n", 6); } 151 | 152 | static void 153 | rndr_listitem(struct buf *ob, struct buf *text, int flags, void *opaque) { 154 | BUFPUTSL(ob, "<li>"); 155 | if (text) { 156 | while (text->size && text->data[text->size - 1] == '\n') 157 | text->size -= 1; 158 | bufput(ob, text->data, text->size); } 159 | BUFPUTSL(ob, "</li>\n"); } 160 | 161 | static void 162 | rndr_normal_text(struct buf *ob, struct buf *text, void *opaque) { 163 | if (text) lus_body_escape(ob, text->data, text->size); } 164 | 165 | static void 166 | rndr_paragraph(struct buf *ob, struct buf *text, void *opaque) { 167 | if (ob->size) bufputc(ob, '\n'); 168 | BUFPUTSL(ob, "<p>"); 169 | if (text) bufput(ob, text->data, text->size); 170 | BUFPUTSL(ob, "</p>\n"); } 171 | 172 | static void 173 | rndr_raw_block(struct buf *ob, struct buf *text, void *opaque) { 174 | size_t org, sz; 175 | if (!text) return; 176 | sz = text->size; 177 | while (sz > 0 && text->data[sz - 1] == '\n') sz -= 1; 178 | org = 0; 179 | while (org < sz && text->data[org] == '\n') org += 1; 180 | if (org >= sz) return; 181 | if (ob->size) bufputc(ob, '\n'); 182 | bufput(ob, text->data + org, sz - org); 183 | bufputc(ob, '\n'); } 184 | 185 | static int 186 | rndr_raw_inline(struct buf *ob, struct buf *text, void *opaque) { 187 | bufput(ob, text->data, text->size); 188 | return 1; } 189 | 190 | static int 191 | rndr_triple_emphasis(struct buf *ob, struct buf *text, char 
c, void *opaque) { 192 | if (!text || !text->size) return 0; 193 | BUFPUTSL(ob, "<strong><em>"); 194 | bufput(ob, text->data, text->size); 195 | BUFPUTSL(ob, "</em></strong>"); 196 | return 1; } 197 | 198 | 199 | 200 | /******************* 201 | * HTML 4 RENDERER * 202 | *******************/ 203 | 204 | static void 205 | html_hrule(struct buf *ob, void *opaque) { 206 | if (ob->size) bufputc(ob, '\n'); 207 | BUFPUTSL(ob, "<hr>\n"); } 208 | 209 | static int 210 | html_image(struct buf *ob, struct buf *link, struct buf *title, 211 | struct buf *alt, void *opaque) { 212 | if (!link || !link->size) return 0; 213 | BUFPUTSL(ob, "<img src=\""); 214 | lus_attr_escape(ob, link->data, link->size); 215 | BUFPUTSL(ob, "\" alt=\""); 216 | if (alt && alt->size) 217 | lus_attr_escape(ob, alt->data, alt->size); 218 | if (title && title->size) { 219 | BUFPUTSL(ob, "\" title=\""); 220 | lus_attr_escape(ob, title->data, title->size); } 221 | BUFPUTSL(ob, "\">"); 222 | return 1; } 223 | 224 | static int 225 | html_linebreak(struct buf *ob, void *opaque) { 226 | BUFPUTSL(ob, "<br>\n"); 227 | return 1; } 228 | 229 | 230 | /* exported renderer structure */ 231 | const struct mkd_renderer mkd_html = { 232 | NULL, 233 | NULL, 234 | 235 | rndr_blockcode, 236 | rndr_blockquote, 237 | rndr_raw_block, 238 | rndr_header, 239 | html_hrule, 240 | rndr_list, 241 | rndr_listitem, 242 | rndr_paragraph, 243 | NULL, 244 | NULL, 245 | NULL, 246 | 247 | rndr_autolink, 248 | rndr_codespan, 249 | rndr_double_emphasis, 250 | rndr_emphasis, 251 | html_image, 252 | html_linebreak, 253 | rndr_link, 254 | rndr_raw_inline, 255 | rndr_triple_emphasis, 256 | 257 | NULL, 258 | rndr_normal_text, 259 | 260 | 64, 261 | "*_", 262 | NULL }; 263 | 264 | 265 | 266 | /********************** 267 | * XHTML 1.0 RENDERER * 268 | **********************/ 269 | 270 | static void 271 | xhtml_hrule(struct buf *ob, void *opaque) { 272 | if (ob->size) bufputc(ob, '\n'); 273 | BUFPUTSL(ob, "<hr />\n"); } 274 | 275 | static int 276 | 
xhtml_image(struct buf *ob, struct buf *link, struct buf *title, 277 | struct buf *alt, void *opaque) { 278 | if (!link || !link->size) return 0; 279 | BUFPUTSL(ob, "<img src=\""); 280 | lus_attr_escape(ob, link->data, link->size); 281 | BUFPUTSL(ob, "\" alt=\""); 282 | if (alt && alt->size) 283 | lus_attr_escape(ob, alt->data, alt->size); 284 | if (title && title->size) { 285 | BUFPUTSL(ob, "\" title=\""); 286 | lus_attr_escape(ob, title->data, title->size); } 287 | BUFPUTSL(ob, "\" />"); 288 | return 1; } 289 | 290 | static int 291 | xhtml_linebreak(struct buf *ob, void *opaque) { 292 | BUFPUTSL(ob, "<br />\n"); 293 | return 1; } 294 | 295 | 296 | /* exported renderer structure */ 297 | const struct mkd_renderer mkd_xhtml = { 298 | NULL, 299 | NULL, 300 | 301 | rndr_blockcode, 302 | rndr_blockquote, 303 | rndr_raw_block, 304 | rndr_header, 305 | xhtml_hrule, 306 | rndr_list, 307 | rndr_listitem, 308 | rndr_paragraph, 309 | NULL, 310 | NULL, 311 | NULL, 312 | 313 | rndr_autolink, 314 | rndr_codespan, 315 | rndr_double_emphasis, 316 | rndr_emphasis, 317 | xhtml_image, 318 | xhtml_linebreak, 319 | rndr_link, 320 | rndr_raw_inline, 321 | rndr_triple_emphasis, 322 | 323 | NULL, 324 | rndr_normal_text, 325 | 326 | 64, 327 | "*_", 328 | NULL }; 329 | 330 | 331 | 332 | /********************** 333 | * DISCOUNT RENDERERS * 334 | **********************/ 335 | 336 | static int 337 | print_link_wxh(struct buf *ob, struct buf *link) { 338 | size_t eq, ex, end; 339 | if (link->size < 1) return 0; 340 | eq = link->size - 1; 341 | while (eq > 0 && (link->data[eq - 1] != ' ' || link->data[eq] != '=')) 342 | eq -= 1; 343 | if (!eq) return 0; 344 | ex = eq + 1; 345 | while (ex < link->size 346 | && link->data[ex] >= '0' && link->data[ex] <= '9') 347 | ex += 1; 348 | if (ex >= link->size || ex == eq + 1 || link->data[ex] != 'x') return 0; 349 | end = ex + 1; 350 | while (end < link->size 351 | && link->data[end] >= '0' && link->data[end] <= '9') 352 | end += 1; 353 | if (end == ex + 
1) return 0; 354 | /* everything is fine, proceeding to actual printing */ 355 | lus_attr_escape(ob, link->data, eq - 1); 356 | BUFPUTSL(ob, "\" width="); 357 | bufput(ob, link->data + eq + 1, ex - eq - 1); 358 | BUFPUTSL(ob, " height="); 359 | bufput(ob, link->data + ex + 1, end - ex - 1); 360 | return 1; } 361 | 362 | static int 363 | discount_image(struct buf *ob, struct buf *link, struct buf *title, 364 | struct buf *alt, int xhtml) { 365 | if (!link || !link->size) return 0; 366 | BUFPUTSL(ob, "<img src=\""); 367 | if (!print_link_wxh(ob, link)) { 368 | lus_attr_escape(ob, link->data, link->size); 369 | bufputc(ob, '"'); } 370 | BUFPUTSL(ob, " alt=\""); 371 | if (alt && alt->size) 372 | lus_attr_escape(ob, alt->data, alt->size); 373 | if (title && title->size) { 374 | BUFPUTSL(ob, "\" title=\""); 375 | lus_attr_escape(ob, title->data, title->size); } 376 | bufputs(ob, xhtml ? "\" />" : "\">"); 377 | return 1; } 378 | 379 | static int 380 | html_discount_image(struct buf *ob, struct buf *link, struct buf *title, 381 | struct buf *alt, void *opaque) { 382 | return discount_image(ob, link, title, alt, 0); } 383 | 384 | static int 385 | xhtml_discount_image(struct buf *ob, struct buf *link, struct buf *title, 386 | struct buf *alt, void *opaque) { 387 | return discount_image(ob, link, title, alt, 1); } 388 | 389 | static int 390 | discount_link(struct buf *ob, struct buf *link, struct buf *title, 391 | struct buf *content, void *opaque) { 392 | if (!link) return rndr_link(ob, link, title, content, opaque); 393 | else if (link->size > 5 && !strncasecmp(link->data, "abbr:", 5)) { 394 | BUFPUTSL(ob, "<abbr title=\""); 395 | lus_attr_escape(ob, link->data + 5, link->size - 5); 396 | BUFPUTSL(ob, "\">"); 397 | bufput(ob, content->data, content->size); 398 | BUFPUTSL(ob, "</abbr>"); 399 | return 1; } 400 | else if (link->size > 6 && !strncasecmp(link->data, "class:", 6)) { 401 | BUFPUTSL(ob, "<span class=\""); 402 | lus_attr_escape(ob, link->data + 6, link->size - 6); 
403 | BUFPUTSL(ob, "\">"); 404 | bufput(ob, content->data, content->size); 405 | BUFPUTSL(ob, "</span>"); 406 | return 1; } 407 | else if (link->size > 3 && !strncasecmp(link->data, "id:", 3)) { 408 | BUFPUTSL(ob, "<span id=\""); 409 | lus_attr_escape(ob, link->data + 3, link->size - 3); 410 | BUFPUTSL(ob, "\">"); 411 | bufput(ob, content->data, content->size); 412 | BUFPUTSL(ob, "</span>"); 413 | return 1; } 414 | else if (link->size > 4 && !strncasecmp(link->data, "raw:", 4)) { 415 | bufput(ob, link->data + 4, link->size - 4); 416 | return 1; } 417 | return rndr_link(ob, link, title, content, opaque); } 418 | 419 | static void 420 | discount_blockquote(struct buf *ob, struct buf *text, void *opaque) { 421 | size_t i = 5, size = text->size; 422 | char *data = text->data; 423 | if (text->size < 5 || strncasecmp(text->data, "<p>%", 4)) { 424 | rndr_blockquote(ob, text, opaque); 425 | return; } 426 | while (i < size && data[i] != '\n' && data[i] != '%') 427 | i += 1; 428 | if (i >= size || data[i] != '%') { 429 | rndr_blockquote(ob, text, opaque); 430 | return; } 431 | BUFPUTSL(ob, "<div class=\""); 432 | bufput(ob, text->data + 4, i - 4); 433 | BUFPUTSL(ob, "\"><p>"); 434 | i += 1; 435 | if (i + 4 >= text->size && !strncasecmp(text->data + i, "</p>", 4)) { 436 | size_t old_i = i; 437 | i += 4; 438 | while (i + 3 < text->size 439 | && (data[i] != '<' || data[i + 1] != 'p' || data[i + 2] != '>')) 440 | i += 1; 441 | if (i + 3 >= text->size) i = old_i; } 442 | bufput(ob, text->data + i, text->size - i); 443 | BUFPUTSL(ob, "</div>\n"); } 444 | 445 | static void 446 | discount_table(struct buf *ob, struct buf *head_row, struct buf *rows, 447 | void *opaque) { 448 | if (ob->size) bufputc(ob, '\n'); 449 | BUFPUTSL(ob, "<table>\n"); 450 | if (head_row) { 451 | BUFPUTSL(ob, "<thead>\n"); 452 | bufput(ob, head_row->data, head_row->size); 453 | BUFPUTSL(ob, "</thead>\n<tbody>\n"); } 454 | if (rows) 455 | bufput(ob, rows->data, rows->size); 456 | if (head_row) 457 | 
BUFPUTSL(ob, "</tbody>\n"); 458 | BUFPUTSL(ob, "</table>\n"); } 459 | 460 | static void 461 | discount_table_row(struct buf *ob, struct buf *cells, int flags, void *opaque){ 462 | (void)flags; 463 | BUFPUTSL(ob, " <tr>\n"); 464 | if (cells) bufput(ob, cells->data, cells->size); 465 | BUFPUTSL(ob, " </tr>\n"); } 466 | 467 | static void 468 | discount_table_cell(struct buf *ob, struct buf *text, int flags, void *opaque){ 469 | if (flags & MKD_CELL_HEAD) 470 | BUFPUTSL(ob, " <th"); 471 | else 472 | BUFPUTSL(ob, " <td"); 473 | switch (flags & MKD_CELL_ALIGN_MASK) { 474 | case MKD_CELL_ALIGN_LEFT: 475 | BUFPUTSL(ob, " align=\"left\""); 476 | break; 477 | case MKD_CELL_ALIGN_RIGHT: 478 | BUFPUTSL(ob, " align=\"right\""); 479 | break; 480 | case MKD_CELL_ALIGN_CENTER: 481 | BUFPUTSL(ob, " align=\"center\""); 482 | break; } 483 | bufputc(ob, '>'); 484 | if (text) bufput(ob, text->data, text->size); 485 | if (flags & MKD_CELL_HEAD) 486 | BUFPUTSL(ob, "</th>\n"); 487 | else 488 | BUFPUTSL(ob, "</td>\n"); } 489 | 490 | /* exported renderer structures */ 491 | const struct mkd_renderer discount_html = { 492 | NULL, 493 | NULL, 494 | 495 | rndr_blockcode, 496 | discount_blockquote, 497 | rndr_raw_block, 498 | rndr_header, 499 | html_hrule, 500 | rndr_list, 501 | rndr_listitem, 502 | rndr_paragraph, 503 | discount_table, 504 | discount_table_cell, 505 | discount_table_row, 506 | 507 | rndr_autolink, 508 | rndr_codespan, 509 | rndr_double_emphasis, 510 | rndr_emphasis, 511 | html_discount_image, 512 | html_linebreak, 513 | discount_link, 514 | rndr_raw_inline, 515 | rndr_triple_emphasis, 516 | 517 | NULL, 518 | rndr_normal_text, 519 | 520 | 64, 521 | "*_", 522 | NULL }; 523 | const struct mkd_renderer discount_xhtml = { 524 | NULL, 525 | NULL, 526 | 527 | rndr_blockcode, 528 | discount_blockquote, 529 | rndr_raw_block, 530 | rndr_header, 531 | xhtml_hrule, 532 | rndr_list, 533 | rndr_listitem, 534 | rndr_paragraph, 535 | discount_table, 536 | discount_table_cell, 537 | 
discount_table_row, 538 | 539 | rndr_autolink, 540 | rndr_codespan, 541 | rndr_double_emphasis, 542 | rndr_emphasis, 543 | xhtml_discount_image, 544 | xhtml_linebreak, 545 | discount_link, 546 | rndr_raw_inline, 547 | rndr_triple_emphasis, 548 | 549 | NULL, 550 | rndr_normal_text, 551 | 552 | 64, 553 | "*_", 554 | NULL }; 555 | 556 | 557 | /**************************** 558 | * NATACHA'S OWN EXTENSIONS * 559 | ****************************/ 560 | 561 | static void 562 | nat_span(struct buf *ob, struct buf *text, char *tag) { 563 | bufprintf(ob, "<%s>", tag); 564 | bufput(ob, text->data, text->size); 565 | bufprintf(ob, "</%s>", tag); } 566 | 567 | static int 568 | nat_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 569 | if (!text || !text->size || c == '+' || c == '-') return 0; 570 | if (c == '|') nat_span(ob, text, "span"); 571 | else nat_span(ob, text, "em"); 572 | return 1; } 573 | 574 | static int 575 | nat_double_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 576 | if (!text || !text->size || c == '|') return 0; 577 | if (c == '+') nat_span(ob, text, "ins"); 578 | else if (c == '-') nat_span(ob, text, "del"); 579 | else nat_span(ob, text, "strong"); 580 | return 1; } 581 | 582 | static int 583 | nat_triple_emphasis(struct buf *ob, struct buf *text, char c, void *opaque) { 584 | if (!text || !text->size || c == '+' || c == '-' || c == '|') return 0; 585 | BUFPUTSL(ob, "<strong><em>"); 586 | bufput(ob, text->data, text->size); 587 | BUFPUTSL(ob, "</em></strong>"); 588 | return 1; } 589 | 590 | static void 591 | nat_header(struct buf *ob, struct buf *text, int level, void *opaque) { 592 | size_t i = 0; 593 | if (ob->size) bufputc(ob, '\n'); 594 | while (i < text->size && (text->data[i] == '-' || text->data[i] == '_' 595 | || text->data[i] == '.' 
|| text->data[i] == ':' 596 | || (text->data[i] >= 'a' && text->data[i] <= 'z') 597 | || (text->data[i] >= 'A' && text->data[i] <= 'Z') 598 | || (text->data[i] >= '0' && text->data[i] <= '9'))) 599 | i += 1; 600 | bufprintf(ob, "<h%d", level); 601 | if (i < text->size && text->data[i] == '#') { 602 | bufprintf(ob, " id=\"%.*s\">", (int)i, text->data); 603 | i += 1; } 604 | else { 605 | bufputc(ob, '>'); 606 | i = 0; } 607 | bufput(ob, text->data + i, text->size - i); 608 | bufprintf(ob, "</h%d>\n", level); } 609 | 610 | static void 611 | nat_paragraph(struct buf *ob, struct buf *text, void *opaque) { 612 | size_t i = 0; 613 | if (ob->size) bufputc(ob, '\n'); 614 | BUFPUTSL(ob, "<p"); 615 | if (text && text->size && text->data[0] == '(') { 616 | i = 1; 617 | while (i < text->size && (text->data[i] == ' ' 618 | /* this seems to be a bit more restrictive than */ 619 | /* what is allowed for class names */ 620 | || (text->data[i] >= 'a' && text->data[i] <= 'z') 621 | || (text->data[i] >= 'A' && text->data[i] <= 'Z') 622 | || (text->data[i] >= '0' && text->data[i] <= '9'))) 623 | i += 1; 624 | if (i < text->size && text->data[i] == ')') { 625 | bufprintf(ob, " class=\"%.*s\"", 626 | (int)(i - 1), text->data + 1); 627 | i += 1; } 628 | else i = 0; } 629 | bufputc(ob, '>'); 630 | if (text) bufput(ob, text->data + i, text->size - i); 631 | BUFPUTSL(ob, "</p>\n"); } 632 | 633 | 634 | /* exported renderer structures */ 635 | const struct mkd_renderer nat_html = { 636 | NULL, 637 | NULL, 638 | 639 | rndr_blockcode, 640 | discount_blockquote, 641 | rndr_raw_block, 642 | nat_header, 643 | html_hrule, 644 | rndr_list, 645 | rndr_listitem, 646 | nat_paragraph, 647 | NULL, 648 | NULL, 649 | NULL, 650 | 651 | rndr_autolink, 652 | rndr_codespan, 653 | nat_double_emphasis, 654 | nat_emphasis, 655 | html_discount_image, 656 | html_linebreak, 657 | discount_link, 658 | rndr_raw_inline, 659 | nat_triple_emphasis, 660 | 661 | NULL, 662 | rndr_normal_text, 663 | 664 | 64, 665 | "*_-+|", 
666 | NULL }; 667 | const struct mkd_renderer nat_xhtml = { 668 | NULL, 669 | NULL, 670 | 671 | rndr_blockcode, 672 | discount_blockquote, 673 | rndr_raw_block, 674 | nat_header, 675 | xhtml_hrule, 676 | rndr_list, 677 | rndr_listitem, 678 | nat_paragraph, 679 | NULL, 680 | NULL, 681 | NULL, 682 | 683 | rndr_autolink, 684 | rndr_codespan, 685 | nat_double_emphasis, 686 | nat_emphasis, 687 | xhtml_discount_image, 688 | xhtml_linebreak, 689 | discount_link, 690 | rndr_raw_inline, 691 | nat_triple_emphasis, 692 | 693 | NULL, 694 | rndr_normal_text, 695 | 696 | 64, 697 | "*_-+|", 698 | NULL }; 699 | -------------------------------------------------------------------------------- /renderers.h: -------------------------------------------------------------------------------- 1 | /* renderers.h - example markdown renderers */ 2 | 3 | /* 4 | * Copyright (c) 2009, Natacha Porté 5 | * 6 | * Permission to use, copy, modify, and distribute this software for any 7 | * purpose with or without fee is hereby granted, provided that the above 8 | * copyright notice and this permission notice appear in all copies. 9 | * 10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17 | */ 18 | 19 | #ifndef MARKDOWN_RENDERERS_H 20 | #define MARKDOWN_RENDERERS_H 21 | 22 | #include "markdown.h" 23 | 24 | 25 | /***************************** 26 | * EXPORTED HELPER FUNCTIONS * 27 | *****************************/ 28 | 29 | /* lus_attr_escape • copy the buffer entity-escaping '<', '>', '&' and '"' */ 30 | void 31 | lus_attr_escape(struct buf *ob, const char *src, size_t size); 32 | 33 | /* lus_body_escape • copy the buffer entity-escaping '<', '>' and '&' */ 34 | void 35 | lus_body_escape(struct buf *ob, const char *src, size_t size); 36 | 37 | 38 | 39 | /*********************** 40 | * RENDERER STRUCTURES * 41 | ***********************/ 42 | 43 | /* original markdown renderers */ 44 | extern const struct mkd_renderer mkd_html; /* HTML 4 renderer */ 45 | extern const struct mkd_renderer mkd_xhtml; /* XHTML 1.0 renderer */ 46 | 47 | /* renderers with some discount extensions */ 48 | extern const struct mkd_renderer discount_html; 49 | extern const struct mkd_renderer discount_xhtml; 50 | 51 | /* renderers with Natasha's own extensions */ 52 | extern const struct mkd_renderer nat_html; 53 | extern const struct mkd_renderer nat_xhtml; 54 | 55 | #endif /* ndef MARKDOWN_RENDERERS_H */ 56 | -------------------------------------------------------------------------------- /soldout_array.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté <natacha@instinctive.eu> 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | .\" 16 | .Dd September 12, 2016 17 | .Dt SOLDOUT_ARRAY 3 18 | .Os 19 | .Sh NAME 20 | .Nm soldout_array , 21 | .Nm arr_adjust , 22 | .Nm arr_free , 23 | .Nm arr_grow , 24 | .Nm arr_init , 25 | .Nm arr_insert , 26 | .Nm arr_item , 27 | .Nm arr_newitem , 28 | .Nm arr_remove , 29 | .Nm arr_sorted_find , 30 | .Nm arr_sorted_find_i , 31 | .Nm parr_adjust , 32 | .Nm parr_free , 33 | .Nm parr_grow , 34 | .Nm parr_init , 35 | .Nm parr_insert , 36 | .Nm parr_pop , 37 | .Nm parr_push , 38 | .Nm parr_remove , 39 | .Nm parr_sorted_find , 40 | .Nm parr_sorted_find_i , 41 | .Nm parr_top 42 | .Nd array handling functions for soldout 43 | .Sh SYNOPSIS 44 | .In array.h 45 | .Ft int 46 | .Fo (*array_cmp_fn) 47 | .Fa "void *key" 48 | .Fa "void *array_entry" 49 | .Fc 50 | .Ft int 51 | .Fo arr_adjust 52 | .Fa "struct array *arr" 53 | .Fc 54 | .Ft void 55 | .Fo arr_free 56 | .Fa "struct array *arr" 57 | .Fc 58 | .Ft int 59 | .Fo arr_grow 60 | .Fa "struct array *arr" 61 | .Fa "int sz" 62 | .Fc 63 | .Ft void 64 | .Fo arr_init 65 | .Fa "struct array *arr" 66 | .Fa "size_t unit" 67 | .Fc 68 | .Ft int 69 | .Fo arr_insert 70 | .Fa "struct array *arr" 71 | .Fa "int nb" 72 | .Fa "int n" 73 | .Fc 74 | .Ft "void *" 75 | .Fo arr_item 76 | .Fa "struct array *arr" 77 | .Fa "int no" 78 | .Fc 79 | .Ft int 80 | .Fo arr_newitem 81 | .Fa "struct array *arr" 82 | .Fc 83 | .Ft void 84 | .Fo arr_remove 85 | .Fa "struct array *arr" 86 | .Fa "int idx" 87 | .Fc 88 | .Ft "void *" 89 | .Fo arr_sorted_find 90 | .Fa "struct array *arr" 91 | .Fa "void *key" 92 | .Fa "array_cmp_fn cmp" 93 | .Fc 94 | .Ft int 95 | .Fo arr_sorted_find_i 96 | .Fa "struct array *arr" 
97 | .Fa "void *key" 98 | .Fa "array_cmp_fn cmp" 99 | .Fc 100 | .Ft int 101 | .Fo parr_adjust 102 | .Fa "struct parray *arr" 103 | .Fc 104 | .Ft void 105 | .Fo parr_free 106 | .Fa "struct parray *arr" 107 | .Fc 108 | .Ft int 109 | .Fo parr_grow 110 | .Fa "struct parray *arr" 111 | .Fa "int sz" 112 | .Fc 113 | .Ft void 114 | .Fo parr_init 115 | .Fa "struct parray *arr" 116 | .Fc 117 | .Ft int 118 | .Fo parr_insert 119 | .Fa "struct parray *parr" 120 | .Fa "int nb" 121 | .Fa "int n" 122 | .Fc 123 | .Ft "void *" 124 | .Fo parr_pop 125 | .Fa "struct parray *arr" 126 | .Fc 127 | .Ft int 128 | .Fo parr_push 129 | .Fa "struct parray *arr" 130 | .Fa "void *i" 131 | .Fc 132 | .Ft "void *" 133 | .Fo parr_remove 134 | .Fa "struct parray *arr" 135 | .Fa "int idx" 136 | .Fc 137 | .Ft "void *" 138 | .Fo parr_sorted_find 139 | .Fa "struct parray *arr" 140 | .Fa "void *key" 141 | .Fa "array_cmp_fn cmp" 142 | .Fc 143 | .Ft int 144 | .Fo parr_sorted_find_i 145 | .Fa "struct parray *arr" 146 | .Fa "void *key" 147 | .Fa "array_cmp_fn cmp" 148 | .Fc 149 | .Ft "void *" 150 | .Fo parr_top 151 | .Fa "struct parray *arr" 152 | .Fc 153 | .Sh DESCRIPTION 154 | .Ss Types 155 | .Bl -ohang 156 | .It Vt "struct array" 157 | generic linear array. 158 | Has this form: 159 | .Bd -literal -offset indent 160 | struct array { 161 | void *base; 162 | int size; 163 | int asize; 164 | size_t unit; 165 | }; 166 | .Ed 167 | .It Vt "struct parray" 168 | array of pointers. 169 | Has this form: 170 | .Bd -literal -offset indent 171 | struct parray { 172 | void **item; 173 | int size; 174 | int asize; 175 | }; 176 | .Ed 177 | .It Vt array_cmp_fn 178 | comparison function for sorted arrays. 179 | .El 180 | .Ss Functions 181 | .Bl -ohang 182 | .It Fn arr_adjust 183 | shrink the allocated memory to fit exactly the needs. 184 | .It Fn arr_free 185 | free the structure contents 186 | .Pq but NOT the struct itself . 187 | .It Fn arr_grow 188 | increase the array size to fit the given number of elements. 
189 | .It Fn arr_init 190 | initialize the contents of the struct. 191 | .It Fn arr_insert 192 | insert 193 | .Fa nb 194 | elements before the 195 | .Fa n 196 | one. 197 | .It Fn arr_item 198 | return a pointer to the 199 | .Fa n 200 | element. 201 | .It Fn arr_newitem 202 | return the index of a new element appended to the array 203 | .Fa arr . 204 | .It Fn arr_remove 205 | remove the n-th element of the array. 206 | .It Fn arr_sorted_find 207 | O(log n) search in a sorted array, returning entry. 208 | .It Fn arr_sorted_find_i 209 | O(log n) search in a sorted array, 210 | returning index of the smallest element larger than the key. 211 | .It Fn parr_adjust 212 | shrink the allocated memory to fit exactly the needs. 213 | .It Fn parr_free 214 | free the structure contents 215 | .Pq but NOT the struct itself . 216 | .It Fn parr_grow 217 | increase the array size to fit the given number of elements. 218 | .It Fn parr_init 219 | initialize the contents of the struct. 220 | .It Fn parr_insert 221 | insert 222 | .Fa nb 223 | elements before the 224 | .Fa n 225 | one. 226 | .It Fn parr_pop 227 | pop the last item of the array and return it. 228 | .It Fn parr_push 229 | push a pointer at the end of the array 230 | .Pq = append . 231 | .It Fn parr_remove 232 | remove the 233 | .Fa idx 234 | element of the array and return it. 235 | .It Fn parr_sorted_find 236 | O(log n) search in a sorted array, returning entry. 237 | .It Fn parr_sorted_find_i 238 | O(log n) search in a sorted array, 239 | returning index of the smallest element larger than the key. 240 | .It Fn parr_top 241 | return the top of the stack 242 | .Pq i.e. the last element of the array . 243 | .El 244 | .Sh RETURN VALUES 245 | The 246 | .Fn arr_adjust , 247 | .Fn arr_grow , 248 | .Fn arr_insert , 249 | .Fn parr_adjust , 250 | .Fn parr_grow , 251 | .Fn parr_insert 252 | and 253 | .Fn parr_push 254 | functions return on success 1; on error - 0.
255 | .Pp 256 | The 257 | .Fn arr_item , 258 | .Fn arr_sorted_find , 259 | .Fn parr_pop , 260 | .Fn parr_remove , 261 | .Fn parr_sorted_find 262 | and 263 | .Fn parr_top 264 | functions return a pointer to the element on success; on error - 265 | .Dv NULL . 266 | .Pp 267 | The 268 | .Fn arr_newitem 269 | function returns the index on success; on error -1. 270 | .Pp 271 | The 272 | .Fn arr_sorted_find_i 273 | and 274 | .Fn parr_sorted_find_i 275 | functions return an index. 276 | .Sh SEE ALSO 277 | .Xr soldout_markdown 3 278 | .Sh AUTHORS 279 | .An -nosplit 280 | The 281 | .Nm soldout 282 | library 283 | was written by 284 | .An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu . 285 | Manual page was originally written by 286 | .An Massimo Manghi Aq Mt mxmanghi@apache.org , 287 | and rewritten to mdoc format by 288 | .An Svyatoslav Mishyn Aq Mt juef@openmailbox.org . 289 | -------------------------------------------------------------------------------- /soldout_buffer.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté <natacha@instinctive.eu> 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | .\" 16 | .Dd September 12, 2016 17 | .Dt SOLDOUT_BUFFER 3 18 | .Os 19 | .Sh NAME 20 | .Nm soldout_buffer , 21 | .Nm bufcasecmp , 22 | .Nm bufcmp , 23 | .Nm bufcmps , 24 | .Nm bufdup , 25 | .Nm bufgrow , 26 | .Nm bufnew , 27 | .Nm bufnullterm , 28 | .Nm bufprintf , 29 | .Nm bufput , 30 | .Nm bufputs , 31 | .Nm bufputc , 32 | .Nm bufrelease , 33 | .Nm bufreset , 34 | .Nm bufset , 35 | .Nm bufslurp , 36 | .Nm buftoi , 37 | .Nm vbufprintf 38 | .Nd buffer handling functions for soldout 39 | .Sh SYNOPSIS 40 | .In buffer.h 41 | .Pp 42 | .Fd "#define CONST_BUF(name, string)" 43 | .Fd "#define VOLATILE_BUF(name, strname)" 44 | .Fd "#define BUFPUTSL(output, literal)" 45 | .Ft int 46 | .Fo bufcasecmp 47 | .Fa "const struct buf *a" 48 | .Fa "const struct buf *b" 49 | .Fc 50 | .Ft int 51 | .Fo bufcmp 52 | .Fa "const struct buf *a" 53 | .Fa "const struct buf *b" 54 | .Fc 55 | .Ft int 56 | .Fo bufcmps 57 | .Fa "const struct buf *a" 58 | .Fa "const char *b" 59 | .Fc 60 | .Ft "struct buf *" 61 | .Fo bufdup 62 | .Fa "const struct buf *src" 63 | .Fa "size_t dupunit" 64 | .Fc 65 | .Ft int 66 | .Fo bufgrow 67 | .Fa "struct buf *buf" 68 | .Fa "size_t sz" 69 | .Fc 70 | .Ft "struct buf *" 71 | .Fo bufnew 72 | .Fa "size_t unit" 73 | .Fc 74 | .Ft void 75 | .Fo bufnullterm 76 | .Fa "struct buf *buf" 77 | .Fc 78 | .Ft void 79 | .Fo bufprintf 80 | .Fa "struct buf *buf" 81 | .Fa "const char *fmt" 82 | .Fa ... 
83 | .Fc 84 | .Ft void 85 | .Fo bufput 86 | .Fa "struct buf *buf" 87 | .Fa "const void *data" 88 | .Fa "size_t len" 89 | .Fc 90 | .Ft void 91 | .Fo bufputs 92 | .Fa "struct buf *buf" 93 | .Fa "const char *str" 94 | .Fc 95 | .Ft void 96 | .Fo bufputc 97 | .Fa "struct buf *buf" 98 | .Fa "char c" 99 | .Fc 100 | .Ft void 101 | .Fo bufrelease 102 | .Fa "struct buf *buf" 103 | .Fc 104 | .Ft void 105 | .Fo bufreset 106 | .Fa "struct buf *buf" 107 | .Fc 108 | .Ft void 109 | .Fo bufset 110 | .Fa "struct buf **dest" 111 | .Fa "struct buf *src" 112 | .Fc 113 | .Ft void 114 | .Fo bufslurp 115 | .Fa "struct buf *buf" 116 | .Fa "size_t len" 117 | .Fc 118 | .Ft int 119 | .Fo buftoi 120 | .Fa "struct buf *buf" 121 | .Fa "size_t offset_i" 122 | .Fa "size_t *offset_o" 123 | .Fc 124 | .Ft void 125 | .Fo vbufprintf 126 | .Fa "struct buf *buf" 127 | .Fa "const char *fmt" 128 | .Fa "va_list ap" 129 | .Fc 130 | .Vt extern long buffer_stat_nb; 131 | .Vt extern size_t buffer_stat_alloc_bytes; 132 | .Sh DESCRIPTION 133 | .Ss Variables 134 | Compile time options. 135 | Statistics are kept about memory usage. 136 | .Bl -ohang 137 | .It Va buffer_stat_nb 138 | show how many buffers were created. 139 | .It Va buffer_stat_alloc_bytes 140 | show how many bytes were allocated. 141 | .El 142 | .Ss Types 143 | .Bl -ohang 144 | .It Vt "struct buf" 145 | character array buffer. 146 | Has this form: 147 | .Bd -literal -offset indent 148 | struct buf { 149 | char *data; /* actual character data */ 150 | size_t size; /* size of the string */ 151 | size_t asize; /* allocated size (0 = volatile buffer) */ 152 | size_t unit; /* reallocation unit size (0 = read-only buffer) */ 153 | int ref; /* reference count */ 154 | }; 155 | .Ed 156 | .El 157 | .Ss Macros 158 | .Bl -ohang 159 | .It Dv CONST_BUF 160 | create a global buffer 161 | .Va name 162 | from a string literal 163 | .Va string . 
164 | .It Dv VOLATILE_BUF 165 | create a volatile buffer 166 | .Va name 167 | on the stack from a string 168 | .Va strname . 169 | .It Dv BUFPUTSL 170 | optimized 171 | .Fn bufputs 172 | of a string literal. 173 | .El 174 | .Ss Functions 175 | .Bl -ohang 176 | .It Fn bufcasecmp 177 | compare two buffers ignoring case. 178 | .It Fn bufcmp 179 | compare two buffers. 180 | .It Fn bufcmps 181 | compare a buffer to a string. 182 | .It Fn bufdup 183 | duplicate a buffer 184 | .Va src . 185 | .It Fn bufgrow 186 | increase the allocated size to the size 187 | .Va sz . 188 | .It Fn bufnew 189 | create a new buffer. 190 | .It Fn bufnullterm 191 | terminate the string array by NUL 192 | .Pq making a C-string . 193 | .It Fn bufprintf 194 | print formatted output to a buffer 195 | .Va buf . 196 | .It Fn bufput 197 | append raw data to a buffer 198 | .Va buf . 199 | .It Fn bufputs 200 | append a NUL-terminated string 201 | .Va str 202 | to a buffer 203 | .Va buf . 204 | .It Fn bufputc 205 | append a single char 206 | .Va c 207 | to a buffer 208 | .Va buf . 209 | .It Fn bufrelease 210 | decrease the reference count and free the buffer 211 | .Va buf 212 | if needed. 213 | .It Fn bufreset 214 | free internal data of the buffer 215 | .Va buf . 216 | .It Fn bufset 217 | safely assign a buffer to another. 218 | .It Fn bufslurp 219 | remove a given number of bytes from the head of the array. 220 | .It Fn buftoi 221 | convert the numbers at the beginning of the buffer 222 | .Va buf 223 | into an 224 | .Vt int . 225 | .It Fn vbufprintf 226 | .Xr stdarg 3 227 | variant of formatted printing into a buffer 228 | .Va buf . 229 | .El 230 | .Sh RETURN VALUES 231 | The 232 | .Fn bufcasecmp , 233 | .Fn bufcmp 234 | and 235 | .Fn bufcmps 236 | functions return an integer less than, equal to, or greater than zero if 237 | .Va a 238 | is found, respectively, to be less than, to match, or be greater than 239 | .Va b . 
240 | .Pp 241 | The 242 | .Fn bufdup 243 | and 244 | .Fn bufnew 245 | functions return a 246 | .Vt "struct buf *" 247 | on success; on error they return 248 | .Dv NULL . 249 | .Pp 250 | The 251 | .Fn bufgrow 252 | function returns on success 1; on error - 0. 253 | .Pp 254 | The 255 | .Fn buftoi 256 | function returns the converted value. 257 | .Sh SEE ALSO 258 | .Xr soldout_markdown 3 , 259 | .Xr stdarg 3 260 | .Sh AUTHORS 261 | .An -nosplit 262 | The 263 | .Nm soldout 264 | library 265 | was written by 266 | .An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu . 267 | Manual page was originally written by 268 | .An Massimo Manghi Aq Mt mxmanghi@apache.org , 269 | and rewritten to mdoc format by 270 | .An Svyatoslav Mishyn Aq Mt juef@openmailbox.org . 271 | -------------------------------------------------------------------------------- /soldout_markdown.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté <natacha@instinctive.eu> 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 | .\" 16 | .Dd September 12, 2016 17 | .Dt SOLDOUT_MARKDOWN 3 18 | .Os 19 | .Sh NAME 20 | .Nm soldout_markdown , 21 | .Nm markdown 22 | .Nd parse markdown document 23 | .Sh SYNOPSIS 24 | .In markdown.h 25 | .Pp 26 | .Fd "#define MKD_CELL_ALIGN_DEFAULT" 27 | .Fd "#define MKD_CELL_ALIGN_LEFT" 28 | .Fd "#define MKD_CELL_ALIGN_RIGHT" 29 | .Fd "#define MKD_CELL_ALIGN_CENTER" 30 | .Fd "#define MKD_CELL_ALIGN_MASK" 31 | .Fd "#define MKD_CELL_HEAD" 32 | .Fd "#define MKD_LIST_ORDERED" 33 | .Fd "#define MKD_LI_BLOCK" 34 | .Ft void 35 | .Fo markdown 36 | .Fa "struct buf *ob" 37 | .Fa "struct buf *ib" 38 | .Fa "const struct mkd_renderer *rndr" 39 | .Fc 40 | .Sh DESCRIPTION 41 | The 42 | .Fn markdown 43 | function parses the input buffer 44 | .Fa ib 45 | and renders it into the output buffer 46 | .Fa ob ; 47 | .Fa rndr 48 | is a pointer to the renderer structure. 49 | .Pp 50 | The following describes a general parse sequence: 51 | .Bl -enum 52 | .It 53 | Create output, input buffers by 54 | .Fn bufnew 55 | function. 56 | .It 57 | Fill input buffer by 58 | .Fn bufput 59 | function. 60 | .It 61 | Create 62 | .Vt "struct mkd_renderer" 63 | or use provided renderer. 64 | .It 65 | Call 66 | .Fn markdown 67 | function. 68 | .It 69 | Process output buffer. 70 | .It 71 | Call 72 | .Fn bufrelease 73 | function to clean up buffers. 74 | .El 75 | .Sh REFERENCE 76 | This section documents the functions, types, definitions available via 77 | .In markdown.h . 78 | .Ss Types 79 | .Bl -ohang 80 | .It Vt "enum mkd_autolink" 81 | type of autolink: 82 | .Bl -tag -width Ds 83 | .It MKDA_NORMAL 84 | normal http/https/ftp link. 85 | .It MKDA_EXPLICIT_EMAIL 86 | e-mail link with explicit mailto. 87 | .It MKDA_IMPLICIT_EMAIL 88 | e-mail link without mailto. 
89 | .El 90 | .It Vt "struct mkd_renderer" 91 | has this form: 92 | .Bd -literal -offset indent 93 | struct mkd_renderer { 94 | /* document level callbacks */ 95 | void (*prolog)(struct buf *ob, void *opaque); 96 | void (*epilog)(struct buf *ob, void *opaque); 97 | 98 | /* block level callbacks - NULL skips the block */ 99 | void (*blockcode)(struct buf *ob, 100 | struct buf *text, 101 | void *opaque); 102 | void (*blockquote)(struct buf *ob, 103 | struct buf *text, 104 | void *opaque); 105 | void (*blockhtml)(struct buf *ob, 106 | struct buf *text, 107 | void *opaque); 108 | void (*header)(struct buf *ob, 109 | struct buf *text, int level, 110 | void *opaque); 111 | void (*hrule)(struct buf *ob, void *opaque); 112 | void (*list)(struct buf *ob, 113 | struct buf *text, int flags, 114 | void *opaque); 115 | void (*listitem)(struct buf *ob, 116 | struct buf *text, int flags, 117 | void *opaque); 118 | void (*paragraph)(struct buf *ob, 119 | struct buf *text, 120 | void *opaque); 121 | void (*table)(struct buf *ob, 122 | struct buf *head_row, struct buf *rows, 123 | void *opaque); 124 | void (*table_cell)(struct buf *ob, 125 | struct buf *text, int flags, 126 | void *opaque); 127 | void (*table_row)(struct buf *ob, 128 | struct buf *cells, int flags, 129 | void *opaque); 130 | 131 | /* span level callbacks - NULL or return 0 prints the span verbatim */ 132 | int (*autolink)(struct buf *ob, 133 | struct buf *link, enum mkd_autolink type, 134 | void *opaque); 135 | int (*codespan)(struct buf *ob, 136 | struct buf *text, 137 | void *opaque); 138 | int (*emphasis)(struct buf *ob, 139 | struct buf *text, char c, 140 | void *opaque); 141 | int (*double_emphasis)(struct buf *ob, 142 | struct buf *text, char c, 143 | void *opaque); 144 | int (*triple_emphasis)(struct buf *ob, 145 | struct buf *text, char c, 146 | void *opaque); 147 | int (*image)(struct buf *ob, 148 | struct buf *link, struct buf *title, struct buf *alt, 149 | void *opaque); 150 | int (*linebreak)(struct buf 
*ob, void *opaque); 151 | int (*link)(struct buf *ob, 152 | struct buf *link, struct buf *title, struct buf *content, 153 | void *opaque); 154 | int (*raw_html_tag)(struct buf *ob, 155 | struct buf *tag, 156 | void *opaque); 157 | 158 | /* low level callbacks - NULL copies input directly into the output */ 159 | void (*entity)(struct buf *ob, 160 | struct buf *entity, 161 | void *opaque); 162 | void (*normal_text)(struct buf *ob, 163 | struct buf *text, 164 | void *opaque); 165 | 166 | /* renderer data */ 167 | int max_work_stack; /* prevent arbitrary deep recursion, cf README */ 168 | const char *emph_chars; /* chars that trigger emphasis rendering */ 169 | void *opaque; /* opaque data sent to every rendering callback */ 170 | }; 171 | .Ed 172 | .Pp 173 | The first argument of a renderer function is always the output buffer, 174 | where the function is supposed to write its output. 175 | The last argument of a renderer function is always 176 | a private pointer, which is 177 | .Va opaque 178 | member of 179 | .Vt struct mkd_renderer . 180 | libsoldout itself never does anything with this data. 181 | .Pp 182 | Function pointers in 183 | .Vt "struct mkd_renderer" 184 | can be 185 | .Dv NULL . 186 | A null block-level callback will make the corresponding block 187 | disappear from the output, as if the callback was an empty function. 188 | A null span-level callback will cause the corresponding element 189 | to be treated as normal characters, copied verbatim to the output. 190 | Moreover, span-level callbacks return an integer, which tells 191 | whether the renderer accepts to render the item (non-zero return value) 192 | or whether it should be copied verbatim (zero return value). 193 | .Pp 194 | .Fa flags 195 | of the 196 | .Va list 197 | and 198 | .Va listitem 199 | function callbacks are: 200 | .Dv MKD_LIST_ORDERED , 201 | .Dv MKD_LI_BLOCK .
202 | .Pp 203 | .Fa flags 204 | of the 205 | .Va table_cell 206 | and 207 | .Va table_row 208 | function callbacks are: 209 | .Dv MKD_CELL_ALIGN_DEFAULT , 210 | .Dv MKD_CELL_ALIGN_LEFT , 211 | .Dv MKD_CELL_ALIGN_RIGHT , 212 | .Dv MKD_CELL_ALIGN_CENTER , 213 | .Dv MKD_CELL_ALIGN_MASK , 214 | .Dv MKD_CELL_HEAD . 215 | .Pp 216 | The 217 | .Va normal_text 218 | callback should perform whatever escape is needed 219 | to have the output looking like the input data. 220 | .Pp 221 | .Va emph_chars 222 | is a zero-terminated string which contains 223 | the set of characters that trigger emphasis. 224 | In regular markdown, emphasis is only 225 | triggered by 226 | .Sq _ 227 | and 228 | .Sq * , 229 | but in some extensions it might be useful to 230 | add other characters to this list. 231 | The character that triggered the emphasis is then passed to 232 | .Va emphasis , double_emphasis 233 | and 234 | .Va triple_emphasis 235 | function callbacks through the parameter 236 | .Fa c . 237 | .El 238 | .Sh EXAMPLES 239 | Simple example that uses the first argument as a markdown string, 240 | converts it to HTML and outputs it to stdout. 241 | .Bd -literal 242 | #include <stdio.h> 243 | 244 | #include <buffer.h> 245 | #include <markdown.h> 246 | #include <renderers.h> 247 | 248 | #define INPUT_UNIT 1024 249 | #define OUTPUT_UNIT 64 250 | 251 | int 252 | main(int argc, char *argv[]) 253 | { 254 | struct buf *ib, *ob; 255 | 256 | /* Make sure we have enough arguments. */ 257 | if (argc != 2) { 258 | return 1; 259 | } 260 | 261 | ib = bufnew(INPUT_UNIT); 262 | ob = bufnew(OUTPUT_UNIT); 263 | 264 | /* bufputs() is a wrapper over bufput() for NUL-terminated strings. */ 265 | bufputs(ib, argv[1]); 266 | 267 | markdown(ob, ib, &mkd_html); 268 | 269 | /* Note the resulting data is not a NUL-terminated string; 270 | * use bufnullterm() to make it one.
*/ 271 | printf("%.*s", (int)ob->size, ob->data); 272 | 273 | bufrelease(ib); 274 | bufrelease(ob); 275 | return 0; 276 | } 277 | .Ed 278 | .Sh SEE ALSO 279 | .Xr soldout_array 3 , 280 | .Xr soldout_buffer 3 , 281 | .Xr soldout_renderers 3 , 282 | .Lk http://daringfireball.net/projects/markdown/ John Gruber's markdown format 283 | .Sh AUTHORS 284 | .An -nosplit 285 | The 286 | .Nm soldout 287 | library was written by 288 | .An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu . 289 | Manual page was originally written by 290 | .An Massimo Manghi Aq Mt mxmanghi@apache.org , 291 | and rewritten to mdoc format by 292 | .An Svyatoslav Mishyn Aq Mt juef@openmailbox.org . 293 | -------------------------------------------------------------------------------- /soldout_renderers.3: -------------------------------------------------------------------------------- 1 | .\" 2 | .\" Copyright (c) 2009 - 2016 Natacha Porté <natacha@instinctive.eu> 3 | .\" 4 | .\" Permission to use, copy, modify, and distribute this software for any 5 | .\" purpose with or without fee is hereby granted, provided that the above 6 | .\" copyright notice and this permission notice appear in all copies. 7 | .\" 8 | .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | .\" 16 | .Dd September 12, 2016 17 | .Dt SOLDOUT_RENDERERS 3 18 | .Os 19 | .Sh NAME 20 | .Nm soldout_renderers , 21 | .Nm lus_attr_escape , 22 | .Nm lus_body_escape 23 | .Nd various markdown to (X)HTML renderers for soldout 24 | .Sh SYNOPSIS 25 | .In renderers.h 26 | .Ft void 27 | .Fo lus_attr_escape 28 | .Fa "struct buf *ob" 29 | .Fa "const char *str" 30 | .Fa "size_t len" 31 | .Fc 32 | .Ft void 33 | .Fo lus_body_escape 34 | .Fa "struct buf *ob" 35 | .Fa "const char *str" 36 | .Fa "size_t len" 37 | .Fc 38 | .Vt extern const struct mkd_renderer mkd_html; 39 | .Vt extern const struct mkd_renderer mkd_xhtml; 40 | .Vt extern const struct mkd_renderer discount_html; 41 | .Vt extern const struct mkd_renderer discount_xhtml; 42 | .Vt extern const struct mkd_renderer nat_html; 43 | .Vt extern const struct mkd_renderer nat_xhtml; 44 | .Sh DESCRIPTION 45 | The 46 | .Fn lus_attr_escape 47 | and 48 | .Fn lus_body_escape 49 | functions escape all problematic characters in (X)HTML: 50 | .Sq < , 51 | .Sq > , 52 | .Sq & , 53 | .Sq \(dq ; 54 | and 55 | .Sq < , 56 | .Sq > , 57 | .Sq & 58 | respectively. 59 | They accept a string 60 | .Va str 61 | of the length 62 | .Va len 63 | and output into 64 | .Va ob 65 | buffer. 66 | .Pp 67 | All provided renderers come with two flavors, 68 | .Dq _html 69 | producing HTML code (self-closing tags are rendered like this: <hr>), 70 | and 71 | .Dq _xhtml 72 | producing XHTML code (self-closing tags like <hr />). 73 | .Pp 74 | .Va mkd_html 75 | and 76 | .Va mkd_xhtml 77 | implement standard markdown to (X)HTML translation without any extensions. 78 | .Pp 79 | .Va discount_html 80 | and 81 | .Va discount_xhtml 82 | implement on top of the standard markdown some of the extensions 83 | found in Discount plus PHP-Markdown-like tables. 
84 | Here is a list of all extensions included in these renderers: 85 | .Bl -bullet -width 1m 86 | .It 87 | image size specification, by appending 88 | .Do =(width)x(height) Dc to the link 89 | .It 90 | pseudo-protocols in links: 91 | .Bl -bullet -width 1m 92 | .It 93 | .Do abbr:description Dc for <abbr title="description">...</abbr> 94 | .It 95 | .Do class:name Dc for <span class="name">...</span> 96 | .It 97 | .Do id:name Dc for <span id="name">...</span> 98 | .It 99 | .Do raw:text Dc for verbatim unprocessed text inclusion 100 | .El 101 | .It 102 | class blocks: blockquotes beginning with 103 | .Do %class% Dc will be rendered as a div of the given class(es) 104 | .El 105 | .Pp 106 | .Va nat_html 107 | and 108 | .Va nat_xhtml 109 | implement on top of the Discount extensions Natasha's own extensions. 110 | Here is a list of these extensions: 111 | .Bl -bullet -width 1m 112 | .It 113 | id attribute for headers, using the syntax 114 | .Dq id#Header text 115 | .It 116 | class attribute for paragraphs, by putting class name(s) 117 | between parentheses at the very beginning of the paragraph 118 | .It 119 | <ins> and <del> spans, using respectively 120 | .Do ++ Dc and Do -- Dc 121 | as delimiters (with emphasis-like restrictions, 122 | i.e. an opening delimiter cannot be followed by a whitespace, 123 | and a closing delimiter cannot be preceded by a whitespace) 124 | .It 125 | plain <span> without attribute, using emphasis-like delimiter 126 | .Sq | 127 | .El 128 | .Sh SEE ALSO 129 | .Xr soldout_buffer 3 , 130 | .Xr soldout_markdown 3 , 131 | .Lk https://www.w3.org/TR/html/ HTML specification , 132 | .Lk http://www.pell.portland.or.us/~orc/Code/discount/ Discount , 133 | .Lk https://michelf.ca/projects/php-markdown/extra/#table PHP-Markdown tables 134 | .Sh AUTHORS 135 | .An -nosplit 136 | The 137 | .Nm soldout 138 | library 139 | was written by 140 | .An Natasha Qo Kerensikova Qc Porte Aq Mt natacha@instinctive.eu .
141 | Manual page was originally written by 142 | .An Massimo Manghi Aq Mt mxmanghi@apache.org , 143 | and rewritten to mdoc format by 144 | .An Svyatoslav Mishyn Aq Mt juef@openmailbox.org . 145 | --------------------------------------------------------------------------------