├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── configure ├── md2mime.c ├── smtp.c ├── smtp.h ├── vendor └── uthash.h ├── wrap.c └── wrap.h /.gitignore: -------------------------------------------------------------------------------- 1 | config.mk 2 | *.o 3 | *.dSYM 4 | mimebox 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Joe Nelson 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 9 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 11 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 12 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 13 | PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | OBJS = wrap.o smtp.o 4 | CFLAGS = -std=c99 -g -pedantic -Wall -Wextra -Wshadow 5 | LDLIBS = -lm 6 | POSIX = -D_POSIX_C_SOURCE=200112L 7 | 8 | .SUFFIXES : 9 | .SUFFIXES : .o .c 10 | 11 | include config.mk 12 | 13 | md2mime : md2mime.c $(OBJS) 14 | $(CC) $(CFLAGS) $(POSIX) $(LDFLAGS) -o $@ md2mime.c $(OBJS) $(LDLIBS) 15 | 16 | wrap.o : wrap.c wrap.h 17 | 18 | smtp.o : smtp.c smtp.h 19 | 20 | clean : 21 | rm -f md2mime *.o 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Markdown to multipart MIME 2 | 3 | Answering the age-old debate: HTML or plaintext emails? Both! 4 | 5 | Write email in markdown, and convert to multipart MIME that works impeccably in 6 | graphical and console mail clients. 7 | 8 | * Creates a conservative [ISO/IEC 15445:2000 9 | HTML](https://www.iso.org/standard/27688.html) representation, which is set 10 | as the preferred multipart alternative for graphical clients. 11 | * The text alternative is reflowed at 72 characters to look good in standard 12 | console mail readers. 13 | * The text part also uses format=flowed, so it will shrink on narrow screens or 14 | terminal splits. 15 | * Fenced code blocks in the markdown are added as inline attachments with the 16 | appropriate MIME type. This makes them easy to download/save from the 17 | message. 18 | * URLs are listed at the bottom of the text message in a special 19 | `text/uri-list` inline attachment, and referred to with short codes like 20 | `[0]`, `[1]` ... in the message text. 21 | 22 | ### Installation 23 | 24 | Written in portable C99. The only requirement is the 25 | [cmark](https://github.com/commonmark/cmark) library to parse markdown. 26 | 27 | ```sh 28 | # detect cmark and set up build flags 29 | ./configure 30 | # then build md2mime 31 | make 32 | ``` 33 | 34 | ### Usage 35 | 36 | ```sh 37 | ./md2mime sender-domain.com < message.md > message.email 38 | ``` 39 | 40 | The sender domain is required to generate a good Message-ID. 41 | 42 | If the original message starts with headers, pass `-p` to preserve them. Send 43 | the message using a program like [msmtp](https://marlam.de/msmtp/): 44 | 45 | ```sh 46 | msmtp recipient@example.com < message.email 47 | ``` 48 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh -eu 2 | 3 | rm -f config.mk 4 | 5 | pkg-config --print-errors 'libcmark >= 0.20' 6 | 7 | cat >> config.mk <<-EOF 8 | CFLAGS += $(pkg-config --cflags libcmark) 9 | LDFLAGS += $(pkg-config --libs-only-L libcmark) 10 | LDLIBS += $(pkg-config --libs-only-l libcmark) 11 | EOF 12 | 13 | compiles () 14 | { 15 | stage="$(mktemp -d)" 16 | echo "$2" > "$stage/test.c" 17 | (cc -Werror "$1" -o "$stage/test" "$stage/test.c" >/dev/null 2>&1) 18 | cc_success=$? 19 | rm -rf "$stage" 20 | return $cc_success 21 | } 22 | 23 | if compiles "" " 24 | #include 25 | #include 26 | int main(void) 27 | { 28 | void (*p)(void *, size_t) = arc4random_buf; 29 | return (intptr_t)p; 30 | }" 31 | then 32 | echo "CFLAGS += -DHAVE_ARC4RANDOM" >> config.mk 33 | fi 34 | 35 | if compiles "-D_POSIX_C_SOURCE=200112L" " 36 | #include 37 | #include 38 | #include 39 | int main(void) 40 | { 41 | ssize_t (*p)(void *, size_t, unsigned int) = getrandom; 42 | return (intptr_t)p; 43 | }" 44 | then 45 | echo "CFLAGS += -DHAVE_GETRANDOM" >> config.mk 46 | 47 | elif compiles "-D_GNU_SOURCE" " 48 | #include 49 | #include 50 | int main(void) 51 | { 52 | long n = SYS_getrandom; 53 | long (*p)(long, ...) = syscall; 54 | return (intptr_t)p & n; 55 | }" 56 | then 57 | echo "CFLAGS += -DHAVE_GETRANDOM_SYSCALL" >> config.mk 58 | fi 59 | -------------------------------------------------------------------------------- /md2mime.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "smtp.h" 9 | #include "wrap.h" 10 | #include "vendor/uthash.h" 11 | 12 | /* libcmark */ 13 | #include 14 | 15 | #define TEXTWIDTH 72 16 | 17 | struct doc_link 18 | { 19 | size_t id; 20 | char pretty_id[10]; 21 | struct wordlist *caption; 22 | const char *url; 23 | UT_hash_handle hh; 24 | } *g_links = NULL; 25 | 26 | void snarf_node(cmark_iter *iter) 27 | { 28 | cmark_event_type ev_type; 29 | cmark_node *cur = cmark_iter_get_node(iter); 30 | cmark_node_type outer = cmark_node_get_type(cur); 31 | 32 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) 33 | { 34 | if (ev_type == CMARK_EVENT_EXIT && 35 | cmark_node_get_type(cmark_iter_get_node(iter)) == outer) 36 | break; 37 | } 38 | } 39 | 40 | struct doc_link *parse_link(cmark_iter *iter) 41 | { 42 | cmark_event_type ev_type; 43 | cmark_node *cur = cmark_iter_get_node(iter); 44 | 45 | struct doc_link *link = NULL; 46 | const char *url; 47 | 48 | assert(cmark_node_get_type(cur) == CMARK_NODE_LINK); 49 | 50 | url = cmark_node_get_url(cur); 51 | HASH_FIND_STR(g_links, url, link); 52 | if (link) 53 | { /* seen it, move on */ 54 | snarf_node(iter); 55 | return link; 56 | } 57 | link = malloc(sizeof *link); 58 | link->id = HASH_COUNT(g_links); 59 | snprintf(link->pretty_id, sizeof link->pretty_id, 60 | "[%zu]", link->id); 61 | link->url = url; 62 | link->caption = wordlist_create(); 63 | HASH_ADD_KEYPTR(hh, g_links, link->url, 64 | strlen(link->url), link); 65 | 66 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) 67 | { 68 | cur = cmark_iter_get_node(iter); 69 | cmark_node_type type = cmark_node_get_type(cur); 70 | 71 | if (ev_type == CMARK_EVENT_EXIT && type == CMARK_NODE_LINK) 72 | break; 73 | if (ev_type == CMARK_EVENT_ENTER && type == CMARK_NODE_TEXT) 74 | wordlist_segment(link->caption, cmark_node_get_literal(cur)); 75 | } 76 | return link; 77 | } 78 | 79 | size_t render_inner_text(cmark_iter *iter, char *prefix, char *overhang, bool flow) 80 | { 81 | cmark_event_type ev_type; 82 | cmark_node *cur = cmark_iter_get_node(iter); 83 | cmark_node_type outer_type = cmark_node_get_type(cur); 84 | struct wordlist *ws = wordlist_create(); 85 | size_t ret; 86 | 87 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) 88 | { 89 | cur = cmark_iter_get_node(iter); 90 | cmark_node_type type = cmark_node_get_type(cur); 91 | 92 | if (ev_type == CMARK_EVENT_EXIT && type == outer_type) 93 | break; 94 | if (ev_type == CMARK_EVENT_ENTER) 95 | { 96 | if (type == CMARK_NODE_LINK) 97 | { 98 | struct doc_link *link = parse_link(iter); 99 | if (link) 100 | { 101 | wordlist_concat(ws, link->caption); 102 | wordlist_segment(ws, link->pretty_id); 103 | } 104 | else 105 | wordlist_segment(ws, "[could not parse link]"); 106 | } 107 | else 108 | { 109 | const char *content = cmark_node_get_literal(cur); 110 | if (content) 111 | wordlist_segment(ws, content); 112 | } 113 | } 114 | } 115 | 116 | printf("%s", prefix); 117 | ret = print_wrapped(ws, overhang, TEXTWIDTH, flow); 118 | wordlist_free(ws); 119 | return ret; 120 | } 121 | 122 | void render_list(cmark_iter *iter) 123 | { 124 | cmark_event_type ev_type; 125 | cmark_node *cur = cmark_iter_get_node(iter); 126 | cmark_list_type t = cmark_node_get_list_type(cur); 127 | char *pad = ((t == CMARK_ORDERED_LIST) ? " " : " "), 128 | marker[10] = "* "; 129 | int i = 1; 130 | 131 | assert(cmark_node_get_type(cur) == CMARK_NODE_LIST); 132 | 133 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) 134 | { 135 | cur = cmark_iter_get_node(iter); 136 | cmark_node_type type = cmark_node_get_type(cur); 137 | 138 | if (ev_type == CMARK_EVENT_EXIT && type == CMARK_NODE_LIST) 139 | break; 140 | if (ev_type == CMARK_EVENT_ENTER && type == CMARK_NODE_ITEM) 141 | { 142 | if (t == CMARK_ORDERED_LIST) 143 | snprintf(marker, sizeof marker, "%d. ", i++); 144 | render_inner_text(iter, marker, pad, false); 145 | } 146 | } 147 | puts(""); 148 | } 149 | 150 | void render_heading(cmark_iter *iter) 151 | { 152 | cmark_node *cur = cmark_iter_get_node(iter); 153 | char borders[] = {'~', '#', '=', '-', '~'}, b; 154 | size_t width; 155 | unsigned level; 156 | 157 | assert(cmark_node_get_type(cur) == CMARK_NODE_HEADING); 158 | 159 | level = cmark_node_get_heading_level(cur); 160 | b = (level > sizeof(borders)-1) 161 | ? borders[sizeof(borders)-1] 162 | : borders[level]; 163 | width = render_inner_text(iter, "", "", true); 164 | while (width-- > 0) 165 | putchar(b); 166 | puts("\n"); 167 | } 168 | 169 | int main(int argc, char **argv) 170 | { 171 | char buf[BUFSIZ]; 172 | size_t bytes; 173 | cmark_node *doc; 174 | 175 | int c; 176 | bool arg_err = false, preserve_headers = false; 177 | char *host; 178 | while ((c = getopt(argc, argv, "p")) != -1) 179 | { 180 | switch(c) 181 | { 182 | case 'p': 183 | preserve_headers = true; 184 | break; 185 | case '?': 186 | fprintf(stderr, "Unrecognized option: '-%c'\n", optopt); 187 | arg_err = true; 188 | break; 189 | default: 190 | assert(0); 191 | } 192 | } 193 | if (arg_err || optind >= argc) 194 | { 195 | fprintf(stderr, 196 | "usage: %s [-p] sender-host\n" 197 | "\twhere -p = preserve headers\n", argv[0]); 198 | return EXIT_FAILURE; 199 | } 200 | if (optind < argc) 201 | { 202 | host = argv[optind]; 203 | if (strchr(host, '@')) 204 | { 205 | fputs("Hostname should not contain '@'\n", stderr); 206 | return EXIT_FAILURE; 207 | } 208 | } 209 | 210 | if (preserve_headers) 211 | { 212 | unsigned n = 0; 213 | for (c = 0; (c = getchar()) != EOF; ) 214 | { 215 | if (c == '\n') 216 | { 217 | if (++n > 1) 218 | break; /* two consecutive newlines */ 219 | } 220 | else 221 | n = 0; 222 | putchar(c); 223 | } 224 | } 225 | 226 | cmark_parser *p = cmark_parser_new(CMARK_OPT_DEFAULT); 227 | while ((bytes = fread(buf, 1, sizeof(buf), stdin)) > 0) 228 | { 229 | cmark_parser_feed(p, buf, bytes); 230 | if (bytes < sizeof(buf)) 231 | break; 232 | } 233 | doc = cmark_parser_finish(p); 234 | cmark_parser_free(p); 235 | 236 | cmark_event_type ev_type; 237 | cmark_iter *iter = cmark_iter_new(doc); 238 | 239 | enum section_type 240 | { 241 | SEC_NONE, SEC_MSG, SEC_CODE 242 | } section, prev_section = SEC_NONE; 243 | 244 | char *msgid = generate_msgid(host), 245 | *boundary_a = generate_mime_boundary(), 246 | *boundary_b = generate_mime_boundary(); 247 | printf("Message-ID: <%s>\n", msgid); 248 | printf("MIME-Version: 1.0\n" 249 | "Content-Type: multipart/alternative; boundary=\"%s\"\n\n" 250 | "--%s\n" 251 | "Content-Type: multipart/mixed; boundary=\"%s\"\n", 252 | boundary_a, boundary_a, boundary_b); 253 | 254 | int nth_code_block = 0, nth_block = 0; 255 | while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) 256 | { 257 | const char *code; 258 | cmark_node *cur = cmark_iter_get_node(iter); 259 | cmark_node_type type = cmark_node_get_type(cur); 260 | if (ev_type == CMARK_EVENT_ENTER) 261 | { 262 | section = (type == CMARK_NODE_CODE_BLOCK) 263 | ? SEC_CODE 264 | : SEC_MSG; 265 | if (section != prev_section) 266 | { 267 | prev_section = section; 268 | printf("\n--%s\n", boundary_b); 269 | if (section == SEC_CODE) 270 | { 271 | const char *filename = cmark_node_get_fence_info(cur); 272 | printf("Content-Type: %s; charset=\"utf-8\"\n", 273 | filename_mime(filename)); 274 | if (*filename == '\0') 275 | printf( 276 | "Content-Disposition: inline; filename=code-%d.txt\n", 277 | nth_code_block++); 278 | else 279 | printf( 280 | "Content-Disposition: inline; filename=%s\n", 281 | filename); 282 | } 283 | else 284 | { 285 | puts("Content-Type: text/plain; charset=\"utf-8\"; " 286 | "format=\"flowed\""); 287 | puts("Content-Disposition: inline"); 288 | } 289 | printf("Content-ID: <%i.%s>\n\n", nth_block++, msgid); 290 | } 291 | switch (cmark_node_get_type(cur)) 292 | { 293 | case CMARK_NODE_LIST: 294 | render_list(iter); 295 | break; 296 | case CMARK_NODE_HEADING: 297 | render_heading(iter); 298 | break; 299 | case CMARK_NODE_BLOCK_QUOTE: 300 | render_inner_text(iter, "> ", "> ", true); 301 | puts(""); 302 | break; 303 | case CMARK_NODE_CODE_BLOCK: 304 | code = cmark_node_get_literal(cur); 305 | if (code) 306 | fputs(code, stdout); 307 | break; 308 | case CMARK_NODE_PARAGRAPH: 309 | render_inner_text(iter, "", "", true); 310 | puts(""); 311 | break; 312 | default: 313 | break; 314 | } 315 | } 316 | } 317 | 318 | if (HASH_COUNT(g_links)) 319 | { 320 | struct doc_link *x, *next; 321 | 322 | printf("\n--%s\n", boundary_b); 323 | puts("Content-Type: text/uri-list; charset=\"utf-8\""); 324 | puts("Content-Disposition: inline; filename=references.uri\n"); 325 | 326 | /* that the ids are printed in sorted order relies on the fact 327 | * that items are visited in insertion order */ 328 | for (x = g_links; x; x = next) 329 | { 330 | printf("# %zu: ", x->id); 331 | print_wrapped(x->caption, "", SIZE_MAX, false); 332 | puts(x->url); 333 | next = x->hh.next; 334 | 335 | wordlist_free(x->caption); 336 | HASH_DEL(g_links, x); 337 | free(x); 338 | } 339 | } 340 | 341 | printf("\n--%s--\n\n",boundary_b); 342 | printf("--%s\n", boundary_a); 343 | puts("Content-Type: text/html; charset=\"utf-8\""); 344 | puts("Content-Disposition: inline\n"); 345 | 346 | puts(""); 347 | puts("Foo"); 348 | puts(""); 349 | 350 | char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT); 351 | fputs(html, stdout); 352 | free(html); 353 | 354 | puts("\n"); 355 | printf("--%s--\n", boundary_a); 356 | cmark_iter_free(iter); 357 | cmark_node_free(doc); 358 | free(msgid); 359 | free(boundary_a); 360 | free(boundary_b); 361 | 362 | return EXIT_SUCCESS; 363 | } 364 | -------------------------------------------------------------------------------- /smtp.c: -------------------------------------------------------------------------------- 1 | #ifdef HAVE_GETRANDOM_SYSCALL 2 | #define _GNU_SOURCE 3 | #endif 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #ifdef HAVE_GETRANDOM 11 | #include 12 | #elif defined HAVE_GETRANDOM_SYSCALL 13 | #include 14 | #include 15 | #endif 16 | 17 | #include "smtp.h" 18 | 19 | static void get_random_bytes(void *buf, size_t n) 20 | { 21 | #if defined HAVE_ARC4RANDOM /* BSD */ 22 | arc4random_buf(buf, n); 23 | #elif defined HAVE_GETRANDOM /* Linux, GLIBC >= 2.25 */ 24 | getrandom(buf, n, 0); 25 | #elif defined HAVE_GETRANDOM_SYSCALL 26 | syscall(SYS_getrandom, buf, n, 0); 27 | #else 28 | #error OS does not provide recognized function to get entropy 29 | #endif 30 | } 31 | 32 | /* Print number using symbols from an alphabet. The symbols are written 33 | * little-endian, but the output suffices for getting unique strings */ 34 | static char *alpha_radix(uintmax_t n, const char *alpha, size_t nalpha) 35 | { 36 | char *p, *ret = malloc(1 + (size_t)ceil(log(n)/log(nalpha))); 37 | if (!(p = ret)) 38 | return NULL; 39 | do 40 | { 41 | *p++ = alpha[n % nalpha]; 42 | n /= nalpha; 43 | } while(n > 0); 44 | *p = '\0'; 45 | return ret; 46 | } 47 | 48 | /* caller must free */ 49 | char *generate_msgid(const char *host) 50 | { 51 | /* atext tokens from RFC 2822 52 | * https://tools.ietf.org/html/rfc2822.html#section-3.2.4 53 | * However, to allow referencing msg ids in urls, avoid: 54 | * URL reserved chars ; / ? : @ = & 55 | * URL unsafe chars " < > # % { } | \ ^ ~ [ ] ` */ 56 | static const char atext[] = 57 | "0123456789" 58 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 59 | "abcdefghijklmnopqrstuvwxyz" 60 | "$!*-~'+_"; 61 | 62 | uint_least64_t rnd; 63 | get_random_bytes(&rnd, sizeof rnd); 64 | 65 | /* Typically time() returns seconds since the UNIX epoch. The C99 standard 66 | * says that the range and precision is implementation-defined. What 67 | * matters is getting numbers that don't repeat for a long time. */ 68 | char *s = alpha_radix(time(NULL), atext, (sizeof atext)-1), 69 | *t = alpha_radix(rnd, atext, (sizeof atext)-1), 70 | *msgid = NULL; 71 | if (!s || !t) 72 | goto done; 73 | 74 | msgid = malloc(strlen(s) + strlen(t) + strlen(host) + 3); 75 | if (msgid) 76 | sprintf(msgid, "%s.%s@%s", s, t, host); 77 | done: 78 | if (s) free(s); 79 | if (t) free(t); 80 | return msgid; 81 | } 82 | 83 | /* caller must free */ 84 | char *generate_mime_boundary(void) 85 | { 86 | /* https://tools.ietf.org/html/rfc2046#section-5.1.1 */ 87 | static const char bcharsnospace[] = 88 | "0123456789" 89 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 90 | "abcdefghijklmnopqrstuvwxyz" 91 | "'()+_,-./:=?"; 92 | uintmax_t rnd; 93 | get_random_bytes(&rnd, sizeof rnd); 94 | return alpha_radix(rnd, bcharsnospace, (sizeof bcharsnospace)-1); 95 | } 96 | 97 | const char *filename_mime(const char *f) 98 | { 99 | static struct { 100 | const char *ext; 101 | const char *mime; 102 | } known[] = { 103 | {"asm", "text/x-asm"}, {"c", "text/x-c"}, 104 | {"cpp", "text/x-c"}, {"csh", "text/x-script.csh"}, 105 | {"css", "text/css"}, {"csv", "text/csv"}, 106 | {"diff", "text/x-diff"}, {"el", "text/x-script.elisp"}, 107 | {"eps", "application/postscript"}, {"f", "text/x-fortran"}, 108 | {"h", "text/x-c"}, {"htm", "text/html"}, 109 | {"html", "text/html"}, {"jav", "text/x-java-source"}, 110 | {"java", "text/x-java-source"}, {"js", "text/javascript"}, 111 | {"json", "application/json"}, {"ksh", "text/x-script.ksh"}, 112 | {"lisp", "text/x-script.lisp"}, {"lsp", "text/x-script.lisp"}, 113 | {"m", "text/x-m"}, {"md", "text/markdown"}, 114 | {"p", "text/pascal"}, {"pas", "text/pascal"}, 115 | {"patch", "text/x-patch"}, {"pl", "text/x-script.perl"}, 116 | {"py", "text/x-script.python"}, {"s", "text/x-asm"}, 117 | {"scm", "text/x-script.scheme"}, {"sh", "application/x-sh"}, 118 | {"svg", "image/svg+xml"}, {"tcl", "text/x-script.tcl"}, 119 | {"tcsh", "text/x-script.tcsh"}, {"tex", "application/x-tex"}, 120 | {"troff", "text/troff"}, {"tsv", "text/tab-separated-values"}, 121 | {"xml", "text/xml"}, {"zsh", "text/x-script.zsh"} 122 | }; 123 | if (!f || !(f = strrchr(f, '.'))) 124 | return "text/plain"; 125 | f++; 126 | for (size_t i = 0; i < sizeof(known)/sizeof(*known); i++) 127 | if (strcmp(known[i].ext, f) == 0) 128 | return known[i].mime; 129 | return "text/plain"; 130 | } 131 | -------------------------------------------------------------------------------- /smtp.h: -------------------------------------------------------------------------------- 1 | #ifndef MIMEDOWN_SMTP_H 2 | #define MIMEDOWN_SMTP_H 3 | 4 | char *generate_msgid(const char *host); 5 | char *generate_mime_boundary(void); 6 | const char *filename_mime(const char *f); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /vendor/uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2018, Troy D. Hanson http://troydhanson.github.com/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #define UTHASH_VERSION 2.1.0 28 | 29 | #include /* memcmp, memset, strlen */ 30 | #include /* ptrdiff_t */ 31 | #include /* exit */ 32 | 33 | /* These macros use decltype or the earlier __typeof GNU extension. 34 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 35 | when compiling c++ source) this code uses whatever method is needed 36 | or, for VS2008 where neither is available, uses casting workarounds. */ 37 | #if !defined(DECLTYPE) && !defined(NO_DECLTYPE) 38 | #if defined(_MSC_VER) /* MS compiler */ 39 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 40 | #define DECLTYPE(x) (decltype(x)) 41 | #else /* VS2008 or older (or VS2010 in C mode) */ 42 | #define NO_DECLTYPE 43 | #endif 44 | #elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) 45 | #define NO_DECLTYPE 46 | #else /* GNU, Sun and other compilers */ 47 | #define DECLTYPE(x) (__typeof(x)) 48 | #endif 49 | #endif 50 | 51 | #ifdef NO_DECLTYPE 52 | #define DECLTYPE(x) 53 | #define DECLTYPE_ASSIGN(dst,src) \ 54 | do { \ 55 | char **_da_dst = (char**)(&(dst)); \ 56 | *_da_dst = (char*)(src); \ 57 | } while (0) 58 | #else 59 | #define DECLTYPE_ASSIGN(dst,src) \ 60 | do { \ 61 | (dst) = DECLTYPE(dst)(src); \ 62 | } while (0) 63 | #endif 64 | 65 | /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ 66 | #if defined(_WIN32) 67 | #if defined(_MSC_VER) && _MSC_VER >= 1600 68 | #include 69 | #elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) 70 | #include 71 | #else 72 | typedef unsigned int uint32_t; 73 | typedef unsigned char uint8_t; 74 | #endif 75 | #elif defined(__GNUC__) && !defined(__VXWORKS__) 76 | #include 77 | #else 78 | typedef unsigned int uint32_t; 79 | typedef unsigned char uint8_t; 80 | #endif 81 | 82 | #ifndef uthash_malloc 83 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 84 | #endif 85 | #ifndef uthash_free 86 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 87 | #endif 88 | #ifndef uthash_bzero 89 | #define uthash_bzero(a,n) memset(a,'\0',n) 90 | #endif 91 | #ifndef uthash_strlen 92 | #define uthash_strlen(s) strlen(s) 93 | #endif 94 | 95 | #ifdef uthash_memcmp 96 | /* This warning will not catch programs that define uthash_memcmp AFTER including uthash.h. */ 97 | #warning "uthash_memcmp is deprecated; please use HASH_KEYCMP instead" 98 | #else 99 | #define uthash_memcmp(a,b,n) memcmp(a,b,n) 100 | #endif 101 | 102 | #ifndef HASH_KEYCMP 103 | #define HASH_KEYCMP(a,b,n) uthash_memcmp(a,b,n) 104 | #endif 105 | 106 | #ifndef uthash_noexpand_fyi 107 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 108 | #endif 109 | #ifndef uthash_expand_fyi 110 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 111 | #endif 112 | 113 | #ifndef HASH_NONFATAL_OOM 114 | #define HASH_NONFATAL_OOM 0 115 | #endif 116 | 117 | #if HASH_NONFATAL_OOM 118 | /* malloc failures can be recovered from */ 119 | 120 | #ifndef uthash_nonfatal_oom 121 | #define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ 122 | #endif 123 | 124 | #define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) 125 | #define IF_HASH_NONFATAL_OOM(x) x 126 | 127 | #else 128 | /* malloc failures result in lost memory, hash tables are unusable */ 129 | 130 | #ifndef uthash_fatal 131 | #define uthash_fatal(msg) exit(-1) /* fatal OOM error */ 132 | #endif 133 | 134 | #define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") 135 | #define IF_HASH_NONFATAL_OOM(x) 136 | 137 | #endif 138 | 139 | /* initial number of buckets */ 140 | #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 141 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 142 | #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ 143 | 144 | /* calculate the element whose hash handle address is hhp */ 145 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 146 | /* calculate the hash handle from element address elp */ 147 | #define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) 148 | 149 | #define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ 150 | do { \ 151 | struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ 152 | unsigned _hd_bkt; \ 153 | HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 154 | (head)->hh.tbl->buckets[_hd_bkt].count++; \ 155 | _hd_hh_item->hh_next = NULL; \ 156 | _hd_hh_item->hh_prev = NULL; \ 157 | } while (0) 158 | 159 | #define HASH_VALUE(keyptr,keylen,hashv) \ 160 | do { \ 161 | HASH_FCN(keyptr, keylen, hashv); \ 162 | } while (0) 163 | 164 | #define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ 165 | do { \ 166 | (out) = NULL; \ 167 | if (head) { \ 168 | unsigned _hf_bkt; \ 169 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ 170 | if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ 171 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ 172 | } \ 173 | } \ 174 | } while (0) 175 | 176 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 177 | do { \ 178 | unsigned _hf_hashv; \ 179 | HASH_VALUE(keyptr, keylen, _hf_hashv); \ 180 | HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ 181 | } while (0) 182 | 183 | #ifdef HASH_BLOOM 184 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 185 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) 186 | #define HASH_BLOOM_MAKE(tbl,oomed) \ 187 | do { \ 188 | (tbl)->bloom_nbits = HASH_BLOOM; \ 189 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 190 | if (!(tbl)->bloom_bv) { \ 191 | HASH_RECORD_OOM(oomed); \ 192 | } else { \ 193 | uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 194 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 195 | } \ 196 | } while (0) 197 | 198 | #define HASH_BLOOM_FREE(tbl) \ 199 | do { \ 200 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 201 | } while (0) 202 | 203 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 204 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) 205 | 206 | #define HASH_BLOOM_ADD(tbl,hashv) \ 207 | HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 208 | 209 | #define HASH_BLOOM_TEST(tbl,hashv) \ 210 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 211 | 212 | #else 213 | #define HASH_BLOOM_MAKE(tbl,oomed) 214 | #define HASH_BLOOM_FREE(tbl) 215 | #define HASH_BLOOM_ADD(tbl,hashv) 216 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 217 | #define HASH_BLOOM_BYTELEN 0U 218 | #endif 219 | 220 | #define HASH_MAKE_TABLE(hh,head,oomed) \ 221 | do { \ 222 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ 223 | if (!(head)->hh.tbl) { \ 224 | HASH_RECORD_OOM(oomed); \ 225 | } else { \ 226 | uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ 227 | (head)->hh.tbl->tail = &((head)->hh); \ 228 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 229 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 230 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 231 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 232 | HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ 233 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 234 | if (!(head)->hh.tbl->buckets) { \ 235 | HASH_RECORD_OOM(oomed); \ 236 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 237 | } else { \ 238 | uthash_bzero((head)->hh.tbl->buckets, \ 239 | HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ 240 | HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ 241 | IF_HASH_NONFATAL_OOM( \ 242 | if (oomed) { \ 243 | uthash_free((head)->hh.tbl->buckets, \ 244 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 245 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 246 | } \ 247 | ) \ 248 | } \ 249 | } \ 250 | } while (0) 251 | 252 | #define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ 253 | do { \ 254 | (replaced) = NULL; \ 255 | HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ 256 | if (replaced) { \ 257 | HASH_DELETE(hh, head, replaced); \ 258 | } \ 259 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ 260 | } while (0) 261 | 262 | #define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ 263 | do { \ 264 | (replaced) = NULL; \ 265 | HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ 266 | if (replaced) { \ 267 | HASH_DELETE(hh, head, replaced); \ 268 | } \ 269 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ 270 | } while (0) 271 | 272 | #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ 273 | do { \ 274 | unsigned _hr_hashv; \ 275 | HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ 276 | HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ 277 | } while (0) 278 | 279 | #define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ 280 | do { \ 281 | unsigned _hr_hashv; \ 282 | HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ 283 | HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ 284 | } while (0) 285 | 286 | #define HASH_APPEND_LIST(hh, head, add) \ 287 | do { \ 288 | (add)->hh.next = NULL; \ 289 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 290 | (head)->hh.tbl->tail->next = (add); \ 291 | (head)->hh.tbl->tail = &((add)->hh); \ 292 | } while (0) 293 | 294 | #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ 295 | do { \ 296 | do { \ 297 | if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ 298 | break; \ 299 | } \ 300 | } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ 301 | } while (0) 302 | 303 | #ifdef NO_DECLTYPE 304 | #undef HASH_AKBI_INNER_LOOP 305 | #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ 306 | do { \ 307 | char *_hs_saved_head = (char*)(head); \ 308 | do { \ 309 | DECLTYPE_ASSIGN(head, _hs_iter); \ 310 | if (cmpfcn(head, add) > 0) { \ 311 | DECLTYPE_ASSIGN(head, _hs_saved_head); \ 312 | break; \ 313 | } \ 314 | DECLTYPE_ASSIGN(head, _hs_saved_head); \ 315 | } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ 316 | } while (0) 317 | #endif 318 | 319 | #if HASH_NONFATAL_OOM 320 | 321 | #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ 322 | do { \ 323 | if (!(oomed)) { \ 324 | unsigned _ha_bkt; \ 325 | (head)->hh.tbl->num_items++; \ 326 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ 327 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ 328 | if (oomed) { \ 329 | HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ 330 | HASH_DELETE_HH(hh, head, &(add)->hh); \ 331 | (add)->hh.tbl = NULL; \ 332 | uthash_nonfatal_oom(add); \ 333 | } else { \ 334 | HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ 335 | HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ 336 | } \ 337 | } else { \ 338 | (add)->hh.tbl = NULL; \ 339 | uthash_nonfatal_oom(add); \ 340 | } \ 341 | } while (0) 342 | 343 | #else 344 | 345 | #define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ 346 | do { \ 347 | unsigned _ha_bkt; \ 348 | (head)->hh.tbl->num_items++; \ 349 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ 350 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ 351 | HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ 352 | HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ 353 | } while (0) 354 | 355 | #endif 356 | 357 | 358 | #define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ 359 | do { \ 360 | IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ 361 | (add)->hh.hashv = (hashval); \ 362 | (add)->hh.key = (char*) (keyptr); \ 363 | (add)->hh.keylen = (unsigned) (keylen_in); \ 364 | if (!(head)) { \ 365 | (add)->hh.next = NULL; \ 366 | (add)->hh.prev = NULL; \ 367 | HASH_MAKE_TABLE(hh, add, _ha_oomed); \ 368 | IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ 369 | (head) = (add); \ 370 | IF_HASH_NONFATAL_OOM( } ) \ 371 | } else { \ 372 | void *_hs_iter = (head); \ 373 | (add)->hh.tbl = (head)->hh.tbl; \ 374 | HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ 375 | if (_hs_iter) { \ 376 | (add)->hh.next = _hs_iter; \ 377 | if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) { \ 378 | HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \ 379 | } else { \ 380 | (head) = (add); \ 381 | } \ 382 | HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ 383 | } else { \ 384 | HASH_APPEND_LIST(hh, head, add); \ 385 | } \ 386 | } \ 387 | HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ 388 | HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ 389 | } while (0) 390 | 391 | #define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ 392 | do { \ 393 | unsigned _hs_hashv; \ 394 | HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ 395 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ 396 | } while (0) 397 | 398 | #define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ 399 | HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) 400 | 401 | #define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ 402 | HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) 403 | 404 | #define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ 405 | do { \ 406 | IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ 407 | (add)->hh.hashv = (hashval); \ 408 | (add)->hh.key = (char*) (keyptr); \ 409 | (add)->hh.keylen = (unsigned) (keylen_in); \ 410 | if (!(head)) { \ 411 | (add)->hh.next = NULL; \ 412 | (add)->hh.prev = NULL; \ 413 | HASH_MAKE_TABLE(hh, add, _ha_oomed); \ 414 | IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ 415 | (head) = (add); \ 416 | IF_HASH_NONFATAL_OOM( } ) \ 417 | } else { \ 418 | (add)->hh.tbl = (head)->hh.tbl; \ 419 | HASH_APPEND_LIST(hh, head, add); \ 420 | } \ 421 | HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ 422 | HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ 423 | } while (0) 424 | 425 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 426 | do { \ 427 | unsigned _ha_hashv; \ 428 | HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ 429 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ 430 | } while (0) 431 | 432 | #define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ 433 | HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) 434 | 435 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 436 | HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) 437 | 438 | #define HASH_TO_BKT(hashv,num_bkts,bkt) \ 439 | do { \ 440 | bkt = ((hashv) & ((num_bkts) - 1U)); \ 441 | } while (0) 442 | 443 | /* delete "delptr" from the hash table. 444 | * "the usual" patch-up process for the app-order doubly-linked-list. 445 | * The use of _hd_hh_del below deserves special explanation. 446 | * These used to be expressed using (delptr) but that led to a bug 447 | * if someone used the same symbol for the head and deletee, like 448 | * HASH_DELETE(hh,users,users); 449 | * We want that to work, but by changing the head (users) below 450 | * we were forfeiting our ability to further refer to the deletee (users) 451 | * in the patch-up process. Solution: use scratch space to 452 | * copy the deletee pointer, then the latter references are via that 453 | * scratch pointer rather than through the repointed (users) symbol. 454 | */ 455 | #define HASH_DELETE(hh,head,delptr) \ 456 | HASH_DELETE_HH(hh, head, &(delptr)->hh) 457 | 458 | #define HASH_DELETE_HH(hh,head,delptrhh) \ 459 | do { \ 460 | struct UT_hash_handle *_hd_hh_del = (delptrhh); \ 461 | if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ 462 | HASH_BLOOM_FREE((head)->hh.tbl); \ 463 | uthash_free((head)->hh.tbl->buckets, \ 464 | (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ 465 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 466 | (head) = NULL; \ 467 | } else { \ 468 | unsigned _hd_bkt; \ 469 | if (_hd_hh_del == (head)->hh.tbl->tail) { \ 470 | (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ 471 | } \ 472 | if (_hd_hh_del->prev != NULL) { \ 473 | HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ 474 | } else { \ 475 | DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ 476 | } \ 477 | if (_hd_hh_del->next != NULL) { \ 478 | HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ 479 | } \ 480 | HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 481 | HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 482 | (head)->hh.tbl->num_items--; \ 483 | } \ 484 | HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ 485 | } while (0) 486 | 487 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 488 | #define HASH_FIND_STR(head,findstr,out) \ 489 | do { \ 490 | unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ 491 | HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ 492 | } while (0) 493 | #define HASH_ADD_STR(head,strfield,add) \ 494 | do { \ 495 | unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ 496 | HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ 497 | } while (0) 498 | #define HASH_REPLACE_STR(head,strfield,add,replaced) \ 499 | do { \ 500 | unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ 501 | HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ 502 | } while (0) 503 | #define HASH_FIND_INT(head,findint,out) \ 504 | HASH_FIND(hh,head,findint,sizeof(int),out) 505 | #define HASH_ADD_INT(head,intfield,add) \ 506 | HASH_ADD(hh,head,intfield,sizeof(int),add) 507 | #define HASH_REPLACE_INT(head,intfield,add,replaced) \ 508 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) 509 | #define HASH_FIND_PTR(head,findptr,out) \ 510 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 511 | #define HASH_ADD_PTR(head,ptrfield,add) \ 512 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 513 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ 514 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) 515 | #define HASH_DEL(head,delptr) \ 516 | HASH_DELETE(hh,head,delptr) 517 | 518 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 519 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 520 | */ 521 | #ifdef HASH_DEBUG 522 | #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 523 | #define HASH_FSCK(hh,head,where) \ 524 | do { \ 525 | struct UT_hash_handle *_thh; \ 526 | if (head) { \ 527 | unsigned _bkt_i; \ 528 | unsigned _count = 0; \ 529 | char *_prev; \ 530 | for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ 531 | unsigned _bkt_count = 0; \ 532 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 533 | _prev = NULL; \ 534 | while (_thh) { \ 535 | if (_prev != (char*)(_thh->hh_prev)) { \ 536 | HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ 537 | (where), (void*)_thh->hh_prev, (void*)_prev); \ 538 | } \ 539 | _bkt_count++; \ 540 | _prev = (char*)(_thh); \ 541 | _thh = _thh->hh_next; \ 542 | } \ 543 | _count += _bkt_count; \ 544 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 545 | HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ 546 | (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 547 | } \ 548 | } \ 549 | if (_count != (head)->hh.tbl->num_items) { \ 550 | HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ 551 | (where), (head)->hh.tbl->num_items, _count); \ 552 | } \ 553 | _count = 0; \ 554 | _prev = NULL; \ 555 | _thh = &(head)->hh; \ 556 | while (_thh) { \ 557 | _count++; \ 558 | if (_prev != (char*)_thh->prev) { \ 559 | HASH_OOPS("%s: invalid prev %p, actual %p\n", \ 560 | (where), (void*)_thh->prev, (void*)_prev); \ 561 | } \ 562 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 563 | _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ 564 | } \ 565 | if (_count != (head)->hh.tbl->num_items) { \ 566 | HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ 567 | (where), (head)->hh.tbl->num_items, _count); \ 568 | } \ 569 | } \ 570 | } while (0) 571 | #else 572 | #define HASH_FSCK(hh,head,where) 573 | #endif 574 | 575 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 576 | * the descriptor to which this macro is defined for tuning the hash function. 577 | * The app can #include to get the prototype for write(2). */ 578 | #ifdef HASH_EMIT_KEYS 579 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 580 | do { \ 581 | unsigned _klen = fieldlen; \ 582 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 583 | write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ 584 | } while (0) 585 | #else 586 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 587 | #endif 588 | 589 | /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 590 | #ifdef HASH_FUNCTION 591 | #define HASH_FCN HASH_FUNCTION 592 | #else 593 | #define HASH_FCN HASH_JEN 594 | #endif 595 | 596 | /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ 597 | #define HASH_BER(key,keylen,hashv) \ 598 | do { \ 599 | unsigned _hb_keylen = (unsigned)keylen; \ 600 | const unsigned char *_hb_key = (const unsigned char*)(key); \ 601 | (hashv) = 0; \ 602 | while (_hb_keylen-- != 0U) { \ 603 | (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ 604 | } \ 605 | } while (0) 606 | 607 | 608 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 609 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 610 | #define HASH_SAX(key,keylen,hashv) \ 611 | do { \ 612 | unsigned _sx_i; \ 613 | const unsigned char *_hs_key = (const unsigned char*)(key); \ 614 | hashv = 0; \ 615 | for (_sx_i=0; _sx_i < keylen; _sx_i++) { \ 616 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 617 | } \ 618 | } while (0) 619 | /* FNV-1a variation */ 620 | #define HASH_FNV(key,keylen,hashv) \ 621 | do { \ 622 | unsigned _fn_i; \ 623 | const unsigned char *_hf_key = (const unsigned char*)(key); \ 624 | (hashv) = 2166136261U; \ 625 | for (_fn_i=0; _fn_i < keylen; _fn_i++) { \ 626 | hashv = hashv ^ _hf_key[_fn_i]; \ 627 | hashv = hashv * 16777619U; \ 628 | } \ 629 | } while (0) 630 | 631 | #define HASH_OAT(key,keylen,hashv) \ 632 | do { \ 633 | unsigned _ho_i; \ 634 | const unsigned char *_ho_key=(const unsigned char*)(key); \ 635 | hashv = 0; \ 636 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 637 | hashv += _ho_key[_ho_i]; \ 638 | hashv += (hashv << 10); \ 639 | hashv ^= (hashv >> 6); \ 640 | } \ 641 | hashv += (hashv << 3); \ 642 | hashv ^= (hashv >> 11); \ 643 | hashv += (hashv << 15); \ 644 | } while (0) 645 | 646 | #define HASH_JEN_MIX(a,b,c) \ 647 | do { \ 648 | a -= b; a -= c; a ^= ( c >> 13 ); \ 649 | b -= c; b -= a; b ^= ( a << 8 ); \ 650 | c -= a; c -= b; c ^= ( b >> 13 ); \ 651 | a -= b; a -= c; a ^= ( c >> 12 ); \ 652 | b -= c; b -= a; b ^= ( a << 16 ); \ 653 | c -= a; c -= b; c ^= ( b >> 5 ); \ 654 | a -= b; a -= c; a ^= ( c >> 3 ); \ 655 | b -= c; b -= a; b ^= ( a << 10 ); \ 656 | c -= a; c -= b; c ^= ( b >> 15 ); \ 657 | } while (0) 658 | 659 | #define HASH_JEN(key,keylen,hashv) \ 660 | do { \ 661 | unsigned _hj_i,_hj_j,_hj_k; \ 662 | unsigned const char *_hj_key=(unsigned const char*)(key); \ 663 | hashv = 0xfeedbeefu; \ 664 | _hj_i = _hj_j = 0x9e3779b9u; \ 665 | _hj_k = (unsigned)(keylen); \ 666 | while (_hj_k >= 12U) { \ 667 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 668 | + ( (unsigned)_hj_key[2] << 16 ) \ 669 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 670 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 671 | + ( (unsigned)_hj_key[6] << 16 ) \ 672 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 673 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 674 | + ( (unsigned)_hj_key[10] << 16 ) \ 675 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 676 | \ 677 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 678 | \ 679 | _hj_key += 12; \ 680 | _hj_k -= 12U; \ 681 | } \ 682 | hashv += (unsigned)(keylen); \ 683 | switch ( _hj_k ) { \ 684 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ 685 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ 686 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ 687 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ 688 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ 689 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ 690 | case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ 691 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ 692 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ 693 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ 694 | case 1: _hj_i += _hj_key[0]; \ 695 | } \ 696 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 697 | } while (0) 698 | 699 | /* The Paul Hsieh hash function */ 700 | #undef get16bits 701 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 702 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 703 | #define get16bits(d) (*((const uint16_t *) (d))) 704 | #endif 705 | 706 | #if !defined (get16bits) 707 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 708 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 709 | #endif 710 | #define HASH_SFH(key,keylen,hashv) \ 711 | do { \ 712 | unsigned const char *_sfh_key=(unsigned const char*)(key); \ 713 | uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ 714 | \ 715 | unsigned _sfh_rem = _sfh_len & 3U; \ 716 | _sfh_len >>= 2; \ 717 | hashv = 0xcafebabeu; \ 718 | \ 719 | /* Main loop */ \ 720 | for (;_sfh_len > 0U; _sfh_len--) { \ 721 | hashv += get16bits (_sfh_key); \ 722 | _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ 723 | hashv = (hashv << 16) ^ _sfh_tmp; \ 724 | _sfh_key += 2U*sizeof (uint16_t); \ 725 | hashv += hashv >> 11; \ 726 | } \ 727 | \ 728 | /* Handle end cases */ \ 729 | switch (_sfh_rem) { \ 730 | case 3: hashv += get16bits (_sfh_key); \ 731 | hashv ^= hashv << 16; \ 732 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ 733 | hashv += hashv >> 11; \ 734 | break; \ 735 | case 2: hashv += get16bits (_sfh_key); \ 736 | hashv ^= hashv << 11; \ 737 | hashv += hashv >> 17; \ 738 | break; \ 739 | case 1: hashv += *_sfh_key; \ 740 | hashv ^= hashv << 10; \ 741 | hashv += hashv >> 1; \ 742 | } \ 743 | \ 744 | /* Force "avalanching" of final 127 bits */ \ 745 | hashv ^= hashv << 3; \ 746 | hashv += hashv >> 5; \ 747 | hashv ^= hashv << 4; \ 748 | hashv += hashv >> 17; \ 749 | hashv ^= hashv << 25; \ 750 | hashv += hashv >> 6; \ 751 | } while (0) 752 | 753 | #ifdef HASH_USING_NO_STRICT_ALIASING 754 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. 755 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 756 | * MurmurHash uses the faster approach only on CPU's where we know it's safe. 757 | * 758 | * Note the preprocessor built-in defines can be emitted using: 759 | * 760 | * gcc -m64 -dM -E - < /dev/null (on gcc) 761 | * cc -## a.c (where a.c is a simple test file) (Sun Studio) 762 | */ 763 | #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) 764 | #define MUR_GETBLOCK(p,i) p[i] 765 | #else /* non intel */ 766 | #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) 767 | #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) 768 | #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) 769 | #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) 770 | #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 771 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 772 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 773 | #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 774 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 775 | #else /* assume little endian non-intel */ 776 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 777 | #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 778 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 779 | #endif 780 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 781 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 782 | (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ 783 | MUR_ONE_THREE(p)))) 784 | #endif 785 | #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) 786 | #define MUR_FMIX(_h) \ 787 | do { \ 788 | _h ^= _h >> 16; \ 789 | _h *= 0x85ebca6bu; \ 790 | _h ^= _h >> 13; \ 791 | _h *= 0xc2b2ae35u; \ 792 | _h ^= _h >> 16; \ 793 | } while (0) 794 | 795 | #define HASH_MUR(key,keylen,hashv) \ 796 | do { \ 797 | const uint8_t *_mur_data = (const uint8_t*)(key); \ 798 | const int _mur_nblocks = (int)(keylen) / 4; \ 799 | uint32_t _mur_h1 = 0xf88D5353u; \ 800 | uint32_t _mur_c1 = 0xcc9e2d51u; \ 801 | uint32_t _mur_c2 = 0x1b873593u; \ 802 | uint32_t _mur_k1 = 0; \ 803 | const uint8_t *_mur_tail; \ 804 | const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ 805 | int _mur_i; \ 806 | for (_mur_i = -_mur_nblocks; _mur_i != 0; _mur_i++) { \ 807 | _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ 808 | _mur_k1 *= _mur_c1; \ 809 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 810 | _mur_k1 *= _mur_c2; \ 811 | \ 812 | _mur_h1 ^= _mur_k1; \ 813 | _mur_h1 = MUR_ROTL32(_mur_h1,13); \ 814 | _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ 815 | } \ 816 | _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ 817 | _mur_k1=0; \ 818 | switch ((keylen) & 3U) { \ 819 | case 0: break; \ 820 | case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ 821 | case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ 822 | case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ 823 | _mur_k1 *= _mur_c1; \ 824 | _mur_k1 = MUR_ROTL32(_mur_k1,15); \ 825 | _mur_k1 *= _mur_c2; \ 826 | _mur_h1 ^= _mur_k1; \ 827 | } \ 828 | _mur_h1 ^= (uint32_t)(keylen); \ 829 | MUR_FMIX(_mur_h1); \ 830 | hashv = _mur_h1; \ 831 | } while (0) 832 | #endif /* HASH_USING_NO_STRICT_ALIASING */ 833 | 834 | /* iterate over items in a known bucket to find desired item */ 835 | #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ 836 | do { \ 837 | if ((head).hh_head != NULL) { \ 838 | DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ 839 | } else { \ 840 | (out) = NULL; \ 841 | } \ 842 | while ((out) != NULL) { \ 843 | if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ 844 | if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) { \ 845 | break; \ 846 | } \ 847 | } \ 848 | if ((out)->hh.hh_next != NULL) { \ 849 | DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ 850 | } else { \ 851 | (out) = NULL; \ 852 | } \ 853 | } \ 854 | } while (0) 855 | 856 | /* add an item to a bucket */ 857 | #define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \ 858 | do { \ 859 | UT_hash_bucket *_ha_head = &(head); \ 860 | _ha_head->count++; \ 861 | (addhh)->hh_next = _ha_head->hh_head; \ 862 | (addhh)->hh_prev = NULL; \ 863 | if (_ha_head->hh_head != NULL) { \ 864 | _ha_head->hh_head->hh_prev = (addhh); \ 865 | } \ 866 | _ha_head->hh_head = (addhh); \ 867 | if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \ 868 | && !(addhh)->tbl->noexpand) { \ 869 | HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \ 870 | IF_HASH_NONFATAL_OOM( \ 871 | if (oomed) { \ 872 | HASH_DEL_IN_BKT(head,addhh); \ 873 | } \ 874 | ) \ 875 | } \ 876 | } while (0) 877 | 878 | /* remove an item from a given bucket */ 879 | #define HASH_DEL_IN_BKT(head,delhh) \ 880 | do { \ 881 | UT_hash_bucket *_hd_head = &(head); \ 882 | _hd_head->count--; \ 883 | if (_hd_head->hh_head == (delhh)) { \ 884 | _hd_head->hh_head = (delhh)->hh_next; \ 885 | } \ 886 | if ((delhh)->hh_prev) { \ 887 | (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ 888 | } \ 889 | if ((delhh)->hh_next) { \ 890 | (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ 891 | } \ 892 | } while (0) 893 | 894 | /* Bucket expansion has the effect of doubling the number of buckets 895 | * and redistributing the items into the new buckets. Ideally the 896 | * items will distribute more or less evenly into the new buckets 897 | * (the extent to which this is true is a measure of the quality of 898 | * the hash function as it applies to the key domain). 899 | * 900 | * With the items distributed into more buckets, the chain length 901 | * (item count) in each bucket is reduced. Thus by expanding buckets 902 | * the hash keeps a bound on the chain length. This bounded chain 903 | * length is the essence of how a hash provides constant time lookup. 904 | * 905 | * The calculation of tbl->ideal_chain_maxlen below deserves some 906 | * explanation. First, keep in mind that we're calculating the ideal 907 | * maximum chain length based on the *new* (doubled) bucket count. 908 | * In fractions this is just n/b (n=number of items,b=new num buckets). 909 | * Since the ideal chain length is an integer, we want to calculate 910 | * ceil(n/b). We don't depend on floating point arithmetic in this 911 | * hash, so to calculate ceil(n/b) with integers we could write 912 | * 913 | * ceil(n/b) = (n/b) + ((n%b)?1:0) 914 | * 915 | * and in fact a previous version of this hash did just that. 916 | * But now we have improved things a bit by recognizing that b is 917 | * always a power of two. We keep its base 2 log handy (call it lb), 918 | * so now we can write this with a bit shift and logical AND: 919 | * 920 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) 921 | * 922 | */ 923 | #define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \ 924 | do { \ 925 | unsigned _he_bkt; \ 926 | unsigned _he_bkt_i; \ 927 | struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ 928 | UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ 929 | _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ 930 | 2UL * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ 931 | if (!_he_new_buckets) { \ 932 | HASH_RECORD_OOM(oomed); \ 933 | } else { \ 934 | uthash_bzero(_he_new_buckets, \ 935 | 2UL * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ 936 | (tbl)->ideal_chain_maxlen = \ 937 | ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \ 938 | ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ 939 | (tbl)->nonideal_items = 0; \ 940 | for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ 941 | _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \ 942 | while (_he_thh != NULL) { \ 943 | _he_hh_nxt = _he_thh->hh_next; \ 944 | HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ 945 | _he_newbkt = &(_he_new_buckets[_he_bkt]); \ 946 | if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ 947 | (tbl)->nonideal_items++; \ 948 | if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \ 949 | _he_newbkt->expand_mult++; \ 950 | } \ 951 | } \ 952 | _he_thh->hh_prev = NULL; \ 953 | _he_thh->hh_next = _he_newbkt->hh_head; \ 954 | if (_he_newbkt->hh_head != NULL) { \ 955 | _he_newbkt->hh_head->hh_prev = _he_thh; \ 956 | } \ 957 | _he_newbkt->hh_head = _he_thh; \ 958 | _he_thh = _he_hh_nxt; \ 959 | } \ 960 | } \ 961 | uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ 962 | (tbl)->num_buckets *= 2U; \ 963 | (tbl)->log2_num_buckets++; \ 964 | (tbl)->buckets = _he_new_buckets; \ 965 | (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \ 966 | ((tbl)->ineff_expands+1U) : 0U; \ 967 | if ((tbl)->ineff_expands > 1U) { \ 968 | (tbl)->noexpand = 1; \ 969 | uthash_noexpand_fyi(tbl); \ 970 | } \ 971 | uthash_expand_fyi(tbl); \ 972 | } \ 973 | } while (0) 974 | 975 | 976 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ 977 | /* Note that HASH_SORT assumes the hash handle name to be hh. 978 | * HASH_SRT was added to allow the hash handle name to be passed in. */ 979 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) 980 | #define HASH_SRT(hh,head,cmpfcn) \ 981 | do { \ 982 | unsigned _hs_i; \ 983 | unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ 984 | struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ 985 | if (head != NULL) { \ 986 | _hs_insize = 1; \ 987 | _hs_looping = 1; \ 988 | _hs_list = &((head)->hh); \ 989 | while (_hs_looping != 0U) { \ 990 | _hs_p = _hs_list; \ 991 | _hs_list = NULL; \ 992 | _hs_tail = NULL; \ 993 | _hs_nmerges = 0; \ 994 | while (_hs_p != NULL) { \ 995 | _hs_nmerges++; \ 996 | _hs_q = _hs_p; \ 997 | _hs_psize = 0; \ 998 | for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ 999 | _hs_psize++; \ 1000 | _hs_q = ((_hs_q->next != NULL) ? \ 1001 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 1002 | if (_hs_q == NULL) { \ 1003 | break; \ 1004 | } \ 1005 | } \ 1006 | _hs_qsize = _hs_insize; \ 1007 | while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ 1008 | if (_hs_psize == 0U) { \ 1009 | _hs_e = _hs_q; \ 1010 | _hs_q = ((_hs_q->next != NULL) ? \ 1011 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 1012 | _hs_qsize--; \ 1013 | } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ 1014 | _hs_e = _hs_p; \ 1015 | if (_hs_p != NULL) { \ 1016 | _hs_p = ((_hs_p->next != NULL) ? \ 1017 | HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ 1018 | } \ 1019 | _hs_psize--; \ 1020 | } else if ((cmpfcn( \ 1021 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ 1022 | DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \ 1023 | )) <= 0) { \ 1024 | _hs_e = _hs_p; \ 1025 | if (_hs_p != NULL) { \ 1026 | _hs_p = ((_hs_p->next != NULL) ? \ 1027 | HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ 1028 | } \ 1029 | _hs_psize--; \ 1030 | } else { \ 1031 | _hs_e = _hs_q; \ 1032 | _hs_q = ((_hs_q->next != NULL) ? \ 1033 | HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ 1034 | _hs_qsize--; \ 1035 | } \ 1036 | if ( _hs_tail != NULL ) { \ 1037 | _hs_tail->next = ((_hs_e != NULL) ? \ 1038 | ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ 1039 | } else { \ 1040 | _hs_list = _hs_e; \ 1041 | } \ 1042 | if (_hs_e != NULL) { \ 1043 | _hs_e->prev = ((_hs_tail != NULL) ? \ 1044 | ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ 1045 | } \ 1046 | _hs_tail = _hs_e; \ 1047 | } \ 1048 | _hs_p = _hs_q; \ 1049 | } \ 1050 | if (_hs_tail != NULL) { \ 1051 | _hs_tail->next = NULL; \ 1052 | } \ 1053 | if (_hs_nmerges <= 1U) { \ 1054 | _hs_looping = 0; \ 1055 | (head)->hh.tbl->tail = _hs_tail; \ 1056 | DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ 1057 | } \ 1058 | _hs_insize *= 2U; \ 1059 | } \ 1060 | HASH_FSCK(hh, head, "HASH_SRT"); \ 1061 | } \ 1062 | } while (0) 1063 | 1064 | /* This function selects items from one hash into another hash. 1065 | * The end result is that the selected items have dual presence 1066 | * in both hashes. There is no copy of the items made; rather 1067 | * they are added into the new hash through a secondary hash 1068 | * hash handle that must be present in the structure. */ 1069 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ 1070 | do { \ 1071 | unsigned _src_bkt, _dst_bkt; \ 1072 | void *_last_elt = NULL, *_elt; \ 1073 | UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ 1074 | ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ 1075 | if ((src) != NULL) { \ 1076 | for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ 1077 | for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ 1078 | _src_hh != NULL; \ 1079 | _src_hh = _src_hh->hh_next) { \ 1080 | _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ 1081 | if (cond(_elt)) { \ 1082 | IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \ 1083 | _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ 1084 | _dst_hh->key = _src_hh->key; \ 1085 | _dst_hh->keylen = _src_hh->keylen; \ 1086 | _dst_hh->hashv = _src_hh->hashv; \ 1087 | _dst_hh->prev = _last_elt; \ 1088 | _dst_hh->next = NULL; \ 1089 | if (_last_elt_hh != NULL) { \ 1090 | _last_elt_hh->next = _elt; \ 1091 | } \ 1092 | if ((dst) == NULL) { \ 1093 | DECLTYPE_ASSIGN(dst, _elt); \ 1094 | HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ 1095 | IF_HASH_NONFATAL_OOM( \ 1096 | if (_hs_oomed) { \ 1097 | uthash_nonfatal_oom(_elt); \ 1098 | (dst) = NULL; \ 1099 | continue; \ 1100 | } \ 1101 | ) \ 1102 | } else { \ 1103 | _dst_hh->tbl = (dst)->hh_dst.tbl; \ 1104 | } \ 1105 | HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ 1106 | HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ 1107 | (dst)->hh_dst.tbl->num_items++; \ 1108 | IF_HASH_NONFATAL_OOM( \ 1109 | if (_hs_oomed) { \ 1110 | HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ 1111 | HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ 1112 | _dst_hh->tbl = NULL; \ 1113 | uthash_nonfatal_oom(_elt); \ 1114 | continue; \ 1115 | } \ 1116 | ) \ 1117 | HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ 1118 | _last_elt = _elt; \ 1119 | _last_elt_hh = _dst_hh; \ 1120 | } \ 1121 | } \ 1122 | } \ 1123 | } \ 1124 | HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ 1125 | } while (0) 1126 | 1127 | #define HASH_CLEAR(hh,head) \ 1128 | do { \ 1129 | if ((head) != NULL) { \ 1130 | HASH_BLOOM_FREE((head)->hh.tbl); \ 1131 | uthash_free((head)->hh.tbl->buckets, \ 1132 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ 1133 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 1134 | (head) = NULL; \ 1135 | } \ 1136 | } while (0) 1137 | 1138 | #define HASH_OVERHEAD(hh,head) \ 1139 | (((head) != NULL) ? ( \ 1140 | (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ 1141 | ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ 1142 | sizeof(UT_hash_table) + \ 1143 | (HASH_BLOOM_BYTELEN))) : 0U) 1144 | 1145 | #ifdef NO_DECLTYPE 1146 | #define HASH_ITER(hh,head,el,tmp) \ 1147 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ 1148 | (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) 1149 | #else 1150 | #define HASH_ITER(hh,head,el,tmp) \ 1151 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ 1152 | (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) 1153 | #endif 1154 | 1155 | /* obtain a count of items in the hash */ 1156 | #define HASH_COUNT(head) HASH_CNT(hh,head) 1157 | #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) 1158 | 1159 | typedef struct UT_hash_bucket { 1160 | struct UT_hash_handle *hh_head; 1161 | unsigned count; 1162 | 1163 | /* expand_mult is normally set to 0. In this situation, the max chain length 1164 | * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If 1165 | * the bucket's chain exceeds this length, bucket expansion is triggered). 1166 | * However, setting expand_mult to a non-zero value delays bucket expansion 1167 | * (that would be triggered by additions to this particular bucket) 1168 | * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. 1169 | * (The multiplier is simply expand_mult+1). The whole idea of this 1170 | * multiplier is to reduce bucket expansions, since they are expensive, in 1171 | * situations where we know that a particular bucket tends to be overused. 1172 | * It is better to let its chain length grow to a longer yet-still-bounded 1173 | * value, than to do an O(n) bucket expansion too often. 1174 | */ 1175 | unsigned expand_mult; 1176 | 1177 | } UT_hash_bucket; 1178 | 1179 | /* random signature used only to find hash tables in external analysis */ 1180 | #define HASH_SIGNATURE 0xa0111fe1u 1181 | #define HASH_BLOOM_SIGNATURE 0xb12220f2u 1182 | 1183 | typedef struct UT_hash_table { 1184 | UT_hash_bucket *buckets; 1185 | unsigned num_buckets, log2_num_buckets; 1186 | unsigned num_items; 1187 | struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ 1188 | ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ 1189 | 1190 | /* in an ideal situation (all buckets used equally), no bucket would have 1191 | * more than ceil(#items/#buckets) items. that's the ideal chain length. */ 1192 | unsigned ideal_chain_maxlen; 1193 | 1194 | /* nonideal_items is the number of items in the hash whose chain position 1195 | * exceeds the ideal chain maxlen. these items pay the penalty for an uneven 1196 | * hash distribution; reaching them in a chain traversal takes >ideal steps */ 1197 | unsigned nonideal_items; 1198 | 1199 | /* ineffective expands occur when a bucket doubling was performed, but 1200 | * afterward, more than half the items in the hash had nonideal chain 1201 | * positions. If this happens on two consecutive expansions we inhibit any 1202 | * further expansion, as it's not helping; this happens when the hash 1203 | * function isn't a good fit for the key domain. When expansion is inhibited 1204 | * the hash will still work, albeit no longer in constant time. */ 1205 | unsigned ineff_expands, noexpand; 1206 | 1207 | uint32_t signature; /* used only to find hash tables in external analysis */ 1208 | #ifdef HASH_BLOOM 1209 | uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ 1210 | uint8_t *bloom_bv; 1211 | uint8_t bloom_nbits; 1212 | #endif 1213 | 1214 | } UT_hash_table; 1215 | 1216 | typedef struct UT_hash_handle { 1217 | struct UT_hash_table *tbl; 1218 | void *prev; /* prev element in app order */ 1219 | void *next; /* next element in app order */ 1220 | struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ 1221 | struct UT_hash_handle *hh_next; /* next hh in bucket order */ 1222 | void *key; /* ptr to enclosing struct's key */ 1223 | unsigned keylen; /* enclosing struct's key len */ 1224 | unsigned hashv; /* result of hash-fcn(key) */ 1225 | } UT_hash_handle; 1226 | 1227 | #endif /* UTHASH_H */ 1228 | -------------------------------------------------------------------------------- /wrap.c: -------------------------------------------------------------------------------- 1 | #include "wrap.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define MAX(x,y) ((x)<(y) ? (y) : (x)) 11 | 12 | static void _wordlist_invariant(const struct wordlist *ws) 13 | { 14 | assert(ws); 15 | assert(ws->len <= ws->_alloced); 16 | #ifndef NDEBUG 17 | for (size_t i = 0; i < ws->len; i++) 18 | { 19 | struct wordlist_entry *w = ws->words+i; 20 | assert(w->len > 0); 21 | assert(strcspn(w->start, " \t") >= (size_t)w->len); 22 | } 23 | #endif 24 | } 25 | 26 | struct wordlist *wordlist_create(void) 27 | { 28 | struct wordlist *ws = malloc(sizeof *ws); 29 | if (!ws) abort(); 30 | ws->words = NULL; 31 | ws->_alloced = ws->len = 0; 32 | _wordlist_invariant(ws); 33 | return ws; 34 | } 35 | 36 | /* underlying string memory is shared by copies */ 37 | void wordlist_append_copy(struct wordlist *ws, const struct wordlist_entry *w) 38 | { 39 | _wordlist_invariant(ws); 40 | if (ws->len >= ws->_alloced) 41 | { 42 | ws->_alloced = ws->_alloced == 0 ? 256 : ws->_alloced * 2; 43 | ws->words = realloc(ws->words, ws->_alloced * sizeof(*w)); 44 | if (!ws->words) abort(); 45 | } 46 | ws->words[ws->len++] = *w; 47 | _wordlist_invariant(ws); 48 | } 49 | 50 | /* TODO: do unicode word break detection with ICU */ 51 | struct wordlist *wordlist_segment(struct wordlist *ws, const char *text) 52 | { 53 | struct wordlist_entry w; 54 | _wordlist_invariant(ws); 55 | while (*text) 56 | { 57 | while (isspace(*text)) 58 | text++; 59 | w.start = text; 60 | while (*text && !isspace(*text)) 61 | text++; 62 | w.len = text - w.start; 63 | if (w.len > 0) 64 | wordlist_append_copy(ws, &w); 65 | while (isspace(*text)) 66 | text++; 67 | } 68 | _wordlist_invariant(ws); 69 | return ws; 70 | } 71 | 72 | /* src and dst entries will share the underlying string memory */ 73 | void wordlist_concat(struct wordlist *dst, const struct wordlist *src) 74 | { 75 | _wordlist_invariant(src); 76 | _wordlist_invariant(dst); 77 | 78 | for (size_t i = 0; i < src->len; i++) 79 | wordlist_append_copy(dst, src->words+i); 80 | 81 | _wordlist_invariant(dst); 82 | } 83 | 84 | /* note: underlying string must be freed by caller */ 85 | void wordlist_free(struct wordlist *ws) 86 | { 87 | _wordlist_invariant(ws); 88 | free(ws->words); 89 | free(ws); 90 | } 91 | 92 | /* the cost (gap until width, squared) of putting words (i..j) on a line of 93 | * their own for all i, j. If they extend past the width, the cost is 94 | * considered infinite. 95 | * 96 | * Representing cost as floating point to get arithmetic that respects 97 | * INFINITY. Also I don't make equality comparisons that could be troublesome. 98 | * 99 | * Caller must free the return value and its first element. 100 | */ 101 | static float **line_costs(const struct wordlist *ws, unsigned width) 102 | { 103 | assert(ws && ws->len > 0); 104 | 105 | size_t i, j; 106 | /* underlying chunk of memory that can be freed in one go */ 107 | float *mem = malloc((sizeof *mem) * ws->len * ws->len); 108 | if (!mem) abort(); 109 | for (i = 0; i < ws->len * ws->len; i++) 110 | mem[i] = INFINITY; 111 | 112 | /* split the memory into rows and columns */ 113 | float **table = malloc((sizeof *table) * ws->len); 114 | if (!table) abort(); 115 | for (i = 0; i < ws->len; i++) 116 | table[i] = mem + (i * ws->len); 117 | 118 | for (i = 0; i < ws->len; i++) 119 | { 120 | unsigned linelen = 0; 121 | for (j = i; j < ws->len; j++) 122 | { 123 | linelen += (unsigned)ws->words[j].len; 124 | if (linelen <= width) 125 | table[i][j] = pow(width - linelen, 2); 126 | else 127 | break; 128 | linelen++; /* space between words */ 129 | } 130 | } 131 | 132 | return table; 133 | } 134 | 135 | /* Use bottom-up dynamic programming to create a list of "parent pointers" of 136 | * word indices before which a break should be placed. The pointers are in this 137 | * case array indices, which you follow like ret[0], ret[ret[0]], ... */ 138 | static size_t *best_breaks(const struct wordlist *ws, unsigned width) 139 | { 140 | assert(ws && ws->len > 0); 141 | 142 | size_t i, j; 143 | float **costs = line_costs(ws, width); 144 | float *min_cost = malloc(1 + ws->len * sizeof *min_cost); 145 | size_t *next_brk = malloc(ws->len * sizeof *next_brk); 146 | if (!costs || !min_cost || !next_brk) abort(); 147 | 148 | for (i = ws->len-1; i < ws->len; i--) 149 | min_cost[i] = INFINITY; 150 | min_cost[ws->len] = 0; /* fake item past the end */ 151 | 152 | /* below 0, unsigned wraps above ws->len */ 153 | for (i = ws->len-1; i < ws->len; i--) 154 | for (j = ws->len-1; i <= j && j < ws->len; j--) 155 | if (costs[i][j] + min_cost[j+1] <= min_cost[i]) 156 | { 157 | min_cost[i] = costs[i][j] + min_cost[j+1]; 158 | next_brk[i] = j+1; 159 | } 160 | 161 | free(*costs); 162 | free(costs); 163 | free(min_cost); 164 | return next_brk; 165 | } 166 | 167 | size_t print_wrapped( 168 | const struct wordlist *ws, const char *overhang, 169 | size_t width, bool flowed) 170 | { 171 | _wordlist_invariant(ws); 172 | if (ws->len == 0) 173 | return 0; 174 | 175 | width -= strlen(overhang); 176 | 177 | size_t i, brk; 178 | unsigned total = 0, longest_line = 0; 179 | size_t *brks = best_breaks(ws, width); 180 | 181 | for (i = 0, brk = *brks; i < ws->len; i++) 182 | { 183 | if (i == brk) 184 | { 185 | total = 0; 186 | brk = brks[i]; 187 | if (i < ws->len) 188 | printf("%s\n%s", flowed ? " " : "", overhang); 189 | else 190 | putchar('\n'); 191 | } 192 | if (total > 0) 193 | putchar(' '); 194 | longest_line = MAX(total + ws->words[i].len, longest_line); 195 | printf("%.*s", (int)ws->words[i].len, ws->words[i].start); 196 | 197 | total += ws->words[i].len + 1; 198 | } 199 | putchar('\n'); 200 | free(brks); 201 | 202 | return longest_line; 203 | } 204 | -------------------------------------------------------------------------------- /wrap.h: -------------------------------------------------------------------------------- 1 | #ifndef MIMEDOWN_WRAP_H 2 | #define MIMEDOWN_WRAP_H 3 | 4 | #include 5 | #include 6 | 7 | struct wordlist_entry 8 | { 9 | const char *start; 10 | ptrdiff_t len; 11 | }; 12 | 13 | struct wordlist 14 | { 15 | struct wordlist_entry *words; 16 | size_t len, _alloced; 17 | }; 18 | 19 | struct wordlist *wordlist_create(void); 20 | 21 | void wordlist_append_copy(struct wordlist *ws, const struct wordlist_entry *w); 22 | struct wordlist *wordlist_segment(struct wordlist *ws, const char *text); 23 | void wordlist_concat(struct wordlist *dst, const struct wordlist *src); 24 | void wordlist_free(struct wordlist *ws); 25 | size_t print_wrapped(const struct wordlist *ws, const char *overhang, 26 | size_t width, bool flowed); 27 | 28 | #endif 29 | --------------------------------------------------------------------------------