├── .gitignore
├── Makefile
├── README.md
├── ini.c
├── ini.h
├── json.c
├── json.h
├── mqttcollect.1
├── mqttcollect.c
├── mqttcollect.ini.example
├── mqttcollect.pandoc
├── temperature-simulator.py
├── uthash.h
└── utstring.h


/.gitignore:
--------------------------------------------------------------------------------
1 | mqttcollect
2 | *.o
3 | *.a
4 | *.so
5 | jp.ini
6 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PREFIX=/usr/local
 2 | BINDIR=$(PREFIX)/bin
 3 | MANDIR=$(PREFIX)/share/man/man1
 4 | 
 5 | CFLAGS= -Wall -Werror
 6 | # Libraries; they are placed after the objects on the link line. Append
 7 | # -lssl -lcrypto -lrt here if your platform needs them.
 8 | LDFLAGS=-lmosquitto
 9 | 
10 | all: mqttcollect
11 | 
12 | # These targets are commands, not files: keep them working even if a file
13 | # with the same name shows up in the source tree.
14 | .PHONY: all install clean clobber doc
15 | 
16 | # Remove a half-written target (e.g. README.md) when its recipe fails.
17 | .DELETE_ON_ERROR:
18 | 
19 | mqttcollect: mqttcollect.c uthash.h json.o utstring.h ini.o
20 | 	$(CC) $(CFLAGS) -o $@ mqttcollect.c json.o ini.o $(LDFLAGS)
21 | 
22 | # json.o and ini.o are compiled by make's built-in .c -> .o rule; only their
23 | # header dependencies are listed here.
24 | json.o: json.c json.h
25 | ini.o: ini.c ini.h
26 | 
27 | # Set DESTDIR for staged installs (packaging); it is empty by default.
28 | install: mqttcollect
29 | 	install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)
30 | 	install -m 755 mqttcollect $(DESTDIR)$(BINDIR)/
31 | 	install -m 644 mqttcollect.1 $(DESTDIR)$(MANDIR)/
32 | 
33 | clean:
34 | 	rm -f *.o
35 | 
36 | clobber: clean
37 | 	rm -f mqttcollect
38 | 
39 | doc: README.md mqttcollect.1
40 | 
41 | README.md: mqttcollect.pandoc
42 | 	pandoc -w markdown mqttcollect.pandoc -o $@
43 | 
44 | mqttcollect.1: mqttcollect.pandoc
45 | 	pandoc -s -w man mqttcollect.pandoc -o $@
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | NAME
  2 | ====
  3 | 
  4 | mqttcollect - MQTT-based Exec-plugin for collectd
  5 | 
  6 | SYNOPSIS
  7 | ========
  8 | 
  9 | mqttcollect [-v] [-f *file*]
 10 | 
 11 | DESCRIPTION
 12 | ===========
 13 | 
 14 | *mqttcollect* is an executable program which is used with collectd(1).
 15 | It subscribes to any number of MQTT topics you specify, and prints
 16 | values to stdout for collectd to process in an exec plugin block.
 17 | 
 18 |     PUTVAL tiggr/mqtt-sys/gauge-clients.inactive 1430914033:0.00
 19 | 
 20 | *collectd* launches *mqttcollect* which connects to the configured MQTT
 21 | broker, subscribes and waits for publishes to subscribed topics in an
 22 | endless loop. If an error occurs or the program exits for whatever
 23 | reason, *collectd* will restart it and log the reason in its log file.
 24 | 
 25 | *mqttcollect* supports TLS connections to the MQTT broker,
 26 | username/password authentication, and TLS-PSK, all configured via its
 27 | configuration file.
 28 | 
 29 | OPTIONS
 30 | =======
 31 | 
 32 | *mqttcollect* understands the following options.
 33 | 
 34 | -f *file*
 35 | :   Specify an ini-type configuration file (see below), which defaults
 36 |     to `/usr/local/etc/mqttcollect.ini`.
 37 | 
 38 | -v
 39 | :   Verbose.
 40 | 
 41 | CONFIGURATION
 42 | =============
 43 | 
 44 | *mqttcollect* requires a configuration file to operate. This ini-type
 45 | file must have a `[defaults]` section in which general program
 46 | parameters are configured, and it will have any number of additional
 47 | sections specifying the MQTT topics it is to subscribe to. For the
 48 | defaults section, please consult the example file provided with the
 49 | source code for a list of allowed settings.
 50 | 
 51 | Within a *topic* section, metrics collected by *collectd* are specified.
 52 | 
 53 |     [defaults]
 54 |     host = localhost
 55 |     port = 1883
 56 | 
 57 |     ; (1) subscribe to a wildcard and produce three metrics per subscription.
 58 |     ; the metric names are interpolated with `tid' from the JSON message
 59 |     ; payload, and the values of each metric are obtained from the
 60 |     ; JSON element behind the `<'
 61 | 
 62 |     [owntracks/+/+]
 63 |     gauge = vehicle/{tid}/speed<vel
 64 |     gauge = vehicle/{tid}/altitude<alt
 65 |     counter = vehicle/{tid}/odometer<trip
 66 | 
 67 |     ; (2) subscribe to one topic and rename the metric
 68 | 
 69 |     [$SYS/broker/clients/inactive]
 70 |     gauge = clients.inactive
 71 | 
 72 |     ; (3) subscribe to one topic and KEEP its name
 73 |     ; use the "*"-form for wildcarded topics
 74 | 
 75 |     [$SYS/broker/load/messages/received/1min]
 76 |     gauge = *
 77 | 
 78 | Example `1` is complex. *mqttcollect* will subscribe to the wildcarded
 79 | `owntracks/+/+` topic, and for each message received on that topic, will
 80 | produce three metrics. The special character `<` in the line indicates
 81 | the MQTT message payload is expected to be JSON. Each of the metric
 82 | names will have the JSON element `tid` from the payload interpolated
 83 | into their names, and the actual value of the metric will be obtained
 84 | (`<`) from the specified JSON element (`vel`, `alt`, and `trip`
 85 | respectively). Using this configuration, and assuming a payload with
 86 | this JSON
 87 | 
 88 |     {"tid": "BB", "vel": 62, "trip": 8246531, "alt": 48}
 89 | 
 90 | *mqttcollect* could produce the following three metrics for *collectd*:
 91 | 
 92 |     PUTVAL tiggr/mqttcollect/gauge-vehicle/BB/speed 1431535440:62.00
 93 |     PUTVAL tiggr/mqttcollect/gauge-vehicle/BB/altitude 1431535440:48.00
 94 |     PUTVAL tiggr/mqttcollect/counter-vehicle/BB/odometer 1431535440:8246531.00
 95 | 
 96 | In example `2`, the program will subscribe to a single topic, and will
 97 | produce a metric renamed to `clients.inactive`.
 98 | 
 99 |     PUTVAL tiggr/mqttcollect/gauge-clients.inactive 1431535434:1.00
100 | 
101 | Example `3` subscribes to the single topic and does *not* rename the
102 | metric (note the `*`); this is what you'll typically use for wildcarded
103 | topic subscriptions.
104 | 
105 |     PUTVAL tiggr/mqttcollect/gauge-$SYS/broker/load/messages/received/1min 1431535557:61.47
106 | 
107 | INFLUXDB
108 | ========
109 | 
110 | As an example, we show how to configure InfluxDB to accept values from
111 | *collectd* via the latter’s network plugin. Configure InfluxDB to launch
112 | the native *collectd* input:
113 | 
114 |     [input_plugins]
115 | 
116 |       [input_plugins.collectd]
117 |       enabled = true
118 |       # address = "0.0.0.0" # defaults to bind-address.
119 |       port = 25826
120 |       database = "collectd"
121 |       # https://github.com/collectd/collectd/blob/master/src/types.db
122 |       typesdb = "/usr/share/collectd/types.db"
123 | 
124 | COLLECTD
125 | ========
126 | 
127 | Configure *collectd* to send its metrics to InfluxDB via the network
128 | plugin which talks to InfluxDB. (Compare the port numbers here and above
129 | in InfluxDB.)
130 | 
131 |     LoadPlugin network
132 | 
133 |     <Plugin "network">
134 |        # influxdb
135 |        Server "127.0.0.1" "25826"
136 |     </Plugin>
137 | 
138 | Configure *collectd* to load our executable *mqttcollect* via its exec
139 | mechanism. Specify *mqttcollect*'s options as individual strings in the
140 | `Exec` invocation.
141 | 
142 |     LoadPlugin exec
143 | 
144 |     <Plugin exec>
145 |        Exec "mosquitto:mosquitto" "/usr/bin/mqttcollect" "-f" "/etc/my.ini"
146 |     </Plugin>
147 | 
148 | BUGS
149 | ====
150 | 
151 | Yes.
152 | 
153 | AVAILABILITY
154 | ============
155 | 
156 | <https://github.com/jpmens/mqttcollect>
157 | 
158 | CREDITS
159 | =======
160 | 
161 | -   This program uses *libmosquitto*, a library provided by the
162 |     Mosquitto project <http://mosquitto.org> as well as some of the
163 |     excellent include files provided by
164 |     <http://troydhanson.github.io/uthash>
165 | 
166 | INSTALLATION
167 | ============
168 | 
169 | -   Obtain the source code for *mqttcollect*, adjust the `Makefile` and
170 |     run `make`.
171 | 
172 | SEE ALSO
173 | ========
174 | 
175 | -   `collectd`(1).
176 | -   <https://github.com/jpmens/mqttwarn>
177 | 
178 | AUTHOR
179 | ======
180 | 
181 | Jan-Piet Mens <http://jpmens.net>
182 | 
183 | 


--------------------------------------------------------------------------------
/ini.c:
--------------------------------------------------------------------------------
  1 | /* inih -- simple .INI file parser
  2 | 
  3 | inih is released under the New BSD license (see LICENSE.txt). Go to the project
  4 | home page for more info:
  5 | 
  6 | https://github.com/benhoyt/inih
  7 | 
  8 | */
  9 | 
 10 | #ifdef _MSC_VER
 11 | #define _CRT_SECURE_NO_WARNINGS
 12 | #endif
 13 | 
 14 | #include <stdio.h>
 15 | #include <ctype.h>
 16 | #include <string.h>
 17 | 
 18 | #include "ini.h"
 19 | 
 20 | #if !INI_USE_STACK
 21 | #include <stdlib.h>
 22 | #endif
 23 | 
 24 | #define MAX_SECTION 50
 25 | #define MAX_NAME 50
 26 | 
 27 | /* Trim trailing whitespace from s by overwriting it with NULs; returns s. */
 28 | static char* rstrip(char* s)
 29 | {
 30 |     size_t len = strlen(s);
 31 |     for (; len > 0 && isspace((unsigned char)s[len - 1]); len--)
 32 |         s[len - 1] = '\0';
 33 |     return s;
 34 | }
 35 | 
 36 | /* Return the address of the first char of s that is not whitespace. */
 37 | static char* lskip(const char* s)
 38 | {
 39 |     const char* cur = s;
 40 |     for (; *cur != '\0' && isspace((unsigned char)*cur); cur++) {}
 41 |     return (char*)cur;
 42 | }
 43 | 
 44 | /* Scan s for c or for a ';' comment marker (a ';' directly preceded by
 45 |    whitespace). Returns the match, or the terminating NUL if there is none. */
 46 | static char* find_char_or_comment(const char* s, char c)
 47 | {
 48 |     int prev_space = 0;
 49 |     for (; *s != '\0'; s++) {
 50 |         if (*s == c || (prev_space && *s == ';'))
 51 |             break;
 52 |         prev_space = isspace((unsigned char)(*s));
 53 |     }
 54 |     return (char*)s;
 55 | }
 56 | 
 57 | /* strncpy() wrapper: dest (size bytes) always ends up NUL-terminated. */
 58 | static char* strncpy0(char* dest, const char* src, size_t size)
 59 | {
 60 |     char* out = strncpy(dest, src, size);
 61 |     out[size - 1] = '\0';
 62 |     return out;
 63 | }
 64 | 
 65 | /* Parse an already-open INI stream; the full contract is in ini.h. */
 66 | int ini_parse_file(FILE* file,
 67 |                    int (*handler)(void*, const char*, const char*,
 68 |                                   const char*),
 69 |                    void* user)
 70 | {
 71 |     /* Uses a fair bit of stack (use heap instead if you need to) */
 72 | #if INI_USE_STACK
 73 |     char line[INI_MAX_LINE];
 74 | #else
 75 |     char* line;
 76 | #endif
 77 |     char section[MAX_SECTION] = "";  /* current [section]; longer names are truncated */
 78 |     char prev_name[MAX_NAME] = "";   /* last name seen, used for continuation lines */
 79 | 
 80 |     char* start;
 81 |     char* end;
 82 |     char* name;
 83 |     char* value;
 84 |     int lineno = 0;
 85 |     int error = 0;  /* line number of the first error, 0 if none */
 86 | 
 87 | #if !INI_USE_STACK
 88 |     line = (char*)malloc(INI_MAX_LINE);
 89 |     if (!line) {
 90 |         return -2;  /* allocation failure */
 91 |     }
 92 | #endif
 93 | 
 94 |     /* Scan through file line by line; fgets() reads at most INI_MAX_LINE - 1 chars per call */
 95 |     while (fgets(line, INI_MAX_LINE, file) != NULL) {
 96 |         lineno++;
 97 | 
 98 |         start = line;
 99 | #if INI_ALLOW_BOM
100 |         if (lineno == 1 && (unsigned char)start[0] == 0xEF &&
101 |                            (unsigned char)start[1] == 0xBB &&
102 |                            (unsigned char)start[2] == 0xBF) {
103 |             start += 3;
104 |         }
105 | #endif
106 |         start = lskip(rstrip(start));
107 | 
108 |         if (*start == ';' || *start == '#') {
109 |             /* Per Python ConfigParser, allow '#' comments at start of line */
110 |         }
111 | #if INI_ALLOW_MULTILINE
112 |         else if (*prev_name && *start && start > line) {
113 |             /* Non-blank line with leading whitespace, treat as continuation
114 |                of previous name's value (as per Python ConfigParser). */
115 |             if (!handler(user, section, prev_name, start) && !error)
116 |                 error = lineno;
117 |         }
118 | #endif
119 |         else if (*start == '[') {
120 |             /* A "[section]" line */
121 |             end = find_char_or_comment(start + 1, ']');
122 |             if (*end == ']') {
123 |                 *end = '\0';
124 |                 strncpy0(section, start + 1, sizeof(section));
125 |                 *prev_name = '\0';  /* a continuation line cannot span sections */
126 |             }
127 |             else if (!error) {
128 |                 /* No ']' found on section line */
129 |                 error = lineno;
130 |             }
131 |         }
132 |         else if (*start && *start != ';') {
133 |             /* Not a comment, must be a name[=:]value pair */
134 |             end = find_char_or_comment(start, '=');
135 |             if (*end != '=') {
136 |                 end = find_char_or_comment(start, ':');
137 |             }
138 |             if (*end == '=' || *end == ':') {
139 |                 *end = '\0';
140 |                 name = rstrip(start);
141 |                 value = lskip(end + 1);
142 |                 end = find_char_or_comment(value, '\0');  /* only an inline ';' comment can match */
143 |                 if (*end == ';')
144 |                     *end = '\0';
145 |                 rstrip(value);
146 | 
147 |                 /* Valid name[=:]value pair found, call handler */
148 |                 strncpy0(prev_name, name, sizeof(prev_name));
149 |                 if (!handler(user, section, name, value) && !error)
150 |                     error = lineno;
151 |             }
152 |             else if (!error) {
153 |                 /* No '=' or ':' found on name[=:]value line */
154 |                 error = lineno;
155 |             }
156 |         }
157 | 
158 | #if INI_STOP_ON_FIRST_ERROR
159 |         if (error)
160 |             break;
161 | #endif
162 |     }
163 | 
164 | #if !INI_USE_STACK
165 |     free(line);
166 | #endif
167 | 
168 |     return error;
169 | }
170 | 
171 | /* Open filename, run ini_parse_file() on it and close it again.
172 |    Returns -1 if the file cannot be opened. */
173 | int ini_parse(const char* filename,
174 |               int (*handler)(void*, const char*, const char*, const char*),
175 |               void* user)
176 | {
177 |     int result = -1;
178 |     FILE* fp = fopen(filename, "r");
179 | 
180 |     if (fp != NULL) {
181 |         result = ini_parse_file(fp, handler, user);
182 |         fclose(fp);
183 |     }
184 |     return result;
185 | }
186 | 


--------------------------------------------------------------------------------
/ini.h:
--------------------------------------------------------------------------------
 1 | /* inih -- simple .INI file parser
 2 | 
 3 | inih is released under the New BSD license (see LICENSE.txt). Go to the project
 4 | home page for more info:
 5 | 
 6 | https://github.com/benhoyt/inih
 7 | 
 8 | */
 9 | 
10 | #ifndef __INI_H__
11 | #define __INI_H__
12 | 
13 | /* Make this header file easier to include in C++ code */
14 | #ifdef __cplusplus
15 | extern "C" {
16 | #endif
17 | 
18 | #include <stdio.h>
19 | 
20 | /* Parse given INI-style file. May have [section]s, name=value pairs
21 |    (whitespace stripped), and comments starting with ';' (semicolon). Section
22 |    is "" if name=value pair parsed before any section heading. name:value
23 |    pairs are also supported as a concession to Python's ConfigParser.
24 | 
25 |    For each name=value pair parsed, call handler function with given user
26 |    pointer as well as section, name, and value (data only valid for duration
27 |    of handler call). Handler should return nonzero on success, zero on error.
28 | 
29 |    Returns 0 on success, line number of first error on parse error (doesn't
30 |    stop on first error), -1 on file open error, or -2 on memory allocation
31 |    error (only when INI_USE_STACK is zero).
32 | */
33 | int ini_parse(const char* filename,
34 |               int (*handler)(void* user, const char* section,
35 |                              const char* name, const char* value),
36 |               void* user);
37 | 
38 | /* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't
39 |    close the file when it's finished -- the caller must do that. */
40 | int ini_parse_file(FILE* file,
41 |                    int (*handler)(void* user, const char* section,
42 |                                   const char* name, const char* value),
43 |                    void* user);
44 | 
45 | /* Nonzero to allow multi-line value parsing, in the style of Python's
46 |    ConfigParser. If allowed, ini_parse() will call the handler with the same
47 |    name for each subsequent line parsed. */
48 | #ifndef INI_ALLOW_MULTILINE
49 | #define INI_ALLOW_MULTILINE 1
50 | #endif
51 | 
52 | /* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of
53 |    the file. See http://code.google.com/p/inih/issues/detail?id=21 */
54 | #ifndef INI_ALLOW_BOM
55 | #define INI_ALLOW_BOM 1
56 | #endif
57 | 
58 | /* Nonzero to use stack, zero to use heap (malloc/free). */
59 | #ifndef INI_USE_STACK
60 | #define INI_USE_STACK 1
61 | #endif
62 | 
63 | /* Stop parsing on first error (default is to keep parsing). */
64 | #ifndef INI_STOP_ON_FIRST_ERROR
65 | #define INI_STOP_ON_FIRST_ERROR 0
66 | #endif
67 | 
68 | /* Maximum line length for any line in INI file. */
69 | #ifndef INI_MAX_LINE
70 | #define INI_MAX_LINE 200
71 | #endif
72 | 
73 | #ifdef __cplusplus
74 | }
75 | #endif
76 | 
77 | #endif /* __INI_H__ */
78 | 


--------------------------------------------------------------------------------
/json.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |   Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
   3 |   All rights reserved.
   4 | 
   5 |   Permission is hereby granted, free of charge, to any person obtaining a copy
   6 |   of this software and associated documentation files (the "Software"), to deal
   7 |   in the Software without restriction, including without limitation the rights
   8 |   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 |   copies of the Software, and to permit persons to whom the Software is
  10 |   furnished to do so, subject to the following conditions:
  11 | 
  12 |   The above copyright notice and this permission notice shall be included in
  13 |   all copies or substantial portions of the Software.
  14 | 
  15 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 |   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 |   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 |   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 |   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 |   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 |   THE SOFTWARE.
  22 | */
  23 | 
  24 | #include "json.h"
  25 | 
  26 | #include <assert.h>
  27 | #include <stdint.h>
  28 | #include <stdio.h>
  29 | #include <stdlib.h>
  30 | #include <string.h>
  31 | 
  32 | #define out_of_memory() do {                    \
  33 | 		fprintf(stderr, "Out of memory.\n");    \
  34 | 		exit(EXIT_FAILURE);                     \
  35 | 	} while (0)
  36 | 
  37 | /* Portable strdup(): malloc'd copy of str; out_of_memory() on failure. */
  38 | static char *json_strdup(const char *str)
  39 | {
  40 | 	size_t size = strlen(str) + 1;
  41 | 	char *copy = (char*) malloc(size);
  42 | 	if (copy == NULL)
  43 | 		out_of_memory();
  44 | 	return (char*) memcpy(copy, str, size);
  45 | }
  46 | 
  47 | /* String buffer */
  48 | 
  49 | typedef struct
  50 | {
  51 | 	char *cur;
  52 | 	char *end;
  53 | 	char *start;
  54 | } SB;
  55 | 
  56 | static void sb_init(SB *sb)
  57 | {
  58 | 	char *buf = (char*) malloc(16 + 1); /* 16 usable bytes + room for the NUL */
  59 | 	if (buf == NULL)
  60 | 		out_of_memory();
  61 | 	sb->start = sb->cur = buf;
  62 | 	sb->end = buf + 16;
  63 | }
  64 | 
  65 | /* Grow sb unless `need` more bytes already fit; sb and need may be evaluated multiple times. */
  66 | #define sb_need(sb, need) do {                  \
  67 | 		if ((sb)->end - (sb)->cur < (need))     \
  68 | 			sb_grow(sb, need);                  \
  69 | 	} while (0)
  70 | 
  71 | static void sb_grow(SB *sb, int need)
  72 | {
  73 | 	size_t used = sb->cur - sb->start;
  74 | 	size_t capacity = (sb->end - sb->start) * 2; /* always grow at least 2x */
  75 | 	char *buf;
  76 | 
  77 | 	while (capacity < used + need)
  78 | 		capacity *= 2;
  79 | 	buf = (char*) realloc(sb->start, capacity + 1); /* +1: room for the NUL */
  80 | 	if (buf == NULL)
  81 | 		out_of_memory();
  82 | 	sb->start = buf;
  83 | 	sb->cur = buf + used;
  84 | 	sb->end = buf + capacity;
  85 | }
  86 | 
  87 | static void sb_put(SB *sb, const char *bytes, int count)
  88 | {
  89 | 	if (sb->end - sb->cur < count) /* not enough room left: enlarge first */
  90 | 		sb_grow(sb, count);
  91 | 	sb->cur = (char*) memcpy(sb->cur, bytes, count) + count;
  92 | }
  93 | 
  94 | #define sb_putc(sb, c) do {         \
  95 | 		if ((sb)->cur >= (sb)->end) \
  96 | 			sb_grow(sb, 1);         \
  97 | 		*(sb)->cur++ = (c);         \
  98 | 	} while (0)
  99 | 
 100 | static void sb_puts(SB *sb, const char *str)
 101 | {
 102 | 	sb_put(sb, str, strlen(str)); /* appends str without its terminating NUL */
 103 | }
 104 | 
 105 | static char *sb_finish(SB *sb)
 106 | {
 107 | 	*sb->cur = 0; /* terminate the string at the current write position */
 108 | 	assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
 109 | 	return sb->start; /* the buffer is returned as-is; nothing is freed here */
 110 | }
 111 | 
 112 | static void sb_free(SB *sb)
 113 | {
 114 | 	free(sb->start); /* releases the character buffer only, not *sb itself */
 115 | }
 116 | 
 117 | /*
 118 |  * Unicode helper functions
 119 |  *
 120 |  * These are taken from the ccan/charset module and customized a bit.
 121 |  * Putting them here means the compiler can (choose to) inline them,
 122 |  * and it keeps ccan/json from having a dependency.
 123 |  */
 124 | 
 125 | /*
 126 |  * Type for Unicode codepoints.
 127 |  * We need our own because wchar_t might be 16 bits.
 128 |  */
 129 | typedef uint32_t uchar_t;
 130 | 
 131 | /*
 132 |  * Validate a single UTF-8 character starting at @s.
 133 |  * The string must be null-terminated.
 134 |  *
 135 |  * If it's valid, return its length (1 thru 4).
 136 |  * If it's invalid or clipped, return 0.
 137 |  *
 138 |  * This function implements the syntax given in RFC3629, which is
 139 |  * the same as that given in The Unicode Standard, Version 6.0.
 140 |  *
 141 |  * It has the following properties:
 142 |  *
 143 |  *  * All codepoints U+0000..U+10FFFF may be encoded,
 144 |  *    except for U+D800..U+DFFF, which are reserved
 145 |  *    for UTF-16 surrogate pair encoding.
 146 |  *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
 147 |  *    as they exceed the range of Unicode.
 148 |  *  * The sixty-six Unicode "non-characters" are permitted
 149 |  *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
 150 |  */
 151 | static int utf8_validate_cz(const char *s)
 152 | {
 153 | 	unsigned char c = *s++; /* lead byte; s now points at the byte after it */
 154 | 	
 155 | 	if (c <= 0x7F) {        /* 00..7F */
 156 | 		return 1;
 157 | 	} else if (c <= 0xC1) { /* 80..C1 */
 158 | 		/* Stray continuation byte (80..BF) or overlong 2-byte lead (C0..C1). */
 159 | 		return 0;
 160 | 	} else if (c <= 0xDF) { /* C2..DF */
 161 | 		/* Make sure subsequent byte is in the range 0x80..0xBF. */
 162 | 		if (((unsigned char)*s++ & 0xC0) != 0x80) /* a NUL (clipped input) fails here too */
 163 | 			return 0;
 164 | 		
 165 | 		return 2;
 166 | 	} else if (c <= 0xEF) { /* E0..EF */
 167 | 		/* Disallow overlong 3-byte sequence. */
 168 | 		if (c == 0xE0 && (unsigned char)*s < 0xA0)
 169 | 			return 0;
 170 | 		
 171 | 		/* Disallow U+D800..U+DFFF. */
 172 | 		if (c == 0xED && (unsigned char)*s > 0x9F)
 173 | 			return 0;
 174 | 		
 175 | 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
 176 | 		if (((unsigned char)*s++ & 0xC0) != 0x80)
 177 | 			return 0;
 178 | 		if (((unsigned char)*s++ & 0xC0) != 0x80)
 179 | 			return 0;
 180 | 		
 181 | 		return 3;
 182 | 	} else if (c <= 0xF4) { /* F0..F4 */
 183 | 		/* Disallow overlong 4-byte sequence. */
 184 | 		if (c == 0xF0 && (unsigned char)*s < 0x90)
 185 | 			return 0;
 186 | 		
 187 | 		/* Disallow codepoints beyond U+10FFFF. */
 188 | 		if (c == 0xF4 && (unsigned char)*s > 0x8F)
 189 | 			return 0;
 190 | 		
 191 | 		/* Make sure subsequent bytes are in the range 0x80..0xBF. */
 192 | 		if (((unsigned char)*s++ & 0xC0) != 0x80)
 193 | 			return 0;
 194 | 		if (((unsigned char)*s++ & 0xC0) != 0x80)
 195 | 			return 0;
 196 | 		if (((unsigned char)*s++ & 0xC0) != 0x80)
 197 | 			return 0;
 198 | 		
 199 | 		return 4;
 200 | 	} else {                /* F5..FF */
 201 | 		return 0;
 202 | 	}
 203 | }
 204 | 
 205 | /* True iff every character of the NUL-terminated string s is valid UTF-8. */
 206 | static bool utf8_validate(const char *s)
 207 | {
 208 | 	const char *p = s;
 209 | 
 210 | 	while (*p != '\0') {
 211 | 		int n = utf8_validate_cz(p);
 212 | 		if (n == 0)
 213 | 			return false;
 214 | 		p += n;
 215 | 	}
 216 | 	return true;
 217 | }
 218 | 
 219 | /*
 220 |  * Read a single UTF-8 character starting at @s,
 221 |  * returning the length, in bytes, of the character read.
 222 |  *
 223 |  * This function assumes input is valid UTF-8,
 224 |  * and that there are enough characters in front of @s.
 225 |  */
 226 | static int utf8_read_char(const char *s, uchar_t *out)
 227 | {
 228 | 	const unsigned char *bytes = (const unsigned char*) s;
 229 | 	uchar_t cp;
 230 | 	int len, i;
 231 | 
 232 | 	assert(utf8_validate_cz(s));
 233 | 
 234 | 	/* The lead byte fixes both the sequence length and how many of its
 235 | 	   low bits carry payload. */
 236 | 	if (bytes[0] <= 0x7F) {          /* 00..7F */
 237 | 		len = 1;
 238 | 		cp = bytes[0];
 239 | 	} else if (bytes[0] <= 0xDF) {   /* C2..DF (unless input is invalid) */
 240 | 		len = 2;
 241 | 		cp = bytes[0] & 0x1F;
 242 | 	} else if (bytes[0] <= 0xEF) {   /* E0..EF */
 243 | 		len = 3;
 244 | 		cp = bytes[0] & 0xF;
 245 | 	} else {                         /* F0..F4 (unless input is invalid) */
 246 | 		len = 4;
 247 | 		cp = bytes[0] & 0x7;
 248 | 	}
 249 | 
 250 | 	/* Each continuation byte contributes its low six bits. */
 251 | 	for (i = 1; i < len; i++)
 252 | 		cp = cp << 6 | (bytes[i] & 0x3F);
 253 | 	*out = cp;
 254 | 	return len;
 255 | }
 256 | 
 257 | /*
 258 |  * Write a single UTF-8 character to @s,
 259 |  * returning the length, in bytes, of the character written.
 260 |  *
 261 |  * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
 262 |  *
 263 |  * This function will write up to 4 bytes to @out.
 264 |  */
 265 | static int utf8_write_char(uchar_t unicode, char *out)
 266 | {
 267 | 	unsigned char *dst = (unsigned char*) out;
 268 | 
 269 | 	assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
 270 | 	/* U+0000..U+007F: plain ASCII, one byte. */
 271 | 	if (unicode <= 0x7F) {
 272 | 		dst[0] = unicode;
 273 | 		return 1;
 274 | 	}
 275 | 	/* U+0080..U+07FF: 110xxxxx 10xxxxxx */
 276 | 	if (unicode <= 0x7FF) {
 277 | 		dst[0] = 0xC0 | unicode >> 6;
 278 | 		dst[1] = 0x80 | (unicode & 0x3F);
 279 | 		return 2;
 280 | 	}
 281 | 	/* U+0800..U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx */
 282 | 	if (unicode <= 0xFFFF) {
 283 | 		dst[0] = 0xE0 | unicode >> 12;
 284 | 		dst[1] = 0x80 | (unicode >> 6 & 0x3F);
 285 | 		dst[2] = 0x80 | (unicode & 0x3F);
 286 | 		return 3;
 287 | 	}
 288 | 	/* U+10000..U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
 289 | 	dst[0] = 0xF0 | unicode >> 18;
 290 | 	dst[1] = 0x80 | (unicode >> 12 & 0x3F);
 291 | 	dst[2] = 0x80 | (unicode >> 6 & 0x3F);
 292 | 	dst[3] = 0x80 | (unicode & 0x3F);
 293 | 	return 4;
 294 | }
 295 | 
 296 | /*
 297 |  * Compute the Unicode codepoint of a UTF-16 surrogate pair.
 298 |  *
 299 |  * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
 300 |  * If they aren't, this function returns false.
 301 |  */
 302 | static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode)
 303 | {
 304 | 	/* Reject anything that is not a (high, low) surrogate pair. */
 305 | 	if (uc < 0xD800 || uc > 0xDBFF || lc < 0xDC00 || lc > 0xDFFF)
 306 | 		return false;
 307 | 
 308 | 	*unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
 309 | 	return true;
 310 | }
 311 | 
 312 | /*
 313 |  * Construct a UTF-16 surrogate pair given a Unicode codepoint.
 314 |  *
 315 |  * @unicode must be U+10000..U+10FFFF.
 316 |  */
 317 | static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc)
 318 | {
 319 | 	uchar_t offset = unicode - 0x10000; /* 20-bit value, split 10/10 below */
 320 | 
 321 | 	assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
 322 | 
 323 | 	/* *uc receives the top ten bits, *lc the bottom ten. */
 324 | 	*uc = 0xD800 | ((offset >> 10) & 0x3FF);
 325 | 	*lc = 0xDC00 | (offset & 0x3FF);
 326 | }
 327 | 
 328 | #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
 329 | #define is_digit(c) ((c) >= '0' && (c) <= '9')
 330 | 
 331 | static bool parse_value     (const char **sp, JsonNode        **out);
 332 | static bool parse_string    (const char **sp, char            **out);
 333 | static bool parse_number    (const char **sp, double           *out);
 334 | static bool parse_array     (const char **sp, JsonNode        **out);
 335 | static bool parse_object    (const char **sp, JsonNode        **out);
 336 | static bool parse_hex16     (const char **sp, uint16_t         *out);
 337 | 
 338 | static bool expect_literal  (const char **sp, const char *str);
 339 | static void skip_space      (const char **sp);
 340 | 
 341 | static void emit_value              (SB *out, const JsonNode *node);
 342 | static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
 343 | static void emit_string             (SB *out, const char *str);
 344 | static void emit_number             (SB *out, double num);
 345 | static void emit_array              (SB *out, const JsonNode *array);
 346 | static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
 347 | static void emit_object             (SB *out, const JsonNode *object);
 348 | static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
 349 | 
 350 | static int write_hex16(char *out, uint16_t val);
 351 | 
 352 | static JsonNode *mknode(JsonTag tag);
 353 | static void append_node(JsonNode *parent, JsonNode *child);
 354 | static void prepend_node(JsonNode *parent, JsonNode *child);
 355 | static void append_member(JsonNode *object, char *key, JsonNode *value);
 356 | 
 357 | /* Assertion-friendly validity checks */
 358 | static bool tag_is_valid(unsigned int tag);
 359 | static bool number_is_valid(const char *num);
 360 | 
 361 | JsonNode *json_decode(const char *json)
 362 | {
 363 | 	const char *cursor = json;
 364 | 	JsonNode *root;
 365 | 
 366 | 	/* Parse exactly one value, tolerating whitespace on either side. */
 367 | 	skip_space(&cursor);
 368 | 	if (!parse_value(&cursor, &root))
 369 | 		return NULL;
 370 | 	skip_space(&cursor);
 371 | 
 372 | 	/* Anything left after the value makes the whole input invalid. */
 373 | 	if (*cursor == '\0')
 374 | 		return root;
 375 | 	json_delete(root);
 376 | 	return NULL;
 377 | }
 378 | 
 379 | char *json_encode(const JsonNode *node)
 380 | {
 381 | 	return json_stringify(node, NULL); /* NULL space: json_stringify() uses the non-indenting emitter */
 382 | }
 383 | 
 384 | char *json_encode_string(const char *str)
 385 | {
 386 | 	SB buf;
 387 | 
 388 | 	/* The buffer's storage is handed to the caller, so no sb_free() here. */
 389 | 	sb_init(&buf);
 390 | 	emit_string(&buf, str);
 391 | 	return sb_finish(&buf);
 392 | }
 393 | 
 394 | char *json_stringify(const JsonNode *node, const char *space)
 395 | {
 396 | 	SB buf;
 397 | 	sb_init(&buf);
 398 | 	/* A NULL space selects the plain emitter; otherwise space is passed
 399 | 	   to the indenting emitter, starting at indent level 0. */
 400 | 	if (space == NULL)
 401 | 		emit_value(&buf, node);
 402 | 	else
 403 | 		emit_value_indented(&buf, node, space, 0);
 404 | 	return sb_finish(&buf);
 405 | }
 406 | 
 407 | void json_delete(JsonNode *node)
 408 | {
 409 | 	JsonNode *child;
 410 | 
 411 | 	if (node == NULL)
 412 | 		return;
 413 | 
 414 | 	/* Detach from any parent before the node is freed. */
 415 | 	json_remove_from_parent(node);
 416 | 
 417 | 	if (node->tag == JSON_STRING) {
 418 | 		free(node->string_);
 419 | 	} else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
 420 | 		/* json_delete(child) frees the child, so fetch its successor
 421 | 		   before each recursive call. */
 422 | 		child = node->children.head;
 423 | 		while (child != NULL) {
 424 | 			JsonNode *following = child->next;
 425 | 			json_delete(child);
 426 | 			child = following;
 427 | 		}
 428 | 	}
 429 | 
 430 | 	free(node);
 431 | }
 432 | 
 433 | bool json_validate(const char *json)
 434 | {
 435 | 	const char *cursor = json;
 436 | 
 437 | 	/* NULL is passed as the output pointer: no node is requested here
 438 | 	   (NOTE(review): presumably parse_value() then only checks syntax). */
 439 | 	skip_space(&cursor);
 440 | 	if (!parse_value(&cursor, NULL))
 441 | 		return false;
 442 | 
 443 | 	/* Valid only if nothing but whitespace followed the value. */
 444 | 	skip_space(&cursor);
 445 | 	return *cursor == '\0';
 446 | }
 447 | 
 448 | JsonNode *json_find_element(JsonNode *array, int index)
 449 | {
 450 | 	JsonNode *element;
 451 | 	int remaining = index;
 452 | 
 453 | 	if (array == NULL || array->tag != JSON_ARRAY)
 454 | 		return NULL;
 455 | 
 456 | 	/* Walk the children, counting down to the requested position; a
 457 | 	   negative or too-large index simply never matches. */
 458 | 	json_foreach(element, array) {
 459 | 		if (remaining-- == 0)
 460 | 			return element;
 461 | 	}
 462 | 	return NULL;
 463 | }
 464 | 
 465 | JsonNode *json_find_member(JsonNode *object, const char *name)
 466 | {
 467 | 	JsonNode *member;
 468 | 	
 469 | 	if (object == NULL || object->tag != JSON_OBJECT)
 470 | 		return NULL;
 471 | 	
 472 | 	json_foreach(member, object)
 473 | 		if (strcmp(member->key, name) == 0)
 474 | 			return member;
 475 | 	
 476 | 	return NULL;
 477 | }
 478 | 
 479 | JsonNode *json_first_child(const JsonNode *node)
 480 | {
 481 | 	if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
 482 | 		return node->children.head;
 483 | 	return NULL;
 484 | }
 485 | 
 486 | static JsonNode *mknode(JsonTag tag)
 487 | {
 488 | 	JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
 489 | 	if (ret == NULL)
 490 | 		out_of_memory();
 491 | 	ret->tag = tag;
 492 | 	return ret;
 493 | }
 494 | 
 495 | JsonNode *json_mknull(void)
 496 | {
 497 | 	return mknode(JSON_NULL);
 498 | }
 499 | 
 500 | JsonNode *json_mkbool(bool b)
 501 | {
 502 | 	JsonNode *ret = mknode(JSON_BOOL);
 503 | 	ret->bool_ = b;
 504 | 	return ret;
 505 | }
 506 | 
 507 | static JsonNode *mkstring(char *s)
 508 | {
 509 | 	JsonNode *ret = mknode(JSON_STRING);
 510 | 	ret->string_ = s;
 511 | 	return ret;
 512 | }
 513 | 
 514 | JsonNode *json_mkstring(const char *s)
 515 | {
 516 | 	return mkstring(json_strdup(s));
 517 | }
 518 | 
 519 | JsonNode *json_mknumber(double n)
 520 | {
 521 | 	JsonNode *node = mknode(JSON_NUMBER);
 522 | 	node->number_ = n;
 523 | 	return node;
 524 | }
 525 | 
 526 | JsonNode *json_mkarray(void)
 527 | {
 528 | 	return mknode(JSON_ARRAY);
 529 | }
 530 | 
 531 | JsonNode *json_mkobject(void)
 532 | {
 533 | 	return mknode(JSON_OBJECT);
 534 | }
 535 | 
 536 | static void append_node(JsonNode *parent, JsonNode *child)
 537 | {
 538 | 	child->parent = parent;
 539 | 	child->prev = parent->children.tail;
 540 | 	child->next = NULL;
 541 | 	
 542 | 	if (parent->children.tail != NULL)
 543 | 		parent->children.tail->next = child;
 544 | 	else
 545 | 		parent->children.head = child;
 546 | 	parent->children.tail = child;
 547 | }
 548 | 
 549 | static void prepend_node(JsonNode *parent, JsonNode *child)
 550 | {
 551 | 	child->parent = parent;
 552 | 	child->prev = NULL;
 553 | 	child->next = parent->children.head;
 554 | 	
 555 | 	if (parent->children.head != NULL)
 556 | 		parent->children.head->prev = child;
 557 | 	else
 558 | 		parent->children.tail = child;
 559 | 	parent->children.head = child;
 560 | }
 561 | 
 562 | static void append_member(JsonNode *object, char *key, JsonNode *value)
 563 | {
 564 | 	value->key = key;
 565 | 	append_node(object, value);
 566 | }
 567 | 
 568 | void json_append_element(JsonNode *array, JsonNode *element)
 569 | {
 570 | 	assert(array->tag == JSON_ARRAY);
 571 | 	assert(element->parent == NULL);
 572 | 	
 573 | 	append_node(array, element);
 574 | }
 575 | 
 576 | void json_prepend_element(JsonNode *array, JsonNode *element)
 577 | {
 578 | 	assert(array->tag == JSON_ARRAY);
 579 | 	assert(element->parent == NULL);
 580 | 	
 581 | 	prepend_node(array, element);
 582 | }
 583 | 
 584 | void json_append_member(JsonNode *object, const char *key, JsonNode *value)
 585 | {
 586 | 	assert(object->tag == JSON_OBJECT);
 587 | 	assert(value->parent == NULL);
 588 | 	
 589 | 	append_member(object, json_strdup(key), value);
 590 | }
 591 | 
 592 | void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
 593 | {
 594 | 	assert(object->tag == JSON_OBJECT);
 595 | 	assert(value->parent == NULL);
 596 | 	
 597 | 	value->key = json_strdup(key);
 598 | 	prepend_node(object, value);
 599 | }
 600 | 
 601 | void json_remove_from_parent(JsonNode *node)
 602 | {
 603 | 	JsonNode *parent = node->parent;
 604 | 	
 605 | 	if (parent != NULL) {
 606 | 		if (node->prev != NULL)
 607 | 			node->prev->next = node->next;
 608 | 		else
 609 | 			parent->children.head = node->next;
 610 | 		if (node->next != NULL)
 611 | 			node->next->prev = node->prev;
 612 | 		else
 613 | 			parent->children.tail = node->prev;
 614 | 		
 615 | 		free(node->key);
 616 | 		
 617 | 		node->parent = NULL;
 618 | 		node->prev = node->next = NULL;
 619 | 		node->key = NULL;
 620 | 	}
 621 | }
 622 | 
 623 | static bool parse_value(const char **sp, JsonNode **out)
 624 | {
 625 | 	const char *s = *sp;
 626 | 	
 627 | 	switch (*s) {
 628 | 		case 'n':
 629 | 			if (expect_literal(&s, "null")) {
 630 | 				if (out)
 631 | 					*out = json_mknull();
 632 | 				*sp = s;
 633 | 				return true;
 634 | 			}
 635 | 			return false;
 636 | 		
 637 | 		case 'f':
 638 | 			if (expect_literal(&s, "false")) {
 639 | 				if (out)
 640 | 					*out = json_mkbool(false);
 641 | 				*sp = s;
 642 | 				return true;
 643 | 			}
 644 | 			return false;
 645 | 		
 646 | 		case 't':
 647 | 			if (expect_literal(&s, "true")) {
 648 | 				if (out)
 649 | 					*out = json_mkbool(true);
 650 | 				*sp = s;
 651 | 				return true;
 652 | 			}
 653 | 			return false;
 654 | 		
 655 | 		case '"': {
 656 | 			char *str;
 657 | 			if (parse_string(&s, out ? &str : NULL)) {
 658 | 				if (out)
 659 | 					*out = mkstring(str);
 660 | 				*sp = s;
 661 | 				return true;
 662 | 			}
 663 | 			return false;
 664 | 		}
 665 | 		
 666 | 		case '[':
 667 | 			if (parse_array(&s, out)) {
 668 | 				*sp = s;
 669 | 				return true;
 670 | 			}
 671 | 			return false;
 672 | 		
 673 | 		case '{':
 674 | 			if (parse_object(&s, out)) {
 675 | 				*sp = s;
 676 | 				return true;
 677 | 			}
 678 | 			return false;
 679 | 		
 680 | 		default: {
 681 | 			double num;
 682 | 			if (parse_number(&s, out ? &num : NULL)) {
 683 | 				if (out)
 684 | 					*out = json_mknumber(num);
 685 | 				*sp = s;
 686 | 				return true;
 687 | 			}
 688 | 			return false;
 689 | 		}
 690 | 	}
 691 | }
 692 | 
 693 | static bool parse_array(const char **sp, JsonNode **out)
 694 | {
 695 | 	const char *s = *sp;
 696 | 	JsonNode *ret = out ? json_mkarray() : NULL;
 697 | 	JsonNode *element;
 698 | 	
 699 | 	if (*s++ != '[')
 700 | 		goto failure;
 701 | 	skip_space(&s);
 702 | 	
 703 | 	if (*s == ']') {
 704 | 		s++;
 705 | 		goto success;
 706 | 	}
 707 | 	
 708 | 	for (;;) {
 709 | 		if (!parse_value(&s, out ? &element : NULL))
 710 | 			goto failure;
 711 | 		skip_space(&s);
 712 | 		
 713 | 		if (out)
 714 | 			json_append_element(ret, element);
 715 | 		
 716 | 		if (*s == ']') {
 717 | 			s++;
 718 | 			goto success;
 719 | 		}
 720 | 		
 721 | 		if (*s++ != ',')
 722 | 			goto failure;
 723 | 		skip_space(&s);
 724 | 	}
 725 | 	
 726 | success:
 727 | 	*sp = s;
 728 | 	if (out)
 729 | 		*out = ret;
 730 | 	return true;
 731 | 
 732 | failure:
 733 | 	json_delete(ret);
 734 | 	return false;
 735 | }
 736 | 
 737 | static bool parse_object(const char **sp, JsonNode **out)
 738 | {
 739 | 	const char *s = *sp;
 740 | 	JsonNode *ret = out ? json_mkobject() : NULL;
 741 | 	char *key;
 742 | 	JsonNode *value;
 743 | 	
 744 | 	if (*s++ != '{')
 745 | 		goto failure;
 746 | 	skip_space(&s);
 747 | 	
 748 | 	if (*s == '}') {
 749 | 		s++;
 750 | 		goto success;
 751 | 	}
 752 | 	
 753 | 	for (;;) {
 754 | 		if (!parse_string(&s, out ? &key : NULL))
 755 | 			goto failure;
 756 | 		skip_space(&s);
 757 | 		
 758 | 		if (*s++ != ':')
 759 | 			goto failure_free_key;
 760 | 		skip_space(&s);
 761 | 		
 762 | 		if (!parse_value(&s, out ? &value : NULL))
 763 | 			goto failure_free_key;
 764 | 		skip_space(&s);
 765 | 		
 766 | 		if (out)
 767 | 			append_member(ret, key, value);
 768 | 		
 769 | 		if (*s == '}') {
 770 | 			s++;
 771 | 			goto success;
 772 | 		}
 773 | 		
 774 | 		if (*s++ != ',')
 775 | 			goto failure;
 776 | 		skip_space(&s);
 777 | 	}
 778 | 	
 779 | success:
 780 | 	*sp = s;
 781 | 	if (out)
 782 | 		*out = ret;
 783 | 	return true;
 784 | 
 785 | failure_free_key:
 786 | 	if (out)
 787 | 		free(key);
 788 | failure:
 789 | 	json_delete(ret);
 790 | 	return false;
 791 | }
 792 | 
 793 | bool parse_string(const char **sp, char **out)
 794 | {
 795 | 	const char *s = *sp;
 796 | 	SB sb;
 797 | 	char throwaway_buffer[4];
 798 | 		/* enough space for a UTF-8 character */
 799 | 	char *b;
 800 | 	
 801 | 	if (*s++ != '"')
 802 | 		return false;
 803 | 	
 804 | 	if (out) {
 805 | 		sb_init(&sb);
 806 | 		sb_need(&sb, 4);
 807 | 		b = sb.cur;
 808 | 	} else {
 809 | 		b = throwaway_buffer;
 810 | 	}
 811 | 	
 812 | 	while (*s != '"') {
 813 | 		unsigned char c = *s++;
 814 | 		
 815 | 		/* Parse next character, and write it to b. */
 816 | 		if (c == '\\') {
 817 | 			c = *s++;
 818 | 			switch (c) {
 819 | 				case '"':
 820 | 				case '\\':
 821 | 				case '/':
 822 | 					*b++ = c;
 823 | 					break;
 824 | 				case 'b':
 825 | 					*b++ = '\b';
 826 | 					break;
 827 | 				case 'f':
 828 | 					*b++ = '\f';
 829 | 					break;
 830 | 				case 'n':
 831 | 					*b++ = '\n';
 832 | 					break;
 833 | 				case 'r':
 834 | 					*b++ = '\r';
 835 | 					break;
 836 | 				case 't':
 837 | 					*b++ = '\t';
 838 | 					break;
 839 | 				case 'u':
 840 | 				{
 841 | 					uint16_t uc, lc;
 842 | 					uchar_t unicode;
 843 | 					
 844 | 					if (!parse_hex16(&s, &uc))
 845 | 						goto failed;
 846 | 					
 847 | 					if (uc >= 0xD800 && uc <= 0xDFFF) {
 848 | 						/* Handle UTF-16 surrogate pair. */
 849 | 						if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
 850 | 							goto failed; /* Incomplete surrogate pair. */
 851 | 						if (!from_surrogate_pair(uc, lc, &unicode))
 852 | 							goto failed; /* Invalid surrogate pair. */
 853 | 					} else if (uc == 0) {
 854 | 						/* Disallow "\u0000". */
 855 | 						goto failed;
 856 | 					} else {
 857 | 						unicode = uc;
 858 | 					}
 859 | 					
 860 | 					b += utf8_write_char(unicode, b);
 861 | 					break;
 862 | 				}
 863 | 				default:
 864 | 					/* Invalid escape */
 865 | 					goto failed;
 866 | 			}
 867 | 		} else if (c <= 0x1F) {
 868 | 			/* Control characters are not allowed in string literals. */
 869 | 			goto failed;
 870 | 		} else {
 871 | 			/* Validate and echo a UTF-8 character. */
 872 | 			int len;
 873 | 			
 874 | 			s--;
 875 | 			len = utf8_validate_cz(s);
 876 | 			if (len == 0)
 877 | 				goto failed; /* Invalid UTF-8 character. */
 878 | 			
 879 | 			while (len--)
 880 | 				*b++ = *s++;
 881 | 		}
 882 | 		
 883 | 		/*
 884 | 		 * Update sb to know about the new bytes,
 885 | 		 * and set up b to write another character.
 886 | 		 */
 887 | 		if (out) {
 888 | 			sb.cur = b;
 889 | 			sb_need(&sb, 4);
 890 | 			b = sb.cur;
 891 | 		} else {
 892 | 			b = throwaway_buffer;
 893 | 		}
 894 | 	}
 895 | 	s++;
 896 | 	
 897 | 	if (out)
 898 | 		*out = sb_finish(&sb);
 899 | 	*sp = s;
 900 | 	return true;
 901 | 
 902 | failed:
 903 | 	if (out)
 904 | 		sb_free(&sb);
 905 | 	return false;
 906 | }
 907 | 
 908 | /*
 909 |  * The JSON spec says that a number shall follow this precise pattern
 910 |  * (spaces and quotes added for readability):
 911 |  *	 '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
 912 |  *
 913 |  * However, some JSON parsers are more liberal.  For instance, PHP accepts
 914 |  * '.5' and '1.'.  JSON.parse accepts '+3'.
 915 |  *
 916 |  * This function takes the strict approach.
 917 |  */
 918 | bool parse_number(const char **sp, double *out)
 919 | {
 920 | 	const char *s = *sp;
 921 | 
 922 | 	/* '-'? */
 923 | 	if (*s == '-')
 924 | 		s++;
 925 | 
 926 | 	/* (0 | [1-9][0-9]*) */
 927 | 	if (*s == '0') {
 928 | 		s++;
 929 | 	} else {
 930 | 		if (!is_digit(*s))
 931 | 			return false;
 932 | 		do {
 933 | 			s++;
 934 | 		} while (is_digit(*s));
 935 | 	}
 936 | 
 937 | 	/* ('.' [0-9]+)? */
 938 | 	if (*s == '.') {
 939 | 		s++;
 940 | 		if (!is_digit(*s))
 941 | 			return false;
 942 | 		do {
 943 | 			s++;
 944 | 		} while (is_digit(*s));
 945 | 	}
 946 | 
 947 | 	/* ([Ee] [+-]? [0-9]+)? */
 948 | 	if (*s == 'E' || *s == 'e') {
 949 | 		s++;
 950 | 		if (*s == '+' || *s == '-')
 951 | 			s++;
 952 | 		if (!is_digit(*s))
 953 | 			return false;
 954 | 		do {
 955 | 			s++;
 956 | 		} while (is_digit(*s));
 957 | 	}
 958 | 
 959 | 	if (out)
 960 | 		*out = strtod(*sp, NULL);
 961 | 
 962 | 	*sp = s;
 963 | 	return true;
 964 | }
 965 | 
 966 | static void skip_space(const char **sp)
 967 | {
 968 | 	const char *s = *sp;
 969 | 	while (is_space(*s))
 970 | 		s++;
 971 | 	*sp = s;
 972 | }
 973 | 
 974 | static void emit_value(SB *out, const JsonNode *node)
 975 | {
 976 | 	assert(tag_is_valid(node->tag));
 977 | 	switch (node->tag) {
 978 | 		case JSON_NULL:
 979 | 			sb_puts(out, "null");
 980 | 			break;
 981 | 		case JSON_BOOL:
 982 | 			sb_puts(out, node->bool_ ? "true" : "false");
 983 | 			break;
 984 | 		case JSON_STRING:
 985 | 			emit_string(out, node->string_);
 986 | 			break;
 987 | 		case JSON_NUMBER:
 988 | 			emit_number(out, node->number_);
 989 | 			break;
 990 | 		case JSON_ARRAY:
 991 | 			emit_array(out, node);
 992 | 			break;
 993 | 		case JSON_OBJECT:
 994 | 			emit_object(out, node);
 995 | 			break;
 996 | 		default:
 997 | 			assert(false);
 998 | 	}
 999 | }
1000 | 
1001 | void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1002 | {
1003 | 	assert(tag_is_valid(node->tag));
1004 | 	switch (node->tag) {
1005 | 		case JSON_NULL:
1006 | 			sb_puts(out, "null");
1007 | 			break;
1008 | 		case JSON_BOOL:
1009 | 			sb_puts(out, node->bool_ ? "true" : "false");
1010 | 			break;
1011 | 		case JSON_STRING:
1012 | 			emit_string(out, node->string_);
1013 | 			break;
1014 | 		case JSON_NUMBER:
1015 | 			emit_number(out, node->number_);
1016 | 			break;
1017 | 		case JSON_ARRAY:
1018 | 			emit_array_indented(out, node, space, indent_level);
1019 | 			break;
1020 | 		case JSON_OBJECT:
1021 | 			emit_object_indented(out, node, space, indent_level);
1022 | 			break;
1023 | 		default:
1024 | 			assert(false);
1025 | 	}
1026 | }
1027 | 
1028 | static void emit_array(SB *out, const JsonNode *array)
1029 | {
1030 | 	const JsonNode *element;
1031 | 	
1032 | 	sb_putc(out, '[');
1033 | 	json_foreach(element, array) {
1034 | 		emit_value(out, element);
1035 | 		if (element->next != NULL)
1036 | 			sb_putc(out, ',');
1037 | 	}
1038 | 	sb_putc(out, ']');
1039 | }
1040 | 
1041 | static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1042 | {
1043 | 	const JsonNode *element = array->children.head;
1044 | 	int i;
1045 | 	
1046 | 	if (element == NULL) {
1047 | 		sb_puts(out, "[]");
1048 | 		return;
1049 | 	}
1050 | 	
1051 | 	sb_puts(out, "[\n");
1052 | 	while (element != NULL) {
1053 | 		for (i = 0; i < indent_level + 1; i++)
1054 | 			sb_puts(out, space);
1055 | 		emit_value_indented(out, element, space, indent_level + 1);
1056 | 		
1057 | 		element = element->next;
1058 | 		sb_puts(out, element != NULL ? ",\n" : "\n");
1059 | 	}
1060 | 	for (i = 0; i < indent_level; i++)
1061 | 		sb_puts(out, space);
1062 | 	sb_putc(out, ']');
1063 | }
1064 | 
1065 | static void emit_object(SB *out, const JsonNode *object)
1066 | {
1067 | 	const JsonNode *member;
1068 | 	
1069 | 	sb_putc(out, '{');
1070 | 	json_foreach(member, object) {
1071 | 		emit_string(out, member->key);
1072 | 		sb_putc(out, ':');
1073 | 		emit_value(out, member);
1074 | 		if (member->next != NULL)
1075 | 			sb_putc(out, ',');
1076 | 	}
1077 | 	sb_putc(out, '}');
1078 | }
1079 | 
1080 | static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1081 | {
1082 | 	const JsonNode *member = object->children.head;
1083 | 	int i;
1084 | 	
1085 | 	if (member == NULL) {
1086 | 		sb_puts(out, "{}");
1087 | 		return;
1088 | 	}
1089 | 	
1090 | 	sb_puts(out, "{\n");
1091 | 	while (member != NULL) {
1092 | 		for (i = 0; i < indent_level + 1; i++)
1093 | 			sb_puts(out, space);
1094 | 		emit_string(out, member->key);
1095 | 		sb_puts(out, ": ");
1096 | 		emit_value_indented(out, member, space, indent_level + 1);
1097 | 		
1098 | 		member = member->next;
1099 | 		sb_puts(out, member != NULL ? ",\n" : "\n");
1100 | 	}
1101 | 	for (i = 0; i < indent_level; i++)
1102 | 		sb_puts(out, space);
1103 | 	sb_putc(out, '}');
1104 | }
1105 | 
1106 | void emit_string(SB *out, const char *str)
1107 | {
1108 | 	bool escape_unicode = false;
1109 | 	const char *s = str;
1110 | 	char *b;
1111 | 	
1112 | 	assert(utf8_validate(str));
1113 | 	
1114 | 	/*
1115 | 	 * 14 bytes is enough space to write up to two
1116 | 	 * \uXXXX escapes and two quotation marks.
1117 | 	 */
1118 | 	sb_need(out, 14);
1119 | 	b = out->cur;
1120 | 	
1121 | 	*b++ = '"';
1122 | 	while (*s != 0) {
1123 | 		unsigned char c = *s++;
1124 | 		
1125 | 		/* Encode the next character, and write it to b. */
1126 | 		switch (c) {
1127 | 			case '"':
1128 | 				*b++ = '\\';
1129 | 				*b++ = '"';
1130 | 				break;
1131 | 			case '\\':
1132 | 				*b++ = '\\';
1133 | 				*b++ = '\\';
1134 | 				break;
1135 | 			case '\b':
1136 | 				*b++ = '\\';
1137 | 				*b++ = 'b';
1138 | 				break;
1139 | 			case '\f':
1140 | 				*b++ = '\\';
1141 | 				*b++ = 'f';
1142 | 				break;
1143 | 			case '\n':
1144 | 				*b++ = '\\';
1145 | 				*b++ = 'n';
1146 | 				break;
1147 | 			case '\r':
1148 | 				*b++ = '\\';
1149 | 				*b++ = 'r';
1150 | 				break;
1151 | 			case '\t':
1152 | 				*b++ = '\\';
1153 | 				*b++ = 't';
1154 | 				break;
1155 | 			default: {
1156 | 				int len;
1157 | 				
1158 | 				s--;
1159 | 				len = utf8_validate_cz(s);
1160 | 				
1161 | 				if (len == 0) {
1162 | 					/*
1163 | 					 * Handle invalid UTF-8 character gracefully in production
1164 | 					 * by writing a replacement character (U+FFFD)
1165 | 					 * and skipping a single byte.
1166 | 					 *
1167 | 					 * This should never happen when assertions are enabled
1168 | 					 * due to the assertion at the beginning of this function.
1169 | 					 */
1170 | 					assert(false);
1171 | 					if (escape_unicode) {
1172 | 						strcpy(b, "\\uFFFD");
1173 | 						b += 6;
1174 | 					} else {
1175 | 						*b++ = 0xEF;
1176 | 						*b++ = 0xBF;
1177 | 						*b++ = 0xBD;
1178 | 					}
1179 | 					s++;
1180 | 				} else if (c < 0x1F || (c >= 0x80 && escape_unicode)) {
1181 | 					/* Encode using \u.... */
1182 | 					uint32_t unicode;
1183 | 					
1184 | 					s += utf8_read_char(s, &unicode);
1185 | 					
1186 | 					if (unicode <= 0xFFFF) {
1187 | 						*b++ = '\\';
1188 | 						*b++ = 'u';
1189 | 						b += write_hex16(b, unicode);
1190 | 					} else {
1191 | 						/* Produce a surrogate pair. */
1192 | 						uint16_t uc, lc;
1193 | 						assert(unicode <= 0x10FFFF);
1194 | 						to_surrogate_pair(unicode, &uc, &lc);
1195 | 						*b++ = '\\';
1196 | 						*b++ = 'u';
1197 | 						b += write_hex16(b, uc);
1198 | 						*b++ = '\\';
1199 | 						*b++ = 'u';
1200 | 						b += write_hex16(b, lc);
1201 | 					}
1202 | 				} else {
1203 | 					/* Write the character directly. */
1204 | 					while (len--)
1205 | 						*b++ = *s++;
1206 | 				}
1207 | 				
1208 | 				break;
1209 | 			}
1210 | 		}
1211 | 	
1212 | 		/*
1213 | 		 * Update *out to know about the new bytes,
1214 | 		 * and set up b to write another encoded character.
1215 | 		 */
1216 | 		out->cur = b;
1217 | 		sb_need(out, 14);
1218 | 		b = out->cur;
1219 | 	}
1220 | 	*b++ = '"';
1221 | 	
1222 | 	out->cur = b;
1223 | }
1224 | 
1225 | static void emit_number(SB *out, double num)
1226 | {
1227 | 	/*
1228 | 	 * This isn't exactly how JavaScript renders numbers,
1229 | 	 * but it should produce valid JSON for reasonable numbers
1230 | 	 * preserve precision well enough, and avoid some oddities
1231 | 	 * like 0.3 -> 0.299999999999999988898 .
1232 | 	 */
1233 | 	char buf[64];
1234 | 	sprintf(buf, "%.16g", num);
1235 | 	
1236 | 	if (number_is_valid(buf))
1237 | 		sb_puts(out, buf);
1238 | 	else
1239 | 		sb_puts(out, "null");
1240 | }
1241 | 
1242 | static bool tag_is_valid(unsigned int tag)
1243 | {
1244 | 	return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1245 | }
1246 | 
/*
 * Report whether the whole of num is a single JSON number under
 * parse_number()'s strict grammar, with nothing trailing.
 */
static bool number_is_valid(const char *num)
{
	const char *end = num;

	if (!parse_number(&end, NULL))
		return false;

	return *end == '\0';
}
1251 | 
/*
 * Match the exact text str at *sp.
 *
 * On a full match *sp is advanced past it and true is returned;
 * otherwise *sp is left unchanged and false is returned.
 */
static bool expect_literal(const char **sp, const char *str)
{
	const char *cursor = *sp;
	const char *want;

	for (want = str; *want != '\0'; want++, cursor++) {
		if (*cursor != *want)
			return false;
	}

	*sp = cursor;
	return true;
}
1263 | 
/*
 * Read exactly four hexadecimal digits (either case) from *sp.
 *
 * On success the 16-bit value is stored in *out (when out is
 * non-NULL), *sp is advanced by four characters and true is
 * returned.  If any of the four characters is not [0-9A-Fa-f],
 * false is returned and neither *sp nor *out is modified.
 */
static bool parse_hex16(const char **sp, uint16_t *out)
{
	const char *cursor = *sp;
	uint16_t value = 0;
	int remaining;

	for (remaining = 4; remaining > 0; remaining--) {
		char ch = *cursor++;
		uint16_t digit;

		if ('0' <= ch && ch <= '9')
			digit = ch - '0';
		else if ('A' <= ch && ch <= 'F')
			digit = ch - 'A' + 10;
		else if ('a' <= ch && ch <= 'f')
			digit = ch - 'a' + 10;
		else
			return false;

		/* Shift in the next nibble; the low four bits are free. */
		value = (uint16_t)((value << 4) | digit);
	}

	if (out)
		*out = value;
	*sp = cursor;
	return true;
}
1296 | 
/*
 * Write val to out as exactly four uppercase hexadecimal digits,
 * most significant first.  No terminator is written.
 * Always returns 4, the number of characters stored.
 */
static int write_hex16(char *out, uint16_t val)
{
	static const char digits[] = "0123456789ABCDEF";
	int shift;

	for (shift = 12; shift >= 0; shift -= 4)
		*out++ = digits[(val >> shift) & 0xF];

	return 4;
}
1312 | 
1313 | bool json_check(const JsonNode *node, char errmsg[256])
1314 | {
1315 | 	#define problem(...) do { \
1316 | 			if (errmsg != NULL) \
1317 | 				snprintf(errmsg, 256, __VA_ARGS__); \
1318 | 			return false; \
1319 | 		} while (0)
1320 | 	
1321 | 	if (node->key != NULL && !utf8_validate(node->key))
1322 | 		problem("key contains invalid UTF-8");
1323 | 	
1324 | 	if (!tag_is_valid(node->tag))
1325 | 		problem("tag is invalid (%u)", node->tag);
1326 | 	
1327 | 	if (node->tag == JSON_BOOL) {
1328 | 		if (node->bool_ != false && node->bool_ != true)
1329 | 			problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1330 | 	} else if (node->tag == JSON_STRING) {
1331 | 		if (node->string_ == NULL)
1332 | 			problem("string_ is NULL");
1333 | 		if (!utf8_validate(node->string_))
1334 | 			problem("string_ contains invalid UTF-8");
1335 | 	} else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1336 | 		JsonNode *head = node->children.head;
1337 | 		JsonNode *tail = node->children.tail;
1338 | 		
1339 | 		if (head == NULL || tail == NULL) {
1340 | 			if (head != NULL)
1341 | 				problem("tail is NULL, but head is not");
1342 | 			if (tail != NULL)
1343 | 				problem("head is NULL, but tail is not");
1344 | 		} else {
1345 | 			JsonNode *child;
1346 | 			JsonNode *last = NULL;
1347 | 			
1348 | 			if (head->prev != NULL)
1349 | 				problem("First child's prev pointer is not NULL");
1350 | 			
1351 | 			for (child = head; child != NULL; last = child, child = child->next) {
1352 | 				if (child == node)
1353 | 					problem("node is its own child");
1354 | 				if (child->next == child)
1355 | 					problem("child->next == child (cycle)");
1356 | 				if (child->next == head)
1357 | 					problem("child->next == head (cycle)");
1358 | 				
1359 | 				if (child->parent != node)
1360 | 					problem("child does not point back to parent");
1361 | 				if (child->next != NULL && child->next->prev != child)
1362 | 					problem("child->next does not point back to child");
1363 | 				
1364 | 				if (node->tag == JSON_ARRAY && child->key != NULL)
1365 | 					problem("Array element's key is not NULL");
1366 | 				if (node->tag == JSON_OBJECT && child->key == NULL)
1367 | 					problem("Object member's key is NULL");
1368 | 				
1369 | 				if (!json_check(child, errmsg))
1370 | 					return false;
1371 | 			}
1372 | 			
1373 | 			if (last != tail)
1374 | 				problem("tail does not match pointer found by starting at head and following next links");
1375 | 		}
1376 | 	}
1377 | 	
1378 | 	return true;
1379 | 	
1380 | 	#undef problem
1381 | }
1382 | 


--------------------------------------------------------------------------------
/json.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
  3 |   All rights reserved.
  4 | 
  5 |   Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |   of this software and associated documentation files (the "Software"), to deal
  7 |   in the Software without restriction, including without limitation the rights
  8 |   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |   copies of the Software, and to permit persons to whom the Software is
 10 |   furnished to do so, subject to the following conditions:
 11 | 
 12 |   The above copyright notice and this permission notice shall be included in
 13 |   all copies or substantial portions of the Software.
 14 | 
 15 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 |   THE SOFTWARE.
 22 | */
 23 | 
 24 | #ifndef CCAN_JSON_H
 25 | #define CCAN_JSON_H
 26 | 
 27 | #include <stdbool.h>
 28 | #include <stddef.h>
 29 | 
/*
 * Discriminator stored in JsonNode.tag; it selects which member of
 * the node's anonymous union is in use.
 */
typedef enum {
	JSON_NULL,    /* no payload */
	JSON_BOOL,    /* payload in bool_ */
	JSON_STRING,  /* payload in string_ */
	JSON_NUMBER,  /* payload in number_ */
	JSON_ARRAY,   /* payload in children */
	JSON_OBJECT,  /* payload in children; each child carries a key */
} JsonTag;
 38 | 
typedef struct JsonNode JsonNode;

/*
 * One JSON value.  Nodes that belong to an array or object are
 * linked into that container's doubly linked child list through
 * parent, prev and next.
 */
struct JsonNode
{
	/* only if parent is an object or array (NULL otherwise) */
	JsonNode *parent;
	JsonNode *prev, *next;
	
	/* only if parent is an object (NULL otherwise) */
	char *key; /* Must be valid UTF-8. */
	
	/* Selects the active member of the union below. */
	JsonTag tag;
	union {
		/* JSON_BOOL */
		bool bool_;
		
		/* JSON_STRING */
		char *string_; /* Must be valid UTF-8. */
		
		/* JSON_NUMBER */
		double number_;
		
		/* JSON_ARRAY */
		/* JSON_OBJECT */
		struct {
			JsonNode *head, *tail;
		} children;
	};
};
 68 | 
/*** Encoding, decoding, and validation ***/

/*
 * json_decode:        parse JSON text into a node tree; NULL on invalid input.
 * json_encode:        serialize a node compactly (json_stringify with NULL space).
 * json_encode_string: encode a single C string as a quoted JSON string.
 * json_stringify:     serialize a node; a non-NULL space is the per-level
 *                     indent string, NULL selects compact output.
 * json_delete:        detach a node from its parent and free it with all
 *                     descendants; NULL is accepted.
 *
 * NOTE(review): the char * results presumably must be free()d by the
 * caller -- confirm against the string-builder implementation.
 */
JsonNode   *json_decode         (const char *json);
char       *json_encode         (const JsonNode *node);
char       *json_encode_string  (const char *str);
char       *json_stringify      (const JsonNode *node, const char *space);
void        json_delete         (JsonNode *node);

/* Returns true if json is one well-formed JSON value; builds no tree. */
bool        json_validate       (const char *json);
 78 | 
/*** Lookup and traversal ***/

/*
 * json_find_element: element at zero-based index, or NULL if array is
 *                    NULL, not a JSON_ARRAY, or index is out of range.
 * json_find_member:  first member whose key equals key, or NULL if object
 *                    is NULL, not a JSON_OBJECT, or no member matches.
 */
JsonNode   *json_find_element   (JsonNode *array, int index);
JsonNode   *json_find_member    (JsonNode *object, const char *key);

/* First child of an array or object; NULL for NULL or any other tag. */
JsonNode   *json_first_child    (const JsonNode *node);
 85 | 
/*
 * Iterate over the children of an array or object, binding each
 * child to i in turn.  The loop advances through (i)->next, so the
 * body must not unlink or delete the current node.
 */
#define json_foreach(i, object_or_array)            \
	for ((i) = json_first_child(object_or_array);   \
		 (i) != NULL;                               \
		 (i) = (i)->next)
 90 | 
/*** Construction and manipulation ***/

/* Constructors: each returns a new node with no parent. */
JsonNode *json_mknull(void);
JsonNode *json_mkbool(bool b);
JsonNode *json_mkstring(const char *s);
JsonNode *json_mknumber(double n);
JsonNode *json_mkarray(void);
JsonNode *json_mkobject(void);

/*
 * Insert a parentless node at the end (append) or the front (prepend)
 * of a container.  The implementations assert that the container has
 * the matching tag and that the inserted node has no parent.
 */
void json_append_element(JsonNode *array, JsonNode *element);
void json_prepend_element(JsonNode *array, JsonNode *element);
void json_append_member(JsonNode *object, const char *key, JsonNode *value);
void json_prepend_member(JsonNode *object, const char *key, JsonNode *value);

/*
 * Unlink node from its parent and free its key; the node itself is
 * kept.  Does nothing if node has no parent.
 */
void json_remove_from_parent(JsonNode *node);
106 | 
/*** Debugging ***/

/*
 * Look for structure and encoding problems in a JsonNode or its descendants.
 *
 * If a problem is detected, return false, writing a description of the problem
 * to errmsg (unless errmsg is NULL).  Return true if no problem is found.
 */
bool json_check(const JsonNode *node, char errmsg[256]);
116 | 
117 | #endif
118 | 


--------------------------------------------------------------------------------
/mqttcollect.1:
--------------------------------------------------------------------------------
  1 | .TH "MQTTCOLLECT" "1" "May 13, 2015" "User Manuals" ""
  2 | .SH NAME
  3 | .PP
  4 | mqttcollect \- MQTT\-based Exec\-plugin for collectd
  5 | .SH SYNOPSIS
  6 | .PP
  7 | mqttcollect [\-v ] [\-f \f[I]file\f[]]
  8 | .SH DESCRIPTION
  9 | .PP
 10 | \f[I]mqttcollect\f[] is an executable program which is used with
 11 | collectd(1).
 12 | It subscribes to any number of MQTT topics you specify, and prints
 13 | values to stdout for collectd to process in an exec plugin block.
 14 | .IP
 15 | .nf
 16 | \f[C]
PUTVAL\ tiggr/mqtt\-sys/gauge\-clients.inactive\ 1430914033:0.00
 18 | \f[]
 19 | .fi
 20 | .PP
 21 | \f[I]collectd\f[] launches \f[I]mqttcollect\f[] which connects to the
 22 | configured MQTT broker, subscribes and waits for publishes to subscribed
 23 | topics in an endless loop.
If an error occurs or the program exits for whatever reason,
\f[I]collectd\f[] will restart it and log the reason in its log file.
 26 | .PP
 27 | \f[I]mqttcollect\f[] supports TLS connections to the MQTT broker,
 28 | username/password authentication, and TLS\-PSK, all configured via its
 29 | configuration file.
 30 | .SH OPTIONS
 31 | .PP
 32 | \f[I]mqttcollect\f[] understands the following options.
 33 | .TP
 34 | .B \-f \f[I]file\f[]
 35 | Specify an ini\-type configuration file (see below), which defaults to
 36 | \f[C]/usr/local/etc/mqttcollect.ini\f[].
 37 | .RS
 38 | .RE
 39 | .TP
 40 | .B \-v
 41 | Verbose.
 42 | .RS
 43 | .RE
 44 | .SH CONFIGURATION
 45 | .PP
 46 | \f[I]mqttcollect\f[] requires a configuration file to operate.
 47 | This ini\-type file must have a \f[C][defaults]\f[] section in which
 48 | general program parameters are configured, and it will have any number
 49 | of additional sections specifying the MQTT topics it is to subscribe to.
 50 | For the defaults section, please consult the example file provided with
 51 | the source code for a list of allowed settings.
 52 | .PP
 53 | Within a \f[I]topic\f[] section, metrics collected by \f[I]collectd\f[]
 54 | are specified.
 55 | .IP
 56 | .nf
 57 | \f[C]
 58 | [defaults]
 59 | host\ =\ localhost
 60 | port\ =\ 1883
 61 | 
 62 | ;\ (1)\ subscribe\ to\ a\ wildcard\ and\ produce\ three\ metrics\ per\ subscription.
 63 | ;\ the\ metric\ names\ are\ interpolated\ with\ `tid\[aq]\ from\ the\ JSON\ message
 64 | ;\ payload,\ and\ the\ values\ of\ each\ metric\ are\ obtained\ from\ the
 65 | ;\ JSON\ element\ behind\ the\ `<\[aq]
 66 | 
 67 | [owntracks/+/+]
 68 | gauge\ =\ vehicle/{tid}/speed<vel
 69 | gauge\ =\ vehicle/{tid}/altitude<alt
 70 | counter\ =\ vehicle/{tid}/odometer<trip
 71 | 
 72 | ;\ (2)\ subscribe\ to\ one\ topic\ and\ rename\ the\ metric
 73 | 
 74 | [$SYS/broker/clients/inactive]
 75 | gauge\ =\ clients.inactive
 76 | 
 77 | ;\ (3)\ subscribe\ to\ one\ topic\ and\ KEEP\ its\ name
 78 | ;\ use\ the\ "*"\-form\ for\ wildcarded\ topics
 79 | 
 80 | [$SYS/broker/load/messages/received/1min]
 81 | gauge\ =\ *
 82 | \f[]
 83 | .fi
 84 | .PP
 85 | Example \f[C]1\f[] is complex.
 86 | \f[I]mqttcollect\f[] will subscribe to the wildcarded
 87 | \f[C]owntracks/+/+\f[] topic, and for each message received on that
 88 | topic, will produce three metrics.
 89 | The special character \f[C]<\f[] in the line indicates the MQTT message
 90 | payload is expected to be JSON.
 91 | Each of the metric names will have the JSON element \f[C]tid\f[] from
 92 | the payload interpolated into their names, and the actual value of the
 93 | metric will be obtained (\f[C]<\f[]) from the specified JSON element
 94 | (\f[C]vel\f[], \f[C]alt\f[], and \f[C]trip\f[] respectively).
 95 | Using this configuration, and assuming a payload with this JSON
 96 | .IP
 97 | .nf
 98 | \f[C]
 99 | {"tid":\ "BB",\ "vel":\ 62,\ "trip":\ 8246531,\ "alt":\ 48}
100 | \f[]
101 | .fi
102 | .PP
103 | \f[I]mqttcollect\f[] could produce the following three metrics for
104 | \f[I]collectd\f[]:
105 | .IP
106 | .nf
107 | \f[C]
108 | PUTVAL\ tiggr/mqttcollect/gauge\-vehicle/BB/speed\ 1431535440:62.00
109 | PUTVAL\ tiggr/mqttcollect/gauge\-vehicle/BB/altitude\ 1431535440:48.00
110 | PUTVAL\ tiggr/mqttcollect/counter\-vehicle/BB/odometer\ 1431535440:8246531.00
111 | \f[]
112 | .fi
113 | .PP
114 | In example \f[C]2\f[], the program will subscribe to a single topic, and
115 | will produce a metric renamed to \f[C]clients.inactive\f[].
116 | .IP
117 | .nf
118 | \f[C]
119 | PUTVAL\ tiggr/mqttcollect/gauge\-clients.inactive\ 1431535434:1.00
120 | \f[]
121 | .fi
122 | .PP
123 | Example \f[C]3\f[] subscribes to the single topic and does \f[I]not\f[]
124 | rename the metric (note the \f[C]*\f[]); this is what you\[aq]ll
125 | typically use for wildcarded topic subscribes.
126 | .IP
127 | .nf
128 | \f[C]
129 | PUTVAL\ tiggr/mqttcollect/gauge\-$SYS/broker/load/messages/received/1min\ 1431535557:61.47
130 | \f[]
131 | .fi
132 | .SH INFLUXDB
133 | .PP
134 | As an example, we show how to configure InfluxDB to accept values from
135 | \f[I]collectd\f[] via the latter's network plugin.
136 | Configure InfluxDB to launch the native \f[I]collectd\f[] input:
137 | .IP
138 | .nf
139 | \f[C]
140 | [input_plugins]
141 | 
142 | \ \ [input_plugins.collectd]
143 | \ \ enabled\ =\ true
144 | \ \ #\ address\ =\ "0.0.0.0"\ #\ defaults\ to\ bind\-address.
145 | \ \ port\ =\ 25826
146 | \ \ database\ =\ "collectd"
147 | \ \ #\ https://github.com/collectd/collectd/blob/master/src/types.db
148 | \ \ typesdb\ =\ "/usr/share/collectd/types.db"
149 | \f[]
150 | .fi
151 | .SH COLLECTD
152 | .PP
153 | Configure \f[I]collectd\f[] to send its metrics to InfluxDB via the
154 | network plugin which talks to InfluxDB.
155 | (Compare the port numbers here and above in InfluxDB.)
156 | .IP
157 | .nf
158 | \f[C]
159 | LoadPlugin\ network
160 | 
161 | <Plugin\ "network">
162 | \ \ \ #\ influxdb
163 | \ \ \ Server\ "127.0.0.1"\ "25826"
164 | </Plugin>
165 | \f[]
166 | .fi
167 | .PP
168 | Configure \f[I]collectd\f[] to load our executable \f[I]mqttcollect\f[]
169 | via its exec mechanism.
170 | Specify \f[I]mqttcollect\f[]\[aq]s options as individual strings in the
171 | \f[C]Exec\f[] invocation.
172 | .IP
173 | .nf
174 | \f[C]
175 | LoadPlugin\ exec
176 | 
177 | <Plugin\ exec>
178 | \ \ \ Exec\ "mosquitto:mosquitto"\ "/usr/bin/mqttcollect"\ "\-f"\ "/etc/my.ini"
179 | </Plugin>
180 | \f[]
181 | .fi
182 | .SH BUGS
183 | .PP
184 | Yes.
185 | .SH AVAILABILITY
186 | .PP
187 | <https://github.com/jpmens/mqttcollect>
188 | .SH CREDITS
189 | .IP \[bu] 2
190 | This program uses \f[I]libmosquitto\f[], a library provided by the
191 | Mosquitto project <http://mosquitto.org> as well as some of the
192 | excellent include files provided by
193 | <http://troydhanson.github.io/uthash>
194 | .SH INSTALLATION
195 | .IP \[bu] 2
196 | Obtain the source code for \f[I]mqttcollect\f[], adjust the
197 | \f[C]Makefile\f[] and run \f[C]make\f[].
198 | .SH SEE ALSO
199 | .IP \[bu] 2
200 | \f[C]collectd\f[](1).
201 | .IP \[bu] 2
202 | <https://github.com/jpmens/mqttwarn>
203 | .SH AUTHOR
204 | .PP
205 | Jan\-Piet Mens <http://jpmens.net>
206 | .SH AUTHORS
207 | Jan\-Piet Mens.
208 | 


--------------------------------------------------------------------------------
/mqttcollect.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2015 Jan-Piet Mens <jpmens()gmail.com>
  3 |  * All rights reserved.
  4 |  * 
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that the following conditions are met:
  7 |  * 
  8 |  * 1. Redistributions of source code must retain the above copyright notice,
  9 |  *    this list of conditions and the following disclaimer.
 10 |  * 2. Redistributions in binary form must reproduce the above copyright
 11 |  *    notice, this list of conditions and the following disclaimer in the
 12 |  *    documentation and/or other materials provided with the distribution.
 13 |  * 3. Neither the name of mosquitto nor the names of its
 14 |  *    contributors may be used to endorse or promote products derived from
 15 |  *    this software without specific prior written permission.
 16 |  * 
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 18 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 19 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 20 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 21 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 22 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 23 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 24 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 25 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 26 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 27 |  * POSSIBILITY OF SUCH DAMAGE.
 28 |  */
 29 | 
 30 | #include <stdio.h>
 31 | #include <stdlib.h>
 32 | #include <unistd.h>
 33 | #include <time.h>
 34 | #include <string.h>
 35 | #include <getopt.h>
 36 | #include <sys/utsname.h>
 37 | #include <signal.h>
 38 | #include <mosquitto.h>
 39 | #include <errno.h>
 40 | #include <ctype.h>
 41 | #include "uthash.h"
 42 | #include "utstring.h"
 43 | #include "json.h"
 44 | #include "ini.h"	/* https://github.com/benhoyt/inih */
 45 | 
 46 | #ifndef TRUE
 47 | # define TRUE (1)
 48 | #endif
 49 | #ifndef FALSE
 50 | # define FALSE (0)
 51 | #endif
 52 | 
 53 | #define SSL_VERIFY_PEER	(1)
 54 | #define SSL_VERIFY_NONE	(0)
 55 | 
 56 | #define PROGNAME	"mqttcollect"
 57 | #define CONFIGFILE	"/usr/local/etc/mqttcollect.ini"
 58 | #define SECTION  	"defaults"
 59 | 
 60 | typedef struct {
 61 |     const char *host;			/* MQTT broker to connect to */
 62 |     const char *nodename;		/* for collectd; defaults to short uname */
 63 |     int port;				/* port of the MQTT broker */
 64 |     const char *username;		/* broker login; credentials are only set if this is non-NULL */
 65 |     const char *password;		/* handed to the broker together with username */
 66 |     const char *psk_key;		/* TLS-PSK is used only if both psk_key ... */
 67 |     const char *psk_identity;		/* ... and psk_identity are set */
 68 |     const char *ca_file;		/* CA file; enables certificate TLS unless PSK is configured */
 69 |     const char *certfile;		/* client certificate handed to mosquitto_tls_set() */
 70 |     const char *keyfile;		/* client key handed to mosquitto_tls_set() */
 71 |     const char *progname;		/* second component of the PUTVAL identifier */
 72 |     const char *prefix;			/* optional; appended to progname as "-<prefix>" */
 73 | } config;
 74 | /* The program's configuration with its compiled-in defaults; members not listed start out as NULL */
 75 | static config cf = {
 76 | 	.host		= "localhost",
 77 | 	.port		= 1883,
 78 | 	.progname	= PROGNAME
 79 | };
 80 | 
 81 | /*
 82 |  * A hash of metrics with their name (metric), type (e.g. gauge) and
 83 |  * optional JSON element. Each topic owns one such hash; it is keyed
 84 |  * by the metric name.
 85 |  */
 86 | struct metrics_h {
 87 | 	const char *metric;	/* metric name (hash key); may contain {token}s to expand */
 88 | 	const char *type;	/* key of the ini entry, e.g. "gauge" or "counter" */
 89 | 	const char *element;	/* If NULL, not JSON; "*" means: name the metric after the topic */
 90 | 	UT_hash_handle hh;
 91 | };
 92 | 
 93 | struct topics_h {		/* one entry per ini section other than [defaults]; keyed by topic */
 94 |     const char *topic;          /* MQTT topic */
 95 |     struct metrics_h *mh;	/* hash of metrics to produce per topic */
 96 |     UT_hash_handle hh;
 97 | };
 98 | static struct topics_h *topics_h = NULL;	/* all configured topics */
 99 | 
100 | static int verbose = FALSE;	/* set by -v: trace every metric on stderr */
101 | 
102 | 
103 | #define _eq(n) (strcmp(key, n) == 0)
104 | /*
105 |  * ini_parse() callback, invoked for every `key = val' line of the
106 |  * configuration file together with the name of its [section].
107 |  *
108 |  * Settings of the [defaults] section are stored in the config structure
109 |  * passed in as `cf'. Any other section name is an MQTT topic, and each
110 |  * entry in it is a metric; the key is the metric type (gauge, counter):
111 |  *
112 |  *  [owntracks/gw/+]	<-- section => topic
113 |  *  gauge = cars/{tid}/speed<vel
114 |  *
115 |  *     key: "gauge"
116 |  *     val: "cars/{tid}/speed<vel"
117 |  *           ^^^^^^^^^^^^^^^^ ^^^
118 |  *              metric        elem
119 |  *
120 |  *  [$SYS/broker/uptime]
121 |  *  counter = *		<-- metric is named after the topic
122 |  *
123 |  * A metric name already configured for a topic is reported on stderr
124 |  * and skipped. Always returns 1; exits if memory is exhausted.
125 |  */
126 | static int handler(void *cf, const char *section, const char *key, const char *val)
127 | {
128 | 	config *c = (config *)cf;
129 | 	struct topics_h *th;
130 | 	struct metrics_h *mh;
131 | 	static UT_string *elem, *metric;	/* scratch buffers, emptied on every call */
132 | 	const char *p;
133 | 
134 | 	utstring_renew(elem);
135 | 	utstring_renew(metric);
136 | 
137 | 	if (!strcmp(section, SECTION)) {
138 | 		if (_eq("host"))
139 | 			c->host = strdup(val);
140 | 		else if (_eq("username"))
141 | 			c->username = strdup(val);
142 | 		else if (_eq("password"))
143 | 			c->password = strdup(val);
144 | 		else if (_eq("psk_key"))
145 | 			c->psk_key = strdup(val);
146 | 		else if (_eq("psk_identity"))
147 | 			c->psk_identity = strdup(val);
148 | 		else if (_eq("ca_file"))
149 | 			c->ca_file = strdup(val);
150 | 		else if (_eq("certfile"))
151 | 			c->certfile = strdup(val);
152 | 		else if (_eq("keyfile"))
153 | 			c->keyfile = strdup(val);
154 | 		else if (_eq("nodename"))
155 | 			c->nodename = strdup(val);
156 | 		else if (_eq("progname"))
157 | 			c->progname = strdup(val);
158 | 		else if (_eq("prefix"))
159 | 			c->prefix = strdup(val);
160 | 		else if (_eq("port"))
161 | 			c->port = atoi(val);
162 | 		return (1);
163 | 	}
164 | 
165 | 	/* Split val into metric name and element; val is const and not ours
166 | 	 * to modify, so the part before '<' is copied by length. */
167 | 	if ((p = strchr(val, '<')) != NULL) {		/* "<vel" */
168 | 		utstring_printf(elem, "%s", p + 1);	/* "vel"  */
169 | 		utstring_printf(metric, "%.*s", (int)(p - val), val);
170 | 	} else if (strcmp(val, "*") == 0) {	/* copy section/topic name to metric */
171 | 		utstring_printf(metric, "%s", section);
172 | 		utstring_printf(elem, "*");	/* indicate wildcard */
173 | 	} else {
174 | 		utstring_printf(metric, "%s", val);
175 | 	}
176 | 
177 | 	/* If we've not yet seen this topic, add it to the hash */
178 | 	HASH_FIND_STR(topics_h, section, th);
179 | 	if (th == NULL) {
180 | 		if ((th = malloc(sizeof(*th))) == NULL)
181 | 			goto nomem;
182 | 		if ((th->topic = strdup(section)) == NULL)
183 | 			goto nomem;
184 | 		th->mh = NULL;
185 | 		HASH_ADD_KEYPTR( hh, topics_h, th->topic, strlen(th->topic), th );
186 | 	}
187 | 
188 | 	/*
189 | 	 * The metrics hash is keyed by metric name, so that (and not the
190 | 	 * raw val, which is "*" for wildcards) is what must be looked up.
191 | 	 */
192 | 	HASH_FIND_STR(th->mh, utstring_body(metric), mh);
193 | 	if (mh != NULL) {
194 | 		fprintf(stderr, "%s: duplicate metric `%s' in [%s] ignored\n",
195 | 			PROGNAME, utstring_body(metric), section);
196 | 		return (1);
197 | 	}
198 | 
199 | 	if ((mh = malloc(sizeof(*mh))) == NULL)
200 | 		goto nomem;
201 | 	mh->metric = strdup(utstring_body(metric));
202 | 	mh->type = strdup(key);
203 | 	mh->element = utstring_len(elem) ? strdup(utstring_body(elem)) : NULL;
204 | 	if (mh->metric == NULL || mh->type == NULL)
205 | 		goto nomem;
206 | 	HASH_ADD_KEYPTR( hh, th->mh, mh->metric, strlen(mh->metric), mh );
207 | 
208 | 	return (1);
209 | 
210 | nomem:
211 | 	fprintf(stderr, "%s: Out of memory.\n", PROGNAME);
212 | 	exit(1);
213 | }
214 | 
215 | 
216 | static struct mosquitto *m = NULL;	/* the one MQTT client; NULL until main() creates it */
217 | 
218 | /*
219 |  * User data for Mosquitto: registered with mosquitto_new() and handed
220 |  * back to the callbacks as their `userdata' argument.
221 |  */
222 | struct udata {
223 | 	char *nodename;			/* first component of the PUTVAL identifier */
224 | 	struct topics_h *topics_h;	/* topics to subscribe to on connect */
225 | };
226 | 
227 | /*
228 |  * Leave the program with exit status 1. If the MQTT client exists, it
229 |  * is disconnected and libmosquitto is cleaned up beforehand.
230 |  */
231 | void fatal(void)
232 | {
233 | 	if (m != NULL) {
234 | 		mosquitto_disconnect(m);
235 | 		mosquitto_loop_stop(m, false);
236 | 		mosquitto_lib_cleanup();
237 | 	}
238 | 	exit(1);
239 | }
240 | 
241 | /* Signal handler: report the signal number on stderr, then leave
242 |  * through fatal(), i.e. with the same teardown and exit status 1. */
243 | void catcher(int sig)
244 | {
245 | 	fprintf(stderr, "Going down on signal %d\n", sig);
246 | 	fatal();
247 | }
248 | 
249 | /*
250 |  * Numeric value of member `element' of `json': a number as is, a string
251 |  * through atof(), and 0.0 if the member is missing or of another type.
252 |  */
253 | double json_object(JsonNode *json, const char *element)
254 | {
255 | 	JsonNode *node = json_find_member(json, element);
256 | 
257 | 	if (node != NULL) {
258 | 		if (node->tag == JSON_NUMBER)
259 | 			return (node->number_);
260 | 		if (node->tag == JSON_STRING)
261 | 			return (atof(node->string_));
262 | 	}
263 | 	return (0.0);
264 | }
265 | 
266 | /*
267 |  * Expand the content of `line', which may have one or more {token}
268 |  * in it into `res', using the decoded JSON at `json'.
269 |  *
270 |  * Text outside of braces is appended to `res' as is; a backslash makes
271 |  * the character following it literal (a lone trailing backslash is
272 |  * dropped). A {token} naming a string or number member of `json' is
273 |  * replaced by that member's value, numbers being formatted with "%lf";
274 |  * members of any other type yield "FIXME-JSON". A token which is empty,
275 |  * unterminated or not a member of `json' is copied to `res' with its
276 |  * brace(s) to indicate the error.
277 |  */
278 | 
279 | void xexpand(UT_string *res, const char *line, JsonNode *json)
280 | {
281 |     JsonNode *m = NULL;
282 |     static UT_string *token;	/* scratch buffer, reused across calls */
283 |     const char *lp;
284 | 
285 |     utstring_renew(token);
286 | 
287 |     for (lp = line; lp && *lp; lp++) {
288 |         if (*lp == '\\') {
289 |             if (*++lp == '\0')
290 |                 break;	/* nothing left to escape; don't step past the end */
291 |             utstring_printf(res, "%c", *lp);
292 |             continue;
293 |         }
294 |         if (*lp != '{') {
295 |             utstring_printf(res, "%c", *lp);
296 |             continue;
297 |         }
298 | 
299 |         /* Collect everything up to the closing brace (or end of line) */
300 |         utstring_clear(token);
301 |         for (lp++; *lp && *lp != '}'; lp++)
302 |             utstring_printf(token, "%c", *lp);
303 | 
304 |         if (*lp != '}') {
305 |             /* Unterminated; push back, incl leading brace */
306 |             utstring_printf(res, "{%s", utstring_body(token));
307 |             break;
308 |         }
309 | 
310 |         /*
311 |          * See if `token' is a JSON element, and if so, interpolate
312 |          * its value. If token is empty or not in JSON, stuff it and
313 |          * its braces back into the result to indicate the error.
314 |          */
315 | 
316 |         if (utstring_len(token) == 0 ||
317 |             (m = json_find_member(json, utstring_body(token))) == NULL) {
318 |             utstring_printf(res, "{%s}", utstring_body(token));
319 |         } else if (m->tag == JSON_STRING) {
320 |             utstring_printf(res, "%s", m->string_);
321 |         } else if (m->tag == JSON_NUMBER) {
322 |             utstring_printf(res, "%lf", m->number_);
323 |         } else {
324 |             utstring_printf(res, "FIXME-JSON");
325 |         }
326 |     }
327 | }
328 | 
329 | 
330 | 
331 | /*
332 |  * Message callback: turn one MQTT message into a PUTVAL line on stdout
333 |  * for each metric configured for the subscription it belongs to.
334 |  *
335 |  * `userdata' is the struct udata given to mosquitto_new(); its nodename
336 |  * starts the PUTVAL identifier. Messages without a payload are ignored.
337 |  * Diagnostics go to stderr, as stdout carries the PUTVAL lines.
338 |  */
339 | void cb_sub(struct mosquitto *mosq, void *userdata, const struct mosquitto_message *msg)
340 | {
341 | 	char *topic = msg->topic;
342 | 	char *payload = msg->payload;	/* NOTE(review): used as a C string; assumes libmosquitto NUL-terminates it */
343 | 	static UT_string *pfix, *metric_name;
344 | 	struct udata *ud = (struct udata *)userdata;
345 | 	time_t now;
346 | 	struct topics_h *th, *currth = NULL;
347 | 	bool bf;
348 | 	struct metrics_h *mh;
349 | 	JsonNode *json = NULL;		/* the payload decoded as JSON ... */
350 | 	bool decoded = false;		/* ... lazily, at most once per message */
351 | 
352 | 	if (payload == NULL)		/* atof() and json_decode() need a string */
353 | 		return;
354 | 
355 | 	/*
356 | 	 * We can't try to find topic in our hash, because this may be the
357 | 	 * result of a wildcard subscription. Instead, see if one of the
358 | 	 * topics in hash matches the subscription. Slower, but I can't
359 | 	 * help that.
360 | 	 */
361 | 	for (th = topics_h; th != NULL; th = th->hh.next) {
362 | 		if (mosquitto_topic_matches_sub(th->topic, topic, &bf) == MOSQ_ERR_SUCCESS && bf) {
363 | 			currth = th;
364 | 			break;
365 | 		}
366 | 	}
367 | 
368 | 	if (currth == NULL) {
369 | 		fprintf(stderr, "%s: HUH? PANIC? topic not found\n", PROGNAME);
370 | 		return;
371 | 	}
372 | 
373 | 	time(&now);
374 | 
375 | 	utstring_renew(pfix);
376 | 	utstring_printf(pfix, "%s", cf.progname);
377 | 	if (cf.prefix && *cf.prefix) {
378 | 		utstring_printf(pfix, "-%s", cf.prefix);
379 | 	}
380 | 
381 | 	/*
382 | 	 * For each of the metrics configured for this subscription, do the
383 | 	 * "needful".
384 | 	 * If `element' in metric is NULL, use the original payload under the
385 | 	 * configured metric name; if it is "*", use the original payload
386 | 	 * under the name of the topic. Otherwise it's the name of a JSON
387 | 	 * element in the (assumed) JSON payload.
388 | 	 */
389 | 	for (mh = currth->mh; mh != NULL; mh = mh->hh.next) {
390 | 		double number;
391 | 		utstring_renew(metric_name);
392 | 
393 | 		if (verbose)
394 | 			fprintf(stderr, "     =====[ %s ] (%s) %s\n", mh->metric, mh->type,
395 | 				mh->element ? mh->element : "(null)");
396 | 
397 | 		if (mh->element == NULL) {
398 | 			utstring_printf(metric_name, "%s", mh->metric);
399 | 			number = atof(payload);
400 | 		} else if (strcmp(mh->element, "*") == 0) {
401 | 			utstring_printf(metric_name, "%s", topic);
402 | 			number = atof(payload);
403 | 		} else {					/* JSON */
404 | 			if (!decoded) {
405 | 				json = json_decode(payload);
406 | 				decoded = true;
407 | 			}
408 | 			if (json == NULL)		/* payload isn't JSON */
409 | 				continue;
410 | 			xexpand(metric_name, mh->metric, json);
411 | 			number = json_object(json, mh->element);
412 | 		}
413 | 
414 | 		printf("PUTVAL %s/%s/%s-%s %ld:%.2lf\n",
415 | 			ud->nodename, utstring_body(pfix), mh->type,
416 | 			utstring_body(metric_name), (long)now, number);
417 | 	}
418 | 	if (json != NULL)
419 | 		json_delete(json);
420 | }
421 | 
422 | /* Connect callback: rc != 0 means the broker refused the connection; otherwise
423 |  * subscribe (QoS 0) to every topic in the hash carried by `userdata'. */
424 | void cb_connect(struct mosquitto *mosq, void *userdata, int rc)
425 | {
426 | 	struct udata *ud = (struct udata *)userdata;
427 | 	struct topics_h *th;
428 | 	int res;
429 | 	if (rc != 0) {
430 | 		fprintf(stderr, "%s: connection refused by broker: %d\n", PROGNAME, rc);
431 | 		return;
432 | 	}
433 | 	for (th = ud->topics_h; th != NULL; th = th->hh.next)
434 | 		if ((res = mosquitto_subscribe(mosq, NULL, th->topic, 0)) != MOSQ_ERR_SUCCESS)
435 | 			fprintf(stderr, "%s: %s: %s\n", PROGNAME, th->topic, mosquitto_strerror(res));
436 | }
437 | 
438 | /*
439 |  * Disconnect callback. rc 0 is a disconnect requested by the client
440 |  * and is ignored. rc 7 (broker gone) is only reported on stderr; any
441 |  * other rc is reported as well and then ends the program via fatal().
442 |  */
443 | void cb_disconnect(struct mosquitto *mosq, void *userdata, int rc)
444 | {
445 | 	if (rc == 0)
446 | 		return;		/* Disconnect requested by client */
447 | 
448 | 	if (rc == 7) {		/* presumably MOSQ_ERR_CONN_LOST -- TODO confirm */
449 | 		fprintf(stderr, "%s: disconnected: reason: %d (%s) [%s]\n",
450 | 			PROGNAME, rc, strerror(errno),
451 | 			"Broker disconnected. Reconnecting..");
452 | 		return;
453 | 	}
454 | 
455 | 	/* Anything else is unexpected: report it and give up */
456 | 	fprintf(stderr, "%s: disconnected: reason: %d (%s)\n",
457 | 		PROGNAME, rc, strerror(errno));
458 | 	fatal();
459 | }
460 | 
461 | /*
462 |  * Usage: mqttcollect [-v] [-s] [-f configfile]
463 |  *
464 |  *   -v  verbose: describe each metric on stderr as it is processed
465 |  *   -s  insecure TLS (mosquitto_tls_insecure_set()); needs `ca_file'
466 |  *   -f  configuration file to load instead of CONFIGFILE
467 |  *
468 |  * Loads the configuration, connects to the MQTT broker and processes
469 |  * messages forever. Exits with 1 on usage, configuration or memory
470 |  * errors, 2 if connecting to the broker fails and 3 if TLS set-up fails.
471 |  */
472 | int main(int argc, char **argv)
473 | {
474 | 	char *progname = *argv;
475 | 	int ch, usage = 0, rc;
476 | 	struct utsname uts;
477 | 	char clientid[80];
478 | 	int keepalive = 60;
479 | 	int tls_insecure = FALSE;
480 | 	struct udata udata;
481 | 	const char *configfile = CONFIGFILE;
482 | 
483 | 	setvbuf(stdout, NULL, _IONBF, 0);
484 | 
485 | 	/* -v and -s are plain flags; only -f takes an argument */
486 | 	while ((ch = getopt(argc, argv, "vsf:")) != -1) {
487 | 		switch (ch) {
488 | 			case 'v':
489 | 				verbose = TRUE;
490 | 				break;
491 | 			case 's':
492 | 				tls_insecure = TRUE;
493 | 				break;
494 | 			case 'f':
495 | 				configfile = optarg;
496 | 				break;
497 | 			default:
498 | 				usage = 1;
499 | 				break;
500 | 		}
501 | 	}
502 | 
503 | 	/* Reject bad options before even looking at the configuration */
504 | 	if (usage) {
505 | 		fprintf(stderr, "Usage: %s [-v] [-s] [-f configfile]\n", progname);
506 | 		exit(1);
507 | 	}
508 | 
509 | 	if ((rc = ini_parse(configfile, handler, &cf)) < 0) {
510 | 		fprintf(stderr, "%s: Can't load '%s'\n", PROGNAME, configfile);
511 | 		return 1;
512 | 	} else if (rc > 0) {
513 | 		/* NOTE(review): presumably the line of the first parse error; confirm in ini.h */
514 | 		fprintf(stderr, "%s: warning: '%s': error at line %d\n",
515 | 			PROGNAME, configfile, rc);
516 | 	}
517 | 
518 | 	/* Determine nodename: either use the configured one or get the
519 | 	 * local nodename, cut off at its first dot */
520 | 	if (cf.nodename == NULL) {
521 | 		char *name = NULL, *p;
522 | 
523 | 		if (uname(&uts) == 0 && (name = strdup(uts.nodename)) != NULL) {
524 | 			if ((p = strchr(name, '.')) != NULL)
525 | 				*p = 0;
526 | 		}
527 | 		cf.nodename = (name != NULL) ? name : "unknown";
528 | 	}
529 | 
530 | 	mosquitto_lib_init();
531 | 
532 | 	udata.nodename = (char *)cf.nodename;
533 | 	udata.topics_h = topics_h;
534 | 
535 | 	snprintf(clientid, sizeof(clientid), "%s-%d", PROGNAME, (int)getpid());
536 | 	if ((m = mosquitto_new(clientid, TRUE, (void *)&udata)) == NULL) {
537 | 		fprintf(stderr, "Out of memory.\n");
538 | 		exit(1);
539 | 	}
540 | 
541 | 	if (cf.psk_key && cf.psk_identity) {
542 | 		rc = mosquitto_tls_psk_set(m, cf.psk_key, cf.psk_identity, NULL);
543 | 		if (rc != MOSQ_ERR_SUCCESS) {
544 | 			fprintf(stderr, "Cannot set TLS PSK: %s\n",
545 | 				mosquitto_strerror(rc));
546 | 			exit(3);
547 | 		}
548 | 	} else if (cf.ca_file) {
549 | 		rc = mosquitto_tls_set(m,
550 | 			cf.ca_file,		/* cafile */
551 | 			NULL,			/* capath */
552 | 			cf.certfile,		/* certfile */
553 | 			cf.keyfile,		/* keyfile */
554 | 			NULL			/* pw_callback() */
555 | 			);
556 | 		if (rc != MOSQ_ERR_SUCCESS) {
557 | 			fprintf(stderr, "Cannot set TLS CA: %s (check path names)\n",
558 | 				mosquitto_strerror(rc));
559 | 			exit(3);
560 | 		}
561 | 
562 | 		mosquitto_tls_opts_set(m, SSL_VERIFY_PEER,
563 | 			NULL, NULL);	/* tls_version ("tlsv1.2", "tlsv1"), ciphers */
564 | 
565 | 		if (tls_insecure) {
566 | #if LIBMOSQUITTO_VERSION_NUMBER >= 1002000
567 | 			/* mosquitto_tls_insecure_set() requires libmosquitto 1.2. */
568 | 			mosquitto_tls_insecure_set(m, TRUE);
569 | #endif
570 | 		}
571 | 	}
572 | 
573 | 	if (cf.username)
574 | 		mosquitto_username_pw_set(m, cf.username, cf.password);
575 | 
576 | 	mosquitto_message_callback_set(m, cb_sub);
577 | 	mosquitto_connect_callback_set(m, cb_connect);
578 | 	mosquitto_disconnect_callback_set(m, cb_disconnect);
579 | 
580 | 	mosquitto_reconnect_delay_set(m, 1, 10, FALSE);	/* delay, delay_max, exponential backoff */
581 | 
582 | 	if ((rc = mosquitto_connect(m, cf.host, cf.port, keepalive)) != MOSQ_ERR_SUCCESS) {
583 | 		fprintf(stderr, "Unable to connect to %s:%d: %s\n", cf.host, cf.port,
584 | 			mosquitto_strerror(rc));
585 | 		perror("");
586 | 		exit(2);
587 | 	}
588 | 
589 | 	signal(SIGINT, catcher);
590 | 
591 | 	while (1) {
592 | 		rc = mosquitto_loop_forever(m, -1, 1);
593 | 		fprintf(stderr, "loop_forever returns %d\n", rc);
594 | 	}
595 | 
596 | 	/*
597 | 	 * Unreached. There's a tonne of memory we ought to free (topics_h,
598 | 	 * etc) but we don't get here, so nobody will notice...
599 | 	 */
600 | 
601 | 	mosquitto_disconnect(m);
602 | 	mosquitto_lib_cleanup();
603 | 	return 0;
604 | }
605 | 


--------------------------------------------------------------------------------
/mqttcollect.ini.example:
--------------------------------------------------------------------------------
 1 | ; Configuration for mqttcollect
 2 | ; Comments are introduced with ; also inline after white space
 3 | ; White space around '=' is stripped
 4 | 
 5 | [defaults]
 6 | host = localhost
 7 | port = 1883
 8 | ; username = jjolie
 9 | ; password = s1c#ret
10 | ; psk_key = 
11 | ; psk_identity = 
12 | ; ca_file = 
13 | ; certfile =
14 | ; keyfile =
15 | ; nodename = foob
16 | 
17 | ; progname is the program name (default "mqttcollect") used in PUTVAL
18 | ; prefix is optional (defaults to NULL) and can be used to
19 | ; differentiate multiple services of the same type.
20 | ; PUTVAL <nodename>/<progname>[-<prefix>]/<type>-<metricname>
21 | 
22 | ; progname = mqttcollect
23 | ; prefix   = PREFIX
24 | 
25 | ; Examples
26 | ; password = s1ckr3t		; a password
27 | ; password = s1c;k r3t          ; a password with a semicolon and a space in it
28 | ; password = s1c  r3t           ; a password with two spaces in it!
29 | ; password = s1c#ret		; a password with a hash symbol
30 | 
31 | ; subscribe to a wildcard and produce three metrics per subscription.
32 | ; the metric names are interpolated with `tid' from the JSON message
33 | ; payload, and the values of each metric are obtained from the
34 | ; JSON element behind the `<'
35 | 
36 | [owntracks/+/+]
37 | gauge = vehicle/{tid}/speed<vel
38 | gauge = vehicle/{tid}/altitude<alt
39 | counter = vehicle/{tid}/odometer<trip
40 | 
41 | ; subscribe to one topic and rename the metric
42 | 
43 | [$SYS/broker/clients/inactive]
44 | gauge = clients.inactive
45 | 
46 | ; subscribe to one topic and KEEP its name
47 | 
48 | [$SYS/broker/load/messages/received/1min]
49 | gauge = *
50 | 
51 | ; Can be used with temperature-simulator.py
52 | 
53 | [arduino/temp/+]
54 | gauge = heat.{room}<celsius
55 | 


--------------------------------------------------------------------------------
/mqttcollect.pandoc:
--------------------------------------------------------------------------------
  1 | % MQTTCOLLECT(1) User Manuals
  2 | % Jan-Piet Mens
  3 | % May 13, 2015
  4 | 
  5 | # NAME
  6 | 
  7 | mqttcollect - MQTT-based Exec-plugin for collectd
  8 | 
  9 | # SYNOPSIS
 10 | 
 11 | mqttcollect [-v ] [-f _file_]
 12 | 
 13 | # DESCRIPTION
 14 | 
 15 | *mqttcollect* is an executable program which is used with collectd(1). It
 16 | subscribes to any number of MQTT topics you specify, and prints values to
 17 | stdout for collectd to process in an exec plugin block.
 18 | 
 19 | 	PUTVAL tiggr/mqtt-sys/gauge-clients.inactive 1430914033:0.00
 20 | 
 21 | _collectd_ launches *mqttcollect* which connects to the configured MQTT broker,
 22 | subscribes and waits for publishes to subscribed topics in an endless loop.
 23 | If an error occurs or the program exits for whichever reason, _collectd_
 24 | will restart it and log the reason in its log file.
 25 | 
 26 | *mqttcollect* supports TLS connections to the MQTT broker, username/password
 27 | authentication, and TLS-PSK, all configured via its configuration file.
 28 | 
 29 | # OPTIONS
 30 | 
 31 | *mqttcollect* understands the following options. 
 32 | 
 33 | -f *file*
 34 | :   Specify an ini-type configuration file (see below), which defaults to `/usr/local/etc/mqttcollect.ini`.
 35 | 
 36 | -v
 37 | :   Verbose.
 38 | 
 39 | 
 40 | # CONFIGURATION
 41 | 
 42 | *mqttcollect* requires a configuration file to operate. This ini-type file
 43 | must have a `[defaults]` section in which general program parameters are
 44 | configured, and it will have any number of additional sections specifying
 45 | the MQTT topics it is to subscribe to. For the defaults section, please
 46 | consult the example file provided with the source code for a list of allowed
 47 | settings.
 48 | 
 49 | Within a _topic_ section, metrics collected by _collectd_ are specified.
 50 | 
 51 | 	[defaults]
 52 | 	host = localhost
 53 | 	port = 1883
 54 | 
 55 | 	; (1) subscribe to a wildcard and produce three metrics per subscription.
 56 | 	; the metric names are interpolated with `tid' from the JSON message
 57 | 	; payload, and the values of each metric are obtained from the
 58 | 	; JSON element behind the `<'
 59 | 
 60 | 	[owntracks/+/+]
 61 | 	gauge = vehicle/{tid}/speed<vel
 62 | 	gauge = vehicle/{tid}/altitude<alt
 63 | 	counter = vehicle/{tid}/odometer<trip
 64 | 
 65 | 	; (2) subscribe to one topic and rename the metric
 66 | 
 67 | 	[$SYS/broker/clients/inactive]
 68 | 	gauge = clients.inactive
 69 | 
 70 | 	; (3) subscribe to one topic and KEEP its name
 71 | 	; use the "*"-form for wildcarded topics
 72 | 
 73 | 	[$SYS/broker/load/messages/received/1min]
 74 | 	gauge = *
 75 | 
 76 | Example `1` is complex. *mqttcollect* will subscribe to the wildcarded `owntracks/+/+`
 77 | topic, and for each message received on that topic, will produce three metrics. The
 78 | special character `<` in the line indicates the MQTT message payload is expected to
 79 | be JSON. Each of the metric names will have the JSON element `tid` from the payload
 80 | interpolated into their names, and the actual value of the metric will be obtained (`<`)
 81 | from the specified JSON element (`vel`, `alt`, and `trip` respectively). Using this configuration,
 82 | and assuming a payload with this JSON
 83 | 
 84 | 	{"tid": "BB", "vel": 62, "trip": 8246531, "alt": 48}
 85 | 
 86 | *mqttcollect* could produce the following three metrics for _collectd_:
 87 | 
 88 | 	PUTVAL tiggr/mqttcollect/gauge-vehicle/BB/speed 1431535440:62.00
 89 | 	PUTVAL tiggr/mqttcollect/gauge-vehicle/BB/altitude 1431535440:48.00
 90 | 	PUTVAL tiggr/mqttcollect/counter-vehicle/BB/odometer 1431535440:8246531.00
 91 | 
 92 | In example `2`, the program will subscribe to a single topic, and will produce a
 93 | metric renamed to `clients.inactive`.
 94 | 
 95 | 	PUTVAL tiggr/mqttcollect/gauge-clients.inactive 1431535434:1.00
 96 | 
 97 | Example `3` subscribes to the single topic and does *not* rename the metric (note the
 98 | `*`); this is what you'll typically use for wildcarded topic subscribes.
 99 | 
100 | 	PUTVAL tiggr/mqttcollect/gauge-$SYS/broker/load/messages/received/1min 1431535557:61.47
101 | 
102 | # INFLUXDB
103 | 
104 | As an example, we show how to configure InfluxDB to accept values from
105 | _collectd_ via the latter’s network plugin. Configure InfluxDB to launch the
106 | native _collectd_ input:
107 | 
108 | 	[input_plugins]
109 | 
110 | 	  [input_plugins.collectd]
111 | 	  enabled = true
112 | 	  # address = "0.0.0.0" # defaults to bind-address.
113 | 	  port = 25826
114 | 	  database = "collectd"
115 | 	  # https://github.com/collectd/collectd/blob/master/src/types.db
116 | 	  typesdb = "/usr/share/collectd/types.db"
117 | 
118 | # COLLECTD
119 | 
120 | Configure _collectd_ to send its metrics to InfluxDB via the network plugin
121 | which talks to InfluxDB. (Compare the port numbers here and above in
122 | InfluxDB.)
123 | 
124 | 	LoadPlugin network
125 | 
126 | 	<Plugin "network">
127 | 	   # influxdb
128 | 	   Server "127.0.0.1" "25826"
129 | 	</Plugin>
130 | 
131 | Configure _collectd_ to load our executable *mqttcollect* via its exec
132 | mechanism. Specify *mqttcollect*'s options as individual strings in the
133 | `Exec` invocation.
134 | 
135 | 	LoadPlugin exec
136 | 
137 | 	<Plugin exec>
138 | 	   Exec "mosquitto:mosquitto" "/usr/bin/mqttcollect" "-f" "/etc/my.ini"
139 | 	</Plugin>
140 | 
141 | # BUGS
142 | 
143 | Yes.
144 | 
145 | # AVAILABILITY
146 | 
147 | <https://github.com/jpmens/mqttcollect>
148 | 
149 | # CREDITS
150 | 
151 | * This program uses *libmosquitto*, a library provided by the Mosquitto
152 |   project <http://mosquitto.org> as well as some of the excellent
153 |   include files provided by <http://troydhanson.github.io/uthash>
154 | 
155 | # INSTALLATION
156 | 
157 | * Obtain the source code for *mqttcollect*, adjust the `Makefile` and run `make`.
158 | 
159 | # SEE ALSO
160 | 
161 | * `collectd`(1).
162 | * <https://github.com/jpmens/mqttwarn>
163 | 
164 | # AUTHOR
165 | 
166 | Jan-Piet Mens <http://jpmens.net>
167 | 
168 | 


--------------------------------------------------------------------------------
/temperature-simulator.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import paho.mqtt.publish as mqtt
 5 | import random
 6 | import json
 7 | import time
 8 | 
 9 | topic = 'arduino/temp/002'
10 | 
11 | while True:
12 |     celsius = float("%.2f" % (random.random() * 40))
13 | 
14 |     data = {
15 |         'room'       : 'kitchen',
16 |         'celsius'    : celsius,
17 |         'fahrenheit' : float("%.2f" % (9.0 / 5.0 * celsius + 32)),
18 |     }
19 | 
20 | 
21 |     payload = json.dumps(data)
22 | 
23 |     print topic, payload
24 |     mqtt.single(topic, payload, retain=True)
25 | 
26 |     time.sleep(1)
27 | 


--------------------------------------------------------------------------------
/uthash.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2003-2014, Troy D. Hanson     http://troydhanson.github.com/uthash/
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without
  6 | modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright
  9 |       notice, this list of conditions and the following disclaimer.
 10 | 
 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 22 | */
 23 | 
 24 | #ifndef UTHASH_H
 25 | #define UTHASH_H
 26 | 
 27 | #include <string.h>   /* memcmp,strlen */
 28 | #include <stddef.h>   /* ptrdiff_t */
 29 | #include <stdlib.h>   /* exit() */
 30 | 
 31 | /* These macros use decltype or the earlier __typeof GNU extension.
 32 |    As decltype is only available in newer compilers (VS2010 or gcc 4.3+
 33 |    when compiling c++ source) this code uses whatever method is needed
 34 |    or, for VS2008 where neither is available, uses casting workarounds. */
 35 | #if defined(_MSC_VER)   /* MS compiler */
 36 | #if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
 37 | #define DECLTYPE(x) (decltype(x))
 38 | #else                   /* VS2008 or older (or VS2010 in C mode) */
 39 | #define NO_DECLTYPE     /* makes DECLTYPE_ASSIGN use its char** fallback */
 40 | #define DECLTYPE(x)     /* expands to nothing: no cast is emitted */
 41 | #endif
 42 | #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
 43 | #define NO_DECLTYPE     /* same fallback as for old MS compilers */
 44 | #define DECLTYPE(x)
 45 | #else                   /* GNU, Sun and other compilers */
 46 | #define DECLTYPE(x) (__typeof(x))
 47 | #endif
 48 | /* DECLTYPE_ASSIGN(dst,src): store src in dst, cast to dst's type, or written through a char** alias if NO_DECLTYPE. */
 49 | #ifdef NO_DECLTYPE
 50 | #define DECLTYPE_ASSIGN(dst,src)                                                 \
 51 | do {                                                                             \
 52 |   char **_da_dst = (char**)(&(dst));                                             \
 53 |   *_da_dst = (char*)(src);                                                       \
 54 | } while(0)
 55 | #else
 56 | #define DECLTYPE_ASSIGN(dst,src)                                                 \
 57 | do {                                                                             \
 58 |   (dst) = DECLTYPE(dst)(src);                                                    \
 59 | } while(0)
 60 | #endif
 61 | 
 62 | /* Several of the hash functions use uint32_t, which isn't defined before VS2010. */
 63 | #if defined (_WIN32)
 64 | #if defined(_MSC_VER) && _MSC_VER >= 1600
 65 | #include <stdint.h>
 66 | #elif defined(__WATCOMC__)
 67 | #include <stdint.h>
 68 | #else                   /* NOTE(review): no uint16_t typedef here, yet get16bits uses uint16_t under _MSC_VER */
 69 | typedef unsigned int uint32_t;
 70 | typedef unsigned char uint8_t;
 71 | #endif
 72 | #else
 73 | #include <stdint.h>
 74 | #endif
 75 | 
 76 | #define UTHASH_VERSION 1.9.9
 77 | /* Each hook below is defined here only if the application has not already defined it. */
 78 | #ifndef uthash_fatal
 79 | #define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc); msg is discarded */
 80 | #endif
 81 | #ifndef uthash_malloc
 82 | #define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
 83 | #endif
 84 | #ifndef uthash_free
 85 | #define uthash_free(ptr,sz) free(ptr)     /* free fcn; sz is unused by default */
 86 | #endif
 87 | 
 88 | #ifndef uthash_noexpand_fyi
 89 | #define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
 90 | #endif
 91 | #ifndef uthash_expand_fyi
 92 | #define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
 93 | #endif
 94 | 
 95 | /* bucket-array sizing; buckets are picked with hashv & (num_buckets-1), so keep the count a power of two */
 96 | #define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
 97 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
 98 | #define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
 99 | 
100 | /* calculate the element whose hash handle address is hhp (tbl->hho is the handle's byte offset in the element) */
101 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
102 | /* HASH_FIND: out is NULL unless HASH_FIND_IN_BKT locates the key (keyptr,keylen); head may be NULL (empty hash). */
103 | #define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
104 | do {                                                                             \
105 |   (out)=NULL;                                                                    \
106 |   if ((head) != NULL) {                                                          \
107 |      unsigned _hf_bkt,_hf_hashv;                                                 \
108 |      HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt);   \
109 |      if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) {                      \
110 |        HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ],  \
111 |                         keyptr,keylen,out);                                      \
112 |      }                                                                           \
113 |   }                                                                              \
114 | } while (0)
115 | /* Optional Bloom filter in front of bucket lookups; enabled by defining HASH_BLOOM to log2 of the filter's bit count. */
116 | #ifdef HASH_BLOOM
117 | #define HASH_BLOOM_BITLEN (1UL << (HASH_BLOOM))
118 | #define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL))
119 | #define HASH_BLOOM_MAKE(tbl)                                                     \
120 | do {                                                                             \
121 |   (tbl)->bloom_nbits = HASH_BLOOM;                                               \
122 |   (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
123 |   if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
124 |   memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
125 |   (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
126 | } while (0)
127 | 
128 | #define HASH_BLOOM_FREE(tbl)                                                     \
129 | do {                                                                             \
130 |   uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
131 | } while (0)
132 | 
133 | #define HASH_BLOOM_BITSET(bv,idx) ((bv)[(idx)/8U] |= (1U << ((idx)%8U)))
134 | #define HASH_BLOOM_BITTEST(bv,idx) ((bv)[(idx)/8U] & (1U << ((idx)%8U)))
135 | 
136 | #define HASH_BLOOM_ADD(tbl,hashv)                                                \
137 |   HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
138 | 
139 | #define HASH_BLOOM_TEST(tbl,hashv)                                               \
140 |   HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
141 | 
142 | #else                              /* filter disabled: the hooks compile away */
143 | #define HASH_BLOOM_MAKE(tbl)
144 | #define HASH_BLOOM_FREE(tbl)
145 | #define HASH_BLOOM_ADD(tbl,hashv)
146 | #define HASH_BLOOM_TEST(tbl,hashv) (1)  /* always "may be present" */
147 | #define HASH_BLOOM_BYTELEN 0U
148 | #endif
149 | /* HASH_MAKE_TABLE: allocate and zero the table plus its initial buckets for the first element; OOM calls uthash_fatal. */
150 | #define HASH_MAKE_TABLE(hh,head)                                                 \
151 | do {                                                                             \
152 |   (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
153 |                   sizeof(UT_hash_table));                                        \
154 |   if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
155 |   memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
156 |   (head)->hh.tbl->tail = &((head)->hh);                                          \
157 |   (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
158 |   (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
159 |   (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
160 |   (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
161 |           HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
162 |   if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
163 |   memset((head)->hh.tbl->buckets, 0,                                             \
164 |           HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
165 |   HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
166 |   (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
167 | } while(0)
168 | /* HASH_ADD: add element "add", using its own member "fieldname" (keylen_in bytes) as the key. */
169 | #define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
170 |         HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
171 | /* HASH_REPLACE: add "add"; an existing element with the same key is unlinked first and handed back in "replaced" (else NULL). */
172 | #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
173 | do {                                                                             \
174 |   replaced=NULL;                                                                 \
175 |   HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced);                     \
176 |   if (replaced!=NULL) {                                                          \
177 |      HASH_DELETE(hh,head,replaced);                                              \
178 |   }                                                                              \
179 |   HASH_ADD(hh,head,fieldname,keylen_in,add);                                     \
180 | } while(0)
181 | /* HASH_ADD_KEYPTR: append "add" to the app-order list and its bucket; the key pointer is stored (not copied) and no duplicate check is made here. */
182 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
183 | do {                                                                             \
184 |  unsigned _ha_bkt;                                                               \
185 |  (add)->hh.next = NULL;                                                          \
186 |  (add)->hh.key = (char*)(keyptr);                                                \
187 |  (add)->hh.keylen = (unsigned)(keylen_in);                                       \
188 |  if (!(head)) {                                                                  \
189 |     head = (add);                                                                \
190 |     (head)->hh.prev = NULL;                                                      \
191 |     HASH_MAKE_TABLE(hh,head);                                                    \
192 |  } else {                                                                        \
193 |     (head)->hh.tbl->tail->next = (add);                                          \
194 |     (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);         \
195 |     (head)->hh.tbl->tail = &((add)->hh);                                         \
196 |  }                                                                               \
197 |  (head)->hh.tbl->num_items++;                                                    \
198 |  (add)->hh.tbl = (head)->hh.tbl;                                                 \
199 |  HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets,                         \
200 |          (add)->hh.hashv, _ha_bkt);                                              \
201 |  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh);                   \
202 |  HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv);                                 \
203 |  HASH_EMIT_KEY(hh,head,keyptr,keylen_in);                                        \
204 |  HASH_FSCK(hh,head);                                                             \
205 | } while(0)
206 | /* HASH_TO_BKT: bucket index = hashv masked with num_bkts-1 (covers every bucket only when num_bkts is a power of two). */
207 | #define HASH_TO_BKT( hashv, num_bkts, bkt )                                      \
208 | do {                                                                             \
209 |   bkt = ((hashv) & ((num_bkts) - 1U));                                           \
210 | } while(0)
211 | 
212 | /* Delete "delptr" from the hash table. The element is only unlinked, never
213 |  * freed; if it is the sole element, the buckets, Bloom filter and table are
214 |  * released and head is set to NULL. Otherwise "the usual" patch-up of the
215 |  * app-order doubly-linked list runs. _hd_hh_del deserves an explanation:
216 |  * the patch-up used to go through (delptr), which broke when the same
217 |  * symbol was passed for the head and the deletee, like
218 |  *  HASH_DELETE(hh,users,users);
219 |  * Repointing the head (users) then also changed what (delptr) named, so
220 |  * the deletee could no longer be reached. Solution: copy the deletee's
221 |  * handle pointer to scratch space first, and make the later references via
222 |  * that scratch pointer rather than through the repointed (users) symbol.
223 |  */
224 | #define HASH_DELETE(hh,head,delptr)                                              \
225 | do {                                                                             \
226 |     struct UT_hash_handle *_hd_hh_del;                                           \
227 |     if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
228 |         uthash_free((head)->hh.tbl->buckets,                                     \
229 |                     (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
230 |         HASH_BLOOM_FREE((head)->hh.tbl);                                         \
231 |         uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
232 |         head = NULL;                                                             \
233 |     } else {                                                                     \
234 |         unsigned _hd_bkt;                                                        \
235 |         _hd_hh_del = &((delptr)->hh);                                            \
236 |         if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
237 |             (head)->hh.tbl->tail =                                               \
238 |                 (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +               \
239 |                 (head)->hh.tbl->hho);                                            \
240 |         }                                                                        \
241 |         if ((delptr)->hh.prev != NULL) {                                         \
242 |             ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +                  \
243 |                     (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
244 |         } else {                                                                 \
245 |             DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
246 |         }                                                                        \
247 |         if (_hd_hh_del->next != NULL) {                                          \
248 |             ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next +                     \
249 |                     (head)->hh.tbl->hho))->prev =                                \
250 |                     _hd_hh_del->prev;                                            \
251 |         }                                                                        \
252 |         HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
253 |         HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
254 |         (head)->hh.tbl->num_items--;                                             \
255 |     }                                                                            \
256 |     HASH_FSCK(hh,head);                                                          \
257 | } while (0)
258 | 
259 | 
260 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL for a hash handle member named "hh"; _STR keys use strlen, _INT sizeof(int), _PTR sizeof(void *) */
261 | #define HASH_FIND_STR(head,findstr,out)                                          \
262 |     HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out)
263 | #define HASH_ADD_STR(head,strfield,add)                                          \
264 |     HASH_ADD(hh,head,strfield[0],(unsigned int)strlen((add)->strfield),add)
265 | #define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
266 |     HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen((add)->strfield),add,replaced)
267 | #define HASH_FIND_INT(head,findint,out)                                          \
268 |     HASH_FIND(hh,head,findint,sizeof(int),out)
269 | #define HASH_ADD_INT(head,intfield,add)                                          \
270 |     HASH_ADD(hh,head,intfield,sizeof(int),add)
271 | #define HASH_REPLACE_INT(head,intfield,add,replaced)                             \
272 |     HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
273 | #define HASH_FIND_PTR(head,findptr,out)                                          \
274 |     HASH_FIND(hh,head,findptr,sizeof(void *),out)
275 | #define HASH_ADD_PTR(head,ptrfield,add)                                          \
276 |     HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
277 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced)                             \
278 |     HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
279 | #define HASH_DEL(head,delptr)                                                    \
280 |     HASH_DELETE(hh,head,delptr)
281 | 
282 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined;
283 |  * any mismatch is reported via HASH_OOPS (fprintf to stderr, then exit(-1)). This is
284 |  * for uthash developer only; it compiles away if HASH_DEBUG isn't defined. */
285 | #ifdef HASH_DEBUG
286 | #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
287 | #define HASH_FSCK(hh,head)                                                       \
288 | do {                                                                             \
289 |     struct UT_hash_handle *_thh;                                                 \
290 |     if (head) {                                                                  \
291 |         unsigned _bkt_i;                                                         \
292 |         unsigned _count;                                                         \
293 |         char *_prev;                                                             \
294 |         _count = 0;                                                              \
295 |         for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
296 |             unsigned _bkt_count = 0;                                             \
297 |             _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
298 |             _prev = NULL;                                                        \
299 |             while (_thh) {                                                       \
300 |                if (_prev != (char*)(_thh->hh_prev)) {                            \
301 |                    HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
302 |                     _thh->hh_prev, _prev );                                      \
303 |                }                                                                 \
304 |                _bkt_count++;                                                     \
305 |                _prev = (char*)(_thh);                                            \
306 |                _thh = _thh->hh_next;                                             \
307 |             }                                                                    \
308 |             _count += _bkt_count;                                                \
309 |             if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
310 |                HASH_OOPS("invalid bucket count %u, actual %u\n",                 \
311 |                 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
312 |             }                                                                    \
313 |         }                                                                        \
314 |         if (_count != (head)->hh.tbl->num_items) {                               \
315 |             HASH_OOPS("invalid hh item count %u, actual %u\n",                   \
316 |                 (head)->hh.tbl->num_items, _count );                             \
317 |         }                                                                        \
318 |         /* traverse hh in app order; check next/prev integrity, count */         \
319 |         _count = 0;                                                              \
320 |         _prev = NULL;                                                            \
321 |         _thh =  &(head)->hh;                                                     \
322 |         while (_thh) {                                                           \
323 |            _count++;                                                             \
324 |            if (_prev !=(char*)(_thh->prev)) {                                    \
325 |               HASH_OOPS("invalid prev %p, actual %p\n",                          \
326 |                     _thh->prev, _prev );                                         \
327 |            }                                                                     \
328 |            _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
329 |            _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
330 |                                   (head)->hh.tbl->hho) : NULL );                 \
331 |         }                                                                        \
332 |         if (_count != (head)->hh.tbl->num_items) {                               \
333 |             HASH_OOPS("invalid app item count %u, actual %u\n",                  \
334 |                 (head)->hh.tbl->num_items, _count );                             \
335 |         }                                                                        \
336 |     }                                                                            \
337 | } while (0)
338 | #else
339 | #define HASH_FSCK(hh,head)
340 | #endif
341 | 
342 | /* When compiled with -DHASH_EMIT_KEYS=<fd>, every added key is written to that
343 |  * descriptor as an unsigned length followed by the key bytes, for tuning the hash function
344 |  * offline. The app can #include <unistd.h> to get the prototype for write(2). */
345 | #ifdef HASH_EMIT_KEYS
346 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
347 | do {                                                                             \
348 |     unsigned _klen = fieldlen;                                                   \
349 |     write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
350 |     write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen);                      \
351 | } while (0)
352 | #else
353 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
354 | #endif
355 | 
356 | /* Hash function selection: HASH_JEN unless the build overrides it, e.g. -DHASH_FUNCTION=HASH_SAX */
357 | #ifndef HASH_FUNCTION
358 | #define HASH_FCN HASH_JEN
359 | #else
360 | #define HASH_FCN HASH_FUNCTION
361 | #endif
362 | 
363 | /* The Bernstein hash function, used in Perl prior to v5.6. Note ((x<<5)+x) = x*33. Hashes key[0..keylen) into hashv; bkt = hashv & (num_bkts-1). */
364 | #define HASH_BER(key,keylen,num_bkts,hashv,bkt)                                  \
365 | do {                                                                             \
366 |   unsigned _hb_keylen=(unsigned)(keylen);                                        \
367 |   const unsigned char *_hb_key=(const unsigned char*)(key);                      \
368 |   (hashv) = 0;                                                                   \
369 |   while (_hb_keylen-- != 0U) {                                                   \
370 |       (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                         \
371 |   }                                                                              \
372 |   (bkt) = (hashv) & ((num_bkts)-1U);                                             \
373 | } while (0)
374 | 
375 | 
376 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
377 |  * HASH_SAX: shift-add-xor of each byte of key[0..keylen) into hashv; bkt = hashv & (num_bkts-1). */
378 | #define HASH_SAX(key,keylen,num_bkts,hashv,bkt)                                  \
379 | do {                                                                             \
380 |   unsigned _sx_i;                                                                \
381 |   const unsigned char *_hs_key=(const unsigned char*)(key);                      \
382 |   hashv = 0;                                                                     \
383 |   for(_sx_i=0; _sx_i < (keylen); _sx_i++) {                                      \
384 |       hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
385 |   }                                                                              \
386 |   bkt = hashv & ((num_bkts)-1U);                                                 \
387 | } while (0)
388 | /* FNV-1a variation: xor each byte of key[0..keylen) into hashv, then multiply by 16777619; bkt = hashv & (num_bkts-1) */
389 | #define HASH_FNV(key,keylen,num_bkts,hashv,bkt)                                  \
390 | do {                                                                             \
391 |   unsigned _fn_i;                                                                \
392 |   const unsigned char *_hf_key=(const unsigned char*)(key);                      \
393 |   hashv = 2166136261U;                                                           \
394 |   for(_fn_i=0; _fn_i < (keylen); _fn_i++) {                                      \
395 |       hashv = hashv ^ _hf_key[_fn_i];                                            \
396 |       hashv = hashv * 16777619U;                                                 \
397 |   }                                                                              \
398 |   bkt = hashv & ((num_bkts)-1U);                                                 \
399 | } while(0)
400 | /* HASH_OAT: add/shift/xor mixing per byte of key[0..keylen), then three final mixing steps; bkt = hashv & (num_bkts-1) */
401 | #define HASH_OAT(key,keylen,num_bkts,hashv,bkt)                                  \
402 | do {                                                                             \
403 |   unsigned _ho_i;                                                                \
404 |   const unsigned char *_ho_key=(const unsigned char*)(key);                      \
405 |   hashv = 0;                                                                     \
406 |   for(_ho_i=0; _ho_i < (keylen); _ho_i++) {                                      \
407 |       hashv += _ho_key[_ho_i];                                                   \
408 |       hashv += (hashv << 10);                                                    \
409 |       hashv ^= (hashv >> 6);                                                     \
410 |   }                                                                              \
411 |   hashv += (hashv << 3);                                                         \
412 |   hashv ^= (hashv >> 11);                                                        \
413 |   hashv += (hashv << 15);                                                        \
414 |   bkt = hashv & ((num_bkts)-1U);                                                 \
415 | } while(0)
416 | /* Jenkins hash (the default HASH_FCN). HASH_JEN_MIX scrambles its three lvalue arguments in place; HASH_JEN below consumes the key 12 bytes per round. */
417 | #define HASH_JEN_MIX(a,b,c)                                                      \
418 | do {                                                                             \
419 |   a -= b; a -= c; a ^= ( c >> 13 );                                              \
420 |   b -= c; b -= a; b ^= ( a << 8 );                                               \
421 |   c -= a; c -= b; c ^= ( b >> 13 );                                              \
422 |   a -= b; a -= c; a ^= ( c >> 12 );                                              \
423 |   b -= c; b -= a; b ^= ( a << 16 );                                              \
424 |   c -= a; c -= b; c ^= ( b >> 5 );                                               \
425 |   a -= b; a -= c; a ^= ( c >> 3 );                                               \
426 |   b -= c; b -= a; b ^= ( a << 10 );                                              \
427 |   c -= a; c -= b; c ^= ( b >> 15 );                                              \
428 | } while (0)
429 | /* HASH_JEN: hash key[0..keylen) into hashv (bytes are combined low byte first); bkt = hashv & (num_bkts-1). */
430 | #define HASH_JEN(key,keylen,num_bkts,hashv,bkt)                                  \
431 | do {                                                                             \
432 |   unsigned _hj_i,_hj_j,_hj_k;                                                    \
433 |   unsigned const char *_hj_key=(unsigned const char*)(key);                      \
434 |   hashv = 0xfeedbeefu;                                                           \
435 |   _hj_i = _hj_j = 0x9e3779b9u;                                                   \
436 |   _hj_k = (unsigned)(keylen);                                                    \
437 |   while (_hj_k >= 12U) {                                                         \
438 |     _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
439 |         + ( (unsigned)_hj_key[2] << 16 )                                         \
440 |         + ( (unsigned)_hj_key[3] << 24 ) );                                      \
441 |     _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
442 |         + ( (unsigned)_hj_key[6] << 16 )                                         \
443 |         + ( (unsigned)_hj_key[7] << 24 ) );                                      \
444 |     hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
445 |         + ( (unsigned)_hj_key[10] << 16 )                                        \
446 |         + ( (unsigned)_hj_key[11] << 24 ) );                                     \
447 |                                                                                  \
448 |      HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
449 |                                                                                  \
450 |      _hj_key += 12;                                                              \
451 |      _hj_k -= 12U;                                                               \
452 |   }                                                                              \
453 |   hashv += (unsigned)(keylen);                                                   \
454 |   switch ( _hj_k ) {                                                             \
455 |      case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */        \
456 |      case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */        \
457 |      case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */        \
458 |      case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */        \
459 |      case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */        \
460 |      case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */        \
461 |      case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */        \
462 |      case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */        \
463 |      case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */        \
464 |      case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */        \
465 |      case 1:  _hj_i += _hj_key[0];                                               \
466 |   }                                                                              \
467 |   HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
468 |   bkt = hashv & ((num_bkts)-1U);                                                 \
469 | } while(0)
470 | 
471 | /* The Paul Hsieh hash function; get16bits(d) is its helper that fetches a 16-bit value starting at d */
472 | #undef get16bits
473 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
474 |   || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
475 | #define get16bits(d) (*((const uint16_t *) (d)))
476 | #endif
477 | /* everywhere else: assemble the value bytewise, d[0] as the low byte and d[1] as the high byte */
478 | #if !defined (get16bits)
479 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
480 |                        +(uint32_t)(((const uint8_t *)(d))[0]) )
481 | #endif
482 | #define HASH_SFH(key,keylen,num_bkts,hashv,bkt)     /* writes hashv and bkt */   \
483 | do {                                                                             \
484 |   unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
485 |   uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen;                                \
486 |   /* split the length: low two bits = leftover bytes, the rest = 4-byte groups */\
487 |   unsigned _sfh_rem = _sfh_len & 3U;                                             \
488 |   _sfh_len >>= 2;                                                                \
489 |   hashv = 0xcafebabeu;                                /* fixed starting value */ \
490 |                                                                                  \
491 |   /* Main loop: each pass mixes in four key bytes as two 16-bit reads */         \
492 |   for (;_sfh_len > 0U; _sfh_len--) {                                             \
493 |     hashv    += get16bits (_sfh_key);                                            \
494 |     _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
495 |     hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
496 |     _sfh_key += 2U*sizeof (uint16_t);                                            \
497 |     hashv    += hashv >> 11;                                                     \
498 |   }                                                                              \
499 |                                                                                  \
500 |   /* Handle end cases: fold in the 1-3 leftover bytes (none left: no change) */  \
501 |   switch (_sfh_rem) {                                                            \
502 |     case 3: hashv += get16bits (_sfh_key);                                       \
503 |             hashv ^= hashv << 16;                                                \
504 |             hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18;              \
505 |             hashv += hashv >> 11;                                                \
506 |             break;                                                               \
507 |     case 2: hashv += get16bits (_sfh_key);                                       \
508 |             hashv ^= hashv << 11;                                                \
509 |             hashv += hashv >> 17;                                                \
510 |             break;                                                               \
511 |     case 1: hashv += *_sfh_key;                                                  \
512 |             hashv ^= hashv << 10;                                                \
513 |             hashv += hashv >> 1;                                                 \
514 |   }                                                                              \
515 |                                                                                  \
516 |     /* Force "avalanching" of final 127 bits */                                  \
517 |     hashv ^= hashv << 3;                                                         \
518 |     hashv += hashv >> 5;                                                         \
519 |     hashv ^= hashv << 4;                                                         \
520 |     hashv += hashv >> 17;                                                        \
521 |     hashv ^= hashv << 25;                                                        \
522 |     hashv += hashv >> 6;                                                         \
523 |     bkt = hashv & (num_bkts-1U);              /* bucket index = masked hash */   \
524 | } while(0)
525 | 
526 | #ifdef HASH_USING_NO_STRICT_ALIASING
527 | /* MUR_GETBLOCK(p,i) fetches the 32-bit block p[i] for MurmurHash. Some CPU's
528 |  * (x86,x86_64) tolerate unaligned reads, so there the block is loaded directly.
529 |  * On other CPU's (e.g. Sparc) an unaligned read causes a bus error, so a
530 |  * misaligned block is pieced together from the two aligned words around it.
531 |  * Those words are found from the address of block i, (p)+(i), never from p
532 |  * alone, so the hash of a key does not depend on where the key sits in memory.
533 |  * The preprocessor built-in defines can be emitted using:
534 |  *   gcc -m64 -dM -E - < /dev/null  (on gcc);  cc -## a.c  (Sun Studio, a.c = any test file)
535 |  */
536 | #if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86) || defined(_M_X64))
537 | #define MUR_GETBLOCK(p,i) ((p)[i])
538 | #else /* non intel; addresses are masked as size_t because unsigned long may be narrower than a pointer */
539 | #define MUR_PLUS0_ALIGNED(p) (((size_t)(p) & (size_t)3) == (size_t)0)
540 | #define MUR_PLUS1_ALIGNED(p) (((size_t)(p) & (size_t)3) == (size_t)1)
541 | #define MUR_PLUS2_ALIGNED(p) (((size_t)(p) & (size_t)3) == (size_t)2)
542 | #define MUR_PLUS3_ALIGNED(p) (((size_t)(p) & (size_t)3) == (size_t)3)
543 | #define WP(p) ((uint32_t*)((size_t)(p) & ~(size_t)3))  /* the aligned word that contains address p */
544 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
545 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
546 | #define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
547 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
548 | #else /* assume little endian non-intel */
549 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
550 | #define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
551 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
552 | #endif
553 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :                  \
554 |                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE((p)+(i)) :   \
555 |                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO((p)+(i)) :    \
556 |                                                      MUR_ONE_THREE((p)+(i)))))
557 | #endif
558 | #define MUR_ROTL32(x,r) (((x) >> (32 - (r))) | ((x) << (r)))  /* rotate x left by r bits within a 32-bit width */
559 | #define MUR_FMIX(_h)      /* scrambles _h in place: xor-shift, multiply, repeat */ \
560 | do {                                                                           \
561 |   _h = _h ^ (_h >> 16);                                                        \
562 |   _h = _h * 0x85ebca6bu;                                                       \
563 |   _h = _h ^ (_h >> 13);                                                        \
564 |   _h = _h * 0xc2b2ae35u;                                                       \
565 |   _h = _h ^ (_h >> 16);                                                        \
566 | } while(0)
567 | 
568 | #define HASH_MUR(key,keylen,num_bkts,hashv,bkt)  /* writes hashv and bkt */ \
569 | do {                                                                   \
570 |   const uint8_t *_mur_data = (const uint8_t*)(key);                    \
571 |   const int _mur_nblocks = (int)(keylen) / 4;  /* whole 4-byte blocks */ \
572 |   uint32_t _mur_h1 = 0xf88D5353u;              /* initial hash state */ \
573 |   uint32_t _mur_c1 = 0xcc9e2d51u;                                      \
574 |   uint32_t _mur_c2 = 0x1b873593u;                                      \
575 |   uint32_t _mur_k1 = 0;                                                \
576 |   const uint8_t *_mur_tail;                                            \
577 |   const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
578 |   int _mur_i;  /* negative block index, relative to _mur_blocks (end of the whole blocks) */ \
579 |   for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) {                   \
580 |     _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                        \
581 |     _mur_k1 *= _mur_c1;                                                \
582 |     _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
583 |     _mur_k1 *= _mur_c2;                                                \
584 |     /* fold the scrambled block into the running hash */               \
585 |     _mur_h1 ^= _mur_k1;                                                \
586 |     _mur_h1 = MUR_ROTL32(_mur_h1,13);                                  \
587 |     _mur_h1 = (_mur_h1*5U) + 0xe6546b64u;                              \
588 |   }                                                                    \
589 |   _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4));  /* the 0-3 bytes after the last whole block */ \
590 |   _mur_k1=0;                                                           \
591 |   switch((keylen) & 3U) {                                              \
592 |     case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
593 |     case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8;  /* FALLTHROUGH */ \
594 |     case 1: _mur_k1 ^= (uint32_t)_mur_tail[0];  /* the mixing below runs for 1-3 leftover bytes */ \
595 |     _mur_k1 *= _mur_c1;                                                \
596 |     _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
597 |     _mur_k1 *= _mur_c2;                                                \
598 |     _mur_h1 ^= _mur_k1;                                                \
599 |   }                                                                    \
600 |   _mur_h1 ^= (uint32_t)(keylen);              /* fold in the key length */ \
601 |   MUR_FMIX(_mur_h1);                          /* final scramble */     \
602 |   hashv = _mur_h1;                                                     \
603 |   bkt = hashv & (num_bkts-1U);                /* bucket index = masked hash */ \
604 | } while(0)
605 | #endif  /* HASH_USING_NO_STRICT_ALIASING */
606 | 
607 | /* compare len bytes of keys a and b with memcmp; the result is 0 when they match */
608 | #define HASH_KEYCMP(a,b,len) memcmp((a),(b),(unsigned long)(len))
609 | 
610 | /* iterate over items in a known bucket to find desired item: out ends as the element whose key matches keyptr/keylen_in, or NULL */
611 | #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
612 | do {                                                                             \
613 |  if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \
614 |  else { out=NULL; }                                    /* bucket is empty */     \
615 |  while (out != NULL) {                                                           \
616 |     if ((out)->hh.keylen == (keylen_in)) { /* compare keys only if lengths agree */ \
617 |         if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; }         \
618 |     }                                                                            \
619 |     if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \
620 |     else { out = NULL; }                          /* chain ended: not found */   \
621 |  }                                                                               \
622 | } while(0)
623 | 
624 | /* add an item to a bucket: addhh becomes the new head of the bucket's chain; the buckets are then expanded if this chain got too long */
625 | #define HASH_ADD_TO_BKT(head,addhh)                                              \
626 | do {                                                                             \
627 |  head.count++;                                                                   \
628 |  (addhh)->hh_next = head.hh_head;                   /* old head follows addhh */ \
629 |  (addhh)->hh_prev = NULL;                                                        \
630 |  if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); }                \
631 |  (head).hh_head=addhh;                                                           \
632 |  if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH))          \
633 |      && ((addhh)->tbl->noexpand != 1U)) {   /* no expansion once noexpand is 1 */ \
634 |        HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
635 |  }                                                                               \
636 | } while(0)
637 | 
638 | /* remove an item from a given bucket */
639 | #define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
640 |     (head).count--;                                                              \
641 |     if ((head).hh_head == hh_del) {                                              \
642 |       (head).hh_head = hh_del->hh_next;                                          \
643 |     }                                                                            \
644 |     if (hh_del->hh_prev) {                                                       \
645 |         hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
646 |     }                                                                            \
647 |     if (hh_del->hh_next) {                                                       \
648 |         hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
649 |     }
650 | 
651 | /* Bucket expansion has the effect of doubling the number of buckets
652 |  * and redistributing the items into the new buckets. Ideally the
653 |  * items will distribute more or less evenly into the new buckets
654 |  * (the extent to which this is true is a measure of the quality of
655 |  * the hash function as it applies to the key domain).
656 |  *
657 |  * With the items distributed into more buckets, the chain length
658 |  * (item count) in each bucket is reduced. Thus by expanding buckets
659 |  * the hash keeps a bound on the chain length. This bounded chain
660 |  * length is the essence of how a hash provides constant time lookup.
661 |  *
662 |  * The calculation of tbl->ideal_chain_maxlen below deserves some
663 |  * explanation. First, keep in mind that we're calculating the ideal
664 |  * maximum chain length based on the *new* (doubled) bucket count.
665 |  * In fractions this is just n/b (n=number of items,b=new num buckets).
666 |  * Since the ideal chain length is an integer, we want to calculate
667 |  * ceil(n/b). We don't depend on floating point arithmetic in this
668 |  * hash, so to calculate ceil(n/b) with integers we could write
669 |  *
670 |  *      ceil(n/b) = (n/b) + ((n%b)?1:0)
671 |  *
672 |  * and in fact a previous version of this hash did just that.
673 |  * But now we have improved things a bit by recognizing that b is
674 |  * always a power of two. We keep its base 2 log handy (call it lb),
675 |  * so now we can write this with a bit shift and logical AND:
676 |  *
677 |  *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
678 |  *
679 |  */
680 | #define HASH_EXPAND_BUCKETS(tbl)                                                 \
681 | do {                 /* build a zeroed bucket array of twice the current size */ \
682 |     unsigned _he_bkt;                                                            \
683 |     unsigned _he_bkt_i;                                                          \
684 |     struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
685 |     UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
686 |     _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
687 |              2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));            \
688 |     if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
689 |     memset(_he_new_buckets, 0,                                                   \
690 |             2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));             \
691 |     tbl->ideal_chain_maxlen =          /* ceil(n/b) for the doubled bucket count */ \
692 |        (tbl->num_items >> (tbl->log2_num_buckets+1U)) +                          \
693 |        (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);        \
694 |     tbl->nonideal_items = 0;                                                     \
695 |     for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
696 |     {                          /* move every item of each old chain across */    \
697 |         _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
698 |         while (_he_thh != NULL) {                                                \
699 |            _he_hh_nxt = _he_thh->hh_next;  /* saved: hh_next is rewritten below */ \
700 |            HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt);           \
701 |            _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
702 |            if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
703 |              tbl->nonideal_items++;     /* item sits beyond the ideal length */  \
704 |              _he_newbkt->expand_mult = _he_newbkt->count /                       \
705 |                                         tbl->ideal_chain_maxlen;                 \
706 |            }                                                                     \
707 |            _he_thh->hh_prev = NULL;     /* push onto the front of the new chain */ \
708 |            _he_thh->hh_next = _he_newbkt->hh_head;                               \
709 |            if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev =     \
710 |                 _he_thh; }                                                       \
711 |            _he_newbkt->hh_head = _he_thh;                                        \
712 |            _he_thh = _he_hh_nxt;                                                 \
713 |         }                                                                        \
714 |     }                                                                            \
715 |     uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
716 |     tbl->num_buckets *= 2U;                                                      \
717 |     tbl->log2_num_buckets++;                                                     \
718 |     tbl->buckets = _he_new_buckets;                                              \
719 |     tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
720 |         (tbl->ineff_expands+1U) : 0U;  /* the streak resets after an effective expansion */ \
721 |     if (tbl->ineff_expands > 1U) {     /* two ineffective expansions in a row */ \
722 |         tbl->noexpand=1;                                                         \
723 |         uthash_noexpand_fyi(tbl);                                                \
724 |     }                                                                            \
725 |     uthash_expand_fyi(tbl);                                                      \
726 | } while(0)
727 | 
728 | 
729 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
730 | /* Note that HASH_SORT assumes the hash handle name to be hh.
731 |  * HASH_SRT was added to allow the hash handle name to be passed in. */
732 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
733 | #define HASH_SRT(hh,head,cmpfcn)                                                 \
734 | do {                                                                             \
735 |   unsigned _hs_i;                                                                \
736 |   unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
737 |   struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
738 |   if (head != NULL) {                                                            \
739 |       _hs_insize = 1;             /* length of the runs merged in this pass */   \
740 |       _hs_looping = 1;                                                           \
741 |       _hs_list = &((head)->hh);                                                  \
742 |       while (_hs_looping != 0U) {                                                \
743 |           _hs_p = _hs_list;                                                      \
744 |           _hs_list = NULL;                                                       \
745 |           _hs_tail = NULL;                                                       \
746 |           _hs_nmerges = 0;                                                       \
747 |           while (_hs_p != NULL) {        /* merge the next pair of runs, p and q */ \
748 |               _hs_nmerges++;                                                     \
749 |               _hs_q = _hs_p;                                                     \
750 |               _hs_psize = 0;                                                     \
751 |               for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
752 |                   _hs_psize++;         /* step q up to insize items past p */    \
753 |                   _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?              \
754 |                           ((void*)((char*)(_hs_q->next) +                        \
755 |                           (head)->hh.tbl->hho)) : NULL);                         \
756 |                   if (! (_hs_q) ) { break; }                                     \
757 |               }                                                                  \
758 |               _hs_qsize = _hs_insize;                                            \
759 |               while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
760 |                   if (_hs_psize == 0U) {       /* p run used up: take from q */  \
761 |                       _hs_e = _hs_q;                                             \
762 |                       _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
763 |                               ((void*)((char*)(_hs_q->next) +                    \
764 |                               (head)->hh.tbl->hho)) : NULL);                     \
765 |                       _hs_qsize--;                                               \
766 |                   } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) {           \
767 |                       _hs_e = _hs_p;           /* q run used up: take from p */  \
768 |                       if (_hs_p != NULL){                                        \
769 |                         _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
770 |                                 ((void*)((char*)(_hs_p->next) +                  \
771 |                                 (head)->hh.tbl->hho)) : NULL);                   \
772 |                        }                                                         \
773 |                       _hs_psize--;                                               \
774 |                   } else if ((                                                   \
775 |                       cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
776 |                              DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
777 |                              ) <= 0) {         /* p's item goes first on a tie */ \
778 |                       _hs_e = _hs_p;                                             \
779 |                       if (_hs_p != NULL){                                        \
780 |                         _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
781 |                                ((void*)((char*)(_hs_p->next) +                   \
782 |                                (head)->hh.tbl->hho)) : NULL);                    \
783 |                        }                                                         \
784 |                       _hs_psize--;                                               \
785 |                   } else {                     /* q's item sorts first */        \
786 |                       _hs_e = _hs_q;                                             \
787 |                       _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
788 |                               ((void*)((char*)(_hs_q->next) +                    \
789 |                               (head)->hh.tbl->hho)) : NULL);                     \
790 |                       _hs_qsize--;                                               \
791 |                   }                                                              \
792 |                   if ( _hs_tail != NULL ) {    /* append e to the merged list */ \
793 |                       _hs_tail->next = ((_hs_e != NULL) ?                        \
794 |                             ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
795 |                   } else {                                                       \
796 |                       _hs_list = _hs_e;                                          \
797 |                   }                                                              \
798 |                   if (_hs_e != NULL) {                                           \
799 |                   _hs_e->prev = ((_hs_tail != NULL) ?                            \
800 |                      ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
801 |                   }                                                              \
802 |                   _hs_tail = _hs_e;                                              \
803 |               }                                                                  \
804 |               _hs_p = _hs_q;                                                     \
805 |           }                                                                      \
806 |           if (_hs_tail != NULL){                                                 \
807 |             _hs_tail->next = NULL;                                               \
808 |           }                                                                      \
809 |           if ( _hs_nmerges <= 1U ) {     /* one merge spanned the list: done */  \
810 |               _hs_looping=0;                                                     \
811 |               (head)->hh.tbl->tail = _hs_tail;                                   \
812 |               DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
813 |           }                                                                      \
814 |           _hs_insize *= 2U;              /* runs are twice as long next pass */  \
815 |       }                                                                          \
816 |       HASH_FSCK(hh,head);                                                        \
817 |  }                                                                               \
818 | } while (0)
819 | 
820 | /* This function selects items from one hash into another hash.
821 |  * The end result is that the selected items have dual presence
822 |  * in both hashes. There is no copy of the items made; rather
823 |  * they are added into the new hash through a secondary
824 |  * hash handle that must be present in the structure. */
825 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
826 | do {                                                                             \
827 |   unsigned _src_bkt, _dst_bkt;                                                   \
828 |   void *_last_elt=NULL, *_elt;                                                   \
829 |   UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
830 |   ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
831 |   if (src != NULL) {                                                             \
832 |     for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
833 |       for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
834 |           _src_hh != NULL;                                                       \
835 |           _src_hh = _src_hh->hh_next) {                                          \
836 |           _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
837 |           if (cond(_elt)) {                /* cond() decides which items go in */ \
838 |             _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
839 |             _dst_hh->key = _src_hh->key;   /* key and hash value are reused */   \
840 |             _dst_hh->keylen = _src_hh->keylen;                                   \
841 |             _dst_hh->hashv = _src_hh->hashv;                                     \
842 |             _dst_hh->prev = _last_elt;     /* linked after the previous pick */  \
843 |             _dst_hh->next = NULL;                                                \
844 |             if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; }             \
845 |             if (dst == NULL) {             /* first pick: it becomes dst */      \
846 |               DECLTYPE_ASSIGN(dst,_elt);                                         \
847 |               HASH_MAKE_TABLE(hh_dst,dst);                                       \
848 |             } else {                                                             \
849 |               _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
850 |             }                                                                    \
851 |             HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
852 |             HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
853 |             (dst)->hh_dst.tbl->num_items++;                                      \
854 |             _last_elt = _elt;                                                    \
855 |             _last_elt_hh = _dst_hh;                                              \
856 |           }                                                                      \
857 |       }                                                                          \
858 |     }                                                                            \
859 |   }                                                                              \
860 |   HASH_FSCK(hh_dst,dst);                                                         \
861 | } while (0)
862 | 
863 | #define HASH_CLEAR(hh,head)   /* frees the bucket array, bloom data and table; no item is freed here */ \
864 | do {                                                                             \
865 |   if (head != NULL) {                                                            \
866 |     uthash_free((head)->hh.tbl->buckets,                                         \
867 |                 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
868 |     HASH_BLOOM_FREE((head)->hh.tbl);                                             \
869 |     uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
870 |     (head)=NULL;                              /* head now reads as an empty hash */ \
871 |   }                                                                              \
872 | } while(0)
873 | 
874 | #define HASH_OVERHEAD(hh,head)  /* bytes of hash bookkeeping; 0U for a NULL head */ \
875 |  ((head == NULL) ? 0U : (                                                        \
876 |  (size_t)(sizeof(UT_hash_table)                                    +             \
877 |           ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
878 |           ((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
879 |            (HASH_BLOOM_BYTELEN))))
880 | 
881 | #ifdef NO_DECLTYPE /* this variant stores tmp through a char** alias instead of a DECLTYPE cast */
882 | #define HASH_ITER(hh,head,el,tmp)     /* tmp holds el's successor during the body */ \
883 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
884 |   (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
885 | #else
886 | #define HASH_ITER(hh,head,el,tmp)     /* tmp holds el's successor during the body */ \
887 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL));      \
888 |   (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
889 | #endif /* NO_DECLTYPE */
890 | 
891 | /* number of items in the hash, 0U for a NULL head; HASH_COUNT passes hh as the handle field name */
892 | #define HASH_COUNT(head) HASH_CNT(hh,head)
893 | #define HASH_CNT(hh,head) ((head == NULL) ? 0U : ((head)->hh.tbl->num_items))
894 | 
895 | typedef struct UT_hash_bucket {    /* one bucket: a chain of hash handles plus its bookkeeping */
896 |    struct UT_hash_handle *hh_head; /* first handle in the chain, NULL when the bucket is empty */
897 |    unsigned count;                 /* number of handles currently in the chain */
898 | 
899 |    /* expand_mult is normally set to 0. In this situation, the max chain length
900 |     * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
901 |     * the bucket's chain exceeds this length, bucket expansion is triggered).
902 |     * However, setting expand_mult to a non-zero value delays bucket expansion
903 |     * (that would be triggered by additions to this particular bucket)
904 |     * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
905 |     * (The multiplier is simply expand_mult+1). The whole idea of this
906 |     * multiplier is to reduce bucket expansions, since they are expensive, in
907 |     * situations where we know that a particular bucket tends to be overused.
908 |     * It is better to let its chain length grow to a longer yet-still-bounded
909 |     * value, than to do an O(n) bucket expansion too often.
910 |     */
911 |    unsigned expand_mult;
912 | 
913 | } UT_hash_bucket;
914 | 
/* random signature used only to find hash tables in external analysis;
 * UT_hash_table has a `signature` member for it */
#define HASH_SIGNATURE 0xa0111fe1u
/* companion constant for the bloom filter; presumably stored in
 * UT_hash_table.bloom_sig when HASH_BLOOM is defined -- the assignment is not
 * visible in this part of the header, confirm at the point of use */
#define HASH_BLOOM_SIGNATURE 0xb12220f2u
918 | 
/* Per-hash bookkeeping record. An element reaches it through the `tbl`
 * member of its UT_hash_handle (see HASH_CNT / HASH_OVERHEAD, which read
 * num_items and num_buckets from here). */
typedef struct UT_hash_table {
   UT_hash_bucket *buckets;     /* bucket storage; HASH_OVERHEAD counts num_buckets entries */
   unsigned num_buckets, log2_num_buckets; /* log2_num_buckets is presumably log2(num_buckets) -- confirm */
   unsigned num_items;          /* item count, as reported by HASH_CNT */
   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */

   /* in an ideal situation (all buckets used equally), no bucket would have
    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
   unsigned ideal_chain_maxlen;

   /* nonideal_items is the number of items in the hash whose chain position
    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
    * hash distribution; reaching them in a chain traversal takes >ideal steps */
   unsigned nonideal_items;

   /* ineffective expands occur when a bucket doubling was performed, but
    * afterward, more than half the items in the hash had nonideal chain
    * positions. If this happens on two consecutive expansions we inhibit any
    * further expansion, as it's not helping; this happens when the hash
    * function isn't a good fit for the key domain. When expansion is inhibited
    * the hash will still work, albeit no longer in constant time. */
   unsigned ineff_expands, noexpand;

   uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
   /* the three members below exist only in builds that define HASH_BLOOM */
   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
   uint8_t *bloom_bv;  /* presumably the bloom filter's bit vector -- confirm against the bloom macros */
   uint8_t bloom_nbits; /* presumably the filter size expressed as a bit count exponent -- confirm */
#endif

} UT_hash_table;
951 | 
/* Per-element bookkeeping record. The macros in this header access it as a
 * member of the user's element struct, with the member name passed as their
 * `hh` argument (e.g. (head)->hh.tbl, (head)->hh.next). */
typedef struct UT_hash_handle {
   struct UT_hash_table *tbl;        /* table record this element is reached from */
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order; followed by HASH_ITER */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   void *key;                        /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;
962 | 
963 | #endif /* UTHASH_H */
964 | 


--------------------------------------------------------------------------------
/utstring.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright (c) 2008-2014, Troy D. Hanson   http://troydhanson.github.com/uthash/
  3 | All rights reserved.
  4 | 
  5 | Redistribution and use in source and binary forms, with or without
  6 | modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright
  9 |       notice, this list of conditions and the following disclaimer.
 10 | 
 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 22 | */
 23 | 
 24 | /* a dynamic string implementation using macros
 25 |  */
 26 | #ifndef UTSTRING_H
 27 | #define UTSTRING_H
 28 | 
/* Version tag of this header. It expands to the bare preprocessing token
 * 1.9.9, which is neither a string literal nor a valid numeric constant, so
 * it is only usable after stringification. */
#define UTSTRING_VERSION 1.9.9
 30 | 
/* _UNUSED_ prefixes the static functions in this header. With __GNUC__
 * defined it expands to the `unused` attribute, so a translation unit that
 * includes the header without calling every function gets no unused-function
 * warnings; elsewhere it expands to nothing. */
#ifdef __GNUC__
#define _UNUSED_ __attribute__ ((__unused__))
#else
#define _UNUSED_
#endif
 36 | 
 37 | #include <stdlib.h>
 38 | #include <string.h>
 39 | #include <stdio.h>
 40 | #include <stdarg.h>
/* out-of-memory handler invoked by the allocating macros in this header:
 * ends the process with exit(-1) */
#define oom() exit(-1)
 42 | 
/* Growable character buffer. The macros in this header keep d[i] set to
 * '\0' after every append, so d can be read as a C string. */
typedef struct {
    char *d;  /* buffer storage, obtained from realloc */
    size_t n; /* allocd size */
    size_t i; /* index of first unused byte */
} UT_string;
 48 | 
 49 | #define utstring_reserve(s,amt)                            \
 50 | do {                                                       \
 51 |   if (((s)->n - (s)->i) < (size_t)(amt)) {                 \
 52 |      (s)->d = (char*)realloc((s)->d, (s)->n + (amt));      \
 53 |      if ((s)->d == NULL) oom();                            \
 54 |      (s)->n += (amt);                                      \
 55 |   }                                                        \
 56 | } while(0)
 57 | 
 58 | #define utstring_init(s)                                   \
 59 | do {                                                       \
 60 |   (s)->n = 0; (s)->i = 0; (s)->d = NULL;                   \
 61 |   utstring_reserve(s,100);                                 \
 62 |   (s)->d[0] = '\0'; \
 63 | } while(0)
 64 | 
 65 | #define utstring_done(s)                                   \
 66 | do {                                                       \
 67 |   if ((s)->d != NULL) free((s)->d);                        \
 68 |   (s)->n = 0;                                              \
 69 | } while(0)
 70 | 
 71 | #define utstring_free(s)                                   \
 72 | do {                                                       \
 73 |   utstring_done(s);                                        \
 74 |   free(s);                                                 \
 75 | } while(0)
 76 | 
 77 | #define utstring_new(s)                                    \
 78 | do {                                                       \
 79 |    s = (UT_string*)calloc(sizeof(UT_string),1);            \
 80 |    if (!s) oom();                                          \
 81 |    utstring_init(s);                                       \
 82 | } while(0)
 83 | 
 84 | #define utstring_renew(s)                                  \
 85 | do {                                                       \
 86 |    if (s) {                                                \
 87 |      utstring_clear(s);                                    \
 88 |    } else {                                                \
 89 |      utstring_new(s);                                      \
 90 |    }                                                       \
 91 | } while(0)
 92 | 
 93 | #define utstring_clear(s)                                  \
 94 | do {                                                       \
 95 |   (s)->i = 0;                                              \
 96 |   (s)->d[0] = '\0';                                        \
 97 | } while(0)
 98 | 
 99 | #define utstring_bincpy(s,b,l)                             \
100 | do {                                                       \
101 |   utstring_reserve((s),(l)+1);                               \
102 |   if (l) memcpy(&(s)->d[(s)->i], b, l);                    \
103 |   (s)->i += (l);                                           \
104 |   (s)->d[(s)->i]='\0';                                         \
105 | } while(0)
106 | 
/* Append the content of UT_string `src` to UT_string `dst`. Space for
 * src's bytes plus a terminator is reserved in dst, the (src)->i bytes are
 * copied (skipped when src is empty), dst's write index advances by that
 * amount and dst is re-terminated. */
#define utstring_concat(dst,src)                                 \
do {                                                             \
  utstring_reserve((dst),((src)->i)+1);                          \
  if ((src)->i != 0) {                                           \
    memcpy((dst)->d + (dst)->i, (src)->d, (src)->i);             \
  }                                                              \
  (dst)->i += (src)->i;                                          \
  *((dst)->d + (dst)->i) = '\0';                                 \
} while(0)
114 | 
/* Current content length of `s`, i.e. its write index. Note the size_t index
 * is converted to unsigned, so the value is narrowed on platforms where
 * unsigned is smaller than size_t. */
#define utstring_len(s) ((unsigned)((s)->i))

/* Pointer to the character buffer of `s`. The pointer may change whenever
 * the string grows, because growth goes through realloc. */
#define utstring_body(s) ((s)->d)
118 | 
119 | _UNUSED_ static void utstring_printf_va(UT_string *s, const char *fmt, va_list ap) {
120 |    int n;
121 |    va_list cp;
122 |    while (1) {
123 | #ifdef _WIN32
124 |       cp = ap;
125 | #else
126 |       va_copy(cp, ap);
127 | #endif
128 |       n = vsnprintf (&s->d[s->i], s->n-s->i, fmt, cp);
129 |       va_end(cp);
130 | 
131 |       if ((n > -1) && ((size_t) n < (s->n-s->i))) {
132 |         s->i += n;
133 |         return;
134 |       }
135 | 
136 |       /* Else try again with more space. */
137 |       if (n > -1) utstring_reserve(s,n+1); /* exact */
138 |       else utstring_reserve(s,(s->n)*2);   /* 2x */
139 |    }
140 | }
141 | #ifdef __GNUC__
142 | /* support printf format checking (2=the format string, 3=start of varargs) */
143 | static void utstring_printf(UT_string *s, const char *fmt, ...)
144 |   __attribute__ (( format( printf, 2, 3) ));
145 | #endif
146 | _UNUSED_ static void utstring_printf(UT_string *s, const char *fmt, ...) {
147 |    va_list ap;
148 |    va_start(ap,fmt);
149 |    utstring_printf_va(s,fmt,ap);
150 |    va_end(ap);
151 | }
152 | 
153 | /*******************************************************************************
154 |  * begin substring search functions                                            *
155 |  ******************************************************************************/
156 | /* Build KMP table from left to right. */
157 | _UNUSED_ static void _utstring_BuildTable(
158 |     const char *P_Needle,
159 |     size_t P_NeedleLen,
160 |     long *P_KMP_Table)
161 | {
162 |     long i, j;
163 | 
164 |     i = 0;
165 |     j = i - 1;
166 |     P_KMP_Table[i] = j;
167 |     while (i < (long) P_NeedleLen)
168 |     {
169 |         while ( (j > -1) && (P_Needle[i] != P_Needle[j]) )
170 |         {
171 |            j = P_KMP_Table[j];
172 |         }
173 |         i++;
174 |         j++;
175 |         if (i < (long) P_NeedleLen)
176 |         {
177 |             if (P_Needle[i] == P_Needle[j])
178 |             {
179 |                 P_KMP_Table[i] = P_KMP_Table[j];
180 |             }
181 |             else
182 |             {
183 |                 P_KMP_Table[i] = j;
184 |             }
185 |         }
186 |         else
187 |         {
188 |             P_KMP_Table[i] = j;
189 |         }
190 |     }
191 | 
192 |     return;
193 | }
194 | 
195 | 
196 | /* Build KMP table from right to left. */
197 | _UNUSED_ static void _utstring_BuildTableR(
198 |     const char *P_Needle,
199 |     size_t P_NeedleLen,
200 |     long *P_KMP_Table)
201 | {
202 |     long i, j;
203 | 
204 |     i = P_NeedleLen - 1;
205 |     j = i + 1;
206 |     P_KMP_Table[i + 1] = j;
207 |     while (i >= 0)
208 |     {
209 |         while ( (j < (long) P_NeedleLen) && (P_Needle[i] != P_Needle[j]) )
210 |         {
211 |            j = P_KMP_Table[j + 1];
212 |         }
213 |         i--;
214 |         j--;
215 |         if (i >= 0)
216 |         {
217 |             if (P_Needle[i] == P_Needle[j])
218 |             {
219 |                 P_KMP_Table[i + 1] = P_KMP_Table[j + 1];
220 |             }
221 |             else
222 |             {
223 |                 P_KMP_Table[i + 1] = j;
224 |             }
225 |         }
226 |         else
227 |         {
228 |             P_KMP_Table[i + 1] = j;
229 |         }
230 |     }
231 | 
232 |     return;
233 | }
234 | 
235 | 
236 | /* Search data from left to right. ( Multiple search mode. ) */
237 | _UNUSED_ static long _utstring_find(
238 |     const char *P_Haystack,
239 |     size_t P_HaystackLen,
240 |     const char *P_Needle,
241 |     size_t P_NeedleLen,
242 |     long *P_KMP_Table)
243 | {
244 |     long i, j;
245 |     long V_FindPosition = -1;
246 | 
247 |     /* Search from left to right. */
248 |     i = j = 0;
249 |     while ( (j < (int)P_HaystackLen) && (((P_HaystackLen - j) + i) >= P_NeedleLen) )
250 |     {
251 |         while ( (i > -1) && (P_Needle[i] != P_Haystack[j]) )
252 |         {
253 |             i = P_KMP_Table[i];
254 |         }
255 |         i++;
256 |         j++;
257 |         if (i >= (int)P_NeedleLen)
258 |         {
259 |             /* Found. */
260 |             V_FindPosition = j - i;
261 |             break;
262 |         }
263 |     }
264 | 
265 |     return V_FindPosition;
266 | }
267 | 
268 | 
269 | /* Search data from right to left. ( Multiple search mode. ) */
270 | _UNUSED_ static long _utstring_findR(
271 |     const char *P_Haystack,
272 |     size_t P_HaystackLen,
273 |     const char *P_Needle,
274 |     size_t P_NeedleLen,
275 |     long *P_KMP_Table)
276 | {
277 |     long i, j;
278 |     long V_FindPosition = -1;
279 | 
280 |     /* Search from right to left. */
281 |     j = (P_HaystackLen - 1);
282 |     i = (P_NeedleLen - 1);
283 |     while ( (j >= 0) && (j >= i) )
284 |     {
285 |         while ( (i < (int)P_NeedleLen) && (P_Needle[i] != P_Haystack[j]) )
286 |         {
287 |             i = P_KMP_Table[i + 1];
288 |         }
289 |         i--;
290 |         j--;
291 |         if (i < 0)
292 |         {
293 |             /* Found. */
294 |             V_FindPosition = j + 1;
295 |             break;
296 |         }
297 |     }
298 | 
299 |     return V_FindPosition;
300 | }
301 | 
302 | 
303 | /* Search data from left to right. ( One time search mode. ) */
304 | _UNUSED_ static long utstring_find(
305 |     UT_string *s,
306 |     long P_StartPosition,   /* Start from 0. -1 means last position. */
307 |     const char *P_Needle,
308 |     size_t P_NeedleLen)
309 | {
310 |     long V_StartPosition;
311 |     long V_HaystackLen;
312 |     long *V_KMP_Table;
313 |     long V_FindPosition = -1;
314 | 
315 |     if (P_StartPosition < 0)
316 |     {
317 |         V_StartPosition = s->i + P_StartPosition;
318 |     }
319 |     else
320 |     {
321 |         V_StartPosition = P_StartPosition;
322 |     }
323 |     V_HaystackLen = s->i - V_StartPosition;
324 |     if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
325 |     {
326 |         V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
327 |         if (V_KMP_Table != NULL)
328 |         {
329 |             _utstring_BuildTable(P_Needle, P_NeedleLen, V_KMP_Table);
330 | 
331 |             V_FindPosition = _utstring_find(s->d + V_StartPosition,
332 |                                             V_HaystackLen,
333 |                                             P_Needle,
334 |                                             P_NeedleLen,
335 |                                             V_KMP_Table);
336 |             if (V_FindPosition >= 0)
337 |             {
338 |                 V_FindPosition += V_StartPosition;
339 |             }
340 | 
341 |             free(V_KMP_Table);
342 |         }
343 |     }
344 | 
345 |     return V_FindPosition;
346 | }
347 | 
348 | 
349 | /* Search data from right to left. ( One time search mode. ) */
350 | _UNUSED_ static long utstring_findR(
351 |     UT_string *s,
352 |     long P_StartPosition,   /* Start from 0. -1 means last position. */
353 |     const char *P_Needle,
354 |     size_t P_NeedleLen)
355 | {
356 |     long V_StartPosition;
357 |     long V_HaystackLen;
358 |     long *V_KMP_Table;
359 |     long V_FindPosition = -1;
360 | 
361 |     if (P_StartPosition < 0)
362 |     {
363 |         V_StartPosition = s->i + P_StartPosition;
364 |     }
365 |     else
366 |     {
367 |         V_StartPosition = P_StartPosition;
368 |     }
369 |     V_HaystackLen = V_StartPosition + 1;
370 |     if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
371 |     {
372 |         V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
373 |         if (V_KMP_Table != NULL)
374 |         {
375 |             _utstring_BuildTableR(P_Needle, P_NeedleLen, V_KMP_Table);
376 | 
377 |             V_FindPosition = _utstring_findR(s->d,
378 |                                              V_HaystackLen,
379 |                                              P_Needle,
380 |                                              P_NeedleLen,
381 |                                              V_KMP_Table);
382 | 
383 |             free(V_KMP_Table);
384 |         }
385 |     }
386 | 
387 |     return V_FindPosition;
388 | }
389 | /*******************************************************************************
390 |  * end substring search functions                                              *
391 |  ******************************************************************************/
392 | 
393 | #endif /* UTSTRING_H */
394 | 


--------------------------------------------------------------------------------