├── .gitignore ├── Makefile ├── README.md ├── ini.c ├── ini.h ├── json.c ├── json.h ├── mqttcollect.1 ├── mqttcollect.c ├── mqttcollect.ini.example ├── mqttcollect.pandoc ├── temperature-simulator.py ├── uthash.h └── utstring.h /.gitignore: -------------------------------------------------------------------------------- 1 | mqttcollect 2 | *.o 3 | *.a 4 | *.so 5 | jp.ini 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX=/usr/local 2 | BINDIR=$(PREFIX)/bin 3 | MANDIR=$(PREFIX)/share/man/man1 4 | 5 | CFLAGS= -Wall -Werror 6 | LDFLAGS=-lmosquitto # -lssl -lcrypto -lrt 7 | 8 | all: mqttcollect 9 | 10 | mqttcollect: mqttcollect.c uthash.h json.o utstring.h ini.o 11 | $(CC) $(CFLAGS) -o mqttcollect mqttcollect.c json.o ini.o $(LDFLAGS) 12 | 13 | json.o: json.c json.h 14 | ini.o: ini.c ini.h 15 | 16 | install: mqttcollect 17 | install -m 755 mqttcollect $(BINDIR)/ 18 | install -m 644 mqttcollect.1 $(MANDIR)/ 19 | 20 | clean: 21 | rm -f *.o 22 | 23 | clobber: clean 24 | rm -f mqttcollect 25 | 26 | doc: README.md mqttcollect.1 27 | 28 | README.md: mqttcollect.pandoc 29 | pandoc -w markdown mqttcollect.pandoc -o README.md 30 | 31 | mqttcollect.1: mqttcollect.pandoc 32 | pandoc -s -w man mqttcollect.pandoc -o mqttcollect.1 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NAME 2 | ==== 3 | 4 | mqttcollect - MQTT-based Exec-plugin for collectd 5 | 6 | SYNOPSIS 7 | ======== 8 | 9 | mqttcollect [-v ] [-f *file*] 10 | 11 | DESCRIPTION 12 | =========== 13 | 14 | *mqttcollect* is an executable program which is used with collectd(1). 15 | It subscribes to any number of MQTT topics you specify, and prints 16 | values to stdout for collectd to process in an exec plugin block. 
17 | 18 | PUTVAL tiggr/mqtt-sys/gauge-clients.inactive 1430914033:0.00 19 | 20 | *collectd* launches *mqttcollect* which connects to the configured MQTT 21 | broker, subscribes and waits for publishes to subscribed topics in an 22 | endless loop. If an error occurs or the program exits for whatever 23 | reason, *collectd* will restart it and log the reason in its log file. 24 | 25 | *mqttcollect* supports TLS connections to the MQTT broker, 26 | username/password authentication, and TLS-PSK, all configured via its 27 | configuration file. 28 | 29 | OPTIONS 30 | ======= 31 | 32 | *mqttcollect* understands the following options. 33 | 34 | -f *file* 35 | : Specify an ini-type configuration file (see below), which defaults 36 | to `/usr/local/etc/mqttcollect.ini`. 37 | 38 | -v 39 | : Verbose. 40 | 41 | CONFIGURATION 42 | ============= 43 | 44 | *mqttcollect* requires a configuration file to operate. This ini-type 45 | file must have a `[defaults]` section in which general program 46 | parameters are configured, and it will have any number of additional 47 | sections specifying the MQTT topics it is to subscribe to. For the 48 | defaults section, please consult the example file provided with the 49 | source code for a list of allowed settings. 50 | 51 | Within a *topic* section, metrics collected by *collectd* are specified. 52 | 53 | [defaults] 54 | host = localhost 55 | port = 1883 56 | 57 | ; (1) subscribe to a wildcard and produce three metrics per subscription. 58 | ; the metric names are interpolated with `tid' from the JSON message 59 | ; payload, and the values of each metric are obtained from the 60 | ; JSON element behind the `<' 61 | 62 | [owntracks/+/+] 63 | gauge = vehicle/{tid}/speed 134 | # influxdb 135 | Server "127.0.0.1" "25826" 136 | 137 | 138 | Configure *collectd* to load our executable *mqttcollect* via its exec 139 | mechanism. Specify *mqttcollect*'s options as individual strings in the 140 | `Exec` invocation. 
141 | 142 | LoadPlugin exec 143 | 144 | 145 | Exec "mosquitto:mosquitto" "/usr/bin/mqttcollect" "-f" "/etc/my.ini" 146 | 147 | 148 | BUGS 149 | ==== 150 | 151 | Yes. 152 | 153 | AVAILABILITY 154 | ============ 155 | 156 | 157 | 158 | CREDITS 159 | ======= 160 | 161 | - This program uses *libmosquitto*, a library provided by the 162 | Mosquitto project as well as some of the 163 | excellent include files provided by 164 | 165 | 166 | INSTALLATION 167 | ============ 168 | 169 | - Obtain the source code for *mqttcollect*, adjust the `Makefile` and 170 | run `make`. 171 | 172 | SEE ALSO 173 | ======== 174 | 175 | - `collectd`(1). 176 | - 177 | 178 | AUTHOR 179 | ====== 180 | 181 | Jan-Piet Mens 182 | 183 | -------------------------------------------------------------------------------- /ini.c: -------------------------------------------------------------------------------- 1 | /* inih -- simple .INI file parser 2 | 3 | inih is released under the New BSD license (see LICENSE.txt). Go to the project 4 | home page for more info: 5 | 6 | https://github.com/benhoyt/inih 7 | 8 | */ 9 | 10 | #ifdef _MSC_VER 11 | #define _CRT_SECURE_NO_WARNINGS 12 | #endif 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ini.h" 19 | 20 | #if !INI_USE_STACK 21 | #include 22 | #endif 23 | 24 | #define MAX_SECTION 50 25 | #define MAX_NAME 50 26 | 27 | /* Strip whitespace chars off end of given string, in place. Return s. */ 28 | static char* rstrip(char* s) 29 | { 30 | char* p = s + strlen(s); 31 | while (p > s && isspace((unsigned char)(*--p))) 32 | *p = '\0'; 33 | return s; 34 | } 35 | 36 | /* Return pointer to first non-whitespace char in given string. */ 37 | static char* lskip(const char* s) 38 | { 39 | while (*s && isspace((unsigned char)(*s))) 40 | s++; 41 | return (char*)s; 42 | } 43 | 44 | /* Return pointer to first char c or ';' comment in given string, or pointer to 45 | null at end of string if neither found. 
';' must be prefixed by a whitespace 46 | character to register as a comment. */ 47 | static char* find_char_or_comment(const char* s, char c) 48 | { 49 | int was_whitespace = 0; 50 | while (*s && *s != c && !(was_whitespace && *s == ';')) { 51 | was_whitespace = isspace((unsigned char)(*s)); 52 | s++; 53 | } 54 | return (char*)s; 55 | } 56 | 57 | /* Version of strncpy that ensures dest (size bytes) is null-terminated. */ 58 | static char* strncpy0(char* dest, const char* src, size_t size) 59 | { 60 | strncpy(dest, src, size); 61 | dest[size - 1] = '\0'; 62 | return dest; 63 | } 64 | 65 | /* See documentation in header file. */ 66 | int ini_parse_file(FILE* file, 67 | int (*handler)(void*, const char*, const char*, 68 | const char*), 69 | void* user) 70 | { 71 | /* Uses a fair bit of stack (use heap instead if you need to) */ 72 | #if INI_USE_STACK 73 | char line[INI_MAX_LINE]; 74 | #else 75 | char* line; 76 | #endif 77 | char section[MAX_SECTION] = ""; 78 | char prev_name[MAX_NAME] = ""; 79 | 80 | char* start; 81 | char* end; 82 | char* name; 83 | char* value; 84 | int lineno = 0; 85 | int error = 0; 86 | 87 | #if !INI_USE_STACK 88 | line = (char*)malloc(INI_MAX_LINE); 89 | if (!line) { 90 | return -2; 91 | } 92 | #endif 93 | 94 | /* Scan through file line by line */ 95 | while (fgets(line, INI_MAX_LINE, file) != NULL) { 96 | lineno++; 97 | 98 | start = line; 99 | #if INI_ALLOW_BOM 100 | if (lineno == 1 && (unsigned char)start[0] == 0xEF && 101 | (unsigned char)start[1] == 0xBB && 102 | (unsigned char)start[2] == 0xBF) { 103 | start += 3; 104 | } 105 | #endif 106 | start = lskip(rstrip(start)); 107 | 108 | if (*start == ';' || *start == '#') { 109 | /* Per Python ConfigParser, allow '#' comments at start of line */ 110 | } 111 | #if INI_ALLOW_MULTILINE 112 | else if (*prev_name && *start && start > line) { 113 | /* Non-black line with leading whitespace, treat as continuation 114 | of previous name's value (as per Python ConfigParser). 
*/ 115 | if (!handler(user, section, prev_name, start) && !error) 116 | error = lineno; 117 | } 118 | #endif 119 | else if (*start == '[') { 120 | /* A "[section]" line */ 121 | end = find_char_or_comment(start + 1, ']'); 122 | if (*end == ']') { 123 | *end = '\0'; 124 | strncpy0(section, start + 1, sizeof(section)); 125 | *prev_name = '\0'; 126 | } 127 | else if (!error) { 128 | /* No ']' found on section line */ 129 | error = lineno; 130 | } 131 | } 132 | else if (*start && *start != ';') { 133 | /* Not a comment, must be a name[=:]value pair */ 134 | end = find_char_or_comment(start, '='); 135 | if (*end != '=') { 136 | end = find_char_or_comment(start, ':'); 137 | } 138 | if (*end == '=' || *end == ':') { 139 | *end = '\0'; 140 | name = rstrip(start); 141 | value = lskip(end + 1); 142 | end = find_char_or_comment(value, '\0'); 143 | if (*end == ';') 144 | *end = '\0'; 145 | rstrip(value); 146 | 147 | /* Valid name[=:]value pair found, call handler */ 148 | strncpy0(prev_name, name, sizeof(prev_name)); 149 | if (!handler(user, section, name, value) && !error) 150 | error = lineno; 151 | } 152 | else if (!error) { 153 | /* No '=' or ':' found on name[=:]value line */ 154 | error = lineno; 155 | } 156 | } 157 | 158 | #if INI_STOP_ON_FIRST_ERROR 159 | if (error) 160 | break; 161 | #endif 162 | } 163 | 164 | #if !INI_USE_STACK 165 | free(line); 166 | #endif 167 | 168 | return error; 169 | } 170 | 171 | /* See documentation in header file. 
*/ 172 | int ini_parse(const char* filename, 173 | int (*handler)(void*, const char*, const char*, const char*), 174 | void* user) 175 | { 176 | FILE* file; 177 | int error; 178 | 179 | file = fopen(filename, "r"); 180 | if (!file) 181 | return -1; 182 | error = ini_parse_file(file, handler, user); 183 | fclose(file); 184 | return error; 185 | } 186 | -------------------------------------------------------------------------------- /ini.h: -------------------------------------------------------------------------------- 1 | /* inih -- simple .INI file parser 2 | 3 | inih is released under the New BSD license (see LICENSE.txt). Go to the project 4 | home page for more info: 5 | 6 | https://github.com/benhoyt/inih 7 | 8 | */ 9 | 10 | #ifndef __INI_H__ 11 | #define __INI_H__ 12 | 13 | /* Make this header file easier to include in C++ code */ 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | #include 19 | 20 | /* Parse given INI-style file. May have [section]s, name=value pairs 21 | (whitespace stripped), and comments starting with ';' (semicolon). Section 22 | is "" if name=value pair parsed before any section heading. name:value 23 | pairs are also supported as a concession to Python's ConfigParser. 24 | 25 | For each name=value pair parsed, call handler function with given user 26 | pointer as well as section, name, and value (data only valid for duration 27 | of handler call). Handler should return nonzero on success, zero on error. 28 | 29 | Returns 0 on success, line number of first error on parse error (doesn't 30 | stop on first error), -1 on file open error, or -2 on memory allocation 31 | error (only when INI_USE_STACK is zero). 32 | */ 33 | int ini_parse(const char* filename, 34 | int (*handler)(void* user, const char* section, 35 | const char* name, const char* value), 36 | void* user); 37 | 38 | /* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't 39 | close the file when it's finished -- the caller must do that. 
*/ 40 | int ini_parse_file(FILE* file, 41 | int (*handler)(void* user, const char* section, 42 | const char* name, const char* value), 43 | void* user); 44 | 45 | /* Nonzero to allow multi-line value parsing, in the style of Python's 46 | ConfigParser. If allowed, ini_parse() will call the handler with the same 47 | name for each subsequent line parsed. */ 48 | #ifndef INI_ALLOW_MULTILINE 49 | #define INI_ALLOW_MULTILINE 1 50 | #endif 51 | 52 | /* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of 53 | the file. See http://code.google.com/p/inih/issues/detail?id=21 */ 54 | #ifndef INI_ALLOW_BOM 55 | #define INI_ALLOW_BOM 1 56 | #endif 57 | 58 | /* Nonzero to use stack, zero to use heap (malloc/free). */ 59 | #ifndef INI_USE_STACK 60 | #define INI_USE_STACK 1 61 | #endif 62 | 63 | /* Stop parsing on first error (default is to keep parsing). */ 64 | #ifndef INI_STOP_ON_FIRST_ERROR 65 | #define INI_STOP_ON_FIRST_ERROR 0 66 | #endif 67 | 68 | /* Maximum line length for any line in INI file. */ 69 | #ifndef INI_MAX_LINE 70 | #define INI_MAX_LINE 200 71 | #endif 72 | 73 | #ifdef __cplusplus 74 | } 75 | #endif 76 | 77 | #endif /* __INI_H__ */ 78 | -------------------------------------------------------------------------------- /json.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com) 3 | All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | */ 23 | 24 | #include "json.h" 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #define out_of_memory() do { \ 33 | fprintf(stderr, "Out of memory.\n"); \ 34 | exit(EXIT_FAILURE); \ 35 | } while (0) 36 | 37 | /* Sadly, strdup is not portable. */ 38 | static char *json_strdup(const char *str) 39 | { 40 | char *ret = (char*) malloc(strlen(str) + 1); 41 | if (ret == NULL) 42 | out_of_memory(); 43 | strcpy(ret, str); 44 | return ret; 45 | } 46 | 47 | /* String buffer */ 48 | 49 | typedef struct 50 | { 51 | char *cur; 52 | char *end; 53 | char *start; 54 | } SB; 55 | 56 | static void sb_init(SB *sb) 57 | { 58 | sb->start = (char*) malloc(17); 59 | if (sb->start == NULL) 60 | out_of_memory(); 61 | sb->cur = sb->start; 62 | sb->end = sb->start + 16; 63 | } 64 | 65 | /* sb and need may be evaluated multiple times. 
*/ 66 | #define sb_need(sb, need) do { \ 67 | if ((sb)->end - (sb)->cur < (need)) \ 68 | sb_grow(sb, need); \ 69 | } while (0) 70 | 71 | static void sb_grow(SB *sb, int need) 72 | { 73 | size_t length = sb->cur - sb->start; 74 | size_t alloc = sb->end - sb->start; 75 | 76 | do { 77 | alloc *= 2; 78 | } while (alloc < length + need); 79 | 80 | sb->start = (char*) realloc(sb->start, alloc + 1); 81 | if (sb->start == NULL) 82 | out_of_memory(); 83 | sb->cur = sb->start + length; 84 | sb->end = sb->start + alloc; 85 | } 86 | 87 | static void sb_put(SB *sb, const char *bytes, int count) 88 | { 89 | sb_need(sb, count); 90 | memcpy(sb->cur, bytes, count); 91 | sb->cur += count; 92 | } 93 | 94 | #define sb_putc(sb, c) do { \ 95 | if ((sb)->cur >= (sb)->end) \ 96 | sb_grow(sb, 1); \ 97 | *(sb)->cur++ = (c); \ 98 | } while (0) 99 | 100 | static void sb_puts(SB *sb, const char *str) 101 | { 102 | sb_put(sb, str, strlen(str)); 103 | } 104 | 105 | static char *sb_finish(SB *sb) 106 | { 107 | *sb->cur = 0; 108 | assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start)); 109 | return sb->start; 110 | } 111 | 112 | static void sb_free(SB *sb) 113 | { 114 | free(sb->start); 115 | } 116 | 117 | /* 118 | * Unicode helper functions 119 | * 120 | * These are taken from the ccan/charset module and customized a bit. 121 | * Putting them here means the compiler can (choose to) inline them, 122 | * and it keeps ccan/json from having a dependency. 123 | */ 124 | 125 | /* 126 | * Type for Unicode codepoints. 127 | * We need our own because wchar_t might be 16 bits. 128 | */ 129 | typedef uint32_t uchar_t; 130 | 131 | /* 132 | * Validate a single UTF-8 character starting at @s. 133 | * The string must be null-terminated. 134 | * 135 | * If it's valid, return its length (1 thru 4). 136 | * If it's invalid or clipped, return 0. 
137 | * 138 | * This function implements the syntax given in RFC3629, which is 139 | * the same as that given in The Unicode Standard, Version 6.0. 140 | * 141 | * It has the following properties: 142 | * 143 | * * All codepoints U+0000..U+10FFFF may be encoded, 144 | * except for U+D800..U+DFFF, which are reserved 145 | * for UTF-16 surrogate pair encoding. 146 | * * UTF-8 byte sequences longer than 4 bytes are not permitted, 147 | * as they exceed the range of Unicode. 148 | * * The sixty-six Unicode "non-characters" are permitted 149 | * (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF). 150 | */ 151 | static int utf8_validate_cz(const char *s) 152 | { 153 | unsigned char c = *s++; 154 | 155 | if (c <= 0x7F) { /* 00..7F */ 156 | return 1; 157 | } else if (c <= 0xC1) { /* 80..C1 */ 158 | /* Disallow overlong 2-byte sequence. */ 159 | return 0; 160 | } else if (c <= 0xDF) { /* C2..DF */ 161 | /* Make sure subsequent byte is in the range 0x80..0xBF. */ 162 | if (((unsigned char)*s++ & 0xC0) != 0x80) 163 | return 0; 164 | 165 | return 2; 166 | } else if (c <= 0xEF) { /* E0..EF */ 167 | /* Disallow overlong 3-byte sequence. */ 168 | if (c == 0xE0 && (unsigned char)*s < 0xA0) 169 | return 0; 170 | 171 | /* Disallow U+D800..U+DFFF. */ 172 | if (c == 0xED && (unsigned char)*s > 0x9F) 173 | return 0; 174 | 175 | /* Make sure subsequent bytes are in the range 0x80..0xBF. */ 176 | if (((unsigned char)*s++ & 0xC0) != 0x80) 177 | return 0; 178 | if (((unsigned char)*s++ & 0xC0) != 0x80) 179 | return 0; 180 | 181 | return 3; 182 | } else if (c <= 0xF4) { /* F0..F4 */ 183 | /* Disallow overlong 4-byte sequence. */ 184 | if (c == 0xF0 && (unsigned char)*s < 0x90) 185 | return 0; 186 | 187 | /* Disallow codepoints beyond U+10FFFF. */ 188 | if (c == 0xF4 && (unsigned char)*s > 0x8F) 189 | return 0; 190 | 191 | /* Make sure subsequent bytes are in the range 0x80..0xBF. 
*/ 192 | if (((unsigned char)*s++ & 0xC0) != 0x80) 193 | return 0; 194 | if (((unsigned char)*s++ & 0xC0) != 0x80) 195 | return 0; 196 | if (((unsigned char)*s++ & 0xC0) != 0x80) 197 | return 0; 198 | 199 | return 4; 200 | } else { /* F5..FF */ 201 | return 0; 202 | } 203 | } 204 | 205 | /* Validate a null-terminated UTF-8 string. */ 206 | static bool utf8_validate(const char *s) 207 | { 208 | int len; 209 | 210 | for (; *s != 0; s += len) { 211 | len = utf8_validate_cz(s); 212 | if (len == 0) 213 | return false; 214 | } 215 | 216 | return true; 217 | } 218 | 219 | /* 220 | * Read a single UTF-8 character starting at @s, 221 | * returning the length, in bytes, of the character read. 222 | * 223 | * This function assumes input is valid UTF-8, 224 | * and that there are enough characters in front of @s. 225 | */ 226 | static int utf8_read_char(const char *s, uchar_t *out) 227 | { 228 | const unsigned char *c = (const unsigned char*) s; 229 | 230 | assert(utf8_validate_cz(s)); 231 | 232 | if (c[0] <= 0x7F) { 233 | /* 00..7F */ 234 | *out = c[0]; 235 | return 1; 236 | } else if (c[0] <= 0xDF) { 237 | /* C2..DF (unless input is invalid) */ 238 | *out = ((uchar_t)c[0] & 0x1F) << 6 | 239 | ((uchar_t)c[1] & 0x3F); 240 | return 2; 241 | } else if (c[0] <= 0xEF) { 242 | /* E0..EF */ 243 | *out = ((uchar_t)c[0] & 0xF) << 12 | 244 | ((uchar_t)c[1] & 0x3F) << 6 | 245 | ((uchar_t)c[2] & 0x3F); 246 | return 3; 247 | } else { 248 | /* F0..F4 (unless input is invalid) */ 249 | *out = ((uchar_t)c[0] & 0x7) << 18 | 250 | ((uchar_t)c[1] & 0x3F) << 12 | 251 | ((uchar_t)c[2] & 0x3F) << 6 | 252 | ((uchar_t)c[3] & 0x3F); 253 | return 4; 254 | } 255 | } 256 | 257 | /* 258 | * Write a single UTF-8 character to @s, 259 | * returning the length, in bytes, of the character written. 260 | * 261 | * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF. 262 | * 263 | * This function will write up to 4 bytes to @out. 
264 | */ 265 | static int utf8_write_char(uchar_t unicode, char *out) 266 | { 267 | unsigned char *o = (unsigned char*) out; 268 | 269 | assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF)); 270 | 271 | if (unicode <= 0x7F) { 272 | /* U+0000..U+007F */ 273 | *o++ = unicode; 274 | return 1; 275 | } else if (unicode <= 0x7FF) { 276 | /* U+0080..U+07FF */ 277 | *o++ = 0xC0 | unicode >> 6; 278 | *o++ = 0x80 | (unicode & 0x3F); 279 | return 2; 280 | } else if (unicode <= 0xFFFF) { 281 | /* U+0800..U+FFFF */ 282 | *o++ = 0xE0 | unicode >> 12; 283 | *o++ = 0x80 | (unicode >> 6 & 0x3F); 284 | *o++ = 0x80 | (unicode & 0x3F); 285 | return 3; 286 | } else { 287 | /* U+10000..U+10FFFF */ 288 | *o++ = 0xF0 | unicode >> 18; 289 | *o++ = 0x80 | (unicode >> 12 & 0x3F); 290 | *o++ = 0x80 | (unicode >> 6 & 0x3F); 291 | *o++ = 0x80 | (unicode & 0x3F); 292 | return 4; 293 | } 294 | } 295 | 296 | /* 297 | * Compute the Unicode codepoint of a UTF-16 surrogate pair. 298 | * 299 | * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF. 300 | * If they aren't, this function returns false. 301 | */ 302 | static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode) 303 | { 304 | if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) { 305 | *unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF)); 306 | return true; 307 | } else { 308 | return false; 309 | } 310 | } 311 | 312 | /* 313 | * Construct a UTF-16 surrogate pair given a Unicode codepoint. 314 | * 315 | * @unicode must be U+10000..U+10FFFF. 
316 | */ 317 | static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc) 318 | { 319 | uchar_t n; 320 | 321 | assert(unicode >= 0x10000 && unicode <= 0x10FFFF); 322 | 323 | n = unicode - 0x10000; 324 | *uc = ((n >> 10) & 0x3FF) | 0xD800; 325 | *lc = (n & 0x3FF) | 0xDC00; 326 | } 327 | 328 | #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ') 329 | #define is_digit(c) ((c) >= '0' && (c) <= '9') 330 | 331 | static bool parse_value (const char **sp, JsonNode **out); 332 | static bool parse_string (const char **sp, char **out); 333 | static bool parse_number (const char **sp, double *out); 334 | static bool parse_array (const char **sp, JsonNode **out); 335 | static bool parse_object (const char **sp, JsonNode **out); 336 | static bool parse_hex16 (const char **sp, uint16_t *out); 337 | 338 | static bool expect_literal (const char **sp, const char *str); 339 | static void skip_space (const char **sp); 340 | 341 | static void emit_value (SB *out, const JsonNode *node); 342 | static void emit_value_indented (SB *out, const JsonNode *node, const char *space, int indent_level); 343 | static void emit_string (SB *out, const char *str); 344 | static void emit_number (SB *out, double num); 345 | static void emit_array (SB *out, const JsonNode *array); 346 | static void emit_array_indented (SB *out, const JsonNode *array, const char *space, int indent_level); 347 | static void emit_object (SB *out, const JsonNode *object); 348 | static void emit_object_indented (SB *out, const JsonNode *object, const char *space, int indent_level); 349 | 350 | static int write_hex16(char *out, uint16_t val); 351 | 352 | static JsonNode *mknode(JsonTag tag); 353 | static void append_node(JsonNode *parent, JsonNode *child); 354 | static void prepend_node(JsonNode *parent, JsonNode *child); 355 | static void append_member(JsonNode *object, char *key, JsonNode *value); 356 | 357 | /* Assertion-friendly validity checks */ 358 | static bool 
tag_is_valid(unsigned int tag); 359 | static bool number_is_valid(const char *num); 360 | 361 | JsonNode *json_decode(const char *json) 362 | { 363 | const char *s = json; 364 | JsonNode *ret; 365 | 366 | skip_space(&s); 367 | if (!parse_value(&s, &ret)) 368 | return NULL; 369 | 370 | skip_space(&s); 371 | if (*s != 0) { 372 | json_delete(ret); 373 | return NULL; 374 | } 375 | 376 | return ret; 377 | } 378 | 379 | char *json_encode(const JsonNode *node) 380 | { 381 | return json_stringify(node, NULL); 382 | } 383 | 384 | char *json_encode_string(const char *str) 385 | { 386 | SB sb; 387 | sb_init(&sb); 388 | 389 | emit_string(&sb, str); 390 | 391 | return sb_finish(&sb); 392 | } 393 | 394 | char *json_stringify(const JsonNode *node, const char *space) 395 | { 396 | SB sb; 397 | sb_init(&sb); 398 | 399 | if (space != NULL) 400 | emit_value_indented(&sb, node, space, 0); 401 | else 402 | emit_value(&sb, node); 403 | 404 | return sb_finish(&sb); 405 | } 406 | 407 | void json_delete(JsonNode *node) 408 | { 409 | if (node != NULL) { 410 | json_remove_from_parent(node); 411 | 412 | switch (node->tag) { 413 | case JSON_STRING: 414 | free(node->string_); 415 | break; 416 | case JSON_ARRAY: 417 | case JSON_OBJECT: 418 | { 419 | JsonNode *child, *next; 420 | for (child = node->children.head; child != NULL; child = next) { 421 | next = child->next; 422 | json_delete(child); 423 | } 424 | break; 425 | } 426 | default:; 427 | } 428 | 429 | free(node); 430 | } 431 | } 432 | 433 | bool json_validate(const char *json) 434 | { 435 | const char *s = json; 436 | 437 | skip_space(&s); 438 | if (!parse_value(&s, NULL)) 439 | return false; 440 | 441 | skip_space(&s); 442 | if (*s != 0) 443 | return false; 444 | 445 | return true; 446 | } 447 | 448 | JsonNode *json_find_element(JsonNode *array, int index) 449 | { 450 | JsonNode *element; 451 | int i = 0; 452 | 453 | if (array == NULL || array->tag != JSON_ARRAY) 454 | return NULL; 455 | 456 | json_foreach(element, array) { 457 | if (i == 
index) 458 | return element; 459 | i++; 460 | } 461 | 462 | return NULL; 463 | } 464 | 465 | JsonNode *json_find_member(JsonNode *object, const char *name) 466 | { 467 | JsonNode *member; 468 | 469 | if (object == NULL || object->tag != JSON_OBJECT) 470 | return NULL; 471 | 472 | json_foreach(member, object) 473 | if (strcmp(member->key, name) == 0) 474 | return member; 475 | 476 | return NULL; 477 | } 478 | 479 | JsonNode *json_first_child(const JsonNode *node) 480 | { 481 | if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT)) 482 | return node->children.head; 483 | return NULL; 484 | } 485 | 486 | static JsonNode *mknode(JsonTag tag) 487 | { 488 | JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode)); 489 | if (ret == NULL) 490 | out_of_memory(); 491 | ret->tag = tag; 492 | return ret; 493 | } 494 | 495 | JsonNode *json_mknull(void) 496 | { 497 | return mknode(JSON_NULL); 498 | } 499 | 500 | JsonNode *json_mkbool(bool b) 501 | { 502 | JsonNode *ret = mknode(JSON_BOOL); 503 | ret->bool_ = b; 504 | return ret; 505 | } 506 | 507 | static JsonNode *mkstring(char *s) 508 | { 509 | JsonNode *ret = mknode(JSON_STRING); 510 | ret->string_ = s; 511 | return ret; 512 | } 513 | 514 | JsonNode *json_mkstring(const char *s) 515 | { 516 | return mkstring(json_strdup(s)); 517 | } 518 | 519 | JsonNode *json_mknumber(double n) 520 | { 521 | JsonNode *node = mknode(JSON_NUMBER); 522 | node->number_ = n; 523 | return node; 524 | } 525 | 526 | JsonNode *json_mkarray(void) 527 | { 528 | return mknode(JSON_ARRAY); 529 | } 530 | 531 | JsonNode *json_mkobject(void) 532 | { 533 | return mknode(JSON_OBJECT); 534 | } 535 | 536 | static void append_node(JsonNode *parent, JsonNode *child) 537 | { 538 | child->parent = parent; 539 | child->prev = parent->children.tail; 540 | child->next = NULL; 541 | 542 | if (parent->children.tail != NULL) 543 | parent->children.tail->next = child; 544 | else 545 | parent->children.head = child; 546 | parent->children.tail = child; 
547 | } 548 | 549 | static void prepend_node(JsonNode *parent, JsonNode *child) 550 | { 551 | child->parent = parent; 552 | child->prev = NULL; 553 | child->next = parent->children.head; 554 | 555 | if (parent->children.head != NULL) 556 | parent->children.head->prev = child; 557 | else 558 | parent->children.tail = child; 559 | parent->children.head = child; 560 | } 561 | 562 | static void append_member(JsonNode *object, char *key, JsonNode *value) 563 | { 564 | value->key = key; 565 | append_node(object, value); 566 | } 567 | 568 | void json_append_element(JsonNode *array, JsonNode *element) 569 | { 570 | assert(array->tag == JSON_ARRAY); 571 | assert(element->parent == NULL); 572 | 573 | append_node(array, element); 574 | } 575 | 576 | void json_prepend_element(JsonNode *array, JsonNode *element) 577 | { 578 | assert(array->tag == JSON_ARRAY); 579 | assert(element->parent == NULL); 580 | 581 | prepend_node(array, element); 582 | } 583 | 584 | void json_append_member(JsonNode *object, const char *key, JsonNode *value) 585 | { 586 | assert(object->tag == JSON_OBJECT); 587 | assert(value->parent == NULL); 588 | 589 | append_member(object, json_strdup(key), value); 590 | } 591 | 592 | void json_prepend_member(JsonNode *object, const char *key, JsonNode *value) 593 | { 594 | assert(object->tag == JSON_OBJECT); 595 | assert(value->parent == NULL); 596 | 597 | value->key = json_strdup(key); 598 | prepend_node(object, value); 599 | } 600 | 601 | void json_remove_from_parent(JsonNode *node) 602 | { 603 | JsonNode *parent = node->parent; 604 | 605 | if (parent != NULL) { 606 | if (node->prev != NULL) 607 | node->prev->next = node->next; 608 | else 609 | parent->children.head = node->next; 610 | if (node->next != NULL) 611 | node->next->prev = node->prev; 612 | else 613 | parent->children.tail = node->prev; 614 | 615 | free(node->key); 616 | 617 | node->parent = NULL; 618 | node->prev = node->next = NULL; 619 | node->key = NULL; 620 | } 621 | } 622 | 623 | static bool 
parse_value(const char **sp, JsonNode **out) 624 | { 625 | const char *s = *sp; 626 | 627 | switch (*s) { 628 | case 'n': 629 | if (expect_literal(&s, "null")) { 630 | if (out) 631 | *out = json_mknull(); 632 | *sp = s; 633 | return true; 634 | } 635 | return false; 636 | 637 | case 'f': 638 | if (expect_literal(&s, "false")) { 639 | if (out) 640 | *out = json_mkbool(false); 641 | *sp = s; 642 | return true; 643 | } 644 | return false; 645 | 646 | case 't': 647 | if (expect_literal(&s, "true")) { 648 | if (out) 649 | *out = json_mkbool(true); 650 | *sp = s; 651 | return true; 652 | } 653 | return false; 654 | 655 | case '"': { 656 | char *str; 657 | if (parse_string(&s, out ? &str : NULL)) { 658 | if (out) 659 | *out = mkstring(str); 660 | *sp = s; 661 | return true; 662 | } 663 | return false; 664 | } 665 | 666 | case '[': 667 | if (parse_array(&s, out)) { 668 | *sp = s; 669 | return true; 670 | } 671 | return false; 672 | 673 | case '{': 674 | if (parse_object(&s, out)) { 675 | *sp = s; 676 | return true; 677 | } 678 | return false; 679 | 680 | default: { 681 | double num; 682 | if (parse_number(&s, out ? &num : NULL)) { 683 | if (out) 684 | *out = json_mknumber(num); 685 | *sp = s; 686 | return true; 687 | } 688 | return false; 689 | } 690 | } 691 | } 692 | 693 | static bool parse_array(const char **sp, JsonNode **out) 694 | { 695 | const char *s = *sp; 696 | JsonNode *ret = out ? json_mkarray() : NULL; 697 | JsonNode *element; 698 | 699 | if (*s++ != '[') 700 | goto failure; 701 | skip_space(&s); 702 | 703 | if (*s == ']') { 704 | s++; 705 | goto success; 706 | } 707 | 708 | for (;;) { 709 | if (!parse_value(&s, out ? 
&element : NULL)) 710 | goto failure; 711 | skip_space(&s); 712 | 713 | if (out) 714 | json_append_element(ret, element); 715 | 716 | if (*s == ']') { 717 | s++; 718 | goto success; 719 | } 720 | 721 | if (*s++ != ',') 722 | goto failure; 723 | skip_space(&s); 724 | } 725 | 726 | success: 727 | *sp = s; 728 | if (out) 729 | *out = ret; 730 | return true; 731 | 732 | failure: 733 | json_delete(ret); 734 | return false; 735 | } 736 | 737 | static bool parse_object(const char **sp, JsonNode **out) 738 | { 739 | const char *s = *sp; 740 | JsonNode *ret = out ? json_mkobject() : NULL; 741 | char *key; 742 | JsonNode *value; 743 | 744 | if (*s++ != '{') 745 | goto failure; 746 | skip_space(&s); 747 | 748 | if (*s == '}') { 749 | s++; 750 | goto success; 751 | } 752 | 753 | for (;;) { 754 | if (!parse_string(&s, out ? &key : NULL)) 755 | goto failure; 756 | skip_space(&s); 757 | 758 | if (*s++ != ':') 759 | goto failure_free_key; 760 | skip_space(&s); 761 | 762 | if (!parse_value(&s, out ? &value : NULL)) 763 | goto failure_free_key; 764 | skip_space(&s); 765 | 766 | if (out) 767 | append_member(ret, key, value); 768 | 769 | if (*s == '}') { 770 | s++; 771 | goto success; 772 | } 773 | 774 | if (*s++ != ',') 775 | goto failure; 776 | skip_space(&s); 777 | } 778 | 779 | success: 780 | *sp = s; 781 | if (out) 782 | *out = ret; 783 | return true; 784 | 785 | failure_free_key: 786 | if (out) 787 | free(key); 788 | failure: 789 | json_delete(ret); 790 | return false; 791 | } 792 | 793 | bool parse_string(const char **sp, char **out) 794 | { 795 | const char *s = *sp; 796 | SB sb; 797 | char throwaway_buffer[4]; 798 | /* enough space for a UTF-8 character */ 799 | char *b; 800 | 801 | if (*s++ != '"') 802 | return false; 803 | 804 | if (out) { 805 | sb_init(&sb); 806 | sb_need(&sb, 4); 807 | b = sb.cur; 808 | } else { 809 | b = throwaway_buffer; 810 | } 811 | 812 | while (*s != '"') { 813 | unsigned char c = *s++; 814 | 815 | /* Parse next character, and write it to b. 
*/ 816 | if (c == '\\') { 817 | c = *s++; 818 | switch (c) { 819 | case '"': 820 | case '\\': 821 | case '/': 822 | *b++ = c; 823 | break; 824 | case 'b': 825 | *b++ = '\b'; 826 | break; 827 | case 'f': 828 | *b++ = '\f'; 829 | break; 830 | case 'n': 831 | *b++ = '\n'; 832 | break; 833 | case 'r': 834 | *b++ = '\r'; 835 | break; 836 | case 't': 837 | *b++ = '\t'; 838 | break; 839 | case 'u': 840 | { 841 | uint16_t uc, lc; 842 | uchar_t unicode; 843 | 844 | if (!parse_hex16(&s, &uc)) 845 | goto failed; 846 | 847 | if (uc >= 0xD800 && uc <= 0xDFFF) { 848 | /* Handle UTF-16 surrogate pair. */ 849 | if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc)) 850 | goto failed; /* Incomplete surrogate pair. */ 851 | if (!from_surrogate_pair(uc, lc, &unicode)) 852 | goto failed; /* Invalid surrogate pair. */ 853 | } else if (uc == 0) { 854 | /* Disallow "\u0000". */ 855 | goto failed; 856 | } else { 857 | unicode = uc; 858 | } 859 | 860 | b += utf8_write_char(unicode, b); 861 | break; 862 | } 863 | default: 864 | /* Invalid escape */ 865 | goto failed; 866 | } 867 | } else if (c <= 0x1F) { 868 | /* Control characters are not allowed in string literals. */ 869 | goto failed; 870 | } else { 871 | /* Validate and echo a UTF-8 character. */ 872 | int len; 873 | 874 | s--; 875 | len = utf8_validate_cz(s); 876 | if (len == 0) 877 | goto failed; /* Invalid UTF-8 character. */ 878 | 879 | while (len--) 880 | *b++ = *s++; 881 | } 882 | 883 | /* 884 | * Update sb to know about the new bytes, 885 | * and set up b to write another character. 
886 | */ 887 | if (out) { 888 | sb.cur = b; 889 | sb_need(&sb, 4); 890 | b = sb.cur; 891 | } else { 892 | b = throwaway_buffer; 893 | } 894 | } 895 | s++; 896 | 897 | if (out) 898 | *out = sb_finish(&sb); 899 | *sp = s; 900 | return true; 901 | 902 | failed: 903 | if (out) 904 | sb_free(&sb); 905 | return false; 906 | } 907 | 908 | /* 909 | * The JSON spec says that a number shall follow this precise pattern 910 | * (spaces and quotes added for readability): 911 | * '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)? 912 | * 913 | * However, some JSON parsers are more liberal. For instance, PHP accepts 914 | * '.5' and '1.'. JSON.parse accepts '+3'. 915 | * 916 | * This function takes the strict approach. 917 | */ 918 | bool parse_number(const char **sp, double *out) 919 | { 920 | const char *s = *sp; 921 | 922 | /* '-'? */ 923 | if (*s == '-') 924 | s++; 925 | 926 | /* (0 | [1-9][0-9]*) */ 927 | if (*s == '0') { 928 | s++; 929 | } else { 930 | if (!is_digit(*s)) 931 | return false; 932 | do { 933 | s++; 934 | } while (is_digit(*s)); 935 | } 936 | 937 | /* ('.' [0-9]+)? */ 938 | if (*s == '.') { 939 | s++; 940 | if (!is_digit(*s)) 941 | return false; 942 | do { 943 | s++; 944 | } while (is_digit(*s)); 945 | } 946 | 947 | /* ([Ee] [+-]? [0-9]+)? */ 948 | if (*s == 'E' || *s == 'e') { 949 | s++; 950 | if (*s == '+' || *s == '-') 951 | s++; 952 | if (!is_digit(*s)) 953 | return false; 954 | do { 955 | s++; 956 | } while (is_digit(*s)); 957 | } 958 | 959 | if (out) 960 | *out = strtod(*sp, NULL); 961 | 962 | *sp = s; 963 | return true; 964 | } 965 | 966 | static void skip_space(const char **sp) 967 | { 968 | const char *s = *sp; 969 | while (is_space(*s)) 970 | s++; 971 | *sp = s; 972 | } 973 | 974 | static void emit_value(SB *out, const JsonNode *node) 975 | { 976 | assert(tag_is_valid(node->tag)); 977 | switch (node->tag) { 978 | case JSON_NULL: 979 | sb_puts(out, "null"); 980 | break; 981 | case JSON_BOOL: 982 | sb_puts(out, node->bool_ ? 
"true" : "false"); 983 | break; 984 | case JSON_STRING: 985 | emit_string(out, node->string_); 986 | break; 987 | case JSON_NUMBER: 988 | emit_number(out, node->number_); 989 | break; 990 | case JSON_ARRAY: 991 | emit_array(out, node); 992 | break; 993 | case JSON_OBJECT: 994 | emit_object(out, node); 995 | break; 996 | default: 997 | assert(false); 998 | } 999 | } 1000 | 1001 | void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level) 1002 | { 1003 | assert(tag_is_valid(node->tag)); 1004 | switch (node->tag) { 1005 | case JSON_NULL: 1006 | sb_puts(out, "null"); 1007 | break; 1008 | case JSON_BOOL: 1009 | sb_puts(out, node->bool_ ? "true" : "false"); 1010 | break; 1011 | case JSON_STRING: 1012 | emit_string(out, node->string_); 1013 | break; 1014 | case JSON_NUMBER: 1015 | emit_number(out, node->number_); 1016 | break; 1017 | case JSON_ARRAY: 1018 | emit_array_indented(out, node, space, indent_level); 1019 | break; 1020 | case JSON_OBJECT: 1021 | emit_object_indented(out, node, space, indent_level); 1022 | break; 1023 | default: 1024 | assert(false); 1025 | } 1026 | } 1027 | 1028 | static void emit_array(SB *out, const JsonNode *array) 1029 | { 1030 | const JsonNode *element; 1031 | 1032 | sb_putc(out, '['); 1033 | json_foreach(element, array) { 1034 | emit_value(out, element); 1035 | if (element->next != NULL) 1036 | sb_putc(out, ','); 1037 | } 1038 | sb_putc(out, ']'); 1039 | } 1040 | 1041 | static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level) 1042 | { 1043 | const JsonNode *element = array->children.head; 1044 | int i; 1045 | 1046 | if (element == NULL) { 1047 | sb_puts(out, "[]"); 1048 | return; 1049 | } 1050 | 1051 | sb_puts(out, "[\n"); 1052 | while (element != NULL) { 1053 | for (i = 0; i < indent_level + 1; i++) 1054 | sb_puts(out, space); 1055 | emit_value_indented(out, element, space, indent_level + 1); 1056 | 1057 | element = element->next; 1058 | sb_puts(out, element != 
NULL ? ",\n" : "\n"); 1059 | } 1060 | for (i = 0; i < indent_level; i++) 1061 | sb_puts(out, space); 1062 | sb_putc(out, ']'); 1063 | } 1064 | 1065 | static void emit_object(SB *out, const JsonNode *object) 1066 | { 1067 | const JsonNode *member; 1068 | 1069 | sb_putc(out, '{'); 1070 | json_foreach(member, object) { 1071 | emit_string(out, member->key); 1072 | sb_putc(out, ':'); 1073 | emit_value(out, member); 1074 | if (member->next != NULL) 1075 | sb_putc(out, ','); 1076 | } 1077 | sb_putc(out, '}'); 1078 | } 1079 | 1080 | static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level) 1081 | { 1082 | const JsonNode *member = object->children.head; 1083 | int i; 1084 | 1085 | if (member == NULL) { 1086 | sb_puts(out, "{}"); 1087 | return; 1088 | } 1089 | 1090 | sb_puts(out, "{\n"); 1091 | while (member != NULL) { 1092 | for (i = 0; i < indent_level + 1; i++) 1093 | sb_puts(out, space); 1094 | emit_string(out, member->key); 1095 | sb_puts(out, ": "); 1096 | emit_value_indented(out, member, space, indent_level + 1); 1097 | 1098 | member = member->next; 1099 | sb_puts(out, member != NULL ? ",\n" : "\n"); 1100 | } 1101 | for (i = 0; i < indent_level; i++) 1102 | sb_puts(out, space); 1103 | sb_putc(out, '}'); 1104 | } 1105 | 1106 | void emit_string(SB *out, const char *str) 1107 | { 1108 | bool escape_unicode = false; 1109 | const char *s = str; 1110 | char *b; 1111 | 1112 | assert(utf8_validate(str)); 1113 | 1114 | /* 1115 | * 14 bytes is enough space to write up to two 1116 | * \uXXXX escapes and two quotation marks. 1117 | */ 1118 | sb_need(out, 14); 1119 | b = out->cur; 1120 | 1121 | *b++ = '"'; 1122 | while (*s != 0) { 1123 | unsigned char c = *s++; 1124 | 1125 | /* Encode the next character, and write it to b. 
*/ 1126 | switch (c) { 1127 | case '"': 1128 | *b++ = '\\'; 1129 | *b++ = '"'; 1130 | break; 1131 | case '\\': 1132 | *b++ = '\\'; 1133 | *b++ = '\\'; 1134 | break; 1135 | case '\b': 1136 | *b++ = '\\'; 1137 | *b++ = 'b'; 1138 | break; 1139 | case '\f': 1140 | *b++ = '\\'; 1141 | *b++ = 'f'; 1142 | break; 1143 | case '\n': 1144 | *b++ = '\\'; 1145 | *b++ = 'n'; 1146 | break; 1147 | case '\r': 1148 | *b++ = '\\'; 1149 | *b++ = 'r'; 1150 | break; 1151 | case '\t': 1152 | *b++ = '\\'; 1153 | *b++ = 't'; 1154 | break; 1155 | default: { 1156 | int len; 1157 | 1158 | s--; 1159 | len = utf8_validate_cz(s); 1160 | 1161 | if (len == 0) { 1162 | /* 1163 | * Handle invalid UTF-8 character gracefully in production 1164 | * by writing a replacement character (U+FFFD) 1165 | * and skipping a single byte. 1166 | * 1167 | * This should never happen when assertions are enabled 1168 | * due to the assertion at the beginning of this function. 1169 | */ 1170 | assert(false); 1171 | if (escape_unicode) { 1172 | strcpy(b, "\\uFFFD"); 1173 | b += 6; 1174 | } else { 1175 | *b++ = 0xEF; 1176 | *b++ = 0xBF; 1177 | *b++ = 0xBD; 1178 | } 1179 | s++; 1180 | } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) { 1181 | /* Encode using \u.... */ 1182 | uint32_t unicode; 1183 | 1184 | s += utf8_read_char(s, &unicode); 1185 | 1186 | if (unicode <= 0xFFFF) { 1187 | *b++ = '\\'; 1188 | *b++ = 'u'; 1189 | b += write_hex16(b, unicode); 1190 | } else { 1191 | /* Produce a surrogate pair. */ 1192 | uint16_t uc, lc; 1193 | assert(unicode <= 0x10FFFF); 1194 | to_surrogate_pair(unicode, &uc, &lc); 1195 | *b++ = '\\'; 1196 | *b++ = 'u'; 1197 | b += write_hex16(b, uc); 1198 | *b++ = '\\'; 1199 | *b++ = 'u'; 1200 | b += write_hex16(b, lc); 1201 | } 1202 | } else { 1203 | /* Write the character directly. 
*/ 1204 | while (len--) 1205 | *b++ = *s++; 1206 | } 1207 | 1208 | break; 1209 | } 1210 | } 1211 | 1212 | /* 1213 | * Update *out to know about the new bytes, 1214 | * and set up b to write another encoded character. 1215 | */ 1216 | out->cur = b; 1217 | sb_need(out, 14); 1218 | b = out->cur; 1219 | } 1220 | *b++ = '"'; 1221 | 1222 | out->cur = b; 1223 | } 1224 | 1225 | static void emit_number(SB *out, double num) 1226 | { 1227 | /* 1228 | * This isn't exactly how JavaScript renders numbers, 1229 | * but it should produce valid JSON for reasonable numbers 1230 | * preserve precision well enough, and avoid some oddities 1231 | * like 0.3 -> 0.299999999999999988898 . 1232 | */ 1233 | char buf[64]; 1234 | sprintf(buf, "%.16g", num); 1235 | 1236 | if (number_is_valid(buf)) 1237 | sb_puts(out, buf); 1238 | else 1239 | sb_puts(out, "null"); 1240 | } 1241 | 1242 | static bool tag_is_valid(unsigned int tag) 1243 | { 1244 | return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT); 1245 | } 1246 | 1247 | static bool number_is_valid(const char *num) 1248 | { 1249 | return (parse_number(&num, NULL) && *num == '\0'); 1250 | } 1251 | 1252 | static bool expect_literal(const char **sp, const char *str) 1253 | { 1254 | const char *s = *sp; 1255 | 1256 | while (*str != '\0') 1257 | if (*s++ != *str++) 1258 | return false; 1259 | 1260 | *sp = s; 1261 | return true; 1262 | } 1263 | 1264 | /* 1265 | * Parses exactly 4 hex characters (capital or lowercase). 1266 | * Fails if any input chars are not [0-9A-Fa-f]. 
1267 | */ 1268 | static bool parse_hex16(const char **sp, uint16_t *out) 1269 | { 1270 | const char *s = *sp; 1271 | uint16_t ret = 0; 1272 | uint16_t i; 1273 | uint16_t tmp; 1274 | char c; 1275 | 1276 | for (i = 0; i < 4; i++) { 1277 | c = *s++; 1278 | if (c >= '0' && c <= '9') 1279 | tmp = c - '0'; 1280 | else if (c >= 'A' && c <= 'F') 1281 | tmp = c - 'A' + 10; 1282 | else if (c >= 'a' && c <= 'f') 1283 | tmp = c - 'a' + 10; 1284 | else 1285 | return false; 1286 | 1287 | ret <<= 4; 1288 | ret += tmp; 1289 | } 1290 | 1291 | if (out) 1292 | *out = ret; 1293 | *sp = s; 1294 | return true; 1295 | } 1296 | 1297 | /* 1298 | * Encodes a 16-bit number into hexadecimal, 1299 | * writing exactly 4 hex chars. 1300 | */ 1301 | static int write_hex16(char *out, uint16_t val) 1302 | { 1303 | const char *hex = "0123456789ABCDEF"; 1304 | 1305 | *out++ = hex[(val >> 12) & 0xF]; 1306 | *out++ = hex[(val >> 8) & 0xF]; 1307 | *out++ = hex[(val >> 4) & 0xF]; 1308 | *out++ = hex[ val & 0xF]; 1309 | 1310 | return 4; 1311 | } 1312 | 1313 | bool json_check(const JsonNode *node, char errmsg[256]) 1314 | { 1315 | #define problem(...) 
do { \ 1316 | if (errmsg != NULL) \ 1317 | snprintf(errmsg, 256, __VA_ARGS__); \ 1318 | return false; \ 1319 | } while (0) 1320 | 1321 | if (node->key != NULL && !utf8_validate(node->key)) 1322 | problem("key contains invalid UTF-8"); 1323 | 1324 | if (!tag_is_valid(node->tag)) 1325 | problem("tag is invalid (%u)", node->tag); 1326 | 1327 | if (node->tag == JSON_BOOL) { 1328 | if (node->bool_ != false && node->bool_ != true) 1329 | problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true); 1330 | } else if (node->tag == JSON_STRING) { 1331 | if (node->string_ == NULL) 1332 | problem("string_ is NULL"); 1333 | if (!utf8_validate(node->string_)) 1334 | problem("string_ contains invalid UTF-8"); 1335 | } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) { 1336 | JsonNode *head = node->children.head; 1337 | JsonNode *tail = node->children.tail; 1338 | 1339 | if (head == NULL || tail == NULL) { 1340 | if (head != NULL) 1341 | problem("tail is NULL, but head is not"); 1342 | if (tail != NULL) 1343 | problem("head is NULL, but tail is not"); 1344 | } else { 1345 | JsonNode *child; 1346 | JsonNode *last = NULL; 1347 | 1348 | if (head->prev != NULL) 1349 | problem("First child's prev pointer is not NULL"); 1350 | 1351 | for (child = head; child != NULL; last = child, child = child->next) { 1352 | if (child == node) 1353 | problem("node is its own child"); 1354 | if (child->next == child) 1355 | problem("child->next == child (cycle)"); 1356 | if (child->next == head) 1357 | problem("child->next == head (cycle)"); 1358 | 1359 | if (child->parent != node) 1360 | problem("child does not point back to parent"); 1361 | if (child->next != NULL && child->next->prev != child) 1362 | problem("child->next does not point back to child"); 1363 | 1364 | if (node->tag == JSON_ARRAY && child->key != NULL) 1365 | problem("Array element's key is not NULL"); 1366 | if (node->tag == JSON_OBJECT && child->key == NULL) 1367 | problem("Object member's key is NULL"); 
1368 | 1369 | if (!json_check(child, errmsg)) 1370 | return false; 1371 | } 1372 | 1373 | if (last != tail) 1374 | problem("tail does not match pointer found by starting at head and following next links"); 1375 | } 1376 | } 1377 | 1378 | return true; 1379 | 1380 | #undef problem 1381 | } 1382 | -------------------------------------------------------------------------------- /json.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com) 3 | All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | */ 23 | 24 | #ifndef CCAN_JSON_H 25 | #define CCAN_JSON_H 26 | 27 | #include 28 | #include 29 | 30 | typedef enum { 31 | JSON_NULL, 32 | JSON_BOOL, 33 | JSON_STRING, 34 | JSON_NUMBER, 35 | JSON_ARRAY, 36 | JSON_OBJECT, 37 | } JsonTag; 38 | 39 | typedef struct JsonNode JsonNode; 40 | 41 | struct JsonNode 42 | { 43 | /* only if parent is an object or array (NULL otherwise) */ 44 | JsonNode *parent; 45 | JsonNode *prev, *next; 46 | 47 | /* only if parent is an object (NULL otherwise) */ 48 | char *key; /* Must be valid UTF-8. */ 49 | 50 | JsonTag tag; 51 | union { 52 | /* JSON_BOOL */ 53 | bool bool_; 54 | 55 | /* JSON_STRING */ 56 | char *string_; /* Must be valid UTF-8. */ 57 | 58 | /* JSON_NUMBER */ 59 | double number_; 60 | 61 | /* JSON_ARRAY */ 62 | /* JSON_OBJECT */ 63 | struct { 64 | JsonNode *head, *tail; 65 | } children; 66 | }; 67 | }; 68 | 69 | /*** Encoding, decoding, and validation ***/ 70 | 71 | JsonNode *json_decode (const char *json); 72 | char *json_encode (const JsonNode *node); 73 | char *json_encode_string (const char *str); 74 | char *json_stringify (const JsonNode *node, const char *space); 75 | void json_delete (JsonNode *node); 76 | 77 | bool json_validate (const char *json); 78 | 79 | /*** Lookup and traversal ***/ 80 | 81 | JsonNode *json_find_element (JsonNode *array, int index); 82 | JsonNode *json_find_member (JsonNode *object, const char *key); 83 | 84 | JsonNode *json_first_child (const JsonNode *node); 85 | 86 | #define json_foreach(i, object_or_array) \ 87 | for ((i) = json_first_child(object_or_array); \ 88 | (i) != NULL; \ 89 | (i) = (i)->next) 90 | 91 | /*** Construction and manipulation ***/ 92 | 93 | JsonNode *json_mknull(void); 94 | JsonNode *json_mkbool(bool b); 95 | JsonNode *json_mkstring(const char *s); 96 | JsonNode *json_mknumber(double n); 97 | JsonNode *json_mkarray(void); 98 | JsonNode *json_mkobject(void); 99 | 100 | void json_append_element(JsonNode *array, JsonNode *element); 101 | void 
json_prepend_element(JsonNode *array, JsonNode *element); 102 | void json_append_member(JsonNode *object, const char *key, JsonNode *value); 103 | void json_prepend_member(JsonNode *object, const char *key, JsonNode *value); 104 | 105 | void json_remove_from_parent(JsonNode *node); 106 | 107 | /*** Debugging ***/ 108 | 109 | /* 110 | * Look for structure and encoding problems in a JsonNode or its descendents. 111 | * 112 | * If a problem is detected, return false, writing a description of the problem 113 | * to errmsg (unless errmsg is NULL). 114 | */ 115 | bool json_check(const JsonNode *node, char errmsg[256]); 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /mqttcollect.1: -------------------------------------------------------------------------------- 1 | .TH "MQTTCOLLECT" "1" "May 13, 2015" "User Manuals" "" 2 | .SH NAME 3 | .PP 4 | mqttcollect \- MQTT\-based Exec\-plugin for collectd 5 | .SH SYNOPSIS 6 | .PP 7 | mqttcollect [\-v ] [\-f \f[I]file\f[]] 8 | .SH DESCRIPTION 9 | .PP 10 | \f[I]mqttcollect\f[] is an executable program which is used with 11 | collectd(1). 12 | It subscribes to any number of MQTT topics you specify, and prints 13 | values to stdout for collectd to process in an exec plugin block. 14 | .IP 15 | .nf 16 | \f[C] 17 | PUTVAL\ tiggr/mqtt‐sys/gauge‐clients.inactive\ 1430914033:0.00 18 | \f[] 19 | .fi 20 | .PP 21 | \f[I]collectd\f[] launches \f[I]mqttcollect\f[] which connects to the 22 | configured MQTT broker, subscribes and waits for publishes to subscribed 23 | topics in an endless loop. 24 | If an error occurs or the program exits for whichever reason, 25 | \f[I]collectd\f[] will restart and log the reason in its log file. 26 | .PP 27 | \f[I]mqttcollect\f[] supports TLS connections to the MQTT broker, 28 | username/password authentication, and TLS\-PSK, all configured via its 29 | configuration file. 
30 | .SH OPTIONS 31 | .PP 32 | \f[I]mqttcollect\f[] understands the following options. 33 | .TP 34 | .B \-f \f[I]file\f[] 35 | Specify an ini\-type configuration file (see below), which defaults to 36 | \f[C]/usr/local/etc/mqttcollect.ini\f[]. 37 | .RS 38 | .RE 39 | .TP 40 | .B \-v 41 | Verbose. 42 | .RS 43 | .RE 44 | .SH CONFIGURATION 45 | .PP 46 | \f[I]mqttcollect\f[] requires a configuration file to operate. 47 | This ini\-type file must have a \f[C][defaults]\f[] section in which 48 | general program parameters are configured, and it will have any number 49 | of additional sections specifying the MQTT topics it is to subscribe to. 50 | For the defaults section, please consult the example file provided with 51 | the source code for a list of allowed settings. 52 | .PP 53 | Within a \f[I]topic\f[] section, metrics collected by \f[I]collectd\f[] 54 | are specified. 55 | .IP 56 | .nf 57 | \f[C] 58 | [defaults] 59 | host\ =\ localhost 60 | port\ =\ 1883 61 | 62 | ;\ (1)\ subscribe\ to\ a\ wildcard\ and\ produce\ three\ metrics\ per\ subscription. 63 | ;\ the\ metric\ names\ are\ interpolated\ with\ `tid\[aq]\ from\ the\ JSON\ message 64 | ;\ payload,\ and\ the\ values\ of\ each\ metric\ are\ obtained\ from\ the 65 | ;\ JSON\ element\ behind\ the\ `<\[aq] 66 | 67 | [owntracks/+/+] 68 | gauge\ =\ vehicle/{tid}/speed 162 | \ \ \ #\ influxdb 163 | \ \ \ Server\ "127.0.0.1"\ "25826" 164 | 165 | \f[] 166 | .fi 167 | .PP 168 | Configure \f[I]collectd\f[] to load our executable \f[I]mqttcollect\f[] 169 | via its exec mechanism. 170 | Specify \f[I]mqttcollect\f[]\[aq]s options as individual strings in the 171 | \f[C]Exec\f[] invocation. 172 | .IP 173 | .nf 174 | \f[C] 175 | LoadPlugin\ exec 176 | 177 | 178 | \ \ \ Exec\ "mosquitto:mosquitto"\ "/usr/bin/mqttcollect"\ "‐f"\ "/etc/my.ini" 179 | 180 | \f[] 181 | .fi 182 | .SH BUGS 183 | .PP 184 | Yes. 
185 | .SH AVAILABILITY 186 | .PP 187 | 188 | .SH CREDITS 189 | .IP \[bu] 2 190 | This program uses \f[I]libmosquitto\f[], a library provided by the 191 | Mosquitto project as well as some of the 192 | excellent include files provided by 193 | 194 | .SH INSTALLATION 195 | .IP \[bu] 2 196 | Obtain the source code for \f[I]mqttcollect\f[], adjust the 197 | \f[C]Makefile\f[] and run \f[C]make\f[]. 198 | .SH SEE ALSO 199 | .IP \[bu] 2 200 | \f[C]collectd\f[](1). 201 | .IP \[bu] 2 202 | 203 | .SH AUTHOR 204 | .PP 205 | Jan\-Piet Mens 206 | .SH AUTHORS 207 | Jan\-Piet Mens. 208 | -------------------------------------------------------------------------------- /mqttcollect.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Jan-Piet Mens 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, 9 | * this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of mosquitto nor the names of its 14 | * contributors may be used to endorse or promote products derived from 15 | * this software without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | * POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include "uthash.h" 42 | #include "utstring.h" 43 | #include "json.h" 44 | #include "ini.h" /* https://github.com/benhoyt/inih */ 45 | 46 | #ifndef TRUE 47 | # define TRUE (1) 48 | #endif 49 | #ifndef FALSE 50 | # define FALSE (0) 51 | #endif 52 | 53 | #define SSL_VERIFY_PEER (1) 54 | #define SSL_VERIFY_NONE (0) 55 | 56 | #define PROGNAME "mqttcollect" 57 | #define CONFIGFILE "/usr/local/etc/mqttcollect.ini" 58 | #define SECTION "defaults" 59 | 60 | typedef struct { 61 | const char *host; 62 | const char *nodename; /* for collectd; defaults to short uname */ 63 | int port; 64 | const char *username; 65 | const char *password; 66 | const char *psk_key; 67 | const char *psk_identity; 68 | const char *ca_file; 69 | const char *certfile; 70 | const char *keyfile; 71 | const char *progname; 72 | const char *prefix; 73 | } config; 74 | 75 | static config cf = { 76 | .host = "localhost", 77 | .port = 1883, 78 | .progname = PROGNAME 79 | }; 80 | 81 | /* 82 | * A hash of metrics with their name (metric), type (e.g. gauge) and 83 | * optional JSON element. 
84 | */ 85 | 86 | struct metrics_h { 87 | const char *metric; 88 | const char *type; 89 | const char *element; /* If NULL, not JSON */ 90 | UT_hash_handle hh; 91 | }; 92 | 93 | struct topics_h { 94 | const char *topic; /* MQTT topic */ 95 | struct metrics_h *mh; /* hash of metrics to produce per topic */ 96 | UT_hash_handle hh; 97 | }; 98 | static struct topics_h *topics_h = NULL; 99 | 100 | static int verbose = FALSE; 101 | 102 | 103 | #define _eq(n) (strcmp(key, n) == 0) 104 | static int handler(void *cf, const char *section, const char *key, const char *val) 105 | { 106 | config *c = (config *)cf; 107 | struct topics_h *th; 108 | struct metrics_h *mh; 109 | static UT_string *elem, *metric; 110 | char *p; 111 | 112 | utstring_renew(elem); 113 | utstring_renew(metric); 114 | 115 | // printf("section=%s >%s<-->%s\n", section, key, val); 116 | 117 | if (!strcmp(section, SECTION)) { 118 | 119 | if (_eq("host")) 120 | c->host = strdup(val); 121 | if (_eq("username")) 122 | c->username = strdup(val); 123 | if (_eq("password")) 124 | c->password = strdup(val); 125 | if (_eq("psk_key")) 126 | c->psk_key = strdup(val); 127 | if (_eq("psk_identity")) 128 | c->psk_identity = strdup(val); 129 | if (_eq("ca_file")) 130 | c->ca_file = strdup(val); 131 | if (_eq("certfile")) 132 | c->certfile = strdup(val); 133 | if (_eq("keyfile")) 134 | c->keyfile = strdup(val); 135 | if (_eq("nodename")) 136 | c->nodename = strdup(val); 137 | if (_eq("progname")) 138 | c->progname = strdup(val); 139 | if (_eq("prefix")) 140 | c->prefix = strdup(val); 141 | 142 | if (_eq("port")) 143 | c->port = atoi(val); 144 | 145 | return (1); 146 | } 147 | 148 | /* 149 | * The Section name is MQTT topic. If we've not yet seen this, add 150 | * it to the hash, otherwise, push the new metric into the it's 151 | * array. 
152 | * The entry's key is the metric type (gauge, counter) 153 | * 154 | * [owntracks/gw/+] <-- section => topic 155 | * gauge = cars/{tid}/speed@vel 156 | * 157 | * key: "gauge" 158 | * val: "cars/{tid}/speed@vel" 159 | * ^^^^^^^^^^^^^^^^ ^^^ 160 | * metric elem 161 | * 162 | * [$SYS/broker/uptime] 163 | * counter = * 164 | */ 165 | 166 | if ((p = strchr(val, '<')) != NULL) { /* "topic = strdup(section); 185 | 186 | HASH_ADD_KEYPTR( hh, topics_h, th->topic, strlen(th->topic), th ); 187 | 188 | /* experiment: add to metric_h with this hash */ 189 | 190 | th->mh = NULL; 191 | mh = (struct metrics_h *)malloc(sizeof(struct metrics_h)); 192 | mh->metric = strdup(utstring_body(metric)); 193 | mh->type = strdup(key); 194 | mh->element = utstring_len(elem) ? strdup(utstring_body(elem)) : NULL; 195 | HASH_ADD_KEYPTR( hh, th->mh, mh->metric, strlen(mh->metric), mh ); 196 | 197 | 198 | } else { 199 | HASH_FIND_STR(th->mh, val, mh); 200 | if (mh) { 201 | puts("PANIC!!!"); 202 | } else { 203 | mh = (struct metrics_h *)malloc(sizeof(struct metrics_h)); 204 | mh->metric = strdup(utstring_body(metric)); 205 | mh->type = strdup(key); 206 | mh->element = utstring_len(elem) ? 
strdup(utstring_body(elem)) : NULL; 207 | HASH_ADD_KEYPTR( hh, th->mh, mh->metric, strlen(mh->metric), mh ); 208 | } 209 | 210 | } 211 | 212 | return (1); 213 | } 214 | 215 | 216 | static struct mosquitto *m = NULL; 217 | 218 | /* 219 | * User data for Mosquitto 220 | */ 221 | 222 | struct udata { 223 | char *nodename; 224 | struct topics_h *topics_h; 225 | }; 226 | 227 | void catcher(int sig) 228 | { 229 | fprintf(stderr, "Going down on signal %d\n", sig); 230 | 231 | if (m) { 232 | mosquitto_disconnect(m); 233 | mosquitto_loop_stop(m, false); 234 | mosquitto_lib_cleanup(); 235 | } 236 | exit(1); 237 | } 238 | 239 | void fatal(void) 240 | { 241 | if (m) { 242 | mosquitto_disconnect(m); 243 | mosquitto_loop_stop(m, false); 244 | mosquitto_lib_cleanup(); 245 | } 246 | exit(1); 247 | } 248 | 249 | double json_object(JsonNode *json, const char *element) 250 | { 251 | JsonNode *m; 252 | double value = 0.0L; 253 | 254 | if ((m = json_find_member(json, element)) == NULL) 255 | return (value); 256 | 257 | if (m && m->tag == JSON_STRING) { 258 | value = atof(m->string_); 259 | } else if (m && m->tag == JSON_NUMBER) { 260 | value = m->number_; 261 | } 262 | 263 | return (value); 264 | } 265 | 266 | /* 267 | * Expand the content of `line', which may have one or more {token} 268 | * in it into `res', using the decoded JSON at `json'. 
269 | */ 270 | 271 | void xexpand(UT_string *res, const char *line, JsonNode *json) 272 | { 273 | JsonNode *m; 274 | static UT_string *token; 275 | const char *lp = line; 276 | 277 | utstring_renew(token); 278 | 279 | for (lp = line; lp && *lp; lp++ ) { 280 | if (*lp == '\\') { 281 | utstring_printf(res, "%c", *++lp); 282 | continue; 283 | } 284 | if (*lp != '{') { 285 | utstring_printf(res, "%c", *lp); 286 | continue; 287 | } 288 | 289 | utstring_renew(token); 290 | if (*++lp == '}') { /* skip over this { */ 291 | /* Empty token; push back */ 292 | utstring_printf(res, "%c", *lp); 293 | continue; 294 | } 295 | 296 | do { 297 | utstring_printf(token, "%c", *lp++); 298 | } while (*lp && *lp != '}'); 299 | // printf("TOKEN=[%s]\n", utstring_body(token)); 300 | 301 | // printf("LAST=%d\n", *lp); 302 | if (*lp != '}') { 303 | /* Push back, incl leading brace */ 304 | utstring_printf(res, "{%s", utstring_body(token)); 305 | break; 306 | } 307 | 308 | 309 | /* See if `token' is a JSON element, and if so, interpolate 310 | * its value. If token is not in JSON, stuff it back to 311 | * indicate the error. 
312 | */ 313 | 314 | if ((m = json_find_member(json, utstring_body(token))) != NULL) { 315 | if (m && m->tag == JSON_STRING) { 316 | utstring_printf(res, "%s", m->string_); 317 | } else if (m && m->tag == JSON_NUMBER) { 318 | utstring_printf(res, "%lf", m->number_); 319 | } else { 320 | utstring_printf(res, "FIXME-JSON"); 321 | } 322 | } else { 323 | /* stuff token and its braces back into result */ 324 | utstring_printf(res, "{%s}", utstring_body(token)); 325 | } 326 | } 327 | } 328 | 329 | 330 | 331 | void cb_sub(struct mosquitto *mosq, void *userdata, const struct mosquitto_message *msg) 332 | { 333 | char *topic = msg->topic; 334 | char *payload = msg->payload; 335 | static UT_string *pfix; 336 | struct udata *ud = (struct udata *)userdata; 337 | time_t now; 338 | struct topics_h *th, *currth = NULL; 339 | bool bf; 340 | struct metrics_h *mh; 341 | 342 | 343 | /* 344 | * We can't try to find topic in our hash, because this may be the 345 | * result of a wildcard subscription. Instead, see if one of the 346 | * topics in hash matches the subscription. Slower, but I can't 347 | * help that. 348 | */ 349 | 350 | for (th = topics_h; th != NULL; th = th->hh.next) { 351 | if (mosquitto_topic_matches_sub(th->topic, topic, &bf) == MOSQ_ERR_SUCCESS) { 352 | if (bf == 1) { 353 | currth = th; 354 | break; 355 | } 356 | } 357 | } 358 | 359 | if (currth == NULL) { 360 | puts("HUH? PANIC? topic not found"); 361 | return; 362 | } 363 | 364 | time(&now); 365 | 366 | utstring_renew(pfix); 367 | utstring_printf(pfix, "%s", cf.progname); 368 | if (cf.prefix && *cf.prefix) { 369 | utstring_printf(pfix, "-%s", cf.prefix); 370 | } 371 | 372 | /* 373 | * For each of the metrics configured for this subscription, do the 374 | * "needful". 375 | * If `element' in metric is NULL, use the original payload; otherwise 376 | * it's the name of a JSON element in the (assumed) JSON payload. 
377 | */ 378 | 379 | for (mh = currth->mh; mh != NULL; mh = mh->hh.next) { 380 | JsonNode *json; 381 | double number = -1.0L; 382 | static UT_string *metric_name; 383 | 384 | utstring_renew(metric_name); 385 | 386 | if (verbose) 387 | fprintf(stderr, " =====[ %s ] (%s) %s\n", mh->metric, mh->type, mh->element); 388 | 389 | if (mh->element && strcmp(mh->element, "*") != 0) { /* JSON */ 390 | if ((json = json_decode(payload)) == NULL) { 391 | continue; 392 | } 393 | utstring_clear(metric_name); 394 | xexpand(metric_name, mh->metric, json); 395 | 396 | number = json_object(json, mh->element); 397 | 398 | json_delete(json); 399 | 400 | } else if (mh->element) { 401 | if (strcmp(mh->element, "*") == 0) { 402 | utstring_printf(metric_name, "%s", topic); 403 | } else { 404 | utstring_printf(metric_name, "%s", mh->metric); 405 | } 406 | number = atof(payload); 407 | } else { 408 | utstring_printf(metric_name, "%s", mh->metric); 409 | number = atof(payload); 410 | } 411 | 412 | printf("PUTVAL %s/%s/%s-%s %ld:%.2lf\n", 413 | ud->nodename, 414 | utstring_body(pfix), 415 | mh->type, 416 | utstring_body(metric_name), 417 | now, 418 | number); 419 | } 420 | } 421 | 422 | void cb_connect(struct mosquitto *mosq, void *userdata, int rc) 423 | { 424 | struct udata *ud = (struct udata *)userdata; 425 | struct topics_h *th; 426 | 427 | /* 428 | * Set up an MQTT subscription for each of the topics we have 429 | * in the topics hash. 430 | */ 431 | 432 | for (th = ud->topics_h; th != NULL; th = th->hh.next) { 433 | // fprintf(stderr, "%s: subscribe to %s\n", PROGNAME, th->topic); 434 | mosquitto_subscribe(m, NULL, th->topic, 0); 435 | } 436 | } 437 | 438 | void cb_disconnect(struct mosquitto *mosq, void *userdata, int rc) 439 | { 440 | char *explain = NULL; 441 | 442 | if (rc == 0) { 443 | // Disconnect requested by client 444 | } else { 445 | switch (rc) { 446 | case 7: explain = "Broker disconnected. 
Reconnecting.."; break; 447 | } 448 | 449 | if (explain) { 450 | fprintf(stderr, "%s: disconnected: reason: %d (%s) [%s]\n", 451 | PROGNAME, rc, strerror(errno), explain); 452 | return; 453 | } 454 | 455 | fprintf(stderr, "%s: disconnected: reason: %d (%s)\n", 456 | PROGNAME, rc, strerror(errno)); 457 | fatal(); 458 | } 459 | } 460 | 461 | int main(int argc, char **argv) 462 | { 463 | char *progname = *argv; 464 | int ch, usage = 0, rc; 465 | struct utsname uts; 466 | char clientid[80]; 467 | int keepalive = 60; 468 | int tls_insecure = FALSE; 469 | struct udata udata; 470 | char *configfile = CONFIGFILE; 471 | 472 | setvbuf(stdout, NULL, _IONBF, 0); 473 | 474 | while ((ch = getopt(argc, argv, "vs:f:")) != EOF) { 475 | switch (ch) { 476 | case 'v': 477 | verbose = TRUE; 478 | break; 479 | 480 | case 's': 481 | tls_insecure = TRUE; 482 | break; 483 | case 'f': 484 | configfile = strdup(optarg); 485 | break; 486 | default: 487 | usage = 1; 488 | break; 489 | } 490 | } 491 | 492 | 493 | 494 | if (ini_parse(configfile, handler, &cf) < 0) { 495 | fprintf(stderr, "%s: Can't load '%s'\n", PROGNAME, configfile); 496 | return 1; 497 | } 498 | 499 | if (usage) { 500 | fprintf(stderr, "Usage: %s [-v] [-s] [-f configfile]\n", progname); 501 | exit(1); 502 | } 503 | 504 | /* Determine nodename: either use the -h value of the MQTT broker 505 | * or get local nodename */ 506 | 507 | if (cf.nodename == NULL) { 508 | if (uname(&uts) == 0) { 509 | char *p; 510 | cf.nodename = strdup(uts.nodename); 511 | 512 | if ((p = strchr(cf.nodename, '.')) != NULL) 513 | *p = 0; 514 | } else { 515 | cf.nodename = strdup("unknown"); 516 | } 517 | } 518 | 519 | mosquitto_lib_init(); 520 | 521 | udata.nodename = (char *)cf.nodename; 522 | udata.topics_h = topics_h; 523 | 524 | sprintf(clientid, "%s-%d", PROGNAME, getpid()); 525 | if ((m = mosquitto_new(clientid, TRUE, (void *)&udata)) == NULL) { 526 | fprintf(stderr, "Out of memory.\n"); 527 | exit(1); 528 | } 529 | 530 | if (cf.psk_key && 
cf.psk_identity) { 531 | rc = mosquitto_tls_psk_set(m, cf.psk_key, cf.psk_identity,NULL); 532 | if (rc != MOSQ_ERR_SUCCESS) { 533 | fprintf(stderr, "Cannot set TLS PSK: %s\n", 534 | mosquitto_strerror(rc)); 535 | exit(3); 536 | } 537 | } else if (cf.ca_file) { 538 | rc = mosquitto_tls_set(m, 539 | cf.ca_file, /* cafile */ 540 | NULL, /* capath */ 541 | cf.certfile, /* certfile */ 542 | cf.keyfile, /* keyfile */ 543 | NULL /* pw_callback() */ 544 | ); 545 | if (rc != MOSQ_ERR_SUCCESS) { 546 | fprintf(stderr, "Cannot set TLS CA: %s (check path names)\n", 547 | mosquitto_strerror(rc)); 548 | exit(3); 549 | } 550 | 551 | mosquitto_tls_opts_set(m, 552 | SSL_VERIFY_PEER, 553 | NULL, /* tls_version: "tlsv1.2", "tlsv1" */ 554 | NULL /* ciphers */ 555 | ); 556 | 557 | if (tls_insecure) { 558 | #if LIBMOSQUITTO_VERSION_NUMBER >= 1002000 559 | /* mosquitto_tls_insecure_set() requires libmosquitto 1.2. */ 560 | mosquitto_tls_insecure_set(m, TRUE); 561 | #endif 562 | } 563 | } 564 | 565 | if (cf.username) { 566 | mosquitto_username_pw_set(m, cf.username, cf.password); 567 | } 568 | 569 | mosquitto_message_callback_set(m, cb_sub); 570 | mosquitto_connect_callback_set(m, cb_connect); 571 | mosquitto_disconnect_callback_set(m, cb_disconnect); 572 | 573 | mosquitto_reconnect_delay_set(m, 574 | 1, /* delay */ 575 | 10, /* delay_max */ 576 | FALSE); /* exponential backoff */ 577 | 578 | if ((rc = mosquitto_connect(m, cf.host, cf.port, keepalive)) != MOSQ_ERR_SUCCESS) { 579 | fprintf(stderr, "Unable to connect to %s:%d: %s\n", cf.host, cf.port, 580 | mosquitto_strerror(rc)); 581 | perror(""); 582 | exit(2); 583 | } 584 | 585 | signal(SIGINT, catcher); 586 | 587 | while (1) { 588 | rc = mosquitto_loop_forever(m, -1, 1); 589 | fprintf(stderr, "loop_forever returns %d\n", rc); 590 | } 591 | 592 | /* Unreached */ 593 | 594 | /* 595 | * There's a tonne of memory we ought to free (topics_h, etc) but 596 | * we don't get here, so nobody will notice... 
597 | */ 598 | 599 | 600 | mosquitto_disconnect(m); 601 | mosquitto_lib_cleanup(); 602 | 603 | return 0; 604 | } 605 | -------------------------------------------------------------------------------- /mqttcollect.ini.example: -------------------------------------------------------------------------------- 1 | ; Configuration for mqttcollect 2 | ; Comments are introduced with ; also inline after white space 3 | ; White space around '=' is stripped 4 | 5 | [defaults] 6 | host = localhost 7 | port = 1883 8 | ; username = jjolie 9 | ; password = s1c#ret 10 | ; psk_key = 11 | ; psk_identity = 12 | ; ca_file = 13 | ; certfile = 14 | ; keyfile = 15 | ; nodename = foob 16 | 17 | ; progname is the program name (default "mqttcollect") used in PUTVAL 18 | ; prefix is optional (defaults to NULL) and can be used to 19 | ; differentiate multiple services of the same type. 20 | ; PUTVAL /[-]/- 21 | 22 | ; progname = mqttcollect 23 | ; prefix = PREFIX 24 | 25 | ; Examples 26 | ; password = s1ckr3t ; a password 27 | ; password = s1c;k r3t ; a password with a semicolon and a space in it 28 | ; password = s1c r3t ; a password with two spaces in it! 29 | ; password = s1c#ret ; a password with a hash symbol 30 | 31 | ; subscribe to a wildcard and produce three metrics per subscription. 32 | ; the metric names are interpolated with `tid' from the JSON message 33 | ; payload, and the values of each metric are obtained from the 34 | ; JSON element behind the `<' 35 | 36 | [owntracks/+/+] 37 | gauge = vehicle/{tid}/speed 127 | # influxdb 128 | Server "127.0.0.1" "25826" 129 | 130 | 131 | Configure _collectd_ to load our executable *mqttcollect* via its exec 132 | mechanism. Specify *mqttcollect*'s options as individual strings in the 133 | `Exec` invocation. 134 | 135 | LoadPlugin exec 136 | 137 | 138 | Exec "mosquitto:mosquitto" "/usr/bin/mqttcollect" "‐f" "/etc/my.ini" 139 | 140 | 141 | # BUGS 142 | 143 | Yes. 
144 | 145 | # AVAILABILITY 146 | 147 | 148 | 149 | # CREDITS 150 | 151 | * This program uses *libmosquitto*, a library provided by the Mosquitto 152 | project as well as some of the excellent 153 | include files provided by 154 | 155 | # INSTALLATION 156 | 157 | * Obtain the source code for *mqttcollect*, adjust the `Makefile` and run `make`. 158 | 159 | # SEE ALSO 160 | 161 | * `collectd`(1). 162 | * 163 | 164 | # AUTHOR 165 | 166 | Jan-Piet Mens 167 | 168 | -------------------------------------------------------------------------------- /temperature-simulator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import paho.mqtt.publish as mqtt 5 | import random 6 | import json 7 | import time 8 | 9 | topic = 'arduino/temp/002' 10 | 11 | while True: 12 | celsius = float("%.2f" % (random.random() * 40)) 13 | 14 | data = { 15 | 'room' : 'kitchen', 16 | 'celsius' : celsius, 17 | 'fahrenheit' : float("%.2f" % (9.0 / 5.0 * celsius + 32)), 18 | } 19 | 20 | 21 | payload = json.dumps(data) 22 | 23 | print topic, payload 24 | mqtt.single(topic, payload, retain=True) 25 | 26 | time.sleep(1) 27 | -------------------------------------------------------------------------------- /uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #include /* memcmp,strlen */ 28 | #include /* ptrdiff_t */ 29 | #include /* exit() */ 30 | 31 | /* These macros use decltype or the earlier __typeof GNU extension. 32 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 33 | when compiling c++ source) this code uses whatever method is needed 34 | or, for VS2008 where neither is available, uses casting workarounds. 
*/ 35 | #if defined(_MSC_VER) /* MS compiler */ 36 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 37 | #define DECLTYPE(x) (decltype(x)) 38 | #else /* VS2008 or older (or VS2010 in C mode) */ 39 | #define NO_DECLTYPE 40 | #define DECLTYPE(x) 41 | #endif 42 | #elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) 43 | #define NO_DECLTYPE 44 | #define DECLTYPE(x) 45 | #else /* GNU, Sun and other compilers */ 46 | #define DECLTYPE(x) (__typeof(x)) 47 | #endif 48 | 49 | #ifdef NO_DECLTYPE 50 | #define DECLTYPE_ASSIGN(dst,src) \ 51 | do { \ 52 | char **_da_dst = (char**)(&(dst)); \ 53 | *_da_dst = (char*)(src); \ 54 | } while(0) 55 | #else 56 | #define DECLTYPE_ASSIGN(dst,src) \ 57 | do { \ 58 | (dst) = DECLTYPE(dst)(src); \ 59 | } while(0) 60 | #endif 61 | 62 | /* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ 63 | #if defined (_WIN32) 64 | #if defined(_MSC_VER) && _MSC_VER >= 1600 65 | #include 66 | #elif defined(__WATCOMC__) 67 | #include 68 | #else 69 | typedef unsigned int uint32_t; 70 | typedef unsigned char uint8_t; 71 | #endif 72 | #else 73 | #include 74 | #endif 75 | 76 | #define UTHASH_VERSION 1.9.9 77 | 78 | #ifndef uthash_fatal 79 | #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ 80 | #endif 81 | #ifndef uthash_malloc 82 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 83 | #endif 84 | #ifndef uthash_free 85 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 86 | #endif 87 | 88 | #ifndef uthash_noexpand_fyi 89 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 90 | #endif 91 | #ifndef uthash_expand_fyi 92 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 93 | #endif 94 | 95 | /* initial number of buckets */ 96 | #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 97 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 98 | #define HASH_BKT_CAPACITY_THRESH 
10U /* expand when bucket count reaches */ 99 | 100 | /* calculate the element whose hash handle address is hhe */ 101 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 102 | 103 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 104 | do { \ 105 | out=NULL; \ 106 | if (head != NULL) { \ 107 | unsigned _hf_bkt,_hf_hashv; \ 108 | HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ 109 | if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) { \ 110 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ 111 | keyptr,keylen,out); \ 112 | } \ 113 | } \ 114 | } while (0) 115 | 116 | #ifdef HASH_BLOOM 117 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 118 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) 119 | #define HASH_BLOOM_MAKE(tbl) \ 120 | do { \ 121 | (tbl)->bloom_nbits = HASH_BLOOM; \ 122 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 123 | if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ 124 | memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ 125 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 126 | } while (0) 127 | 128 | #define HASH_BLOOM_FREE(tbl) \ 129 | do { \ 130 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 131 | } while (0) 132 | 133 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 134 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) 135 | 136 | #define HASH_BLOOM_ADD(tbl,hashv) \ 137 | HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 138 | 139 | #define HASH_BLOOM_TEST(tbl,hashv) \ 140 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) 141 | 142 | #else 143 | #define HASH_BLOOM_MAKE(tbl) 144 | #define HASH_BLOOM_FREE(tbl) 145 | #define HASH_BLOOM_ADD(tbl,hashv) 146 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 147 | #define HASH_BLOOM_BYTELEN 0U 148 | #endif 149 | 150 | 
#define HASH_MAKE_TABLE(hh,head) \ 151 | do { \ 152 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ 153 | sizeof(UT_hash_table)); \ 154 | if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ 155 | memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ 156 | (head)->hh.tbl->tail = &((head)->hh); \ 157 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 158 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 159 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 160 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 161 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 162 | if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ 163 | memset((head)->hh.tbl->buckets, 0, \ 164 | HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ 165 | HASH_BLOOM_MAKE((head)->hh.tbl); \ 166 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 167 | } while(0) 168 | 169 | #define HASH_ADD(hh,head,fieldname,keylen_in,add) \ 170 | HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) 171 | 172 | #define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ 173 | do { \ 174 | replaced=NULL; \ 175 | HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ 176 | if (replaced!=NULL) { \ 177 | HASH_DELETE(hh,head,replaced); \ 178 | } \ 179 | HASH_ADD(hh,head,fieldname,keylen_in,add); \ 180 | } while(0) 181 | 182 | #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ 183 | do { \ 184 | unsigned _ha_bkt; \ 185 | (add)->hh.next = NULL; \ 186 | (add)->hh.key = (char*)(keyptr); \ 187 | (add)->hh.keylen = (unsigned)(keylen_in); \ 188 | if (!(head)) { \ 189 | head = (add); \ 190 | (head)->hh.prev = NULL; \ 191 | HASH_MAKE_TABLE(hh,head); \ 192 | } else { \ 193 | (head)->hh.tbl->tail->next = (add); \ 194 | (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ 195 | (head)->hh.tbl->tail = &((add)->hh); \ 196 | } \ 197 | (head)->hh.tbl->num_items++; \ 198 | (add)->hh.tbl = 
(head)->hh.tbl; \ 199 | HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ 200 | (add)->hh.hashv, _ha_bkt); \ 201 | HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ 202 | HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ 203 | HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ 204 | HASH_FSCK(hh,head); \ 205 | } while(0) 206 | 207 | #define HASH_TO_BKT( hashv, num_bkts, bkt ) \ 208 | do { \ 209 | bkt = ((hashv) & ((num_bkts) - 1U)); \ 210 | } while(0) 211 | 212 | /* delete "delptr" from the hash table. 213 | * "the usual" patch-up process for the app-order doubly-linked-list. 214 | * The use of _hd_hh_del below deserves special explanation. 215 | * These used to be expressed using (delptr) but that led to a bug 216 | * if someone used the same symbol for the head and deletee, like 217 | * HASH_DELETE(hh,users,users); 218 | * We want that to work, but by changing the head (users) below 219 | * we were forfeiting our ability to further refer to the deletee (users) 220 | * in the patch-up process. Solution: use scratch space to 221 | * copy the deletee pointer, then the latter references are via that 222 | * scratch pointer rather than through the repointed (users) symbol. 
223 | */ 224 | #define HASH_DELETE(hh,head,delptr) \ 225 | do { \ 226 | struct UT_hash_handle *_hd_hh_del; \ 227 | if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ 228 | uthash_free((head)->hh.tbl->buckets, \ 229 | (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ 230 | HASH_BLOOM_FREE((head)->hh.tbl); \ 231 | uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ 232 | head = NULL; \ 233 | } else { \ 234 | unsigned _hd_bkt; \ 235 | _hd_hh_del = &((delptr)->hh); \ 236 | if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ 237 | (head)->hh.tbl->tail = \ 238 | (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 239 | (head)->hh.tbl->hho); \ 240 | } \ 241 | if ((delptr)->hh.prev != NULL) { \ 242 | ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ 243 | (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ 244 | } else { \ 245 | DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ 246 | } \ 247 | if (_hd_hh_del->next != NULL) { \ 248 | ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ 249 | (head)->hh.tbl->hho))->prev = \ 250 | _hd_hh_del->prev; \ 251 | } \ 252 | HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 253 | HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ 254 | (head)->hh.tbl->num_items--; \ 255 | } \ 256 | HASH_FSCK(hh,head); \ 257 | } while (0) 258 | 259 | 260 | /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ 261 | #define HASH_FIND_STR(head,findstr,out) \ 262 | HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out) 263 | #define HASH_ADD_STR(head,strfield,add) \ 264 | HASH_ADD(hh,head,strfield[0],(unsigned int)strlen(add->strfield),add) 265 | #define HASH_REPLACE_STR(head,strfield,add,replaced) \ 266 | HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen(add->strfield),add,replaced) 267 | #define HASH_FIND_INT(head,findint,out) \ 268 | HASH_FIND(hh,head,findint,sizeof(int),out) 269 | #define HASH_ADD_INT(head,intfield,add) \ 270 | 
HASH_ADD(hh,head,intfield,sizeof(int),add) 271 | #define HASH_REPLACE_INT(head,intfield,add,replaced) \ 272 | HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) 273 | #define HASH_FIND_PTR(head,findptr,out) \ 274 | HASH_FIND(hh,head,findptr,sizeof(void *),out) 275 | #define HASH_ADD_PTR(head,ptrfield,add) \ 276 | HASH_ADD(hh,head,ptrfield,sizeof(void *),add) 277 | #define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ 278 | HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) 279 | #define HASH_DEL(head,delptr) \ 280 | HASH_DELETE(hh,head,delptr) 281 | 282 | /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. 283 | * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. 284 | */ 285 | #ifdef HASH_DEBUG 286 | #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) 287 | #define HASH_FSCK(hh,head) \ 288 | do { \ 289 | struct UT_hash_handle *_thh; \ 290 | if (head) { \ 291 | unsigned _bkt_i; \ 292 | unsigned _count; \ 293 | char *_prev; \ 294 | _count = 0; \ 295 | for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ 296 | unsigned _bkt_count = 0; \ 297 | _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ 298 | _prev = NULL; \ 299 | while (_thh) { \ 300 | if (_prev != (char*)(_thh->hh_prev)) { \ 301 | HASH_OOPS("invalid hh_prev %p, actual %p\n", \ 302 | _thh->hh_prev, _prev ); \ 303 | } \ 304 | _bkt_count++; \ 305 | _prev = (char*)(_thh); \ 306 | _thh = _thh->hh_next; \ 307 | } \ 308 | _count += _bkt_count; \ 309 | if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ 310 | HASH_OOPS("invalid bucket count %u, actual %u\n", \ 311 | (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ 312 | } \ 313 | } \ 314 | if (_count != (head)->hh.tbl->num_items) { \ 315 | HASH_OOPS("invalid hh item count %u, actual %u\n", \ 316 | (head)->hh.tbl->num_items, _count ); \ 317 | } \ 318 | /* traverse hh in app order; check next/prev integrity, count */ \ 319 | _count = 0; \ 
320 | _prev = NULL; \ 321 | _thh = &(head)->hh; \ 322 | while (_thh) { \ 323 | _count++; \ 324 | if (_prev !=(char*)(_thh->prev)) { \ 325 | HASH_OOPS("invalid prev %p, actual %p\n", \ 326 | _thh->prev, _prev ); \ 327 | } \ 328 | _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ 329 | _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ 330 | (head)->hh.tbl->hho) : NULL ); \ 331 | } \ 332 | if (_count != (head)->hh.tbl->num_items) { \ 333 | HASH_OOPS("invalid app item count %u, actual %u\n", \ 334 | (head)->hh.tbl->num_items, _count ); \ 335 | } \ 336 | } \ 337 | } while (0) 338 | #else 339 | #define HASH_FSCK(hh,head) 340 | #endif 341 | 342 | /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 343 | * the descriptor to which this macro is defined for tuning the hash function. 344 | * The app can #include to get the prototype for write(2). */ 345 | #ifdef HASH_EMIT_KEYS 346 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ 347 | do { \ 348 | unsigned _klen = fieldlen; \ 349 | write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ 350 | write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ 351 | } while (0) 352 | #else 353 | #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) 354 | #endif 355 | 356 | /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ 357 | #ifdef HASH_FUNCTION 358 | #define HASH_FCN HASH_FUNCTION 359 | #else 360 | #define HASH_FCN HASH_JEN 361 | #endif 362 | 363 | /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
*/ 364 | #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ 365 | do { \ 366 | unsigned _hb_keylen=(unsigned)keylen; \ 367 | const unsigned char *_hb_key=(const unsigned char*)(key); \ 368 | (hashv) = 0; \ 369 | while (_hb_keylen-- != 0U) { \ 370 | (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ 371 | } \ 372 | bkt = (hashv) & (num_bkts-1U); \ 373 | } while (0) 374 | 375 | 376 | /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at 377 | * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ 378 | #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ 379 | do { \ 380 | unsigned _sx_i; \ 381 | const unsigned char *_hs_key=(const unsigned char*)(key); \ 382 | hashv = 0; \ 383 | for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ 384 | hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ 385 | } \ 386 | bkt = hashv & (num_bkts-1U); \ 387 | } while (0) 388 | /* FNV-1a variation */ 389 | #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ 390 | do { \ 391 | unsigned _fn_i; \ 392 | const unsigned char *_hf_key=(const unsigned char*)(key); \ 393 | hashv = 2166136261U; \ 394 | for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ 395 | hashv = hashv ^ _hf_key[_fn_i]; \ 396 | hashv = hashv * 16777619U; \ 397 | } \ 398 | bkt = hashv & (num_bkts-1U); \ 399 | } while(0) 400 | 401 | #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ 402 | do { \ 403 | unsigned _ho_i; \ 404 | const unsigned char *_ho_key=(const unsigned char*)(key); \ 405 | hashv = 0; \ 406 | for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ 407 | hashv += _ho_key[_ho_i]; \ 408 | hashv += (hashv << 10); \ 409 | hashv ^= (hashv >> 6); \ 410 | } \ 411 | hashv += (hashv << 3); \ 412 | hashv ^= (hashv >> 11); \ 413 | hashv += (hashv << 15); \ 414 | bkt = hashv & (num_bkts-1U); \ 415 | } while(0) 416 | 417 | #define HASH_JEN_MIX(a,b,c) \ 418 | do { \ 419 | a -= b; a -= c; a ^= ( c >> 13 ); \ 420 | b -= c; b -= a; b ^= ( a << 8 ); \ 421 | c -= a; c -= b; c ^= ( b >> 13 ); \ 422 | a -= b; a -= c; a ^= ( c >> 12 
); \ 423 | b -= c; b -= a; b ^= ( a << 16 ); \ 424 | c -= a; c -= b; c ^= ( b >> 5 ); \ 425 | a -= b; a -= c; a ^= ( c >> 3 ); \ 426 | b -= c; b -= a; b ^= ( a << 10 ); \ 427 | c -= a; c -= b; c ^= ( b >> 15 ); \ 428 | } while (0) 429 | 430 | #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ 431 | do { \ 432 | unsigned _hj_i,_hj_j,_hj_k; \ 433 | unsigned const char *_hj_key=(unsigned const char*)(key); \ 434 | hashv = 0xfeedbeefu; \ 435 | _hj_i = _hj_j = 0x9e3779b9u; \ 436 | _hj_k = (unsigned)(keylen); \ 437 | while (_hj_k >= 12U) { \ 438 | _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ 439 | + ( (unsigned)_hj_key[2] << 16 ) \ 440 | + ( (unsigned)_hj_key[3] << 24 ) ); \ 441 | _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ 442 | + ( (unsigned)_hj_key[6] << 16 ) \ 443 | + ( (unsigned)_hj_key[7] << 24 ) ); \ 444 | hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ 445 | + ( (unsigned)_hj_key[10] << 16 ) \ 446 | + ( (unsigned)_hj_key[11] << 24 ) ); \ 447 | \ 448 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 449 | \ 450 | _hj_key += 12; \ 451 | _hj_k -= 12U; \ 452 | } \ 453 | hashv += (unsigned)(keylen); \ 454 | switch ( _hj_k ) { \ 455 | case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ 456 | case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ 457 | case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ 458 | case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ 459 | case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ 460 | case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ 461 | case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ 462 | case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ 463 | case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ 464 | case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ 465 | case 1: _hj_i += _hj_key[0]; \ 466 | } \ 467 | HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ 468 | bkt = hashv & 
(num_bkts-1U); \ 469 | } while(0) 470 | 471 | /* The Paul Hsieh hash function */ 472 | #undef get16bits 473 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 474 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 475 | #define get16bits(d) (*((const uint16_t *) (d))) 476 | #endif 477 | 478 | #if !defined (get16bits) 479 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ 480 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 481 | #endif 482 | #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ 483 | do { \ 484 | unsigned const char *_sfh_key=(unsigned const char*)(key); \ 485 | uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ 486 | \ 487 | unsigned _sfh_rem = _sfh_len & 3U; \ 488 | _sfh_len >>= 2; \ 489 | hashv = 0xcafebabeu; \ 490 | \ 491 | /* Main loop */ \ 492 | for (;_sfh_len > 0U; _sfh_len--) { \ 493 | hashv += get16bits (_sfh_key); \ 494 | _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ 495 | hashv = (hashv << 16) ^ _sfh_tmp; \ 496 | _sfh_key += 2U*sizeof (uint16_t); \ 497 | hashv += hashv >> 11; \ 498 | } \ 499 | \ 500 | /* Handle end cases */ \ 501 | switch (_sfh_rem) { \ 502 | case 3: hashv += get16bits (_sfh_key); \ 503 | hashv ^= hashv << 16; \ 504 | hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ 505 | hashv += hashv >> 11; \ 506 | break; \ 507 | case 2: hashv += get16bits (_sfh_key); \ 508 | hashv ^= hashv << 11; \ 509 | hashv += hashv >> 17; \ 510 | break; \ 511 | case 1: hashv += *_sfh_key; \ 512 | hashv ^= hashv << 10; \ 513 | hashv += hashv >> 1; \ 514 | } \ 515 | \ 516 | /* Force "avalanching" of final 127 bits */ \ 517 | hashv ^= hashv << 3; \ 518 | hashv += hashv >> 5; \ 519 | hashv ^= hashv << 4; \ 520 | hashv += hashv >> 17; \ 521 | hashv ^= hashv << 25; \ 522 | hashv += hashv >> 6; \ 523 | bkt = hashv & (num_bkts-1U); \ 524 | } while(0) 525 | 526 | #ifdef HASH_USING_NO_STRICT_ALIASING 527 | /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for 
unaligned reads. 528 | * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. 529 | * MurmurHash uses the faster approach only on CPU's where we know it's safe. 530 | * 531 | * Note the preprocessor built-in defines can be emitted using: 532 | * 533 | * gcc -m64 -dM -E - < /dev/null (on gcc) 534 | * cc -## a.c (where a.c is a simple test file) (Sun Studio) 535 | */ 536 | #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) 537 | #define MUR_GETBLOCK(p,i) p[i] 538 | #else /* non intel */ 539 | #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) 540 | #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) 541 | #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) 542 | #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) 543 | #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) 544 | #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) 545 | #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) 546 | #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) 547 | #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) 548 | #else /* assume little endian non-intel */ 549 | #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) 550 | #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) 551 | #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) 552 | #endif 553 | #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ 554 | (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ 555 | (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \
556 | MUR_ONE_THREE(p))))
557 | #endif
      /* MUR_ROTL32: rotate x left by r bits, built from a left shift by r and a
       * right shift by (32 - r).
       * NOTE(review): assumes x is an unsigned 32-bit value and 0 < r < 32;
       * the two uses visible here pass uint32_t values with r = 15 and r = 13. */
558 | #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
      /* MUR_FMIX: in-place xor-shift / multiply sequence applied to the lvalue _h.
       * HASH_MUR uses it as its last mixing step before storing the hash value.
       * _h is expanded several times, so it must be a plain lvalue without side
       * effects. */
559 | #define MUR_FMIX(_h) \
560 | do { \
561 |   _h ^= _h >> 16; \
562 |   _h *= 0x85ebca6bu; \
563 |   _h ^= _h >> 13; \
564 |   _h *= 0xc2b2ae35u; \
565 |   _h ^= _h >> 16; \
566 | } while(0)
567 |
      /* HASH_MUR: hash the keylen bytes at key and derive a bucket index.
       *
       *   key      - pointer to the key bytes
       *   keylen   - key length in bytes (cast to int to count 4-byte blocks)
       *   num_bkts - bucket count; bkt is computed as hashv & (num_bkts-1), which
       *              only yields a valid index when num_bkts is a power of two
       *   hashv    - lvalue that receives the 32-bit hash
       *   bkt      - lvalue that receives the bucket index
       *
       * Steps: every whole 4-byte block is fetched with MUR_GETBLOCK (indexed
       * with negative offsets counted back from the end of the block region) and
       * mixed into _mur_h1; the 1-3 trailing bytes are folded in by the switch,
       * whose cases fall through on purpose; keylen is xor-ed in; MUR_FMIX
       * finishes the value.
       * NOTE(review): the constants and structure look like the 32-bit
       * MurmurHash3 scheme - confirm against the reference before relying on
       * compatible hash values. */
568 | #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
569 | do { \
570 |   const uint8_t *_mur_data = (const uint8_t*)(key); \
571 |   const int _mur_nblocks = (int)(keylen) / 4; \
572 |   uint32_t _mur_h1 = 0xf88D5353u; \
573 |   uint32_t _mur_c1 = 0xcc9e2d51u; \
574 |   uint32_t _mur_c2 = 0x1b873593u; \
575 |   uint32_t _mur_k1 = 0; \
576 |   const uint8_t *_mur_tail; \
577 |   const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
578 |   int _mur_i; \
579 |   for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \
580 |     _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
581 |     _mur_k1 *= _mur_c1; \
582 |     _mur_k1 = MUR_ROTL32(_mur_k1,15); \
583 |     _mur_k1 *= _mur_c2; \
584 |     \
585 |     _mur_h1 ^= _mur_k1; \
586 |     _mur_h1 = MUR_ROTL32(_mur_h1,13); \
587 |     _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \
588 |   } \
589 |   _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \
590 |   _mur_k1=0; \
591 |   switch((keylen) & 3U) { \
592 |     case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
593 |     case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \
594 |     case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \
595 |     _mur_k1 *= _mur_c1; \
596 |     _mur_k1 = MUR_ROTL32(_mur_k1,15); \
597 |     _mur_k1 *= _mur_c2; \
598 |     _mur_h1 ^= _mur_k1; \
599 |   } \
600 |   _mur_h1 ^= (uint32_t)(keylen); \
601 |   MUR_FMIX(_mur_h1); \
602 |   hashv = _mur_h1; \
603 |   bkt = hashv & (num_bkts-1U); \
604 | } while(0)
605 | #endif /* HASH_USING_NO_STRICT_ALIASING */
606 |
607 | /* key comparison function; return 0 if keys equal */
608 | #define HASH_KEYCMP(a,b,len) memcmp(a,b,(unsigned long)(len))
609 |
610 | /* iterate over items in a known bucket to find desired item */
      /* HASH_FIND_IN_BKT: walk one bucket chain looking for a key.
       *
       *   tbl       - hash table, passed to ELMT_FROM_HH to turn a handle pointer
       *               back into an element pointer
       *   hh        - name of the hash handle member inside the element
       *   head      - the bucket (a UT_hash_bucket lvalue, not a pointer)
       *   keyptr    - key bytes to look for
       *   keylen_in - length of that key
       *   out       - element pointer lvalue; set to the first element whose
       *               stored keylen equals keylen_in and whose key bytes compare
       *               equal under HASH_KEYCMP, or to NULL when the chain is
       *               exhausted
       *
       * The length test comes first, so HASH_KEYCMP only runs on keys of equal
       * length. */
611 | #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
612 | do { \
613 |   if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \
614 |   else { out=NULL; } \
615 |   while (out != NULL) { \
616 |     if ((out)->hh.keylen == (keylen_in)) { \
617 |       if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; } \
618 |     } \
619 |     if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \
620 |     else { out = NULL; } \
621 |   } \
622 | } while(0)
623 |
624 | /* add an item to a bucket */
      /* HASH_ADD_TO_BKT: push the handle addhh onto the front of bucket head's
       * chain and bump the bucket's count. When the count reaches
       * (expand_mult+1) * HASH_BKT_CAPACITY_THRESH and the table's noexpand flag
       * is not 1, HASH_EXPAND_BUCKETS is run on the handle's table.
       * NOTE(review): head is used both as `head.` and `(head).`, so callers
       * must pass a simple bucket lvalue. */
625 | #define HASH_ADD_TO_BKT(head,addhh) \
626 | do { \
627 |   head.count++; \
628 |   (addhh)->hh_next = head.hh_head; \
629 |   (addhh)->hh_prev = NULL; \
630 |   if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \
631 |   (head).hh_head=addhh; \
632 |   if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \
633 |       && ((addhh)->tbl->noexpand != 1U)) { \
634 |     HASH_EXPAND_BUCKETS((addhh)->tbl); \
635 |   } \
636 | } while(0)
637 |
638 | /* remove an item from a given bucket */
      /* HASH_DEL_IN_BKT: unlink the handle hh_del from bucket head's chain and
       * decrement the bucket's count. The handle's own hh_prev/hh_next fields
       * are left as they were. The hh parameter is not referenced by the
       * expansion.
       * NOTE(review): unlike the neighbouring macros this expands to a bare
       * statement sequence (no do { } while(0) wrapper) and hh_del is not
       * parenthesized, so it is only safe as a full statement inside a braced
       * block with a plain pointer variable as hh_del. */
639 | #define HASH_DEL_IN_BKT(hh,head,hh_del) \
640 |   (head).count--; \
641 |   if ((head).hh_head == hh_del) { \
642 |     (head).hh_head = hh_del->hh_next; \
643 |   } \
644 |   if (hh_del->hh_prev) { \
645 |     hh_del->hh_prev->hh_next = hh_del->hh_next; \
646 |   } \
647 |   if (hh_del->hh_next) { \
648 |     hh_del->hh_next->hh_prev = hh_del->hh_prev; \
649 |   }
650 |
651 | /* Bucket expansion has the effect of doubling the number of buckets
652 | * and redistributing the items into the new buckets. Ideally the
653 | * items will distribute more or less evenly into the new buckets
654 | * (the extent to which this is true is a measure of the quality of
655 | * the hash function as it applies to the key domain).
656 | *
657 | * With the items distributed into more buckets, the chain length
658 | * (item count) in each bucket is reduced. Thus by expanding buckets
659 | * the hash keeps a bound on the chain length.
This bounded chain
660 | * length is the essence of how a hash provides constant time lookup.
661 | *
662 | * The calculation of tbl->ideal_chain_maxlen below deserves some
663 | * explanation. First, keep in mind that we're calculating the ideal
664 | * maximum chain length based on the *new* (doubled) bucket count.
665 | * In fractions this is just n/b (n=number of items,b=new num buckets).
666 | * Since the ideal chain length is an integer, we want to calculate
667 | * ceil(n/b). We don't depend on floating point arithmetic in this
668 | * hash, so to calculate ceil(n/b) with integers we could write
669 | *
670 | * ceil(n/b) = (n/b) + ((n%b)?1:0)
671 | *
672 | * and in fact a previous version of this hash did just that.
673 | * But now we have improved things a bit by recognizing that b is
674 | * always a power of two. We keep its base 2 log handy (call it lb),
675 | * so now we can write this with a bit shift and logical AND:
676 | *
677 | * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
678 | *
679 | */
      /* HASH_EXPAND_BUCKETS(tbl): double the bucket array of tbl and rehash.
       *
       * Step by step:
       *   1. allocate 2 * num_buckets zeroed buckets with uthash_malloc; a NULL
       *      result goes to uthash_fatal("out of memory");
       *   2. recompute ideal_chain_maxlen for the doubled bucket count and reset
       *      nonideal_items;
       *   3. move every handle of every old chain to the front of its new chain,
       *      using the stored hashv (keys are not hashed again). Each handle that
       *      lands beyond ideal_chain_maxlen counts as non-ideal and refreshes
       *      that bucket's expand_mult;
       *   4. release the old array with uthash_free and install the new one,
       *      doubling num_buckets and incrementing log2_num_buckets;
       *   5. ineff_expands is incremented when more than half the items are
       *      non-ideal and reset to 0 otherwise; once it exceeds 1, noexpand is
       *      set and uthash_noexpand_fyi is invoked. uthash_expand_fyi is
       *      invoked on every expansion.
       *
       * Only hh_prev/hh_next (bucket order) are rewritten; the prev/next
       * application-order links are not touched.
       * NOTE(review): tbl is expanded unparenthesized and many times, so pass a
       * plain pointer expression. The division by ideal_chain_maxlen assumes
       * tbl->num_items is non-zero at this point - confirm for every caller. */
680 | #define HASH_EXPAND_BUCKETS(tbl) \
681 | do { \
682 |   unsigned _he_bkt; \
683 |   unsigned _he_bkt_i; \
684 |   struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
685 |   UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
686 |   _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
687 |       2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
688 |   if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
689 |   memset(_he_new_buckets, 0, \
690 |       2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
691 |   tbl->ideal_chain_maxlen = \
692 |       (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \
693 |       (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \
694 |   tbl->nonideal_items = 0; \
695 |   for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
696 |   { \
697 |     _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
698 |     while (_he_thh != NULL) { \
699 |       _he_hh_nxt = _he_thh->hh_next; \
700 |       HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \
701 |       _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
702 |       if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
703 |         tbl->nonideal_items++; \
704 |         _he_newbkt->expand_mult = _he_newbkt->count / \
705 |             tbl->ideal_chain_maxlen; \
706 |       } \
707 |       _he_thh->hh_prev = NULL; \
708 |       _he_thh->hh_next = _he_newbkt->hh_head; \
709 |       if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \
710 |           _he_thh; } \
711 |       _he_newbkt->hh_head = _he_thh; \
712 |       _he_thh = _he_hh_nxt; \
713 |     } \
714 |   } \
715 |   uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
716 |   tbl->num_buckets *= 2U; \
717 |   tbl->log2_num_buckets++; \
718 |   tbl->buckets = _he_new_buckets; \
719 |   tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
720 |       (tbl->ineff_expands+1U) : 0U; \
721 |   if (tbl->ineff_expands > 1U) { \
722 |     tbl->noexpand=1; \
723 |     uthash_noexpand_fyi(tbl); \
724 |   } \
725 |   uthash_expand_fyi(tbl); \
726 | } while(0)
727 |
728 |
729 | /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
730 | /* Note that HASH_SORT assumes the hash handle name to be hh.
731 | * HASH_SRT was added to allow the hash handle name to be passed in.
*/
732 | #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
      /* HASH_SRT(hh, head, cmpfcn): sort the elements of a hash in place.
       *
       *   hh     - name of the hash handle member inside the element
       *   head   - head pointer lvalue of the hash; reassigned to the element
       *            that sorts first. A NULL head makes the macro a no-op.
       *   cmpfcn - called with two element pointers (cast with DECLTYPE(head));
       *            a result <= 0 takes the element from the first run, so equal
       *            elements keep their relative order within a merge.
       *
       * How it works: a bottom-up merge sort over the application-order list.
       * The prev/next links hold element pointers, so every step to the next
       * handle adds tbl->hho to the element address. Each pass merges adjacent
       * runs of _hs_insize elements and the run size doubles after every pass;
       * the loop stops after a pass that needed at most one merge. That last
       * pass stores the new tail in tbl->tail and the new first element in head.
       *
       * Only prev/next are rewritten; the bucket chains (hh_prev/hh_next) are
       * not touched. HASH_FSCK(hh,head) is invoked after sorting. */
733 | #define HASH_SRT(hh,head,cmpfcn) \
734 | do { \
735 |   unsigned _hs_i; \
736 |   unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
737 |   struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
738 |   if (head != NULL) { \
739 |     _hs_insize = 1; \
740 |     _hs_looping = 1; \
741 |     _hs_list = &((head)->hh); \
742 |     while (_hs_looping != 0U) { \
743 |       _hs_p = _hs_list; \
744 |       _hs_list = NULL; \
745 |       _hs_tail = NULL; \
746 |       _hs_nmerges = 0; \
747 |       while (_hs_p != NULL) { \
748 |         _hs_nmerges++; \
749 |         _hs_q = _hs_p; \
750 |         _hs_psize = 0; \
751 |         for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
752 |           _hs_psize++; \
753 |           _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
754 |               ((void*)((char*)(_hs_q->next) + \
755 |               (head)->hh.tbl->hho)) : NULL); \
756 |           if (! (_hs_q) ) { break; } \
757 |         } \
758 |         _hs_qsize = _hs_insize; \
759 |         while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
760 |           if (_hs_psize == 0U) { \
761 |             _hs_e = _hs_q; \
762 |             _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
763 |                 ((void*)((char*)(_hs_q->next) + \
764 |                 (head)->hh.tbl->hho)) : NULL); \
765 |             _hs_qsize--; \
766 |           } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \
767 |             _hs_e = _hs_p; \
768 |             if (_hs_p != NULL){ \
769 |               _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
770 |                   ((void*)((char*)(_hs_p->next) + \
771 |                   (head)->hh.tbl->hho)) : NULL); \
772 |             } \
773 |             _hs_psize--; \
774 |           } else if (( \
775 |               cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
776 |               DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
777 |               ) <= 0) { \
778 |             _hs_e = _hs_p; \
779 |             if (_hs_p != NULL){ \
780 |               _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
781 |                   ((void*)((char*)(_hs_p->next) + \
782 |                   (head)->hh.tbl->hho)) : NULL); \
783 |             } \
784 |             _hs_psize--; \
785 |           } else { \
786 |             _hs_e = _hs_q; \
787 |             _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
788 |                 ((void*)((char*)(_hs_q->next) + \
789 |                 (head)->hh.tbl->hho)) : NULL); \
790 |             _hs_qsize--; \
791 |           } \
792 |           if ( _hs_tail != NULL ) { \
793 |             _hs_tail->next = ((_hs_e != NULL) ? \
794 |                 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
795 |           } else { \
796 |             _hs_list = _hs_e; \
797 |           } \
798 |           if (_hs_e != NULL) { \
799 |             _hs_e->prev = ((_hs_tail != NULL) ? \
800 |                 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
801 |           } \
802 |           _hs_tail = _hs_e; \
803 |         } \
804 |         _hs_p = _hs_q; \
805 |       } \
806 |       if (_hs_tail != NULL){ \
807 |         _hs_tail->next = NULL; \
808 |       } \
809 |       if ( _hs_nmerges <= 1U ) { \
810 |         _hs_looping=0; \
811 |         (head)->hh.tbl->tail = _hs_tail; \
812 |         DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
813 |       } \
814 |       _hs_insize *= 2U; \
815 |     } \
816 |     HASH_FSCK(hh,head); \
817 |   } \
818 | } while (0)
819 |
820 | /* This function selects items from one hash into another hash.
821 | * The end result is that the selected items have dual presence
822 | * in both hashes. There is no copy of the items made; rather
823 | * they are added into the new hash through a secondary hash
824 | * hash handle that must be present in the structure.
*/
      /* HASH_SELECT(hh_dst, dst, hh_src, src, cond)
       *
       *   hh_dst - name of the handle member used for the destination hash
       *   dst    - head pointer lvalue of the destination hash. While it is NULL
       *            the first selected element becomes the head and
       *            HASH_MAKE_TABLE creates the destination table.
       *   hh_src - name of the handle member used by the source hash
       *   src    - head pointer of the source hash; NULL selects nothing
       *   cond   - function or macro called with a void* to each source element;
       *            a non-zero result selects that element
       *
       * The source is visited bucket by bucket. For a selected element the key
       * pointer, key length and hash value are copied from the source handle
       * into the destination handle (the key is not hashed again). The element
       * is then linked after the previously selected one through prev/next and
       * pushed onto its destination bucket. HASH_FSCK(hh_dst,dst) runs at the
       * end. */
825 | #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
826 | do { \
827 |   unsigned _src_bkt, _dst_bkt; \
828 |   void *_last_elt=NULL, *_elt; \
829 |   UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
830 |   ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
831 |   if (src != NULL) { \
832 |     for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
833 |       for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
834 |           _src_hh != NULL; \
835 |           _src_hh = _src_hh->hh_next) { \
836 |         _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
837 |         if (cond(_elt)) { \
838 |           _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
839 |           _dst_hh->key = _src_hh->key; \
840 |           _dst_hh->keylen = _src_hh->keylen; \
841 |           _dst_hh->hashv = _src_hh->hashv; \
842 |           _dst_hh->prev = _last_elt; \
843 |           _dst_hh->next = NULL; \
844 |           if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \
845 |           if (dst == NULL) { \
846 |             DECLTYPE_ASSIGN(dst,_elt); \
847 |             HASH_MAKE_TABLE(hh_dst,dst); \
848 |           } else { \
849 |             _dst_hh->tbl = (dst)->hh_dst.tbl; \
850 |           } \
851 |           HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
852 |           HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
853 |           (dst)->hh_dst.tbl->num_items++; \
854 |           _last_elt = _elt; \
855 |           _last_elt_hh = _dst_hh; \
856 |         } \
857 |       } \
858 |     } \
859 |   } \
860 |   HASH_FSCK(hh_dst,dst); \
861 | } while (0)
862 |
      /* HASH_CLEAR(hh, head): release the bookkeeping of a hash - the bucket
       * array, the bloom filter (through HASH_BLOOM_FREE) and the table itself -
       * and set head to NULL. The elements are neither freed nor modified, so
       * their storage stays with the caller. A NULL head is a no-op. */
863 | #define HASH_CLEAR(hh,head) \
864 | do { \
865 |   if (head != NULL) { \
866 |     uthash_free((head)->hh.tbl->buckets, \
867 |         (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
868 |     HASH_BLOOM_FREE((head)->hh.tbl); \
869 |     uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
870 |     (head)=NULL; \
871 |   } \
872 | } while(0)
873 |
      /* HASH_OVERHEAD(hh, head): bytes of bookkeeping used by the hash, as a
       * size_t: one UT_hash_handle per item, one UT_hash_bucket per bucket, the
       * UT_hash_table and HASH_BLOOM_BYTELEN. Yields 0U when head is NULL. */
874 | #define HASH_OVERHEAD(hh,head) \
875 | ((head != NULL) ? ( \
876 |   (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
877 |   ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
878 |   sizeof(UT_hash_table) + \
879 |   (HASH_BLOOM_BYTELEN))) : 0U)
880 |
      /* HASH_ITER(hh, head, el, tmp): for-loop header that visits the elements
       * in application order (following hh.next). tmp is loaded with the
       * successor of el before the loop body runs and el is advanced from tmp
       * afterwards, so the loop header never reads el's links after the body.
       * The NO_DECLTYPE variant stores into tmp through a char** cast instead
       * of casting with DECLTYPE(el). */
881 | #ifdef NO_DECLTYPE
882 | #define HASH_ITER(hh,head,el,tmp) \
883 | for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
884 |   (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
885 | #else
886 | #define HASH_ITER(hh,head,el,tmp) \
887 | for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \
888 |   (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
889 | #endif
890 |
891 | /* obtain a count of items in the hash */
      /* HASH_COUNT assumes the handle member is named hh; HASH_CNT takes the
       * member name explicitly. Both yield 0U for a NULL head. */
892 | #define HASH_COUNT(head) HASH_CNT(hh,head)
893 | #define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)
894 |
      /* One bucket: the head of a chain of hash handles plus its bookkeeping. */
895 | typedef struct UT_hash_bucket {
896 |   struct UT_hash_handle *hh_head; /* first handle of the chain, NULL if empty */
897 |   unsigned count; /* number of handles currently in the chain */
898 |
899 |   /* expand_mult is normally set to 0. In this situation, the max chain length
900 |    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
901 |    * the bucket's chain exceeds this length, bucket expansion is triggered).
902 |    * However, setting expand_mult to a non-zero value delays bucket expansion
903 |    * (that would be triggered by additions to this particular bucket)
904 |    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
905 |    * (The multiplier is simply expand_mult+1). The whole idea of this
906 |    * multiplier is to reduce bucket expansions, since they are expensive, in
907 |    * situations where we know that a particular bucket tends to be overused.
908 |    * It is better to let its chain length grow to a longer yet-still-bounded
909 |    * value, than to do an O(n) bucket expansion too often.
910 |    */
911 |   unsigned expand_mult;
912 |
913 | } UT_hash_bucket;
914 |
915 | /* random signature used only to find hash tables in external analysis */
916 | #define HASH_SIGNATURE 0xa0111fe1u
917 | #define HASH_BLOOM_SIGNATURE 0xb12220f2u
918 |
      /* Per-hash bookkeeping, shared by every element of one hash through the
       * tbl pointer in its handle. */
919 | typedef struct UT_hash_table {
920 |   UT_hash_bucket *buckets;
921 |   unsigned num_buckets, log2_num_buckets;
922 |   unsigned num_items;
923 |   struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
924 |   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
925 |
926 |   /* in an ideal situation (all buckets used equally), no bucket would have
927 |    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
928 |   unsigned ideal_chain_maxlen;
929 |
930 |   /* nonideal_items is the number of items in the hash whose chain position
931 |    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
932 |    * hash distribution; reaching them in a chain traversal takes >ideal steps */
933 |   unsigned nonideal_items;
934 |
935 |   /* ineffective expands occur when a bucket doubling was performed, but
936 |    * afterward, more than half the items in the hash had nonideal chain
937 |    * positions. If this happens on two consecutive expansions we inhibit any
938 |    * further expansion, as it's not helping; this happens when the hash
939 |    * function isn't a good fit for the key domain. When expansion is inhibited
940 |    * the hash will still work, albeit no longer in constant time. */
941 |   unsigned ineff_expands, noexpand;
942 |
943 |   uint32_t signature; /* used only to find hash tables in external analysis */
944 | #ifdef HASH_BLOOM
945 |   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
946 |   uint8_t *bloom_bv;
947 |   uint8_t bloom_nbits;
948 | #endif
949 |
950 | } UT_hash_table;
951 |
      /* The handle embedded in every hashed element. It carries two independent
       * doubly linked lists: prev/next hold element pointers in application
       * order, hh_prev/hh_next hold handle pointers within one bucket chain. */
952 | typedef struct UT_hash_handle {
953 |   struct UT_hash_table *tbl;
954 |   void *prev; /* prev element in app order */
955 |   void *next; /* next element in app order */
956 |   struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
957 |   struct UT_hash_handle *hh_next; /* next hh in bucket order */
958 |   void *key; /* ptr to enclosing struct's key */
959 |   unsigned keylen; /* enclosing struct's key len */
960 |   unsigned hashv; /* result of hash-fcn(key) */
961 | } UT_hash_handle;
962 |
963 | #endif /* UTHASH_H */
964 |
--------------------------------------------------------------------------------
/utstring.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2008-2014, Troy D. Hanson http://troydhanson.github.com/uthash/
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 | PARTICULAR PURPOSE ARE DISCLAIMED.
/* IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* a dynamic string implementation using macros
 */
#ifndef UTSTRING_H
#define UTSTRING_H

#define UTSTRING_VERSION 1.9.9

#ifdef __GNUC__
#define _UNUSED_ __attribute__ ((__unused__))
#else
#define _UNUSED_
#endif

#include <stdlib.h>   /* realloc, calloc, malloc, free, exit */
#include <string.h>   /* memcpy */
#include <stdio.h>    /* vsnprintf */
#include <stdarg.h>   /* va_list, va_copy */

/* Called when an allocation fails. Define oom() before including this header
 * to install a different policy; it is expected not to return. */
#ifndef oom
#define oom() exit(-1)
#endif

/* A growable, always NUL-terminated byte string. */
typedef struct {
    char *d;
    size_t n; /* allocd size */
    size_t i; /* index of first unused byte */
} UT_string;

/* Make sure at least amt unused bytes are available after index i, growing
 * the buffer by amt bytes when they are not. The realloc result is checked
 * before it replaces d, so d is never overwritten with NULL on failure.
 * amt is evaluated more than once. The casts on the allocator results are
 * kept throughout this header so it also compiles as C++. */
#define utstring_reserve(s,amt)                                        \
do {                                                                   \
  if (((s)->n - (s)->i) < (size_t)(amt)) {                             \
    char *utstring_tmp_ = (char*)realloc((s)->d, (s)->n + (amt));      \
    if (utstring_tmp_ == NULL) oom();                                  \
    (s)->d = utstring_tmp_;                                            \
    (s)->n += (amt);                                                   \
  }                                                                    \
} while(0)

/* Initialise a caller-owned UT_string: 100 bytes reserved, empty contents. */
#define utstring_init(s)                                               \
do {                                                                   \
  (s)->n = 0; (s)->i = 0; (s)->d = NULL;                               \
  utstring_reserve(s,100);                                             \
  (s)->d[0] = '\0';                                                    \
} while(0)

/* Release the buffer of a UT_string set up with utstring_init. All fields
 * are reset, so calling it a second time is harmless. */
#define utstring_done(s)                                               \
do {                                                                   \
  free((s)->d);                                                        \
  (s)->d = NULL;                                                       \
  (s)->n = 0;                                                          \
  (s)->i = 0;                                                          \
} while(0)

/* Release a UT_string created with utstring_new (buffer and struct). */
#define utstring_free(s)                                               \
do {                                                                   \
  utstring_done(s);                                                    \
  free(s);                                                             \
} while(0)

/* Allocate and initialise a UT_string; s receives the new pointer. */
#define utstring_new(s)                                                \
do {                                                                   \
  s = (UT_string*)calloc(1, sizeof(UT_string));                        \
  if (!s) oom();                                                       \
  utstring_init(s);                                                    \
} while(0)

/* Empty an existing string, or create one when s is NULL. */
#define utstring_renew(s)                                              \
do {                                                                   \
  if (s) {                                                             \
    utstring_clear(s);                                                 \
  } else {                                                             \
    utstring_new(s);                                                   \
  }                                                                    \
} while(0)

/* Truncate to the empty string; the allocation is kept. */
#define utstring_clear(s)                                              \
do {                                                                   \
  (s)->i = 0;                                                          \
  (s)->d[0] = '\0';                                                    \
} while(0)

/* Append l bytes from b (embedded NUL bytes allowed) and re-terminate. */
#define utstring_bincpy(s,b,l)                                         \
do {                                                                   \
  utstring_reserve((s),(l)+1);                                         \
  if (l) memcpy(&(s)->d[(s)->i], b, l);                                \
  (s)->i += (l);                                                       \
  (s)->d[(s)->i]='\0';                                                 \
} while(0)

/* Append the contents of src to dst. */
#define utstring_concat(dst,src)                                       \
do {                                                                   \
  utstring_reserve((dst),((src)->i)+1);                                \
  if ((src)->i) memcpy(&(dst)->d[(dst)->i], (src)->d, (src)->i);       \
  (dst)->i += (src)->i;                                                \
  (dst)->d[(dst)->i]='\0';                                             \
} while(0)

/* Length in bytes, not counting the terminating NUL. */
#define utstring_len(s) ((unsigned)((s)->i))

/* Pointer to the NUL-terminated contents. */
#define utstring_body(s) ((s)->d)

/* Append printf-style formatted output to s.
 * The text is formatted straight into the unused tail of the buffer. When it
 * does not fit, the buffer is grown (to the exact size when vsnprintf
 * reported one, otherwise by twice its current size) and the call is
 * repeated with a fresh copy of the argument list. */
_UNUSED_ static void utstring_printf_va(UT_string *s, const char *fmt, va_list ap) {
   int n;
   va_list cp;
   while (1) {
#ifdef _WIN32
      cp = ap;
#else
      va_copy(cp, ap);
#endif
      n = vsnprintf (&s->d[s->i], s->n-s->i, fmt, cp);
      va_end(cp);

      if ((n > -1) && ((size_t) n < (s->n-s->i))) {
        s->i += n;
        return;
      }

      /* Else try again with more space. */
      if (n > -1) {
        utstring_reserve(s,n+1);      /* exact */
      } else {
        utstring_reserve(s,(s->n)*2); /* 2x */
      }
   }
}
#ifdef __GNUC__
/* support printf format checking (2=the format string, 3=start of varargs) */
static void utstring_printf(UT_string *s, const char *fmt, ...)
  __attribute__ (( format( printf, 2, 3) ));
#endif
/* Variadic front end of utstring_printf_va. */
_UNUSED_ static void utstring_printf(UT_string *s, const char *fmt, ...) {
   va_list ap;
   va_start(ap,fmt);
   utstring_printf_va(s,fmt,ap);
   va_end(ap);
}

/*******************************************************************************
 * begin substring search functions                                            *
 ******************************************************************************/
/* Build KMP table from left to right.
 * P_KMP_Table must have room for P_NeedleLen + 1 entries. */
_UNUSED_ static void _utstring_BuildTable(
    const char *P_Needle,
    size_t P_NeedleLen,
    long *P_KMP_Table)
{
    long i, j;

    i = 0;
    j = i - 1;
    P_KMP_Table[i] = j;
    while (i < (long) P_NeedleLen)
    {
        while ( (j > -1) && (P_Needle[i] != P_Needle[j]) )
        {
           j = P_KMP_Table[j];
        }
        i++;
        j++;
        if (i < (long) P_NeedleLen)
        {
            if (P_Needle[i] == P_Needle[j])
            {
                P_KMP_Table[i] = P_KMP_Table[j];
            }
            else
            {
                P_KMP_Table[i] = j;
            }
        }
        else
        {
            P_KMP_Table[i] = j;
        }
    }

    return;
}


/* Build KMP table from right to left.
 * P_KMP_Table must have room for P_NeedleLen + 1 entries. */
_UNUSED_ static void _utstring_BuildTableR(
    const char *P_Needle,
    size_t P_NeedleLen,
    long *P_KMP_Table)
{
    long i, j;

    i = (long) P_NeedleLen - 1;
    j = i + 1;
    P_KMP_Table[i + 1] = j;
    while (i >= 0)
    {
        while ( (j < (long) P_NeedleLen) && (P_Needle[i] != P_Needle[j]) )
        {
           j = P_KMP_Table[j + 1];
        }
        i--;
        j--;
        if (i >= 0)
        {
            if (P_Needle[i] == P_Needle[j])
            {
                P_KMP_Table[i + 1] = P_KMP_Table[j + 1];
            }
            else
            {
                P_KMP_Table[i + 1] = j;
            }
        }
        else
        {
            P_KMP_Table[i + 1] = j;
        }
    }

    return;
}


/* Search data from left to right. ( Multiple search mode. )
 * P_KMP_Table must come from _utstring_BuildTable for the same needle.
 * Returns the offset of the first match in P_Haystack, or -1. */
_UNUSED_ static long _utstring_find(
    const char *P_Haystack,
    size_t P_HaystackLen,
    const char *P_Needle,
    size_t P_NeedleLen,
    long *P_KMP_Table)
{
    long i, j;
    long V_FindPosition = -1;

    /* Search from left to right. */
    i = j = 0;
    /* Lengths are compared as long (the type of i and j) so that large sizes
     * are not truncated. */
    while ( (j < (long)P_HaystackLen) && (((P_HaystackLen - j) + i) >= P_NeedleLen) )
    {
        while ( (i > -1) && (P_Needle[i] != P_Haystack[j]) )
        {
            i = P_KMP_Table[i];
        }
        i++;
        j++;
        if (i >= (long)P_NeedleLen)
        {
            /* Found. */
            V_FindPosition = j - i;
            break;
        }
    }

    return V_FindPosition;
}


/* Search data from right to left. ( Multiple search mode. )
 * P_KMP_Table must come from _utstring_BuildTableR for the same needle.
 * Returns the offset of the last match in P_Haystack, or -1. */
_UNUSED_ static long _utstring_findR(
    const char *P_Haystack,
    size_t P_HaystackLen,
    const char *P_Needle,
    size_t P_NeedleLen,
    long *P_KMP_Table)
{
    long i, j;
    long V_FindPosition = -1;

    /* Search from right to left. */
    j = ((long)P_HaystackLen - 1);
    i = ((long)P_NeedleLen - 1);
    while ( (j >= 0) && (j >= i) )
    {
        while ( (i < (long)P_NeedleLen) && (P_Needle[i] != P_Haystack[j]) )
        {
            i = P_KMP_Table[i + 1];
        }
        i--;
        j--;
        if (i < 0)
        {
            /* Found. */
            V_FindPosition = j + 1;
            break;
        }
    }

    return V_FindPosition;
}


/* Search data from left to right. ( One time search mode. )
 * A negative P_StartPosition counts back from the end of the string; one
 * that reaches past the beginning is clamped to 0, so no byte in front of
 * the buffer is read. A start beyond the end finds nothing.
 * Returns the offset of the first match at or after the start position, or
 * -1 when there is none, the needle is empty, or the KMP table cannot be
 * allocated. */
_UNUSED_ static long utstring_find(
    UT_string *s,
    long P_StartPosition,   /* Start from 0. -1 means last position. */
    const char *P_Needle,
    size_t P_NeedleLen)
{
    long V_StartPosition;
    long V_HaystackLen;
    long *V_KMP_Table;
    long V_FindPosition = -1;

    if (P_StartPosition < 0)
    {
        V_StartPosition = (long) s->i + P_StartPosition;
        if (V_StartPosition < 0)
        {
            V_StartPosition = 0;
        }
    }
    else
    {
        V_StartPosition = P_StartPosition;
    }
    V_HaystackLen = (long) s->i - V_StartPosition;
    if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
    {
        V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
        if (V_KMP_Table != NULL)
        {
            _utstring_BuildTable(P_Needle, P_NeedleLen, V_KMP_Table);

            V_FindPosition = _utstring_find(s->d + V_StartPosition,
                                            V_HaystackLen,
                                            P_Needle,
                                            P_NeedleLen,
                                            V_KMP_Table);
            if (V_FindPosition >= 0)
            {
                V_FindPosition += V_StartPosition;
            }

            free(V_KMP_Table);
        }
    }

    return V_FindPosition;
}


/* Search data from right to left. ( One time search mode. )
 * The search covers bytes 0 .. start position. A negative P_StartPosition
 * counts back from the end of the string; a start at or beyond the end is
 * clamped to the last byte, so nothing past the stored string is read.
 * Returns the offset of the last match that ends at or before the start
 * position, or -1 when there is none, the needle is empty, or the KMP table
 * cannot be allocated. */
_UNUSED_ static long utstring_findR(
    UT_string *s,
    long P_StartPosition,   /* Start from 0. -1 means last position. */
    const char *P_Needle,
    size_t P_NeedleLen)
{
    long V_StartPosition;
    long V_HaystackLen;
    long *V_KMP_Table;
    long V_FindPosition = -1;

    if (P_StartPosition < 0)
    {
        V_StartPosition = (long) s->i + P_StartPosition;
    }
    else
    {
        V_StartPosition = P_StartPosition;
    }
    if (V_StartPosition >= (long) s->i)
    {
        V_StartPosition = (long) s->i - 1;
    }
    V_HaystackLen = V_StartPosition + 1;
    if ( (V_HaystackLen >= (long) P_NeedleLen) && (P_NeedleLen > 0) )
    {
        V_KMP_Table = (long *)malloc(sizeof(long) * (P_NeedleLen + 1));
        if (V_KMP_Table != NULL)
        {
            _utstring_BuildTableR(P_Needle, P_NeedleLen, V_KMP_Table);

            V_FindPosition = _utstring_findR(s->d,
                                             V_HaystackLen,
                                             P_Needle,
                                             P_NeedleLen,
                                             V_KMP_Table);

            free(V_KMP_Table);
        }
    }

    return V_FindPosition;
}
/*******************************************************************************
 * end substring search functions                                              *
 ******************************************************************************/

#endif /* UTSTRING_H */