├── CMakeLists.txt ├── LICENCE.md ├── README.md ├── cgrpfs.c ├── cgrpfs.h ├── cgrpfs_fuseops.c ├── cgrpfs_main.c ├── cgrpfs_main_puffs.c ├── cgrpfs_main_threads.c ├── cgrpfs_vfsops.c ├── cgrpfs_vnops.c └── uthash.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) # must precede project() so policy defaults apply 2 | cmake_policy(VERSION 3.9) 3 | 4 | project(cgrpfs) 5 | 6 | include(FindPkgConfig) 7 | include(GNUInstallDirs) 8 | 9 | list(APPEND CGRPFS_SRCS cgrpfs.c) 10 | 11 | if (CMAKE_SYSTEM_NAME MATCHES "kOpenBSD.*|OpenBSD.*") 12 | find_package(Threads REQUIRED) 13 | set(CGRPFS_THREADED true) 14 | set(FUSE_LIB fuse Threads::Threads) 15 | list(APPEND CGRPFS_SRCS cgrpfs_main_threads.c cgrpfs_fuseops.c) 16 | elseif (CMAKE_SYSTEM_NAME MATCHES "kNetBSD.*|NetBSD.*") 17 | find_package(Threads REQUIRED) 18 | set(CGRPFS_THREADED true) 19 | set(FUSE_LIB puffs util Threads::Threads) 20 | set(CGRPFS_PUFFS true) 21 | list(APPEND CGRPFS_SRCS cgrpfs_main_puffs.c cgrpfs_vnops.c 22 | cgrpfs_vfsops.c) 23 | else () 24 | pkg_check_modules(fuse REQUIRED IMPORTED_TARGET fuse) 25 | set(FUSE_LIB PkgConfig::fuse) 26 | list(APPEND CGRPFS_SRCS cgrpfs_main.c cgrpfs_fuseops.c) 27 | endif() 28 | 29 | add_executable(cgrpfs ${CGRPFS_SRCS}) 30 | target_link_libraries(cgrpfs PRIVATE ${FUSE_LIB}) # explicit link scope; libs are build-only deps 31 | 32 | if (CGRPFS_THREADED) 33 | target_compile_definitions(cgrpfs PRIVATE CGRPFS_THREADED) # no -D prefix with target_compile_definitions 34 | endif () 35 | 36 | if (CGRPFS_PUFFS) 37 | target_compile_definitions(cgrpfs PRIVATE CGRPFS_PUFFS) 38 | endif () 39 | 40 | install(TARGETS cgrpfs DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}) 41 | -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | Copyright 2021 David MacKay 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. 
Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | 3. Neither the name of the copyright holder nor the names of its contributors 12 | may be used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 19 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 22 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CGrpFS 2 | ====== 3 | 4 | CGrpFS is a tiny implementation of the GNU/Linux CGroup filesystem for BSD 5 | platforms. It takes the form of either a PUFFS or FUSE filesystem, and 6 | implements robust tracking of processes. Resource control, however, is not 7 | present; the different BSD platforms each provide different mechanisms for this, 8 | none of which are trivially adapted to CGroups semantics. 
The process tracking 9 | alone is sufficient for the main user of CGrpFS, 10 | [InitWare](https://github.com/InitWare/InitWare), a service manager derived from 11 | systemd. 12 | 13 | CGrpFS is available under the Modified BSD Licence. It is not as-yet very well 14 | tested, but seems to work fine for InitWare's purposes. 15 | 16 | Architecture 17 | ------ 18 | 19 | CGrpFS was implemented quickly and is not necessarily the most efficient in 20 | design. 21 | 22 | Process tracking is implemented with the process filter for Kernel Queues. This 23 | provides CGrpFS with notifications whenever a process forks or exits to which it 24 | has attached a filter. On all BSD platforms except macOS, the filter is 25 | automatically applied to all the transitive subprocesses spawned by a process 26 | after the filter is attached. A filter is attached as soon as a PID is added to 27 | a CGroup, so the Linux semantics are matched. 28 | 29 | For simplicity, all the files and directories of the CGroup filesystem are 30 | backed by node structures, which are akin to a combination of an `inode` and 31 | `dirent` structure. These nodes are hierarchically ordered and each stores a 32 | name, `stat` structure, a type (CGroup directory, `cgroup.procs` file, ...) and 33 | type-specific data. A CGroup directory node, for example, stores a linked list 34 | of all PIDs within it. It might be better to take an approach that maintains 35 | less data, but bear in mind that at least permissions data must be stored 36 | for nodes, as the GNU/Linux CGroup filesystem allows changing permissions, e.g. 37 | to facilitate delegation. 38 | 39 | To try to ensure consistency of file contents over the course of multiple reads, 40 | each `open` operation in the FUSE version of CGrpFS allocates a buffer into 41 | which the contents of the associated file is generated in full, and this buffer 42 | is used for each read with that FUSE file handle. 
This may not work properly in 43 | every case because the SunOS VFS (as imitated by BSD) enforces a distinction 44 | between the file and vnode levels absent from GNU/Linux. The likely result of 45 | this distinction is that read operations may not be mapped to the right file 46 | handle during read operations. The only viable fix (which would also work for 47 | PUFFS) would be the generation of a fresh vnode for every open. 48 | 49 | A mini-ProcFS is also provided with only a minimal `cgroup` file present in each 50 | PID's directory. The nodes for directories (and the contained `cgroup` file) 51 | within that hierarchy are generated dynamically in response to getattr() events 52 | to eliminate the need to preallocate the entire lot, and these might feasibly be 53 | pruned if unused for some time. Their purpose is to allow InitWare to determine 54 | the containing CGroup of a PID. If a PID is inquired about which does not 55 | currently belong to any CGroup, it is automatically added to the root CGroup, 56 | in line with the behaviour on Linux. 57 | 58 | Because only NetBSD's PUFFS (and its FUSE emulation, PERFUSE) support poll() 59 | (but not the installation of Kernel Queues filters), while FUSE for other BSDs 60 | doesn't, and because the `release_agent` mechanism is fundamentally fragile, 61 | CGrpFS listens on a sequenced-packet socket in the Unix domain at 62 | `/var/run/cgrpfs.notify`. On a process exiting, a `siginfo_t` structure is 63 | prepared and sent as a message to every peer connected to that socket. InitWare 64 | uses this to help track process lifecycle. 65 | 66 | Some effort is made to be resilient to out-of-memory conditions. This is 67 | untested and may not work. Whether libfuse is similarly resilient is another 68 | question. There is also the problem that under OOM conditions, it is no longer 69 | possible to update the structures in CGrpFS which describe which processes 70 | belong to what CGroup. 
This might be mitigated in part by keeping some spare 71 | memory around to use under OOM conditions, and hoping that the number of tracked 72 | processes doesn't grow beyond its capacity while the OOM state persists. 73 | Finally, the process filter itself can fail in-kernel under OOM conditions, and 74 | return NOTE_TRACKERR. There is no easy way out of this without modifying the 75 | kernel itself. 76 | 77 | Room for Improvement 78 | -------------------- 79 | 80 | There are several ways in which CGrpFS could be improved. 81 | 82 | The mini-ProcFS is immutable by users and stateless, only providing information 83 | maintained by the actual CGroups tree; it could therefore be implemented 84 | without backing nodes to save some memory use. 85 | 86 | Much more data than necessary is stored in each node (a full struct `stat`); 87 | this can be reduced. And proper nodes for each pseudo-file in a CGroup directory 88 | could be abolished too. 89 | 90 | Much unnecessary copying goes on due to CGrpFS using the high-level libfuse 91 | interface. Lowering to the fuse_lowlevel interface (or even directly to the 92 | `/dev/fuse` device) could help reduce that, and hence reduce the risk of OOM 93 | conditions causing a crash. Needless lookups also occur with the high-level 94 | interface because it's based on path strings; the archictecture of CGrpFS more 95 | readily fits the lower-level inode-based interface. Path lookup would also 96 | become simpler since there would be one lookup request for each component of 97 | the path; currently it has ugly special-cases for e.g. `mkdir`. 98 | 99 | OOM resilience could be improved in line with the notes in the Architecture 100 | section above. 101 | 102 | Release agent support should be implemented for compatibility, though it's not 103 | a reliable mechanism. 104 | 105 | FreeBSD provides hierarchical resource control via the `rctl` system. 
It's not 106 | clear whether this usefully maps to CGroups semantics, but it certainly is 107 | worth exploring whether it could be used to provide some CGroup resource 108 | controllers. 109 | 110 | CGrpFS could be implemented as an in-kernel filesystem within the various BSD 111 | kernels. CGrpFS could be hooked up more directly with the kernel's process 112 | management, and benefit from the kernel's capacity to to deal with OOM 113 | conditions more aggressively. 114 | 115 | Furthering an in-kernel implementation of CGrpFS, hierarchical resource control 116 | mechanisms could be implemented in those BSDs without them. 117 | 118 | Contributing poll() and kevent() supprt to each BSD's FUSE/PUFFS implementation 119 | would allow the CGroups 2.0 `cgroup.events` file to be implemented. 120 | -------------------------------------------------------------------------------- /cgrpfs.c: -------------------------------------------------------------------------------- 1 | #define _KERNTYPES 2 | #include 3 | 4 | #ifdef CGRPFS_PUFFS 5 | #include 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "cgrpfs.h" 25 | 26 | struct cgdir_nodes { 27 | cg_nodetype_t type; 28 | const char *name; 29 | }; 30 | 31 | #ifdef CGRPFS_THREADED 32 | void 33 | _unlock_cgmgr_(int *unused) 34 | { 35 | (void)unused; 36 | pthread_mutex_unlock(&cgmgr.lock); 37 | } 38 | 39 | static void * 40 | kqueue_thread(void *unused) 41 | { 42 | struct kevent kev; 43 | 44 | (void)unused; 45 | 46 | while (true) { 47 | int r; 48 | 49 | r = kevent(cgmgr.kq, NULL, 0, &kev, 1, NULL); 50 | 51 | pthread_mutex_lock(&cgmgr.lock); 52 | 53 | if (r < 0 && errno != EINTR) 54 | err(EXIT_FAILURE, "kevent failed"); 55 | else if (r < 0) 56 | ; 57 | else if (r == 0) 58 | warn("Got 0 from kevent"); 59 | else if (kev.filter == EVFILT_READ && 60 | kev.ident 
== cgmgr.notifyfd) 61 | cgmgr_accept(); 62 | else if (kev.filter == EVFILT_PROC) { 63 | if (kev.fflags & NOTE_CHILD) { 64 | pid_hash_entry_t *entry; 65 | uintptr_t ppidp = kev.data; /* parent pid */ 66 | 67 | /* find parent pid's node */ 68 | HASH_FIND_PTR(cgmgr.pidcg, &ppidp, entry); 69 | 70 | if (!entry) 71 | warn("Couldn't find containing CGroup of PID %lld", 72 | (long long)kev.data); 73 | else 74 | attachpid(entry->node, kev.ident); 75 | } else if (kev.fflags & NOTE_EXIT) 76 | detachpid(kev.ident, kev.data, false); 77 | else if (kev.fflags & NOTE_TRACKERR) 78 | warn("NOTE_TRACKERR received from Kernel Queue"); 79 | else if (kev.fflags & NOTE_EXEC) 80 | warn("NOTE_EXEC was received"); 81 | } else 82 | assert(!"Unreached"); 83 | 84 | pthread_mutex_unlock(&cgmgr.lock); 85 | } 86 | 87 | exit(EXIT_FAILURE); 88 | return NULL; 89 | } 90 | #endif /* CGROUPFS_THREADS */ 91 | 92 | /* add PID to cgmgr hashtable */ 93 | static int 94 | addpidhash(pid_t pid, cg_node_t *node, pid_hash_entry_t **entryout) 95 | { 96 | pid_hash_entry_t *entry; 97 | uintptr_t pidp = pid; 98 | 99 | HASH_FIND_PTR(cgmgr.pidcg, &pidp, entry); 100 | 101 | if (entry) { 102 | entry->node = node; 103 | *entryout = entry; 104 | return 0; 105 | } 106 | 107 | entry = malloc(sizeof *entry); 108 | 109 | if (!entry) 110 | return -ENOMEM; 111 | 112 | entry->pid = pid; 113 | entry->node = node; 114 | HASH_ADD_PTR(cgmgr.pidcg, pid, entry); 115 | 116 | *entryout = entry; 117 | 118 | return 1; 119 | } 120 | 121 | cg_node_t * 122 | newnode(cg_node_t *parent, const char *name, cg_nodetype_t type) 123 | { 124 | cg_node_t *node = malloc(sizeof *node); 125 | 126 | if (!node) 127 | return NULL; 128 | 129 | node->type = type; 130 | node->name = name != NULL ? 
strdup(name) : NULL; 131 | node->agent = NULL; 132 | node->notify = false; 133 | node->parent = parent; 134 | node->pid = 0; 135 | node->accessed = false; 136 | node->todel = false; 137 | LIST_INIT(&node->subnodes); 138 | 139 | bzero(&node->attr, sizeof(node->attr)); 140 | 141 | if (parent != NULL) { 142 | node->attr.st_uid = parent->attr.st_uid; 143 | node->attr.st_gid = parent->attr.st_gid; 144 | LIST_INSERT_HEAD(&parent->subnodes, node, entries); 145 | } 146 | 147 | return node; 148 | } 149 | 150 | static void 151 | movepids(cg_node_t *from, cg_node_t *to) 152 | { 153 | pid_hash_entry_t *entry, *tmp2; 154 | 155 | HASH_ITER(hh, cgmgr.pidcg, entry, tmp2) 156 | if (entry->node == from) { 157 | if (to) 158 | entry->node = to; 159 | else 160 | detachpid(entry->pid, 0, true); 161 | } 162 | } 163 | 164 | void 165 | removenode(cg_node_t *node) 166 | { 167 | cg_node_t *val, *tmp; 168 | 169 | if (!node->accessed) 170 | return delnode(node); 171 | if (node->todel) 172 | return; 173 | 174 | // printf("Marking node %p for deletion\n", node); 175 | node->todel = true; 176 | 177 | LIST_FOREACH_SAFE (val, &node->subnodes, entries, tmp) 178 | if (!val->accessed) 179 | delnode(val); 180 | else 181 | /* if accessed, wait for PUFFS to issue a reclaim op */ 182 | removenode(val); 183 | 184 | /* move up all contained PIDs to parent */ 185 | movepids(node, node->parent); 186 | 187 | if (node->parent) 188 | LIST_REMOVE(node, entries); 189 | } 190 | 191 | void 192 | delnode(cg_node_t *node) 193 | { 194 | cg_node_t *val, *tmp; 195 | 196 | // printf("Deleting node %p\n", node); 197 | 198 | LIST_FOREACH_SAFE (val, &node->subnodes, entries, tmp) 199 | if (!val->accessed) 200 | delnode(val); 201 | else 202 | /* if accessed, wait for PUFFS to issue a reclaim op */ 203 | removenode(val); 204 | 205 | /* move up all contained PIDs to parent */ 206 | movepids(node, node->parent); 207 | 208 | if (node->parent) 209 | LIST_REMOVE(node, entries); 210 | 211 | free(node->name); 212 | free(node->agent); 
213 | free(node); 214 | } 215 | 216 | /* Add standard pseudofiles to a CGroup directory node */ 217 | static int 218 | addcgdirfiles(cg_node_t *node) 219 | { 220 | struct cgdir_nodes nodes[] = { { CGN_EVENTS, "cgroup.events" }, 221 | { CGN_PROCS, "cgroup.procs" }, 222 | { CGN_RELEASE_AGENT, "release_agent" }, 223 | { CGN_NOTIFY_ON_RELEASE, "notify_on_release" }, 224 | { CGN_INVALID, NULL } }; 225 | 226 | for (int i = 0; nodes[i].type != CGN_INVALID; i++) { 227 | cg_node_t *subnode = newnode(node, nodes[i].name, 228 | nodes[i].type); 229 | 230 | if (!subnode) 231 | return -ENOMEM; 232 | 233 | subnode->attr.st_mode = S_IFREG | 0644; 234 | } 235 | 236 | return 0; 237 | } 238 | 239 | cg_node_t * 240 | newcgdir(cg_node_t *parent, const char *name, mode_t perms, uid_t uid, 241 | gid_t gid) 242 | { 243 | cg_node_t *node = newnode(parent, name, CGN_CG_DIR); 244 | 245 | node->type = CGN_CG_DIR; 246 | node->attr.st_mode = S_IFDIR | perms; 247 | node->attr.st_uid = uid; 248 | node->attr.st_gid = gid; 249 | 250 | if (addcgdirfiles(node) < 0) { 251 | warn("Out of memory"); 252 | delnode(node); 253 | return NULL; 254 | } 255 | 256 | return node; 257 | } 258 | 259 | /* try to add a PID to our mini procfs. 
these are strictly auto-synthesised */ 260 | static cg_node_t * 261 | synthpiddir(pid_t pid) 262 | { 263 | char buf[32]; 264 | cg_node_t *node; 265 | struct cgdir_nodes nodes[] = { { CGN_PID_CGROUP, "cgroup" }, 266 | { CGN_INVALID, NULL } }; 267 | pid_hash_entry_t *entry; 268 | uintptr_t pidp = pid; 269 | int r; 270 | 271 | HASH_FIND_PTR(cgmgr.pidcg, &pidp, entry); 272 | if (!entry) { 273 | warnx("Entry absent for %lld, creating one", (long long)pid); 274 | r = attachpid(cgmgr.rootnode, pid); 275 | if (r == -ESRCH) 276 | return NULL; 277 | } 278 | 279 | sprintf(buf, "%lld", (long long)pid); 280 | node = newnode(cgmgr.metanode, buf, CGN_PID_DIR); 281 | if (!node) 282 | return NULL; 283 | 284 | node->pid = pid; 285 | node->attr.st_mode = S_IFDIR | 0755; 286 | for (int i = 0; nodes[i].type != CGN_INVALID; i++) { 287 | cg_node_t *subnode = newnode(node, nodes[i].name, 288 | nodes[i].type); 289 | 290 | if (!subnode) { 291 | delnode(node); 292 | return NULL; 293 | } 294 | 295 | subnode->attr.st_mode = S_IFREG | 0644; 296 | } 297 | 298 | return node; 299 | } 300 | 301 | cg_node_t * 302 | lookupfile(cg_node_t *node, const char *filename) 303 | { 304 | cg_node_t *subnode; 305 | LIST_FOREACH (subnode, &node->subnodes, entries) 306 | if (!strcmp(subnode->name, filename)) 307 | return subnode; 308 | 309 | /* try to synth a PID dir */ 310 | if (node->type == CGN_PID_ROOT_DIR) { 311 | char *endptr; 312 | pid_t pid; 313 | cg_node_t *pidnode; 314 | 315 | pid = strtol(filename, &endptr, 10); 316 | 317 | if (*endptr != '\0' && *endptr != '/') 318 | return NULL; 319 | 320 | return synthpiddir(pid); 321 | } 322 | 323 | return NULL; 324 | } 325 | 326 | cg_node_t * 327 | lookupnode(const char *path, bool secondlast) 328 | { 329 | const char *part = path; 330 | cg_node_t *node = cgmgr.rootnode; 331 | bool breaksecondlast = false; /* whether to break on finding 2nd-last */ 332 | bool last = false; /* are we on the last component of the path? 
*/ 333 | 334 | while ((part = strstr(part, "/")) != NULL) { 335 | char *partend; 336 | size_t partlen; 337 | bool found = false; 338 | cg_node_t *subnode; 339 | 340 | if (!*part++) { 341 | assert(false); 342 | break; /* reached last part */ 343 | } 344 | 345 | partend = strstr(part, "/"); 346 | if (partend) 347 | partlen = partend - part; 348 | else { 349 | partlen = strlen(part); 350 | last = true; 351 | } 352 | 353 | if (secondlast && last && node == cgmgr.rootnode) 354 | return node; 355 | else if (last && breaksecondlast) 356 | return node; 357 | else if (!strlen(part)) /* root dir */ 358 | return node; 359 | 360 | LIST_FOREACH (subnode, &node->subnodes, entries) { 361 | if ((strlen(subnode->name) == partlen) && 362 | !strncmp(subnode->name, part, partlen)) { 363 | node = subnode; 364 | found = true; 365 | break; 366 | } 367 | } 368 | 369 | /* synthesise pid folder under cgroup.meta if absent*/ 370 | if (!found && node->type == CGN_PID_ROOT_DIR) { 371 | char *endptr; 372 | pid_t pid; 373 | cg_node_t *pidnode; 374 | 375 | pid = strtol(part, &endptr, 10); 376 | 377 | if (*endptr != '\0' && *endptr != '/') 378 | return NULL; 379 | 380 | pidnode = synthpiddir(pid); 381 | if (pidnode) { 382 | node = pidnode; 383 | found = true; 384 | } 385 | } 386 | 387 | if (!found && secondlast) 388 | breaksecondlast = true; 389 | else if (!found) 390 | return NULL; 391 | } 392 | 393 | return node; 394 | } 395 | 396 | static char * 397 | nodefullpath_internal(cg_node_t *node) 398 | { 399 | char *path, *newpath; 400 | 401 | if (node->parent) { 402 | if ((path = nodefullpath_internal(node->parent)) == NULL) 403 | return NULL; 404 | } else 405 | return strdup(""); /* root node */ 406 | 407 | asprintf(&newpath, "%s/%s", path, node->name); 408 | free(path); 409 | return newpath; 410 | } 411 | 412 | char * 413 | nodefullpath(cg_node_t *node) 414 | { 415 | if (!node->parent) 416 | return strdup("/"); /* root node */ 417 | else 418 | return nodefullpath_internal(node); 419 | } 420 | 421 | 
/* Check if a CGroup node has any PIDs, or if any of its subnodes do. */ 422 | static bool 423 | nodepopulated(cg_node_t *node) 424 | { 425 | bool populated = false; 426 | pid_hash_entry_t *entry, *tmp2; 427 | cg_node_t *subnode; 428 | 429 | HASH_ITER(hh, cgmgr.pidcg, entry, tmp2) 430 | if (entry->node == node) 431 | return true; 432 | 433 | LIST_FOREACH (subnode, &node->subnodes, entries) 434 | if (nodepopulated(subnode)) 435 | return true; 436 | 437 | return false; 438 | } 439 | 440 | char * 441 | procsfiletxt(cg_node_t *node) 442 | { 443 | char *txt = NULL; 444 | char linebuf[33]; 445 | size_t curlen = 0; 446 | pid_hash_entry_t *entry, *tmp2; 447 | 448 | HASH_ITER(hh, cgmgr.pidcg, entry, tmp2) 449 | { 450 | if (entry->node == node->parent) { 451 | char *newtxt; 452 | 453 | curlen += sprintf(linebuf, "%lld\n", 454 | (long long)entry->pid); 455 | newtxt = realloc(txt, curlen + 1); 456 | if (!newtxt) { 457 | free(txt); 458 | warnx("Out of memory"); 459 | return NULL; 460 | } 461 | 462 | if (!txt) { 463 | txt = newtxt; 464 | txt[0] = '\0'; 465 | } else 466 | txt = newtxt; 467 | 468 | strcat(txt, linebuf); 469 | } 470 | } 471 | 472 | return txt ? 
txt : strdup(""); 473 | } 474 | 475 | char * 476 | nodetxt(cg_node_t *node) 477 | { 478 | if (node->type == CGN_PID_CGROUP) { 479 | pid_hash_entry_t *entry; 480 | uintptr_t pidp = node->parent->pid; 481 | char *buf; 482 | 483 | HASH_FIND_PTR(cgmgr.pidcg, &pidp, entry); 484 | 485 | if (!entry) 486 | /* untracked are in root CGroup by default */ 487 | asprintf(&buf, "1:name=systemd:/\n"); 488 | else { 489 | char *path = nodefullpath(entry->node); 490 | 491 | if (!path) 492 | return NULL; 493 | 494 | asprintf(&buf, "1:name=systemd:%s\n", path); 495 | free(path); 496 | } 497 | 498 | return buf; 499 | 500 | } else if (node->type == CGN_EVENTS) { 501 | char *buf; 502 | return NULL; 503 | } else if (node->type == CGN_PROCS) 504 | return procsfiletxt(node); 505 | else 506 | return NULL; 507 | } 508 | 509 | int 510 | attachpid(cg_node_t *node, pid_t pid) 511 | { 512 | struct kevent kev; 513 | int r; 514 | pid_hash_entry_t *entry; 515 | 516 | assert(node->type == CGN_CG_DIR); 517 | r = addpidhash(pid, node, &entry); 518 | 519 | if (r < 0) 520 | warnx("Failed to add PID %lld", (long long)pid); 521 | else if (r == 0) { 522 | warnx("Existing entry for %lld\n", (long long)pid); 523 | return 0; 524 | } 525 | 526 | /* new PID - must be tracked */ 527 | EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT | NOTE_TRACK, 0, NULL); 528 | r = kevent(cgmgr.kq, &kev, 1, NULL, 0, NULL); 529 | 530 | if (r < 0) { 531 | int olderrno = errno; 532 | /* delete untrackable PID */ 533 | HASH_DEL(cgmgr.pidcg, entry); 534 | free(entry); 535 | errno = olderrno; 536 | warn("Failed to watch PID %lld", (long long)pid); 537 | return -olderrno; 538 | } else if (r >= 0) 539 | return 1; 540 | 541 | return -errno; 542 | } 543 | 544 | void 545 | notify_exit(pid_t pid, int wstat) 546 | { 547 | siginfo_t si; 548 | listener_t *val, *tmp; 549 | 550 | si.si_pid = pid; 551 | si.si_signo = SIGCHLD; 552 | 553 | if (WIFEXITED(wstat)) { 554 | si.si_code = CLD_EXITED; 555 | si.si_status = WEXITSTATUS(wstat); 556 | } else 
if (WIFSIGNALED(wstat)) { 557 | si.si_code = CLD_KILLED; 558 | si.si_status = WTERMSIG(wstat); 559 | } 560 | 561 | LIST_FOREACH_SAFE (val, &cgmgr.listeners, listeners, tmp) { 562 | ssize_t r = send(val->fd, &si, sizeof si, MSG_NOSIGNAL); 563 | if (r < 0 && errno == EPIPE) { 564 | /* remove the listener that disconnected */ 565 | LIST_REMOVE(val, listeners); 566 | free(val); 567 | } else if (r < 0) 568 | warn("Failed to send exit notification"); 569 | } 570 | } 571 | 572 | int 573 | detachpid(pid_t pid, int wstat, bool untrack) 574 | { 575 | struct kevent kev; 576 | cg_node_t *node; 577 | pid_hash_entry_t *entry; 578 | uintptr_t pidp = pid; 579 | 580 | if (untrack) { 581 | int r; 582 | 583 | EV_SET(&kev, pid, EVFILT_PROC, EV_DELETE, 0, 0, NULL); 584 | r = kevent(cgmgr.kq, &kev, 1, NULL, 0, NULL); 585 | if (r < 0) 586 | warn("Failed to untrack PID %lld", (long long)pid); 587 | } 588 | 589 | HASH_FIND_PTR(cgmgr.pidcg, &pidp, entry); 590 | if (!entry) 591 | warnx("Lost PID without a parent CGroup\n"); 592 | else { 593 | HASH_DEL(cgmgr.pidcg, entry); 594 | free(entry); 595 | if (!untrack) 596 | notify_exit(pid, wstat); 597 | } 598 | 599 | return 0; 600 | } 601 | 602 | void 603 | cgmgr_init(void) 604 | { 605 | struct sockaddr_un sun = { .sun_family = AF_UNIX, 606 | .sun_path = "/var/run/cgrpfs.notify" }; 607 | struct kevent kev; 608 | #ifdef CGRPFS_THREADED 609 | int r; 610 | pthread_t thrd; 611 | #endif 612 | 613 | cgmgr.kq = kqueue(); 614 | if ((cgmgr.kq = kqueue()) < 0) 615 | errx(EXIT_FAILURE, "Failed to open kernel queue."); 616 | 617 | #ifdef CGRPFS_THREADED 618 | if (pthread_mutex_init(&cgmgr.lock, NULL) < 0) 619 | err(EXIT_FAILURE, "Failed to initialise mutex"); 620 | 621 | r = pthread_create(&thrd, NULL, kqueue_thread, NULL); 622 | if (r != 0) 623 | errx(EXIT_FAILURE, "pthread_create failed: %s", strerror(r)); 624 | #endif 625 | 626 | cgmgr.notifyfd = socket(AF_UNIX, SOCK_SEQPACKET, 0); 627 | if (cgmgr.notifyfd < 0) 628 | err(EXIT_FAILURE, "failed to create 
listener socket"); 629 | 630 | unlink(sun.sun_path); 631 | 632 | if (bind(cgmgr.notifyfd, (struct sockaddr *)&sun, SUN_LEN(&sun)) < 0) 633 | err(EXIT_FAILURE, "failed to bind listener socket"); 634 | 635 | if (listen(cgmgr.notifyfd, 10) < 0) 636 | err(EXIT_FAILURE, "failed to listen on listener socket"); 637 | 638 | EV_SET(&kev, cgmgr.notifyfd, EVFILT_READ, EV_ADD, 0, 0, NULL); 639 | if ((kevent(cgmgr.kq, &kev, 1, NULL, 0, NULL)) < 0) 640 | err(EXIT_FAILURE, 641 | "Failed to add event for notify FD to Kernel Queue"); 642 | 643 | cgmgr.pidcg = NULL; 644 | 645 | cgmgr.rootnode = newcgdir(NULL, NULL, 0755, 0, 0); 646 | if (!cgmgr.rootnode) 647 | errx(EXIT_FAILURE, "Failed to allocate root node."); 648 | 649 | cgmgr.metanode = newnode(cgmgr.rootnode, "cgroup.meta", 650 | CGN_PID_ROOT_DIR); 651 | if (!cgmgr.metanode) 652 | errx(EXIT_FAILURE, "Failed to allocate meta node."); 653 | 654 | cgmgr.metanode->attr.st_mode = S_IFDIR | 0755; 655 | 656 | LIST_INIT(&cgmgr.listeners); 657 | } 658 | 659 | void 660 | cgmgr_accept(void) 661 | { 662 | listener_t *listener = malloc(sizeof *listener); 663 | 664 | if (!listener) { 665 | warn("Failed to allocate listener"); 666 | return; 667 | } 668 | 669 | listener->fd = accept(cgmgr.notifyfd, NULL, 0); 670 | if (listener->fd < 0) { 671 | warn("Failed to accept listener"); 672 | free(listener); 673 | return; 674 | } 675 | 676 | LIST_INSERT_HEAD(&cgmgr.listeners, listener, listeners); 677 | } 678 | -------------------------------------------------------------------------------- /cgrpfs.h: -------------------------------------------------------------------------------- 1 | #ifndef CGRPFS_H_ 2 | #define CGRPFS_H_ 3 | 4 | #ifdef CGRPFS_PUFFS 5 | #define _KERNTYPES 6 | 7 | #include 8 | 9 | #include 10 | #endif 11 | 12 | #include 13 | #include 14 | 15 | #ifdef CGRPFS_THREADED 16 | #include 17 | 18 | #define CGMGR_LOCKED \ 19 | __attribute__((cleanup(_unlock_cgmgr_))) __attribute__(( \ 20 | unused)) int _unused_lock_ = 
pthread_mutex_lock(&cgmgr.lock) 21 | 22 | void _unlock_cgmgr_(int *unused); 23 | #else 24 | #define CGMGR_LOCKED 25 | #endif 26 | 27 | #include "uthash.h" 28 | 29 | /* an entry in the pid => node hashtable */ 30 | typedef struct pid_hash_entry { 31 | uintptr_t pid; 32 | struct cg_node *node; 33 | UT_hash_handle hh; 34 | } pid_hash_entry_t; 35 | 36 | typedef struct poll_request { 37 | LIST_ENTRY(poll_request) pollreqs; 38 | 39 | struct cg_node *node; 40 | } poll_request_t; 41 | 42 | /* a listener for emptiness/exit events */ 43 | typedef struct listener { 44 | LIST_ENTRY(listener) listeners; 45 | 46 | int fd; 47 | } listener_t; 48 | 49 | /* kind of CGroupFS node */ 50 | typedef enum cg_nodetype { 51 | CGN_INVALID = -1, 52 | CGN_EVENTS, /* cgroup.events file */ 53 | CGN_PROCS, /* cgroup.procs file */ 54 | CGN_RELEASE_AGENT, /* release_agent file */ 55 | CGN_NOTIFY_ON_RELEASE, /* notify_on_release file */ 56 | CGN_CG_DIR, /* cgroup directory */ 57 | CGN_PID_ROOT_DIR, /* cgroup.meta root dir */ 58 | CGN_PID_DIR, /* cgroup.meta/$pid directory */ 59 | CGN_PID_CGROUP /* cgroup.meta/$pid/cgroup */ 60 | } cg_nodetype_t; 61 | 62 | /* node for all entries in the CGroupFS */ 63 | typedef struct cg_node { 64 | LIST_ENTRY(cg_node) entries; 65 | 66 | char *name; 67 | cg_nodetype_t type; 68 | struct cg_node *parent; 69 | struct stat attr; 70 | bool todel; /* is it to be deleted? */ 71 | int accessed; /* how many kernel handles to it? */ 72 | 73 | /* for PID dirs */ 74 | pid_t pid; 75 | 76 | /* for all dirs */ 77 | LIST_HEAD(cg_node_list, cg_node) subnodes; 78 | 79 | /* for cgroup dirs */ 80 | bool notify; 81 | char *agent; 82 | } cg_node_t; 83 | 84 | /* the cgfs manager singleton */ 85 | typedef struct cgmgr { 86 | struct fuse *fuse; 87 | char *mountpoint; 88 | int mt; /* is it multithreaded? 
*/ 89 | int kq; /* kernel queue fd */ 90 | int notifyfd; /* notification server fd for exit and emptiness events */ 91 | 92 | LIST_HEAD(listeners, listener) listeners; 93 | 94 | pid_hash_entry_t *pidcg; /* map pid => node */ 95 | 96 | cg_node_t *rootnode, *metanode; 97 | 98 | #ifdef CGRPFS_THREADED 99 | pthread_mutex_t lock; 100 | /* 101 | * TODO: If it turns out adding events to kqueue from another thread is 102 | * not allowed, we'll write a byte to the pipe which the kevent thread 103 | * will set a read filter on, so that it's not blocked on kevent() at 104 | * the time of adding a new event filter from our other thread. 105 | */ 106 | int commfd[2]; 107 | #endif 108 | } cgmgr_t; 109 | 110 | /* an open file description */ 111 | typedef struct cgn_filedesc { 112 | cg_node_t *node; 113 | 114 | char *buf; /* file contents - pre-filled on open() for consistency */ 115 | } cg_filedesc_t; 116 | 117 | /* set up the cgmgr */ 118 | void cgmgr_init(void); 119 | /* accept a connection on the notify passive socket */ 120 | void cgmgr_accept(void); 121 | 122 | /* Create a new node and initialise it enough to let delnode not fail */ 123 | cg_node_t *newnode(cg_node_t *parent, const char *name, cg_nodetype_t type); 124 | /* Create a new CGroup directory node */ 125 | cg_node_t *newcgdir(cg_node_t *parent, const char *name, mode_t perms, 126 | uid_t uid, gid_t gid); 127 | /* Like delnode but dosen't free it. PUFFS will order a reclaim op later. */ 128 | void removenode(cg_node_t *node); 129 | /* 130 | * Delete a node. Any contained PIDs moved to parent. Subnodes either deleted 131 | * (if accessed = 0) or marked for deletion. 132 | */ 133 | void delnode(cg_node_t *node); 134 | 135 | /* Lookup a node by filename within another node. 
*/ 136 | cg_node_t *lookupfile(cg_node_t *node, const char *filename); 137 | /* Lookup a node by path, or the second-last node of that path */ 138 | cg_node_t *lookupnode(const char *path, bool secondlast); 139 | /* Get full path of node */ 140 | char *nodefullpath(cg_node_t *node); 141 | 142 | /* Get file contents of node. */ 143 | char *nodetxt(cg_node_t *node); 144 | /* Get cgroups.proc file contents for node */ 145 | char *procsfiletxt(cg_node_t *node); 146 | 147 | /* Attach a PID to a CGroup */ 148 | int attachpid(cg_node_t *node, pid_t pid); 149 | /* Detach a PID from its owner CGroup and stop tracking it if untrack set */ 150 | int detachpid(pid_t pid, int wstat, bool untrack); 151 | 152 | extern cgmgr_t cgmgr; 153 | 154 | #ifdef CGRPFS_PUFFS 155 | PUFFSOP_PROTOS(cgrpfs); 156 | #else 157 | extern struct fuse_operations cgops; 158 | #endif 159 | 160 | #endif /* CGRPFS_H_ */ 161 | -------------------------------------------------------------------------------- /cgrpfs_fuseops.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define FUSE_USE_VERSION 26 11 | #include 12 | 13 | #include "cgrpfs.h" 14 | 15 | static int 16 | cg_chmod(const char *path, mode_t mode) 17 | { 18 | CGMGR_LOCKED; 19 | cg_node_t *node = lookupnode(path, false); 20 | 21 | if (!node) 22 | return -ENOENT; 23 | 24 | node->attr.st_mode &= ~(07777); 25 | node->attr.st_mode |= mode; 26 | 27 | return 0; 28 | } 29 | 30 | static int 31 | cg_chown(const char *path, uid_t uid, gid_t gid) 32 | { 33 | CGMGR_LOCKED; 34 | cg_node_t *node = lookupnode(path, false); 35 | 36 | if (!node) 37 | return -ENOENT; 38 | 39 | if (uid != -1) 40 | node->attr.st_uid = uid; 41 | if (gid != -1) 42 | node->attr.st_gid = gid; 43 | 44 | return 0; 45 | } 46 | 47 | static int 48 | cg_getattr(const char *path, struct stat *st) 49 | { 50 | CGMGR_LOCKED; 51 | cg_node_t *node = 
lookupnode(path, false); 52 | 53 | if (!node) 54 | return -ENOENT; 55 | 56 | *st = node->attr; 57 | 58 | return 0; 59 | } 60 | 61 | static int 62 | cg_open(const char *path, struct fuse_file_info *fi) 63 | { 64 | CGMGR_LOCKED; 65 | cg_node_t *node = lookupnode(path, false); 66 | cg_filedesc_t *filedesc; 67 | 68 | if (!node) 69 | return -ENOENT; 70 | 71 | filedesc = malloc(sizeof *filedesc); 72 | if (!filedesc) 73 | return -ENOMEM; 74 | filedesc->node = node; 75 | filedesc->buf = NULL; 76 | 77 | fi->fh = (uintptr_t)filedesc; 78 | fi->direct_io = 1; 79 | 80 | if (node->type != CGN_PROCS && node->type != CGN_PID_CGROUP) 81 | return -ENOTSUP; 82 | 83 | filedesc->buf = nodetxt(node); 84 | if (!filedesc->buf) { 85 | free(filedesc); 86 | return -ENOMEM; 87 | } 88 | 89 | return 0; 90 | } 91 | 92 | #ifndef __OpenBSD__ 93 | static int 94 | cg_poll(const char *path, struct fuse_file_info *fi, struct fuse_pollhandle *ph, 95 | unsigned *reventsp) 96 | { 97 | *reventsp = POLLIN | POLLHUP; 98 | fuse_notify_poll(ph); 99 | return 0; 100 | } 101 | #endif 102 | 103 | static int 104 | cg_read(const char *path, char *buf, size_t len, off_t off, 105 | struct fuse_file_info *fi) 106 | { 107 | CGMGR_LOCKED; 108 | cg_filedesc_t *filedesc = (void *)fi->fh; 109 | size_t maxlen; 110 | 111 | assert(filedesc); 112 | 113 | if (!filedesc->buf) 114 | return 0; 115 | 116 | maxlen = strlen(filedesc->buf); 117 | if (off > maxlen) 118 | return 0; 119 | else if (len < maxlen - off) 120 | maxlen = len; 121 | else 122 | maxlen -= off; 123 | 124 | memcpy(buf, filedesc->buf + off, maxlen); 125 | 126 | return maxlen; 127 | } 128 | 129 | static int 130 | cg_write(const char *path, const char *buf, size_t len, off_t off, 131 | struct fuse_file_info *fi) 132 | { 133 | CGMGR_LOCKED; 134 | cg_filedesc_t *filedesc = (void *)fi->fh; 135 | cg_node_t *node = filedesc->node; 136 | 137 | assert(node); 138 | 139 | if (node->type == CGN_PROCS) { 140 | long pid; 141 | int r; 142 | 143 | if (sscanf(buf, "%ld\n", &pid) < 
1) 144 | return -EINVAL; 145 | 146 | r = attachpid(node->parent, pid); 147 | 148 | if (r < 0) 149 | return r; 150 | return len; 151 | } else 152 | return -ENODEV; 153 | } 154 | 155 | static int 156 | cg_release(const char *path, struct fuse_file_info *fi) 157 | { 158 | CGMGR_LOCKED; 159 | cg_filedesc_t *filedesc = (void *)fi->fh; 160 | 161 | assert(filedesc); 162 | free(filedesc->buf); 163 | free(filedesc); 164 | 165 | return 0; 166 | } 167 | 168 | static int 169 | cg_opendir(const char *path, struct fuse_file_info *fi) 170 | { 171 | CGMGR_LOCKED; 172 | cg_node_t *node = lookupnode(path, false); 173 | 174 | if (!node) 175 | return -ENOENT; 176 | else if (node->type != CGN_CG_DIR && node->type != CGN_PID_ROOT_DIR && 177 | node->type != CGN_PID_DIR) 178 | return -ENOTDIR; 179 | 180 | fi->fh = (uintptr_t)node; 181 | 182 | return 0; 183 | } 184 | 185 | static int 186 | cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t off, 187 | struct fuse_file_info *fi) 188 | { 189 | CGMGR_LOCKED; 190 | cg_node_t *node = (cg_node_t *)fi->fh; 191 | cg_node_t *dirent; 192 | 193 | filler(buf, ".", NULL, 0); 194 | filler(buf, "..", NULL, 0); 195 | 196 | LIST_FOREACH (dirent, &node->subnodes, entries) { 197 | filler(buf, dirent->name, &dirent->attr, 0); 198 | } 199 | 200 | return 0; 201 | } 202 | 203 | int 204 | cg_mkdir(const char *path, mode_t mode) 205 | { 206 | CGMGR_LOCKED; 207 | cg_node_t *node = lookupnode(path, false); 208 | cg_node_t *newdir; 209 | struct fuse_context *ctx = fuse_get_context(); 210 | char *dirname = strrchr(path, '/'); 211 | 212 | if (node != NULL) 213 | return -EEXIST; 214 | 215 | /* get containing node */ 216 | node = lookupnode(path, true); 217 | 218 | if (!node) 219 | return -ENOENT; 220 | else if (node->type != CGN_CG_DIR) 221 | return -ENOTSUP; 222 | 223 | newdir = newcgdir(node, dirname + 1, 0755 & ~ctx->umask, ctx->uid, 224 | ctx->gid); 225 | if (!newdir) 226 | return -ENOMEM; 227 | 228 | return 0; 229 | } 230 | 231 | static int 232 | 
cg_rmdir(const char *path) 233 | { 234 | CGMGR_LOCKED; 235 | cg_node_t *node = lookupnode(path, false); 236 | 237 | if (!node) 238 | return -ENOENT; 239 | else if (node->type != CGN_CG_DIR || node == cgmgr.rootnode) 240 | return -ENOTSUP; 241 | 242 | delnode(node); 243 | 244 | return 0; 245 | } 246 | 247 | static int 248 | cg_rename(const char *oldpath, const char *newpath) 249 | { 250 | CGMGR_LOCKED; 251 | cg_node_t *old = lookupnode(oldpath, false); 252 | cg_node_t *newparent = lookupnode(newpath, true); 253 | char *dirname = strrchr(newpath, '/'); 254 | 255 | if (!old || !newparent) 256 | return -ENOENT; 257 | else if (old->parent != newparent) 258 | return -EOPNOTSUPP; 259 | else if (old->type != CGN_CG_DIR || newparent->type != CGN_CG_DIR) 260 | return -EOPNOTSUPP; 261 | 262 | free(old->name); 263 | old->name = strdup(dirname); 264 | 265 | return 0; 266 | } 267 | 268 | struct fuse_operations cgops = { 269 | .chmod = cg_chmod, 270 | .chown = cg_chown, 271 | .getattr = cg_getattr, 272 | .open = cg_open, 273 | #ifndef __OpenBSD__ /* no fuse poll on OpenBSD */ 274 | .poll = cg_poll, 275 | #endif 276 | .read = cg_read, 277 | .write = cg_write, 278 | .release = cg_release, 279 | .opendir = cg_opendir, 280 | .readdir = cg_readdir, 281 | .mkdir = cg_mkdir, 282 | .rmdir = cg_rmdir, 283 | .rename = cg_rename, 284 | }; 285 | -------------------------------------------------------------------------------- /cgrpfs_main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define FUSE_USE_VERSION 26 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "cgrpfs.h" 18 | 19 | cgmgr_t cgmgr; 20 | 21 | int 22 | loop() 23 | { 24 | struct fuse_session *se; 25 | struct fuse_chan *ch; 26 | int fd; 27 | struct kevent kev; 28 | size_t bufsize; 29 | char *buf; 30 | 31 | se = fuse_get_session(cgmgr.fuse); 32 | if (!se) 
33 | return -1; 34 | 35 | ch = fuse_session_next_chan(se, NULL); 36 | if (!ch) 37 | return -1; 38 | 39 | bufsize = fuse_chan_bufsize(ch); 40 | buf = malloc(bufsize); 41 | if (!buf) 42 | errx(EXIT_FAILURE, "Failed to allocate buffer"); 43 | 44 | fd = fuse_chan_fd(ch); 45 | 46 | EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); 47 | if ((kevent(cgmgr.kq, &kev, 1, NULL, 0, NULL)) < 0) 48 | err(EXIT_FAILURE, "Failed to add event to Kernel Queue"); 49 | 50 | while (!fuse_session_exited(se)) { 51 | int r; 52 | struct fuse_chan *tmpch = ch; 53 | struct fuse_buf fbuf = { 54 | .mem = buf, 55 | .size = bufsize, 56 | }; 57 | 58 | r = kevent(cgmgr.kq, NULL, 0, &kev, 1, NULL); 59 | 60 | if (r < 0 && errno != EINTR) 61 | err(EXIT_FAILURE, "kevent failed"); 62 | else if (r < 0) 63 | break; 64 | else if (r == 0) 65 | warn("Got 0 from kevent"); 66 | else if (kev.filter == EVFILT_READ && 67 | kev.ident == cgmgr.notifyfd) 68 | cgmgr_accept(); 69 | else if (kev.filter == EVFILT_READ) { 70 | r = fuse_session_receive_buf(se, &fbuf, &tmpch); 71 | 72 | if (r == -EINTR) 73 | continue; 74 | else if (r <= 0) 75 | errx(EXIT_FAILURE, "Got <0 from fuse"); 76 | 77 | fuse_session_process_buf(se, &fbuf, tmpch); 78 | } else if (kev.filter == EVFILT_PROC) { 79 | if (kev.fflags & NOTE_CHILD) { 80 | pid_hash_entry_t *entry; 81 | uintptr_t ppidp = kev.data; /* parent pid */ 82 | 83 | /* find parent pid's node */ 84 | HASH_FIND_PTR(cgmgr.pidcg, &ppidp, entry); 85 | 86 | if (!entry) 87 | warn("Couldn't find containing CGroup of PID %lld", 88 | (long long)kev.data); 89 | else 90 | attachpid(entry->node, kev.ident); 91 | } else if (kev.fflags & NOTE_EXIT) 92 | detachpid(kev.ident, kev.data, false); 93 | else if (kev.fflags & NOTE_TRACKERR) 94 | warn("NOTE_TRACKERR received from Kernel Queue"); 95 | else if (kev.fflags & NOTE_EXEC) 96 | warn("NOTE_EXEC was received"); 97 | } else 98 | assert(!"Unreached"); 99 | } 100 | 101 | free(buf); 102 | fuse_session_reset(se); 103 | 104 | return 0; 105 | } 106 | 107 | 
int 108 | main(int argc, char *argv[]) 109 | { 110 | cgmgr_init(); 111 | 112 | cgmgr.fuse = fuse_setup(argc, argv, &cgops, sizeof(cgops), 113 | &cgmgr.mountpoint, &cgmgr.mt, &cgmgr); 114 | if (!cgmgr.fuse) 115 | errx(EXIT_FAILURE, "Failed to mount filesystem."); 116 | 117 | printf("CGrpFS mounted at %s\n", cgmgr.mountpoint); 118 | 119 | loop(); 120 | 121 | fuse_teardown(cgmgr.fuse, cgmgr.mountpoint); 122 | } 123 | -------------------------------------------------------------------------------- /cgrpfs_main_puffs.c: -------------------------------------------------------------------------------- 1 | #define _KERNTYPES 2 | 3 | #include 4 | #include 5 | /* these must come first */ 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "cgrpfs.h" 18 | 19 | cgmgr_t cgmgr; 20 | static struct puffs_usermount *pu; 21 | 22 | static void 23 | usage() 24 | { 25 | errx(EXIT_FAILURE, "usage: %s [-o mntopt] [-o puffsopt] /mountpoint", 26 | getprogname()); 27 | } 28 | 29 | int 30 | main(int argc, char *argv[]) 31 | { 32 | extern char *optarg; 33 | extern int optind; 34 | struct puffs_pathobj *po_root; 35 | struct puffs_ops *pops; 36 | struct timespec ts; 37 | const char *typename; 38 | char *rtstr; 39 | mntoptparse_t mp; 40 | int pflags = 0, detach = 0, mntflags = 0; 41 | int ch; 42 | 43 | while ((ch = getopt(argc, argv, "do:")) != -1) { 44 | switch (ch) { 45 | case 'd': 46 | pflags |= PUFFS_FLAG_OPDUMP; 47 | break; 48 | case 'o': 49 | mp = getmntopts(optarg, puffsmopts, &mntflags, &pflags); 50 | if (mp == NULL) 51 | err(1, "getmntopts"); 52 | freemntopts(mp); 53 | break; 54 | default: 55 | usage(); 56 | } 57 | } 58 | 59 | argc -= optind; 60 | argv += optind; 61 | 62 | if (argc != 1) 63 | usage(); 64 | 65 | puffs_unmountonsignal(SIGINT, 1); 66 | 67 | cgmgr_init(); 68 | 69 | PUFFSOP_INIT(pops); 70 | puffs_null_setops(pops); 71 | 72 | PUFFSOP_SETFSNOP(pops, sync); 73 | PUFFSOP_SETFSNOP(pops, statvfs); 
74 | 75 | PUFFSOP_SET(pops, cgrpfs, node, lookup); 76 | PUFFSOP_SET(pops, cgrpfs, node, open); 77 | PUFFSOP_SET(pops, cgrpfs, node, open2); 78 | PUFFSOP_SET(pops, cgrpfs, node, mkdir); 79 | PUFFSOP_SET(pops, cgrpfs, node, rmdir); 80 | PUFFSOP_SET(pops, cgrpfs, node, access); 81 | PUFFSOP_SET(pops, cgrpfs, node, getattr); 82 | PUFFSOP_SET(pops, cgrpfs, node, setattr); 83 | PUFFSOP_SET(pops, cgrpfs, node, poll); 84 | PUFFSOP_SET(pops, cgrpfs, node, readdir); 85 | PUFFSOP_SET(pops, cgrpfs, node, rename); 86 | PUFFSOP_SET(pops, cgrpfs, node, read); 87 | PUFFSOP_SET(pops, cgrpfs, node, write); 88 | PUFFSOP_SET(pops, cgrpfs, node, inactive); 89 | PUFFSOP_SET(pops, cgrpfs, node, reclaim); 90 | 91 | if ((pu = puffs_init(pops, _PATH_PUFFS, "cgrpfs", NULL, pflags)) == 92 | NULL) 93 | err(1, "init"); 94 | 95 | /* 96 | * The framebuf interface is useless to us as it tries to add a write 97 | * event filter, which doesn't work on kernel queues. 98 | * We could maybe have a separate thread wait for the KQ FD to become 99 | * ready then sent a byte along a pipe instead, but that can be done 100 | * another time. 
101 | */ 102 | #if 0 103 | puffs_framev_init(pu, kq_fdread_fn, NULL, NULL, NULL, NULL); 104 | if (puffs_framev_addfd(pu, cgmgr.kq, PUFFS_FBIO_READ) < 0) 105 | err(1, "framebuf addfd kq"); 106 | #endif 107 | 108 | puffs_set_errnotify(pu, puffs_kernerr_abort); 109 | if (detach) 110 | if (puffs_daemon(pu, 1, 1) == -1) 111 | err(1, "puffs_daemon"); 112 | 113 | if (puffs_mount(pu, *argv, mntflags, cgmgr.rootnode) == -1) 114 | err(1, "mount"); 115 | if (puffs_mainloop(pu) == -1) 116 | err(1, "mainloop"); 117 | 118 | return 0; 119 | } -------------------------------------------------------------------------------- /cgrpfs_main_threads.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define FUSE_USE_VERSION 26 12 | #include 13 | 14 | #include "cgrpfs.h" 15 | 16 | cgmgr_t cgmgr; 17 | 18 | int 19 | main(int argc, char *argv[]) 20 | { 21 | int r; 22 | 23 | cgmgr_init(); 24 | 25 | cgmgr.fuse = fuse_setup(argc, argv, &cgops, sizeof(cgops), 26 | &cgmgr.mountpoint, &cgmgr.mt, &cgmgr); 27 | if (!cgmgr.fuse) 28 | errx(EXIT_FAILURE, "Failed to mount filesystem."); 29 | 30 | printf("CGrpFS mounted at %s\n", cgmgr.mountpoint); 31 | 32 | r = fuse_loop(cgmgr.fuse); 33 | if (r < 0) 34 | err(EXIT_FAILURE, "fuse_loop failed"); 35 | 36 | fuse_teardown(cgmgr.fuse, cgmgr.mountpoint); 37 | } 38 | -------------------------------------------------------------------------------- /cgrpfs_vfsops.c: -------------------------------------------------------------------------------- 1 | /* 2 | * VFS operations for PUFFS-based cgrpfs 3 | */ -------------------------------------------------------------------------------- /cgrpfs_vnops.c: -------------------------------------------------------------------------------- 1 | /* 2 | * vnode operations for PUFFS-based cgrpfs 3 | */ 4 | 5 | #include "cgrpfs.h" 6 | 7 | #include 8 | 9 | #include 10 | #include 11 
| #include 12 | 13 | static void 14 | setaccessed(cg_node_t *node) 15 | { 16 | node->accessed++; 17 | } 18 | 19 | static int 20 | nodevtype(cg_node_t *node) 21 | { 22 | switch (node->type) { 23 | case CGN_EVENTS: 24 | case CGN_PROCS: /* cgroup.procs file */ 25 | case CGN_RELEASE_AGENT: /* release_agent file */ 26 | case CGN_NOTIFY_ON_RELEASE: /* notify_on_release file */ 27 | case CGN_PID_CGROUP: 28 | return VREG; 29 | 30 | case CGN_CG_DIR: /* cgroup directory */ 31 | case CGN_PID_ROOT_DIR: /* cgroup.meta root dir */ 32 | case CGN_PID_DIR: /* cgroup.meta/$pid directory */ 33 | return VDIR; 34 | 35 | default: 36 | return VBAD; 37 | } 38 | } 39 | 40 | int 41 | cgrpfs_node_lookup(struct puffs_usermount *pu, void *opc, 42 | struct puffs_newinfo *pni, const struct puffs_cn *pcn) 43 | { 44 | CGMGR_LOCKED; 45 | cg_node_t *node = (cg_node_t *)opc, *file; 46 | 47 | if (PCNISDOTDOT(pcn)) { 48 | if (!node->parent) 49 | return -ENOENT; 50 | 51 | puffs_newinfo_setcookie(pni, node->parent); 52 | setaccessed(node->parent); 53 | puffs_newinfo_setvtype(pni, VDIR); 54 | 55 | return 0; 56 | } 57 | 58 | file = lookupfile(node, pcn->pcn_name); 59 | if (file) { 60 | puffs_newinfo_setcookie(pni, file); 61 | setaccessed(file); 62 | puffs_newinfo_setvtype(pni, nodevtype(file)); 63 | puffs_newinfo_setsize(pni, 0); 64 | puffs_newinfo_setrdev(pni, 0); 65 | 66 | return 0; 67 | } 68 | 69 | if ((pcn->pcn_flags & NAMEI_ISLASTCN) && 70 | (pcn->pcn_nameiop == NAMEI_CREATE || 71 | pcn->pcn_nameiop == NAMEI_RENAME)) { 72 | int r = puffs_access(VDIR, node->attr.st_mode, 73 | node->attr.st_uid, node->attr.st_gid, PUFFS_VWRITE, 74 | pcn->pcn_cred); 75 | if (r) 76 | return r; 77 | } 78 | 79 | return ENOENT; 80 | } 81 | 82 | int 83 | cgrpfs_node_mkdir(struct puffs_usermount *pu, void *opc, 84 | struct puffs_newinfo *pni, const struct puffs_cn *pcn, 85 | const struct vattr *va) 86 | { 87 | CGMGR_LOCKED; 88 | cg_node_t *node_parent = (cg_node_t *)opc; 89 | cg_node_t *node_new; 90 | uid_t uid; 91 | gid_t 
gid; 92 | 93 | if (node_parent->type != CGN_CG_DIR) 94 | return EOPNOTSUPP; 95 | 96 | if (lookupfile(node_parent, pcn->pcn_name) != NULL) 97 | return EEXIST; 98 | 99 | assert(puffs_cred_getuid(pcn->pcn_cred, &uid) == 0); 100 | assert(puffs_cred_getgid(pcn->pcn_cred, &gid) == 0); 101 | 102 | // FIXME: umask? And I don't think we have any further info to extract 103 | // from vattr. 104 | node_new = newcgdir(node_parent, pcn->pcn_name, va->va_mode & 07777, 105 | uid, gid); 106 | 107 | setaccessed(node_new); 108 | puffs_newinfo_setcookie(pni, node_new); 109 | 110 | return 0; 111 | } 112 | 113 | int 114 | cgrpfs_node_rmdir(struct puffs_usermount *pu, void *opc, void *targ, 115 | const struct puffs_cn *pcn) 116 | { 117 | CGMGR_LOCKED; 118 | cg_node_t *node = (cg_node_t *)targ; 119 | 120 | if (node->type != CGN_CG_DIR || node == cgmgr.rootnode) 121 | return -ENOTSUP; 122 | 123 | removenode(node); 124 | puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N2); 125 | 126 | return 0; 127 | } 128 | 129 | int 130 | cgrpfs_node_access(struct puffs_usermount *pu, void *opc, int acc_mode, 131 | const struct puffs_cred *pcr) 132 | { 133 | CGMGR_LOCKED; 134 | cg_node_t *node = (cg_node_t *)opc; 135 | 136 | return puffs_access(nodevtype(node), node->attr.st_mode & 07777, 137 | node->attr.st_uid, node->attr.st_gid, acc_mode, pcr); 138 | } 139 | 140 | int 141 | cgrpfs_node_getattr(struct puffs_usermount *pu, void *opc, struct vattr *va, 142 | const struct puffs_cred *pcred) 143 | { 144 | CGMGR_LOCKED; 145 | cg_node_t *node = (cg_node_t *)opc; 146 | 147 | puffs_stat2vattr(va, &node->attr); 148 | 149 | return 0; 150 | } 151 | 152 | int 153 | cgrpfs_node_setattr(struct puffs_usermount *pu, void *opc, 154 | const struct vattr *va, const struct puffs_cred *pcr) 155 | { 156 | CGMGR_LOCKED; 157 | cg_node_t *node = (cg_node_t *)opc; 158 | int rv; 159 | 160 | /* check permissions */ 161 | if (va->va_flags != PUFFS_VNOVAL) 162 | return EOPNOTSUPP; 163 | 164 | if (va->va_uid != PUFFS_VNOVAL || 
va->va_gid != PUFFS_VNOVAL) { 165 | rv = puffs_access_chown(node->attr.st_uid, node->attr.st_gid, 166 | va->va_uid, va->va_gid, pcr); 167 | if (rv) 168 | return rv; 169 | if (va->va_uid != PUFFS_VNOVAL) 170 | node->attr.st_uid = va->va_uid; 171 | if (va->va_gid != PUFFS_VNOVAL) 172 | node->attr.st_gid = va->va_gid; 173 | } 174 | 175 | if (va->va_mode != PUFFS_VNOVAL) { 176 | rv = puffs_access_chmod(node->attr.st_uid, node->attr.st_gid, 177 | nodevtype(node), node->attr.st_mode & 07777, pcr); 178 | if (rv) 179 | return rv; 180 | node->attr.st_mode &= ~(07777); 181 | node->attr.st_mode |= va->va_mode & 07777; 182 | } 183 | 184 | if ((va->va_atime.tv_sec != PUFFS_VNOVAL && 185 | va->va_atime.tv_nsec != PUFFS_VNOVAL) || 186 | (va->va_mtime.tv_sec != PUFFS_VNOVAL && 187 | va->va_mtime.tv_nsec != PUFFS_VNOVAL)) { 188 | rv = puffs_access_times(node->attr.st_uid, node->attr.st_gid, 189 | node->attr.st_mode & 07777, 190 | va->va_vaflags & VA_UTIMES_NULL, pcr); 191 | if (rv) 192 | return rv; 193 | if (va->va_atime.tv_sec != PUFFS_VNOVAL) 194 | node->attr.st_atim.tv_sec = va->va_atime.tv_sec; 195 | if (va->va_atime.tv_nsec != PUFFS_VNOVAL) 196 | node->attr.st_atim.tv_nsec = va->va_atime.tv_nsec; 197 | if (va->va_mtime.tv_sec != PUFFS_VNOVAL) 198 | node->attr.st_mtim.tv_sec = va->va_mtime.tv_sec; 199 | if (va->va_mtime.tv_nsec != PUFFS_VNOVAL) 200 | node->attr.st_mtim.tv_nsec = va->va_mtime.tv_nsec; 201 | } 202 | 203 | if (va->va_size != PUFFS_VNOVAL) 204 | return EOPNOTSUPP; 205 | 206 | return 0; 207 | } 208 | 209 | /* xxx: not usable until PUFFS fixed in NetBSD. 
*/ 210 | int 211 | cgrpfs_node_poll(struct puffs_usermount *pu, void *opc, int *revents) 212 | { 213 | CGMGR_LOCKED; 214 | *revents &= POLLIN | POLLHUP; 215 | return EOPNOTSUPP; 216 | } 217 | 218 | int 219 | cgrpfs_node_readdir(struct puffs_usermount *pu, void *opc, struct dirent *dent, 220 | off_t *readoff, size_t *reslen, const struct puffs_cred *pcr, 221 | int *eofflag, off_t *cookies, size_t *ncookies) 222 | { 223 | CGMGR_LOCKED; 224 | cg_node_t *node = (cg_node_t *)opc; 225 | cg_node_t *subnode; /* iterator */ 226 | int i = 0; 227 | 228 | if (nodevtype(node) != VDIR) 229 | return ENOTDIR; 230 | 231 | *ncookies = 0; 232 | again: 233 | if (*readoff == DENT_DOT || *readoff == DENT_DOTDOT) { 234 | puffs_gendotdent(&dent, (ino_t)node, *readoff, reslen); 235 | (*readoff)++; 236 | PUFFS_STORE_DCOOKIE(cookies, ncookies, *readoff); 237 | goto again; 238 | } 239 | 240 | LIST_FOREACH (subnode, &node->subnodes, entries) { 241 | if (i < DENT_ADJ(*readoff)) 242 | continue; 243 | 244 | i++; 245 | 246 | if (!puffs_nextdent(&dent, subnode->name, (ino_t)subnode, 247 | puffs_vtype2dt(nodevtype(subnode)), reslen)) 248 | return 0; 249 | 250 | (*readoff)++; 251 | PUFFS_STORE_DCOOKIE(cookies, ncookies, *readoff); 252 | } 253 | 254 | *eofflag = 1; 255 | 256 | return 0; 257 | } 258 | 259 | int 260 | cgrpfs_node_open(struct puffs_usermount *pu, void *opc, int modep, 261 | const struct puffs_cred *pcr) 262 | { 263 | return 0; 264 | } 265 | 266 | int 267 | cgrpfs_node_open2(struct puffs_usermount *pu, void *opc, int modep, 268 | const struct puffs_cred *pcr, int *oflags) 269 | { 270 | *oflags |= PUFFS_OPEN_IO_DIRECT; 271 | return 0; 272 | } 273 | 274 | int 275 | cgrpfs_node_rename(struct puffs_usermount *pu, void *opc, void *src, 276 | const struct puffs_cn *pcn_src, void *targ_dir, void *targ, 277 | const struct puffs_cn *pcn_targ) 278 | { 279 | CGMGR_LOCKED; 280 | cg_node_t *cgn_sdir = opc; 281 | cg_node_t *cgn_sfile = src; 282 | cg_node_t *cgn_tdir = targ_dir; 283 | /* Target file 
doesn't matter. It doesn't exist yet. */ 284 | 285 | if (cgn_sdir != cgn_tdir) 286 | return EPERM; /* only rename within same dir */ 287 | else if (cgn_sfile->type != CGN_CG_DIR) 288 | return EOPNOTSUPP; /* only cgdirs may be renamed */ 289 | 290 | // TODO: double check source still exists? 291 | 292 | free(cgn_sfile->name); 293 | cgn_sfile->name = strdup(pcn_targ->pcn_name); 294 | 295 | return 0; 296 | } 297 | 298 | int 299 | cgrpfs_node_read(struct puffs_usermount *pu, void *opc, uint8_t *buf, 300 | off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag) 301 | { 302 | CGMGR_LOCKED; 303 | cg_node_t *node = opc; 304 | char *txt; 305 | size_t maxlen; 306 | 307 | txt = nodetxt(node); 308 | 309 | if (!txt) 310 | return ENOMEM; 311 | 312 | maxlen = strlen(txt); 313 | if (offset > maxlen) 314 | return 0; 315 | else if (*resid < maxlen - offset) 316 | maxlen = *resid; 317 | else 318 | maxlen -= offset; 319 | 320 | memcpy(buf, txt + offset, maxlen); 321 | 322 | *resid -= maxlen; 323 | 324 | return 0; 325 | } 326 | 327 | int 328 | cgrpfs_node_write(struct puffs_usermount *pu, void *opc, uint8_t *buf, 329 | off_t offset, size_t *resid, const struct puffs_cred *pcr, int ioflag) 330 | { 331 | CGMGR_LOCKED; 332 | cg_node_t *node = opc; 333 | 334 | if (node->type == CGN_PROCS) { 335 | long pid; 336 | int r; 337 | 338 | if (sscanf((const char *)buf, "%ld\n", &pid) < 1) 339 | return -EINVAL; 340 | 341 | r = attachpid(node->parent, pid); 342 | if (r < 0) 343 | return -r; 344 | 345 | *resid = 0; 346 | 347 | return 0; 348 | } else 349 | return ENODEV; 350 | } 351 | 352 | int 353 | cgrpfs_node_inactive(struct puffs_usermount *pu, void *opc) 354 | { 355 | puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_NOREF_N1); 356 | return 0; 357 | } 358 | 359 | int 360 | cgrpfs_node_reclaim(struct puffs_usermount *pu, void *opc) 361 | { 362 | CGMGR_LOCKED; 363 | cg_node_t *node = opc; 364 | 365 | if (node->todel) 366 | delnode(node); 367 | else 368 | node->accessed = 0; 369 | 370 | 
return 0; 371 | } -------------------------------------------------------------------------------- /uthash.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2003-2021, Troy D. Hanson http://troydhanson.github.io/uthash/ 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 12 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 13 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 14 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 15 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 16 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 17 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 18 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 19 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 20 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 | */ 23 | 24 | #ifndef UTHASH_H 25 | #define UTHASH_H 26 | 27 | #define UTHASH_VERSION 2.3.0 28 | 29 | #include /* memcmp, memset, strlen */ 30 | #include /* ptrdiff_t */ 31 | #include /* exit */ 32 | 33 | #if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT 34 | /* This codepath is provided for backward compatibility, but I plan to remove it. 
*/ 35 | #warning "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead" 36 | typedef unsigned int uint32_t; 37 | typedef unsigned char uint8_t; 38 | #elif defined(HASH_NO_STDINT) && HASH_NO_STDINT 39 | #else 40 | #include /* uint8_t, uint32_t */ 41 | #endif 42 | 43 | /* These macros use decltype or the earlier __typeof GNU extension. 44 | As decltype is only available in newer compilers (VS2010 or gcc 4.3+ 45 | when compiling c++ source) this code uses whatever method is needed 46 | or, for VS2008 where neither is available, uses casting workarounds. */ 47 | #if !defined(DECLTYPE) && !defined(NO_DECLTYPE) 48 | #if defined(_MSC_VER) /* MS compiler */ 49 | #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ 50 | #define DECLTYPE(x) (decltype(x)) 51 | #else /* VS2008 or older (or VS2010 in C mode) */ 52 | #define NO_DECLTYPE 53 | #endif 54 | #elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) 55 | #define NO_DECLTYPE 56 | #else /* GNU, Sun and other compilers */ 57 | #define DECLTYPE(x) (__typeof(x)) 58 | #endif 59 | #endif 60 | 61 | #ifdef NO_DECLTYPE 62 | #define DECLTYPE(x) 63 | #define DECLTYPE_ASSIGN(dst,src) \ 64 | do { \ 65 | char **_da_dst = (char**)(&(dst)); \ 66 | *_da_dst = (char*)(src); \ 67 | } while (0) 68 | #else 69 | #define DECLTYPE_ASSIGN(dst,src) \ 70 | do { \ 71 | (dst) = DECLTYPE(dst)(src); \ 72 | } while (0) 73 | #endif 74 | 75 | #ifndef uthash_malloc 76 | #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ 77 | #endif 78 | #ifndef uthash_free 79 | #define uthash_free(ptr,sz) free(ptr) /* free fcn */ 80 | #endif 81 | #ifndef uthash_bzero 82 | #define uthash_bzero(a,n) memset(a,'\0',n) 83 | #endif 84 | #ifndef uthash_strlen 85 | #define uthash_strlen(s) strlen(s) 86 | #endif 87 | 88 | #ifndef HASH_FUNCTION 89 | #define HASH_FUNCTION(keyptr,keylen,hashv) HASH_JEN(keyptr, keylen, hashv) 90 | #endif 91 | 92 | #ifndef HASH_KEYCMP 93 | #define HASH_KEYCMP(a,b,n) 
memcmp(a,b,n) 94 | #endif 95 | 96 | #ifndef uthash_noexpand_fyi 97 | #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ 98 | #endif 99 | #ifndef uthash_expand_fyi 100 | #define uthash_expand_fyi(tbl) /* can be defined to log expands */ 101 | #endif 102 | 103 | #ifndef HASH_NONFATAL_OOM 104 | #define HASH_NONFATAL_OOM 0 105 | #endif 106 | 107 | #if HASH_NONFATAL_OOM 108 | /* malloc failures can be recovered from */ 109 | 110 | #ifndef uthash_nonfatal_oom 111 | #define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ 112 | #endif 113 | 114 | #define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) 115 | #define IF_HASH_NONFATAL_OOM(x) x 116 | 117 | #else 118 | /* malloc failures result in lost memory, hash tables are unusable */ 119 | 120 | #ifndef uthash_fatal 121 | #define uthash_fatal(msg) exit(-1) /* fatal OOM error */ 122 | #endif 123 | 124 | #define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") 125 | #define IF_HASH_NONFATAL_OOM(x) 126 | 127 | #endif 128 | 129 | /* initial number of buckets */ 130 | #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ 131 | #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ 132 | #define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ 133 | 134 | /* calculate the element whose hash handle address is hhp */ 135 | #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) 136 | /* calculate the hash handle from element address elp */ 137 | #define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle*)(void*)(((char*)(elp)) + ((tbl)->hho))) 138 | 139 | #define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ 140 | do { \ 141 | struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ 142 | unsigned _hd_bkt; \ 143 | HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ 144 | (head)->hh.tbl->buckets[_hd_bkt].count++; \ 145 | _hd_hh_item->hh_next = NULL; \ 146 | _hd_hh_item->hh_prev = NULL; \ 147 | } while (0) 148 | 149 
| #define HASH_VALUE(keyptr,keylen,hashv) \ 150 | do { \ 151 | HASH_FUNCTION(keyptr, keylen, hashv); \ 152 | } while (0) 153 | 154 | #define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ 155 | do { \ 156 | (out) = NULL; \ 157 | if (head) { \ 158 | unsigned _hf_bkt; \ 159 | HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ 160 | if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ 161 | HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ 162 | } \ 163 | } \ 164 | } while (0) 165 | 166 | #define HASH_FIND(hh,head,keyptr,keylen,out) \ 167 | do { \ 168 | (out) = NULL; \ 169 | if (head) { \ 170 | unsigned _hf_hashv; \ 171 | HASH_VALUE(keyptr, keylen, _hf_hashv); \ 172 | HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ 173 | } \ 174 | } while (0) 175 | 176 | #ifdef HASH_BLOOM 177 | #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) 178 | #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 
1UL : 0UL) 179 | #define HASH_BLOOM_MAKE(tbl,oomed) \ 180 | do { \ 181 | (tbl)->bloom_nbits = HASH_BLOOM; \ 182 | (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ 183 | if (!(tbl)->bloom_bv) { \ 184 | HASH_RECORD_OOM(oomed); \ 185 | } else { \ 186 | uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 187 | (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ 188 | } \ 189 | } while (0) 190 | 191 | #define HASH_BLOOM_FREE(tbl) \ 192 | do { \ 193 | uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ 194 | } while (0) 195 | 196 | #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) 197 | #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) 198 | 199 | #define HASH_BLOOM_ADD(tbl,hashv) \ 200 | HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 201 | 202 | #define HASH_BLOOM_TEST(tbl,hashv) \ 203 | HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) 204 | 205 | #else 206 | #define HASH_BLOOM_MAKE(tbl,oomed) 207 | #define HASH_BLOOM_FREE(tbl) 208 | #define HASH_BLOOM_ADD(tbl,hashv) 209 | #define HASH_BLOOM_TEST(tbl,hashv) (1) 210 | #define HASH_BLOOM_BYTELEN 0U 211 | #endif 212 | 213 | #define HASH_MAKE_TABLE(hh,head,oomed) \ 214 | do { \ 215 | (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ 216 | if (!(head)->hh.tbl) { \ 217 | HASH_RECORD_OOM(oomed); \ 218 | } else { \ 219 | uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ 220 | (head)->hh.tbl->tail = &((head)->hh); \ 221 | (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ 222 | (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ 223 | (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ 224 | (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ 225 | HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ 226 | (head)->hh.tbl->signature = HASH_SIGNATURE; \ 227 | if (!(head)->hh.tbl->buckets) { \ 228 | 
/* NOTE(review): tail of HASH_MAKE_TABLE — the head of this macro is above this
 * chunk. Kept verbatim: on bucket-allocation failure the partially built table
 * is freed; otherwise buckets are zeroed and the bloom filter is created. */
      HASH_RECORD_OOM(oomed); \
      uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
    } else { \
      uthash_bzero((head)->hh.tbl->buckets, \
          HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \
      HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \
      IF_HASH_NONFATAL_OOM( \
        if (oomed) { \
          uthash_free((head)->hh.tbl->buckets, \
              HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
          uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
        } \
      ) \
    } \
  } \
} while (0)

/* HASH_REPLACE*: find an existing element with the same key/hash, delete it
 * (returning it via "replaced", or NULL if none), then add the new element. */
#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \
do { \
  (replaced) = NULL; \
  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
  if (replaced) { \
    HASH_DELETE(hh, head, replaced); \
  } \
  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \
} while (0)

#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \
do { \
  (replaced) = NULL; \
  HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \
  if (replaced) { \
    HASH_DELETE(hh, head, replaced); \
  } \
  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \
} while (0)

#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
do { \
  unsigned _hr_hashv; \
  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \
  HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \
} while (0)

#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \
do { \
  unsigned _hr_hashv; \
  HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \
  HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \
} while (0)

/* append (add) at the tail of the app-order doubly-linked list */
#define HASH_APPEND_LIST(hh, head, add) \
do { \
  (add)->hh.next = NULL; \
  (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
  (head)->hh.tbl->tail->next = (add); \
  (head)->hh.tbl->tail = &((add)->hh); \
} while (0)

/* Advance _hs_iter to the first element for which cmpfcn(elt, add) > 0, or
 * NULL if none. NOTE: _hs_iter is NOT declared here — it is a local of the
 * caller (HASH_ADD_KEYPTR_BYHASHVALUE_INORDER); the two macros are coupled. */
#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \
do { \
  do { \
    if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \
      break; \
    } \
  } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \
} while (0)

#ifdef NO_DECLTYPE
/* Variant for compilers without decltype/typeof: temporarily repoints (head)
 * at each candidate so cmpfcn gets a correctly-typed pointer, restoring it
 * before every exit path. */
#undef HASH_AKBI_INNER_LOOP
#define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \
do { \
  char *_hs_saved_head = (char*)(head); \
  do { \
    DECLTYPE_ASSIGN(head, _hs_iter); \
    if (cmpfcn(head, add) > 0) { \
      DECLTYPE_ASSIGN(head, _hs_saved_head); \
      break; \
    } \
    DECLTYPE_ASSIGN(head, _hs_saved_head); \
  } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \
} while (0)
#endif

/* HASH_ADD_TO_TABLE: bump the item count and link (add)'s handle into its
 * bucket (plus bloom/key-emit bookkeeping). Two variants: the first unwinds
 * the insertion on allocation failure when OOM is nonfatal. */
#if HASH_NONFATAL_OOM

#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \
do { \
  if (!(oomed)) { \
    unsigned _ha_bkt; \
    (head)->hh.tbl->num_items++; \
    HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \
    HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \
    if (oomed) { \
      HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \
      HASH_DELETE_HH(hh, head, &(add)->hh); \
      (add)->hh.tbl = NULL; \
      uthash_nonfatal_oom(add); \
    } else { \
      HASH_BLOOM_ADD((head)->hh.tbl, hashval); \
      HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \
    } \
  } else { \
    (add)->hh.tbl = NULL; \
    uthash_nonfatal_oom(add); \
  } \
} while (0)

#else

#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \
do { \
  unsigned _ha_bkt; \
  (head)->hh.tbl->num_items++; \
  HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \
  HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \
  HASH_BLOOM_ADD((head)->hh.tbl, hashval); \
  HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \
} while (0)

#endif


/* Ordered insert: walk the app-order list (via HASH_AKBI_INNER_LOOP, which
 * consumes the _hs_iter declared below) to find the insertion point, splice
 * (add) in, then register it in the bucket table. */
#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \
do { \
  IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \
  (add)->hh.hashv = (hashval); \
  (add)->hh.key = (char*) (keyptr); \
  (add)->hh.keylen = (unsigned) (keylen_in); \
  if (!(head)) { \
    (add)->hh.next = NULL; \
    (add)->hh.prev = NULL; \
    HASH_MAKE_TABLE(hh, add, _ha_oomed); \
    IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \
      (head) = (add); \
    IF_HASH_NONFATAL_OOM( } ) \
  } else { \
    void *_hs_iter = (head); \
    (add)->hh.tbl = (head)->hh.tbl; \
    HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \
    if (_hs_iter) { \
      (add)->hh.next = _hs_iter; \
      if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) { \
        HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \
      } else { \
        (head) = (add); \
      } \
      HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \
    } else { \
      HASH_APPEND_LIST(hh, head, add); \
    } \
  } \
  HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \
  HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \
} while (0)

#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \
do { \
  unsigned _hs_hashv; \
  HASH_VALUE(keyptr, keylen_in, _hs_hashv); \
  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \
} while (0)

#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \
  HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn)

#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \
  HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn)

/* Unordered insert: append to the app-order list and register in the bucket
 * table; the first addition creates the table via HASH_MAKE_TABLE. */
#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \
do { \
  IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \
  (add)->hh.hashv = (hashval); \
  (add)->hh.key = (const void*) (keyptr); \
  (add)->hh.keylen = (unsigned) (keylen_in); \
  if (!(head)) { \
    (add)->hh.next = NULL; \
    (add)->hh.prev = NULL; \
    HASH_MAKE_TABLE(hh, add, _ha_oomed); \
    IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \
      (head) = (add); \
    IF_HASH_NONFATAL_OOM( } ) \
  } else { \
    (add)->hh.tbl = (head)->hh.tbl; \
    HASH_APPEND_LIST(hh, head, add); \
  } \
  HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \
  HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \
} while (0)

#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
  unsigned _ha_hashv; \
  HASH_VALUE(keyptr, keylen_in, _ha_hashv); \
  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \
} while (0)

#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \
  HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add)

#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
  HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add)

/* map a hash value to a bucket index; num_bkts is always a power of two here,
 * so masking with (num_bkts - 1) is equivalent to modulo */
#define HASH_TO_BKT(hashv,num_bkts,bkt) \
do { \
  bkt = ((hashv) & ((num_bkts) - 1U)); \
} while (0)

/* delete "delptr" from the hash table.
 * "the usual" patch-up process for the app-order doubly-linked-list.
 * The use of _hd_hh_del below deserves special explanation.
 * These used to be expressed using (delptr) but that led to a bug
 * if someone used the same symbol for the head and deletee, like
 *  HASH_DELETE(hh,users,users);
 * We want that to work, but by changing the head (users) below
 * we were forfeiting our ability to further refer to the deletee (users)
 * in the patch-up process. Solution: use scratch space to
 * copy the deletee pointer, then the latter references are via that
 * scratch pointer rather than through the repointed (users) symbol.
 */
#define HASH_DELETE(hh,head,delptr) \
    HASH_DELETE_HH(hh, head, &(delptr)->hh)

#define HASH_DELETE_HH(hh,head,delptrhh) \
do { \
  struct UT_hash_handle *_hd_hh_del = (delptrhh); \
  if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \
    /* deleting the sole remaining item: free the whole table */ \
    HASH_BLOOM_FREE((head)->hh.tbl); \
    uthash_free((head)->hh.tbl->buckets, \
                (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
    (head) = NULL; \
  } else { \
    unsigned _hd_bkt; \
    if (_hd_hh_del == (head)->hh.tbl->tail) { \
      (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \
    } \
    if (_hd_hh_del->prev != NULL) { \
      HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \
    } else { \
      DECLTYPE_ASSIGN(head, _hd_hh_del->next); \
    } \
    if (_hd_hh_del->next != NULL) { \
      HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \
    } \
    HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
    HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
    (head)->hh.tbl->num_items--; \
  } \
  HASH_FSCK(hh, head, "HASH_DELETE_HH"); \
} while (0)

/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out) \
do { \
  unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \
  HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \
} while (0)
#define HASH_ADD_STR(head,strfield,add) \
do { \
  unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \
  HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \
} while (0)
#define HASH_REPLACE_STR(head,strfield,add,replaced) \
do { \
  unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \
  HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \
} while (0)
#define HASH_FIND_INT(head,findint,out) \
    HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
    HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_REPLACE_INT(head,intfield,add,replaced) \
    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
#define HASH_FIND_PTR(head,findptr,out) \
    HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
#define HASH_DEL(head,delptr) \
    HASH_DELETE(hh,head,delptr)

/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
 */
#ifdef HASH_DEBUG
#include <stdio.h>   /* fprintf, stderr */
#define HASH_OOPS(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head,where) \
do { \
  struct UT_hash_handle *_thh; \
  if (head) { \
    unsigned _bkt_i; \
    unsigned _count = 0; \
    char *_prev; \
    /* pass 1: verify each bucket's chain links and per-bucket counts */ \
    for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \
      unsigned _bkt_count = 0; \
      _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
      _prev = NULL; \
      while (_thh) { \
        if (_prev != (char*)(_thh->hh_prev)) { \
          HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \
              (where), (void*)_thh->hh_prev, (void*)_prev); \
        } \
        _bkt_count++; \
        _prev = (char*)(_thh); \
        _thh = _thh->hh_next; \
      } \
      _count += _bkt_count; \
      if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
        HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \
            (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
      } \
    } \
    if (_count != (head)->hh.tbl->num_items) { \
      HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \
          (where), (head)->hh.tbl->num_items, _count); \
    } \
    /* pass 2: verify the app-order doubly-linked list */ \
    _count = 0; \
    _prev = NULL; \
    _thh = &(head)->hh; \
    while (_thh) { \
      _count++; \
      if (_prev != (char*)_thh->prev) { \
        HASH_OOPS("%s: invalid prev %p, actual %p\n", \
            (where), (void*)_thh->prev, (void*)_prev); \
      } \
      _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
      _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \
    } \
    if (_count != (head)->hh.tbl->num_items) { \
      HASH_OOPS("%s: invalid app item count %u, actual %u\n", \
          (where), (head)->hh.tbl->num_items, _count); \
    } \
  } \
} while (0)
#else
#define HASH_FSCK(hh,head,where)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the descriptor to which this macro is defined for tuning the hash function.
 * The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
do { \
  unsigned _klen = fieldlen; \
  write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
  write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif

/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */
#define HASH_BER(key,keylen,hashv) \
do { \
  unsigned _hb_keylen = (unsigned)keylen; \
  const unsigned char *_hb_key = (const unsigned char*)(key); \
  (hashv) = 0; \
  while (_hb_keylen-- != 0U) { \
    (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \
  } \
} while (0)


/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,hashv) \
do { \
  unsigned _sx_i; \
  const unsigned char *_hs_key = (const unsigned char*)(key); \
  hashv = 0; \
  for (_sx_i=0; _sx_i < keylen; _sx_i++) { \
    hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
  } \
} while (0)
/* FNV-1a variation */
#define HASH_FNV(key,keylen,hashv) \
do { \
  unsigned _fn_i; \
  const unsigned char *_hf_key = (const unsigned char*)(key); \
  (hashv) = 2166136261U; \
  for (_fn_i=0; _fn_i < keylen; _fn_i++) { \
    hashv = hashv ^ _hf_key[_fn_i]; \
    hashv = hashv * 16777619U; \
  } \
} while (0)

/* Jenkins one-at-a-time hash */
#define HASH_OAT(key,keylen,hashv) \
do { \
  unsigned _ho_i; \
  const unsigned char *_ho_key=(const unsigned char*)(key); \
  hashv = 0; \
  for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
      hashv += _ho_key[_ho_i]; \
      hashv += (hashv << 10); \
      hashv ^= (hashv >> 6); \
  } \
  hashv += (hashv << 3); \
  hashv ^= (hashv >> 11); \
  hashv += (hashv << 15); \
} while (0)

#define HASH_JEN_MIX(a,b,c) \
do { \
  a -= b; a -= c; a ^= ( c >> 13 ); \
  b -= c; b -= a; b ^= ( a << 8 ); \
  c -= a; c -= b; c ^= ( b >> 13 ); \
  a -= b; a -= c; a ^= ( c >> 12 ); \
  b -= c; b -= a; b ^= ( a << 16 ); \
  c -= a; c -= b; c ^= ( b >> 5 ); \
  a -= b; a -= c; a ^= ( c >> 3 ); \
  b -= c; b -= a; b ^= ( a << 10 ); \
  c -= a; c -= b; c ^= ( b >> 15 ); \
} while (0)

/* Bob Jenkins' lookup hash: 12-byte blocks, then a tail switch */
#define HASH_JEN(key,keylen,hashv) \
do { \
  unsigned _hj_i,_hj_j,_hj_k; \
  unsigned const char *_hj_key=(unsigned const char*)(key); \
  hashv = 0xfeedbeefu; \
  _hj_i = _hj_j = 0x9e3779b9u; \
  _hj_k = (unsigned)(keylen); \
  while (_hj_k >= 12U) { \
    _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
        + ( (unsigned)_hj_key[2] << 16 ) \
        + ( (unsigned)_hj_key[3] << 24 ) ); \
    _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
        + ( (unsigned)_hj_key[6] << 16 ) \
        + ( (unsigned)_hj_key[7] << 24 ) ); \
    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
        + ( (unsigned)_hj_key[10] << 16 ) \
        + ( (unsigned)_hj_key[11] << 24 ) ); \
     \
     HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
     \
     _hj_key += 12; \
     _hj_k -= 12U; \
  } \
  hashv += (unsigned)(keylen); \
  switch ( _hj_k ) { \
    case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \
    case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \
    case 9:  hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \
    case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \
    case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \
    case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \
    case 5:  _hj_j += _hj_key[4]; /* FALLTHROUGH */ \
    case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \
    case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \
    case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \
    case 1:  _hj_i += _hj_key[0]; /* FALLTHROUGH */ \
    default: ; \
  } \
  HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
} while (0)

/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
                      +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,hashv) \
do { \
  unsigned const char *_sfh_key=(unsigned const char*)(key); \
  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \
 \
  unsigned _sfh_rem = _sfh_len & 3U; \
  _sfh_len >>= 2; \
  hashv = 0xcafebabeu; \
 \
  /* Main loop */ \
  for (;_sfh_len > 0U; _sfh_len--) { \
    hashv    += get16bits (_sfh_key); \
    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \
    hashv     = (hashv << 16) ^ _sfh_tmp; \
    _sfh_key += 2U*sizeof (uint16_t); \
    hashv    += hashv >> 11; \
  } \
 \
  /* Handle end cases */ \
  switch (_sfh_rem) { \
    case 3: hashv += get16bits (_sfh_key); \
            hashv ^= hashv << 16; \
            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \
            hashv += hashv >> 11; \
            break; \
    case 2: hashv += get16bits (_sfh_key); \
            hashv ^= hashv << 11; \
            hashv += hashv >> 17; \
            break; \
    case 1: hashv += *_sfh_key; \
            hashv ^= hashv << 10; \
            hashv += hashv >> 1; \
            break; \
    default: ; \
  } \
 \
  /* Force "avalanching" of final 127 bits */ \
  hashv ^= hashv << 3; \
  hashv += hashv >> 5; \
  hashv ^= hashv << 4; \
  hashv += hashv >> 17; \
  hashv ^= hashv << 25; \
  hashv += hashv >> 6; \
} while (0)

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \
do { \
  if ((head).hh_head != NULL) { \
    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \
  } else { \
    (out) = NULL; \
  } \
  while ((out) != NULL) { \
    /* compare hash and length first; memcmp keys only on a match */ \
    if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \
      if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) { \
        break; \
      } \
    } \
    if ((out)->hh.hh_next != NULL) { \
      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \
    } else { \
      (out) = NULL; \
    } \
  } \
} while (0)

/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \
do { \
  UT_hash_bucket *_ha_head = &(head); \
  _ha_head->count++; \
  (addhh)->hh_next = _ha_head->hh_head; \
  (addhh)->hh_prev = NULL; \
  if (_ha_head->hh_head != NULL) { \
    _ha_head->hh_head->hh_prev = (addhh); \
  } \
  _ha_head->hh_head = (addhh); \
  /* grow the table once the chain exceeds its (per-bucket) threshold */ \
  if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \
      && !(addhh)->tbl->noexpand) { \
    HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \
    IF_HASH_NONFATAL_OOM( \
      if (oomed) { \
        HASH_DEL_IN_BKT(head,addhh); \
      } \
    ) \
  } \
} while (0)

/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(head,delhh) \
do { \
  UT_hash_bucket *_hd_head = &(head); \
  _hd_head->count--; \
  if (_hd_head->hh_head == (delhh)) { \
    _hd_head->hh_head = (delhh)->hh_next; \
  } \
  if ((delhh)->hh_prev) { \
    (delhh)->hh_prev->hh_next = (delhh)->hh_next; \
  } \
  if ((delhh)->hh_next) { \
    (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \
  } \
} while (0)

/* Bucket expansion has the effect of doubling the number of buckets
 * and redistributing the items into the new buckets. Ideally the
 * items will distribute more or less evenly into the new buckets
 * (the extent to which this is true is a measure of the quality of
 * the hash function as it applies to the key domain).
 *
 * With the items distributed into more buckets, the chain length
 * (item count) in each bucket is reduced. Thus by expanding buckets
 * the hash keeps a bound on the chain length. This bounded chain
 * length is the essence of how a hash provides constant time lookup.
 *
 * The calculation of tbl->ideal_chain_maxlen below deserves some
 * explanation. First, keep in mind that we're calculating the ideal
 * maximum chain length based on the *new* (doubled) bucket count.
 * In fractions this is just n/b (n=number of items,b=new num buckets).
 * Since the ideal chain length is an integer, we want to calculate
 * ceil(n/b). We don't depend on floating point arithmetic in this
 * hash, so to calculate ceil(n/b) with integers we could write
 *
 *      ceil(n/b) = (n/b) + ((n%b)?1:0)
 *
 * and in fact a previous version of this hash did just that.
 * But now we have improved things a bit by recognizing that b is
 * always a power of two. We keep its base 2 log handy (call it lb),
 * so now we can write this with a bit shift and logical AND:
 *
 *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 *
 */
#define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \
do { \
  unsigned _he_bkt; \
  unsigned _he_bkt_i; \
  struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
  UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
  _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
           sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \
  if (!_he_new_buckets) { \
    HASH_RECORD_OOM(oomed); \
  } else { \
    uthash_bzero(_he_new_buckets, \
        sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \
    (tbl)->ideal_chain_maxlen = \
       ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \
       ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \
    (tbl)->nonideal_items = 0; \
    for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \
      _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \
      while (_he_thh != NULL) { \
        _he_hh_nxt = _he_thh->hh_next; \
        HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \
        _he_newbkt = &(_he_new_buckets[_he_bkt]); \
        if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \
          (tbl)->nonideal_items++; \
          if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \
            _he_newbkt->expand_mult++; \
          } \
        } \
        _he_thh->hh_prev = NULL; \
        _he_thh->hh_next = _he_newbkt->hh_head; \
        if (_he_newbkt->hh_head != NULL) { \
          _he_newbkt->hh_head->hh_prev = _he_thh; \
        } \
        _he_newbkt->hh_head = _he_thh; \
        _he_thh = _he_hh_nxt; \
      } \
    } \
    uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \
    (tbl)->num_buckets *= 2U; \
    (tbl)->log2_num_buckets++; \
    (tbl)->buckets = _he_new_buckets; \
    /* two consecutive ineffective expansions permanently disable expansion */ \
    (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \
        ((tbl)->ineff_expands+1U) : 0U; \
    if ((tbl)->ineff_expands > 1U) { \
      (tbl)->noexpand = 1; \
      uthash_noexpand_fyi(tbl); \
    } \
    uthash_expand_fyi(tbl); \
  } \
} while (0)


/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
do { \
  unsigned _hs_i; \
  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
  if (head != NULL) { \
    _hs_insize = 1; \
    _hs_looping = 1; \
    _hs_list = &((head)->hh); \
    while (_hs_looping != 0U) { \
      _hs_p = _hs_list; \
      _hs_list = NULL; \
      _hs_tail = NULL; \
      _hs_nmerges = 0; \
      while (_hs_p != NULL) { \
        _hs_nmerges++; \
        _hs_q = _hs_p; \
        _hs_psize = 0; \
        for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \
          _hs_psize++; \
          _hs_q = ((_hs_q->next != NULL) ? \
            HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \
          if (_hs_q == NULL) { \
            break; \
          } \
        } \
        _hs_qsize = _hs_insize; \
        while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \
          if (_hs_psize == 0U) { \
            _hs_e = _hs_q; \
            _hs_q = ((_hs_q->next != NULL) ? \
              HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \
            _hs_qsize--; \
          } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \
            _hs_e = _hs_p; \
            if (_hs_p != NULL) { \
              _hs_p = ((_hs_p->next != NULL) ? \
                HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \
            } \
            _hs_psize--; \
          } else if ((cmpfcn( \
                DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \
                DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \
                )) <= 0) { \
            _hs_e = _hs_p; \
            if (_hs_p != NULL) { \
              _hs_p = ((_hs_p->next != NULL) ? \
                HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \
            } \
            _hs_psize--; \
          } else { \
            _hs_e = _hs_q; \
            _hs_q = ((_hs_q->next != NULL) ? \
              HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \
            _hs_qsize--; \
          } \
          if ( _hs_tail != NULL ) { \
            _hs_tail->next = ((_hs_e != NULL) ? \
              ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \
          } else { \
            _hs_list = _hs_e; \
          } \
          if (_hs_e != NULL) { \
            _hs_e->prev = ((_hs_tail != NULL) ? \
              ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \
          } \
          _hs_tail = _hs_e; \
        } \
        _hs_p = _hs_q; \
      } \
      if (_hs_tail != NULL) { \
        _hs_tail->next = NULL; \
      } \
      if (_hs_nmerges <= 1U) { \
        _hs_looping = 0; \
        (head)->hh.tbl->tail = _hs_tail; \
        DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
      } \
      _hs_insize *= 2U; \
    } \
    HASH_FSCK(hh, head, "HASH_SRT"); \
  } \
} while (0)

/* This function selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
  unsigned _src_bkt, _dst_bkt; \
  void *_last_elt = NULL, *_elt; \
  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
  if ((src) != NULL) { \
    for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
      for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
        _src_hh != NULL; \
        _src_hh = _src_hh->hh_next) { \
        _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
        if (cond(_elt)) { \
          IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \
          _dst_hh = (UT_hash_handle*)(void*)(((char*)_elt) + _dst_hho); \
          _dst_hh->key = _src_hh->key; \
          _dst_hh->keylen = _src_hh->keylen; \
          _dst_hh->hashv = _src_hh->hashv; \
          _dst_hh->prev = _last_elt; \
          _dst_hh->next = NULL; \
          if (_last_elt_hh != NULL) { \
            _last_elt_hh->next = _elt; \
          } \
          if ((dst) == NULL) { \
            DECLTYPE_ASSIGN(dst, _elt); \
            HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \
            IF_HASH_NONFATAL_OOM( \
              if (_hs_oomed) { \
                uthash_nonfatal_oom(_elt); \
                (dst) = NULL; \
                continue; \
              } \
            ) \
          } else { \
            _dst_hh->tbl = (dst)->hh_dst.tbl; \
          } \
          HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
          HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \
          (dst)->hh_dst.tbl->num_items++; \
          IF_HASH_NONFATAL_OOM( \
            if (_hs_oomed) { \
              HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \
              HASH_DELETE_HH(hh_dst, dst, _dst_hh); \
              _dst_hh->tbl = NULL; \
              uthash_nonfatal_oom(_elt); \
              continue; \
            } \
          ) \
          HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \
          _last_elt = _elt; \
          _last_elt_hh = _dst_hh; \
        } \
      } \
    } \
  } \
  HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \
} while (0)

/* free the table and all bookkeeping; the elements themselves are NOT freed */
#define HASH_CLEAR(hh,head) \
do { \
  if ((head) != NULL) { \
    HASH_BLOOM_FREE((head)->hh.tbl); \
    uthash_free((head)->hh.tbl->buckets, \
                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
    (head) = NULL; \
  } \
} while (0)

#define HASH_OVERHEAD(hh,head) \
 (((head) != NULL) ? ( \
 (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   + \
          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   + \
           sizeof(UT_hash_table)                                   + \
           (HASH_BLOOM_BYTELEN))) : 0U)

/* deletion-safe iteration: (tmp) is fetched before (el) is visited */
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
#else
#define HASH_ITER(hh,head,el,tmp) \
for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \
  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
#endif

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)

typedef struct UT_hash_bucket {
   struct UT_hash_handle *hh_head;
   unsigned count;

   /* expand_mult is normally set to 0. In this situation, the max chain length
    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
    * the bucket's chain exceeds this length, bucket expansion is triggered).
    * However, setting expand_mult to a non-zero value delays bucket expansion
    * (that would be triggered by additions to this particular bucket)
    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
    * (The multiplier is simply expand_mult+1). The whole idea of this
    * multiplier is to reduce bucket expansions, since they are expensive, in
    * situations where we know that a particular bucket tends to be overused.
    * It is better to let its chain length grow to a longer yet-still-bounded
    * value, than to do an O(n) bucket expansion too often.
    */
   unsigned expand_mult;

} UT_hash_bucket;

/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1u
#define HASH_BLOOM_SIGNATURE 0xb12220f2u

typedef struct UT_hash_table {
   UT_hash_bucket *buckets;
   unsigned num_buckets, log2_num_buckets;
   unsigned num_items;
   struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */

   /* in an ideal situation (all buckets used equally), no bucket would have
    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
   unsigned ideal_chain_maxlen;

   /* nonideal_items is the number of items in the hash whose chain position
    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
    * hash distribution; reaching them in a chain traversal takes >ideal steps */
   unsigned nonideal_items;

   /* ineffective expands occur when a bucket doubling was performed, but
    * afterward, more than half the items in the hash had nonideal chain
    * positions. If this happens on two consecutive expansions we inhibit any
    * further expansion, as it's not helping; this happens when the hash
    * function isn't a good fit for the key domain. When expansion is inhibited
    * the hash will still work, albeit no longer in constant time. */
   unsigned ineff_expands, noexpand;

   uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
   uint8_t *bloom_bv;
   uint8_t bloom_nbits;
#endif

} UT_hash_table;

typedef struct UT_hash_handle {
   struct UT_hash_table *tbl;
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order      */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   const void *key;                  /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;

#endif /* UTHASH_H */