├── .gitignore ├── .travis.yml ├── Makefile ├── README.md ├── benchmarks.c ├── greatest.h ├── loom.c ├── loom.h ├── loom_internal.h └── test_loom.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.o 3 | test_loom 4 | benchmarks 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | script: make ci 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT = loom 2 | OPTIMIZE = -O3 3 | WARN = -Wall -pedantic -Wextra 4 | CSTD += -std=c99 #-D_POSIX_C_SOURCE=1 -D_C99_SOURCE 5 | #CDEFS += -DNDEBUG 6 | CFLAGS += ${CSTD} -g ${WARN} ${CDEFS} ${CINCS} ${OPTIMIZE} 7 | LDFLAGS += -lpthread 8 | 9 | TEST_CFLAGS = ${CFLAGS} 10 | TEST_LDFLAGS += -L. -lloom ${LDFLAGS} 11 | BENCH_LDFLAGS += -L. 
-lloom ${LDFLAGS} 12 | 13 | all: test_${PROJECT} lib${PROJECT}.a benchmarks 14 | 15 | OBJS= loom.o 16 | 17 | TEST_OBJS= 18 | 19 | # Basic targets 20 | 21 | ${PROJECT}: main.o ${OBJS} 22 | ${CC} -o $@ main.o ${OBJS} ${LDFLAGS} 23 | 24 | lib${PROJECT}.a: ${OBJS} 25 | ar -rcs lib${PROJECT}.a ${OBJS} 26 | 27 | test_${PROJECT}: test_${PROJECT}.o ${TEST_OBJS} lib${PROJECT}.a 28 | ${CC} -o $@ test_${PROJECT}.o \ 29 | ${TEST_OBJS} ${TEST_CFLAGS} ${TEST_LDFLAGS} 30 | 31 | test: ./test_${PROJECT} 32 | ./test_${PROJECT} 33 | 34 | bench: benchmarks 35 | ./benchmarks -v -v 36 | 37 | benchmarks: benchmarks.o lib${PROJECT}.a 38 | ${CC} -o $@ $< ${BENCH_LDFLAGS} 39 | 40 | ci: test bench 41 | 42 | clean: 43 | rm -f ${PROJECT} test_${PROJECT} benchmarks *.o *.a *.core 44 | 45 | ${OBJS}: Makefile 46 | 47 | loom.o: loom.h loom_internal.h 48 | 49 | # Installation 50 | PREFIX ?= /usr/local 51 | INSTALL ?= install 52 | RM ?= rm 53 | 54 | install: 55 | ${INSTALL} -c lib${PROJECT}.a ${PREFIX}/lib/ 56 | ${INSTALL} -c ${PROJECT}.h ${PREFIX}/include/ 57 | 58 | uninstall: 59 | ${RM} -f ${PREFIX}/lib/lib${PROJECT}.a 60 | ${RM} -f ${PREFIX}/include/${PROJECT}.h 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # loom -- A lock-less thread pool for C99 2 | 3 | loom creates a task queue and pool of worker threads. Workers run tasks 4 | as they're scheduled, and otherwise sleep until work is available. 5 | 6 | Note: This is still very early in development. I have stress-tested it 7 | on multi-core x86 / x86-64 systems, and it should be safely lock-less 8 | there, but there may be race conditions on hardware platforms with 9 | different memory barriers. `__sync_bool_compare_and_swap` is used 10 | internally, so portability will be limited to platforms / compilers 11 | where that functionality is available. 
12 | 13 | 14 | ## Build Status 15 | 16 | [![Build Status](https://travis-ci.org/silentbicycle/loom.png)](http://travis-ci.org/silentbicycle/loom) 17 | 18 | 19 | ## Key Features: 20 | 21 | - Lock-less: Lock contention overhead is avoided by using [atomic 22 | compare-and-swap][1] operations internally. 23 | 24 | - Allocation-free: Does no allocation after initialization. 25 | 26 | - Backpressure support: The backlog size is exposed, to allow 27 | proportional push-back against upstream code filling the queue. 28 | 29 | - The number of threads and size of the task queue can be tuned for 30 | specific applications. 31 | 32 | - ISC License: You can use it freely, even for commercial purposes. 33 | 34 | 35 | [1]: http://en.wikipedia.org/wiki/Compare-and-swap 36 | 37 | 38 | ## Getting Started 39 | 40 | First, initialize the thread pool: 41 | 42 | /* The default configuration. If a C99-style struct literal is used, 43 | * any omitted fields will be replaced with the defaults below. */ 44 | loom_config cfg = { 45 | // Number of threads to start upfront; more will start on demand. 46 | .init_threads = 0, 47 | 48 | // Max number of threads too run 49 | .max_threads = 8, 50 | 51 | // Max msec. idle threads should sleep, to avoid busywaiting. 52 | // They will be awakened when new tasks are added. 53 | .max_delay = 1000, 54 | 55 | // Base-2 log of the task queue size (e.g. 10 => 1024 tasks). 56 | // A larger size uses more memory, but allows more flexibility in 57 | // the backlog size before it fills up. 58 | .ring_sz2 = 8, 59 | }; 60 | struct loom *l = NULL; 61 | 62 | if (LOOM_INIT_RES_OK != loom_init(&cfg, &l)) { /* error... */ } 63 | 64 | 65 | Then, schedule tasks in it: 66 | 67 | loom_task task = { 68 | // Task callback: void task_cb(void *closure_environment) {} 69 | .task_cb = task_cb, 70 | 71 | // Cleanup callback: Called to free *env if task is canceled. 72 | .cleanup_cb = cleanup_cb, 73 | 74 | // void * to a data to pass to the callbacks. 
75 | .env = (void *)closure_environment, 76 | }; 77 | 78 | int i = 0; 79 | for (i = 0; i < RETRY_COUNT; i++) { 80 | size_t backpressure = 0; 81 | /* Retry adding task, pushing back if the queue is 82 | * currently full and cannot schedule more tasks. */ 83 | if (loom_enqueue(l, &task, &backpressure)) { break; } 84 | do_pushback(backpressure); 85 | } 86 | if (i == RETRY_COUNT) { /* failed to enqueue -- queue full */ } 87 | 88 | 89 | Finally, notify the thread pool that the system is shutting down: 90 | 91 | while (!loom_shutdown(l)) { 92 | /* Do other stuff, giving threads a chance to shut down; 93 | * loom_shutdown will return true once they've halted. */ 94 | } 95 | 96 | loom_free(l); 97 | 98 | 99 | To get info about the threadpool as it runs, use: 100 | 101 | /* Get the size of the queue. */ 102 | size_t loom_queue_size(struct loom *l); 103 | 104 | /* Get statistics from the currently running thread pool. */ 105 | bool loom_get_stats(struct loom *l, loom_info *info); 106 | 107 | 108 | ## Implementation 109 | 110 | The threadpool is based on a ring buffer of task structs, and uses 111 | atomic compare-and-swap instructions to update offsets for cells that 112 | have been reserved for write, committed, requested for read, and 113 | released. Tasks are copied into the ring queue by value when written, 114 | and read into the worker thread's stack and released immediately to help 115 | keep the ring queue from filling up. Because a ring buffer is used, the 116 | offsets can wrap, reusing memory. 117 | 118 | To avoid a race condition on releasing writes or reads (by just 119 | incrementing the commit/done offsets), producer and consumer threads 120 | mark a write or read as complete by a mark field, and then atomically 121 | compare-and-swap the commit or done offset to advance past any that have 122 | been marked already. 
123 | 124 | Worker threads attempt to request tasks from the queue, and if the queue 125 | is empty (the commit offset is the same as the read offset), they poll 126 | on an alert pipe for progressively longer periods of time (up to 127 | `max_delay`) to avoid busywaiting. If a new task is added, the client 128 | thread writes to their pipe, waking them up immediately. 129 | 130 | When `loom_shutdown` is called, the alert pipes are closed, causing the 131 | worker threads to switch to a mode where they cancel remaining tasks 132 | (using their cleanup callbacks, if given), then exit when the queue is 133 | empty. 134 | 135 | 136 | ## Future Development 137 | 138 | - Performance tuning 139 | 140 | - Optional thread shutdown (most likely via `pthread_cancel`) 141 | -------------------------------------------------------------------------------- /benchmarks.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "loom.h" 14 | 15 | typedef struct { 16 | size_t limit; 17 | size_t arg2; 18 | uint8_t ring_sz2; 19 | uint16_t max_delay; 20 | uint16_t init_threads; 21 | uint16_t max_threads; 22 | int verbosity; 23 | char *bench_name; 24 | } config; 25 | 26 | typedef void (benchmark_cb)(config *cfg, struct loom *l); 27 | static benchmark_cb bench_noop; 28 | static benchmark_cb bench_pi; 29 | static benchmark_cb bench_pi_with_delay; 30 | static benchmark_cb bench_wakeup; 31 | static benchmark_cb bench_multiprod; 32 | 33 | typedef struct { 34 | const char *name; 35 | const char *descr; 36 | benchmark_cb *cb; 37 | bool run_by_default; 38 | size_t def_limit; 39 | } benchmark; 40 | static benchmark benchmarks[] = { 41 | {"no-op", "enqueue no-op tasks to measure throughput", 42 | bench_noop, true, 1000 * 1000L}, 43 | {"pi", "calculate pi to arg2 (def. 
1000) digits", 44 | bench_pi, true, 1000 * 1000L}, 45 | {"pi_delay", "calculate pi to arg2 (def. 1000) digits w/ sleep before", 46 | bench_pi_with_delay, false, 1000}, 47 | {"wakeup", "add tasks w/ delay to stress worker sleep/wakeup", 48 | bench_wakeup, true, 1000}, 49 | {"multiprod", "stress with multiple producers", 50 | bench_multiprod, true, 1000}, 51 | }; 52 | 53 | static void usage(void) { 54 | fprintf(stderr, 55 | "Benchmarks and stress tests for loom\n" 56 | "Usage: benchmarks [-h] [-b BENCHMARK_NAME] [-d MAX_DELAY] [-i INIT_THREADS]\n" 57 | " [-l LIMIT] [-r RING_SZ2] [-t MAX_THREADS] [-v]\n" 58 | "\n"); 59 | fprintf(stderr, 60 | "Benchmarks (run individually with '-b NAME'):\n"); 61 | for (size_t i = 0; i < sizeof(benchmarks)/sizeof(benchmarks[0]); i++) { 62 | benchmark *b = &benchmarks[i]; 63 | fprintf(stderr, " -- %-10s %s\n", b->name, b->descr); 64 | } 65 | exit(1); 66 | } 67 | 68 | static void parse_args(config *cfg, int argc, char **argv) { 69 | memset(cfg, 0, sizeof(*cfg)); 70 | 71 | int a = 0; 72 | while ((a = getopt(argc, argv, "ha:b:d:i:l:r:t:v")) != -1) { 73 | switch (a) { 74 | case 'h': /* help */ 75 | usage(); 76 | break; 77 | case 'a': /* arg2 */ 78 | cfg->arg2 = atol(optarg); 79 | break; 80 | case 'b': /* run benchmark by name */ 81 | cfg->bench_name = optarg; 82 | break; 83 | case 'd': /* max delay */ 84 | cfg->max_delay = atol(optarg); 85 | break; 86 | case 'i': /* init. 
threads */ 87 | cfg->init_threads = atoi(optarg); 88 | break; 89 | case 'l': /* limit */ 90 | cfg->limit = atol(optarg); 91 | break; 92 | case 'r': /* lg2(ring size) */ 93 | cfg->ring_sz2 = atoi(optarg); 94 | break; 95 | case 't': /* max threads */ 96 | cfg->max_threads = atoi(optarg); 97 | break; 98 | case 'v': /* verbosity */ 99 | cfg->verbosity++; 100 | break; 101 | case '?': /* unknown argument */ 102 | default: 103 | usage(); 104 | } 105 | } 106 | } 107 | 108 | int main(int argc, char **argv) { 109 | config cfg; 110 | parse_args(&cfg, argc, argv); 111 | 112 | struct timeval tv_pre; 113 | struct timeval tv_post; 114 | clock_t pre; 115 | clock_t post; 116 | 117 | bool l0 = cfg.limit == 0; 118 | 119 | for (size_t i = 0; i < sizeof(benchmarks)/sizeof(benchmarks[0]); i++) { 120 | benchmark *b = &benchmarks[i]; 121 | 122 | if (cfg.bench_name != NULL) { 123 | if (0 != strcmp(cfg.bench_name, b->name)) { continue; } 124 | } 125 | 126 | if (cfg.bench_name == NULL && !b->run_by_default) { 127 | continue; 128 | } 129 | 130 | loom_config lcfg = { 131 | .ring_sz2 = cfg.ring_sz2, 132 | .max_delay = cfg.max_delay, 133 | .init_threads = cfg.init_threads, 134 | .max_threads = cfg.max_threads, 135 | }; 136 | struct loom *l = NULL; 137 | if (cfg.verbosity > 1) { printf(" -- loom_init...\n"); } 138 | loom_init_res res = loom_init(&lcfg, &l); 139 | assert(res == LOOM_INIT_RES_OK); 140 | 141 | pre = clock(); 142 | assert(pre != (clock_t)-1); 143 | 144 | if (-1 == gettimeofday(&tv_pre, NULL)) { assert(false); } 145 | if (cfg.verbosity > 0) { printf(" -- running '%s'\n", b->name); } 146 | if (l0) { cfg.limit = b->def_limit; } 147 | b->cb(&cfg, l); 148 | 149 | post = clock(); 150 | if (-1 == gettimeofday(&tv_post, NULL)) { assert(false); } 151 | assert(post != (clock_t)-1); 152 | 153 | double tdelta = (tv_post.tv_sec - tv_pre.tv_sec) 154 | + 1e-06 * (tv_post.tv_usec - tv_pre.tv_usec); 155 | printf(" -- %-10s limit %zd -- wall %.3f clock %.3f => %.1f / sec\n", 156 | b->name, cfg.limit, 
tdelta, (post - pre) / (1.0 * CLOCKS_PER_SEC), 157 | cfg.limit / tdelta); 158 | 159 | const int MAX_SHUTDOWN_SECONDS = 1; 160 | if (cfg.verbosity > 1) { printf(" -- loom_shutdown...\n"); } 161 | int i = 0; 162 | for (i = 0; i < 10 * MAX_SHUTDOWN_SECONDS; i++) { 163 | if (loom_shutdown(l)) { break; } 164 | poll(NULL, 0, 100); 165 | if (i > 0 && cfg.verbosity > 1 && ((i % 10) == 0)) { 166 | printf(" -- loom_shutdown, %d msec have passed\n", i * 100); 167 | } 168 | } 169 | if (i == 10 * MAX_SHUTDOWN_SECONDS) { assert(false); } 170 | 171 | if (cfg.verbosity > 1) { printf(" -- loom_free...\n"); } 172 | loom_free(l); 173 | } 174 | return 0; 175 | } 176 | 177 | #define RETRIES 100 178 | 179 | static void noop_cb(void *env) { 180 | (void)env; 181 | } 182 | 183 | static void bench_noop(config *cfg, struct loom *l) { 184 | size_t backpressure = 0; 185 | struct timeval tv; 186 | time_t last_second = 0; 187 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 188 | last_second = tv.tv_sec; 189 | 190 | loom_info info; 191 | const size_t limit = cfg->limit; 192 | int shift = 6; 193 | for (size_t ts = 0; ts < limit; ts++) { 194 | loom_task t = { 195 | .task_cb = noop_cb, 196 | }; 197 | 198 | int i = 0; 199 | for (i = 0; i < RETRIES; i++) { 200 | if (loom_enqueue(l, &t, &backpressure)) { break; } 201 | int wait = backpressure >> shift; 202 | if (wait > 0) { 203 | poll(NULL, 0, wait); 204 | } 205 | } 206 | if (i == RETRIES) { assert(false); } 207 | 208 | if (cfg->verbosity > 0) { 209 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 210 | if (tv.tv_sec != last_second) { 211 | last_second = tv.tv_sec; 212 | if (!loom_get_stats(l, &info)) { assert(false); } 213 | printf("%ld: -- %zd enqueued, backlog %zd\n", 214 | last_second, ts, info.backlog_size); 215 | } 216 | } 217 | } 218 | 219 | do { 220 | if (!loom_get_stats(l, &info)) { assert(false); } 221 | poll(NULL, 0, 5); //info.backlog_size / 10); 222 | 223 | if (cfg->verbosity > 0) { 224 | if (0 != gettimeofday(&tv, NULL)) { 
assert(false); } 225 | if (tv.tv_sec != last_second) { 226 | last_second = tv.tv_sec; 227 | printf("%ld: -- %zd left\n", last_second, info.backlog_size); 228 | } 229 | } 230 | } while (info.backlog_size > 0); 231 | } 232 | 233 | typedef struct { 234 | int from; 235 | int to; 236 | int delay; 237 | } pi_env; 238 | 239 | static void calc_pi_cb(void *env) { 240 | pi_env *p = (pi_env *)env; 241 | if (p->delay >= 0) { poll(NULL, 0, p->delay); } 242 | double acc = 0; 243 | for (int i = p->from; i < p->to; i++) { 244 | acc += 4.0 * (1 - 2*(i & 0x01)) / (2*i + 1); 245 | } 246 | //printf("%d: %.20f\n", p->to, acc); 247 | (void)acc; 248 | } 249 | 250 | typedef int (bench_delay_cb)(int nth_test); 251 | 252 | /* This benchmarks is loosely based on "Benchmarking JVM Concurrency 253 | * Options for Java, Scala and Akka" by Michael Slinn. 254 | * http://www.infoq.com/articles/benchmarking-jvm */ 255 | static void pi_delay(config *cfg, struct loom *l, bench_delay_cb delay_cb) { 256 | size_t backpressure = 0; 257 | struct timeval tv; 258 | time_t last_second = 0; 259 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 260 | last_second = tv.tv_sec; 261 | 262 | size_t arg2 = cfg->arg2; 263 | if (arg2 == 0) { arg2 = 10000; } 264 | 265 | int shift = (cfg->ring_sz2 / 2); 266 | loom_info info; 267 | const size_t limit = cfg->limit; 268 | for (size_t ts = 0; ts < limit; ts++) { 269 | int delay = delay_cb(ts); 270 | pi_env penv = { 271 | .from = 0, 272 | .to = arg2, 273 | .delay = delay, 274 | }; 275 | loom_task t = { 276 | .task_cb = calc_pi_cb, 277 | .env = (void *)&penv, 278 | }; 279 | 280 | int i = 0; 281 | for (i = 0; i < RETRIES; i++) { 282 | if (loom_enqueue(l, &t, &backpressure)) { break; } 283 | int wait = backpressure >> shift; 284 | poll(NULL, 0, i < wait ? 
i : wait); 285 | } 286 | if (i == RETRIES) { assert(false); } 287 | 288 | if (cfg->verbosity > 0) { 289 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 290 | if (tv.tv_sec != last_second) { 291 | last_second = tv.tv_sec; 292 | if (!loom_get_stats(l, &info)) { assert(false); } 293 | printf("%ld: -- %zd enqueued, backlog %zd\n", 294 | last_second, ts, info.backlog_size); 295 | } 296 | } 297 | } 298 | 299 | do { 300 | if (!loom_get_stats(l, &info)) { assert(false); } 301 | poll(NULL, 0, (info.backlog_size >> 10) | 10); 302 | 303 | if (cfg->verbosity > 0) { 304 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 305 | if (tv.tv_sec != last_second) { 306 | last_second = tv.tv_sec; 307 | printf("%ld: -- %zd left\n", last_second, info.backlog_size); 308 | } 309 | } 310 | } while (info.backlog_size > 0); 311 | } 312 | 313 | static int no_delay_cb(int nth_test) { 314 | (void)nth_test; 315 | return 0; 316 | } 317 | 318 | static void bench_pi(config *cfg, struct loom *l) { 319 | pi_delay(cfg, l, no_delay_cb); 320 | } 321 | 322 | static int small_delay_cb(int nth_test) { 323 | const uint32_t LARGE_PRIME = ((1L << 31L) - 1); 324 | return ((1 << 5) - 1) & (nth_test * LARGE_PRIME); 325 | } 326 | 327 | static void bench_pi_with_delay(config *cfg, struct loom *l) { 328 | pi_delay(cfg, l, small_delay_cb); 329 | } 330 | 331 | static void block_sequentially_cb(void *env) { 332 | bool *do_next = (bool *)env; 333 | *do_next = true; 334 | } 335 | 336 | static void bench_wakeup(config *cfg, struct loom *l) { 337 | size_t backpressure = 0; 338 | struct timeval tv; 339 | time_t last_second = 0; 340 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 341 | last_second = tv.tv_sec; 342 | 343 | bool do_next = true; 344 | 345 | loom_info info; 346 | const size_t limit = cfg->limit; 347 | for (size_t ts = 0; ts < limit; ts++) { 348 | do_next = false; 349 | loom_task t = { 350 | .task_cb = block_sequentially_cb, 351 | .env = &do_next, 352 | }; 353 | 354 | int i = 0; 355 | for (i = 0; 
i < RETRIES; i++) { 356 | if (loom_enqueue(l, &t, &backpressure)) { break; } 357 | int wait = i; 358 | poll(NULL, 0, i < wait ? i : wait); 359 | } 360 | if (i == RETRIES) { assert(false); } 361 | 362 | while (!do_next) { 363 | /* Add a wait between scheduling each task, so worker 364 | * threads will be dormant and can exercise their wakeup code. */ 365 | poll(NULL, 0, 1); 366 | 367 | if (cfg->verbosity > 0) { 368 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 369 | if (tv.tv_sec != last_second) { 370 | last_second = tv.tv_sec; 371 | if (!loom_get_stats(l, &info)) { assert(false); } 372 | printf("%ld: -- %zd enqueued, backlog %zd\n", 373 | last_second, ts, info.backlog_size); 374 | } 375 | } 376 | } 377 | } 378 | 379 | do { 380 | if (!loom_get_stats(l, &info)) { assert(false); } 381 | poll(NULL, 0, (info.backlog_size >> 10) | 10); 382 | 383 | if (cfg->verbosity > 0) { 384 | if (0 != gettimeofday(&tv, NULL)) { assert(false); } 385 | if (tv.tv_sec != last_second) { 386 | last_second = tv.tv_sec; 387 | printf("%ld: -- %zd left\n", last_second, info.backlog_size); 388 | } 389 | } 390 | } while (info.backlog_size > 0); 391 | } 392 | 393 | typedef struct { 394 | pthread_t t; 395 | int limit; 396 | struct loom *l; 397 | } multiprod_env; 398 | 399 | static void *multiprod_thread_task(void *arg); 400 | 401 | /* Stress multiple producers as well as multiple consumers. 
*/ 402 | static void bench_multiprod(config *cfg, struct loom *l) { 403 | size_t producers = cfg->arg2; 404 | if (producers == 0) { producers = 4; } 405 | 406 | multiprod_env prods[producers]; 407 | memset(prods, 0, producers * sizeof(prods[0])); 408 | 409 | for (size_t i = 0; i < producers; i++) { 410 | prods[i].limit = cfg->limit; 411 | prods[i].l = l; 412 | if (0 != pthread_create(&prods[i].t, NULL, 413 | multiprod_thread_task, (void *)&prods[i])) { 414 | assert(false); 415 | } 416 | } 417 | 418 | for (size_t i = 0; i < producers; i++) { 419 | void *out = NULL; 420 | if (0 != pthread_join(prods[i].t, &out)) { 421 | assert(false); 422 | } 423 | } 424 | } 425 | 426 | static void *multiprod_thread_task(void *arg) { 427 | multiprod_env *env = (multiprod_env *)arg; 428 | assert(arg); 429 | 430 | struct loom *l = env->l; 431 | int shift = 6; 432 | size_t backpressure = 0; 433 | 434 | /* Loop counter named 'n' so the loom_task 't' below does not shadow it. */ for (int n = 0; n < env->limit; n++) { 435 | loom_task t = { 436 | .task_cb = noop_cb, 437 | }; 438 | 439 | int i = 0; 440 | for (i = 0; i < RETRIES; i++) { 441 | if (loom_enqueue(l, &t, &backpressure)) { break; } 442 | int wait = backpressure >> shift; 443 | if (wait > 0) { 444 | poll(NULL, 0, wait); 445 | } 446 | } 447 | if (i == RETRIES) { assert(false); } 448 | } 449 | return NULL; 450 | } 451 | -------------------------------------------------------------------------------- /greatest.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Scott Vokes 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #ifndef GREATEST_H 18 | #define GREATEST_H 19 | 20 | #define GREATEST_VERSION_MAJOR 0 21 | #define GREATEST_VERSION_MINOR 10 22 | #define GREATEST_VERSION_PATCH 1 23 | 24 | /* A unit testing system for C, contained in 1 file. 25 | * It doesn't use dynamic allocation or depend on anything 26 | * beyond ANSI C89. */ 27 | 28 | 29 | /********************************************************************* 30 | * Minimal test runner template 31 | *********************************************************************/ 32 | #if 0 33 | 34 | #include "greatest.h" 35 | 36 | TEST foo_should_foo() { 37 | PASS(); 38 | } 39 | 40 | static void setup_cb(void *data) { 41 | printf("setup callback for each test case\n"); 42 | } 43 | 44 | static void teardown_cb(void *data) { 45 | printf("teardown callback for each test case\n"); 46 | } 47 | 48 | SUITE(suite) { 49 | /* Optional setup/teardown callbacks which will be run before/after 50 | * every test case in the suite. 51 | * Cleared when the suite finishes. */ 52 | SET_SETUP(setup_cb, voidp_to_callback_data); 53 | SET_TEARDOWN(teardown_cb, voidp_to_callback_data); 54 | 55 | RUN_TEST(foo_should_foo); 56 | } 57 | 58 | /* Add all the definitions that need to be in the test runner's main file. */ 59 | GREATEST_MAIN_DEFS(); 60 | 61 | int main(int argc, char **argv) { 62 | GREATEST_MAIN_BEGIN(); /* command-line arguments, initialization. 
*/ 63 | RUN_SUITE(suite); 64 | GREATEST_MAIN_END(); /* display results */ 65 | } 66 | 67 | #endif 68 | /*********************************************************************/ 69 | 70 | 71 | #include 72 | #include 73 | #include 74 | #include 75 | 76 | 77 | /*********** 78 | * Options * 79 | ***********/ 80 | 81 | /* Default column width for non-verbose output. */ 82 | #ifndef GREATEST_DEFAULT_WIDTH 83 | #define GREATEST_DEFAULT_WIDTH 72 84 | #endif 85 | 86 | /* FILE *, for test logging. */ 87 | #ifndef GREATEST_STDOUT 88 | #define GREATEST_STDOUT stdout 89 | #endif 90 | 91 | /* Remove GREATEST_ prefix from most commonly used symbols? */ 92 | #ifndef GREATEST_USE_ABBREVS 93 | #define GREATEST_USE_ABBREVS 1 94 | #endif 95 | 96 | 97 | /********* 98 | * Types * 99 | *********/ 100 | 101 | /* Info for the current running suite. */ 102 | typedef struct greatest_suite_info { 103 | unsigned int tests_run; 104 | unsigned int passed; 105 | unsigned int failed; 106 | unsigned int skipped; 107 | 108 | /* timers, pre/post running suite and individual tests */ 109 | clock_t pre_suite; 110 | clock_t post_suite; 111 | clock_t pre_test; 112 | clock_t post_test; 113 | } greatest_suite_info; 114 | 115 | /* Type for a suite function. */ 116 | typedef void (greatest_suite_cb)(void); 117 | 118 | /* Types for setup/teardown callbacks. If non-NULL, these will be run 119 | * and passed the pointer to their additional data. */ 120 | typedef void (greatest_setup_cb)(void *udata); 121 | typedef void (greatest_teardown_cb)(void *udata); 122 | 123 | /* Type for an equality comparison between two pointers of the same type. 124 | * Should return non-0 if equal, otherwise 0. 125 | * UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */ 126 | typedef int greatest_equal_cb(const void *exp, const void *got, void *udata); 127 | 128 | /* Type for a callback that prints a value pointed to by T. 129 | * Return value has the same meaning as printf's. 
130 | * UDATA is a closure value, passed through from ASSERT_EQUAL_T[m]. */ 131 | typedef int greatest_printf_cb(const void *t, void *udata); 132 | 133 | /* Callbacks for an arbitrary type; needed for type-specific 134 | * comparisons via GREATEST_ASSERT_EQUAL_T[m].*/ 135 | typedef struct greatest_type_info { 136 | greatest_equal_cb *equal; 137 | greatest_printf_cb *print; 138 | } greatest_type_info; 139 | 140 | /* Callbacks for string type. */ 141 | extern greatest_type_info greatest_type_info_string; 142 | extern greatest_type_info greatest_type_info_fmt; 143 | 144 | typedef enum { 145 | GREATEST_FLAG_VERBOSE = 0x01, 146 | GREATEST_FLAG_FIRST_FAIL = 0x02, 147 | GREATEST_FLAG_LIST_ONLY = 0x04 148 | } GREATEST_FLAG; 149 | 150 | /* Struct containing all test runner state. */ 151 | typedef struct greatest_run_info { 152 | unsigned int flags; 153 | unsigned int tests_run; /* total test count */ 154 | 155 | /* overall pass/fail/skip counts */ 156 | unsigned int passed; 157 | unsigned int failed; 158 | unsigned int skipped; 159 | unsigned int assertions; 160 | 161 | /* currently running test suite */ 162 | greatest_suite_info suite; 163 | 164 | /* info to print about the most recent failure */ 165 | const char *fail_file; 166 | unsigned int fail_line; 167 | const char *msg; 168 | 169 | /* current setup/teardown hooks and userdata */ 170 | greatest_setup_cb *setup; 171 | void *setup_udata; 172 | greatest_teardown_cb *teardown; 173 | void *teardown_udata; 174 | 175 | /* formatting info for ".....s...F"-style output */ 176 | unsigned int col; 177 | unsigned int width; 178 | 179 | /* only run a specific suite or test */ 180 | char *suite_filter; 181 | char *test_filter; 182 | 183 | /* overall timers */ 184 | clock_t begin; 185 | clock_t end; 186 | } greatest_run_info; 187 | 188 | /* Global var for the current testing context. 189 | * Initialized by GREATEST_MAIN_DEFS(). 
*/ 190 | extern greatest_run_info greatest_info; 191 | 192 | 193 | /********************** 194 | * Exported functions * 195 | **********************/ 196 | 197 | /* These are used internally by greatest. */ 198 | void greatest_do_pass(const char *name); 199 | void greatest_do_fail(const char *name); 200 | void greatest_do_skip(const char *name); 201 | int greatest_pre_test(const char *name); 202 | void greatest_post_test(const char *name, int res); 203 | void greatest_usage(const char *name); 204 | int greatest_do_assert_equal_t(const void *exp, const void *got, 205 | greatest_type_info *type_info, void *udata); 206 | 207 | /* These are part of the public greatest API. */ 208 | void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata); 209 | void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, void *udata); 210 | 211 | 212 | /********** 213 | * Macros * 214 | **********/ 215 | 216 | /* Define a suite. */ 217 | #define GREATEST_SUITE(NAME) void NAME(void) 218 | 219 | /* Start defining a test function. 220 | * The arguments are not included, to allow parametric testing. */ 221 | #define GREATEST_TEST static int 222 | 223 | /* Run a suite. */ 224 | #define GREATEST_RUN_SUITE(S_NAME) greatest_run_suite(S_NAME, #S_NAME) 225 | 226 | /* Run a test in the current suite. */ 227 | #define GREATEST_RUN_TEST(TEST) \ 228 | do { \ 229 | if (greatest_pre_test(#TEST) == 1) { \ 230 | int res = TEST(); \ 231 | greatest_post_test(#TEST, res); \ 232 | } else if (GREATEST_LIST_ONLY()) { \ 233 | fprintf(GREATEST_STDOUT, " %s\n", #TEST); \ 234 | } \ 235 | } while (0) 236 | 237 | /* Run a test in the current suite with one void* argument, 238 | * which can be a pointer to a struct with multiple arguments. 
*/ 239 | #define GREATEST_RUN_TEST1(TEST, ENV) \ 240 | do { \ 241 | if (greatest_pre_test(#TEST) == 1) { \ 242 | int res = TEST(ENV); \ 243 | greatest_post_test(#TEST, res); \ 244 | } else if (GREATEST_LIST_ONLY()) { \ 245 | fprintf(GREATEST_STDOUT, " %s\n", #TEST); \ 246 | } \ 247 | } while (0) 248 | 249 | /* If __VA_ARGS__ (C99) is supported, allow parametric testing 250 | * without needing to manually manage the argument struct. */ 251 | #if __STDC_VERSION__ >= 199901L 252 | #define GREATEST_RUN_TESTp(TEST, ...) \ 253 | do { \ 254 | if (greatest_pre_test(#TEST) == 1) { \ 255 | int res = TEST(__VA_ARGS__); \ 256 | greatest_post_test(#TEST, res); \ 257 | } else if (GREATEST_LIST_ONLY()) { \ 258 | fprintf(GREATEST_STDOUT, " %s\n", #TEST); \ 259 | } \ 260 | } while (0) 261 | #endif 262 | 263 | 264 | /* Check if the test runner is in verbose mode. */ 265 | #define GREATEST_IS_VERBOSE() (greatest_info.flags & GREATEST_FLAG_VERBOSE) 266 | #define GREATEST_LIST_ONLY() (greatest_info.flags & GREATEST_FLAG_LIST_ONLY) 267 | #define GREATEST_FIRST_FAIL() (greatest_info.flags & GREATEST_FLAG_FIRST_FAIL) 268 | #define GREATEST_FAILURE_ABORT() (greatest_info.suite.failed > 0 && GREATEST_FIRST_FAIL()) 269 | 270 | /* Message-less forms of tests defined below.
*/ 271 | #define GREATEST_PASS() GREATEST_PASSm(NULL) 272 | #define GREATEST_FAIL() GREATEST_FAILm(NULL) 273 | #define GREATEST_SKIP() GREATEST_SKIPm(NULL) 274 | #define GREATEST_ASSERT(COND) GREATEST_ASSERTm(#COND, COND) 275 | #define GREATEST_ASSERT_FALSE(COND) GREATEST_ASSERT_FALSEm(#COND, COND) 276 | #define GREATEST_ASSERT_EQ(EXP, GOT) GREATEST_ASSERT_EQm(#EXP " != " #GOT, EXP, GOT) 277 | #define GREATEST_ASSERT_EQUAL_T(EXP, GOT, TYPE_INFO, UDATA) \ 278 | GREATEST_ASSERT_EQUAL_Tm(#EXP " != " #GOT, EXP, GOT, TYPE_INFO, UDATA) 279 | #define GREATEST_ASSERT_STR_EQ(EXP, GOT) GREATEST_ASSERT_STR_EQm(#EXP " != " #GOT, EXP, GOT) 280 | 281 | /* The following forms take an additional message argument first, 282 | * to be displayed by the test runner. */ 283 | 284 | /* Fail if a condition is not true, with message. */ 285 | #define GREATEST_ASSERTm(MSG, COND) \ 286 | do { \ 287 | greatest_info.assertions++; \ 288 | if (!(COND)) { FAILm(MSG); } \ 289 | } while (0) 290 | 291 | /* Fail if a condition is not false, with message. */ 292 | #define GREATEST_ASSERT_FALSEm(MSG, COND) \ 293 | do { \ 294 | greatest_info.assertions++; \ 295 | if ((COND)) { FAILm(MSG); } \ 296 | } while (0) 297 | 298 | /* Fail if EXP != GOT (equality comparison by ==). */ 299 | #define GREATEST_ASSERT_EQm(MSG, EXP, GOT) \ 300 | do { \ 301 | greatest_info.assertions++; \ 302 | if ((EXP) != (GOT)) { FAILm(MSG); } \ 303 | } while (0) 304 | 305 | /* Fail if EXP is not equal to GOT, according to strcmp. 
*/ 306 | #define GREATEST_ASSERT_STR_EQm(MSG, EXP, GOT) \ 307 | do { \ 308 | GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT, \ 309 | &greatest_type_info_string, NULL); \ 310 | } while (0) \ 311 | 312 | #define GREATEST_ASSERT_EQ_FMT(FMT, EXP, GOT) \ 313 | do { \ 314 | const char _fmt[] = FMT; \ 315 | GREATEST_ASSERT_EQUAL_Tm("", (void *)EXP, (void *)GOT, \ 316 | &greatest_type_info_fmt, (void *)_fmt); \ 317 | } while (0) \ 318 | 319 | /* Fail if EXP is not equal to GOT, according to a comparison 320 | * callback in TYPE_INFO. If they are not equal, optionally use a 321 | * print callback in TYPE_INFO to print them. */ 322 | #define GREATEST_ASSERT_EQUAL_Tm(MSG, EXP, GOT, TYPE_INFO, UDATA) \ 323 | do { \ 324 | greatest_type_info *type_info = (TYPE_INFO); \ 325 | greatest_info.assertions++; \ 326 | if (!greatest_do_assert_equal_t(EXP, GOT, \ 327 | type_info, UDATA)) { \ 328 | if (type_info == NULL || type_info->equal == NULL) { \ 329 | FAILm("type_info->equal callback missing!"); \ 330 | } else { \ 331 | FAILm(MSG); \ 332 | } \ 333 | } \ 334 | } while (0) \ 335 | 336 | /* Pass. */ 337 | #define GREATEST_PASSm(MSG) \ 338 | do { \ 339 | greatest_info.msg = MSG; \ 340 | return 0; \ 341 | } while (0) 342 | 343 | /* Fail. */ 344 | #define GREATEST_FAILm(MSG) \ 345 | do { \ 346 | greatest_info.fail_file = __FILE__; \ 347 | greatest_info.fail_line = __LINE__; \ 348 | greatest_info.msg = MSG; \ 349 | return -1; \ 350 | } while (0) 351 | 352 | /* Skip the current test. 
*/ 353 | #define GREATEST_SKIPm(MSG) \ 354 | do { \ 355 | greatest_info.msg = MSG; \ 356 | return 1; \ 357 | } while (0) 358 | 359 | #define GREATEST_SET_TIME(NAME) \ 360 | NAME = clock(); \ 361 | if (NAME == (clock_t) -1) { \ 362 | fprintf(GREATEST_STDOUT, \ 363 | "clock error: %s\n", #NAME); \ 364 | exit(EXIT_FAILURE); \ 365 | } 366 | 367 | #define GREATEST_CLOCK_DIFF(C1, C2) \ 368 | fprintf(GREATEST_STDOUT, " (%lu ticks, %.3f sec)", \ 369 | (long unsigned int) (C2) - (long unsigned int)(C1), \ 370 | (double)((C2) - (C1)) / (1.0 * (double)CLOCKS_PER_SEC)) \ 371 | 372 | /* Include several function definitions in the main test file. */ 373 | #define GREATEST_MAIN_DEFS() \ 374 | \ 375 | /* Is FILTER a subset of NAME? */ \ 376 | static int greatest_name_match(const char *name, \ 377 | const char *filter) { \ 378 | size_t offset = 0; \ 379 | size_t filter_len = strlen(filter); \ 380 | while (name[offset] != '\0') { \ 381 | if (name[offset] == filter[0]) { \ 382 | if (0 == strncmp(&name[offset], filter, filter_len)) { \ 383 | return 1; \ 384 | } \ 385 | } \ 386 | offset++; \ 387 | } \ 388 | \ 389 | return 0; \ 390 | } \ 391 | \ 392 | int greatest_pre_test(const char *name) { \ 393 | if (!GREATEST_LIST_ONLY() \ 394 | && (!GREATEST_FIRST_FAIL() || greatest_info.suite.failed == 0) \ 395 | && (greatest_info.test_filter == NULL || \ 396 | greatest_name_match(name, greatest_info.test_filter))) { \ 397 | GREATEST_SET_TIME(greatest_info.suite.pre_test); \ 398 | if (greatest_info.setup) { \ 399 | greatest_info.setup(greatest_info.setup_udata); \ 400 | } \ 401 | return 1; /* test should be run */ \ 402 | } else { \ 403 | return 0; /* skipped */ \ 404 | } \ 405 | } \ 406 | \ 407 | void greatest_post_test(const char *name, int res) { \ 408 | GREATEST_SET_TIME(greatest_info.suite.post_test); \ 409 | if (greatest_info.teardown) { \ 410 | void *udata = greatest_info.teardown_udata; \ 411 | greatest_info.teardown(udata); \ 412 | } \ 413 | \ 414 | if (res < 0) { \ 415 | 
greatest_do_fail(name); \ 416 | } else if (res > 0) { \ 417 | greatest_do_skip(name); \ 418 | } else if (res == 0) { \ 419 | greatest_do_pass(name); \ 420 | } \ 421 | greatest_info.suite.tests_run++; \ 422 | greatest_info.col++; \ 423 | if (GREATEST_IS_VERBOSE()) { \ 424 | GREATEST_CLOCK_DIFF(greatest_info.suite.pre_test, \ 425 | greatest_info.suite.post_test); \ 426 | fprintf(GREATEST_STDOUT, "\n"); \ 427 | } else if (greatest_info.col % greatest_info.width == 0) { \ 428 | fprintf(GREATEST_STDOUT, "\n"); \ 429 | greatest_info.col = 0; \ 430 | } \ 431 | if (GREATEST_STDOUT == stdout) fflush(stdout); \ 432 | } \ 433 | \ 434 | static void greatest_run_suite(greatest_suite_cb *suite_cb, \ 435 | const char *suite_name) { \ 436 | if (greatest_info.suite_filter && \ 437 | !greatest_name_match(suite_name, greatest_info.suite_filter)) { \ 438 | return; \ 439 | } \ 440 | if (GREATEST_FIRST_FAIL() && greatest_info.failed > 0) { return; } \ 441 | memset(&greatest_info.suite, 0, sizeof(greatest_info.suite)); \ 442 | greatest_info.col = 0; \ 443 | fprintf(GREATEST_STDOUT, "\n* Suite %s:\n", suite_name); \ 444 | GREATEST_SET_TIME(greatest_info.suite.pre_suite); \ 445 | suite_cb(); \ 446 | GREATEST_SET_TIME(greatest_info.suite.post_suite); \ 447 | if (greatest_info.suite.tests_run > 0) { \ 448 | fprintf(GREATEST_STDOUT, \ 449 | "\n%u tests - %u pass, %u fail, %u skipped", \ 450 | greatest_info.suite.tests_run, \ 451 | greatest_info.suite.passed, \ 452 | greatest_info.suite.failed, \ 453 | greatest_info.suite.skipped); \ 454 | GREATEST_CLOCK_DIFF(greatest_info.suite.pre_suite, \ 455 | greatest_info.suite.post_suite); \ 456 | fprintf(GREATEST_STDOUT, "\n"); \ 457 | } \ 458 | greatest_info.setup = NULL; \ 459 | greatest_info.setup_udata = NULL; \ 460 | greatest_info.teardown = NULL; \ 461 | greatest_info.teardown_udata = NULL; \ 462 | greatest_info.passed += greatest_info.suite.passed; \ 463 | greatest_info.failed += greatest_info.suite.failed; \ 464 | greatest_info.skipped += 
greatest_info.suite.skipped; \ 465 | greatest_info.tests_run += greatest_info.suite.tests_run; \ 466 | } \ 467 | \ 468 | void greatest_do_pass(const char *name) { \ 469 | if (GREATEST_IS_VERBOSE()) { \ 470 | fprintf(GREATEST_STDOUT, "PASS %s: %s", \ 471 | name, greatest_info.msg ? greatest_info.msg : ""); \ 472 | } else { \ 473 | fprintf(GREATEST_STDOUT, "."); \ 474 | } \ 475 | greatest_info.suite.passed++; \ 476 | } \ 477 | \ 478 | void greatest_do_fail(const char *name) { \ 479 | if (GREATEST_IS_VERBOSE()) { \ 480 | fprintf(GREATEST_STDOUT, \ 481 | "FAIL %s: %s (%s:%u)", \ 482 | name, greatest_info.msg ? greatest_info.msg : "", \ 483 | greatest_info.fail_file, greatest_info.fail_line); \ 484 | } else { \ 485 | fprintf(GREATEST_STDOUT, "F"); \ 486 | greatest_info.col++; \ 487 | /* add linebreak if in line of '.'s */ \ 488 | if (greatest_info.col != 0) { \ 489 | fprintf(GREATEST_STDOUT, "\n"); \ 490 | greatest_info.col = 0; \ 491 | } \ 492 | fprintf(GREATEST_STDOUT, "FAIL %s: %s (%s:%u)\n", \ 493 | name, \ 494 | greatest_info.msg ? greatest_info.msg : "", \ 495 | greatest_info.fail_file, greatest_info.fail_line); \ 496 | } \ 497 | greatest_info.suite.failed++; \ 498 | } \ 499 | \ 500 | void greatest_do_skip(const char *name) { \ 501 | if (GREATEST_IS_VERBOSE()) { \ 502 | fprintf(GREATEST_STDOUT, "SKIP %s: %s", \ 503 | name, \ 504 | greatest_info.msg ? 
\ 505 | greatest_info.msg : "" ); \ 506 | } else { \ 507 | fprintf(GREATEST_STDOUT, "s"); \ 508 | } \ 509 | greatest_info.suite.skipped++; \ 510 | } \ 511 | \ 512 | int greatest_do_assert_equal_t(const void *exp, const void *got, \ 513 | greatest_type_info *type_info, void *udata) { \ 514 | if (type_info == NULL || type_info->equal == NULL) { \ 515 | return 0; \ 516 | } \ 517 | int eq = type_info->equal(exp, got, udata); \ 518 | if (!eq) { \ 519 | if (type_info->print != NULL) { \ 520 | fprintf(GREATEST_STDOUT, "Expected: "); \ 521 | (void)type_info->print(exp, udata); \ 522 | fprintf(GREATEST_STDOUT, "\nGot: "); \ 523 | (void)type_info->print(got, udata); \ 524 | fprintf(GREATEST_STDOUT, "\n"); \ 525 | } else { \ 526 | fprintf(GREATEST_STDOUT, \ 527 | "GREATEST_ASSERT_EQUAL_T failure at %s:%d\n", \ 528 | greatest_info.fail_file, \ 529 | greatest_info.fail_line); \ 530 | } \ 531 | } \ 532 | return eq; \ 533 | } \ 534 | \ 535 | void greatest_usage(const char *name) { \ 536 | fprintf(GREATEST_STDOUT, \ 537 | "Usage: %s [-hlfv] [-s SUITE] [-t TEST]\n" \ 538 | " -h print this Help\n" \ 539 | " -l List suites and their tests, then exit\n" \ 540 | " -f Stop runner after first failure\n" \ 541 | " -v Verbose output\n" \ 542 | " -s SUITE only run suite named SUITE\n" \ 543 | " -t TEST only run test named TEST\n", \ 544 | name); \ 545 | } \ 546 | \ 547 | void GREATEST_SET_SETUP_CB(greatest_setup_cb *cb, void *udata) { \ 548 | greatest_info.setup = cb; \ 549 | greatest_info.setup_udata = udata; \ 550 | } \ 551 | \ 552 | void GREATEST_SET_TEARDOWN_CB(greatest_teardown_cb *cb, \ 553 | void *udata) { \ 554 | greatest_info.teardown = cb; \ 555 | greatest_info.teardown_udata = udata; \ 556 | } \ 557 | \ 558 | static int greatest_string_equal_cb(const void *exp, const void *got, \ 559 | void *udata) { \ 560 | (void)udata; \ 561 | return (0 == strcmp((const char *)exp, (const char *)got)); \ 562 | } \ 563 | \ 564 | static int greatest_string_printf_cb(const void *t, void *udata) { \ 
565 | (void)udata; \ 566 | return fprintf(GREATEST_STDOUT, "%s", (const char *)t); \ 567 | } \ 568 | \ 569 | greatest_type_info greatest_type_info_string = { \ 570 | greatest_string_equal_cb, \ 571 | greatest_string_printf_cb, \ 572 | }; \ 573 | \ 574 | static int greatest_fmt_equal_cb(const void *exp, const void *got, \ 575 | void *udata) { \ 576 | (void)udata; \ 577 | long long lle = (long long )exp; \ 578 | long long llg = (long long )got; \ 579 | /* equality callback: must return nonzero iff equal (a 3-way \ 579 | * comparison here would invert ASSERT_EQ_FMT's result) */ \ 579 | return (lle == llg); \ 580 | } \ 581 | \ 582 | static int greatest_fmt_printf_cb(const void *t, void *udata) { \ 583 | const char *fmt = (const char *)udata; \ 584 | long long v = (long long)t; \ 585 | return fprintf(GREATEST_STDOUT, fmt, v); \ 586 | } \ 587 | \ 588 | greatest_type_info greatest_type_info_fmt = { \ 589 | greatest_fmt_equal_cb, \ 590 | greatest_fmt_printf_cb, \ 591 | }; \ 592 | \ 593 | greatest_run_info greatest_info 594 | 595 | /* Handle command-line arguments, etc. */ 596 | #define GREATEST_MAIN_BEGIN() \ 597 | do { \ 598 | int i = 0; \ 599 | memset(&greatest_info, 0, sizeof(greatest_info)); \ 600 | greatest_info.width = GREATEST_DEFAULT_WIDTH; \ 601 | for (i = 1; i < argc; i++) { \ 602 | if (0 == strcmp("-t", argv[i])) { \ 603 | if (argc <= i + 1) { \ 604 | greatest_usage(argv[0]); \ 605 | exit(EXIT_FAILURE); \ 606 | } \ 607 | greatest_info.test_filter = argv[i+1]; \ 608 | i++; \ 609 | } else if (0 == strcmp("-s", argv[i])) { \ 610 | if (argc <= i + 1) { \ 611 | greatest_usage(argv[0]); \ 612 | exit(EXIT_FAILURE); \ 613 | } \ 614 | greatest_info.suite_filter = argv[i+1]; \ 615 | i++; \ 616 | } else if (0 == strcmp("-f", argv[i])) { \ 617 | greatest_info.flags |= GREATEST_FLAG_FIRST_FAIL; \ 618 | } else if (0 == strcmp("-v", argv[i])) { \ 619 | greatest_info.flags |= GREATEST_FLAG_VERBOSE; \ 620 | } else if (0 == strcmp("-l", argv[i])) { \ 621 | greatest_info.flags |= GREATEST_FLAG_LIST_ONLY; \ 622 | } else if (0 == strcmp("-h", argv[i])) { \ 623 | greatest_usage(argv[0]); \ 624
| exit(EXIT_SUCCESS); \ 625 | } else { \ 626 | fprintf(GREATEST_STDOUT, \ 627 | "Unknown argument '%s'\n", argv[i]); \ 628 | greatest_usage(argv[0]); \ 629 | exit(EXIT_FAILURE); \ 630 | } \ 631 | } \ 632 | } while (0); \ 633 | GREATEST_SET_TIME(greatest_info.begin) 634 | 635 | /* Report passes, failures, skipped tests, the number of 636 | * assertions, and the overall run time. */ 637 | #define GREATEST_MAIN_END() \ 638 | do { \ 639 | if (!GREATEST_LIST_ONLY()) { \ 640 | GREATEST_SET_TIME(greatest_info.end); \ 641 | fprintf(GREATEST_STDOUT, \ 642 | "\nTotal: %u tests", greatest_info.tests_run); \ 643 | GREATEST_CLOCK_DIFF(greatest_info.begin, \ 644 | greatest_info.end); \ 645 | fprintf(GREATEST_STDOUT, ", %u assertions\n", \ 646 | greatest_info.assertions); \ 647 | fprintf(GREATEST_STDOUT, \ 648 | "Pass: %u, fail: %u, skip: %u.\n", \ 649 | greatest_info.passed, \ 650 | greatest_info.failed, greatest_info.skipped); \ 651 | } \ 652 | return (greatest_info.failed > 0 \ 653 | ? EXIT_FAILURE : EXIT_SUCCESS); \ 654 | } while (0) 655 | 656 | /* Make abbreviations without the GREATEST_ prefix for the 657 | * most commonly used symbols. 
*/ 658 | #if GREATEST_USE_ABBREVS 659 | #define TEST GREATEST_TEST 660 | #define SUITE GREATEST_SUITE 661 | #define RUN_TEST GREATEST_RUN_TEST 662 | #define RUN_TEST1 GREATEST_RUN_TEST1 663 | #define RUN_SUITE GREATEST_RUN_SUITE 664 | #define ASSERT GREATEST_ASSERT 665 | #define ASSERTm GREATEST_ASSERTm 666 | #define ASSERT_FALSE GREATEST_ASSERT_FALSE 667 | #define ASSERT_EQ GREATEST_ASSERT_EQ 668 | #define ASSERT_EQUAL_T GREATEST_ASSERT_EQUAL_T 669 | #define ASSERT_STR_EQ GREATEST_ASSERT_STR_EQ 670 | #define ASSERT_FALSEm GREATEST_ASSERT_FALSEm 671 | #define ASSERT_EQm GREATEST_ASSERT_EQm 672 | #define ASSERT_EQUAL_Tm GREATEST_ASSERT_EQUAL_Tm 673 | #define ASSERT_EQ_FMT GREATEST_ASSERT_EQ_FMT 674 | #define ASSERT_STR_EQm GREATEST_ASSERT_STR_EQm 675 | #define PASS GREATEST_PASS 676 | #define FAIL GREATEST_FAIL 677 | #define SKIP GREATEST_SKIP 678 | #define PASSm GREATEST_PASSm 679 | #define FAILm GREATEST_FAILm 680 | #define SKIPm GREATEST_SKIPm 681 | #define SET_SETUP GREATEST_SET_SETUP_CB 682 | #define SET_TEARDOWN GREATEST_SET_TEARDOWN_CB 683 | 684 | /* RUN_TESTp uses variadic macros, so only define it for C99+. 684 | * (Guard constant was a typo: 19901L; __STDC_VERSION__ is 199901L.) */ 684 | #if __STDC_VERSION__ >= 199901L 685 | #define RUN_TESTp GREATEST_RUN_TESTp 686 | #endif /* C99 */ 687 | #endif /* USE_ABBREVS */ 688 | 689 | #endif 690 | -------------------------------------------------------------------------------- /loom.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 Scott Vokes 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "loom.h" 25 | #include "loom_internal.h" 26 | 27 | /* Initialize a thread pool in *L, according to the configuration in CFG. */ 28 | loom_init_res loom_init(loom_config *cfg, struct loom **l) { 29 | if (cfg == NULL || l == NULL) { return LOOM_INIT_RES_ERROR_NULL; } 30 | if (cfg->ring_sz2 == 0) { cfg->ring_sz2 = DEF_RING_SZ2; } 31 | if (cfg->ring_sz2 < 1 || cfg->ring_sz2 > LOOM_MAX_RING_SZ2) { 32 | return LOOM_INIT_RES_ERROR_BADARG; 33 | } 34 | 35 | if (cfg->max_delay == 0) { cfg->max_delay = DEF_MAX_DELAY; } 36 | if (cfg->max_threads == 0) { cfg->max_threads = DEF_MAX_THREADS; } 37 | 38 | bool mutex_init = false; 39 | size_t loom_sz = sizeof(**l) + cfg->max_threads * sizeof(thread_info); 40 | struct loom *pl = calloc(1, loom_sz); 41 | if (pl == NULL) { return LOOM_INIT_RES_ERROR_MEMORY; } 42 | 43 | size_t ring_sz = (1 << cfg->ring_sz2); 44 | ltask *ring = malloc(ring_sz * sizeof(*ring)); 45 | if (ring == NULL) { 46 | free(pl); 47 | return LOOM_INIT_RES_ERROR_MEMORY; 48 | } 49 | 50 | /* Set with 0xFF because highest bit => avaliable for write-reserve. 
*/ 51 | memset(ring, 0xFF, ring_sz * sizeof(*ring)); 52 | 53 | pl->size = ring_sz; 54 | pl->mask = ring_sz - 1; 55 | pl->ring = ring; 56 | pl->max_delay = cfg->max_delay; 57 | pl->max_threads = cfg->max_threads; 58 | 59 | #if LOOM_USE_LOCKING 60 | if (0 != pthread_mutex_init(&pl->lock, NULL)) { goto cleanup; } 61 | mutex_init = true; 62 | #endif 63 | 64 | for (int i = 0; i < cfg->init_threads; i++) { 65 | if (spawn(pl, pl->cur_threads)) { 66 | pl->cur_threads++; 67 | } else { 68 | goto cleanup; 69 | } 70 | } 71 | *l = pl; 72 | 73 | return LOOM_INIT_RES_OK; 74 | 75 | cleanup: 76 | if (pl) { 77 | if (pl->ring) { 78 | for (int i = 0; i < pl->cur_threads; i++) { 79 | /* write shutdown */ 80 | } 81 | } 82 | for (int i = 0; i < pl->cur_threads; i++) { 83 | thread_info *ti = &pl->threads[i]; 84 | /* worker thread will close the other end */ 85 | close(ti->wr_fd); 86 | void *val = NULL; 87 | if (0 != pthread_join(ti->t, &val)) { assert(false); } 88 | } 89 | 90 | #if LOOM_USE_LOCKING 91 | if (mutex_init) { pthread_mutex_destroy(&pl->lock); } 92 | #else 93 | (void)mutex_init; 94 | #endif 95 | 96 | free(pl); 97 | } 98 | 99 | if (ring) { free(ring); } 100 | return LOOM_INIT_RES_ERROR_MEMORY; 101 | } 102 | 103 | /* Get the size of the queue. */ 104 | size_t loom_queue_size(struct loom *l) { return l->size; } 105 | 106 | /* Enqueue a task, which will be copied in to the thread pool by value. 107 | * Returns whether the task was successfully enqueued - it can fail if 108 | * the queue is full or if L on T are NULL. 109 | * 110 | * If BACKPRESSURE is non-NULL, the current backlog size will be written 111 | * into it. This is a good way to push back against parts of the system 112 | * which are inundating the thread pool with tasks. 113 | * (*BACKPRESSURE / loom_queue_size(l) gives how full the queue is.) 
*/ 114 | bool loom_enqueue(struct loom *l, loom_task *t, size_t *backpressure) { 115 | LOG(3, " -- enqueuing task %p\n", (void *)t); 116 | if (l == NULL || t == NULL) { return false; } 117 | if (t->task_cb == NULL) { return false; } 118 | 119 | /* Start more worker threads if necessary, using a spin/CAS loop to 120 | * avoid a race when reserving the slot for the new thread. */ 121 | LOCK(l); 122 | if (!start_worker_if_necessary(l)) { 123 | UNLOCK(l); 124 | return false; 125 | } 126 | 127 | /* Reserve write, ensuring that it never wraps and clobbers 128 | * anything protected by l->done. */ 129 | size_t w = 0; 130 | for (;;) { 131 | w = l->write; 132 | const size_t fill_sz = l->write - l->done; 133 | if (fill_sz > l->mask) { 134 | if (backpressure) { *backpressure = fill_sz; } 135 | UNLOCK(l); 136 | return false; 137 | } 138 | 139 | /* Check if the highest bit is set, either because the cell has 140 | * been memset to all 0xFF the first time around, or because 141 | * l->ring[w * l->mask] has been set to w (write commit) and 142 | * then to ~r (when r == w), releasing it. */ 143 | const size_t mark_bit = 1L << (8 * sizeof(size_t) - 1); 144 | 145 | if ((l->ring[w & l->mask].mark & mark_bit) == 0) { 146 | UNLOCK(l); 147 | return false; 148 | } 149 | 150 | if (CAS(&l->write, w, w + 1)) { 151 | if (backpressure) { *backpressure = fill_sz; } 152 | break; 153 | } 154 | } 155 | 156 | /* qt = &l->ring[w] is now reserved, and l->write is w + 1. */ 157 | ltask *qt = &l->ring[w & l->mask]; 158 | assert(qt->mark != w); /* not yet marked */ 159 | 160 | qt->task_cb = t->task_cb; 161 | qt->cleanup_cb = t->cleanup_cb; 162 | qt->env = t->env; 163 | 164 | LOG(4, " -- saving %p(%zd), env %p\n", 165 | (void *)qt, w, (void *)t->env); 166 | 167 | qt->mark = w; /* Mark as committed */ 168 | 169 | /* Advance commit counter through marked cells. 
*/ 170 | update_marked_commits(l); 171 | 172 | size_t bp = w - l->done; 173 | if (backpressure != NULL) { *backpressure = bp; } 174 | 175 | UNLOCK(l); 176 | 177 | send_wakeup(l); 178 | return true; 179 | } 180 | 181 | static void update_marked_commits(struct loom *l) { 182 | const size_t mask = l->mask; 183 | for (;;) { 184 | size_t c = l->commit; 185 | const size_t mark = l->ring[c & mask].mark; 186 | if (mark != c) { break; } 187 | size_t d = l->done; 188 | if (CAS(&l->commit, c, c + 1)) { 189 | if (d > c) { LOG(0, "c %zd, d %zd\n", c, d); } 190 | assert(d <= c); 191 | } 192 | } 193 | } 194 | 195 | /* Send wakeup to first sleeping thread */ 196 | static void send_wakeup(struct loom *l) { 197 | for (int i = 0; i < l->cur_threads; i++) { 198 | thread_info *ti = &l->threads[i]; 199 | thread_state s = ti->state; 200 | if (s == LTS_ASLEEP) { 201 | ssize_t wres = write(ti->wr_fd, "!", 1); (void)wres; /* best-effort: workers poll with a timeout anyway, so a dropped wakeup only delays them */ 202 | break; 203 | } 204 | } 205 | } 206 | 207 | /* Get statistics from the currently running thread pool. */ 208 | bool loom_get_stats(struct loom *l, loom_info *info) { 209 | if (l == NULL || info == NULL) { return false; } 210 | 211 | uint16_t active = 0; 212 | for (int i = 0; i < l->cur_threads; i++) { 213 | if (l->threads[i].state == LTS_ACTIVE) { active++; } 214 | } 215 | info->active_threads = active; 216 | info->total_threads = l->cur_threads; 217 | info->backlog_size = l->commit - l->done; 218 | assert(l->commit >= l->done); 219 | 220 | return true; 221 | } 222 | 223 | /* Send a shutdown notification to the thread pool. This may need to be 224 | * called multiple times, as threads will not cancel remaining tasks, 225 | * clean up, and terminate until they complete their current task, if any. 226 | * Returns whether all threads have shut down. 
(Idempotent.)*/ 227 | bool loom_shutdown(struct loom *l) { 228 | uint16_t joined = 0; 229 | 230 | for (int i = 0; i < l->cur_threads; i++) { 231 | thread_info *ti = &l->threads[i]; 232 | if (ti->state < LTS_ALERT_SHUTDOWN) { 233 | LOG(4, "shutdown: %d -- %d => LTS_ALERT_SHUTDOWN\n", 234 | i, ti->state); 235 | ti->state = LTS_ALERT_SHUTDOWN; 236 | close(ti->wr_fd); 237 | } 238 | } 239 | 240 | for (int i = 0; i < l->cur_threads; i++) { 241 | thread_info *ti = &l->threads[i]; 242 | if (ti->state <= LTS_ALERT_SHUTDOWN) { 243 | LOG(3, " -- ti->state %d\n", ti->state); 244 | } 245 | if (ti->state == LTS_CLOSING) { 246 | LOG(2, " -- joining %d\n", i); 247 | void *val = NULL; 248 | if (0 != pthread_join(ti->t, &val)) { assert(false); } 249 | ti->state = LTS_JOINED; 250 | } 251 | 252 | if (ti->state >= LTS_JOINED) { joined++; } 253 | } 254 | 255 | LOG(2, " -- joined %u of %d\n", joined, l->cur_threads); 256 | return joined == l->cur_threads; 257 | } 258 | 259 | /* Free the thread pool and other internal resources. This will 260 | * internally call loom_shutdown until all threads have shut down. 
*/ 261 | void loom_free(struct loom *l) { 262 | LOG(2, " -- free\n"); 263 | for (;;) { 264 | if (loom_shutdown(l)) { break; } 265 | const int FREE_DELAY_MSEC = 10; 266 | poll(NULL, 0, FREE_DELAY_MSEC); 267 | } 268 | 269 | free(l->ring); 270 | free(l); 271 | } 272 | 273 | static void *thread_task(void *arg) { 274 | thread_info *ti = (thread_info *)arg; 275 | struct loom *l = ti->l; 276 | int delay = 1; 277 | struct pollfd fds[1]; 278 | fds[0].fd = ti->rd_fd; 279 | fds[0].events = POLLIN; 280 | 281 | ti->state = LTS_ACTIVE; 282 | 283 | while (ti->state != LTS_ALERT_SHUTDOWN) { 284 | bool did_work = false; 285 | 286 | alert_pipe_res ap_res = read_alert_pipe(ti, fds, delay); 287 | if (ap_res == ALERT_IDLE || ap_res == ALERT_NEWTASK) { 288 | /* no-op */ 289 | } else if (ap_res == ALERT_SHUTDOWN) { 290 | ti->state = LTS_ALERT_SHUTDOWN; 291 | break; 292 | } else if (ap_res == ALERT_ERROR) { 293 | assert(false); 294 | } 295 | did_work = run_tasks(l, ti); 296 | 297 | /* Exponential back-off */ 298 | if (did_work) { 299 | delay = 0; 300 | } else if (delay == 0) { 301 | delay = 1; 302 | } else { 303 | delay <<= 1; 304 | if (delay > l->max_delay) { delay = l->max_delay; } 305 | } 306 | } 307 | ti->state = LTS_CLOSING; 308 | 309 | clean_up_cancelled_tasks(ti); 310 | return NULL; 311 | } 312 | 313 | static alert_pipe_res read_alert_pipe(thread_info *ti, 314 | struct pollfd *pfd, int delay) { 315 | if (delay > 1 && ti->state == LTS_ACTIVE) { 316 | ti->state = LTS_ASLEEP; 317 | } 318 | int pres = poll(pfd, 1, delay); 319 | if (pres == 1) { 320 | short revents = pfd[0].revents; 321 | if (revents & POLLHUP) { /* client closed other end */ 322 | return ALERT_SHUTDOWN; 323 | } else if (revents & POLLIN) { 324 | char buf[16]; /* shouldn't get > 1 '!', but in case */ 325 | ssize_t rd_res = read(ti->rd_fd, buf, 16); 326 | if (rd_res > 0) { 327 | return ALERT_NEWTASK; 328 | } else if (rd_res < 0) { 329 | if (errno == EINTR) { 330 | errno = 0; 331 | return ALERT_IDLE; 332 | } else { 333 | 
LOG(1, "rd_res %zd, errno %d == %s, revents %x\n", 334 | rd_res, errno, strerror(errno), revents); 335 | return ALERT_ERROR; 336 | } 337 | } 338 | } else if (revents & (POLLERR)) { 339 | assert(false); 340 | } 341 | } else if (pres < 0) { 342 | if (errno == EAGAIN || errno == EINTR) { 343 | errno = 0; 344 | } else { 345 | assert(false); 346 | } 347 | } 348 | return ALERT_IDLE; 349 | } 350 | 351 | static bool start_worker_if_necessary(struct loom *l) { 352 | uint16_t cur = l->cur_threads; 353 | if (cur == 0 || (cur < l->max_threads 354 | && l->commit - l->done > l->size / 2)) { 355 | 356 | /* First, try to respawn failed threads, if any. */ 357 | for (int i = 0; i < l->max_threads; i++) { 358 | thread_info *ti = &l->threads[i]; 359 | if (ti->state == LTS_DEAD 360 | && CAS(&ti->state, LTS_DEAD, LTS_RESPAWN)) { 361 | if (spawn(l, i)) { 362 | return true; 363 | } 364 | } 365 | } 366 | 367 | /* Reserve an unused thread slot. */ 368 | for (;;) { 369 | cur = l->cur_threads; if (cur >= l->max_threads) { return true; } /* re-check bound inside the CAS loop: concurrent enqueuers could otherwise push cur_threads past max_threads and index past the end of threads[] */ 370 | if (CAS(&l->cur_threads, cur, cur + 1)) { 371 | LOG(2, " -- spawning a new worker thread %d\n", cur); 372 | return spawn(l, cur); 373 | } 374 | } 375 | } else { 376 | return true; /* no need to start a new worker thread */ 377 | } 378 | } 379 | 380 | static bool spawn(struct loom *l, int id) { 381 | LOG(2, " -- spawning %d\n", id); 382 | thread_info *ti = &l->threads[id]; 383 | ti->state = LTS_INIT; 384 | int pair[2]; 385 | if (0 != pipe(pair)) { return false; } 386 | ti->l = l; 387 | ti->rd_fd = pair[0]; 388 | ti->wr_fd = pair[1]; 389 | 390 | if (0 != pthread_create(&l->threads[id].t, NULL, thread_task, (void *)ti)) { 391 | ti->state = LTS_DEAD; 392 | return false; 393 | } else { 394 | return true; 395 | } 396 | } 397 | 398 | static bool run_tasks(struct loom *l, thread_info *ti) { 399 | bool work = false; 400 | ltask *qt = NULL; /* task in queue */ 401 | ltask t; /* current task */ 402 | 403 | /* While work is available... 
*/ 404 | while (l->read < l->commit) { 405 | for (;;) { 406 | /* Break early so it notices the pipe has been closed 407 | * and cancels the remaining tasks, rather than 408 | * exhausting the work queue before checking. */ 409 | if (ti->state == LTS_ALERT_SHUTDOWN) { return work; } 410 | size_t r = l->read; 411 | if (r == l->commit) { break; } 412 | LOCK(l); 413 | if (r < l->commit && CAS(&l->read, r, r + 1)) { 414 | /* &l->ring[r & l->mask] is reserved, and read is r + 1 */ 415 | if (ti->state == LTS_ASLEEP) { ti->state = LTS_ACTIVE; } 416 | 417 | qt = &l->ring[r & l->mask]; 418 | 419 | /* If this fails, writes are wrapping reads. */ 420 | assert(qt->mark == r); 421 | 422 | /* Copy task out of queue */ 423 | memcpy(&t, qt, sizeof(loom_task)); 424 | 425 | LOG(3, " -- running %p(%zd), env %p\n", 426 | (void *)qt, r, (void *)t.env); 427 | 428 | qt->mark = ~r; /* Mark as done */ 429 | update_marked_done(l); 430 | 431 | assert(t.task_cb != NULL); 432 | t.task_cb(t.env); 433 | work = true; 434 | } 435 | UNLOCK(l); 436 | } 437 | } 438 | return work; 439 | } 440 | 441 | static void update_marked_done(struct loom *l) { 442 | const size_t mask = l->mask; 443 | for (;;) { 444 | size_t d = l->done; 445 | const size_t mark = l->ring[d & mask].mark; 446 | if (mark != ~d) { break; } 447 | if (CAS(&l->done, d, d + 1)) { 448 | size_t c = l->commit; 449 | if (d > c) { LOG(0, "c %zd, d %zd\n", c, d); } 450 | assert(d <= c); 451 | } 452 | } 453 | } 454 | 455 | static void clean_up_cancelled_tasks(thread_info *ti) { 456 | LOG(2, " -- cleanup for thread %p\n", (void *)pthread_self()); 457 | struct loom *l = ti->l; 458 | ltask *qt = NULL; /* task in queue */ 459 | ltask t; /* current task */ 460 | for (;;) { 461 | size_t r = l->read; 462 | if (r == l->commit) { break; } 463 | LOCK(l); 464 | if (CAS(&l->read, r, r + 1)) { 465 | qt = &l->ring[r & l->mask]; 466 | memcpy(&t, qt, sizeof(t)); 467 | qt->mark = ~r; 468 | update_marked_done(l); 469 | if (t.cleanup_cb != NULL) { 
t.cleanup_cb(t.env); } 470 | } 471 | UNLOCK(l); 472 | } 473 | close(ti->rd_fd); 474 | } 475 | -------------------------------------------------------------------------------- /loom.h: -------------------------------------------------------------------------------- 1 | #ifndef LOOM_H 2 | #define LOOM_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /* Version 0.1.1. */ 9 | #define LOOM_VERSION_MAJOR 0 10 | #define LOOM_VERSION_MINOR 1 11 | #define LOOM_VERSION_PATCH 1 12 | 13 | /* Opaque type for a thread pool. */ 14 | struct loom; 15 | 16 | /* Configuration struct. */ 17 | typedef struct { 18 | /* Base-2 log for the task ring buffer, e.g. 8 => 256 slots. 19 | * A larger ring buffer takes more memory, but allows for a 20 | * larger backlog of tasks before the pool fills up and rejects 21 | * new tasks outright. */ 22 | uint8_t ring_sz2; 23 | 24 | /* Max delay when for idle tasks' exponential back-off. */ 25 | uint16_t max_delay; 26 | 27 | /* How many threads to start during initialization. Otherwise, 28 | * threads are started on demand, if new tasks are enqueued while 29 | * all others are busy. (Defaults to 0.) */ 30 | uint16_t init_threads; 31 | 32 | /* The max number of threads to run for the thread pool. 33 | * Defaults to 8. */ 34 | uint16_t max_threads; 35 | } loom_config; 36 | 37 | /* Callback to run, with an environment pointer. (A closure.) */ 38 | typedef void (loom_task_cb)(void *env); 39 | 40 | /* Callback to clean up the environment if the thread pool is 41 | * shutting down & tasks are being canceled. */ 42 | typedef void (loom_cleanup_cb)(void *env); 43 | 44 | /* A task to enqueue in the thread pool. *ENV is an arbitrary void pointer 45 | * that will be passed to the callbacks. */ 46 | typedef struct { 47 | loom_task_cb *task_cb; 48 | loom_cleanup_cb *cleanup_cb; 49 | void *env; 50 | } loom_task; 51 | 52 | /* Statistics from the currently running thread pool. 
*/ 53 | typedef struct { 54 | uint16_t active_threads; 55 | uint16_t total_threads; 56 | size_t backlog_size; 57 | } loom_info; 58 | 59 | /* Initialize a thread pool in *L, according to the configuration in CFG. */ 60 | typedef enum { 61 | LOOM_INIT_RES_OK = 0, 62 | LOOM_INIT_RES_ERROR_NULL = -1, 63 | LOOM_INIT_RES_ERROR_BADARG = -2, 64 | LOOM_INIT_RES_ERROR_MEMORY = -3, 65 | } loom_init_res; 66 | loom_init_res loom_init(loom_config *cfg, struct loom **l); 67 | 68 | /* Enqueue a task, which will be copied in to the thread pool by value. 69 | * Returns whether the task was successfully enqueued - it can fail if 70 | * the queue is full or if L or T are NULL. 71 | * 72 | * If BACKPRESSURE is non-NULL, the current backlog size will be written 73 | * into it. This is a good way to push back against parts of the system 74 | * which are inundating the thread pool with tasks. 75 | * (*BACKPRESSURE / loom_queue_size(l) gives how full the queue is.) */ 76 | bool loom_enqueue(struct loom *l, loom_task *t, size_t *backpressure); 77 | 78 | /* Get the size of the queue. */ 79 | size_t loom_queue_size(struct loom *l); 80 | 81 | /* Get statistics from the currently running thread pool. */ 82 | bool loom_get_stats(struct loom *l, loom_info *info); 83 | 84 | /* Send a shutdown notification to the thread pool. This may need to be 85 | * called multiple times, as threads will not cancel remaining tasks, 86 | * clean up, and terminate until they complete their current task, if any. 87 | * Returns whether all threads have shut down. (Idempotent.)*/ 88 | bool loom_shutdown(struct loom *l); 89 | 90 | /* Free the thread pool and other internal resources. This will 91 | * internally call loom_shutdown until all threads have shut down. 
*/ 92 | void loom_free(struct loom *l); 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /loom_internal.h: -------------------------------------------------------------------------------- 1 | #ifndef LOOM_INTERNAL_H 2 | #define LOOM_INTERNAL_H 3 | 4 | /* Defaults. */ 5 | #define DEF_RING_SZ2 8 6 | #define DEF_MAX_DELAY 1000 7 | #define DEF_MAX_THREADS 8 8 | 9 | /* Max for log2(size) of task queue ring buffer. 10 | * The most significant bit of each cell is used as a mark. */ 11 | #define LOOM_MAX_RING_SZ2 ((8 * sizeof(size_t)) - 1) 12 | 13 | /* Use mutexes instead of CAS? 14 | * This is mostly for benchmarking -- the lockless mode should be 15 | * significantly faster in most cases. */ 16 | #define LOOM_USE_LOCKING 0 17 | 18 | typedef enum { 19 | LTS_INIT, /* initializing */ 20 | LTS_ASLEEP, /* worker is asleep */ 21 | LTS_ACTIVE, /* running a task */ 22 | LTS_ALERT_SHUTDOWN, /* worker should shut down */ 23 | LTS_CLOSING, /* worker is shutting down */ 24 | LTS_JOINED, /* worker has been pthread_join'd */ 25 | LTS_DEAD, /* pthread_create failed */ 26 | LTS_RESPAWN, /* slot reserved for respawn */ 27 | } thread_state; 28 | 29 | typedef struct { 30 | pthread_t t; /* thread handle */ 31 | thread_state state; /* task state machine state */ 32 | int wr_fd; /* write end of alert pipe */ 33 | int rd_fd; /* read end of alert pipe */ 34 | struct loom *l; /* pointer to thread pool */ 35 | } thread_info; 36 | 37 | /* loom_task, with an added mark field. This is used mark whether a task is 38 | * ready to have the l->commit or l->done offsets advanced over it. 39 | * 40 | * . The mark bytes are all memset to 0xFF at init. 41 | * 42 | * . The l->write offset can only advance across a cell if its mark 43 | * has the most significant bit set. 44 | * 45 | * . 
When a cell has been reserved for write (by atomically CAS-ing 46 | * l->write to increment past it; during this time, only the producer 47 | * thread reserving it can write to it), it is marked for commit by 48 | * setting the mark to the write offset. Since the ring buffer wraps, 49 | * this means the next time the same cell is used, the mark value will 50 | * be based on the previous pass's write offset (l->write - l->size), 51 | * which will no longer be valid. 52 | * 53 | * . After a write is committed, the producer thread does a CAS loop to 54 | * advance l->commit over every marked cell. Since putting a task in 55 | * or out of the queue is just a memcpy of an ltask to/from the 56 | * caller's stack, it should never block for long, and have little 57 | * variability in latency. It also doesn't matter which producer 58 | * thread advances l->commit. 59 | * 60 | * . Similarly, a consumer atomically CASs l->read to reserve a cell 61 | * for read, copies its task into its call stack, and then sets 62 | * the cell mask to the negated read offset (~l->read). This means 63 | * that it will always be distinct from the commit mark, distinct 64 | * from the last spin around the ring buffer, and have the most 65 | * significant bit set so that l->write knows it's free to advance 66 | * over it. 67 | * 68 | * . Also, after the read is released, the consumer thread does a 69 | * CAS loop to advance l->done over every marked cell. This behaves 70 | * just like the CAS loop to advance l->commit above. */ 71 | typedef struct { 72 | loom_task_cb *task_cb; 73 | loom_cleanup_cb *cleanup_cb; 74 | void *env; 75 | size_t mark; 76 | } ltask; 77 | 78 | /* Offsets in ring buffer. Slot acquisition happens in the order 79 | * [Write, Commit, Read, Done]. 
For S(x) == loom->ring[x]: 80 | * x >= W: undefined 81 | * C <= x < W: reserved for write 82 | * R <= x < C: committed, available for read 83 | * D <= x < R: being processed 84 | * x < D: freed 85 | * 86 | * W == C: 0 reserved 87 | * R == C: 0 available 88 | * D == R: 0 being read 89 | * 90 | * It's a ring buffer, so it wraps with (x & mask), and the 91 | * number of slots in useat a given time is: 92 | * W - D 93 | * 94 | * Empty when W == D: 95 | * [_, _, _, _,DW, _, _, _, ] {W:4, D:4} -- empty 96 | * 97 | * In use: 98 | * (W - D) 99 | * 100 | * Full when (reserved + 1) & mask == released & mask: 101 | * [x, x, x, W, D, x, x, x, ] {W:3+8, D:4} -- full 102 | * 103 | * D <= R <= C <= W 104 | */ 105 | typedef struct loom { 106 | #if LOOM_USE_LOCKING 107 | pthread_mutex_t lock; 108 | #endif 109 | /* Offsets. See block comment above. */ 110 | size_t write; 111 | size_t commit; 112 | size_t read; 113 | size_t done; 114 | 115 | size_t size; /* size of pool */ 116 | size_t mask; /* bitmask for pool offsets */ 117 | uint16_t max_delay; /* max poll(2) sleep for idle tasks */ 118 | uint16_t cur_threads; /* current live threads */ 119 | uint16_t max_threads; /* max # of threads to create */ 120 | ltask *ring; /* ring buffer */ 121 | thread_info threads[]; /* thread metadata */ 122 | } loom; 123 | 124 | typedef enum { 125 | ALERT_IDLE, /* no new tasks */ 126 | ALERT_NEWTASK, /* new task is available -- wake up */ 127 | ALERT_ERROR, /* unexpected read(2) failure */ 128 | ALERT_SHUTDOWN, /* threadpool is shutting down */ 129 | } alert_pipe_res; 130 | 131 | static void *thread_task(void *arg); 132 | static bool run_tasks(struct loom *l, thread_info *ti); 133 | static bool start_worker_if_necessary(struct loom *l); 134 | static bool spawn(struct loom *l, int id); 135 | static void clean_up_cancelled_tasks(thread_info *ti); 136 | static alert_pipe_res read_alert_pipe(thread_info *ti, 137 | struct pollfd *pfd, int delay); 138 | static void send_wakeup(struct loom *l); 139 | static 
void update_marked_commits(struct loom *l); 140 | static void update_marked_done(struct loom *l); 141 | 142 | #ifndef LOOM_LOG_LEVEL 143 | #define LOOM_LOG_LEVEL 1 144 | #endif 145 | 146 | /* Log debugging info. */ 147 | #if LOOM_LOG_LEVEL > 0 148 | #include 149 | #define LOG(LVL, ...) \ 150 | do { \ 151 | int lvl = LVL; \ 152 | if ((LOOM_LOG_LEVEL) >= lvl) { \ 153 | printf(__VA_ARGS__); \ 154 | } \ 155 | } while (0) 156 | #else 157 | #define LOG(LVL, ...) 158 | #endif 159 | 160 | #if LOOM_USE_LOCKING 161 | #define LOCK(L) if (0 != pthread_mutex_lock(&(L)->lock)) { assert(false); } 162 | #define UNLOCK(L) if (0 != pthread_mutex_unlock(&(L)->lock)) { assert(false); } 163 | #define CAS(PTR, OLD, NEW) (*PTR == (OLD) ? (*PTR = (NEW), 1) : 0) 164 | #else 165 | #define LOCK(L) /* no-op */ 166 | #define UNLOCK(L) /* no-op */ 167 | #define CAS(PTR, OLD, NEW) (__sync_bool_compare_and_swap(PTR, OLD, NEW)) 168 | #endif 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /test_loom.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "greatest.h" 10 | #include "loom.h" 11 | 12 | static struct loom *l = NULL; 13 | 14 | #define MAX_TASKS 1025 15 | 16 | static void sleep_msec(int msec) { 17 | poll(NULL, 0, msec); 18 | } 19 | 20 | static struct test_context { 21 | int limit; 22 | int flags[MAX_TASKS]; 23 | uintptr_t cleanup_counter; 24 | } context; 25 | 26 | static void setup_cb(void *data) { 27 | context.limit = 0; 28 | memset(context.flags, 0xFF, sizeof(context.flags)); 29 | context.cleanup_counter = 0; 30 | (void)data; 31 | } 32 | 33 | TEST loom_should_init_and_free(void) { 34 | loom_config cfg = { 35 | .init_threads = 0, 36 | }; 37 | ASSERT(LOOM_INIT_RES_OK == loom_init(&cfg, &l)); 38 | loom_free(l); 39 | PASS(); 40 | } 41 | 42 | TEST loom_should_init_and_free_and_join(int count) { 43 | 
loom_config cfg = { 44 | .init_threads = count, 45 | }; 46 | ASSERT(LOOM_INIT_RES_OK == loom_init(&cfg, &l)); 47 | loom_free(l); 48 | PASS(); 49 | } 50 | 51 | static void set_flag_cb(void *env) { 52 | uintptr_t i = (uintptr_t)env; 53 | //printf(" == set_flag_cb %zd\n",i); 54 | context.flags[i] = i; 55 | } 56 | 57 | static void set_flag_dont_cleanup_cb(void *env) { 58 | (void)env; 59 | assert(false); // all tasks should run 60 | } 61 | 62 | TEST loom_should_run_tasks(int threads, uintptr_t tasks) { 63 | if (tasks > MAX_TASKS) { FAILm("too many"); } 64 | 65 | loom_config cfg = { 66 | .init_threads = threads, 67 | }; 68 | ASSERT(LOOM_INIT_RES_OK == loom_init(&cfg, &l)); 69 | 70 | for (uintptr_t i = 0; i < tasks; i++) { 71 | loom_task t = { 72 | .task_cb = set_flag_cb, 73 | .cleanup_cb = set_flag_dont_cleanup_cb, 74 | .env = (void *)i, 75 | }; 76 | 77 | for (int i = 0; i < 100; i++) { // 100 retries 78 | size_t backpressure = 0; 79 | if (loom_enqueue(l, &t, &backpressure)) { break; } 80 | sleep_msec(backpressure / 10); 81 | 82 | if (i == 99) { FAILm("queue full too long"); } 83 | } 84 | } 85 | 86 | loom_info info; 87 | 88 | /* Give them a bit to actually work... 
*/ 89 | for (uintptr_t i = 0; i < tasks; i++) { 90 | ASSERT(loom_get_stats(l, &info)); 91 | /* If all tasks have been started, break */ 92 | if (info.backlog_size == 0) { break; } 93 | sleep_msec(info.backlog_size); 94 | } 95 | 96 | /* Give the last task(s) still running time to finish before shutting down */ 97 | sleep_msec(10); 98 | 99 | ASSERT(loom_get_stats(l, &info)); 100 | ASSERT(info.backlog_size == 0); 101 | 102 | loom_free(l); 103 | 104 | for (uintptr_t i = 0; i < tasks; i++) { 105 | //ASSERT_EQ_FMT("%zd", i, flags[i]); 106 | ASSERT_EQ(i, (uintptr_t)context.flags[i]); 107 | } 108 | 109 | PASS(); 110 | } 111 | 112 | TEST loom_should_not_busywait_when_idle(void) { 113 | loom_config cfg = { 114 | .max_delay = 10 * 1024, 115 | .init_threads = 8, 116 | }; 117 | ASSERT(LOOM_INIT_RES_OK == loom_init(&cfg, &l)); 118 | 119 | clock_t pre = clock(); 120 | sleep_msec(5000); 121 | clock_t post = clock(); 122 | loom_free(l); 123 | 124 | ASSERT(pre != (clock_t)-1); 125 | ASSERT(post != (clock_t)-1); 126 | clock_t delta = post - pre; 127 | if (0) { 128 | printf("delta %zd (%.3f sec)\n", delta, delta / (1.0 * CLOCKS_PER_SEC)); 129 | } 130 | 131 | /* It should use significantly less than this... 
*/ 132 | ASSERTm("should use less than 100 msec of CPU in 5 seconds idle", 133 | delta / (1.0 * CLOCKS_PER_SEC) < 0.1); 134 | PASS(); 135 | } 136 | 137 | static void slow_serial_task_cb(void *env) { 138 | (void)env; 139 | sleep_msec(100); 140 | } 141 | 142 | #define CAS(PTR, OLD, NEW) (__sync_bool_compare_and_swap(PTR, OLD, NEW)) 143 | 144 | static void bump_counter_cleanup_cb(void *env) { 145 | (void)env; 146 | for (;;) { 147 | int v = context.cleanup_counter; 148 | if (CAS(&context.cleanup_counter, v, v + 1)) { 149 | break; 150 | } 151 | } 152 | } 153 | 154 | TEST loom_should_run_cleanup_tasks_if_cancelled(void) { 155 | loom_config cfg = { 156 | .max_threads = 1, 157 | }; 158 | ASSERT(LOOM_INIT_RES_OK == loom_init(&cfg, &l)); 159 | 160 | const uintptr_t task_count = 10; 161 | for (uintptr_t i = 0; i < task_count; i++) { 162 | loom_task t = { 163 | .task_cb = slow_serial_task_cb, 164 | .cleanup_cb = bump_counter_cleanup_cb, 165 | }; 166 | 167 | for (int i = 0; i < 100; i++) { // 100 retries 168 | size_t backpressure = 0; 169 | if (loom_enqueue(l, &t, &backpressure)) { break; } 170 | sleep_msec(backpressure / 10); 171 | 172 | if (i == 99) { FAILm("queue full too long"); } 173 | } 174 | } 175 | 176 | sleep_msec(10); 177 | for (int i = 0; i < 5; i++) { 178 | if (loom_shutdown(l)) { break; } 179 | } 180 | sleep_msec(100); 181 | 182 | loom_free(l); 183 | /* One test should start running, all the rest should be cancelled 184 | * and cleaned up after the first completes. 
*/ 185 | ASSERT_EQ(task_count - 1, context.cleanup_counter); 186 | PASS(); 187 | } 188 | 189 | SUITE(suite) { 190 | SET_SETUP(setup_cb, NULL); 191 | 192 | RUN_TEST(loom_should_init_and_free); 193 | RUN_TESTp(loom_should_init_and_free_and_join, 1); 194 | RUN_TESTp(loom_should_init_and_free_and_join, 8); 195 | 196 | RUN_TESTp(loom_should_run_tasks, 1, 1); 197 | RUN_TESTp(loom_should_run_tasks, 1, 8); 198 | RUN_TESTp(loom_should_run_tasks, 8, 64); 199 | RUN_TESTp(loom_should_run_tasks, 8, 1024); 200 | 201 | // Check that worker threads are started on demand 202 | RUN_TESTp(loom_should_run_tasks, 0, 1024); 203 | 204 | char *slow = getenv("NO_SLOW_TESTS"); 205 | if (slow == NULL) { 206 | RUN_TEST(loom_should_not_busywait_when_idle); 207 | } 208 | RUN_TEST(loom_should_run_cleanup_tasks_if_cancelled); 209 | } 210 | 211 | /* Add all the definitions that need to be in the test runner's main file. */ 212 | GREATEST_MAIN_DEFS(); 213 | 214 | int main(int argc, char **argv) { 215 | GREATEST_MAIN_BEGIN(); /* command-line arguments, initialization. */ 216 | RUN_SUITE(suite); 217 | GREATEST_MAIN_END(); /* display results */ 218 | } 219 | --------------------------------------------------------------------------------