├── .gitignore ├── .travis.yml ├── gc ├── random.h ├── ptst.h ├── gc.h ├── portable_defns.h ├── ptst.c ├── intel_defns.h └── gc.c ├── Makefile ├── prioq.h ├── LICENSE ├── README.md ├── common.c ├── gdb_skiplist_print.py ├── common.h ├── unittests.c ├── perf_meas.c ├── prioq_model.pml └── prioq.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.dat 2 | *.o -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | script: make test 3 | -------------------------------------------------------------------------------- /gc/random.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * random.h 3 | * 4 | * A really simple random-number generator. Crappy linear congruential 5 | * taken from glibc, but has at least a 2^32 period. 6 | */ 7 | 8 | #ifndef __RANDOM_H__ 9 | #define __RANDOM_H__ 10 | 11 | typedef unsigned long rand_t; 12 | 13 | #define rand_init(_ptst) \ 14 | ((_ptst)->rand = RDTICK()) 15 | 16 | #define rand_next(_ptst) \ 17 | ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345) 18 | 19 | #endif /* __RANDOM_H__ */ 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC := gcc 2 | CFLAGS := -DINTEL -Wall -std=c99 3 | LDFLAGS := -lpthread -lm 4 | 5 | OS := $(shell uname -s) 6 | ifeq ($(OS),Linux) 7 | CFLAGS += -DCACHE_LINE_SIZE=`getconf LEVEL1_DCACHE_LINESIZE` 8 | LDFLAGS += -lrt 9 | endif 10 | ifeq ($(OS),Darwin) 11 | CFLAGS += -DCACHE_LINE_SIZE=`sysctl -n hw.cachelinesize` 12 | endif 13 | 14 | ifeq ($(DEBUG),true) 15 | CFLAGS+=-DDEBUG -O0 -ggdb3 #-fno-omit-frame-pointer -fsanitize=address 16 | else 17 | CFLAGS+=-O3 18 | endif 19 | 20 | 21 | VPATH := gc 22 | DEPS += Makefile $(wildcard *.h) $(wildcard gc/*.h) 23 | 24 | TARGETS := perf_meas unittests 25 | 26 | 27 | all: $(TARGETS) 28 | 29 | clean: 30 | rm -f $(TARGETS) core *.o 31 | 32 | %.o: %.c $(DEPS) 33 | $(CC) $(CFLAGS) -c -o $@ $< 34 | 35 | perf_meas: CFLAGS+=-DNDEBUG 36 | $(TARGETS): %: %.o ptst.o gc.o prioq.o common.o 37 | $(CC) -o $@ $^ $(LDFLAGS) 38 | 39 | test: unittests 40 | ./unittests 41 | 42 | .PHONY: all clean test 43 | -------------------------------------------------------------------------------- /gc/ptst.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * ptst.h 3 | * 4 | * Per-thread state management. 5 | * 6 | * 7 | * Copyright (c) 2013-2018, Jonatan Linden 8 | * Copyright (c) 2002-2003, K A Fraser 9 | */ 10 | 11 | #ifndef __PTST_H__ 12 | #define __PTST_H__ 13 | 14 | typedef struct ptst_st ptst_t; 15 | 16 | #include "gc.h" 17 | 18 | struct ptst_st 19 | { 20 | /* Thread id */ 21 | unsigned int id; 22 | /* State management */ 23 | ptst_t *next; 24 | unsigned int count; 25 | 26 | /* Utility structures */ 27 | gc_t *gc; 28 | char pad[56]; 29 | unsigned int rand; 30 | }; 31 | 32 | /* 33 | * Enter/leave a critical region. A thread gets a state handle for 34 | * use during critical regions. 
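 * A minimal usage sketch (an assumption about typical client code; the
 * thread-local `ptst' handle referenced by critical_exit() is declared
 * by the client, as prioq.c does):
 *
 *     critical_enter();
 *     n = gc_alloc(ptst, alloc_id);
 *     ... operate on the shared structure ...
 *     gc_free(ptst, n, alloc_id);
 *     critical_exit();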
35 | */ 36 | 37 | void critical_enter(void ); 38 | 39 | #define critical_exit() gc_exit(ptst) 40 | 41 | /* Iterators */ 42 | extern ptst_t *ptst_list; 43 | 44 | #define ptst_first() (ptst_list) 45 | #define ptst_next(_p) ((_p)->next) 46 | 47 | 48 | 49 | #endif /* __PTST_H__ */ 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /gc/gc.h: -------------------------------------------------------------------------------- 1 | #ifndef __GC_H__ 2 | #define __GC_H__ 3 | 4 | typedef struct gc_st gc_t; 5 | 6 | /* Most of these functions peek into a per-thread state struct. */ 7 | #include "ptst.h" 8 | 9 | /* Initialise GC section of given per-thread state structure. */ 10 | gc_t *gc_init(void); 11 | 12 | int gc_add_allocator(unsigned int alloc_size); 13 | void gc_remove_allocator(int alloc_id); 14 | 15 | /* 16 | * Memory allocate/free. An unsafe free can be used when an object was 17 | * not made visible to other processes. 18 | */ 19 | void *gc_alloc(ptst_t *ptst, int alloc_id); 20 | void gc_free(ptst_t *ptst, void *p, int alloc_id); 21 | void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id); 22 | 23 | /* 24 | * Hook registry. Allows users to hook in their own per-epoch delay 25 | * lists. 26 | */ 27 | typedef void (*hook_fn_t)(ptst_t *, void *); 28 | int gc_add_hook(hook_fn_t fn); 29 | void gc_remove_hook(int hook_id); 30 | void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id); 31 | 32 | /* Per-thread entry/exit from critical regions */ 33 | void gc_enter(ptst_t *ptst); 34 | void gc_exit(ptst_t *ptst); 35 | 36 | /* Start-of-day initialisation of garbage collector. */ 37 | void _init_gc_subsystem(void); 38 | void _destroy_gc_subsystem(void); 39 | 40 | #endif /* __GC_H__ */ 41 | -------------------------------------------------------------------------------- /prioq.h: -------------------------------------------------------------------------------- 1 | #ifndef PRIOQ_H 2 | #define PRIOQ_H 3 | 4 | #include "common.h" 5 | 6 | typedef unsigned long pkey_t; 7 | typedef void *pval_t; 8 | 9 | #define KEY_NULL 0 10 | #define NUM_LEVELS 32 11 | /* Internal key values with special meanings. */ 12 | #define SENTINEL_KEYMIN ( 0UL) /* Key value of first dummy node. */ 13 | #define SENTINEL_KEYMAX (~1UL) /* Key value of last dummy node. */ 14 | 15 | 16 | typedef struct node_s 17 | { 18 | pkey_t k; 19 | int level; 20 | int inserting; //char pad2[4]; 21 | pval_t v; 22 | struct node_s *next[1]; 23 | } node_t; 24 | 25 | typedef struct 26 | { 27 | int max_offset; 28 | int max_level; 29 | int nthreads; 30 | node_t *head; 31 | node_t *tail; 32 | char pad[128]; 33 | } pq_t; 34 | 35 | #define get_marked_ref(_p) ((void *)(((uintptr_t)(_p)) | 1)) 36 | #define get_unmarked_ref(_p) ((void *)(((uintptr_t)(_p)) & ~1)) 37 | #define is_marked_ref(_p) (((uintptr_t)(_p)) & 1) 38 | 39 | 40 | /* Interface */ 41 | 42 | extern pq_t *pq_init(int max_offset); 43 | 44 | extern void pq_destroy(pq_t *pq); 45 | 46 | extern void insert(pq_t *pq, pkey_t k, pval_t v); 47 | 48 | extern pval_t deletemin(pq_t *pq); 49 | 50 | extern void sequential_length(pq_t *pq); 51 | 52 | #endif // PRIOQ_H 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2018, Jonatan Lindén 2 | Copyright (c) 2002-2003, K A Fraser 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the 14 | distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHTS HOLDER AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A Lock-Free Skiplist-Based Priority Queue 2 | == 3 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/jonatanlinden/PR/blob/master/COPYING) 4 | [![Build Status](https://travis-ci.org/jonatanlinden/PR.png?branch=master)](https://travis-ci.org/jonatanlinden/PR) 5 | 6 | 7 | A linearizable skiplist-based lock-free priority queue implementation 8 | minimizing the number of required CAS-instructions per operation. The 9 | skiplist implementation is adapted from Keir Fraser's skiplist 10 | (http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf). 11 | 12 | For more information about the priority queue, see 13 | http://user.it.uu.se/~jonli208/priorityqueue. 14 | 15 | 16 | ### Build 17 | 18 | make perf_meas 19 | 20 | ### Usage 21 | 22 | Run the benchmark application as: 23 | 24 | ./perf_meas -n 8 -o 64 -t 42 25 | 26 | This will start a benchmark run with 8 threads, the offset parameter 27 | of the algorithm will be set to 64, and the benchmark will run for 42 28 | seconds. Per default, the inserted keys are uniformly distributed, 29 | operations (deletemin, insert) are randomly selected (50%/50%) and the 30 | queue is prefilled with 2^15 elements. 31 | 32 | Run 33 | 34 | ./perf_meas -h 35 | 36 | for more information about the available parameters. 37 | 38 | ### Extras 39 | 40 | A model for the SPIN model checker (http://spinroot.com) is included, 41 | with linearizability checks of the operations. The -O flag has to be 42 | used (if SPIN version >= 6), the model is using the old scope rules. 
43 | To perform a bit state space analysis: 44 | 45 | spin -O -a prioq_model.pml 46 | gcc -O2 -DMEMLIM=2048 -DBITSTATE -o pan pan.c 47 | ./pan -w33 48 | 49 | 50 | -------------------------------------------------------------------------------- /common.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "common.h" 3 | 4 | #if defined(__linux__) 5 | pid_t 6 | gettid(void) 7 | { 8 | return (pid_t) syscall(SYS_gettid); 9 | } 10 | 11 | void 12 | pin(pid_t t, int cpu) 13 | { 14 | cpu_set_t cpuset; 15 | CPU_ZERO(&cpuset); 16 | CPU_SET(cpu, &cpuset); 17 | E_en(sched_setaffinity(t, sizeof(cpu_set_t), &cpuset)); 18 | } 19 | 20 | void 21 | gettime(struct timespec *ts) 22 | { 23 | E(clock_gettime(CLOCK_MONOTONIC, ts)); 24 | } 25 | 26 | #endif 27 | 28 | #if defined(__APPLE__) 29 | void 30 | gettime(struct timespec *ts) 31 | { 32 | uint64_t time = mach_absolute_time(); 33 | 34 | static mach_timebase_info_data_t info = {0,0}; 35 | 36 | if (info.denom == 0) { 37 | mach_timebase_info(&info); 38 | } 39 | 40 | uint64_t elapsed = time * (info.numer / info.denom); 41 | 42 | ts->tv_sec = elapsed * 1e-9; 43 | ts->tv_nsec = elapsed - (ts->tv_sec * 1e9); 44 | } 45 | #endif 46 | 47 | 48 | 49 | 50 | struct timespec 51 | timediff (struct timespec begin, struct timespec end) 52 | { 53 | struct timespec tmp; 54 | if ((end.tv_nsec - begin.tv_nsec) < 0) { 55 | tmp.tv_sec = end.tv_sec - begin.tv_sec - 1; 56 | tmp.tv_nsec = 1000000000 + end.tv_nsec - begin.tv_nsec; 57 | } else { 58 | tmp.tv_sec = end.tv_sec - begin.tv_sec; 59 | tmp.tv_nsec = end.tv_nsec - begin.tv_nsec; 60 | } 61 | return tmp; 62 | } 63 | 64 | void 65 | rng_init (unsigned short rng[3]) 66 | { 67 | struct timespec time; 68 | 69 | // finally available in macos 10.12 as well! 70 | clock_gettime(CLOCK_REALTIME, &time); 71 | 72 | /* initialize seed */ 73 | rng[0] = time.tv_nsec; 74 | rng[1] = time.tv_nsec >> 16; 75 | rng[2] = time.tv_nsec >> 32; 76 | 77 | } 78 | -------------------------------------------------------------------------------- /gdb_skiplist_print.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import gdb 4 | 5 | class SkiplistPrintCommand(gdb.Command): 6 | """Iterate and print a list. 7 | 8 | skip [MAX] 9 | 10 | Given a list EXPR, iterate though the list nodes' ->next pointers, printing 11 | each node iterated. We will iterate thorugh MAX list nodes, to prevent 12 | infinite loops with corrupt lists. If MAX is zero, we will iterate the 13 | entire list. 14 | 15 | List nodes types are expected to have a member named "next". 
List types 16 | may be the same as node types, or a separate type with an explicit 17 | head node, called "head".""" 18 | 19 | MAX_ITER = 10 20 | 21 | def __init__(self): 22 | super(SkiplistPrintCommand, self).__init__("skiplist-print", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL) 23 | 24 | def invoke(self, _args, from_tty): 25 | args = gdb.string_to_argv(_args) 26 | start_node = args[0] 27 | 28 | if len(args) > 1: 29 | max_iter = int(args[1]) 30 | else: 31 | max_iter = self.MAX_ITER 32 | 33 | if len(args) > 2: 34 | lvl = int(args[2]) 35 | else: 36 | lvl = 0 37 | 38 | p_node_t = gdb.lookup_type('node_t').pointer() 39 | long_t = gdb.lookup_type('long') 40 | node = gdb.parse_and_eval(start_node) 41 | print node 42 | 43 | for i in xrange(max_iter): 44 | nexts = node['next'] 45 | nxt = gdb.Value(nexts[lvl]).cast(long_t) 46 | nxt = nxt & ~1 47 | node = gdb.Value(nxt).cast(p_node_t).dereference() 48 | nexts = node['next'] 49 | print node['k'], node['level'], node['inserting'], 50 | k = 0 51 | while k < node['level']: 52 | print(nexts[k]), 53 | k+=1 54 | print("") 55 | 56 | SkiplistPrintCommand() 57 | -------------------------------------------------------------------------------- /gc/portable_defns.h: -------------------------------------------------------------------------------- 1 | #ifndef __PORTABLE_DEFNS_H__ 2 | #define __PORTABLE_DEFNS_H__ 3 | 4 | #define MAX_THREADS 128 /* Nobody will ever have more! */ 5 | 6 | #if defined(SPARC) 7 | #include "sparc_defns.h" 8 | #elif defined(INTEL) 9 | #include "intel_defns.h" 10 | #elif defined(PPC) 11 | #include "ppc_defns.h" 12 | #elif defined(IA64) 13 | #include "ia64_defns.h" 14 | #elif defined(MIPS) 15 | #include "mips_defns.h" 16 | #elif defined(ALPHA) 17 | #include "alpha_defns.h" 18 | #else 19 | #error "A valid architecture has not been defined" 20 | #endif 21 | 22 | #include 23 | 24 | #ifndef MB_NEAR_CAS 25 | #define RMB_NEAR_CAS() RMB() 26 | #define WMB_NEAR_CAS() WMB() 27 | #define MB_NEAR_CAS() MB() 28 | #endif 29 | 30 | typedef unsigned long int_addr_t; 31 | 32 | typedef int bool_t; 33 | #define FALSE 0 34 | #define TRUE 1 35 | 36 | #define ADD_TO(_v,_x) \ 37 | do { \ 38 | int __val = (_v), __newval; \ 39 | while ( (__newval = CASIO(&(_v),__val,__val+(_x))) != __val ) \ 40 | __val = __newval; \ 41 | } while ( 0 ) 42 | 43 | /* 44 | * Allow us to efficiently align and pad structures so that shared fields 45 | * don't cause contention on thread-local or read-only fields. 46 | */ 47 | #define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE] 48 | #define ALIGNED_ALLOC(_s) \ 49 | ((void *)(((unsigned long)malloc((_s)+CACHE_LINE_SIZE*2) + \ 50 | CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE-1))) 51 | 52 | 53 | /* 54 | * POINTER MARKING 55 | */ 56 | #define get_marked_ref(_p) ((void *)(((unsigned long)(_p)) | 1)) 57 | #define get_unmarked_ref(_p) ((void *)(((unsigned long)(_p)) & ~1)) 58 | #define is_marked_ref(_p) (((unsigned long)(_p)) & 1) 59 | 60 | 61 | 62 | /* Read field @_f into variable @_x. */ 63 | #define READ_FIELD(_x,_f) ((_x) = (_f)) 64 | 65 | #define WEAK_DEP_ORDER_RMB() ((void)0) 66 | #define WEAK_DEP_ORDER_WMB() ((void)0) 67 | #define WEAK_DEP_ORDER_MB() ((void)0) 68 | 69 | 70 | 71 | #endif /* __PORTABLE_DEFNS_H__ */ 72 | -------------------------------------------------------------------------------- /gc/ptst.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * ptst.c 3 | * 4 | * Per-thread state management. 
Essentially the state management parts 5 | * of MB's garbage-collection code have been pulled out and placed 6 | * here, for the use of other utility routines. 7 | * 8 | * Copyright (c) 2013, Jonatan Linden 9 | * Copyright (c) 2002-2003, K A Fraser 10 | * 11 | * All rights reserved. 12 | * 13 | * Redistribution and use in source and binary forms, with or without 14 | * modification, are permitted provided that the following conditions 15 | * are met: 16 | * 17 | * * Redistributions of source code must retain the above copyright 18 | * notice, this list of conditions and the following disclaimer. 19 | * 20 | * * Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials provided 23 | * with the distribution. 24 | * 25 | * * The name of the author may not be used to endorse or promote 26 | * products derived from this software without specific prior 27 | * written permission. 28 | * 29 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 30 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 31 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 33 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 35 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 38 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
40 | */ 41 | 42 | #include 43 | #include 44 | #include 45 | #include "random.h" 46 | #include "portable_defns.h" 47 | #include "ptst.h" 48 | 49 | ptst_t *ptst_list = NULL; 50 | extern __thread ptst_t *ptst; 51 | static unsigned int next_id = 0; 52 | 53 | void 54 | critical_enter() 55 | { 56 | ptst_t *next, *new_next; 57 | 58 | if ( ptst == NULL ) 59 | { 60 | ptst = (ptst_t *) ALIGNED_ALLOC(sizeof(ptst_t)); 61 | if ( ptst == NULL ) exit(1); 62 | 63 | memset(ptst, 0, sizeof(ptst_t)); 64 | ptst->gc = gc_init(); 65 | ptst->count = 1; 66 | ptst->id = __sync_fetch_and_add(&next_id, 1); 67 | rand_init(ptst); 68 | new_next = ptst_list; 69 | do { 70 | ptst->next = next = new_next; 71 | } 72 | while ( (new_next = __sync_val_compare_and_swap(&ptst_list, next, ptst)) != next ); 73 | } 74 | 75 | gc_enter(ptst); 76 | return; 77 | } 78 | 79 | 80 | 81 | static void ptst_destructor(ptst_t *ptst) 82 | { 83 | ptst->count = 0; 84 | } 85 | 86 | 87 | -------------------------------------------------------------------------------- /common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #if defined(__linux__) 14 | #include 15 | #include 16 | #include 17 | #endif 18 | 19 | #if defined(__APPLE__) 20 | #include 21 | #endif 22 | 23 | 24 | 25 | #define DCL_ALIGN __attribute__((aligned (2*CACHE_LINE_SIZE))) 26 | #define CACHELINE __attribute__((aligned (1*CACHE_LINE_SIZE))) 27 | 28 | #define ATPAGESIZE __attribute__((aligned (PAGESIZE))) 29 | 30 | #define SQR(x) (x)*(x) 31 | 32 | #define max(a,b) \ 33 | ({ __typeof__ (a) _a = (a); \ 34 | __typeof__ (b) _b = (b); \ 35 | _a > _b ? _a : _b; }) 36 | 37 | #define min(a,b) \ 38 | ({ __typeof__ (a) _a = (a); \ 39 | __typeof__ (b) _b = (b); \ 40 | _a < _b ? 
_a : _b; }) 41 | 42 | 43 | typedef struct thread_args_s 44 | { 45 | pthread_t thread; 46 | int id; 47 | unsigned short rng[3]; 48 | int measure; 49 | int cycles; 50 | char pad[128]; 51 | } thread_args_t; 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | #define E(c) \ 61 | do { \ 62 | int _c = (c); \ 63 | if (_c < 0) { \ 64 | fprintf(stderr, "E: %s: %d: %s\n", \ 65 | __FILE__, __LINE__, #c); \ 66 | } \ 67 | } while (0) 68 | 69 | #define E_en(c) \ 70 | do { \ 71 | int _c = (c); \ 72 | if (_c != 0) { \ 73 | fprintf(stderr, "%s", strerror(_c)); \ 74 | } \ 75 | } while (0) 76 | 77 | #define E_NULL(c) \ 78 | do { \ 79 | if ((c) == NULL) { \ 80 | perror("E_NULL"); \ 81 | } \ 82 | } while (0) 83 | 84 | 85 | #if defined(__x86_64__) 86 | /* accurate time measurements on late recent cpus */ 87 | static inline uint64_t __attribute__((always_inline)) 88 | read_tsc_p() 89 | { 90 | uint64_t tsc; 91 | __asm__ __volatile__ ("rdtscp\n" 92 | "shl $32, %%rdx\n" 93 | "or %%rdx, %%rax" 94 | : "=a"(tsc) 95 | : 96 | : "%rcx", "%rdx"); 97 | return tsc; 98 | } 99 | 100 | /* compiler memory barrier */ 101 | #define CMB() __asm__ __volatile__ ("" : : : "memory") 102 | 103 | #define IMB() __asm__ __volatile__("mfence":::"memory") 104 | #define IRMB() __asm__ __volatile__("lfence":::"memory") 105 | #define IWMB() __asm__ __volatile__("sfence":::"memory") 106 | 107 | #else 108 | #error Unsupported architecture 109 | #endif // __x86_64__ 110 | 111 | 112 | #if defined(__linux__) 113 | extern pid_t gettid(void); 114 | extern void pin(pid_t t, int cpu); 115 | #endif 116 | 117 | void rng_init (unsigned short rng[3]); 118 | extern void gettime(struct timespec *t); 119 | extern struct timespec timediff(struct timespec, struct timespec); 120 | 121 | 122 | #endif 123 | 124 | -------------------------------------------------------------------------------- /gc/intel_defns.h: -------------------------------------------------------------------------------- 1 | #ifndef __INTEL_DEFNS_H__ 2 | #define __INTEL_DEFNS_H__ 3 | 4 | #include 5 | #include 6 | 7 | #ifndef INTEL 8 | #define INTEL 9 | #endif 10 | 11 | #if 0 12 | #define pthread_mutex_init(_m,_i) \ 13 | ({ pthread_mutex_init(_m,_i); (_m)->__m_kind = PTHREAD_MUTEX_ADAPTIVE_NP; }) 14 | #endif 15 | 16 | 17 | /* 18 | * I. Compare-and-swap. 19 | */ 20 | 21 | /* 22 | * This is a strong barrier! Reads cannot be delayed beyond a later store. 23 | * Reads cannot be hoisted beyond a LOCK prefix. Stores always in-order. 24 | */ 25 | #define CAS(_a, _o, _n) \ 26 | ({ __typeof__(_o) __o = _o; \ 27 | __asm__ __volatile__( \ 28 | "lock cmpxchg %3,%1" \ 29 | : "=a" (__o), "=m" (*(volatile unsigned int *)(_a)) \ 30 | : "0" (__o), "r" (_n) ); \ 31 | __o; \ 32 | }) 33 | 34 | #define FAS(_a, _n) \ 35 | ({ __typeof__(_n) __o; \ 36 | __asm__ __volatile__( \ 37 | "lock xchg %0,%1" \ 38 | : "=r" (__o), "=m" (*(volatile unsigned int *)(_a)) \ 39 | : "0" (_n) ); \ 40 | __o; \ 41 | }) 42 | 43 | #define CAS64(_a, _o, _n) \ 44 | ({ __typeof__(_o) __o = _o; \ 45 | __asm__ __volatile__( \ 46 | "movl %3, %%ecx;" \ 47 | "movl %4, %%ebx;" \ 48 | "lock cmpxchg8b %1" \ 49 | : "=A" (__o), "=m" (*(volatile unsigned long long *)(_a)) \ 50 | : "0" (__o), "m" (_n >> 32), "m" (_n) \ 51 | : "ebx", "ecx" ); \ 52 | __o; \ 53 | }) 54 | 55 | /* Update Integer location, return Old value. */ 56 | #define CASIO CAS 57 | #define FASIO FAS 58 | /* Update Pointer location, return Old value. */ 59 | #define CASPO CAS 60 | #define FASPO FAS 61 | /* Update 32/64-bit location, return Old value. 
*/ 62 | #define CAS32O CAS 63 | #define CAS64O CAS64 64 | 65 | /* 66 | * II. Memory barriers. 67 | * WMB(): All preceding write operations must commit before any later writes. 68 | * RMB(): All preceding read operations must commit before any later reads. 69 | * MB(): All preceding memory accesses must commit before any later accesses. 70 | * 71 | * If the compiler does not observe these barriers (but any sane compiler 72 | * will!), then VOLATILE should be defined as 'volatile'. 73 | */ 74 | 75 | #define MB() __sync_synchronize() 76 | #define WMB() __asm__ __volatile__ ("" : : : "memory") 77 | #define RMB() MB() 78 | #define VOLATILE /*volatile*/ 79 | 80 | /* On Intel, CAS is a strong barrier, but not a compile barrier. */ 81 | #define RMB_NEAR_CAS() WMB() 82 | #define WMB_NEAR_CAS() WMB() 83 | #define MB_NEAR_CAS() WMB() 84 | 85 | 86 | /* 87 | * III. Cycle counter access. 88 | */ 89 | 90 | typedef unsigned long long tick_t; 91 | 92 | static inline tick_t __attribute__((always_inline)) 93 | RDTICK() 94 | { tick_t __t; 95 | __asm__ __volatile__("rdtsc\n" 96 | "shl $32,%%rdx\n" 97 | "or %%rdx,%%rax" 98 | : "=a"(__t) 99 | : 100 | : "%rcx", "%rdx"); 101 | return __t; 102 | } 103 | 104 | 105 | 106 | 107 | /* 108 | * IV. Types. 109 | */ 110 | 111 | typedef unsigned char _u8; 112 | typedef unsigned short _u16; 113 | typedef unsigned int _u32; 114 | typedef unsigned long long _u64; 115 | 116 | #endif /* __INTEL_DEFNS_H__ */ 117 | -------------------------------------------------------------------------------- /unittests.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | #include "gc/gc.h" 7 | 8 | #include "prioq.h" 9 | #include "common.h" 10 | 11 | #define PER_THREAD 30 12 | 13 | static pq_t *pq; 14 | 15 | int nthreads; 16 | 17 | pthread_t *ts; 18 | 19 | void *add_thread(void *id); 20 | void *removemin_thread(void *id); 21 | void *invariant_thread(void *id); 22 | 23 | 24 | /* the different tests */ 25 | void test_parallel_add(void); 26 | void test_parallel_del(void); 27 | void test_invariants(void); 28 | 29 | typedef void (* test_func_t)(void); 30 | 31 | test_func_t tests[] = { 32 | test_parallel_del, 33 | test_parallel_add, 34 | // test_invariants, 35 | NULL 36 | }; 37 | 38 | void 39 | test_parallel_add() 40 | { 41 | printf("test parallel add, %d threads\n", nthreads); 42 | 43 | for (long i = 0; i < nthreads; i ++) 44 | pthread_create (&ts[i], NULL, add_thread, (void *)i); 45 | 46 | for (long i = 0; i < nthreads; i ++) 47 | (void)pthread_join (ts[i], NULL); 48 | 49 | unsigned long new, old = 0; 50 | for (long i = 0; i < nthreads * PER_THREAD; i++) { 51 | new = (long)deletemin(pq); 52 | assert (old < new); 53 | old = new; 54 | } 55 | 56 | printf("OK.\n"); 57 | } 58 | 59 | 60 | void 61 | test_parallel_del() 62 | { 63 | printf("test parallel del, %d threads\n", nthreads); 64 | 65 | for (long i = 0; i < nthreads * PER_THREAD; i++) 66 | insert(pq, i+1, (pval_t)i+1); 67 | 68 | for (long i = 0; i < nthreads; i ++) 69 | pthread_create (&ts[i], NULL, removemin_thread, (void *)i); 70 | 71 | for (long i = 0; i < nthreads; i ++) 72 | (void)pthread_join (ts[i], NULL); 73 | 74 | printf("OK.\n"); 75 | } 76 | 77 | void 78 | check_invariants(pq_t *pq) 79 | { 80 | 81 | node_t *cur, *pred; 82 | int cnt = 0; 83 | unsigned long long k = 0; 84 | int i = 0; 85 | 86 | /* Bottom level */ 87 | /* deleted prefix */ 88 | cur = pq->head->next[0]; 89 | while (is_marked_ref(cur)) { 90 | pred = get_unmarked_ref(cur); 91 | 
cur = pred->next[0]; 92 | cnt++; 93 | } 94 | 95 | pred = cur; 96 | cur = pred->next[0]; 97 | 98 | while (cur != pq->tail) { 99 | assert(!is_marked_ref(cur)); 100 | i = 1; 101 | /* pred and succ at each each level is ordered correctly */ 102 | while(i < cur->level && cur->next[i]) { 103 | assert(cur->k < cur->next[i]->k); 104 | i++; 105 | } 106 | assert(cur->k > k); 107 | k = cur->k; 108 | pred = cur; 109 | cur = pred->next[0]; 110 | cnt++; 111 | } 112 | 113 | /* Higher levels */ 114 | k = 0; 115 | for (int i = 31; i > 0; i--) { 116 | cur = get_unmarked_ref(pq->head->next[i]); 117 | while(cur != pq->tail) { 118 | cur = get_unmarked_ref(cur->next[i]); 119 | } 120 | } 121 | } 122 | 123 | /* test_invariants control of invariant threads */ 124 | volatile int halt = 0, stop = 0, abort_loop = 0; 125 | 126 | /* A rough way to test that certain invariants always are true. 127 | * Run a certain number of operations, halt, check invariants, 128 | * continue, halt, etc. 129 | * Specifically, it does not check that the invariants hold during 130 | * the execution of an operation. 131 | */ 132 | 133 | void 134 | test_invariants() 135 | { 136 | printf("test invariants, %d threads\n", nthreads); 137 | 138 | for (long i = 0; i < nthreads * PER_THREAD; i++) 139 | insert(pq, i+1, (pval_t)i + 1); 140 | 141 | for (long i = 0; i < nthreads; i ++) 142 | pthread_create (&ts[i], NULL, invariant_thread, (void *)i); 143 | 144 | for (int i = 0; i < 200; i++) { 145 | usleep(50000); 146 | halt = 1; 147 | while(stop < nthreads) { 148 | IRMB(); 149 | } 150 | printf("."); 151 | fflush(stdout); 152 | check_invariants(pq); 153 | stop = 0; 154 | halt = 0; 155 | IWMB(); 156 | 157 | } 158 | abort_loop = 1; 159 | 160 | for (long i = 0; i < nthreads; i ++) 161 | (void)pthread_join (ts[i], NULL); 162 | 163 | printf("\nOK.\n"); 164 | } 165 | 166 | void 167 | setup (int max_offset) 168 | { 169 | _init_gc_subsystem(); 170 | pq = pq_init(max_offset); 171 | } 172 | 173 | void 174 | teardown () 175 | { 176 | pq_destroy(pq); 177 | _destroy_gc_subsystem(); 178 | } 179 | 180 | int 181 | main(int argc, char **argv) 182 | { 183 | nthreads = 8; 184 | 185 | ts = malloc(nthreads * sizeof(pthread_t)); 186 | assert(ts); 187 | 188 | for(test_func_t *tf = tests; *tf; tf++) { 189 | setup(10); 190 | (*tf)(); 191 | teardown(); 192 | } 193 | 194 | return 0; 195 | } 196 | 197 | __thread unsigned short rng[3]; 198 | 199 | void * 200 | invariant_thread(void *_args) 201 | { 202 | unsigned long id = (unsigned long)_args; 203 | unsigned long elem; 204 | int cnt = 0; 205 | 206 | rng_init(rng); 207 | 208 | while(!abort_loop) { 209 | if (halt) { 210 | __sync_fetch_and_add(&stop, 1); 211 | while(halt) 212 | IRMB(); 213 | } 214 | if (erand48(rng) < 0.5) { 215 | elem = nrand48(rng); 216 | insert(pq, elem+1, (pval_t)elem + 1); 217 | } else { 218 | deletemin(pq); 219 | } 220 | cnt++; 221 | } 222 | return NULL; 223 | } 224 | 225 | 226 | 227 | void * 228 | add_thread(void *id) 229 | { 230 | long base = PER_THREAD * (long)id; 231 | for(int i = 0; i < PER_THREAD; i++) 232 | insert(pq, base+i+1, (pval_t) base+i+1); 233 | return NULL; 234 | } 235 | 236 | 237 | void * 238 | removemin_thread(void *id) 239 | { 240 | unsigned long v, ov = 0; 241 | for(int i = 0; i < PER_THREAD; i++) { 242 | v = (unsigned long) deletemin(pq); 243 | assert(v > ov); 244 | ov = v; 245 | } 246 | return NULL; 247 | } 248 | 249 | 250 | -------------------------------------------------------------------------------- /perf_meas.c: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Priority queue test harness. 3 | * 4 | * 5 | * Copyright (c) 2013-2018, Jonatan Linden 6 | * 7 | */ 8 | 9 | #define _GNU_SOURCE 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include "gc/gc.h" 21 | 22 | #include "common.h" 23 | #include "prioq.h" 24 | 25 | /* check your cpu core numbering before pinning */ 26 | #define PIN 27 | 28 | #define DEFAULT_SECS 10 29 | #define DEFAULT_NTHREADS 1 30 | #define DEFAULT_OFFSET 32 31 | #define DEFAULT_SIZE 1<<15 32 | #define EXPS 100000000 33 | 34 | #define THREAD_ARGS_FOREACH(_iter) \ 35 | for (int i = 0; i < nthreads && (_iter = &ts[i]); i++) 36 | 37 | 38 | /* preload array with exponentially distanced integers for the 39 | * DES workload */ 40 | unsigned long *exps; 41 | int exps_pos = 0; 42 | void gen_exps(unsigned long *arr, unsigned short rng[3], int len, int intensity); 43 | 44 | /* the workloads */ 45 | void work_exp (pq_t *pq); 46 | void work_uni (pq_t *pq); 47 | 48 | void *run (void *_args); 49 | 50 | 51 | void (* work)(pq_t *pq); 52 | thread_args_t *ts; 53 | pq_t *pq; 54 | 55 | volatile int wait_barrier = 0; 56 | volatile int loop = 0; 57 | 58 | 59 | static void 60 | usage(FILE *out, const char *argv0) 61 | { 62 | fprintf(out, "Usage: %s [OPTION]...\n" 63 | "\n" 64 | "Options:\n", argv0); 65 | 66 | fprintf(out, "\t-h\t\tDisplay usage.\n"); 67 | fprintf(out, "\t-t SECS\t\tRun for SECS seconds. " 68 | "Default: %i\n", 69 | DEFAULT_SECS); 70 | fprintf(out, "\t-o OFFSET\tUse an offset of OFFSET nodes. Sensible " 71 | "\n\t\t\tvalues could be 16 for 8 threads, 128 for 32 threads. " 72 | "\n\t\t\tDefault: %i\n", 73 | DEFAULT_OFFSET); 74 | fprintf(out, "\t-n NUM\t\tUse NUM threads. " 75 | "Default: %i\n", 76 | DEFAULT_NTHREADS); 77 | fprintf(out, "\t-s SIZE\t\tInitialize queue with SIZE elements. " 78 | "Default: %i\n", 79 | DEFAULT_SIZE); 80 | } 81 | 82 | 83 | 84 | static inline unsigned long 85 | next_geometric (unsigned short seed[3], unsigned int p) 86 | { 87 | /* inverse transform sampling */ 88 | /* cf. https://en.wikipedia.org/wiki/Geometric_distribution */ 89 | return floor(log(erand48(seed))/log(1 - p)); 90 | /* uniformly distributed bits => geom. dist. 
level, p = 0.5 */ 91 | //return __builtin_ctz(nrand48(seed) & (1LU << max) - 1) + 1; 92 | } 93 | 94 | 95 | int 96 | main (int argc, char **argv) 97 | { 98 | int opt; 99 | unsigned short rng[3]; 100 | struct timespec time; 101 | struct timespec start, end; 102 | thread_args_t *t; 103 | unsigned long elem; 104 | 105 | extern char *optarg; 106 | extern int optind, optopt; 107 | int nthreads = DEFAULT_NTHREADS; 108 | int offset = DEFAULT_OFFSET; 109 | int secs = DEFAULT_SECS; 110 | int exp = 0; 111 | int init_size = DEFAULT_SIZE; 112 | int concise = 0; 113 | work = work_uni; 114 | 115 | while ((opt = getopt(argc, argv, "t:n:o:s:hex")) >= 0) { 116 | switch (opt) { 117 | case 'n': nthreads = atoi(optarg); break; 118 | case 't': secs = atoi(optarg); break; 119 | case 'o': offset = atoi(optarg); break; 120 | case 's': init_size = atoi(optarg); break; 121 | case 'x': concise = 1; break; 122 | case 'e': exp = 1; work = work_exp; break; 123 | case 'h': usage(stdout, argv[0]); exit(EXIT_SUCCESS); break; 124 | } 125 | } 126 | 127 | #ifndef PIN 128 | printf("Running without threads pinned to cores.\n"); 129 | #endif 130 | 131 | E_NULL(ts = malloc(nthreads*sizeof(thread_args_t))); 132 | memset(ts, 0, nthreads*sizeof(thread_args_t)); 133 | 134 | // finally available in macos 10.12 as well! 135 | clock_gettime(CLOCK_REALTIME, &time); 136 | 137 | /* initialize seed */ 138 | rng[0] = time.tv_nsec; 139 | rng[1] = time.tv_nsec >> 16; 140 | rng[2] = time.tv_nsec >> 32; 141 | 142 | /* initialize garbage collection */ 143 | _init_gc_subsystem(); 144 | pq = pq_init(offset); 145 | 146 | // if DES workload, pre-sample values/event times 147 | if (exp) { 148 | E_NULL(exps = (unsigned long *)malloc(sizeof(unsigned long) * EXPS)); 149 | gen_exps(exps, rng, EXPS, 1000); 150 | } 151 | 152 | /* pre-fill priority queue with elements */ 153 | for (int i = 0; i < init_size; i++) { 154 | if (exp) { 155 | elem = exps[exps_pos++]; 156 | insert(pq, elem, (void *)elem); 157 | } else { 158 | elem = nrand48(rng); 159 | insert(pq, elem, (void *)elem); 160 | } 161 | } 162 | 163 | 164 | /* initialize threads */ 165 | THREAD_ARGS_FOREACH(t) { 166 | t->id = i; 167 | rng_init(t->rng); 168 | E_en(pthread_create(&t->thread, NULL, run, t)); 169 | } 170 | 171 | /* RUN BENCHMARK */ 172 | 173 | /* wait for all threads to call in */ 174 | while (wait_barrier != nthreads) ; 175 | IRMB(); 176 | gettime(&start); 177 | loop = 1; 178 | IWMB(); 179 | /* Process might sleep longer than specified, 180 | * but this will be accounted for. */ 181 | usleep( 1000000 * secs ); 182 | loop = 0; /* halt all threads */ 183 | IWMB(); 184 | gettime(&end); 185 | 186 | /* END RUN BENCHMARK */ 187 | 188 | THREAD_ARGS_FOREACH(t) { 189 | pthread_join(t->thread, NULL); 190 | } 191 | 192 | /* PRINT PERF. 
MEASURES */ 193 | int sum = 0, min = INT_MAX, max =0; 194 | 195 | THREAD_ARGS_FOREACH(t) { 196 | sum += t->measure; 197 | min = min(min, t->measure); 198 | max = max(max, t->measure); 199 | } 200 | struct timespec elapsed = timediff(start, end); 201 | double dt = elapsed.tv_sec + (double)elapsed.tv_nsec / 1000000000.0; 202 | 203 | 204 | if (!concise) { 205 | printf("Total time:\t%1.8f s\n", dt); 206 | printf("Ops:\t\t%d\n", sum); 207 | printf("Ops/s:\t\t%.0f\n", (double) sum / dt); 208 | printf("Min ops/t:\t%d\n", min); 209 | printf("Max ops/t:\t%d\n", max); 210 | } else { 211 | printf("%li\n", lround((double) sum / dt)); 212 | 213 | } 214 | 215 | /* CLEANUP */ 216 | pq_destroy(pq); 217 | free (ts); 218 | _destroy_gc_subsystem(); 219 | } 220 | 221 | 222 | __thread thread_args_t *args; 223 | 224 | /* uniform workload */ 225 | void 226 | work_uni (pq_t *pq) 227 | { 228 | unsigned long elem; 229 | 230 | if (erand48(args->rng) < 0.5) { 231 | elem = (unsigned long)1 + nrand48(args->rng); 232 | insert(pq, elem, (void *)elem); 233 | } else 234 | deletemin(pq); 235 | } 236 | 237 | /* DES workload */ 238 | void 239 | work_exp (pq_t *pq) 240 | { 241 | int pos; 242 | unsigned long elem; 243 | deletemin(pq); 244 | pos = __sync_fetch_and_add(&exps_pos, 1); 245 | elem = exps[pos]; 246 | insert(pq, elem, (void *)elem); 247 | } 248 | 249 | 250 | void * 251 | run (void *_args) 252 | { 253 | args = (thread_args_t *)_args; 254 | int cnt = 0; 255 | 256 | 257 | #if defined(PIN) && defined(__linux__) 258 | /* Straight allocation on 32 core machine. 259 | * Check with your OS + machine. */ 260 | pin (gettid(), args->id/8 + 4*(args->id % 8)); 261 | #endif 262 | 263 | // call in to main thread 264 | __sync_fetch_and_add(&wait_barrier, 1); 265 | 266 | // wait until signaled by main thread 267 | while (!loop); 268 | /* start benchmark execution */ 269 | do { 270 | work(pq); 271 | cnt++; 272 | } while (loop); 273 | /* end of measured execution */ 274 | 275 | args->measure = cnt; 276 | return NULL; 277 | } 278 | 279 | 280 | /* generate array of exponentially distributed variables */ 281 | void 282 | gen_exps(unsigned long *arr, unsigned short rng[3], int len, int intensity) 283 | { 284 | int i = 0; 285 | arr[0] = 2; 286 | while (++i < len) 287 | arr[i] = arr[i-1] + 288 | next_geometric(rng, intensity); 289 | } 290 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /prioq_model.pml: -------------------------------------------------------------------------------- 1 | /***** 2 | * 3 | * Verification of the linearizability of the Linden-Jonsson priority 4 | * queue at presented in the paper, and that the algorithm implements a 5 | * priority queue. 6 | * 7 | * Adapted from Martin Vechev et al., Experience with Model Checking 8 | * Linearizability, 2009. 9 | * 10 | * Copyright (c) 2018, Jonatan Lindén 11 | * 12 | * All rights reserved. 13 | * 14 | * Redistribution and use in source and binary forms, with or without 15 | * modification, are permitted provided that the following conditions are met: 16 | * 17 | * * Redistributions of source code must retain the above copyright 18 | * notice, this list of conditions and the following disclaimer. 19 | * 20 | * * Redistributions in binary form must reproduce the above copyright 21 | * notice, this list of conditions and the following disclaimer in the 22 | * documentation and/or other materials provided with the distribution. 
23 | * 24 | * * The name of the author may not be used to endorse or promote products 25 | * derived from this software without specific prior written permission. 26 | * 27 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 28 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 29 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 31 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 32 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 33 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 35 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 36 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 38 | */ 39 | 40 | #define IF if :: 41 | #define FI :: else fi 42 | 43 | #define CAS(a, d, o, n) \ 44 | cas_success = 0; \ 45 | if :: (d == 0 && a == o) -> a = n; cas_success = 1; \ 46 | :: else fi 47 | 48 | #define FAO(a,v) \ 49 | a; a = v; 50 | 51 | #define WHILE do :: 52 | #define ELIHW :: else -> break; od 53 | 54 | #define GCASSERT(new, old) \ 55 | assert(nodes[new].recycled == 0 || nodes[old].recycled); 56 | 57 | #define NLEVELS 3 /* 3 level skiplist */ 58 | #define THREADS 3 /* 3 threads */ 59 | 60 | #define MAX_KEY 10 61 | 62 | #define MAX_OPS 2 /* no. of random ops per thread */ 63 | #define BOUNDOFFSET 2 /* restructure offset */ 64 | 65 | #define NODES 12 /* total memory */ 66 | 67 | /* Operation types. */ 68 | #define INS 0 69 | #define DEL 1 70 | 71 | /* types */ 72 | #define key_t byte 73 | #define idx_t byte 74 | 75 | typedef node_t { 76 | key_t key; 77 | byte level; 78 | bit inserting; 79 | bit recycled; 80 | /* the following 2 fields are colocated in one mem pos, 81 | * and should be treated as such. */ 82 | bit d; 83 | idx_t next[NLEVELS]; 84 | } 85 | 86 | typedef queue_t { 87 | idx_t head, tail; 88 | } 89 | 90 | /* this is the memory */ 91 | node_t nodes[NODES]; 92 | 93 | /********** declaration of global variables *************/ 94 | 95 | queue_t q; /* the priority queue */ 96 | byte seqq[NODES]; /* the sequential spec. 
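 * A sketch of how it is used: seqq[k] flags whether key k is currently in
 * the abstract queue, and every operation calls seq_add, seq_remove or
 * seq_empty at its linearization point, so any behaviour that a sequential
 * priority queue could not produce trips an assert.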
*/ 97 | idx_t glob_entry; /* pointer to free memory */ 98 | 99 | 100 | /********* sequential specification **************/ 101 | 102 | /* adding */ 103 | inline seq_add(entry, k) { 104 | assert(seqq[k] == 0); 105 | seqq[k] = 1; 106 | } 107 | 108 | /* removing - element should be the smallest */ 109 | inline seq_remove(kl) { 110 | assert(seqq[kl]); 111 | for (j : 0..kl-1) { 112 | assert(seqq[j] == 0); 113 | } 114 | seqq[kl] = seqq[kl] - 1; 115 | } 116 | /* if empty, no entry in queue */ 117 | inline seq_empty() { 118 | for (j : 0..(NODES-1)) { 119 | assert(seqq[j] == 0); 120 | } 121 | } 122 | 123 | /************* Handling nodes/memory *****************/ 124 | 125 | inline get_entry(ptr) 126 | { 127 | d_step{ 128 | ptr = glob_entry; 129 | assert(ptr < NODES - 1); 130 | glob_entry++; 131 | } 132 | } 133 | 134 | /* return index pointing to a node being free to use */ 135 | inline alloc_node(new, k) 136 | { 137 | atomic { 138 | get_entry(new); 139 | nodes[new].key = k; 140 | select(i : 0..(NLEVELS - 1)); /* ok, since called before locatepreds */ 141 | nodes[new].level = i; 142 | nodes[new].inserting = 1; 143 | } 144 | } 145 | 146 | 147 | /******************************************************************* 148 | * BEGIN PRIORITY QUEUE ALGORITHM 149 | *******************************************************************/ 150 | 151 | 152 | /* CAS(addr, d, old, new) - representing a CAS, that will update addr 153 | * to new, given that addr = old and d = 0. d represents hence the 154 | * delete bit being a part of old. */ 155 | 156 | /* FAO(addr, val) - representing a Fetch-and-Or, that will update 157 | * addr to *addr | val. */ 158 | 159 | inline LocatePreds(key) { 160 | d_step { /* resetting some local vars */ 161 | cur = 0; pred = 0; d = 0; del = 0; 162 | i = NLEVELS; pred = q.head 163 | } 164 | /* NB: index i is offset by one in comparison to paper, 165 | * due to lack of negative bytes in promela */ 166 | WHILE (i > 0) -> /* for each level */ 167 | d_step { /* colocated together */ 168 | cur = nodes[pred].next[i-1]; 169 | d = nodes[pred].d 170 | } 171 | WHILE (nodes[cur].key < key || nodes[cur].d || (d && i == 1)) -> 172 | atomic { 173 | IF (d && i == 1) -> del = cur FI; 174 | pred = cur; /* local */ 175 | /* colocated together */ 176 | cur = nodes[pred].next[i-1]; 177 | d = nodes[pred].d 178 | } 179 | ELIHW; 180 | atomic { /* local vars */ 181 | preds[i-1] = pred; 182 | succs[i-1] = cur; 183 | i-- /* descend to next level */ 184 | } 185 | ELIHW 186 | } 187 | 188 | inline Insert(key) { 189 | alloc_node(new, key) 190 | 191 | retry: 192 | LocatePreds(key) 193 | 194 | nodes[new].next[0] = succs[0]; 195 | /* Lowest level */ 196 | atomic { /* linearization point of non-failed insert */ 197 | CAS(nodes[preds[0]].next[0], nodes[preds[0]].d, succs[0], new); 198 | if :: (cas_success) -> 199 | seq_add(new, key) 200 | GCASSERT(succs[0], new) 201 | :: else -> goto retry /* restart */ 202 | fi 203 | } 204 | /* swing upper levels */ 205 | j = 1; /* i is being used in locatepreds */ 206 | WHILE (j <= nodes[new].level) -> 207 | nodes[new].next[j] = succs[j]; 208 | IF (nodes[new].d || nodes[succs[i]].d || succs[i] == del) -> goto end_insert FI; 209 | atomic { 210 | CAS(nodes[preds[j]].next[j], 0, succs[j], new); 211 | IF (cas_success) -> 212 | GCASSERT(succs[j], new) 213 | j++ 214 | FI 215 | } 216 | IF (!cas_success) -> 217 | LocatePreds(key) /* update preds, succs and del */ 218 | IF (succs[0] != new) -> goto end_insert FI 219 | FI 220 | ELIHW; 221 | end_insert: 222 | nodes[new].inserting = 0 223 | } 224 | 225 
| inline Restructure() { 226 | i = NLEVELS - 1; pred = q.head; 227 | re_continue: 228 | WHILE (i > 0) -> 229 | h = nodes[q.head].next[i]; 230 | cur = nodes[pred].next[i]; 231 | IF (!nodes[h].d) -> i--; goto re_continue FI; 232 | WHILE (nodes[cur].d) -> 233 | pred = cur; 234 | cur = nodes[pred].next[i] 235 | ELIHW; 236 | atomic { 237 | CAS(nodes[q.head].next[i], 0, h, nodes[pred].next[i]); 238 | IF (cas_success) -> 239 | GCASSERT(nodes[pred].next[i], q.head) 240 | i-- 241 | FI 242 | } 243 | ELIHW 244 | } 245 | 246 | inline DeleteMin () { 247 | d_step { 248 | d = 1; x = q.head; offset = 0; 249 | obshead = nodes[x].next[0] 250 | } 251 | WHILE (d) -> 252 | atomic { 253 | offset ++; 254 | /* nxt & d colocated */ 255 | nxt = nodes[x].next[0]; 256 | d = nodes[x].d; 257 | IF (nxt == q.tail) -> 258 | /* empty: got linearized when reading nxt */ 259 | seq_empty() 260 | goto end_remove 261 | FI 262 | } 263 | IF (nodes[x].inserting && newhead == NODES) -> 264 | newhead = x 265 | FI; 266 | atomic { 267 | /* linearization point */ 268 | d = FAO(nodes[x].d, 1) 269 | IF (!d) -> 270 | /* check linearization */ 271 | key = nodes[nodes[x].next[0]].key; 272 | seq_remove(key) 273 | FI 274 | } 275 | x = nodes[x].next[0] 276 | ELIHW; 277 | IF (offset <= BOUNDOFFSET) -> goto end_remove FI; 278 | IF (newhead == NODES) -> newhead = x FI; 279 | atomic { 280 | CAS(nodes[q.head].next[0], 0, obshead,newhead); 281 | if :: (cas_success) -> GCASSERT(newhead, q.head) 282 | :: else -> goto end_remove 283 | fi 284 | } 285 | Restructure() 286 | cur = obshead; 287 | WHILE (cur != newhead) -> 288 | nxt = nodes[cur].next[0]; 289 | nodes[cur].recycled = 1; /* MarkRecycle */ 290 | cur = nxt 291 | ELIHW; 292 | end_remove: 293 | } 294 | 295 | 296 | /******************************************************************* 297 | * END ALGORITHM 298 | *******************************************************************/ 299 | 300 | 301 | 302 | 303 | 304 | /* Random key generator that generates unique keys 305 | * 0 is taken by head sentinel node 306 | * MAX_KEY is taken by tail sentinel node, and should be > keys[*] */ 307 | 308 | bit keys[MAX_KEY] = 1; 309 | 310 | inline pick_key(var) { 311 | atomic { 312 | if :: (keys[1] == 1) -> keys[1] = 0; var = 1 313 | :: (keys[2] == 1) -> keys[2] = 0; var = 2 314 | :: (keys[3] == 1) -> keys[3] = 0; var = 3 315 | :: (keys[4] == 1) -> keys[4] = 0; var = 4 316 | :: (keys[5] == 1) -> keys[5] = 0; var = 5 317 | :: (keys[6] == 1) -> keys[6] = 0; var = 6 318 | :: (keys[7] == 1) -> keys[7] = 0; var = 7 319 | :: (keys[8] == 1) -> keys[8] = 0; var = 8 320 | :: (keys[9] == 1) -> keys[9] = 0; var = 9 321 | fi; 322 | } 323 | } 324 | 325 | inline start_op() { 326 | init_locals(); 327 | }; 328 | 329 | inline end_op() { 330 | d_step { 331 | key = 0; 332 | op = 0; 333 | new = 0; 334 | } 335 | } 336 | 337 | inline exec_op(key) { 338 | start_op(); 339 | assert(key < NODES); 340 | if 341 | :: op = INS; 342 | pick_key(key); 343 | Insert (key); 344 | :: op = DEL; 345 | DeleteMin(); 346 | fi; 347 | end_op(); 348 | } 349 | 350 | 351 | inline execute() 352 | { 353 | byte _dummy1; 354 | for (_dummy1 : 1..(MAX_OPS)) { 355 | exec_op(key); 356 | } 357 | } 358 | 359 | inline init_locals() 360 | { 361 | d_step { 362 | pred = 0; 363 | cur = 0; 364 | d = 0; 365 | preds[0] = 0; 366 | preds[1] = 0; 367 | preds[2] = 0; 368 | succs[0] = 0; 369 | succs[1] = 0; 370 | succs[2] = 0; 371 | op = 0; 372 | offset = 0; 373 | obshead = 0; 374 | del = 0; /* ok, succs will never be 0 */ 375 | cas_success = 0; 376 | h = 0; 377 | i = 0; 378 | j = 0; 
379 | new = 0; 380 | key = 0; 381 | x = 0; 382 | nxt = 0; 383 | newhead = NODES; 384 | } 385 | } 386 | 387 | inline define_locals() 388 | { 389 | idx_t pred, cur, obshead, offset, newhead, h, x, nxt; 390 | idx_t preds[NLEVELS], succs[NLEVELS], del; 391 | byte i,j; 392 | bit op, d, cas_success; 393 | byte key; 394 | 395 | idx_t new; 396 | init_locals(); 397 | } 398 | 399 | 400 | proctype client() { 401 | define_locals(); 402 | execute(); 403 | } 404 | 405 | 406 | inline init_globals() 407 | { 408 | /* init the structure */ 409 | atomic { 410 | glob_entry = 0; 411 | /* tail */ 412 | alloc_node(new, MAX_KEY); 413 | q.tail = new; 414 | nodes[q.tail].level = 1; 415 | nodes[q.tail].inserting = 0; 416 | 417 | alloc_node(new, 0); 418 | q.head = new; 419 | nodes[q.head].level = 1; 420 | nodes[q.head].inserting = 0; 421 | for (j : 0..2) { /* levels */ 422 | nodes[q.head].next[j] = q.tail; 423 | }; 424 | } 425 | } 426 | 427 | 428 | init { 429 | atomic{ 430 | byte _dummy0; 431 | define_locals(); 432 | init_globals(); 433 | /* run n - 1 threads as proctype */ 434 | for ( _dummy0 : 1..(THREADS - 1)) { 435 | run client(); 436 | } 437 | } 438 | /* and run last thread here */ 439 | execute(); 440 | 441 | /* wait until the other process finishes. */ 442 | _nr_pr == 1; 443 | i = nodes[q.head].next[0]; 444 | printf("h, %d -> ", nodes[q.head].d); 445 | do :: (i != q.tail) -> 446 | printf("%d,%d ->", nodes[i].key, nodes[i].d); 447 | i = nodes[i].next[0]; 448 | :: else -> break; 449 | od; 450 | printf("t\n"); 451 | } 452 | 453 | -------------------------------------------------------------------------------- /prioq.c: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | * prioq.c 3 | * 4 | * Lock-free concurrent priority queue. 5 | * 6 | * Copyright (c) 2012-2014, Jonatan Linden 7 | * 8 | * Adapted from Keir Fraser's skiplist, 9 | * Copyright (c) 2001-2003, Keir Fraser 10 | * 11 | * Keir Fraser's skiplist is available at 12 | * http://www.cl.cam.ac.uk/research/srg/netos/lock-free/. 13 | * 14 | * Redistribution and use in source and binary forms, with or without 15 | * modification, are permitted provided that the following conditions 16 | * are met: 17 | * 18 | * * Redistributions of source code must retain the above copyright 19 | * notice, this list of conditions and the following disclaimer. 20 | * 21 | * * Redistributions in binary form must reproduce the above 22 | * copyright notice, this list of conditions and the following 23 | * disclaimer in the documentation and/or other materials provided 24 | * with the distribution. 25 | * 26 | * * The name of the author may not be used to endorse or promote 27 | * products derived from this software without specific prior 28 | * written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 31 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 34 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 36 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 37 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 38 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 39 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 40 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 | */ 42 | 43 | #include 44 | #include 45 | 46 | /* keir fraser's garbage collection */ 47 | #include "gc/ptst.h" 48 | 49 | /* some utilities (e.g. memory barriers) */ 50 | #include "common.h" 51 | 52 | /* interface, constant defines, and typedefs */ 53 | #include "prioq.h" 54 | 55 | 56 | /* thread state. */ 57 | __thread ptst_t *ptst; 58 | 59 | static int gc_id[NUM_LEVELS]; 60 | 61 | 62 | /* initialize new node */ 63 | static node_t * 64 | alloc_node() 65 | { 66 | node_t *n; 67 | /* crappy lcg rng */ 68 | unsigned int r = ptst->rand; 69 | ptst->rand = r * 1103515245 + 12345; 70 | r &= (1u << (NUM_LEVELS - 1)) - 1; 71 | /* uniformly distributed bits => geom. dist. level, p = 0.5 */ 72 | int level = __builtin_ctz(r) + 1; 73 | assert(1 <= level && level <= 32); 74 | 75 | n = gc_alloc(ptst, gc_id[level - 1]); 76 | n->level = level; 77 | n->inserting = 1; 78 | memset(n->next, 0, level * sizeof(node_t *)); 79 | return n; 80 | } 81 | 82 | 83 | /* Mark node as ready for reclamation to the garbage collector. */ 84 | static void 85 | free_node(node_t *n) 86 | { 87 | gc_free(ptst, (void *)n, gc_id[(n->level) - 1]); 88 | } 89 | 90 | 91 | /***** locate_preds ***** 92 | * Record predecessors and non-deleted successors of key k. If k is 93 | * encountered during traversal of list, the node will be in succs[0]. 94 | * 95 | * To detect skew in insert operation, return a pointer to the only 96 | * deleted node not having it's delete flag set. 97 | * 98 | * Skew example illustration, when locating 3. Level 1 is shifted in 99 | * relation to level 0, due to not noticing that s[1] is deleted until 100 | * level 0 is reached. (pointers in illustration are implicit, e.g., 101 | * 0 --> 7 at level 2.) 102 | * 103 | * del 104 | * p[0] 105 | * p[2] p[1] s[1] s[0] s[2] 106 | * | | | | | 107 | * v | | | v 108 | * _ v v | _ 109 | * | | _ _ v | | 110 | * | | | | _ | | _ | | 111 | * | | | | | | | | | | | | 112 | * 0 1 2 4 6 7 113 | * d d d 114 | * 115 | */ 116 | 117 | static node_t * 118 | locate_preds(pq_t * restrict pq, pkey_t k, node_t ** restrict preds, node_t ** restrict succs) 119 | { 120 | node_t *x, *x_next, *del = NULL; 121 | int d = 0, i; 122 | 123 | x = pq->head; 124 | i = NUM_LEVELS - 1; 125 | while (i >= 0) 126 | { 127 | x_next = x->next[i]; 128 | d = is_marked_ref(x_next); 129 | x_next = get_unmarked_ref(x_next); 130 | assert(x_next != NULL); 131 | 132 | while (x_next->k < k || is_marked_ref(x_next->next[0]) 133 | || ((i == 0) && d)) { 134 | /* Record bottom level deleted node not having delete flag 135 | * set, if traversed. */ 136 | if (i == 0 && d) 137 | del = x_next; 138 | x = x_next; 139 | x_next = x->next[i]; 140 | d = is_marked_ref(x_next); 141 | x_next = get_unmarked_ref(x_next); 142 | assert(x_next != NULL); 143 | } 144 | preds[i] = x; 145 | succs[i] = x_next; 146 | i--; 147 | } 148 | return del; 149 | } 150 | 151 | /***** insert ***** 152 | * Insert a new node n with key k and value v. 
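 * (Keys must lie strictly between SENTINEL_KEYMIN and SENTINEL_KEYMAX;
 * the assert at the top of the function enforces this.)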
153 | * The node will not be inserted if another node with key k is already 154 | * present in the list. 155 | * 156 | * The predecessors, preds, and successors, succs, at all levels are 157 | * recorded, after which the node n is inserted from bottom to 158 | * top. Conditioned on that succs[i] is still the successor of 159 | * preds[i], n will be spliced in on level i. 160 | */ 161 | void 162 | insert(pq_t *pq, pkey_t k, pval_t v) 163 | { 164 | node_t *preds[NUM_LEVELS], *succs[NUM_LEVELS]; 165 | node_t *new = NULL, *del = NULL; 166 | 167 | assert(SENTINEL_KEYMIN < k && k < SENTINEL_KEYMAX); 168 | critical_enter(); 169 | 170 | /* Initialise a new node for insertion. */ 171 | new = alloc_node(); 172 | new->k = k; 173 | new->v = v; 174 | 175 | /* lowest level insertion retry loop */ 176 | retry: 177 | del = locate_preds(pq, k, preds, succs); 178 | 179 | /* return if key already exists, i.e., is present in a non-deleted 180 | * node */ 181 | if (succs[0]->k == k && !is_marked_ref(preds[0]->next[0]) && preds[0]->next[0] == succs[0]) { 182 | new->inserting = 0; 183 | free_node(new); 184 | goto out; 185 | } 186 | new->next[0] = succs[0]; 187 | 188 | /* The node is logically inserted once it is present at the bottom 189 | * level. */ 190 | if (!__sync_bool_compare_and_swap(&preds[0]->next[0], succs[0], new)) { 191 | /* either succ has been deleted (modifying preds[0]), 192 | * or another insert has succeeded or preds[0] is head, 193 | * and a restructure operation has updated it */ 194 | goto retry; 195 | } 196 | 197 | /* Insert at each of the other levels in turn. */ 198 | int i = 1; 199 | while ( i < new->level) 200 | { 201 | /* If successor of new is deleted, we're done. (We're done if 202 | * only new is deleted as well, but this we can't tell) If a 203 | * candidate successor at any level is deleted, we consider 204 | * the operation completed. */ 205 | if (is_marked_ref(new->next[0]) || 206 | is_marked_ref(succs[i]->next[0]) || 207 | del == succs[i]) 208 | goto success; 209 | 210 | /* prepare next pointer of new node */ 211 | new->next[i] = succs[i]; 212 | if (!__sync_bool_compare_and_swap(&preds[i]->next[i], succs[i], new)) 213 | { 214 | /* failed due to competing insert or restructure */ 215 | del = locate_preds(pq, k, preds, succs); 216 | 217 | /* if new has been deleted, we're done */ 218 | if (succs[0] != new) goto success; 219 | 220 | } else { 221 | /* Succeeded at this level. */ 222 | i++; 223 | } 224 | } 225 | success: 226 | if (new) { 227 | /* this flag must be reset *after* all CAS have completed */ 228 | new->inserting = 0; 229 | } 230 | 231 | out: 232 | critical_exit(); 233 | } 234 | 235 | 236 | /***** restructure ***** 237 | * 238 | * Update the head node's pointers from level 1 and up. Will locate 239 | * the last node at each level that has the delete flag set, and set 240 | * the head to point to the successor of that node. After completion, 241 | * if operating in isolation, for each level i, it holds that 242 | * head->next[i-1] is before or equal to head->next[i]. 
243 | * 244 | * Illustration valid state after completion: 245 | * 246 | * h[0] h[1] h[2] 247 | * | | | 248 | * | | v 249 | * _ | v _ 250 | * | | _ v _ | | 251 | * | | | | _ | | | | 252 | * | | | | | | | | | | 253 | * d d 254 | * 255 | */ 256 | static void 257 | restructure(pq_t *pq) 258 | { 259 | node_t *pred, *cur, *h; 260 | int i = NUM_LEVELS - 1; 261 | 262 | pred = pq->head; 263 | while (i > 0) { 264 | /* the order of these reads must be maintained */ 265 | h = pq->head->next[i]; /* record observed head */ 266 | CMB(); 267 | cur = pred->next[i]; /* take one step forward from pred */ 268 | if (!is_marked_ref(h->next[0])) { 269 | i--; 270 | continue; 271 | } 272 | /* traverse level until non-marked node is found 273 | * pred will always have its delete flag set 274 | */ 275 | while(is_marked_ref(cur->next[0])) { 276 | pred = cur; 277 | cur = pred->next[i]; 278 | } 279 | assert(is_marked_ref(pred->next[0])); 280 | 281 | /* swing head pointer */ 282 | if (__sync_bool_compare_and_swap(&pq->head->next[i],h,cur)) 283 | i--; 284 | } 285 | } 286 | 287 | 288 | /* deletemin 289 | * 290 | * Delete element with smallest key in queue. 291 | * Try to update the head node's pointers, if offset > max_offset. 292 | * 293 | * Traverse level 0 next pointers until one is found that does 294 | * not have the delete bit set. 295 | */ 296 | pval_t 297 | deletemin(pq_t *pq) 298 | { 299 | pval_t v = NULL; 300 | node_t *x, *nxt, *obs_head = NULL, *newhead, *cur; 301 | int offset, lvl; 302 | 303 | newhead = NULL; 304 | offset = lvl = 0; 305 | 306 | critical_enter(); 307 | 308 | x = pq->head; 309 | obs_head = x->next[0]; 310 | 311 | do { 312 | offset++; 313 | 314 | /* expensive, high probability that this cache line has 315 | * been modified */ 316 | nxt = x->next[0]; 317 | 318 | // tail cannot be deleted 319 | if (get_unmarked_ref(nxt) == pq->tail) { 320 | goto out; 321 | } 322 | 323 | /* Do not allow head to point past a node currently being 324 | * inserted. This makes the lock-freedom quite a theoretic 325 | * matter. */ 326 | if (newhead == NULL && x->inserting) newhead = x; 327 | 328 | /* optimization */ 329 | if (is_marked_ref(nxt)) continue; 330 | /* the marker is on the preceding pointer */ 331 | /* linearisation point deletemin */ 332 | nxt = __sync_fetch_and_or(&x->next[0], 1); 333 | } 334 | while ( (x = get_unmarked_ref(nxt)) && is_marked_ref(nxt) ); 335 | 336 | assert(!is_marked_ref(x)); 337 | 338 | v = x->v; 339 | 340 | 341 | /* If no inserting node was traversed, then use the latest 342 | * deleted node as the new lowest-level head pointed node 343 | * candidate. */ 344 | if (newhead == NULL) newhead = x; 345 | 346 | /* if the offset is big enough, try to update the head node and 347 | * perform memory reclamation */ 348 | if (offset <= pq->max_offset) goto out; 349 | 350 | /* Optimization. Marginally faster */ 351 | if (pq->head->next[0] != obs_head) goto out; 352 | 353 | /* try to swing the lowest level head pointer to point to newhead, 354 | * which is deleted */ 355 | if (__sync_bool_compare_and_swap(&pq->head->next[0], obs_head, get_marked_ref(newhead))) 356 | { 357 | /* Update higher level pointers. */ 358 | restructure(pq); 359 | 360 | /* We successfully swung the upper head pointer. The nodes 361 | * between the observed head (obs_head) and the new bottom 362 | * level head pointed node (newhead) are guaranteed to be 363 | * non-live. Mark them for recycling. 
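* (Each of these nodes was already marked deleted by some deletemin call; the assert in the loop below checks the mark before the node is handed to the collector.)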
*/ 364 | 365 | cur = get_unmarked_ref(obs_head); 366 | while (cur != get_unmarked_ref(newhead)) { 367 | nxt = get_unmarked_ref(cur->next[0]); 368 | assert(is_marked_ref(cur->next[0])); 369 | free_node(cur); 370 | cur = nxt; 371 | } 372 | } 373 | out: 374 | critical_exit(); 375 | return v; 376 | } 377 | 378 | /* 379 | * Init structure, setup sentinel head and tail nodes. 380 | */ 381 | pq_t * 382 | pq_init(int max_offset) 383 | { 384 | pq_t *pq; 385 | node_t *t, *h; 386 | int i; 387 | 388 | /* head and tail nodes */ 389 | t = calloc(1, sizeof *t + (NUM_LEVELS-1)*sizeof(node_t *)); 390 | h = calloc(1, sizeof *h + (NUM_LEVELS-1)*sizeof(node_t *)); 391 | 392 | t->inserting = 0; 393 | h->inserting = 0; 394 | 395 | t->k = SENTINEL_KEYMAX; 396 | h->k = SENTINEL_KEYMIN; 397 | h->level = NUM_LEVELS; 398 | t->level = NUM_LEVELS; 399 | 400 | for ( i = 0; i < NUM_LEVELS; i++ ) 401 | h->next[i] = t; 402 | 403 | pq = malloc(sizeof *pq); 404 | pq->head = h; 405 | pq->tail = t; 406 | pq->max_offset = max_offset; 407 | 408 | for (int i = 0; i < NUM_LEVELS; i++ ) 409 | gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *)); 410 | 411 | return pq; 412 | } 413 | 414 | /* Cleanup, mark all the nodes for recycling. */ 415 | void 416 | pq_destroy(pq_t *pq) 417 | { 418 | node_t *cur, *pred; 419 | cur = pq->head; 420 | while (cur != pq->tail) { 421 | pred = cur; 422 | cur = get_unmarked_ref(pred->next[0]); 423 | free_node(pred); 424 | } 425 | free(pq->tail); 426 | free(pq->head); 427 | free(pq); 428 | } 429 | 430 | 431 | 432 | 433 | 434 | -------------------------------------------------------------------------------- /gc/gc.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * gc.c 3 | * 4 | * A fully recycling epoch-based garbage collector. Works by counting 5 | * threads in and out of critical regions, to work out when 6 | * garbage queues can be fully deleted. 7 | * 8 | * Copyright (c) 2018, Jonatan Lindén 9 | * Copyright (c) 2001-2003, K A Fraser 10 | * 11 | * All rights reserved. 12 | * 13 | * Redistribution and use in source and binary forms, with or without 14 | * modification, are permitted provided that the following conditions are met: 15 | * 16 | * * Redistributions of source code must retain the above copyright 17 | * notice, this list of conditions and the following disclaimer. 18 | * 19 | * * Redistributions in binary form must reproduce the above copyright 20 | * notice, this list of conditions and the following disclaimer in the 21 | * documentation and/or other materials provided with the distribution. 22 | * 23 | * * The name of the author may not be used to endorse or promote products 24 | * derived from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 27 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 28 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 29 | * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 30 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 31 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 32 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 34 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 35 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 | * POSSIBILITY OF SUCH DAMAGE. 37 | */ 38 | 39 | #include <assert.h> 40 | #include <stdio.h> 41 | #include <stdlib.h> 42 | #include <string.h> 43 | #include <sys/mman.h> 44 | #include <unistd.h> 45 | #include "portable_defns.h" 46 | #include "gc.h" 47 | 48 | /*#define MINIMAL_GC*/ 49 | /*#define YIELD_TO_HELP_PROGRESS*/ 50 | //#define PROFILE_GC 51 | 52 | /* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */ 53 | #define INVALID_BYTE 0 54 | #define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c)); 55 | 56 | /* Number of unique block sizes we can deal with. */ 57 | #define MAX_SIZES 32 58 | 59 | #define MAX_HOOKS 4 60 | 61 | /* 62 | * The initial number of allocation chunks for each per-blocksize list. 63 | * Popular allocation lists will steadily increase the allocation unit 64 | * in line with demand. 65 | */ 66 | #define ALLOC_CHUNKS_PER_LIST 10 67 | 68 | /* 69 | * How many times should a thread call gc_enter(), seeing the same epoch 70 | * each time, before it makes a reclaim attempt? 71 | */ 72 | #define ENTRIES_PER_RECLAIM_ATTEMPT 100 73 | 74 | /* 75 | * 0: current epoch -- threads are moving to this; 76 | * -1: some threads may still throw garbage into this epoch; 77 | * -2: no threads can see this epoch => we can zero garbage lists; 78 | * -3: all threads see zeros in these garbage lists => move to alloc lists. 79 | */ 80 | #ifdef WEAK_MEM_ORDER 81 | #define NR_EPOCHS 4 82 | #else 83 | #define NR_EPOCHS 3 84 | #endif 85 | 86 | /* 87 | * A chunk amortises the cost of allocation from shared lists. It also 88 | * helps when zeroing nodes, as it increases per-cacheline pointer density 89 | * and means that node locations don't need to be brought into the cache 90 | * (most architectures have a non-temporal store instruction). 91 | */ 92 | #define BLKS_PER_CHUNK 100 93 | typedef struct chunk_st chunk_t; 94 | struct chunk_st 95 | { 96 | chunk_t *next; /* chunk chaining */ 97 | unsigned int i; /* the next entry in blk[] to use */ 98 | void *blk[BLKS_PER_CHUNK]; 99 | }; 100 | 101 | static struct gc_global_st 102 | { 103 | CACHE_PAD(0); 104 | 105 | /* The current epoch. */ 106 | VOLATILE unsigned int current; 107 | CACHE_PAD(1); 108 | 109 | /* Exclusive access to gc_reclaim(). */ 110 | VOLATILE unsigned int inreclaim; 111 | CACHE_PAD(2); 112 | 113 | /* 114 | * RUN-TIME CONSTANTS (to first approximation) 115 | */ 116 | 117 | /* Memory page size, in bytes. */ 118 | unsigned int page_size; 119 | 120 | /* Node sizes (run-time constants). */ 121 | int nr_sizes; 122 | int blk_sizes[MAX_SIZES]; 123 | 124 | /* Registered epoch hooks. */ 125 | int nr_hooks; 126 | hook_fn_t hook_fns[MAX_HOOKS]; 127 | CACHE_PAD(3); 128 | 129 | /* 130 | * DATA WE MAY HIT HARD 131 | */ 132 | 133 | /* Chain of free, empty chunks. */ 134 | chunk_t * VOLATILE free_chunks; 135 | 136 | /* Main allocation lists.
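* One shared chain per registered block size, set up by gc_add_allocator() and refilled on demand by get_alloc_chunk().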
*/ 137 | chunk_t * VOLATILE alloc[MAX_SIZES]; 138 | VOLATILE unsigned int alloc_size[MAX_SIZES]; 139 | #ifdef PROFILE_GC 140 | VOLATILE unsigned int total_size; 141 | VOLATILE unsigned int allocations; 142 | #endif 143 | } gc_global; 144 | 145 | 146 | /* Per-thread state. */ 147 | struct gc_st 148 | { 149 | /* Epoch that this thread sees. */ 150 | unsigned int epoch; 151 | 152 | /* Number of calls to gc_entry() since last gc_reclaim() attempt. */ 153 | unsigned int entries_since_reclaim; 154 | 155 | #ifdef YIELD_TO_HELP_PROGRESS 156 | /* Number of calls to gc_reclaim() since we last yielded. */ 157 | unsigned int reclaim_attempts_since_yield; 158 | #endif 159 | 160 | /* Used by gc_async_barrier(). */ 161 | void *async_page; 162 | int async_page_state; 163 | 164 | /* Garbage lists. */ 165 | chunk_t *garbage[NR_EPOCHS][MAX_SIZES]; 166 | chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES]; 167 | chunk_t *chunk_cache; 168 | 169 | /* Local allocation lists. */ 170 | chunk_t *alloc[MAX_SIZES]; 171 | unsigned int alloc_chunks[MAX_SIZES]; 172 | 173 | /* Hook pointer lists. */ 174 | chunk_t *hook[NR_EPOCHS][MAX_HOOKS]; 175 | }; 176 | 177 | 178 | #define MEM_FAIL(_s) \ 179 | do { \ 180 | fprintf(stderr, "OUT OF MEMORY: %lu bytes at line %d\n", (_s), __LINE__); \ 181 | exit(1); \ 182 | } while ( 0 ) 183 | 184 | 185 | /* Allocate more empty chunks from the heap. */ 186 | #define CHUNKS_PER_ALLOC 1000 187 | static chunk_t *alloc_more_chunks(void) 188 | { 189 | int i; 190 | chunk_t *h, *p; 191 | 192 | h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h)); 193 | if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h)); 194 | 195 | for ( i = 1; i < CHUNKS_PER_ALLOC; i++ ) 196 | { 197 | p->next = p + 1; 198 | p++; 199 | } 200 | 201 | p->next = h; 202 | 203 | return(h); 204 | } 205 | 206 | 207 | /* Put a chain of chunks onto a list. */ 208 | static void add_chunks_to_list(chunk_t *ch, chunk_t *head) 209 | { 210 | chunk_t *h_next, *new_h_next, *ch_next; 211 | ch_next = ch->next; 212 | new_h_next = head->next; 213 | do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); } 214 | while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next ); 215 | } 216 | 217 | 218 | /* Allocate a chain of @n empty chunks. Pointers may be garbage. */ 219 | static chunk_t *get_empty_chunks(int n) 220 | { 221 | int i; 222 | chunk_t *new_rh, *rh, *rt, *head; 223 | 224 | retry: 225 | head = gc_global.free_chunks; 226 | new_rh = head->next; 227 | do { 228 | rh = new_rh; 229 | rt = head; 230 | WEAK_DEP_ORDER_RMB(); 231 | for ( i = 0; i < n; i++ ) 232 | { 233 | if ( (rt = rt->next) == head ) 234 | { 235 | /* Allocate some more chunks. */ 236 | add_chunks_to_list(alloc_more_chunks(), head); 237 | goto retry; 238 | } 239 | } 240 | } 241 | while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh ); 242 | 243 | rt->next = rh; 244 | return(rh); 245 | } 246 | 247 | 248 | /* Get @n filled chunks, pointing at blocks of @sz bytes each. 
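* The blocks themselves are carved out of a single aligned allocation of n * BLKS_PER_CHUNK * sz bytes.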
*/ 249 | static chunk_t *get_filled_chunks(unsigned int n, unsigned int sz) 250 | { 251 | chunk_t *h, *p; 252 | char *node; 253 | int i; 254 | 255 | #ifdef PROFILE_GC 256 | ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz); 257 | ADD_TO(gc_global.allocations, 1); 258 | #endif 259 | 260 | node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz); 261 | if ( node == NULL ) MEM_FAIL((unsigned long) n * BLKS_PER_CHUNK * sz); 262 | #ifdef WEAK_MEM_ORDER 263 | INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz); 264 | #endif 265 | 266 | h = p = get_empty_chunks(n); 267 | do { 268 | p->i = BLKS_PER_CHUNK; 269 | for ( i = 0; i < BLKS_PER_CHUNK; i++ ) 270 | { 271 | p->blk[i] = node; 272 | node += sz; 273 | } 274 | } 275 | while ( (p = p->next) != h ); 276 | 277 | return(h); 278 | } 279 | 280 | 281 | /* 282 | * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do 283 | * this by causing a TLB shootdown to be propagated to all other processors. 284 | * Each time such an action is required, this function calls: 285 | * mprotect(async_page, , ) 286 | * Each thread's state contains a memory page dedicated for this purpose. 287 | */ 288 | #ifdef WEAK_MEM_ORDER 289 | static void gc_async_barrier(gc_t *gc) 290 | { 291 | mprotect(gc->async_page, gc_global.page_size, 292 | gc->async_page_state ? PROT_READ : PROT_NONE); 293 | gc->async_page_state = !gc->async_page_state; 294 | } 295 | #else 296 | #define gc_async_barrier(_g) ((void)0) 297 | #endif 298 | 299 | 300 | /* Grab a level @i allocation chunk from main chain. */ 301 | static chunk_t *get_alloc_chunk(gc_t *gc, int i) 302 | { 303 | chunk_t *alloc, *p, *new_p, *nh; 304 | unsigned int sz; 305 | 306 | alloc = gc_global.alloc[i]; 307 | new_p = alloc->next; 308 | 309 | do { 310 | p = new_p; 311 | while ( p == alloc ) 312 | { 313 | sz = gc_global.alloc_size[i]; 314 | nh = get_filled_chunks(sz, gc_global.blk_sizes[i]); 315 | ADD_TO(gc_global.alloc_size[i], sz >> 3); 316 | gc_async_barrier(gc); 317 | add_chunks_to_list(nh, alloc); 318 | p = alloc->next; 319 | } 320 | WEAK_DEP_ORDER_RMB(); 321 | } 322 | while ( (new_p = CASPO(&alloc->next, p, p->next)) != p ); 323 | 324 | p->next = p; 325 | assert(p->i == BLKS_PER_CHUNK); 326 | return(p); 327 | } 328 | 329 | 330 | #ifndef MINIMAL_GC 331 | /* 332 | * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest 333 | * maximum epoch number seen by a thread that's in the list code. If it's the 334 | * current epoch, the "nearly-free" lists from the previous epoch are 335 | * reclaimed, and the epoch is incremented. 336 | */ 337 | static void gc_reclaim(ptst_t * our_ptst) 338 | { 339 | ptst_t *ptst, *first_ptst; //, *our_ptst = NULL; 340 | gc_t *gc = NULL; 341 | unsigned long curr_epoch; 342 | chunk_t *ch, *t; 343 | int two_ago, three_ago, i, j; 344 | 345 | /* Barrier to entering the reclaim critical section. */ 346 | if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return; 347 | 348 | /* 349 | * Grab first ptst structure *before* barrier -- prevent bugs 350 | * on weak-ordered architectures. 351 | */ 352 | first_ptst = ptst_first(); 353 | MB(); 354 | curr_epoch = gc_global.current; 355 | 356 | /* Have all threads seen the current epoch, or not in mutator code? */ 357 | for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) ) 358 | { 359 | if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out; 360 | } 361 | 362 | /* 363 | * Three-epoch-old garbage lists move to allocation lists. 364 | * Two-epoch-old garbage lists are cleaned out. 
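* (Epoch indices wrap modulo NR_EPOCHS, hence the (curr_epoch+1) and (curr_epoch+2) arithmetic below.)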
365 | */ 366 | two_ago = (curr_epoch+2) % NR_EPOCHS; 367 | three_ago = (curr_epoch+1) % NR_EPOCHS; 368 | //if ( gc_global.nr_hooks != 0 ) 369 | //our_ptst = (ptst_t *)pthread_getspecific(ptst_key); 370 | for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) ) 371 | { 372 | gc = ptst->gc; 373 | 374 | for ( i = 0; i < gc_global.nr_sizes; i++ ) 375 | { 376 | /* NB. Leave one chunk behind, as it is probably not yet full. */ 377 | t = gc->garbage[three_ago][i]; 378 | if ( (t == NULL) || ((ch = t->next) == t) ) continue; 379 | gc->garbage_tail[three_ago][i]->next = ch; 380 | gc->garbage_tail[three_ago][i] = t; 381 | t->next = t; 382 | add_chunks_to_list(ch, gc_global.alloc[i]); 383 | } 384 | 385 | for ( i = 0; i < gc_global.nr_hooks; i++ ) 386 | { 387 | hook_fn_t fn = gc_global.hook_fns[i]; 388 | ch = gc->hook[three_ago][i]; 389 | if ( ch == NULL ) continue; 390 | gc->hook[three_ago][i] = NULL; 391 | 392 | t = ch; 393 | do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); } 394 | while ( (t = t->next) != ch ); 395 | 396 | add_chunks_to_list(ch, gc_global.free_chunks); 397 | } 398 | } 399 | 400 | /* Update current epoch. */ 401 | WMB(); 402 | gc_global.current = (curr_epoch+1) % NR_EPOCHS; 403 | 404 | out: 405 | gc_global.inreclaim = 0; 406 | } 407 | #endif /* MINIMAL_GC */ 408 | 409 | 410 | void *gc_alloc(ptst_t *ptst, int alloc_id) 411 | { 412 | gc_t *gc = ptst->gc; 413 | chunk_t *ch; 414 | 415 | ch = gc->alloc[alloc_id]; 416 | if ( ch->i == 0 ) 417 | { 418 | if ( gc->alloc_chunks[alloc_id]++ == 100 ) 419 | { 420 | gc->alloc_chunks[alloc_id] = 0; 421 | add_chunks_to_list(ch, gc_global.free_chunks); 422 | gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id); 423 | } 424 | else 425 | { 426 | chunk_t *och = ch; 427 | ch = get_alloc_chunk(gc, alloc_id); 428 | ch->next = och->next; 429 | och->next = ch; 430 | gc->alloc[alloc_id] = ch; 431 | } 432 | } 433 | 434 | return ch->blk[--ch->i]; 435 | } 436 | 437 | 438 | static chunk_t *chunk_from_cache(gc_t *gc) 439 | { 440 | chunk_t *ch = gc->chunk_cache, *p = ch->next; 441 | 442 | if ( ch == p ) 443 | { 444 | gc->chunk_cache = get_empty_chunks(100); 445 | } 446 | else 447 | { 448 | ch->next = p->next; 449 | p->next = p; 450 | } 451 | 452 | p->i = 0; 453 | return(p); 454 | } 455 | 456 | 457 | void gc_free(ptst_t *ptst, void *p, int alloc_id) 458 | { 459 | #ifndef MINIMAL_GC 460 | gc_t *gc = ptst->gc; 461 | chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id]; 462 | 463 | if ( ch == NULL ) 464 | { 465 | gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc); 466 | gc->garbage_tail[gc->epoch][alloc_id] = ch; 467 | } 468 | else if ( ch->i == BLKS_PER_CHUNK ) 469 | { 470 | prev = gc->garbage_tail[gc->epoch][alloc_id]; 471 | new = chunk_from_cache(gc); 472 | gc->garbage[gc->epoch][alloc_id] = new; 473 | new->next = ch; 474 | prev->next = new; 475 | ch = new; 476 | } 477 | 478 | ch->blk[ch->i++] = p; 479 | #endif 480 | } 481 | 482 | 483 | void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id) 484 | { 485 | gc_t *gc = ptst->gc; 486 | chunk_t *och, *ch = gc->hook[gc->epoch][hook_id]; 487 | 488 | if ( ch == NULL ) 489 | { 490 | gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc); 491 | } 492 | else 493 | { 494 | ch = ch->next; 495 | if ( ch->i == BLKS_PER_CHUNK ) 496 | { 497 | och = gc->hook[gc->epoch][hook_id]; 498 | ch = chunk_from_cache(gc); 499 | ch->next = och->next; 500 | och->next = ch; 501 | } 502 | } 503 | 504 | ch->blk[ch->i++] = ptr; 505 | } 506 | 507 | 508 | void gc_unsafe_free(ptst_t *ptst, void *p, int 
alloc_id) 509 | { 510 | gc_t *gc = ptst->gc; 511 | chunk_t *ch; 512 | 513 | ch = gc->alloc[alloc_id]; 514 | if ( ch->i < BLKS_PER_CHUNK ) 515 | { 516 | ch->blk[ch->i++] = p; 517 | } 518 | else 519 | { 520 | gc_free(ptst, p, alloc_id); 521 | } 522 | } 523 | 524 | 525 | void gc_enter(ptst_t *ptst) 526 | { 527 | #ifdef MINIMAL_GC 528 | ptst->count++; 529 | MB(); 530 | #else 531 | gc_t *gc = ptst->gc; 532 | int new_epoch, cnt; 533 | 534 | retry: 535 | cnt = ptst->count++; 536 | MB(); 537 | if ( cnt == 1 ) 538 | { 539 | new_epoch = gc_global.current; 540 | if ( gc->epoch != new_epoch ) 541 | { 542 | gc->epoch = new_epoch; 543 | gc->entries_since_reclaim = 0; 544 | #ifdef YIELD_TO_HELP_PROGRESS 545 | gc->reclaim_attempts_since_yield = 0; 546 | #endif 547 | } 548 | else if ( gc->entries_since_reclaim++ == 100 ) 549 | { 550 | ptst->count--; 551 | #ifdef YIELD_TO_HELP_PROGRESS 552 | if ( gc->reclaim_attempts_since_yield++ == 10000 ) 553 | { 554 | gc->reclaim_attempts_since_yield = 0; 555 | sched_yield(); 556 | } 557 | #endif 558 | gc->entries_since_reclaim = 0; 559 | gc_reclaim(ptst); 560 | goto retry; 561 | } 562 | } 563 | #endif 564 | } 565 | 566 | 567 | void gc_exit(ptst_t *ptst) 568 | { 569 | MB(); 570 | ptst->count--; 571 | } 572 | 573 | 574 | gc_t *gc_init(void) 575 | { 576 | gc_t *gc; 577 | int i; 578 | 579 | gc = ALIGNED_ALLOC(sizeof(*gc)); 580 | if ( gc == NULL ) MEM_FAIL(sizeof(*gc)); 581 | memset(gc, 0, sizeof(*gc)); 582 | 583 | #ifdef WEAK_MEM_ORDER 584 | /* Initialise shootdown state. */ 585 | gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE, 586 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 587 | if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size); 588 | gc->async_page_state = 1; 589 | #endif 590 | 591 | gc->chunk_cache = get_empty_chunks(100); 592 | 593 | /* Get ourselves a set of allocation chunks. */ 594 | for ( i = 0; i < gc_global.nr_sizes; i++ ) 595 | { 596 | gc->alloc[i] = get_alloc_chunk(gc, i); 597 | } 598 | for ( ; i < MAX_SIZES; i++ ) 599 | { 600 | gc->alloc[i] = chunk_from_cache(gc); 601 | } 602 | 603 | return(gc); 604 | } 605 | 606 | 607 | int gc_add_allocator(unsigned int alloc_size) 608 | { 609 | int ni, i = gc_global.nr_sizes; 610 | while ( (ni = CASIO(&gc_global.nr_sizes, i, i+1)) != i ) i = ni; 611 | gc_global.blk_sizes[i] = alloc_size; 612 | gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST; 613 | gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size); 614 | return i; 615 | } 616 | 617 | 618 | void gc_remove_allocator(int alloc_id) 619 | { 620 | /* This is a no-op for now. */ 621 | } 622 | 623 | 624 | int gc_add_hook(hook_fn_t fn) 625 | { 626 | int ni, i = gc_global.nr_hooks; 627 | while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni; 628 | gc_global.hook_fns[i] = fn; 629 | return i; 630 | } 631 | 632 | 633 | void gc_remove_hook(int hook_id) 634 | { 635 | /* This is a no-op for now. 
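* (Allocator and hook slots are never reclaimed; gc_add_allocator() and gc_add_hook() only ever append new entries.)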
*/ 636 | } 637 | 638 | 639 | void _destroy_gc_subsystem(void) 640 | { 641 | #ifdef PROFILE_GC 642 | printf("Total heap: %u bytes (%.2fMB) in %u allocations\n", 643 | gc_global.total_size, (double)gc_global.total_size / 1000000, 644 | gc_global.allocations); 645 | #endif 646 | } 647 | 648 | 649 | void _init_gc_subsystem(void) 650 | { 651 | memset(&gc_global, 0, sizeof(gc_global)); 652 | 653 | gc_global.page_size = (unsigned int)sysconf(_SC_PAGESIZE); 654 | gc_global.free_chunks = alloc_more_chunks(); 655 | 656 | gc_global.nr_hooks = 0; 657 | gc_global.nr_sizes = 0; 658 | } 659 | --------------------------------------------------------------------------------
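The driver below is not part of the repository; it is a minimal, hypothetical sketch of how the public interfaces above fit together (the file name, the max_offset value and the key/value choices are illustrative only). It assumes that _init_gc_subsystem() must run before pq_init() -- pq_init() registers its per-level allocators with the collector -- and that critical_enter()/critical_exit() inside insert() and deletemin() take care of per-thread GC registration, as ptst.c is expected to do.

/* example_usage.c -- hypothetical driver, not part of this repository. */
#include <stdio.h>
#include "gc/gc.h"
#include "prioq.h"

int main(void)
{
    _init_gc_subsystem();      /* start-of-day GC initialisation */
    pq_t *pq = pq_init(32);    /* max_offset: roughly how many deleted nodes deletemin
                                * tolerates before it tries to swing the head pointers */

    /* Keys must lie strictly between SENTINEL_KEYMIN and SENTINEL_KEYMAX. */
    insert(pq, 7UL, "seven");
    insert(pq, 42UL, "forty-two");

    /* Smallest key comes out first: prints "seven", then "forty-two". */
    printf("%s\n", (char *)deletemin(pq));
    printf("%s\n", (char *)deletemin(pq));

    pq_destroy(pq);
    _destroy_gc_subsystem();
    return 0;
}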