├── datatype
│   └── nstring.c
├── include
│   ├── common.h
│   ├── datatype.h
│   ├── hashtable.h
│   ├── hazard.h
│   ├── list.h
│   ├── lwt.h
│   ├── map.h
│   ├── mem.h
│   ├── murmur.h
│   ├── nstring.h
│   ├── rcu.h
│   ├── runtime.h
│   ├── skiplist.h
│   ├── tls.h
│   └── txn.h
├── license.txt
├── makefile
├── map
│   ├── hashtable.c
│   ├── list.c
│   ├── map.c
│   ├── skiplist.c
│   └── unsafe_skiplist.c
├── perf.sh
├── runtime
│   ├── hazard.c
│   ├── lwt.c
│   ├── mem.c
│   ├── mem2.c
│   ├── mem_class_calc.c
│   ├── random.c
│   ├── rcu.c
│   ├── rlocal.h
│   └── runtime.c
├── test
│   ├── CuTest-license.txt
│   ├── CuTest.c
│   ├── CuTest.h
│   ├── haz_test.c
│   ├── map_test1.c
│   ├── map_test2.c
│   ├── perf_test.c
│   ├── rcu_test.c
│   └── txn_test.c
├── todo
└── txn
    └── txn.c

/datatype/nstring.c:
--------------------------------------------------------------------------------
#include "common.h"
#include "nstring.h"
#include "murmur.h"
#include "mem.h"

const datatype_t DATATYPE_NSTRING = { (cmp_fun_t)ns_cmp, (hash_fun_t)ns_hash, (clone_fun_t)ns_dup };

nstring_t *ns_alloc (uint32_t len) {
    nstring_t *ns = nbd_malloc(sizeof(nstring_t) + len);
    ns->len = len;
    return ns;
}

int ns_cmp (const nstring_t *ns1, const nstring_t *ns2) {
    int d = memcmp(ns1->data, ns2->data, (ns1->len < ns2->len) ? ns1->len : ns2->len);
    return (d == 0) ? ns1->len - ns2->len : d;
}

uint32_t ns_hash (const nstring_t *ns) {
    return murmur32(ns->data, ns->len);
}

nstring_t *ns_dup (const nstring_t *ns1) {
    nstring_t *ns2 = ns_alloc(ns1->len);
    memcpy(ns2->data, ns1->data, ns1->len);
    return ns2;
}
--------------------------------------------------------------------------------
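nstring_t is the length-prefixed key type behind DATATYPE_NSTRING: ns_cmp orders by the shared prefix and breaks ties by length, ns_hash runs murmur32 over the bytes, and ns_dup is the clone hook the maps use to take ownership of keys. A minimal sketch of wrapping a C string (ns_from_cstr is a hypothetical helper, not part of the library):

#include <string.h>
#include "nstring.h"

// Hypothetical helper: wrap a NUL-terminated C string in an nstring_t key.
static nstring_t *ns_from_cstr (const char *s) {
    uint32_t len = (uint32_t)strlen(s);  // length-prefixed; the NUL is not needed
    nstring_t *ns = ns_alloc(len);       // allocates the header plus <len> data bytes
    memcpy(ns->data, s, len);
    return ns;
}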
/include/common.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#ifndef COMMON_H
#define COMMON_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <assert.h>

#define CACHE_LINE_SIZE  64 // 64 byte cache line on x86 and x86-64
#define CACHE_LINE_SCALE  6 // log base 2 of the cache line size

#define EXPECT_TRUE(x)  __builtin_expect(!!(x), 1)
#define EXPECT_FALSE(x) __builtin_expect(!!(x), 0)

#ifndef NBD_SINGLE_THREADED

#define MAX_NUM_THREADS 32 // make this whatever you want, but make it a power of 2

#define SYNC_SWAP(addr,x)         __sync_lock_test_and_set(addr,x)
#define SYNC_CAS(addr,old,x)      __sync_val_compare_and_swap(addr,old,x)
#define SYNC_ADD(addr,n)          __sync_add_and_fetch(addr,n)
#define SYNC_FETCH_AND_OR(addr,x) __sync_fetch_and_or(addr,x)

#else// NBD_SINGLE_THREADED

#define MAX_NUM_THREADS 1

#define SYNC_SWAP(addr,x)         ({ typeof(*(addr)) _old = *(addr); *(addr) = (x); _old; })
#define SYNC_CAS(addr,old,x)      ({ typeof(*(addr)) _old = *(addr); *(addr) = (x); _old; })
//#define SYNC_CAS(addr,old,x)    ({ typeof(*(addr)) _old = *(addr); if ((old) == _old) { *(addr) = (x); } _old; })
#define SYNC_ADD(addr,n)          ({ typeof(*(addr)) _old = *(addr); *(addr) += (n); _old; })
#define SYNC_FETCH_AND_OR(addr,x) ({ typeof(*(addr)) _old = *(addr); *(addr) |= (x); _old; })

#endif//NBD_SINGLE_THREADED

#define COUNT_TRAILING_ZEROS __builtin_ctz

#define MASK(n) ((1ULL << (n)) - 1)

#define TRUE  1
#define FALSE 0

#ifdef NBD32
#define TAG1 (1U << 31)
#define TAG2 (1U << 30)
#else
#define TAG1 (1ULL << 63)
#define TAG2 (1ULL << 62)
#endif
#define TAG_VALUE(v, tag) ((v) | tag)
#define IS_TAGGED(v, tag) ((v) & tag)
#define STRIP_TAG(v, tag) ((v) & ~tag)

#define DOES_NOT_EXIST 0
#define ERROR_INVALID_OPTION      (-1)
#define ERROR_INVALID_ARGUMENT    (-2)
#define ERROR_UNSUPPORTED_FEATURE (-3)
#define ERROR_TXN_NOT_RUNNING     (-4)

#define VOLATILE_DEREF(x) (*((volatile typeof(x))(x)))

typedef unsigned long long uint64_t;
typedef unsigned int       uint32_t;
typedef unsigned short     uint16_t;
typedef unsigned char      uint8_t;

typedef size_t markable_t;

static inline uint64_t rdtsc (void) {
    unsigned l, u;
    __asm__ __volatile__("rdtsc" : "=a" (l), "=d" (u));
    return ((uint64_t)u << 32) | l;
}

#include "lwt.h"
#endif //COMMON_H
--------------------------------------------------------------------------------
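The SYNC_* macros compile to the gcc __sync builtins (or to plain loads and stores in the single-threaded build), and TAG1/TAG2 reserve the top bits of a word for in-band flags. A small self-contained illustration of how the tag macros compose (the values are hypothetical):

#include <assert.h>
#include "common.h"

// Illustration: steal the top bit of a value word as a flag.
static void tag_demo (void) {
    uint64_t v = 0x1234;
    uint64_t t = TAG_VALUE(v, TAG1);  // set the flag bit
    assert(IS_TAGGED(t, TAG1));       // detect it
    assert(STRIP_TAG(t, TAG1) == v);  // recover the original value
}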
/include/datatype.h:
--------------------------------------------------------------------------------
#ifndef DATATYPE_H
#define DATATYPE_H

typedef int      (*cmp_fun_t)   (void *, void *);
typedef void *   (*clone_fun_t) (void *);
typedef uint32_t (*hash_fun_t)  (void *);

typedef struct datatype {
    cmp_fun_t   cmp;
    hash_fun_t  hash;
    clone_fun_t clone;
} datatype_t;

#endif//DATATYPE_H
--------------------------------------------------------------------------------
/include/hashtable.h:
--------------------------------------------------------------------------------
#ifndef HASHTABLE_H
#define HASHTABLE_H

#include "map.h"

typedef struct ht hashtable_t;
typedef struct ht_iter ht_iter_t;

hashtable_t * ht_alloc      (const datatype_t *key_type);
map_val_t     ht_cas        (hashtable_t *ht, map_key_t key, map_val_t expected_val, map_val_t val);
map_val_t     ht_get        (hashtable_t *ht, map_key_t key);
map_val_t     ht_remove     (hashtable_t *ht, map_key_t key);
size_t        ht_count      (hashtable_t *ht);
void          ht_print      (hashtable_t *ht, int verbose);
void          ht_free       (hashtable_t *ht);
ht_iter_t *   ht_iter_begin (hashtable_t *ht, map_key_t key);
map_val_t     ht_iter_next  (ht_iter_t *iter, map_key_t *key_ptr);
void          ht_iter_free  (ht_iter_t *iter);

static const map_impl_t MAP_IMPL_HT = {
    (map_alloc_t)ht_alloc, (map_cas_t)ht_cas, (map_get_t)ht_get, (map_remove_t)ht_remove,
    (map_count_t)ht_count, (map_print_t)ht_print, (map_free_t)ht_free,
    (map_iter_begin_t)ht_iter_begin, (map_iter_next_t)ht_iter_next, (map_iter_free_t)ht_iter_free
};

#endif//HASHTABLE_H
--------------------------------------------------------------------------------
/include/hazard.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * hazard pointers
 *
 * www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf
 *
 */
#ifndef HAZARD_H
#define HAZARD_H

#define STATIC_HAZ_PER_THREAD 2

typedef void (*free_t) (void *);
typedef void *haz_t;

//static inline void haz_set (volatile haz_t *haz, void *x) { *haz = x; haz_t y = *haz; y = y; }

static inline void haz_set (volatile haz_t *haz, void *x) { *haz = x; __asm__ __volatile__("mfence"); }

haz_t *haz_get_static         (int n);
void   haz_register_dynamic   (haz_t *haz);
void   haz_unregister_dynamic (haz_t *haz);
void   haz_defer_free         (void *p, free_t f);

#endif
--------------------------------------------------------------------------------
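The hazard-pointer API follows Michael's scheme: a reader publishes a pointer before dereferencing it, re-validates that it is still reachable, and reclaimers hand retired nodes to haz_defer_free() so nothing is freed while any thread has it published. A sketch of the reader-side pattern under those assumptions (the node type and shared_head variable are hypothetical):

#include "common.h"
#include "hazard.h"

typedef struct node { struct node *next; int val; } node_t;  // hypothetical node type
node_t *shared_head;                                          // hypothetical shared pointer

int read_head_val (void) {
    haz_t *hp = haz_get_static(0);
    node_t *n;
    do {
        n = shared_head;
        haz_set(hp, n);              // publish before dereferencing
    } while (n != shared_head);      // re-validate: n cannot be freed while published
    int v = (n != NULL) ? n->val : -1;
    haz_set(hp, NULL);               // clear the hazard when done
    return v;
}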
/include/list.h:
--------------------------------------------------------------------------------
#ifndef LIST_H
#define LIST_H

#include "map.h"

typedef struct ll list_t;
typedef struct ll_iter ll_iter_t;

list_t *   ll_alloc   (const datatype_t *key_type);
map_val_t  ll_cas     (list_t *ll, map_key_t key, map_val_t expected_val, map_val_t new_val);
map_val_t  ll_lookup  (list_t *ll, map_key_t key);
map_val_t  ll_remove  (list_t *ll, map_key_t key);
size_t     ll_count   (list_t *ll);
void       ll_print   (list_t *ll, int verbose);
void       ll_free    (list_t *ll);
map_key_t  ll_min_key (list_t *ll);

ll_iter_t *ll_iter_begin (list_t *ll, map_key_t key);
map_val_t  ll_iter_next  (ll_iter_t *iter, map_key_t *key_ptr);
void       ll_iter_free  (ll_iter_t *iter);

static const map_impl_t MAP_IMPL_LL = {
    (map_alloc_t)ll_alloc, (map_cas_t)ll_cas, (map_get_t)ll_lookup, (map_remove_t)ll_remove,
    (map_count_t)ll_count, (map_print_t)ll_print, (map_free_t)ll_free, (map_iter_begin_t)ll_iter_begin,
    (map_iter_next_t)ll_iter_next, (map_iter_free_t)ll_iter_free
};

#endif//LIST_H
--------------------------------------------------------------------------------
/include/lwt.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * lightweight tracing
 */
#ifndef LWT_H
#define LWT_H

#ifndef ENABLE_TRACE
#define TRACE(...) do { } while (0)
#else
#define TRACE(flag, format, v1, v2) lwt_trace(flag, format, (size_t)(v1), (size_t)(v2))
#endif

#ifndef NDEBUG
#define ASSERT(x) do { if (!(x)) { lwt_halt(); assert(!#x); } } while (0)
#else
#define ASSERT(x) do { } while (0)
#endif

// Dump trace records to <file_name>. The file should be post-processed with "sort" before viewing.
void lwt_dump (const char *file_name) __attribute__ ((externally_visible));

// <flags> indicates what kind of trace messages should be included in the dump. <flags> is a sequence of letters
// followed by numbers (e.g. "x1c9n2g3"). The letters indicate trace categories and the numbers are trace levels
// for each category. If a category appears in <flags>, then messages from that category will be included in the
// dump if they have a trace level less than or equal to the one specified in <flags>. Categories are case
// sensitive.
void lwt_set_trace_level (const char *flags);

// <flag> is a two character string containing a letter followed by a number (e.g. "f3"). The letter indicates a
// trace category, and the number a trace level. <flag> controls whether or not the trace message gets included in
// the dump. It is only included when its specified category is enabled at a trace level greater than or equal to
// the one in <flag>. Categories are case sensitive.
static inline void lwt_trace (const char *flag, const char *format, size_t value1, size_t value2) {
    extern char TraceLevel[256];
    if (EXPECT_FALSE(TraceLevel[(unsigned)flag[0]] >= flag[1])) {
        // embed <flag> in <format> so we don't have to make the lwt_record_t any bigger than it already is
        uint64_t f = ((uint64_t)(size_t)format | ((uint64_t)flag[0] << 56) | ((uint64_t)flag[1] << 48));
        extern void lwt_trace_i (uint64_t format, size_t value1, size_t value2);
        lwt_trace_i(f, value1, value2);
    }
}

void lwt_halt (void);

#endif//LWT_H
--------------------------------------------------------------------------------
/include/map.h:
--------------------------------------------------------------------------------
#ifndef MAP_H
#define MAP_H

#include "datatype.h"

typedef struct map map_t;
typedef struct map_iter map_iter_t;
typedef struct map_impl map_impl_t;

#ifdef NBD32
typedef uint32_t map_key_t;
typedef uint32_t map_val_t;
#else
typedef uint64_t map_key_t;
typedef uint64_t map_val_t;
#endif

map_t *    map_alloc   (const map_impl_t *map_impl, const datatype_t *key_type);
map_val_t  map_get     (map_t *map, map_key_t key);
map_val_t  map_set     (map_t *map, map_key_t key, map_val_t new_val);
map_val_t  map_add     (map_t *map, map_key_t key, map_val_t new_val);
map_val_t  map_cas     (map_t *map, map_key_t key, map_val_t expected_val, map_val_t new_val);
map_val_t  map_replace (map_t *map, map_key_t key, map_val_t new_val);
map_val_t  map_remove  (map_t *map, map_key_t key);
map_val_t  map_count   (map_t *map);
void       map_print   (map_t *map, int verbose);
void       map_free    (map_t *map);

map_iter_t * map_iter_begin (map_t *map, map_key_t key);
map_val_t    map_iter_next  (map_iter_t *iter, map_key_t *key);
void         map_iter_free  (map_iter_t *iter);

/////////////////////////////////////////////////////////////////////////////////////

#define CAS_EXPECT_DOES_NOT_EXIST ( 0)
#define CAS_EXPECT_EXISTS         (-1)
#define CAS_EXPECT_WHATEVER       (-2)

typedef void *    (*map_alloc_t)  (const datatype_t *);
typedef map_val_t (*map_cas_t)    (void *, map_key_t , map_val_t, map_val_t);
typedef map_val_t (*map_get_t)    (void *, map_key_t );
typedef map_val_t (*map_remove_t) (void *, map_key_t );
typedef size_t    (*map_count_t)  (void *);
typedef void      (*map_print_t)  (void *, int);
typedef void      (*map_free_t)   (void *);

typedef map_iter_t * (*map_iter_begin_t) (void *, map_key_t);
typedef map_val_t    (*map_iter_next_t)  (map_iter_t *, map_key_t *);
typedef void         (*map_iter_free_t)  (map_iter_t *);

struct map_impl {
    map_alloc_t  alloc;
    map_cas_t    cas;
    map_get_t    get;
    map_remove_t remove;
    map_count_t  count;
    map_print_t  print;
    map_free_t   free_;

    map_iter_begin_t iter_begin;
    map_iter_next_t  iter_next;
    map_iter_free_t  iter_free;
};

#endif//MAP_H
--------------------------------------------------------------------------------
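map.h is the polymorphic front end: map_alloc() binds an implementation vtable (MAP_IMPL_HT, MAP_IMPL_SL, or MAP_IMPL_LL) to an optional key type, and the map_* calls dispatch through it. A minimal usage sketch with integer keys (key_type NULL); note that 0 is DOES_NOT_EXIST, so it cannot be used as a key or value:

#include "common.h"
#include "runtime.h"
#include "map.h"
#include "hashtable.h"

int main (void) {
    nbd_thread_init();                         // register this thread with the runtime
    map_t *m = map_alloc(&MAP_IMPL_HT, NULL);  // NULL key_type => word-sized integer keys
    map_set(m, 42, 1001);                      // insert or overwrite
    map_val_t v = map_get(m, 42);              // 1001
    map_remove(m, 42);                         // map_get(m, 42) is now DOES_NOT_EXIST
    map_free(m);
    return v == 1001 ? 0 : 1;
}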
/include/mem.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#ifndef MEM_H
#define MEM_H
void *nbd_malloc (size_t n) __attribute__((malloc, alloc_size(1)));
void  nbd_free   (void *x) __attribute__((nonnull));
#endif//MEM_H
--------------------------------------------------------------------------------
/include/murmur.h:
--------------------------------------------------------------------------------
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes a few assumptions about how your machine behaves -

// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

static inline uint32_t murmur32 (const char *key, int len)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    uint32_t h = len;

    // Mix 4 bytes at a time into the hash

    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4)
    {
        uint32_t k = *(uint32_t *)data;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array

    switch(len)
    {
    case 3: h ^= data[2] << 16;
    case 2: h ^= data[1] << 8;
    case 1: h ^= data[0];
            h *= m;
    };

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

static inline uint32_t murmur32_8b (uint64_t key)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    uint32_t h = 8;

    uint32_t k1 = (uint32_t)(key >> 32);
    uint32_t k2 = (uint32_t)key;

    k1 *= m;
    k1 ^= k1 >> r;
    k1 *= m;

    k2 *= m;
    k2 ^= k2 >> r;
    k2 *= m;

    // Mix 4 bytes at a time into the hash

    h *= m;
    h ^= k1;
    h *= m;
    h ^= k2;

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

static inline uint32_t murmur32_4b (uint32_t key)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    uint32_t h = 4;

    uint32_t k = *(uint32_t *)&key;

    k *= m;
    k ^= k >> r;
    k *= m;

    // Mix 4 bytes at a time into the hash

    h *= m;
    h ^= k;

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
--------------------------------------------------------------------------------
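murmur32 is the hash used for variable-length string keys, with murmur32_8b/murmur32_4b as fixed-width specializations for word-sized integer keys that skip the byte loop. A usage sketch:

#include "common.h"
#include "murmur.h"

// Hash a string key and a word-sized integer key the way the maps do.
uint32_t hash_examples (void) {
    uint32_t h1 = murmur32("example key", 11); // generic byte-at-a-time path
    uint32_t h2 = murmur32_8b(0x12345678ULL);  // fixed 8-byte integer key
    return h1 ^ h2;
}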
/include/nstring.h:
--------------------------------------------------------------------------------
#ifndef NSTRING_H
#define NSTRING_H

#include "common.h"
#include "datatype.h"

typedef struct nstring {
    uint32_t len;
    char data[];
} nstring_t;

nstring_t * ns_alloc (uint32_t len);
int         ns_cmp   (const nstring_t *ns1, const nstring_t *ns2);
uint32_t    ns_hash  (const nstring_t *ns);
nstring_t * ns_dup   (const nstring_t *ns);

extern const datatype_t DATATYPE_NSTRING;

#endif//NSTRING_H
--------------------------------------------------------------------------------
/include/rcu.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#ifndef RCU_H
#define RCU_H

void rcu_update (void);
void rcu_defer_free (void *x);

#endif//RCU_H
--------------------------------------------------------------------------------
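rcu.h is the default reclamation scheme: rcu_defer_free() retires memory, and it is actually freed only after every thread has announced a quiescent point by calling rcu_update(). A sketch of the intended pattern (unlink_some_node is a hypothetical stand-in for whatever removes a node from a shared structure):

#include "common.h"
#include "runtime.h"
#include "rcu.h"

extern void *unlink_some_node (void);  // hypothetical: detaches a node, returns it

void worker_step (void) {
    void *retired = unlink_some_node();
    if (retired != NULL) {
        rcu_defer_free(retired);  // freed only after all threads pass a quiescent point
    }
    rcu_update();                 // announce this thread's quiescent point
}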
/include/runtime.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#ifndef RUNTIME_H
#define RUNTIME_H

#include <pthread.h>
#include "tls.h"

void nbd_thread_init (void);
uint64_t nbd_rand (void);

#endif//RUNTIME_H
--------------------------------------------------------------------------------
/include/skiplist.h:
--------------------------------------------------------------------------------
#ifndef SKIPLIST_H
#define SKIPLIST_H

#include "map.h"

typedef struct sl skiplist_t;
typedef struct sl_iter sl_iter_t;

skiplist_t * sl_alloc   (const datatype_t *key_type);
map_val_t    sl_cas     (skiplist_t *sl, map_key_t key, map_val_t expected_val, map_val_t new_val);
map_val_t    sl_lookup  (skiplist_t *sl, map_key_t key);
map_val_t    sl_remove  (skiplist_t *sl, map_key_t key);
size_t       sl_count   (skiplist_t *sl);
void         sl_print   (skiplist_t *sl, int verbose);
void         sl_free    (skiplist_t *sl);
map_key_t    sl_min_key (skiplist_t *sl);

sl_iter_t *  sl_iter_begin (skiplist_t *sl, map_key_t key);
map_val_t    sl_iter_next  (sl_iter_t *iter, map_key_t *key_ptr);
void         sl_iter_free  (sl_iter_t *iter);

static const map_impl_t MAP_IMPL_SL = {
    (map_alloc_t)sl_alloc, (map_cas_t)sl_cas, (map_get_t)sl_lookup, (map_remove_t)sl_remove,
    (map_count_t)sl_count, (map_print_t)sl_print, (map_free_t)sl_free, (map_iter_begin_t)sl_iter_begin,
    (map_iter_next_t)sl_iter_next, (map_iter_free_t)sl_iter_free
};

#endif//SKIPLIST_H
--------------------------------------------------------------------------------
/include/tls.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * A platform-independent wrapper around thread-local storage. On platforms that don't support
 * __thread variables (e.g. Mac OS X), we have to use the pthreads library for thread-local storage.
 */
#ifndef TLS_H
#define TLS_H

#ifdef __ELF__ // use gcc thread-local storage (i.e. __thread variables)
#define DECLARE_THREAD_LOCAL(name, type)  __thread type name
#define INIT_THREAD_LOCAL(name)
#define SET_THREAD_LOCAL(name, value)     name = value
#define LOCALIZE_THREAD_LOCAL(name, type)

#else//!__ELF__

#include <pthread.h>

#define DECLARE_THREAD_LOCAL(name, type) pthread_key_t name##_KEY

#define INIT_THREAD_LOCAL(name) \
    do { \
        if (pthread_key_create(&name##_KEY, NULL) != 0) { \
            assert(!"error initializing thread local variable " #name); \
        } \
    } while (0)

#define SET_THREAD_LOCAL(name, value) \
    do { \
        name = value; \
        pthread_setspecific(name##_KEY, (void *)(size_t)value); \
    } while (0)

#define LOCALIZE_THREAD_LOCAL(name, type) type name = (type)(size_t)pthread_getspecific(name##_KEY)

#endif//__ELF__
#endif//TLS_H
--------------------------------------------------------------------------------
/include/txn.h:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#ifndef TXN_H
#define TXN_H

#include "map.h"

typedef enum { TXN_RUNNING, TXN_VALIDATING, TXN_VALIDATED, TXN_ABORTED } txn_state_e;

typedef struct txn txn_t;

txn_t *     txn_begin  (map_t *map);
void        txn_abort  (txn_t *txn);
txn_state_e txn_commit (txn_t *txn);

map_val_t txn_map_get (txn_t *txn, map_key_t key);
void      txn_map_set (txn_t *txn, map_key_t key, map_val_t value);

#endif//TXN_H
--------------------------------------------------------------------------------
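txn.h layers optimistic transactions over a map_t: reads and writes go through txn_map_get/txn_map_set, and txn_commit() validates them, returning TXN_ABORTED on conflict. A retry-loop sketch under those assumptions (assumes both keys already hold plain integer values):

#include "common.h"
#include "map.h"
#include "txn.h"

// Sketch: atomically move <amount> between two counters stored in <m>.
void transfer (map_t *m, map_key_t from, map_key_t to, map_val_t amount) {
    txn_state_e rc;
    do {
        txn_t *txn = txn_begin(m);
        map_val_t a = txn_map_get(txn, from);
        map_val_t b = txn_map_get(txn, to);
        txn_map_set(txn, from, a - amount);
        txn_map_set(txn, to,   b + amount);
        rc = txn_commit(txn);          // validates reads; publishes writes on success
    } while (rc == TXN_ABORTED);       // conflict: retry the whole transaction
}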
/license.txt:
--------------------------------------------------------------------------------
Code in this distribution that is written by Josh Dybnis is released to the
public domain, as explained at http://creativecommons.org/licenses/publicdomain
which is repeated below:

The person or persons who have associated work with this document (the
"Dedicator" or "Certifier") hereby either (a) certifies that, to the
best of his knowledge, the work of authorship identified is in the
public domain of the country from which the work is published, or (b)
hereby dedicates whatever copyright the dedicators holds in the work of
authorship identified below (the "Work") to the public domain. A
certifier, moreover, dedicates any copyright interest he may have in the
associated work, and for these purposes, is described as a "dedicator"
below.

A certifier has taken reasonable steps to verify the copyright status of
this work. Certifier recognizes that his good faith efforts may not
shield him from liability if in fact the work certified is not in the
public domain.

Dedicator makes this dedication for the benefit of the public at large
and to the detriment of the Dedicator's heirs and successors. Dedicator
intends this dedication to be an overt act of relinquishment in
perpetuity of all present and future rights under copyright law,
whether vested or contingent, in the Work. Dedicator understands that
such relinquishment of all rights includes the relinquishment of all
rights to enforce (by lawsuit or otherwise) those copyrights in the Work.

Dedicator recognizes that, once placed in the public domain, the Work may
be freely reproduced, distributed, transmitted, used, modified, built
upon, or otherwise exploited by anyone for any purpose, commercial or
non-commercial, and in any way, including by methods that have not yet
been invented or conceived.
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
###################################################################################################
# Written by Josh Dybnis and released to the public domain, as explained at
# http://creativecommons.org/licenses/publicdomain
###################################################################################################
# Makefile for building programs with whole-program interfile optimization
###################################################################################################
CFLAGS0 := -Wall -Werror -std=gnu99 -lpthread #-m32 -DNBD32
CFLAGS1 := $(CFLAGS0) -g #-DNDEBUG #-fwhole-program -combine
CFLAGS2 := $(CFLAGS1) #-DENABLE_TRACE
CFLAGS3 := $(CFLAGS2) #-DLIST_USE_HAZARD_POINTER
CFLAGS  := $(CFLAGS3) #-DNBD_SINGLE_THREADED #-DUSE_SYSTEM_MALLOC #-DTEST_STRING_KEYS
INCS    := $(addprefix -I, include)
TESTS   := output/perf_test output/map_test1 output/map_test2 output/rcu_test output/txn_test #output/haz_test
OBJS    := $(TESTS)

RUNTIME_SRCS   := runtime/runtime.c runtime/rcu.c runtime/lwt.c runtime/mem.c runtime/random.c \
                  datatype/nstring.c #runtime/hazard.c
MAP_SRCS       := map/map.c map/list.c map/skiplist.c map/hashtable.c

haz_test_SRCS  := $(RUNTIME_SRCS) test/haz_test.c
rcu_test_SRCS  := $(RUNTIME_SRCS) test/rcu_test.c
txn_test_SRCS  := $(RUNTIME_SRCS) $(MAP_SRCS) test/txn_test.c test/CuTest.c txn/txn.c
map_test1_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/map_test1.c
map_test2_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/map_test2.c test/CuTest.c
perf_test_SRCS := $(RUNTIME_SRCS) $(MAP_SRCS) test/perf_test.c

tests: $(TESTS)

###################################################################################################
# build and run tests
###################################################################################################
test: $(addsuffix .log, $(TESTS))
	@echo > /dev/null

$(addsuffix .log, $(TESTS)) : %.log : %
	@echo "Running $*" && $* | tee $*.log

###################################################################################################
# Rebuild an executable if any of its source files need to be recompiled
#
# Note: Calculating dependencies as a side-effect of compilation is disabled. There is a bug in
#       gcc. Compilation fails when -MM -MF is used and there is more than one source file.
#       Otherwise "-MM -MT $@.d -MF $@.d" should be part of the command line for the compile.
#
#       Also, when calculating dependencies -combine is removed from CFLAGS because of another bug
#       in gcc. It chokes when -MM is used with -combine.
###################################################################################################
$(OBJS): output/% : output/%.d makefile
	gcc $(CFLAGS) $(INCS) -MM -MT $@ $($*_SRCS) > $@.d
	gcc $(CFLAGS) $(INCS) -o $@ $($*_SRCS)

asm: $(addsuffix .s, $(OBJS))

$(addsuffix .s, $(OBJS)): output/%.s : output/%.d makefile
	gcc $(CFLAGS:-combine:) $(INCS) -MM -MT $@ $($*_SRCS) > output/$*.d
	gcc $(CFLAGS) $(INCS) -combine -S -o $@.temp $($*_SRCS)
	grep -v "^L[BFM]\|^LCF" $@.temp > $@
	rm $@.temp

###################################################################################################
# tags file for vi
###################################################################################################
tags:
	ctags -R .

###################################################################################################
#
###################################################################################################
clean:
	rm -rfv output/*

###################################################################################################
# dummy rule for bootstrapping dependency files
###################################################################################################
$(addsuffix .d, $(OBJS)) : output/%.d :

-include $(addsuffix .d, $(OBJS))

.PHONY: clean test tags asm
--------------------------------------------------------------------------------
/map/hashtable.c:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * C implementation of Cliff Click's lock-free hash table from
 * http://www.azulsystems.com/events/javaone_2008/2008_CodingNonBlock.pdf
 * http://sourceforge.net/projects/high-scale-lib
 *
 * Note: This code uses synchronous atomic operations because that is all that x86 provides.
 * Every atomic operation is also an implicit full memory barrier. The upshot is that it simplifies
 * the code a bit, but it won't be as fast as it could be on platforms that provide weaker
 * operations like unfenced CAS which would still do the job.
 *
 * 11Feb09 - Bug fix in ht_iter_next() from Rui Ueyama
 */

#include <stdio.h>
#include "common.h"
#include "murmur.h"
#include "mem.h"
#include "rcu.h"
#include "hashtable.h"

#ifndef NBD32
#define GET_PTR(x) ((void *)((x) & MASK(48))) // low-order 48 bits is a pointer to a nstring_t
#else
#define GET_PTR(x) ((void *)(x))
#endif

typedef struct entry {
    map_key_t key;
    map_val_t val;
} entry_t;

typedef struct hti {
    volatile entry_t *table;
    hashtable_t *ht; // parent ht;
    struct hti *next;
#ifdef USE_SYSTEM_MALLOC
    void *unaligned_table_ptr; // system malloc doesn't guarantee cache-line alignment
#endif
    size_t count; // TODO: make these counters distributed
    size_t key_count;
    size_t copy_scan;
    size_t num_entries_copied;
    int probe;
    int ref_count;
    uint8_t scale;
} hti_t;

struct ht_iter {
    hti_t * hti;
    int64_t idx;
};

struct ht {
    hti_t *hti;
    const datatype_t *key_type;
    uint32_t hti_copies;
    double density;
    int probe;
};

static const map_val_t COPIED_VALUE = TAG_VALUE(DOES_NOT_EXIST, TAG1);
static const map_val_t TOMBSTONE    = STRIP_TAG(-1, TAG1);

static const unsigned ENTRIES_PER_BUCKET     = CACHE_LINE_SIZE/sizeof(entry_t);
static const unsigned ENTRIES_PER_COPY_CHUNK = CACHE_LINE_SIZE/sizeof(entry_t)*2;
static const unsigned MIN_SCALE              = 4; // min 16 entries (4 buckets)

static int hti_copy_entry (hti_t *ht1, volatile entry_t *ent, uint32_t ent_key_hash, hti_t *ht2);

// Choose the next bucket to probe using the high-order bits of <key_hash>.
static inline int get_next_ndx(int old_ndx, uint32_t key_hash, int ht_scale) {
#if 1
    int incr = (key_hash >> (32 - ht_scale));
    if (incr < ENTRIES_PER_BUCKET) { incr += ENTRIES_PER_BUCKET; }
    return (old_ndx + incr) & MASK(ht_scale);
#else
    return (old_ndx + ENTRIES_PER_BUCKET) & MASK(ht_scale);
#endif
}

// Lookup <key> in <hti>.
//
// Return the entry that <key> is in, or if <key> isn't in <hti> return the entry that it would be
// in if it were inserted into <hti>. If there is no room for <key> in <hti> then return NULL, to
// indicate that the caller should look in <hti->next>.
//
// Record in <is_empty> if the entry being returned is empty. Otherwise the caller will have to
// waste time re-comparing the keys to confirm that it did not lose a race to fill an empty entry.
static volatile entry_t *hti_lookup (hti_t *hti, map_key_t key, uint32_t key_hash, int *is_empty) {
    TRACE("h2", "hti_lookup(key %p in hti %p)", key, hti);
    *is_empty = 0;

    // Probe one cache line at a time
    int ndx = key_hash & MASK(hti->scale); // the first entry to search
    for (int i = 0; i < hti->probe; ++i) {

        // The start of the bucket is the first entry in the cache line.
        volatile entry_t *bucket = hti->table + (ndx & ~(ENTRIES_PER_BUCKET-1));

        // Start searching at the indexed entry. Then loop around to the beginning of the cache line.
        for (int j = 0; j < ENTRIES_PER_BUCKET; ++j) {
            volatile entry_t *ent = bucket + ((ndx + j) & (ENTRIES_PER_BUCKET-1));

            map_key_t ent_key = ent->key;
            if (ent_key == DOES_NOT_EXIST) {
                TRACE("h1", "hti_lookup: entry %p for key %p is empty", ent,
                            (hti->ht->key_type == NULL) ? (void *)key : GET_PTR(key));
                *is_empty = 1; // indicate an empty so the caller avoids an expensive key compare
                return ent;
            }

            // Compare <key> with the key in the entry.
            if (EXPECT_TRUE(hti->ht->key_type == NULL)) {
                // fast path for integer keys
                if (ent_key == key) {
                    TRACE("h1", "hti_lookup: found entry %p with key %p", ent, ent_key);
                    return ent;
                }
            } else {
#ifndef NBD32
                // The key in <ent_key> is made up of two parts. The 48 low-order bits are a pointer. The
                // high-order 16 bits are taken from the hash. The bits from the hash are used as a
                // quick check to rule out non-equal keys without doing a complete compare.
                if ((key_hash >> 16) == (ent_key >> 48)) {
#endif
                    if (hti->ht->key_type->cmp(GET_PTR(ent_key), (void *)key) == 0) {
                        TRACE("h1", "hti_lookup: found entry %p with key %p", ent, GET_PTR(ent_key));
                        return ent;
#ifndef NBD32
                    }
#endif
                }
            }
        }

        ndx = get_next_ndx(ndx, key_hash, hti->scale);
    }

    // maximum number of probes exceeded
    TRACE("h1", "hti_lookup: maximum number of probes exceeded returning 0x0", 0, 0);
    return NULL;
}
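// Worked example of the probe sequence above: with scale = 10 and 16-byte entries,
// ENTRIES_PER_BUCKET is 4, so a bucket is one cache line. A key whose hash is H starts at
// ndx = H & MASK(10), scans the 4 entries sharing that cache line, then jumps by
// incr = H >> (32 - 10) (bumped past the bucket if it is too small) and wraps with MASK(10).
// Keys that collide on a bucket thus follow different probe chains, which reduces clustering
// compared to a fixed stride.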
// Allocate and initialize a hti_t with 2^<scale> entries.
static hti_t *hti_alloc (hashtable_t *parent, int scale) {
    hti_t *hti = (hti_t *)nbd_malloc(sizeof(hti_t));
    memset(hti, 0, sizeof(hti_t));
    hti->scale = scale;

    size_t sz = sizeof(entry_t) * (1ULL << scale);
#ifdef USE_SYSTEM_MALLOC
    hti->unaligned_table_ptr = nbd_malloc(sz + CACHE_LINE_SIZE - 1);
    hti->table = (void *)(((size_t)hti->unaligned_table_ptr + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1));
#else
    hti->table = nbd_malloc(sz);
#endif
    memset((void *)hti->table, 0, sz);

    hti->probe = (int)(hti->scale * 1.5) + 2;
    int quarter = (1ULL << (hti->scale - 2)) / ENTRIES_PER_BUCKET;
    if (hti->probe > quarter && quarter > 4) {
        // When searching for a key, probe at most a quarter of the buckets.
        hti->probe = quarter;
    }
    ASSERT(hti->probe);
    hti->ht = parent;
    hti->ref_count = 1; // one for the parent

    assert(hti->scale >= MIN_SCALE && hti->scale < 63); // size must be a power of 2
    assert(sizeof(entry_t) * ENTRIES_PER_BUCKET % CACHE_LINE_SIZE == 0); // divisible into cache
    assert((size_t)hti->table % CACHE_LINE_SIZE == 0); // cache aligned

    return hti;
}

// Called when <hti> runs out of room for new keys.
//
// Initiates a copy by creating a larger hti_t and installing it in <hti->next>.
static void hti_start_copy (hti_t *hti) {
    TRACE("h0", "hti_start_copy(hti %p scale %llu)", hti, hti->scale);

    // heuristics to determine the size of the new table
    size_t count = ht_count(hti->ht);
    unsigned int new_scale = hti->scale;
    new_scale += (count > (1ULL << (hti->scale - 1))) || (hti->key_count > (1ULL << (hti->scale - 2)) + (1ULL << (hti->scale - 3))); // double size if more than 1/2 full

    // Allocate the new table and attempt to install it.
    hti_t *next = hti_alloc(hti->ht, new_scale);
    hti_t *old_next = SYNC_CAS(&hti->next, NULL, next);
    if (old_next != NULL) {
        // Another thread beat us to it.
        TRACE("h0", "hti_start_copy: lost race to install new hti; found %p", old_next, 0);
#ifdef USE_SYSTEM_MALLOC
        nbd_free(next->unaligned_table_ptr);
#else
        nbd_free((void *)next->table);
#endif
        return;
    }
    TRACE("h0", "hti_start_copy: new hti %p scale %llu", next, next->scale);
    SYNC_ADD(&hti->ht->hti_copies, 1);
    hti->ht->density = (double)hti->key_count / (1ULL << hti->scale) * 100;
    hti->ht->probe = hti->probe;
}

// Copy the key and value stored in <ht1_ent> (which must be an entry in <ht1>) to <ht2>.
//
// Return 1 unless <ht1_ent> is already copied (then return 0), so the caller can account for the
// total number of entries left to copy.
static int hti_copy_entry (hti_t *ht1, volatile entry_t *ht1_ent, uint32_t key_hash, hti_t *ht2) {
    TRACE("h2", "hti_copy_entry: entry %p to table %p", ht1_ent, ht2);
    assert(ht1);
    assert(ht1->next);
    assert(ht2);
    assert(ht1_ent >= ht1->table && ht1_ent < ht1->table + (1ULL << ht1->scale));
#ifndef NBD32
    assert(key_hash == 0 || ht1->ht->key_type == NULL || (key_hash >> 16) == (ht1_ent->key >> 48));
#endif

    map_val_t ht1_ent_val = ht1_ent->val;
    if (EXPECT_FALSE(ht1_ent_val == COPIED_VALUE || ht1_ent_val == TAG_VALUE(TOMBSTONE, TAG1))) {
        TRACE("h1", "hti_copy_entry: entry %p already copied to table %p", ht1_ent, ht2);
        return FALSE; // already copied
    }

    // Kill empty entries.
    if (EXPECT_FALSE(ht1_ent_val == DOES_NOT_EXIST)) {
        map_val_t ht1_ent_val = SYNC_CAS(&ht1_ent->val, DOES_NOT_EXIST, COPIED_VALUE);
        if (ht1_ent_val == DOES_NOT_EXIST) {
            TRACE("h1", "hti_copy_entry: empty entry %p killed", ht1_ent, 0);
            return TRUE;
        }
        TRACE("h0", "hti_copy_entry: lost race to kill empty entry %p; the entry is not empty", ht1_ent, 0);
    }

    // Tag the value in the old entry to indicate a copy is in progress.
    ht1_ent_val = SYNC_FETCH_AND_OR(&ht1_ent->val, TAG_VALUE(0, TAG1));
    TRACE("h2", "hti_copy_entry: tagged the value %p in old entry %p", ht1_ent_val, ht1_ent);
    if (ht1_ent_val == COPIED_VALUE || ht1_ent_val == TAG_VALUE(TOMBSTONE, TAG1)) {
        TRACE("h1", "hti_copy_entry: entry %p already copied to table %p", ht1_ent, ht2);
        return FALSE; // was already copied by another thread.
    }

    // The old table's dead entries don't need to be copied to the new table
    if (ht1_ent_val == TOMBSTONE)
        return TRUE;

    // Install the key in the new table.
    map_key_t ht1_ent_key = ht1_ent->key;
    map_key_t key = (ht1->ht->key_type == NULL) ? (map_key_t)ht1_ent_key : (map_key_t)GET_PTR(ht1_ent_key);

    // We use 0 to indicate that <key_hash> is uninitialized. Occasionally the key's hash will really be 0 and we
    // waste time recomputing it every time. It is rare enough that it won't hurt performance.
    if (key_hash == 0) {
#ifdef NBD32
        key_hash = (ht1->ht->key_type == NULL) ? murmur32_4b(ht1_ent_key) : ht1->ht->key_type->hash((void *)key);
#else
        key_hash = (ht1->ht->key_type == NULL) ? murmur32_8b(ht1_ent_key) : ht1->ht->key_type->hash((void *)key);
#endif
    }

    int ht2_ent_is_empty;
    volatile entry_t *ht2_ent = hti_lookup(ht2, key, key_hash, &ht2_ent_is_empty);
    TRACE("h0", "hti_copy_entry: copy entry %p to entry %p", ht1_ent, ht2_ent);

    // It is possible that there isn't any room in the new table either.
    if (EXPECT_FALSE(ht2_ent == NULL)) {
        TRACE("h0", "hti_copy_entry: no room in table %p copy to next table %p", ht2, ht2->next);
        if (ht2->next == NULL) {
            hti_start_copy(ht2); // initiate nested copy, if not already started
        }
        return hti_copy_entry(ht1, ht1_ent, key_hash, ht2->next); // recursive tail-call
    }

    if (ht2_ent_is_empty) {
        map_key_t old_ht2_ent_key = SYNC_CAS(&ht2_ent->key, DOES_NOT_EXIST, ht1_ent_key);
        if (old_ht2_ent_key != DOES_NOT_EXIST) {
            TRACE("h0", "hti_copy_entry: lost race to CAS key %p into new entry; found %p",
                        ht1_ent_key, old_ht2_ent_key);
            return hti_copy_entry(ht1, ht1_ent, key_hash, ht2); // recursive tail-call
        }
        SYNC_ADD(&ht2->key_count, 1);
    }

    // Copy the value to the entry in the new table.
    ht1_ent_val = STRIP_TAG(ht1_ent_val, TAG1);
    map_val_t old_ht2_ent_val = SYNC_CAS(&ht2_ent->val, DOES_NOT_EXIST, ht1_ent_val);

    // If there is a nested copy in progress, we might have installed the key into a dead entry.
    if (old_ht2_ent_val == COPIED_VALUE) {
        TRACE("h0", "hti_copy_entry: nested copy in progress; copy %p to next table %p", ht2_ent, ht2->next);
        return hti_copy_entry(ht1, ht1_ent, key_hash, ht2->next); // recursive tail-call
    }

    // Mark the old entry as dead.
    ht1_ent->val = COPIED_VALUE;

    // Update the count if we were the one that completed the copy.
    if (old_ht2_ent_val == DOES_NOT_EXIST) {
        TRACE("h0", "hti_copy_entry: key %p value %p copied to new entry", key, ht1_ent_val);
        (void)SYNC_ADD(&ht1->count, -1);
        (void)SYNC_ADD(&ht2->count, 1);
        return TRUE;
    }

    TRACE("h0", "hti_copy_entry: lost race to install value %p in new entry; found value %p",
                ht1_ent_val, old_ht2_ent_val);
    return FALSE; // another thread completed the copy
}

// Compare <expected> with the existing value associated with <key>. If the values match then
// replace the existing value with <new>. If <new> is DOES_NOT_EXIST, delete the value associated
// with the key by replacing it with a TOMBSTONE.
//
// Return the previous value associated with <key>, or DOES_NOT_EXIST if <key> is not in the table
// or associated with a TOMBSTONE. If a copy is in progress and <key> has been copied to the next
// table then return COPIED_VALUE.
//
// NOTE: the returned value matches <expected> iff the set succeeds
//
// Certain values of <expected> have special meaning. If <expected> is CAS_EXPECT_EXISTS then any
// real value matches (i.e. not a TOMBSTONE or DOES_NOT_EXIST) as long as <key> is in the table. If
// <expected> is CAS_EXPECT_WHATEVER then skip the test entirely.
//
static map_val_t hti_cas (hti_t *hti, map_key_t key, uint32_t key_hash, map_val_t expected, map_val_t new) {
    TRACE("h1", "hti_cas: hti %p key %p", hti, key);
    TRACE("h1", "hti_cas: value %p expect %p", new, expected);
    assert(hti);
    assert(!IS_TAGGED(new, TAG1));
    assert(key);

    int is_empty;
    volatile entry_t *ent = hti_lookup(hti, key, key_hash, &is_empty);

    // There is no room for <key>, grow the table and try again.
    if (ent == NULL) {
        if (hti->next == NULL) {
            hti_start_copy(hti);
        }
        return COPIED_VALUE;
    }

    // Install <key> in the table if it doesn't exist.
    if (is_empty) {
        TRACE("h0", "hti_cas: entry %p is empty", ent, 0);
        if (expected != CAS_EXPECT_WHATEVER && expected != CAS_EXPECT_DOES_NOT_EXIST)
            return DOES_NOT_EXIST;

        // No need to do anything, <key> is already deleted.
        if (new == DOES_NOT_EXIST)
            return DOES_NOT_EXIST;

        // Allocate <new_key>.
        map_key_t new_key = (hti->ht->key_type == NULL)
                          ? (map_key_t)key
                          : (map_key_t)hti->ht->key_type->clone((void *)key);
#ifndef NBD32
        if (EXPECT_FALSE(hti->ht->key_type != NULL)) {
            // Combine pointer with bits from its hash
            new_key = ((uint64_t)(key_hash >> 16) << 48) | new_key;
        }
#endif

        // CAS the key into the table.
        map_key_t old_ent_key = SYNC_CAS(&ent->key, DOES_NOT_EXIST, new_key);

        // Retry if another thread stole the entry out from under us.
        if (old_ent_key != DOES_NOT_EXIST) {
            TRACE("h0", "hti_cas: lost race to install key %p in entry %p", new_key, ent);
            TRACE("h0", "hti_cas: found %p instead of NULL",
                        (hti->ht->key_type == NULL) ? (void *)old_ent_key : GET_PTR(old_ent_key), 0);
            if (hti->ht->key_type != NULL) {
                nbd_free(GET_PTR(new_key));
            }
            return hti_cas(hti, key, key_hash, expected, new); // tail-call
        }
        TRACE("h2", "hti_cas: installed key %p in entry %p", new_key, ent);
        SYNC_ADD(&hti->key_count, 1);
    }

    TRACE("h0", "hti_cas: entry for key %p is %p",
                (hti->ht->key_type == NULL) ? (void *)ent->key : GET_PTR(ent->key), ent);

    // If the entry is in the middle of a copy, the copy must be completed first.
    map_val_t ent_val = ent->val;
    if (EXPECT_FALSE(IS_TAGGED(ent_val, TAG1))) {
        if (ent_val != COPIED_VALUE && ent_val != TAG_VALUE(TOMBSTONE, TAG1)) {
            int did_copy = hti_copy_entry(hti, ent, key_hash, VOLATILE_DEREF(hti).next);
            if (did_copy) {
                (void)SYNC_ADD(&hti->num_entries_copied, 1);
            }
            TRACE("h0", "hti_cas: value in the middle of a copy, copy completed by %s",
                        (did_copy ? "self" : "other"), 0);
        }
        TRACE("h0", "hti_cas: value copied to next table, retry on next table", 0, 0);
        return COPIED_VALUE;
    }

    // Fail if the old value is not consistent with the caller's expectation.
    int old_existed = (ent_val != TOMBSTONE && ent_val != DOES_NOT_EXIST);
    if (EXPECT_FALSE(expected != CAS_EXPECT_WHATEVER && expected != ent_val)) {
        if (EXPECT_FALSE(expected != (old_existed ? CAS_EXPECT_EXISTS : CAS_EXPECT_DOES_NOT_EXIST))) {
            TRACE("h1", "hti_cas: value %p expected by caller not found; found value %p",
                        expected, ent_val);
            return ent_val;
        }
    }

    // No need to update if value is unchanged.
    if ((new == DOES_NOT_EXIST && !old_existed) || ent_val == new) {
        TRACE("h1", "hti_cas: old value and new value were the same", 0, 0);
        return ent_val;
    }

    // CAS the value into the entry. Retry if it fails.
    map_val_t v = SYNC_CAS(&ent->val, ent_val, new == DOES_NOT_EXIST ? TOMBSTONE : new);
    if (EXPECT_FALSE(v != ent_val)) {
        TRACE("h0", "hti_cas: value CAS failed; expected %p found %p", ent_val, v);
        return hti_cas(hti, key, key_hash, expected, new); // recursive tail-call
    }

    // The set succeeded. Adjust the value count.
    if (old_existed && new == DOES_NOT_EXIST) {
        (void)SYNC_ADD(&hti->count, -1);
    } else if (!old_existed && new != DOES_NOT_EXIST) {
        (void)SYNC_ADD(&hti->count, 1);
    }

    // Return the previous value.
    TRACE("h0", "hti_cas: CAS succeeded; old value %p new value %p", ent_val, new);
    return ent_val;
}
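// Note on the value lifecycle: an entry's val field moves through
// DOES_NOT_EXIST -> some value v -> ... -> TOMBSTONE on delete, and any of those states may have
// TAG1 OR-ed in while a copy to the next table is in progress. hti_cas() writes TOMBSTONE rather
// than DOES_NOT_EXIST on delete so a deleted entry can be distinguished from a never-filled one;
// the public ht_cas()/ht_remove() translate TOMBSTONE back to DOES_NOT_EXIST before returning, so
// callers never observe it.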
//
static map_val_t hti_get (hti_t *hti, map_key_t key, uint32_t key_hash) {
    int is_empty;
    volatile entry_t *ent = hti_lookup(hti, key, key_hash, &is_empty);

    // When hti_lookup() returns NULL it means we hit the reprobe limit while
    // searching the table. In that case, if a copy is in progress the key
    // might exist in the copy.
    if (EXPECT_FALSE(ent == NULL)) {
        if (VOLATILE_DEREF(hti).next != NULL)
            return hti_get(hti->next, key, key_hash); // recursive tail-call
        return DOES_NOT_EXIST;
    }

    if (is_empty)
        return DOES_NOT_EXIST;

    // If the entry is being copied, finish the copy and retry on the next table.
    map_val_t ent_val = ent->val;
    if (EXPECT_FALSE(IS_TAGGED(ent_val, TAG1))) {
        if (EXPECT_FALSE(ent_val != COPIED_VALUE && ent_val != TAG_VALUE(TOMBSTONE, TAG1))) {
            int did_copy = hti_copy_entry(hti, ent, key_hash, VOLATILE_DEREF(hti).next);
            if (did_copy) {
                (void)SYNC_ADD(&hti->num_entries_copied, 1);
            }
        }
        return hti_get(VOLATILE_DEREF(hti).next, key, key_hash); // tail-call
    }

    return (ent_val == TOMBSTONE) ? DOES_NOT_EXIST : ent_val;
}

//
map_val_t ht_get (hashtable_t *ht, map_key_t key) {
#ifdef NBD32
    uint32_t hash = (ht->key_type == NULL) ? murmur32_4b((uint64_t)key) : ht->key_type->hash((void *)key);
#else
    uint32_t hash = (ht->key_type == NULL) ? murmur32_8b((uint64_t)key) : ht->key_type->hash((void *)key);
#endif
    return hti_get(ht->hti, key, hash);
}
// returns TRUE if copy is done
static int hti_help_copy (hti_t *hti) {
    volatile entry_t *ent;
    size_t limit;
    size_t total_copied = hti->num_entries_copied;
    size_t num_copied = 0;
    size_t x = hti->copy_scan;

    TRACE("h1", "ht_cas: help copy. scan is %llu, size is %llu", x, 1<<hti->scale);
    if (total_copied != (1ULL << hti->scale)) {
        // Panic if we've been around the array twice and still haven't finished the copy.
        int panic = (x >= (1ULL << (hti->scale + 1)));
        if (!panic) {
            limit = ENTRIES_PER_COPY_CHUNK;

            // Reserve some entries for this thread to copy. There is a race condition here because the
            // fetch and add isn't atomic, but that is ok.
            hti->copy_scan = x + ENTRIES_PER_COPY_CHUNK;

            // <x> might be larger than the size of the table, if some thread stalls while
            // copying. In that case we just wrap around to the beginning and make another pass
            // through the table.
            ent = hti->table + (x & MASK(hti->scale));
        } else {
            TRACE("h1", "ht_cas: help copy panic", 0, 0);
            // scan the whole table
            ent = hti->table;
            limit = (1ULL << hti->scale);
        }

        // Copy the entries
        for (int i = 0; i < limit; ++i) {
            num_copied += hti_copy_entry(hti, ent++, 0, hti->next);
            assert(ent <= hti->table + (1ULL << hti->scale));
        }
        if (num_copied != 0) {
            total_copied = SYNC_ADD(&hti->num_entries_copied, num_copied);
        }
    }

    return (total_copied == (1ULL << hti->scale));
}

static void hti_defer_free (hti_t *hti) {
    assert(hti->ref_count == 0);

    for (uint32_t i = 0; i < (1ULL << hti->scale); ++i) {
        map_key_t key = hti->table[i].key;
        map_val_t val = hti->table[i].val;
        if (val == COPIED_VALUE)
            continue;
        assert(!IS_TAGGED(val, TAG1) || val == TAG_VALUE(TOMBSTONE, TAG1)); // copy not in progress
        if (hti->ht->key_type != NULL && key != DOES_NOT_EXIST) {
            rcu_defer_free(GET_PTR(key));
        }
    }
#ifdef USE_SYSTEM_MALLOC
    rcu_defer_free(hti->unaligned_table_ptr);
#else
    rcu_defer_free((void *)hti->table);
#endif
    rcu_defer_free(hti);
}

static void hti_release (hti_t *hti) {
    assert(hti->ref_count > 0);
    int ref_count = SYNC_ADD(&hti->ref_count, -1);
    if (ref_count == 0) {
        hti_defer_free(hti);
    }
}

//
map_val_t ht_cas (hashtable_t *ht, map_key_t key, map_val_t expected_val, map_val_t new_val) {

    TRACE("h2", "ht_cas: key %p ht %p", key, ht);
    TRACE("h2", "ht_cas: expected val %p new val %p", expected_val, new_val);
    assert(key != DOES_NOT_EXIST);
    assert(!IS_TAGGED(new_val, TAG1) && new_val != DOES_NOT_EXIST && new_val != TOMBSTONE);

    hti_t *hti = ht->hti;

    // Help with an ongoing copy.
    if (EXPECT_FALSE(hti->next != NULL)) {
        int done = hti_help_copy(hti);

        // Unlink fully copied tables.
        if (done) {
            assert(hti->next);
            if (SYNC_CAS(&ht->hti, hti, hti->next) == hti) {
                hti_release(hti);
            }
        }
    }

    map_val_t old_val;
#ifdef NBD32
    uint32_t key_hash = (ht->key_type == NULL) ? murmur32_4b((uint64_t)key) : ht->key_type->hash((void *)key);
#else
    uint32_t key_hash = (ht->key_type == NULL) ? murmur32_8b((uint64_t)key) : ht->key_type->hash((void *)key);
#endif
    while ((old_val = hti_cas(hti, key, key_hash, expected_val, new_val)) == COPIED_VALUE) {
        assert(hti->next);
        hti = hti->next;
    }

    return old_val == TOMBSTONE ? DOES_NOT_EXIST : old_val;
}

// Remove the value in <ht> associated with <key>. Returns the value removed, or DOES_NOT_EXIST if
// there was no value for that key.
map_val_t ht_remove (hashtable_t *ht, map_key_t key) {
    hti_t *hti = ht->hti;
    map_val_t val;
#ifdef NBD32
    uint32_t key_hash = (ht->key_type == NULL) ? murmur32_4b((uint64_t)key) : ht->key_type->hash((void *)key);
#else
    uint32_t key_hash = (ht->key_type == NULL) ? murmur32_8b((uint64_t)key) : ht->key_type->hash((void *)key);
#endif
    do {
        val = hti_cas(hti, key, key_hash, CAS_EXPECT_WHATEVER, DOES_NOT_EXIST);
        if (val != COPIED_VALUE)
            return val == TOMBSTONE ? DOES_NOT_EXIST : val;
        assert(hti->next);
        hti = hti->next;
        assert(hti);
    } while (1);
}

// Returns the number of key-value pairs in <ht>
size_t ht_count (hashtable_t *ht) {
    hti_t *hti = ht->hti;
    size_t count = 0;
    while (hti) {
        count += hti->count;
        hti = hti->next;
    }
    return count;
}

// Allocate and initialize a new hash table.
hashtable_t *ht_alloc (const datatype_t *key_type) {
    hashtable_t *ht = nbd_malloc(sizeof(hashtable_t));
    ht->key_type = key_type;
    ht->hti = (hti_t *)hti_alloc(ht, MIN_SCALE);
    ht->hti_copies = 0;
    ht->density = 0.0;
    return ht;
}

// Free <ht> and its internal structures.
void ht_free (hashtable_t *ht) {
    hti_t *hti = ht->hti;
    do {
        hti_t *next = hti->next;
        assert(hti->ref_count == 1);
        hti_release(hti);
        hti = next;
    } while (hti);
    nbd_free(ht);
}

void ht_print (hashtable_t *ht, int verbose) {
    printf("probe:%-2d density:%.1f%% count:%-8lld ", ht->probe, ht->density, (uint64_t)ht_count(ht));
    hti_t *hti = ht->hti;
    while (hti) {
        if (verbose) {
            for (int i = 0; i < (1ULL << hti->scale); ++i) {
                volatile entry_t *ent = hti->table + i;
                printf("[0x%x] 0x%llx:0x%llx\n", i, (uint64_t)ent->key, (uint64_t)ent->val);
                if (i > 30) {
                    printf("...\n");
                    break;
                }
            }
        }
        int scale = hti->scale;
        printf("hti count:%lld scale:%d key density:%.1f%% value density:%.1f%% probe:%d\n",
               (uint64_t)hti->count, scale, (double)hti->key_count / (1ULL << scale) * 100,
               (double)hti->count / (1ULL << scale) * 100, hti->probe);
        hti = hti->next;
    }
}

ht_iter_t *ht_iter_begin (hashtable_t *ht, map_key_t key) {
    hti_t *hti;
    int ref_count;
    do {
        hti = ht->hti;
        while (hti->next != NULL) {
            do { } while (hti_help_copy(hti) != TRUE);
            hti = hti->next;
        }
        do {
            ref_count = hti->ref_count;
            if (ref_count == 0)
                break;
        } while (ref_count != SYNC_CAS(&hti->ref_count, ref_count, ref_count + 1));
    } while (ref_count == 0);

    ht_iter_t *iter = nbd_malloc(sizeof(ht_iter_t));
    iter->hti = hti;
    iter->idx = -1;

    return iter;
}

map_val_t ht_iter_next (ht_iter_t *iter, map_key_t *key_ptr) {
    volatile entry_t *ent;
    map_key_t key;
    map_val_t val;
    size_t table_size = (1ULL << iter->hti->scale);
    do {
        iter->idx++;
        if (iter->idx == table_size) {
            return DOES_NOT_EXIST;
        }
        ent = &iter->hti->table[iter->idx];
        key = (iter->hti->ht->key_type == NULL) ? (map_key_t)ent->key : (map_key_t)GET_PTR(ent->key);
        val = ent->val;

    } while (key == DOES_NOT_EXIST || val == DOES_NOT_EXIST || val == TOMBSTONE);

    if (val == COPIED_VALUE) {
        const datatype_t *key_type = iter->hti->ht->key_type;
#ifdef NBD32
        uint32_t hash = (key_type == NULL) ? murmur32_4b((uint64_t)key) : key_type->hash((void *)key);
#else
        uint32_t hash = (key_type == NULL) ? murmur32_8b((uint64_t)key) : key_type->hash((void *)key);
#endif
        val = hti_get(iter->hti->next, (map_key_t)ent->key, hash);

        // Go to the next entry if key is already deleted.
        if (val == DOES_NOT_EXIST)
            return ht_iter_next(iter, key_ptr); // recursive tail-call
    }

    if (key_ptr) {
        *key_ptr = key;
    }
    return val;
}

void ht_iter_free (ht_iter_t *iter) {
    hti_release(iter->hti);
    nbd_free(iter);
}
--------------------------------------------------------------------------------
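The hashtable can also be used directly through the ht_* calls rather than the map_t wrapper. A sketch with nstring keys (assumes nbd_thread_init() has already been called on this thread; ht_cas clones the key, so the caller keeps ownership of its copy):

#include <string.h>
#include "common.h"
#include "nstring.h"
#include "mem.h"
#include "hashtable.h"

map_val_t ht_demo (void) {
    hashtable_t *ht = ht_alloc(&DATATYPE_NSTRING);
    nstring_t *key = ns_alloc(3);
    memcpy(key->data, "abc", 3);
    ht_cas(ht, (map_key_t)key, CAS_EXPECT_DOES_NOT_EXIST, 7); // insert; the table clones the key
    map_val_t v = ht_get(ht, (map_key_t)key);                 // 7
    nbd_free(key);  // our copy; the table owns its clone
    ht_free(ht);
    return v;
}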
/map/list.c:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 *
 * Harris-Michael lock-free list-based set
 * http://www.research.ibm.com/people/m/michael/spaa-2002.pdf
 */

#include <stdio.h>
#include <string.h>

#include "common.h"
#include "list.h"
#include "mem.h"
#ifdef LIST_USE_HAZARD_POINTER
#include "hazard.h"
#else
#include "rcu.h"
#endif

typedef struct node {
    map_key_t key;
    map_val_t val;
    markable_t next; // next node
} node_t;

struct ll_iter {
    node_t *pred;
};

struct ll {
    node_t *head;
    const datatype_t *key_type;
};

// Marking the <next> field of a node logically removes it from the list
#define  MARK_NODE(x) TAG_VALUE((markable_t)(x), 0x1)
#define   HAS_MARK(x) (IS_TAGGED((x), 0x1) == 0x1)
#define   GET_NODE(x) ((node_t *)(x))
#define STRIP_MARK(x) ((node_t *)STRIP_TAG((x), 0x1))

static node_t *node_alloc (map_key_t key, map_val_t val) {
    node_t *item = (node_t *)nbd_malloc(sizeof(node_t));
    assert(!HAS_MARK((size_t)item));
    item->key = key;
    item->val = val;
    return item;
}

list_t *ll_alloc (const datatype_t *key_type) {
    list_t *ll = (list_t *)nbd_malloc(sizeof(list_t));
    ll->key_type = key_type;
    ll->head = node_alloc(0, 0);
    ll->head->next = DOES_NOT_EXIST;
    return ll;
}

void ll_free (list_t *ll) {
    node_t *item = STRIP_MARK(ll->head->next);
    while (item != NULL) {
        node_t *next = STRIP_MARK(item->next);
        if (ll->key_type != NULL) {
            nbd_free((void *)item->key);
        }
        nbd_free(item);
        item = next;
    }
}

size_t ll_count (list_t *ll) {
    size_t count = 0;
    node_t *item = STRIP_MARK(ll->head->next);
    while (item) {
        if (!HAS_MARK(item->next)) {
            count++;
        }
        item = STRIP_MARK(item->next);
    }
    return count;
}

#ifdef LIST_USE_HAZARD_POINTER
static void nbd_free_node (node_t *x) {
    nbd_free((void *)x->key);
    nbd_free(x);
}
#endif
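// Note on the marking protocol used below: a node is removed in two steps. First its <next>
// field is tagged with the mark bit (logical delete) -- traversals that see HAS_MARK(item->next)
// treat the node as gone even though it is still linked. Second, a traversal CASes pred->next
// from the node to STRIP_MARK(next) (physical unlink), and only the thread whose CAS succeeds
// retires the memory, via haz_defer_free() or rcu_defer_free() depending on the build.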
109 | if (!help_remove) { 110 | item = STRIP_MARK(item->next); 111 | if (EXPECT_FALSE(item == NULL)) 112 | break; 113 | TRACE("l3", "find_pred: skipping marked item %p (next is %p)", item, next); 114 | next = item->next; 115 | continue; 116 | } 117 | 118 | // Unlink logically removed items. 119 | TRACE("l3", "find_pred: unlinking marked item %p next is %p", item, next); 120 | 121 | markable_t other = SYNC_CAS(&pred->next, (markable_t)item, (markable_t)STRIP_MARK(next)); 122 | if (other == (markable_t)item) { 123 | TRACE("l2", "find_pred: unlinked item %p from pred %p", item, pred); 124 | item = STRIP_MARK(next); 125 | next = (item != NULL) ? item->next : DOES_NOT_EXIST; 126 | TRACE("l3", "find_pred: now current item is %p next is %p", item, next); 127 | 128 | // The thread that completes the unlink should free the memory. 129 | #ifdef LIST_USE_HAZARD_POINTER 130 | free_t free_ = (ll->key_type != NULL ? (free_t)nbd_free_node : nbd_free); 131 | haz_defer_free(GET_NODE(other), free_); 132 | #else 133 | if (ll->key_type != NULL) { 134 | rcu_defer_free((void *)GET_NODE(other)->key); 135 | } 136 | rcu_defer_free(GET_NODE(other)); 137 | #endif 138 | } else { 139 | TRACE("l2", "find_pred: lost a race to unlink item %p from pred %p", item, pred); 140 | TRACE("l2", "find_pred: pred's link changed to %p", other, 0); 141 | if (HAS_MARK(other)) 142 | return find_pred(pred_ptr, item_ptr, ll, key, help_remove); // retry 143 | item = GET_NODE(other); 144 | next = (item != NULL) ? item->next : DOES_NOT_EXIST; 145 | } 146 | } 147 | 148 | if (EXPECT_FALSE(item == NULL)) 149 | break; 150 | 151 | TRACE("l3", "find_pred: visiting item %p (next is %p)", item, next); 152 | TRACE("l4", "find_pred: key %p val %p", item->key, item->val); 153 | 154 | int d; 155 | if (EXPECT_TRUE(ll->key_type == NULL)) { 156 | d = item->key - key; 157 | } else { 158 | d = ll->key_type->cmp((void *)item->key, (void *)key); 159 | } 160 | 161 | // If we reached the key (or passed where it should be), we found the right predecessor 162 | if (d >= 0) { 163 | if (pred_ptr != NULL) { 164 | *pred_ptr = pred; 165 | } 166 | if (item_ptr != NULL) { 167 | *item_ptr = item; 168 | } 169 | if (d == 0) { 170 | TRACE("l2", "find_pred: found matching item %p in list, pred is %p", item, pred); 171 | return TRUE; 172 | } 173 | TRACE("l2", "find_pred: found proper place for key %p in list, pred is %p", key, pred); 174 | return FALSE; 175 | } 176 | 177 | pred = item; 178 | #ifdef LIST_USE_HAZARD_POINTER 179 | temp = hp0; hp0 = hp1; hp1 = temp; 180 | #endif 181 | item = GET_NODE(next); 182 | } 183 | 184 | // The key is not in the list. 185 | if (pred_ptr != NULL) { 186 | *pred_ptr = pred; 187 | } 188 | *item_ptr = NULL; 189 | TRACE("l2", "find_pred: reached end of list. last item is %p", pred, 0); 190 | return FALSE; 191 | } 192 | 193 | // Fast find. Do not help unlink partially removed nodes and do not return the found item's predecessor. 194 | map_val_t ll_lookup (list_t *ll, map_key_t key) { 195 | TRACE("l1", "ll_lookup: searching for key %p in list %p", key, ll); 196 | node_t *item; 197 | int found = find_pred(NULL, &item, ll, key, FALSE); 198 | 199 | // If we found an item matching the key, return its value. 200 | if (found) { 201 | map_val_t val = item->val; 202 | if (val != DOES_NOT_EXIST) { 203 | TRACE("l1", "ll_lookup: found item %p. val %p.
returning item", item, item->val); 204 | return val; 205 | } 206 | } 207 | 208 | TRACE("l1", "ll_lookup: no item in the list matched the key", 0, 0); 209 | return DOES_NOT_EXIST; 210 | } 211 | 212 | map_val_t ll_cas (list_t *ll, map_key_t key, map_val_t expectation, map_val_t new_val) { 213 | TRACE("l1", "ll_cas: key %p list %p", key, ll); 214 | TRACE("l1", "ll_cas: expectation %p new value %p", expectation, new_val); 215 | ASSERT((int64_t)new_val > 0); 216 | 217 | do { 218 | node_t *pred, *old_item; 219 | int found = find_pred(&pred, &old_item, ll, key, TRUE); 220 | if (!found) { 221 | 222 | // There was not an item in the list that matches the key. 223 | if (EXPECT_FALSE(expectation != CAS_EXPECT_DOES_NOT_EXIST && expectation != CAS_EXPECT_WHATEVER)) { 224 | TRACE("l1", "ll_cas: the expectation was not met, the list was not changed", 0, 0); 225 | return DOES_NOT_EXIST; // failure 226 | } 227 | 228 | // Create a new item and insert it into the list. 229 | TRACE("l2", "ll_cas: attempting to insert item between %p and %p", pred, pred->next); 230 | map_key_t new_key = ll->key_type == NULL ? key : (map_key_t)ll->key_type->clone((void *)key); 231 | node_t *new_item = node_alloc(new_key, new_val); 232 | markable_t next = new_item->next = (markable_t)old_item; 233 | markable_t other = SYNC_CAS(&pred->next, (markable_t)next, (markable_t)new_item); 234 | if (other == next) { 235 | TRACE("l1", "ll_cas: successfully inserted new item %p", new_item, 0); 236 | return DOES_NOT_EXIST; // success 237 | } 238 | 239 | // Lost a race. Failed to insert the new item into the list. 240 | TRACE("l1", "ll_cas: lost a race. CAS failed. expected pred's link to be %p but found %p", next, other); 241 | if (ll->key_type != NULL) { 242 | nbd_free((void *)new_key); 243 | } 244 | nbd_free(new_item); 245 | continue; // retry 246 | } 247 | 248 | // Found an item in the list that matches the key. 249 | map_val_t old_item_val = old_item->val; 250 | do { 251 | // If the item's value is DOES_NOT_EXIST it means another thread removed the node out from under us. 252 | if (EXPECT_FALSE(old_item_val == DOES_NOT_EXIST)) { 253 | TRACE("l2", "ll_cas: lost a race, found an item but another thread removed it. retry", 0, 0); 254 | break; // retry 255 | } 256 | 257 | if (EXPECT_FALSE(expectation == CAS_EXPECT_DOES_NOT_EXIST)) { 258 | TRACE("l1", "ll_cas: found an item %p in the list that matched the key. the expectation was " 259 | "not met, the list was not changed", old_item, old_item_val); 260 | return old_item_val; // failure 261 | } 262 | 263 | // Use a CAS and not a SWAP. If the node is in the process of being removed and we used a SWAP, we could 264 | // replace DOES_NOT_EXIST with our value. Then another thread that is updating the value could think it 265 | // succeeded and return our value even though we indicated that the node has been removed. If the CAS 266 | // fails it means another thread either removed the node or updated its value. 267 | map_val_t ret_val = SYNC_CAS(&old_item->val, old_item_val, new_val); 268 | if (ret_val == old_item_val) { 269 | TRACE("l1", "ll_cas: the CAS succeeded. updated the value of the item", 0, 0); 270 | return ret_val; // success 271 | } 272 | TRACE("l2", "ll_cas: lost a race. the CAS failed. 
another thread changed the item's value", 0, 0); 273 | 274 | old_item_val = ret_val; 275 | } while (1); 276 | } while (1); 277 | } 278 | 279 | map_val_t ll_remove (list_t *ll, map_key_t key) { 280 | TRACE("l1", "ll_remove: removing item with key %p from list %p", key, ll); 281 | node_t *pred; 282 | node_t *item; 283 | int found = find_pred(&pred, &item, ll, key, TRUE); 284 | if (!found) { 285 | TRACE("l1", "ll_remove: remove failed, an item with a matching key does not exist in the list", 0, 0); 286 | return DOES_NOT_EXIST; 287 | } 288 | 289 | // Mark the item removed. If multiple threads try to remove the same item only one of them should succeed. 290 | markable_t next; 291 | markable_t old_next = item->next; 292 | do { 293 | next = old_next; 294 | old_next = SYNC_CAS(&item->next, next, MARK_NODE(STRIP_MARK(next))); 295 | if (HAS_MARK(old_next)) { 296 | TRACE("l1", "ll_remove: lost a race -- %p is already marked for removal by another thread", item, 0); 297 | return DOES_NOT_EXIST; 298 | } 299 | } while (next != old_next); 300 | TRACE("l2", "ll_remove: logically removed item %p", item, 0); 301 | ASSERT(HAS_MARK(VOLATILE_DEREF(item).next)); 302 | 303 | // Atomically swap out the item's value in case another thread is updating the item while we are 304 | // removing it. This establishes which operation occurs first logically, the update or the remove. 305 | map_val_t val = SYNC_SWAP(&item->val, DOES_NOT_EXIST); 306 | TRACE("l2", "ll_remove: replaced item's val %p with DOES_NOT_EXIST", val, 0); 307 | 308 | // Unlink the item from its pred. If we lose a race to another thread just back off. It is safe to leave the 309 | // item logically removed for a later call (or some other thread) to physically unlink. By marking the 310 | // item earlier, we logically removed it. 311 | TRACE("l2", "ll_remove: unlink the item by linking its pred %p to its successor %p", pred, next); 312 | markable_t other; 313 | if ((other = SYNC_CAS(&pred->next, (markable_t)item, next)) != (markable_t)item) { 314 | TRACE("l1", "ll_remove: unlink failed; pred's link changed from %p to %p", item, other); 315 | return val; 316 | } 317 | 318 | // The thread that completes the unlink should free the memory. 319 | #ifdef LIST_USE_HAZARD_POINTER 320 | free_t free_ = (ll->key_type != NULL ? (free_t)nbd_free_node : nbd_free); 321 | haz_defer_free(GET_NODE(item), free_); 322 | #else 323 | if (ll->key_type != NULL) { 324 | rcu_defer_free((void *)item->key); 325 | } 326 | rcu_defer_free(item); 327 | #endif 328 | TRACE("l1", "ll_remove: successfully unlinked item %p from the list", item, 0); 329 | return val; 330 | } 331 | 332 | void ll_print (list_t *ll, int verbose) { 333 | if (verbose) { 334 | markable_t next = ll->head->next; 335 | int i = 0; 336 | while (next != DOES_NOT_EXIST) { 337 | node_t *item = STRIP_MARK(next); 338 | if (item == NULL) 339 | break; 340 | printf("%s%p:0x%llx ", HAS_MARK(item->next) ?
"*" : "", item, (uint64_t)item->key); 341 | fflush(stdout); 342 | if (i++ > 30) { 343 | printf("..."); 344 | break; 345 | } 346 | next = item->next; 347 | } 348 | printf("\n"); 349 | } 350 | printf("count:%llu\n", (uint64_t)ll_count(ll)); 351 | } 352 | 353 | ll_iter_t *ll_iter_begin (list_t *ll, map_key_t key) { 354 | ll_iter_t *iter = (ll_iter_t *)nbd_malloc(sizeof(ll_iter_t)); 355 | if (key != DOES_NOT_EXIST) { 356 | find_pred(&iter->pred, NULL, ll, key, FALSE); 357 | } else { 358 | iter->pred = ll->head; 359 | } 360 | #ifdef LIST_USE_HAZARD_POINTER 361 | haz_register_dynamic((void **)&iter->pred); 362 | #endif 363 | return iter; 364 | } 365 | 366 | map_val_t ll_iter_next (ll_iter_t *iter, map_key_t *key_ptr) { 367 | assert(iter); 368 | if (iter->pred == NULL) 369 | return DOES_NOT_EXIST; 370 | 371 | // advance iterator to next item; skip items that have been removed 372 | markable_t item; 373 | #ifdef LIST_USE_HAZARD_POINTER 374 | haz_t *hp0 = haz_get_static(0); 375 | #endif 376 | do { 377 | #ifndef LIST_USE_HAZARD_POINTER 378 | item = iter->pred->next; 379 | #else //LIST_USE_HAZARD_POINTER 380 | do { 381 | item = iter->pred->next; 382 | haz_set(hp0, STRIP_MARK(item)); 383 | } while (item != VOLATILE_DEREF(iter->pred).next); 384 | #endif//LIST_USE_HAZARD_POINTER 385 | iter->pred = STRIP_MARK(item); 386 | if (iter->pred == NULL) 387 | return DOES_NOT_EXIST; 388 | } while (HAS_MARK(item)); 389 | 390 | if (key_ptr != NULL) { 391 | *key_ptr = GET_NODE(item)->key; 392 | } 393 | return GET_NODE(item)->val; 394 | } 395 | 396 | void ll_iter_free (ll_iter_t *iter) { 397 | #ifdef LIST_USE_HAZARD_POINTER 398 | haz_unregister_dynamic((void **)&iter->pred); 399 | #endif 400 | nbd_free(iter); 401 | } 402 | -------------------------------------------------------------------------------- /map/map.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * generic interface for map-like data structures 6 | */ 7 | 8 | #include "common.h" 9 | #include "map.h" 10 | #include "mem.h" 11 | 12 | struct map { 13 | const map_impl_t *impl; 14 | void *data; 15 | }; 16 | 17 | struct map_iter { 18 | const map_impl_t *impl; 19 | void *state; 20 | }; 21 | 22 | map_t *map_alloc (const map_impl_t *map_impl, const datatype_t *key_type) { 23 | map_t *map = nbd_malloc(sizeof(map_t)); 24 | map->impl = map_impl; 25 | map->data = map->impl->alloc(key_type); 26 | return map; 27 | } 28 | 29 | void map_free (map_t *map) { 30 | map->impl->free_(map->data); 31 | } 32 | 33 | void map_print (map_t *map, int verbose) { 34 | map->impl->print(map->data, verbose); 35 | } 36 | 37 | map_val_t map_count (map_t *map) { 38 | return map->impl->count(map->data); 39 | } 40 | 41 | map_val_t map_get (map_t *map, map_key_t key) { 42 | return map->impl->get(map->data, key); 43 | } 44 | 45 | map_val_t map_set (map_t *map, map_key_t key, map_val_t new_val) { 46 | return map->impl->cas(map->data, key, CAS_EXPECT_WHATEVER, new_val); 47 | } 48 | 49 | map_val_t map_add (map_t *map, map_key_t key, map_val_t new_val) { 50 | return map->impl->cas(map->data, key, CAS_EXPECT_DOES_NOT_EXIST, new_val); 51 | } 52 | 53 | map_val_t map_cas (map_t *map, map_key_t key, map_val_t expected_val, map_val_t new_val) { 54 | return map->impl->cas(map->data, key, expected_val, new_val); 55 | } 56 | 57 | map_val_t map_replace(map_t *map, map_key_t key, map_val_t new_val) { 58 | return 
map->impl->cas(map->data, key, CAS_EXPECT_EXISTS, new_val); 59 | } 60 | 61 | map_val_t map_remove (map_t *map, map_key_t key) { 62 | return map->impl->remove(map->data, key); 63 | } 64 | 65 | map_iter_t * map_iter_begin (map_t *map, map_key_t key) { 66 | map_iter_t *iter = nbd_malloc(sizeof(map_iter_t)); 67 | iter->impl = map->impl; 68 | iter->state = map->impl->iter_begin(map->data, key); 69 | return iter; 70 | } 71 | 72 | map_val_t map_iter_next (map_iter_t *iter, map_key_t *key_ptr) { 73 | return iter->impl->iter_next(iter->state, key_ptr); 74 | } 75 | 76 | void map_iter_free (map_iter_t *iter) { 77 | iter->impl->iter_free(iter->state); 78 | nbd_free(iter); 79 | } 80 | -------------------------------------------------------------------------------- /map/skiplist.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * Implementation of the lock-free skiplist data-structure created by Maurice Herlihy, Yossi Lev, 6 | * and Nir Shavit. See Herlihy's and Shavit's book "The Art of Multiprocessor Programming". 7 | * http://www.amazon.com/Art-Multiprocessor-Programming-Maurice-Herlihy/dp/0123705916/ 8 | * 9 | * See also Keir Fraser's dissertation "Practical Lock Freedom". 10 | * www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf 11 | * 12 | * I've generalized the data structure to support update operations like set() and CAS() in addition to 13 | * the normal add() and remove() operations. 14 | * 15 | * Warning: This code is written for the x86 memory-model. The algorithm depends on certain stores 16 | * and loads being ordered. This code won't work correctly on platforms with weaker memory models if 17 | * you don't add memory barriers in the right places. 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "common.h" 24 | #include "skiplist.h" 25 | #include "runtime.h" 26 | #include "mem.h" 27 | #include "rcu.h" 28 | 29 | // Setting MAX_LEVELS to 1 essentially makes this data structure the Harris-Michael lock-free list (see list.c).
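//
// A minimal usage sketch (a hedged illustration, not part of the original file: it assumes the nbd
// runtime has been initialized for the calling thread; integer keys, so key_type is NULL, and
// values must be non-zero because 0 means DOES_NOT_EXIST):
//
//     skiplist_t *sl = sl_alloc(NULL);
//     sl_cas(sl, (map_key_t)7, CAS_EXPECT_DOES_NOT_EXIST, (map_val_t)100); // insert 7 -> 100
//     map_val_t v = sl_lookup(sl, (map_key_t)7);                           // v == 100
//     sl_remove(sl, (map_key_t)7);                                         // returns 100
//     sl_free(sl);
//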
30 | #define MAX_LEVELS 24 31 | 32 | enum unlink { 33 | FORCE_UNLINK, 34 | ASSIST_UNLINK, 35 | DONT_UNLINK 36 | }; 37 | 38 | typedef struct node { 39 | map_key_t key; 40 | map_val_t val; 41 | unsigned num_levels; 42 | markable_t next[1]; 43 | } node_t; 44 | 45 | struct sl_iter { 46 | node_t *next; 47 | }; 48 | 49 | struct sl { 50 | node_t *head; 51 | const datatype_t *key_type; 52 | int high_water; // max historic number of levels 53 | }; 54 | 55 | // Marking the field of a node logically removes it from the list 56 | #if 0 57 | static inline markable_t MARK_NODE(node_t * x) { return TAG_VALUE((markable_t)x, 0x1); } 58 | static inline int HAS_MARK(markable_t x) { return (IS_TAGGED(x, 0x1) == 0x1); } 59 | static inline node_t * GET_NODE(markable_t x) { assert(!HAS_MARK(x)); return (node_t *)x; } 60 | static inline node_t * STRIP_MARK(markable_t x) { return ((node_t *)STRIP_TAG(x, 0x1)); } 61 | #else 62 | #define MARK_NODE(x) TAG_VALUE((markable_t)(x), 0x1) 63 | #define HAS_MARK(x) (IS_TAGGED((x), 0x1) == 0x1) 64 | #define GET_NODE(x) ((node_t *)(x)) 65 | #define STRIP_MARK(x) ((node_t *)STRIP_TAG((x), 0x1)) 66 | #endif 67 | 68 | static int random_levels (skiplist_t *sl) { 69 | uint64_t r = nbd_rand(); 70 | int z = __builtin_ctz(r); 71 | int levels = (int)(z / 1.5); 72 | if (levels == 0) 73 | return 1; 74 | if (levels > sl->high_water) { 75 | levels = SYNC_ADD(&sl->high_water, 1); 76 | TRACE("s2", "random_levels: increased high water mark to %lld", sl->high_water, 0); 77 | } 78 | if (levels > MAX_LEVELS) { levels = MAX_LEVELS; } 79 | return levels; 80 | } 81 | 82 | static node_t *node_alloc (int num_levels, map_key_t key, map_val_t val) { 83 | assert(num_levels >= 0 && num_levels <= MAX_LEVELS); 84 | size_t sz = sizeof(node_t) + (num_levels - 1) * sizeof(node_t *); 85 | node_t *item = (node_t *)nbd_malloc(sz); 86 | memset(item, 0, sz); 87 | item->key = key; 88 | item->val = val; 89 | item->num_levels = num_levels; 90 | TRACE("s2", "node_alloc: new node %p (%llu levels)", item, num_levels); 91 | return item; 92 | } 93 | 94 | skiplist_t *sl_alloc (const datatype_t *key_type) { 95 | skiplist_t *sl = (skiplist_t *)nbd_malloc(sizeof(skiplist_t)); 96 | sl->key_type = key_type; 97 | sl->high_water = 1; 98 | sl->head = node_alloc(MAX_LEVELS, 0, 0); 99 | memset(sl->head->next, 0, MAX_LEVELS * sizeof(skiplist_t *)); 100 | return sl; 101 | } 102 | 103 | void sl_free (skiplist_t *sl) { 104 | node_t *item = GET_NODE(sl->head->next[0]); 105 | while (item) { 106 | node_t *next = STRIP_MARK(item->next[0]); 107 | if (sl->key_type != NULL) { 108 | nbd_free((void *)item->key); 109 | } 110 | nbd_free(item); 111 | item = next; 112 | } 113 | } 114 | 115 | size_t sl_count (skiplist_t *sl) { 116 | size_t count = 0; 117 | node_t *item = GET_NODE(sl->head->next[0]); 118 | while (item) { 119 | if (!HAS_MARK(item->next[0])) { 120 | count++; 121 | } 122 | item = STRIP_MARK(item->next[0]); 123 | } 124 | return count; 125 | } 126 | 127 | static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl, map_key_t key, enum unlink unlink) { 128 | node_t *pred = sl->head; 129 | node_t *item = NULL; 130 | TRACE("s2", "find_preds: searching for key %p in skiplist (head is %p)", key, pred); 131 | int d = 0; 132 | 133 | // Traverse the levels of from the top level to the bottom 134 | for (int level = sl->high_water - 1; level >= 0; --level) { 135 | markable_t next = pred->next[level]; 136 | if (next == DOES_NOT_EXIST && level >= n) 137 | continue; 138 | TRACE("s3", "find_preds: traversing level %p starting at 
%p", level, pred); 139 | if (EXPECT_FALSE(HAS_MARK(next))) { 140 | TRACE("s2", "find_preds: pred %p is marked for removal (next %p); retry", pred, next); 141 | ASSERT(level == pred->num_levels - 1 || HAS_MARK(pred->next[level+1])); 142 | return find_preds(preds, succs, n, sl, key, unlink); // retry 143 | } 144 | item = GET_NODE(next); 145 | while (item != NULL) { 146 | next = item->next[level]; 147 | 148 | // A tag means an item is logically removed but not physically unlinked yet. 149 | while (EXPECT_FALSE(HAS_MARK(next))) { 150 | TRACE("s3", "find_preds: found marked item %p (next is %p)", item, next); 151 | if (unlink == DONT_UNLINK) { 152 | 153 | // Skip over logically removed items. 154 | item = STRIP_MARK(next); 155 | if (EXPECT_FALSE(item == NULL)) 156 | break; 157 | next = item->next[level]; 158 | } else { 159 | 160 | // Unlink logically removed items. 161 | markable_t other = SYNC_CAS(&pred->next[level], (markable_t)item, (markable_t)STRIP_MARK(next)); 162 | if (other == (markable_t)item) { 163 | TRACE("s3", "find_preds: unlinked item from pred %p", pred, 0); 164 | item = STRIP_MARK(next); 165 | } else { 166 | TRACE("s3", "find_preds: lost race to unlink item pred %p's link changed to %p", pred, other); 167 | if (HAS_MARK(other)) 168 | return find_preds(preds, succs, n, sl, key, unlink); // retry 169 | item = GET_NODE(other); 170 | } 171 | next = (item != NULL) ? item->next[level] : DOES_NOT_EXIST; 172 | } 173 | } 174 | 175 | if (EXPECT_FALSE(item == NULL)) { 176 | TRACE("s3", "find_preds: past the last item in the skiplist", 0, 0); 177 | break; 178 | } 179 | 180 | TRACE("s4", "find_preds: visiting item %p (next is %p)", item, next); 181 | TRACE("s4", "find_preds: key %p val %p", STRIP_MARK(item->key), item->val); 182 | 183 | if (EXPECT_TRUE(sl->key_type == NULL)) { 184 | d = item->key - key; 185 | } else { 186 | d = sl->key_type->cmp((void *)item->key, (void *)key); 187 | } 188 | 189 | if (d > 0) 190 | break; 191 | if (d == 0 && unlink != FORCE_UNLINK) 192 | break; 193 | 194 | pred = item; 195 | item = GET_NODE(next); 196 | } 197 | 198 | TRACE("s3", "find_preds: found pred %p next %p", pred, item); 199 | 200 | if (level < n) { 201 | if (preds != NULL) { 202 | preds[level] = pred; 203 | } 204 | if (succs != NULL) { 205 | succs[level] = item; 206 | } 207 | } 208 | } 209 | 210 | if (d == 0) { 211 | TRACE("s2", "find_preds: found matching item %p in skiplist, pred is %p", item, pred); 212 | return item; 213 | } 214 | TRACE("s2", "find_preds: found proper place for key %p in skiplist, pred is %p. returning null", key, pred); 215 | return NULL; 216 | } 217 | 218 | // Fast find that does not help unlink partially removed nodes and does not return the node's predecessors. 219 | map_val_t sl_lookup (skiplist_t *sl, map_key_t key) { 220 | TRACE("s1", "sl_lookup: searching for key %p in skiplist %p", key, sl); 221 | node_t *item = find_preds(NULL, NULL, 0, sl, key, DONT_UNLINK); 222 | 223 | // If we found an matching the return its value. 224 | if (item != NULL) { 225 | map_val_t val = item->val; 226 | if (val != DOES_NOT_EXIST) { 227 | TRACE("s1", "sl_lookup: found item %p. val %p. 
returning item", item, item->val); 228 | return val; 229 | } 230 | } 231 | 232 | TRACE("s1", "sl_lookup: no item in the skiplist matched the key", 0, 0); 233 | return DOES_NOT_EXIST; 234 | } 235 | 236 | map_key_t sl_min_key (skiplist_t *sl) { 237 | node_t *item = GET_NODE(sl->head->next[0]); 238 | while (item != NULL) { 239 | markable_t next = item->next[0]; 240 | if (!HAS_MARK(next)) 241 | return item->key; 242 | item = STRIP_MARK(next); 243 | } 244 | return DOES_NOT_EXIST; 245 | } 246 | 247 | static map_val_t update_item (node_t *item, map_val_t expectation, map_val_t new_val) { 248 | map_val_t old_val = item->val; 249 | 250 | // If the item's value is DOES_NOT_EXIST it means another thread removed the node out from under us. 251 | if (EXPECT_FALSE(old_val == DOES_NOT_EXIST)) { 252 | TRACE("s2", "update_item: lost a race to another thread removing the item. retry", 0, 0); 253 | return DOES_NOT_EXIST; // retry 254 | } 255 | 256 | if (EXPECT_FALSE(expectation == CAS_EXPECT_DOES_NOT_EXIST)) { 257 | TRACE("s1", "update_item: the expectation was not met; the skiplist was not changed", 0, 0); 258 | return old_val; // failure 259 | } 260 | 261 | // Use a CAS and not a SWAP. If the CAS fails it means another thread removed the node or updated its 262 | // value. If another thread removed the node but it is not unlinked yet and we used a SWAP, we could 263 | // replace DOES_NOT_EXIST with our value. Then another thread that is updating the value could think it 264 | // succeeded and return our value even though it should return DOES_NOT_EXIST. 265 | if (old_val == SYNC_CAS(&item->val, old_val, new_val)) { 266 | TRACE("s1", "update_item: the CAS succeeded. updated the value of the item", 0, 0); 267 | return old_val; // success 268 | } 269 | TRACE("s2", "update_item: lost a race. the CAS failed. another thread changed the item's value", 0, 0); 270 | 271 | // retry 272 | return update_item(item, expectation, new_val); // tail call 273 | } 274 | 275 | map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_t new_val) { 276 | TRACE("s1", "sl_cas: key %p skiplist %p", key, sl); 277 | TRACE("s1", "sl_cas: expectation %p new value %p", expectation, new_val); 278 | ASSERT((int64_t)new_val > 0); 279 | 280 | node_t *preds[MAX_LEVELS]; 281 | node_t *nexts[MAX_LEVELS]; 282 | node_t *new_item = NULL; 283 | int n = random_levels(sl); 284 | node_t *old_item = find_preds(preds, nexts, n, sl, key, ASSIST_UNLINK); 285 | 286 | // If there is already an item in the skiplist that matches the key just update its value. 287 | if (old_item != NULL) { 288 | map_val_t ret_val = update_item(old_item, expectation, new_val); 289 | if (ret_val != DOES_NOT_EXIST) 290 | return ret_val; 291 | 292 | // If we lose a race with a thread removing the item we tried to update then we have to retry. 293 | return sl_cas(sl, key, expectation, new_val); // tail call 294 | } 295 | 296 | if (EXPECT_FALSE(expectation != CAS_EXPECT_DOES_NOT_EXIST && expectation != CAS_EXPECT_WHATEVER)) { 297 | TRACE("s1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); 298 | return DOES_NOT_EXIST; // failure, the caller expected an item for the to already exist 299 | } 300 | 301 | // Create a new node and insert it into the skiplist. 302 | TRACE("s3", "sl_cas: attempting to insert a new item between %p and %p", preds[0], nexts[0]); 303 | map_key_t new_key = sl->key_type == NULL ? 
key : (map_key_t)sl->key_type->clone((void *)key); 304 | new_item = node_alloc(n, new_key, new_val); 305 | 306 | // Set the new item's next pointers to their proper values 307 | markable_t next = new_item->next[0] = (markable_t)nexts[0]; 308 | for (int level = 1; level < new_item->num_levels; ++level) { 309 | new_item->next[level] = (markable_t)nexts[level]; 310 | } 311 | 312 | // Link the new item into the skiplist from the bottom level up. After it is inserted into the bottom level 313 | // it is officially part of the skiplist. 314 | node_t *pred = preds[0]; 315 | markable_t other = SYNC_CAS(&pred->next[0], next, (markable_t)new_item); 316 | if (other != next) { 317 | TRACE("s3", "sl_cas: failed to change pred's link: expected %p found %p", next, other); 318 | 319 | // Lost a race to another thread modifying the skiplist. Free the new item we allocated and retry. 320 | if (sl->key_type != NULL) { 321 | nbd_free((void *)new_key); 322 | } 323 | nbd_free(new_item); 324 | return sl_cas(sl, key, expectation, new_val); // tail call 325 | } 326 | 327 | TRACE("s3", "sl_cas: successfully inserted a new item %p at the bottom level", new_item, 0); 328 | 329 | ASSERT(new_item->num_levels <= MAX_LEVELS); 330 | for (int level = 1; level < new_item->num_levels; ++level) { 331 | TRACE("s3", "sl_cas: inserting the new item %p at level %p", new_item, level); 332 | do { 333 | node_t *pred = preds[level]; 334 | ASSERT(new_item->next[level]==(markable_t)nexts[level] || new_item->next[level]==MARK_NODE(nexts[level])); 335 | TRACE("s3", "sl_cas: attempting to insert the new item between %p and %p", pred, nexts[level]); 336 | 337 | markable_t other = SYNC_CAS(&pred->next[level], (markable_t)nexts[level], (markable_t)new_item); 338 | if (other == (markable_t)nexts[level]) 339 | break; // successfully linked into the skiplist at the current level 340 | TRACE("s3", "sl_cas: lost a race. failed to change pred's link. expected %p found %p", nexts[level], other); 341 | 342 | // Find the new item's new preds and nexts. 343 | find_preds(preds, nexts, new_item->num_levels, sl, key, ASSIST_UNLINK); 344 | 345 | for (int i = level; i < new_item->num_levels; ++i) { 346 | markable_t old_next = new_item->next[i]; 347 | if ((markable_t)nexts[i] == old_next) 348 | continue; 349 | 350 | // Update the new item's inconsistent next pointer before trying again. Use a CAS so if another thread 351 | // is trying to remove the new item concurrently we do not stomp on the mark it places on the item. 352 | TRACE("s3", "sl_cas: attempting to update the new item's link from %p to %p", old_next, nexts[i]); 353 | other = SYNC_CAS(&new_item->next[i], old_next, (markable_t)nexts[i]); 354 | ASSERT(other == old_next || other == MARK_NODE(old_next)); 355 | 356 | // If another thread is removing this item we can stop linking it into the skiplist 357 | if (HAS_MARK(other)) { 358 | find_preds(NULL, NULL, 0, sl, key, FORCE_UNLINK); // see comment below 359 | return DOES_NOT_EXIST; 360 | } 361 | } 362 | } while (1); 363 | } 364 | 365 | // In case another thread was in the process of removing the new item while we were adding it, we have to 366 | // make sure it is completely unlinked before we return. We might have lost a race and inserted the new item 367 | // at some level after the other thread thought it was fully removed.
That is a problem because once a thread 368 | // thinks it completely unlinks a node it queues it to be freed 369 | if (HAS_MARK(new_item->next[new_item->num_levels - 1])) { 370 | find_preds(NULL, NULL, 0, sl, key, FORCE_UNLINK); 371 | } 372 | 373 | return DOES_NOT_EXIST; // success, inserted a new item 374 | } 375 | 376 | map_val_t sl_remove (skiplist_t *sl, map_key_t key) { 377 | TRACE("s1", "sl_remove: removing item with key %p from skiplist %p", key, sl); 378 | node_t *preds[MAX_LEVELS]; 379 | node_t *item = find_preds(preds, NULL, sl->high_water, sl, key, ASSIST_UNLINK); 380 | if (item == NULL) { 381 | TRACE("s3", "sl_remove: remove failed, an item with a matching key does not exist in the skiplist", 0, 0); 382 | return DOES_NOT_EXIST; 383 | } 384 | 385 | // Mark at each level of from the top down. If multiple threads try to concurrently remove 386 | // the same item only one of them should succeed. Marking the bottom level establishes which of them succeeds. 387 | markable_t old_next = 0; 388 | for (int level = item->num_levels - 1; level >= 0; --level) { 389 | markable_t next; 390 | old_next = item->next[level]; 391 | do { 392 | TRACE("s3", "sl_remove: marking item at level %p (next %p)", level, old_next); 393 | next = old_next; 394 | old_next = SYNC_CAS(&item->next[level], next, MARK_NODE((node_t *)next)); 395 | if (HAS_MARK(old_next)) { 396 | TRACE("s2", "sl_remove: %p is already marked for removal by another thread (next %p)", item, old_next); 397 | if (level == 0) 398 | return DOES_NOT_EXIST; 399 | break; 400 | } 401 | } while (next != old_next); 402 | } 403 | 404 | // Atomically swap out the item's value in case another thread is updating the item while we are 405 | // removing it. This establishes which operation occurs first logically, the update or the remove. 406 | map_val_t val = SYNC_SWAP(&item->val, DOES_NOT_EXIST); 407 | TRACE("s2", "sl_remove: replaced item %p's value with DOES_NOT_EXIT", item, 0); 408 | 409 | // unlink the item 410 | find_preds(NULL, NULL, 0, sl, key, FORCE_UNLINK); 411 | 412 | // free the node 413 | if (sl->key_type != NULL) { 414 | rcu_defer_free((void *)item->key); 415 | } 416 | rcu_defer_free(item); 417 | 418 | return val; 419 | } 420 | 421 | void sl_print (skiplist_t *sl, int verbose) { 422 | 423 | if (verbose) { 424 | for (int level = MAX_LEVELS - 1; level >= 0; --level) { 425 | node_t *item = sl->head; 426 | if (item->next[level] == DOES_NOT_EXIST) 427 | continue; 428 | printf("(%d) ", level); 429 | int i = 0; 430 | while (item) { 431 | markable_t next = item->next[level]; 432 | printf("%s%p ", HAS_MARK(next) ? "*" : "", item); 433 | item = STRIP_MARK(next); 434 | if (i++ > 30) { 435 | printf("..."); 436 | break; 437 | } 438 | } 439 | printf("\n"); 440 | fflush(stdout); 441 | } 442 | node_t *item = sl->head; 443 | int i = 0; 444 | while (item) { 445 | int is_marked = HAS_MARK(item->next[0]); 446 | printf("%s%p:0x%llx ", is_marked ? "*" : "", item, (uint64_t)item->key); 447 | if (item != sl->head) { 448 | printf("[%d]", item->num_levels); 449 | } else { 450 | printf("[HEAD]"); 451 | } 452 | for (int level = 1; level < item->num_levels; ++level) { 453 | node_t *next = STRIP_MARK(item->next[level]); 454 | is_marked = HAS_MARK(item->next[0]); 455 | printf(" %p%s", next, is_marked ? 
"*" : ""); 456 | if (item == sl->head && item->next[level] == DOES_NOT_EXIST) 457 | break; 458 | } 459 | printf("\n"); 460 | fflush(stdout); 461 | item = STRIP_MARK(item->next[0]); 462 | if (i++ > 30) { 463 | printf("...\n"); 464 | break; 465 | } 466 | } 467 | } 468 | printf("levels:%-2d count:%-6lld \n", sl->high_water, (uint64_t)sl_count(sl)); 469 | } 470 | 471 | sl_iter_t *sl_iter_begin (skiplist_t *sl, map_key_t key) { 472 | sl_iter_t *iter = (sl_iter_t *)nbd_malloc(sizeof(sl_iter_t)); 473 | if (key != DOES_NOT_EXIST) { 474 | find_preds(NULL, &iter->next, 1, sl, key, DONT_UNLINK); 475 | } else { 476 | iter->next = GET_NODE(sl->head->next[0]); 477 | } 478 | return iter; 479 | } 480 | 481 | map_val_t sl_iter_next (sl_iter_t *iter, map_key_t *key_ptr) { 482 | assert(iter); 483 | node_t *item = iter->next; 484 | while (item != NULL && HAS_MARK(item->next[0])) { 485 | item = STRIP_MARK(item->next[0]); 486 | } 487 | if (item == NULL) { 488 | iter->next = NULL; 489 | return DOES_NOT_EXIST; 490 | } 491 | iter->next = STRIP_MARK(item->next[0]); 492 | if (key_ptr != NULL) { 493 | *key_ptr = item->key; 494 | } 495 | return item->val; 496 | } 497 | 498 | void sl_iter_free (sl_iter_t *iter) { 499 | nbd_free(iter); 500 | } 501 | -------------------------------------------------------------------------------- /map/unsafe_skiplist.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * non thread safe skiplist 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include "common.h" 12 | #include "skiplist.h" 13 | #include "runtime.h" 14 | #include "mem.h" 15 | 16 | #define MAX_LEVELS 24 17 | 18 | typedef struct node { 19 | map_key_t key; 20 | map_val_t val; 21 | int num_levels; 22 | struct node *next[1]; 23 | } node_t; 24 | 25 | struct sl_iter { 26 | node_t *next; 27 | }; 28 | 29 | struct sl { 30 | node_t *head; 31 | const datatype_t *key_type; 32 | int high_water; // max level of any item in the list 33 | }; 34 | 35 | static int random_levels (skiplist_t *sl) { 36 | uint64_t r = nbd_rand(); 37 | int z = __builtin_ctz(r); 38 | int levels = (int)(z / 1.5); 39 | if (levels == 0) 40 | return 1; 41 | if (levels > sl->high_water) { 42 | levels = SYNC_ADD(&sl->high_water, 1); 43 | TRACE("s2", "random_levels: increased high water mark to %lld", sl->high_water, 0); 44 | } 45 | if (levels > MAX_LEVELS) { levels = MAX_LEVELS; } 46 | return levels; 47 | } 48 | 49 | static node_t *node_alloc (int num_levels, map_key_t key, map_val_t val) { 50 | assert(num_levels > 0 && num_levels <= MAX_LEVELS); 51 | size_t sz = sizeof(node_t) + (num_levels - 1) * sizeof(node_t *); 52 | node_t *item = (node_t *)nbd_malloc(sz); 53 | memset(item, 0, sz); 54 | item->key = key; 55 | item->val = val; 56 | item->num_levels = num_levels; 57 | TRACE("s2", "node_alloc: new node %p (%llu levels)", item, num_levels); 58 | return item; 59 | } 60 | 61 | skiplist_t *sl_alloc (const datatype_t *key_type) { 62 | skiplist_t *sl = (skiplist_t *)nbd_malloc(sizeof(skiplist_t)); 63 | sl->key_type = key_type; 64 | sl->high_water = 1; 65 | sl->head = node_alloc(MAX_LEVELS, 0, 0); 66 | memset(sl->head->next, 0, MAX_LEVELS * sizeof(skiplist_t *)); 67 | return sl; 68 | } 69 | 70 | void sl_free (skiplist_t *sl) { 71 | node_t *item = sl->head->next[0]; 72 | while (item) { 73 | node_t *next = item->next[0]; 74 | if (sl->key_type != NULL) { 75 | nbd_free((void *)item->key); 76 | } 77 | 
nbd_free(item); 78 | item = next; 79 | } 80 | } 81 | 82 | size_t sl_count (skiplist_t *sl) { 83 | size_t count = 0; 84 | node_t *item = sl->head->next[0]; 85 | while (item) { 86 | count++; 87 | item = item->next[0]; 88 | } 89 | return count; 90 | } 91 | 92 | static node_t *find_preds (node_t **preds, node_t **succs, int n, skiplist_t *sl, map_key_t key, int unlink) { 93 | node_t *pred = sl->head; 94 | node_t *item = NULL; 95 | TRACE("s2", "find_preds: searching for key %p in skiplist (head is %p)", key, pred); 96 | int d = 0; 97 | 98 | // Traverse the levels of from the top level to the bottom 99 | for (int level = sl->high_water - 1; level >= 0; --level) { 100 | node_t *next = pred->next[level]; 101 | if (next == DOES_NOT_EXIST && level >= n) 102 | continue; 103 | TRACE("s3", "find_preds: traversing level %p starting at %p", level, pred); 104 | item = next; 105 | while (item != NULL) { 106 | next = item->next[level]; 107 | 108 | if (EXPECT_TRUE(sl->key_type == NULL)) { 109 | d = item->key - key; 110 | } else { 111 | d = sl->key_type->cmp((void *)item->key, (void *)key); 112 | } 113 | 114 | if (d >= 0) { 115 | if (d == 0 && unlink) { 116 | pred->next[level] = next; 117 | TRACE("s3", "find_preds: unlinked item from pred %p", pred, 0); 118 | item = next; 119 | next = (item != NULL) ? item->next[level] : DOES_NOT_EXIST; 120 | } 121 | break; 122 | } 123 | 124 | pred = item; 125 | item = next; 126 | } 127 | 128 | TRACE("s3", "find_preds: found pred %p next %p", pred, item); 129 | 130 | if (level < n) { 131 | if (preds != NULL) { 132 | preds[level] = pred; 133 | } 134 | if (succs != NULL) { 135 | succs[level] = item; 136 | } 137 | } 138 | } 139 | 140 | if (d == 0) { 141 | TRACE("s2", "find_preds: found matching item %p in skiplist, pred is %p", item, pred); 142 | return item; 143 | } 144 | TRACE("s2", "find_preds: found proper place for key %p in skiplist, pred is %p. returning null", key, pred); 145 | return NULL; 146 | } 147 | 148 | // Fast find that does not return the node's predecessors. 149 | map_val_t sl_lookup (skiplist_t *sl, map_key_t key) { 150 | TRACE("s1", "sl_lookup: searching for key %p in skiplist %p", key, sl); 151 | node_t *item = find_preds(NULL, NULL, 0, sl, key, FALSE); 152 | 153 | // If we found an matching the return its value. 154 | if (item != NULL) { 155 | map_val_t val = item->val; 156 | return val; 157 | } 158 | 159 | TRACE("s1", "sl_lookup: no item in the skiplist matched the key", 0, 0); 160 | return DOES_NOT_EXIST; 161 | } 162 | 163 | map_key_t sl_min_key (skiplist_t *sl) { 164 | node_t *item = sl->head->next[0]; 165 | while (item != NULL) 166 | return item->key; 167 | return DOES_NOT_EXIST; 168 | } 169 | 170 | map_val_t sl_cas (skiplist_t *sl, map_key_t key, map_val_t expectation, map_val_t new_val) { 171 | TRACE("s1", "sl_cas: key %p skiplist %p", key, sl); 172 | TRACE("s1", "sl_cas: expectation %p new value %p", expectation, new_val); 173 | ASSERT((int64_t)new_val > 0); 174 | 175 | node_t *preds[MAX_LEVELS]; 176 | node_t *nexts[MAX_LEVELS]; 177 | node_t *new_item = NULL; 178 | int n = random_levels(sl); 179 | node_t *old_item = find_preds(preds, nexts, n, sl, key, FALSE); 180 | 181 | // If there is already an item in the skiplist that matches the key just update its value. 
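// (Unlike the lock-free variant in skiplist.c, the read-check-write sequence below is not atomic;
// that is acceptable here only because this file is documented as not thread safe.)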
182 | if (old_item != NULL) { 183 | map_val_t old_val = old_item->val; 184 | if (expectation == CAS_EXPECT_DOES_NOT_EXIST || 185 | (expectation != CAS_EXPECT_WHATEVER && expectation != CAS_EXPECT_EXISTS && expectation != old_val)) { 186 | TRACE("s1", "sl_cas: the expectation was not met; the skiplist was not changed", 0, 0); 187 | return old_val; 188 | } 189 | old_item->val = new_val; 190 | return old_val; 191 | } 192 | 193 | if (EXPECT_FALSE(expectation != CAS_EXPECT_DOES_NOT_EXIST && expectation != CAS_EXPECT_WHATEVER)) { 194 | TRACE("s1", "sl_cas: the expectation was not met, the skiplist was not changed", 0, 0); 195 | return DOES_NOT_EXIST; // failure, the caller expected an item for the to already exist 196 | } 197 | 198 | TRACE("s3", "sl_cas: inserting a new item between %p and %p", preds[0], nexts[0]); 199 | 200 | // Create a new node and insert it into the skiplist. 201 | map_key_t new_key = sl->key_type == NULL ? key : (map_key_t)sl->key_type->clone((void *)key); 202 | new_item = node_alloc(n, new_key, new_val); 203 | 204 | // Set 's next pointers to their proper values 205 | for (int level = 0; level < new_item->num_levels; ++level) { 206 | new_item->next[level] = nexts[level]; 207 | } 208 | 209 | // Link into 210 | for (int level = 0; level < new_item->num_levels; ++level) { 211 | preds[level]->next[level] = new_item; 212 | } 213 | 214 | return DOES_NOT_EXIST; // success, inserted a new item 215 | } 216 | 217 | map_val_t sl_remove (skiplist_t *sl, map_key_t key) { 218 | TRACE("s1", "sl_remove: removing item with key %p from skiplist %p", key, sl); 219 | node_t *preds[MAX_LEVELS]; 220 | node_t *item = find_preds(preds, NULL, sl->high_water, sl, key, FALSE); 221 | if (item == NULL) { 222 | TRACE("s3", "sl_remove: remove failed, an item with a matching key does not exist in the skiplist", 0, 0); 223 | return DOES_NOT_EXIST; 224 | } 225 | map_val_t val = item->val; 226 | 227 | // unlink the item 228 | find_preds(NULL, NULL, 0, sl, key, TRUE); 229 | 230 | // free the node 231 | if (sl->key_type != NULL) { 232 | nbd_free((void *)item->key); 233 | } 234 | nbd_free(item); 235 | 236 | return val; 237 | } 238 | 239 | void sl_print (skiplist_t *sl) { 240 | 241 | printf("high water: %d levels\n", sl->high_water); 242 | for (int level = MAX_LEVELS - 1; level >= 0; --level) { 243 | node_t *item = sl->head; 244 | if (item->next[level] == DOES_NOT_EXIST) 245 | continue; 246 | printf("(%d) ", level); 247 | int i = 0; 248 | while (item) { 249 | node_t *next = item->next[level]; 250 | printf("%p ", item); 251 | item = next; 252 | if (i++ > 30) { 253 | printf("..."); 254 | break; 255 | } 256 | } 257 | printf("\n"); 258 | fflush(stdout); 259 | } 260 | node_t *item = sl->head; 261 | int i = 0; 262 | while (item) { 263 | printf("%p:0x%llx ", item, (uint64_t)item->key); 264 | if (item != sl->head) { 265 | printf("[%d]", item->num_levels); 266 | } else { 267 | printf("[HEAD]"); 268 | } 269 | for (int level = 1; level < item->num_levels; ++level) { 270 | node_t *next = item->next[level]; 271 | printf(" %p", next); 272 | if (item == sl->head && item->next[level] == DOES_NOT_EXIST) 273 | break; 274 | } 275 | printf("\n"); 276 | fflush(stdout); 277 | item = item->next[0]; 278 | if (i++ > 30) { 279 | printf("...\n"); 280 | break; 281 | } 282 | } 283 | } 284 | 285 | sl_iter_t *sl_iter_begin (skiplist_t *sl, map_key_t key) { 286 | sl_iter_t *iter = (sl_iter_t *)nbd_malloc(sizeof(sl_iter_t)); 287 | if (key != DOES_NOT_EXIST) { 288 | find_preds(NULL, &iter->next, 1, sl, key, FALSE); 289 | } else { 290 | 
iter->next = sl->head->next[0]; 291 | } 292 | return iter; 293 | } 294 | 295 | map_val_t sl_iter_next (sl_iter_t *iter, map_key_t *key_ptr) { 296 | assert(iter); 297 | node_t *item = iter->next; 298 | if (item == NULL) { 299 | iter->next = NULL; 300 | return DOES_NOT_EXIST; 301 | } 302 | iter->next = item->next[0]; 303 | if (key_ptr != NULL) { 304 | *key_ptr = item->key; 305 | } 306 | return item->val; 307 | } 308 | 309 | void sl_iter_free (sl_iter_t *iter) { 310 | nbd_free(iter); 311 | } 312 | -------------------------------------------------------------------------------- /perf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | for ks in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 #26 27 28 29 30 3 | do 4 | for th in 8 5 | do 6 | output/perf_test $th $ks 7 | done 8 | done 9 | 10 | 11 | -------------------------------------------------------------------------------- /runtime/hazard.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * hazard pointers 6 | * 7 | * www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf 8 | * 9 | */ 10 | #include "common.h" 11 | #include "lwt.h" 12 | #include "mem.h" 13 | #include "tls.h" 14 | #include "runtime.h" 15 | #include "hazard.h" 16 | #include "lwt.h" 17 | 18 | typedef struct pending { 19 | void * ptr; 20 | free_t free_; 21 | } pending_t; 22 | 23 | typedef struct haz_local { 24 | pending_t *pending; // to be freed 25 | int pending_size; 26 | int pending_count; 27 | 28 | haz_t static_haz[STATIC_HAZ_PER_THREAD]; 29 | 30 | haz_t **dynamic; 31 | int dynamic_size; 32 | int dynamic_count; 33 | 34 | } __attribute__ ((aligned(CACHE_LINE_SIZE))) haz_local_t; 35 | 36 | static haz_local_t haz_local_[MAX_NUM_THREADS] = {}; 37 | 38 | static void sort_hazards (haz_t *hazards, int n) { 39 | TRACE("H3", "sort_hazards: sorting hazard list %p of %p elements", hazards, n); 40 | return; 41 | } 42 | 43 | static int search_hazards (void *p, haz_t *hazards, int n) { 44 | TRACE("H4", "search_hazards: searching list %p for hazard %p", hazards, p); 45 | for (int i = 0; i < n; ++i) { 46 | if (hazards[i] == p) { 47 | TRACE("H2", "haz_search_hazards: found hazard %p", p, 0); 48 | return TRUE; 49 | } 50 | } 51 | return FALSE; 52 | } 53 | 54 | static void resize_pending (void) { 55 | TRACE("H2", "haz_resize_pending", 0, 0); 56 | LOCALIZE_THREAD_LOCAL(ThreadId, int); 57 | haz_local_t *l = haz_local_ + ThreadId; 58 | pending_t *p = nbd_malloc(sizeof(pending_t) * l->pending_size * 2); 59 | memcpy(p, l->pending, sizeof(pending_t) * l->pending_size); // copy the old array; the size is in elements, not bytes 60 | nbd_free(l->pending); 61 | l->pending = p; 62 | l->pending_size *= 2; 63 | } 64 | 65 | void haz_defer_free (void *d, free_t f) { 66 | TRACE("H1", "haz_defer_free: %p (%p)", d, f); 67 | assert(d); 68 | assert(f); 69 | LOCALIZE_THREAD_LOCAL(ThreadId, int); 70 | haz_local_t *l = haz_local_ + ThreadId; 71 | while (l->pending_count == l->pending_size) { 72 | 73 | if (l->pending_size == 0) { 74 | l->pending_size = MAX_NUM_THREADS * STATIC_HAZ_PER_THREAD; 75 | l->pending = nbd_malloc(sizeof(pending_t) * l->pending_size); 76 | break; 77 | } 78 | 79 | // scan for hazard pointers 80 | haz_t *hazards = nbd_malloc(sizeof(haz_t) * l->pending_size); 81 | int hazard_count = 0; 82 | for (int i = 0; i < MAX_NUM_THREADS; ++i) { 83 | haz_local_t *h = haz_local_ + i; 84 | for (int j = 0; j < STATIC_HAZ_PER_THREAD;
++j) { 85 | if (h->static_haz[j] != NULL) { 86 | if (hazard_count == l->pending_size) { 87 | resize_pending(); 88 | nbd_free(hazards); 89 | haz_defer_free(d, f); 90 | return; 91 | } 92 | hazards[hazard_count++] = h->static_haz[j]; 93 | } 94 | } 95 | for (int j = 0; j < h->dynamic_count; ++j) { 96 | if (h->dynamic[j] != NULL && *h->dynamic[j] != NULL) { 97 | if (hazard_count == l->pending_size) { 98 | resize_pending(); 99 | nbd_free(hazards); 100 | haz_defer_free(d, f); 101 | return; 102 | } 103 | hazards[hazard_count++] = *h->dynamic[j]; 104 | } 105 | } 106 | } 107 | sort_hazards(hazards, hazard_count); 108 | 109 | // check for conflicts 110 | int conflicts_count = 0; 111 | for (int i = 0; i < l->pending_count; ++i) { 112 | pending_t *p = l->pending + i; 113 | if (search_hazards(p->ptr, hazards, hazard_count)) { 114 | l->pending[conflicts_count++] = *p; // put conflicts back on the pending list 115 | } else { 116 | assert(p->free_); 117 | assert(p->ptr); 118 | p->free_(p->ptr); // free pending item 119 | } 120 | } 121 | l->pending_count = conflicts_count; 122 | nbd_free(hazards); 123 | } 124 | assert(l->pending_size > l->pending_count); 125 | l->pending[ l->pending_count ].ptr = d; 126 | l->pending[ l->pending_count ].free_ = f; 127 | l->pending_count++; 128 | } 129 | 130 | haz_t *haz_get_static (int i) { 131 | TRACE("H1", "haz_get_static: %p", i, 0); 132 | if (i >= STATIC_HAZ_PER_THREAD) 133 | return NULL; 134 | LOCALIZE_THREAD_LOCAL(ThreadId, int); 135 | assert(i < STATIC_HAZ_PER_THREAD); 136 | haz_t *ret = &haz_local_[ThreadId].static_haz[i]; 137 | TRACE("H1", "haz_get_static: returning %p", ret, 0); 138 | return ret; 139 | } 140 | 141 | void haz_register_dynamic (haz_t *haz) { 142 | TRACE("H1", "haz_register_dynamic: %p", haz, 0); 143 | LOCALIZE_THREAD_LOCAL(ThreadId, int); 144 | haz_local_t *l = haz_local_ + ThreadId; 145 | 146 | if (l->dynamic_size == 0) { 147 | int n = MAX_NUM_THREADS * STATIC_HAZ_PER_THREAD; 148 | l->dynamic = nbd_malloc(sizeof(haz_t *) * n); 149 | l->dynamic_size = n; 150 | } 151 | 152 | if (l->dynamic_count == l->dynamic_size) { 153 | haz_t **d = nbd_malloc(sizeof(haz_t *) * l->dynamic_size * 2); 154 | memcpy(d, l->dynamic, sizeof(haz_t *) * l->dynamic_size); // copy the old array; the size is in elements, not bytes 155 | nbd_free(l->dynamic); 156 | l->dynamic = d; 157 | l->dynamic_size *= 2; 158 | } 159 | 160 | l->dynamic[ l->dynamic_count++ ] = haz; 161 | } 162 | 163 | // assumes the hazard pointer was registered in the same thread 164 | void haz_unregister_dynamic (void **haz) { 165 | TRACE("H1", "haz_unregister_dynamic: %p", haz, 0); 166 | LOCALIZE_THREAD_LOCAL(ThreadId, int); 167 | haz_local_t *l = haz_local_ + ThreadId; 168 | 169 | for (int i = 0; i < l->dynamic_count; ++i) { 170 | if (l->dynamic[i] == haz) { 171 | if (i != l->dynamic_count - 1) { 172 | l->dynamic[i] = l->dynamic[ l->dynamic_count - 1 ]; 173 | } 174 | l->dynamic_count--; 175 | return; 176 | } 177 | } 178 | assert(0); 179 | } 180 | -------------------------------------------------------------------------------- /runtime/lwt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * lightweight tracing 6 | */ 7 | #include 8 | #include "common.h" 9 | #include "rlocal.h" 10 | #include "lwt.h" 11 | #include "mem.h" 12 | 13 | #define LWT_BUFFER_SCALE 20 14 | #define LWT_BUFFER_SIZE (1ULL << LWT_BUFFER_SCALE) 15 | #define LWT_BUFFER_MASK (LWT_BUFFER_SIZE - 1) 16 | 17 | volatile int halt_ = 0; 18 | 19 | typedef
struct lwt_record { 20 | uint64_t timestamp; 21 | uint64_t format; 22 | size_t value1; 23 | size_t value2; 24 | } lwt_record_t; 25 | 26 | typedef struct lwt_buffer { 27 | uint32_t head; 28 | lwt_record_t x[0]; 29 | } lwt_buffer_t; 30 | 31 | lwt_buffer_t *TraceBuffer[MAX_NUM_THREADS] = {}; 32 | char TraceLevel[256] = {}; 33 | static const char *TraceSpec = ""; 34 | 35 | void lwt_thread_init (void) { 36 | int thread_index = GET_THREAD_INDEX(); 37 | 38 | if (TraceBuffer[thread_index] == NULL) { 39 | TraceBuffer[thread_index] = 40 | (lwt_buffer_t *)nbd_malloc(sizeof(lwt_buffer_t) + sizeof(lwt_record_t) * LWT_BUFFER_SIZE); 41 | memset(TraceBuffer[thread_index], 0, sizeof(lwt_buffer_t)); 42 | } 43 | } 44 | 45 | void lwt_set_trace_level (const char *flags) { 46 | assert(strlen(flags) % 2 == 0); // a well formed should be an even number of characters long 47 | TraceSpec = flags; 48 | memset(TraceLevel, 0, sizeof(TraceLevel)); 49 | for (int i = 0; flags[i]; i+=2) { 50 | TraceLevel[(unsigned)flags[i]] = flags[i+1]; 51 | } 52 | } 53 | 54 | static void dump_record (FILE *file, int thread_id, lwt_record_t *r, uint64_t offset) { 55 | // print the record if its trace category is enabled at a high enough level 56 | int flag = r->format >> 56; 57 | int level = (r->format >> 48) & 0xFF; 58 | if (TraceLevel[(unsigned)flag] >= level) { 59 | char s[3] = {flag, level, '\0'}; 60 | fprintf(file, "%09llu %d %s ", ((uint64_t)r->timestamp - offset) >> 5, thread_id, s); 61 | const char *format = (const char *)(size_t)(r->format & MASK(48)); // strip out the embedded flags 62 | fprintf(file, format, r->value1, r->value2); 63 | fprintf(file, "\n"); 64 | } 65 | } 66 | 67 | static void dump_buffer (FILE *file, int thread_index, uint64_t offset) { 68 | lwt_buffer_t *tb = TraceBuffer[thread_index]; 69 | assert(tb); 70 | if (tb->head > LWT_BUFFER_SIZE) { 71 | for (int i = tb->head & LWT_BUFFER_MASK; i < LWT_BUFFER_SIZE; ++i) { 72 | dump_record(file, thread_index + 1, tb->x + i, offset); 73 | } 74 | } 75 | 76 | for (int i = 0; i < (tb->head & LWT_BUFFER_MASK); ++i) { 77 | dump_record(file, thread_index + 1, tb->x + i, offset); 78 | } 79 | } 80 | 81 | void lwt_halt (void) { 82 | halt_ = 1; 83 | } 84 | 85 | void lwt_dump (const char *file_name) { 86 | halt_ = 1; 87 | uint64_t offset = (uint64_t)-1; 88 | 89 | for (int i = 0; i < MAX_NUM_THREADS; ++i) { 90 | if (TraceBuffer[i] != NULL && TraceBuffer[i]->head != 0) { 91 | uint64_t x = TraceBuffer[i]->x[0].timestamp; 92 | if (x < offset) { 93 | offset = x; 94 | } 95 | if (TraceBuffer[i]->head > LWT_BUFFER_SIZE) 96 | { 97 | x = TraceBuffer[i]->x[TraceBuffer[i]->head & LWT_BUFFER_MASK].timestamp; 98 | if (x < offset) { 99 | offset = x; 100 | } 101 | } 102 | } 103 | } 104 | 105 | if (offset != (uint64_t)-1) { 106 | FILE *file = fopen(file_name, "w"); 107 | assert(file); 108 | for (int i = 0; i < MAX_NUM_THREADS; ++i) { 109 | if (TraceBuffer[i] != NULL) { 110 | dump_buffer(file, i, offset); 111 | } 112 | } 113 | fflush(file); 114 | fclose(file); 115 | } 116 | } 117 | 118 | void lwt_trace_i (uint64_t format, size_t value1, size_t value2) { 119 | while (halt_) {} 120 | lwt_buffer_t *tb = TraceBuffer[GET_THREAD_INDEX()]; 121 | if (tb != NULL) { 122 | unsigned int u, l; 123 | __asm__ __volatile__("rdtsc" : "=a" (l), "=d" (u)); 124 | uint64_t timestamp = ((uint64_t)u << 32) | l; 125 | lwt_record_t temp = { timestamp, format, value1, value2 }; 126 | 127 | tb->x[tb->head++ & LWT_BUFFER_MASK] = temp; 128 | } 129 | } 130 | 
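A brief usage sketch of the tracing runtime above (a hedged illustration: the TRACE macro is
declared in lwt.h and is assumed to pack the two-character category/level code into the top 16 bits
of the format-string pointer, which is what dump_record() unpacks):

    lwt_thread_init();            // once per thread: allocate this thread's trace buffer
    lwt_set_trace_level("m3H2");  // enable category 'm' up to level '3', 'H' up to level '2'
    /* ... run the workload; TRACE(...) calls append records to the ring buffer ... */
    lwt_dump("trace.out");        // halt tracing and write every buffer, timestamps rebased to the earliest record
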
-------------------------------------------------------------------------------- /runtime/mem.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * Extreamly fast multi-threaded malloc. 6 | */ 7 | #ifndef USE_SYSTEM_MALLOC 8 | #define _BSD_SOURCE // so we get MAP_ANON on linux 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "common.h" 14 | #include "rlocal.h" 15 | #include "lwt.h" 16 | 17 | #ifndef NBD32 18 | #define MAX_SCALE 36 // allocate blocks up to 64GB (arbitrary, could be bigger) 19 | #define MIN_SCALE 3 // smallest allocated block is 8 bytes 20 | #define MAX_POINTER_BITS 48 21 | #define PAGE_SCALE 21 // 2MB pages 22 | #else 23 | #define MAX_SCALE 31 24 | #define MIN_SCALE 2 // smallest allocated block is 4 bytes 25 | #define MAX_POINTER_BITS 32 26 | #define PAGE_SCALE 12 // 4KB pages 27 | #endif 28 | #define PAGE_SIZE (1ULL << PAGE_SCALE) 29 | #define HEADERS_SIZE (((size_t)1ULL << (MAX_POINTER_BITS - PAGE_SCALE)) * sizeof(header_t)) 30 | 31 | typedef struct block { 32 | struct block *next; 33 | } block_t; 34 | 35 | // TODO: Break the page header into two parts. The first part is located in the header region. The 36 | // second part is located on the page and is only used when there are free items. 37 | typedef struct header { 38 | #ifdef RECYCLE_PAGES 39 | struct header *next; 40 | struct header *prev; 41 | block_t *free_list; // list of free blocks 42 | int num_in_use; 43 | #endif//RECYCLE_PAGES 44 | uint8_t owner; // thread id of owner 45 | uint8_t scale; // log2 of the block size 46 | } header_t; 47 | 48 | #ifdef RECYCLE_PAGES 49 | typedef struct size_class { 50 | header_t *active_page; 51 | header_t *oldest_partial; 52 | header_t *newest_partial; 53 | } size_class_t; 54 | #endif//RECYCLE_PAGES 55 | 56 | typedef struct tl { 57 | #ifndef RECYCLE_PAGES 58 | block_t *free_list[MAX_SCALE+1]; 59 | #else 60 | header_t *free_pages; 61 | size_class_t size_class[MAX_SCALE+1]; 62 | #endif//RECYCLE_PAGES 63 | block_t *blocks_from[MAX_NUM_THREADS]; 64 | block_t *blocks_to[MAX_NUM_THREADS]; 65 | } __attribute__((aligned(CACHE_LINE_SIZE))) tl_t; 66 | 67 | static header_t *headers_ = NULL; 68 | 69 | static tl_t tl_[MAX_NUM_THREADS] = {}; 70 | 71 | static inline header_t *get_header (void *r) { 72 | ASSERT(((size_t)r >> PAGE_SCALE) < HEADERS_SIZE); 73 | return headers_ + ((size_t)r >> PAGE_SCALE); 74 | } 75 | 76 | static void *get_new_region (int block_scale) { 77 | int thread_index = GET_THREAD_INDEX(); 78 | #ifdef RECYCLE_PAGES 79 | tl_t *tl = &tl_[thread_index]; // thread-local data 80 | if (block_scale <= PAGE_SCALE && tl->free_pages != NULL) { 81 | void *region = tl->free_pages; 82 | tl->free_pages = tl->free_pages->next; 83 | get_header(region)->scale = block_scale; 84 | return region; 85 | } 86 | #endif//RECYCLE_PAGES 87 | size_t region_size = (1ULL << block_scale); 88 | if (region_size < PAGE_SIZE) { 89 | region_size = PAGE_SIZE; 90 | } 91 | void *region = mmap(NULL, region_size, PROT_READ|PROT_WRITE, MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0); 92 | TRACE("m1", "get_new_region: mmapped new region %p (size %p)", region, region_size); 93 | if (region == (void *)-1) { 94 | perror("get_new_region: mmap"); 95 | exit(-1); 96 | } 97 | if ((size_t)region & (region_size - 1)) { 98 | TRACE("m0", "get_new_region: region not aligned", 0, 0); 99 | munmap(region, region_size); 100 | region = 
mmap(NULL, region_size * 2, PROT_READ|PROT_WRITE, MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0); 101 | if (region == (void *)-1) { 102 | perror("get_new_region: mmap"); 103 | exit(-1); 104 | } 105 | TRACE("m0", "get_new_region: mmapped new region %p (size %p)", region, region_size * 2); 106 | void *aligned = (void *)(((size_t)region + region_size) & ~(region_size - 1)); 107 | size_t extra = (char *)aligned - (char *)region; 108 | if (extra) { 109 | munmap(region, extra); 110 | TRACE("m0", "get_new_region: unmapped extra memory %p (size %p)", region, extra); 111 | } 112 | extra = ((char *)region + region_size) - (char *)aligned; 113 | if (extra) { 114 | munmap((char *)aligned + region_size, extra); 115 | TRACE("m0", "get_new_region: unmapped extra memory %p (size %p)", (char *)aligned + region_size, extra); 116 | } 117 | region = aligned; 118 | } 119 | assert(region); 120 | 121 | header_t *h = get_header(region); 122 | TRACE("m1", "get_new_region: header %p (%p)", h, h - headers_); 123 | assert(h->scale == 0); 124 | h->scale = block_scale; 125 | h->owner = thread_index; 126 | 127 | return region; 128 | } 129 | 130 | void mem_init (void) { 131 | assert(headers_ == NULL); 132 | // Allocate space for the page headers. This could be a big chunk of memory on 64 bit systems, 133 | // but it just takes up virtual address space. Physical space used by the headers is still 134 | // proportional to the amount of memory the user mallocs. 135 | headers_ = mmap(NULL, HEADERS_SIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); 136 | TRACE("m1", "mem_init: header page %p", headers_, 0); 137 | 138 | // initialize spsc queues 139 | for (int i = 0; i < MAX_NUM_THREADS; ++i) { 140 | for (int j = 0; j < MAX_NUM_THREADS; ++j) { 141 | if (i != j) { 142 | tl_[i].blocks_to[j] = (block_t *)&(tl_[j].blocks_from[i]); 143 | } 144 | } 145 | } 146 | } 147 | 148 | void nbd_free (void *x) { 149 | TRACE("m1", "nbd_free: block %p page %p", x, (size_t)x & ~MASK(PAGE_SCALE)); 150 | ASSERT(x); 151 | block_t *b = (block_t *)x; 152 | header_t *h = get_header(x); 153 | int b_scale = h->scale; 154 | TRACE("m1", "nbd_free: header %p scale %llu", h, b_scale); 155 | ASSERT(b_scale && b_scale <= MAX_SCALE); 156 | #ifdef RECYCLE_PAGES 157 | if (b_scale > PAGE_SCALE) { 158 | int rc = munmap(x, 1ULL << b_scale); 159 | ASSERT(rc == 0); 160 | rc = rc; 161 | } 162 | #endif 163 | #ifndef NDEBUG 164 | memset(b, 0xcd, (1ULL << b_scale)); // bear trap 165 | #endif 166 | int thread_index = GET_THREAD_INDEX(); 167 | tl_t *tl = &tl_[thread_index]; // thread-local data 168 | if (h->owner == thread_index) { 169 | TRACE("m1", "nbd_free: private block, old free list head %p", tl->free_list[b_scale], 0); 170 | 171 | #ifndef RECYCLE_PAGES 172 | b->next = tl->free_list[b_scale]; 173 | tl->free_list[b_scale] = b; 174 | #else //RECYCLE_PAGES 175 | b->next = h->free_list; 176 | h->free_list = b; 177 | h->num_in_use--; 178 | size_class_t *sc = &tl->size_class[b_scale]; 179 | if (sc->active_page != h) { 180 | if (h->num_in_use == 0) { 181 | // remove from the partial-page list 182 | if (h->next != NULL) { h->next->prev = h->prev; } 183 | if (h->prev != NULL) { h->prev->next = h->next; } 184 | // put on the free-page list 185 | h->next = tl->free_pages; 186 | tl->free_pages = h; 187 | } else { 188 | // move to the top of the partial-page list 189 | if (h->next != NULL) { 190 | h->next->prev = h->prev; 191 | if (h->prev != NULL) { h->prev->next = h->next; } 192 | h->prev = sc->newest_partial; 193 | h->next = NULL; 194 | sc->newest_partial = h; 195 | } 196 | 
197 | }
198 | #endif//RECYCLE_PAGES
199 | } else {
200 | // push onto its owner's queue
201 | int b_owner = h->owner;
202 | TRACE("m1", "nbd_free: owner %llu", b_owner, 0);
203 | 
204 | // The assignment statements are volatile to prevent the compiler from reordering them.
205 | VOLATILE_DEREF(b).next = NULL;
206 | VOLATILE_DEREF(tl->blocks_to[b_owner]).next = b;
207 | 
208 | tl->blocks_to[b_owner] = b;
209 | }
210 | }
211 | 
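An illustrative sketch (added commentary, not part of the original source) of the wait-free single-producer single-consumer queue trick used by nbd_free() above and drained by process_incoming_blocks() below: each pair of threads shares a singly linked list, the producer appends after the current tail, and the consumer always leaves the last node in place, so the two threads never write the same link at the same time. The producer side, reduced to its generic form (spsc_push_example is a hypothetical name):

static inline void spsc_push_example (block_t **tail, block_t *node) {
    VOLATILE_DEREF(node).next = NULL;  // 1. terminate the new node
    VOLATILE_DEREF(*tail).next = node; // 2. publish it after the current tail node
    *tail = node;                      // 3. privately advance the producer's tail pointer
}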
212 | static inline void process_incoming_blocks (tl_t *tl) {
213 | for (int p = 0; p < MAX_NUM_THREADS; ++p) {
214 | block_t *b = tl->blocks_from[p];
215 | if (EXPECT_FALSE(b == NULL)) continue; // the queue is completely empty
216 | 
217 | // Leave the last block on the queue. Removing the last block on the queue would create a
218 | // race with the producer thread putting a new block on the queue.
219 | for (block_t *next = b->next; next != NULL; b = next, next = b->next) {
220 | // push onto the appropriate free list
221 | #ifndef RECYCLE_PAGES
222 | int b_scale = get_header(b)->scale;
223 | b->next = tl->free_list[b_scale];
224 | tl->free_list[b_scale] = b;
225 | #else //RECYCLE_PAGES
226 | header_t *h = get_header(b);
227 | b->next = h->free_list;
228 | h->free_list = b;
229 | #endif//RECYCLE_PAGES
230 | }
231 | tl->blocks_from[p] = b;
232 | }
233 | }
234 | 
235 | static inline block_t *pop_free_list (tl_t *tl, int scale) {
236 | #ifndef RECYCLE_PAGES
237 | block_t **free_list = &tl->free_list[scale];
238 | #else //RECYCLE_PAGES
239 | size_class_t *sc = &tl->size_class[scale];
240 | if (EXPECT_FALSE(sc->active_page == NULL))
241 | return NULL;
242 | block_t **free_list = &sc->active_page->free_list;
243 | #endif//RECYCLE_PAGES
244 | block_t *b = *free_list;
245 | if (EXPECT_FALSE(b == NULL))
246 | return NULL;
247 | ASSERT(get_header(b)->scale == scale);
248 | *free_list = b->next;
249 | return b;
250 | }
251 | 
252 | // Allocate a block of memory at least size n. Blocks are binned in powers-of-two. Round up to
253 | // the nearest power of two.
254 | //
255 | // First check the current thread's free list for an available block. If there are no blocks on the
256 | // free list, pull items off of the current thread's incoming block queues and push them onto the
257 | // free list. If we didn't get an appropriate size block off of the block queues then allocate a new
258 | // page, break it up into blocks and push them onto the free list.
259 | void *nbd_malloc (size_t n) {
260 | // the scale is the log base 2 of n, rounded up
261 | int b_scale = (sizeof(void *) * __CHAR_BIT__) - __builtin_clzl((n) - 1);
262 | TRACE("m1", "nbd_malloc: size %llu (scale %llu)", n, b_scale);
263 | 
264 | if (EXPECT_FALSE(b_scale < MIN_SCALE)) { b_scale = MIN_SCALE; }
265 | if (EXPECT_FALSE(b_scale > MAX_SCALE)) { return NULL; }
266 | 
267 | tl_t *tl = &tl_[GET_THREAD_INDEX()]; // thread-local data
268 | 
269 | block_t *b = pop_free_list(tl, b_scale);
270 | if (b != NULL) {
271 | TRACE("m1", "nbd_malloc: returning block %p", b, 0);
272 | assert(b);
273 | return b;
274 | }
275 | 
276 | // The free list is empty so process blocks freed from other threads and then check again.
277 | process_incoming_blocks(tl);
278 | b = pop_free_list(tl, b_scale);
279 | if (b != NULL) {
280 | TRACE("m1", "nbd_malloc: returning block %p", b, 0);
281 | assert(b);
282 | return b;
283 | }
284 | 
285 | #ifdef RECYCLE_PAGES
286 | // The current active page is completely allocated. Make the oldest partially allocated page
287 | // the new active page.
288 | size_class_t *sc = &tl->size_class[b_scale];
289 | if (sc->oldest_partial != NULL) {
290 | sc->active_page = sc->oldest_partial;
291 | sc->oldest_partial = sc->oldest_partial->next;
292 | if (sc->oldest_partial != NULL) { sc->oldest_partial->prev = NULL; }
293 | b = pop_free_list(tl, b_scale);
294 | ASSERT(b != NULL);
295 | TRACE("m1", "nbd_malloc: returning block %p", b, 0);
296 | assert(b);
297 | return b;
298 | }
299 | // There are no partially allocated pages so get a new page.
300 | 
301 | #endif//RECYCLE_PAGES
302 | 
303 | // Get a new page.
304 | char *page = get_new_region(b_scale);
305 | b = (block_t *)page; // grab the first block on the page
306 | 
307 | // Break up the remainder of the page into blocks and put them on the free list. Start at the
308 | // end of the page so that the free list ends up in increasing order, for ease of debugging.
309 | if (b_scale < PAGE_SCALE) {
310 | size_t block_size = (1ULL << b_scale);
311 | block_t *head = NULL;
312 | for (int offset = PAGE_SIZE - block_size; offset > 0; offset -= block_size) {
313 | block_t *x = (block_t *)(page + offset);
314 | x->next = head; head = x;
315 | }
316 | #ifndef RECYCLE_PAGES
317 | tl->free_list[b_scale] = head;
318 | #else //RECYCLE_PAGES
319 | sc->active_page = get_header(page);
320 | sc->active_page->free_list = head;
321 | #endif//RECYCLE_PAGES
322 | }
323 | 
324 | TRACE("m1", "nbd_malloc: returning block %p from new region %p", b, (size_t)b & ~MASK(PAGE_SCALE));
325 | assert(b);
326 | return b;
327 | }
328 | #else//USE_SYSTEM_MALLOC
329 | #include <stdlib.h>
330 | #include "common.h"
331 | #include "rlocal.h"
332 | #include "lwt.h"
333 | 
334 | void mem_init (void) {
335 | return;
336 | }
337 | 
338 | void nbd_free (void *x) {
339 | TRACE("m1", "nbd_free: %p", x, 0);
340 | #ifndef NDEBUG
341 | memset(x, 0xcd, sizeof(void *)); // bear trap
342 | #endif//NDEBUG
343 | free(x);
344 | return;
345 | }
346 | 
347 | void *nbd_malloc (size_t n) {
348 | TRACE("m1", "nbd_malloc: request size %llu", n, 0);
349 | void *x = malloc(n);
350 | TRACE("m1", "nbd_malloc: returning %p", x, 0);
351 | return x;
352 | }
353 | #endif//USE_SYSTEM_MALLOC
354 | 
--------------------------------------------------------------------------------
/runtime/mem2.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Josh Dybnis and released to the public domain, as explained at
3 | * http://creativecommons.org/licenses/publicdomain
4 | *
5 | * fast multi-threaded malloc.
6 | */
7 | #ifndef USE_SYSTEM_MALLOC
8 | #define _BSD_SOURCE // so we get MAP_ANON on linux
9 | #include <sys/mman.h>
10 | #include <stdio.h>
11 | #include <errno.h>
12 | #include <stdlib.h>
13 | #include "common.h"
14 | #include "rlocal.h"
15 | #include "lwt.h"
16 | 
17 | #define CHUNK_SCALE 12 // 4k chunks
18 | #define PAGE_SCALE 21 // 2MB pages
19 | #define PAGE_SIZE (1ULL << PAGE_SCALE)
20 | 
21 | // On both linux and Mac OS X the size of the mmap-able virtual address space is between 2^46 and 2^47. Linux has
22 | // no problem when you grab the whole thing. Mac OS X apparently does some O(n) thing on the first page fault
23 | // that takes over 2 seconds if you mmap 2^46 bytes. So on Mac OS X we only take 2^38 bytes of virtual space, which
24 | // is OK, since you can only buy a Mac with up to 32GB of RAM (as of 2/09).
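A minimal sketch (added for illustration, not in the original file) of the reserve-then-commit pattern this comment describes and that mem_init() below uses for the page map: reserve a huge span with PROT_NONE and MAP_NORESERVE, which costs no physical memory, then commit pieces with mprotect() as they are actually needed. reserve_va_example is a hypothetical name:

static void *reserve_va_example (size_t reserve_size, size_t commit_size) {
    char *base = (char *)mmap(NULL, reserve_size, PROT_NONE,
                              MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0);
    if (base == MAP_FAILED)
        return NULL;                                         // reservation failed
    if (mprotect(base, commit_size, PROT_READ|PROT_WRITE) != 0)
        return NULL;                                         // committing the first piece failed
    return base;
}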
25 | #ifndef NBD32
26 | #ifdef __MACOSX__
27 | #define TOTAL_SCALE 38
28 | #else //__MACOSX__
29 | #define TOTAL_SCALE 46
30 | #endif//__MACOSX__
31 | #else// NBD32
32 | #define TOTAL_SCALE 32
33 | #endif//NBD32
34 | #define TOTAL_SIZE (1ULL << TOTAL_SCALE)
35 | 
36 | #define INVALID_SLAB_CLASS 255
37 | #define METASLAB_CLASS_MAX 2
38 | #define NESTED_4K_SLAB_CLASS_MAX 16
39 | #define NESTED_32K_SLAB_CLASS_MAX 39
40 | #define NESTED_256K_SLAB_CLASS_MAX 63
41 | #define NESTED_SLAB_CLASS_MAX NESTED_256K_SLAB_CLASS_MAX
42 | #define LARGE_SLAB_CLASS_MAX 93
43 | #define HUGE_SLAB_CLASS_MAX (sizeof(BlockSize) / sizeof(*BlockSize) - 1)
44 | #define SLAB_CLASS_MAX HUGE_SLAB_CLASS_MAX
45 | 
46 | #define NESTED_SLAB_CASES NESTED_4K_SLAB_CASES: case NESTED_32K_SLAB_CASES: case NESTED_256K_SLAB_CASES
47 | #define NESTED_4K_SLAB_CASES METASLAB_CLASS_MAX+1 ... NESTED_4K_SLAB_CLASS_MAX
48 | #define NESTED_32K_SLAB_CASES NESTED_4K_SLAB_CLASS_MAX+1 ... NESTED_32K_SLAB_CLASS_MAX: case 0
49 | #define NESTED_256K_SLAB_CASES NESTED_32K_SLAB_CLASS_MAX+1 ... NESTED_SLAB_CLASS_MAX: case 1
50 | #define LARGE_SLAB_CASES NESTED_SLAB_CLASS_MAX+1 ... LARGE_SLAB_CLASS_MAX: case 2
51 | #define HUGE_SLAB_CASES LARGE_SLAB_CLASS_MAX+1 ... HUGE_SLAB_CLASS_MAX
52 | 
53 | #define SLAB_CLASS_SCALE(class) ({ \
54 | int _scale = 0; \
55 | switch (class) { \
56 | case NESTED_4K_SLAB_CASES: _scale = 12; break; \
57 | case NESTED_32K_SLAB_CASES: _scale = 15; break; \
58 | case NESTED_256K_SLAB_CASES: _scale = 18; break; \
59 | case LARGE_SLAB_CASES: _scale = 21; break; \
60 | } \
61 | _scale; \
62 | })
63 | 
64 | // indexed by class
65 | static const uint32_t BlockSize[] = {
66 | // meta slab classes (for the nested slabs)
67 | 1 << 12, 1 << 15, 1 << 18,
68 | 
69 | // nested slab classes (4kB, 32kB, and 256kB)
70 | 8, 16, 24, 32, 40, 48, 56, 64, 72, 80,
71 | 88, 96, 112, 120, 128, 144, 160, 176, 192, 224,
72 | 256, 288, 320, 352, 384, 416, 448, 480, 512, 576,
73 | 640, 704, 768, 832, 896, 960, 1024, 1152, 1280, 1408,
74 | 1536, 1664, 1856, 2048, 2240, 2432, 2688, 2944, 3200, 3520,
75 | 3840, 4160, 4544, 4928, 5312, 5696, 6144, 6592, 7040, 7488,
76 | 7936,
77 | 
78 | // large slab classes (full page, 2MB)
79 | 8896, 9984, 11200, 12544, 14016, 15616, 17408, 19328, 21440, 23744,
80 | 26176, 28800, 31616, 34624, 37760, 41024, 44416, 47936, 51584, 55296,
81 | 59008, 62784, 66496, 70208, 73856, 77376, 80832, 84160, 87360, 90368,
82 | 93248, 95936, 98496, 100864,
83 | 
84 | // huge slabs (slabs on huge blocks, 2MB-4MB)
85 | 110912, 121984, 134144, 147520, 162240, 178432, 196224, 215808, 237376, 261056,
86 | 287104, 315776, 347328, 382016, 420160, 462144, 508352, 559168, 615040, 676544,
87 | 744192, 818560, 900416, 990400, 1089408, 1198336, 1318144, 1449920, 1594880, 1754368,
88 | 1929792
89 | };
90 | 
91 | typedef uint8_t class_t;
92 | 
93 | typedef struct block {
94 | struct block *next;
95 | } block_t;
96 | 
97 | typedef struct slab {
98 | unsigned valid:1;
99 | unsigned free_list:15;
100 | unsigned num_in_use:9;
101 | unsigned class:6;
102 | } __attribute__((packed)) slab_t;
103 | 
104 | typedef struct metaslab {
105 | 
106 | char * data;
107 | slab_t slab[1 << (PAGE_SCALE - CHUNK_SCALE)]; // one header per 4kB chunk on the page
108 | struct {
109 | struct metaslab *older;
110 | struct metaslab *newer;
111 | } q[NESTED_SLAB_CLASS_MAX+1];
112 | uint64_t partial_slab_bitmap2[NESTED_32K_SLAB_CLASS_MAX+1];
113 | uint8_t partial_slab_bitmap1[NESTED_SLAB_CLASS_MAX+1];
114 | } metaslab_t;
115 | 
116 | char *MemBase = NULL;
117 | char *MemEnd = NULL;
118 | char *PageBreak = NULL;
119 | size_t *PageMap = NULL;
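An illustrative helper (added, not part of the original file; example_class_for is a hypothetical name) showing how a request size maps into the BlockSize table above: skip the meta classes and take the first class whose block size fits, so a 100-byte request lands in the 112-byte class. get_slab_class() below is the file's real version of this lookup.

static class_t example_class_for (uint32_t size) {
    for (class_t class = METASLAB_CLASS_MAX + 1; class <= SLAB_CLASS_MAX; ++class) {
        if (size <= BlockSize[class])
            return class;          // first class big enough for the request
    }
    return INVALID_SLAB_CLASS;     // larger than the largest huge class
}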
120 | block_t *FreePages = NULL;
121 | struct { slab_t *slab; char *slab_base; int slab_index; metaslab_t *metaslab; } ActiveSlab[SLAB_CLASS_MAX + 1] = {}; // slab_index/metaslab track the active nested slab
122 | 
123 | struct {
124 | size_t slabs_in_use;
125 | size_t bytes_requested;
126 | size_t bytes_allocated;
127 | size_t total_bytes_allocated;
128 | } ClassStats[METASLAB_CLASS_MAX+1];
129 | 
130 | struct {
131 | slab_t *oldest;
132 | slab_t *newest;
133 | } PartialSlabQueue[SLAB_CLASS_MAX+1];
134 | 
135 | struct {
136 | slab_t *oldest;
137 | } FreshPartialSlabQueue[SLAB_CLASS_MAX+1];
138 | 
139 | static block_t *get_block (class_t slab_class);
140 | 
141 | void mem_init (void) {
142 | ASSERT(INVALID_SLAB_CLASS > SLAB_CLASS_MAX);
143 | 
144 | void *buf = mmap(NULL, TOTAL_SIZE, PROT_NONE, MAP_NORESERVE|MAP_ANON|MAP_PRIVATE, -1, 0);
145 | if (buf == (void *)-1) {
146 | perror("mmap");
147 | exit(-1);
148 | }
149 | MemEnd = buf + TOTAL_SIZE;
150 | MemBase = (char *)( ((size_t)buf + PAGE_SIZE-1) & ~(PAGE_SIZE-1) ); // align to a page boundary
151 | 
152 | size_t page_map_size = sizeof(void *) << (TOTAL_SCALE - PAGE_SCALE); // one entry per page
153 | mprotect(MemBase, page_map_size, PROT_READ|PROT_WRITE);
154 | PageBreak = MemBase + page_map_size;
155 | PageMap = (size_t *)MemBase;
156 | }
157 | 
158 | static class_t get_slab_class (size_t size) {
159 | for (int i = METASLAB_CLASS_MAX + 1; i <= SLAB_CLASS_MAX; ++i) {
160 | if (size <= BlockSize[i])
161 | return i;
162 | }
163 | return INVALID_SLAB_CLASS;
164 | }
165 | 
166 | static class_t get_meta_class (class_t class) {
167 | int scale = SLAB_CLASS_SCALE(class);
168 | if (scale == PAGE_SCALE || scale == 0)
169 | return INVALID_SLAB_CLASS;
170 | return (scale - 12) / 3;
171 | }
172 | 
173 | static void *get_page (void) {
174 | block_t *p = FreePages;
175 | if (p == NULL) {
176 | p = (block_t *)PageBreak;
177 | PageBreak += PAGE_SIZE;
178 | return p;
179 | }
180 | FreePages = p->next;
181 | return p;
182 | }
183 | 
184 | static void free_page (void *p) {
185 | ASSERT(p < (void *)PageBreak);
186 | block_t *b = (block_t *)p;
187 | b->next = FreePages;
188 | FreePages = b;
189 | }
190 | 
191 | static void init_slab (void *b, class_t slab_class) {
192 | } // TODO: not implemented yet; should initialize the slab header and its free list
193 | 
194 | static slab_t *new_large_slab (class_t slab_class) {
195 | return NULL; // TODO: not implemented yet
196 | }
197 | 
198 | static int find_partial_slab (metaslab_t *metaslab, class_t target_class, int target_index) {
199 | switch (target_class) {
200 | case NESTED_4K_SLAB_CASES:
201 | {
202 | // search nearby the target first
203 | int base_index = (target_index & ~0x7);
204 | for (int i = 0; i < 8; ++i) {
205 | if (base_index + i == target_index)
206 | continue;
207 | if (metaslab->slab[base_index + i].class == target_class)
208 | return base_index + i;
209 | }
210 | do {
211 | metaslab->partial_slab_bitmap2[target_class] &= ~(1ULL << (base_index >> 3));
212 | uint64_t bitmap = metaslab->partial_slab_bitmap2[target_class];
213 | if (bitmap == 0)
214 | return -1;
215 | int n = base_index >> 3;
216 | if (bitmap & (0xFFULL << (n & ~0x7))) {
217 | bitmap &= 0xFFULL << (n & ~0x7); // search nearby the target first
218 | }
219 | base_index = COUNT_TRAILING_ZEROS(bitmap) << 3;
220 | for (int i = 0; i < 8; ++i) {
221 | if (metaslab->slab[base_index + i].class == target_class)
222 | return base_index + i;
223 | }
224 | } while (1);
225 | }
226 | case NESTED_32K_SLAB_CASES:
227 | {
228 | uint64_t bitmap = metaslab->partial_slab_bitmap2[target_class];
229 | if (bitmap == 0)
230 | return -1;
231 | int n = target_index >> 3;
232 | if (bitmap & (0xFFULL << (n & ~0x7))) {
233 | bitmap &= 0xFFULL << (n & ~0x7); // search nearby the target first
234 | }
235 | return COUNT_TRAILING_ZEROS(bitmap) << 3;
236 | }
237 | case NESTED_256K_SLAB_CASES:
238 | {
239 | uint8_t bitmap = metaslab->partial_slab_bitmap1[target_class];
240 | if (bitmap == 0)
241 | return -1;
242 | return COUNT_TRAILING_ZEROS(bitmap) << 6;
243 | }
244 | default:
245 | ASSERT(FALSE);
246 | return -1;
247 | }
248 | }
249 | 
250 | static void activate_new_slab (class_t slab_class) {
251 | slab_t *new_slab = NULL;
252 | switch (slab_class) {
253 | case NESTED_SLAB_CASES: {
254 | int slab_index = ActiveSlab[slab_class].slab_index;
255 | metaslab_t *metaslab = ActiveSlab[slab_class].metaslab;
256 | 
257 | // First look for a partial slab on the same metaslab as the old active slab.
258 | int new_index = (metaslab == NULL) ? -1 : find_partial_slab(metaslab, slab_class, slab_index);
259 | if (new_index == -1) {
260 | // No partial slab on the same metaslab. Remove a metaslab from the front of the queue.
261 | metaslab = (metaslab_t *)PartialSlabQueue[slab_class].oldest;
262 | if (metaslab != NULL) {
263 | ASSERT(metaslab->q[slab_class].older == NULL);
264 | PartialSlabQueue[slab_class].oldest = (slab_t *)metaslab->q[slab_class].newer;
265 | if (metaslab->q[slab_class].newer != NULL) { metaslab->q[slab_class].newer->q[slab_class].older = NULL; }
266 | new_index = find_partial_slab(metaslab, slab_class, slab_index);
267 | } else {
268 | // Can't find a partial slab; create a new slab.
269 | new_slab = (slab_t *)get_block(get_meta_class(slab_class));
270 | init_slab(new_slab, slab_class);
271 | }
272 | }
273 | if (new_index != -1) { new_slab = &metaslab->slab[new_index]; } break; }
274 | 
275 | case LARGE_SLAB_CASES:
276 | case HUGE_SLAB_CASES:
277 | // large or huge slab class
278 | new_slab = PartialSlabQueue[slab_class].oldest;
279 | if (new_slab != NULL) {
280 | ASSERT(new_slab->older == NULL);
281 | PartialSlabQueue[slab_class].oldest = new_slab->newer;
282 | if (new_slab->newer != NULL) { new_slab->newer->older = NULL; }
283 | }
284 | if (new_slab == NULL) {
285 | if (IS_HUGE_SLAB_CLASS(slab_class)) {
286 | new_slab = new_large_slab(slab_class);
287 | } else {
288 | ASSERT(IS_LARGE_SLAB_CLASS(slab_class));
289 | new_slab = (slab_t *)get_page();
290 | }
291 | init_slab(new_slab, slab_class);
292 | }
293 | break;
294 | 
295 | default:
296 | ASSERT(FALSE);
297 | }
298 | 
299 | ActiveSlab[slab_class].slab = new_slab; // TODO: the nested case should also update .slab_index and .metaslab
300 | }
301 | 
302 | static block_t *get_block (class_t slab_class) {
303 | 
304 | // Look for a free block on the active slab.
305 | switch (slab_class) {
306 | case NESTED_SLAB_CASES: {
307 | int slab_index = ActiveSlab[slab_class].slab_index;
308 | metaslab_t *metaslab = ActiveSlab[slab_class].metaslab;
309 | if (metaslab != NULL) {
310 | slab_t slab = metaslab->slab[slab_index];
311 | if (slab.free_list) {
312 | char *slab_base = metaslab->data + ( ( slab_index - 1 ) << SLAB_CLASS_SCALE(slab_class) );
313 | void *b = (void *)( slab_base + ( ( slab.free_list - 1 ) << 3 ) );
314 | metaslab->slab[slab_index].free_list = *(uint16_t *)b;
315 | return b;
316 | }
317 | }
318 | break; }
319 | 
320 | case LARGE_SLAB_CASES:
321 | //TODO
322 | break;
323 | 
324 | case HUGE_SLAB_CASES:
325 | //TODO
326 | break;
327 | 
328 | default:
329 | ASSERT(FALSE);
330 | }
331 | 
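/*
 * Added note (for illustration; not in the original file): the bitmaps scanned by
 * find_partial_slab() above encode one bit per slab group. A 2MB page holds 512
 * 4kB chunks, tracked as 64 groups of 8, so for the 4kB classes bit k of
 * partial_slab_bitmap2[class] covers chunk indexes (k << 3) .. (k << 3) + 7, and
 * COUNT_TRAILING_ZEROS(bitmap) << 3 recovers the first chunk of the lowest marked
 * group; e.g. a bitmap with only bit 6 set sends the search to chunks 48..55.
 * For the 32kB classes bit k maps directly to the slab at chunk index k << 3, and
 * for the 256kB classes bit k of the 8-bit partial_slab_bitmap1[class] maps to
 * chunk index k << 6.
 */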
332 | // Find another slab, activate it, and try again.
333 | activate_new_slab(slab_class);
334 | return get_block(slab_class); // recursive tail-call
335 | }
336 | 
337 | void *nbd_malloc (size_t n) {
338 | TRACE("m1", "nbd_malloc: size %llu", n, 0);
339 | if (n == 0)
340 | return NULL;
341 | 
342 | block_t *b = get_block( get_slab_class(n) );
343 | 
344 | TRACE("m1", "nbd_malloc: returning block %p", b, 0);
345 | return b;
346 | }
347 | 
348 | void nbd_free (void *x) {
349 | TRACE("m1", "nbd_free: block %p", x, 0);
350 | ASSERT(x);
351 | ASSERT(x >= (void *)MemBase && x < (void *)MemEnd);
352 | 
353 | block_t *b = (block_t *)x;
354 | size_t page_index = (size_t)b >> PAGE_SCALE;
355 | metaslab_t *metaslab = (metaslab_t *)PageMap[page_index];
356 | ASSERT(metaslab);
357 | size_t slab_index = ((size_t)b & MASK(PAGE_SCALE)) >> 12;
358 | slab_t slab = metaslab->slab[slab_index];
359 | 
360 | // if the slab is not valid, the block is on a larger slab.
361 | if (slab.valid) {
362 | *(uint16_t *)b = slab.free_list; // thread the old head through the first two bytes of the block
363 | // the free-list offset of the block is biased by 1 so 0 can represent NULL.
364 | slab.free_list = ( ((size_t)b & MASK(12)) >> 3 ) + 1;
365 | } else {
366 | // the block is not on a 4kB slab.
367 | slab_index &= ~0x7; // Try the 32kB slab.
368 | slab = metaslab->slab[slab_index];
369 | if (slab.valid) {
370 | *(uint16_t *)b = slab.free_list;
371 | slab.free_list = ( ((size_t)b & MASK(15)) >> 3 ) + 1;
372 | } else {
373 | // the block is not on a 32kB slab.
374 | slab_index &= ~0x3F; // must be on the 256kB slab.
375 | slab = metaslab->slab[slab_index];
376 | ASSERT(slab.valid);
377 | *(uint16_t *)b = slab.free_list;
378 | slab.free_list = ( ((size_t)b & MASK(18)) >> 3 ) + 1;
379 | }
380 | }
381 | --slab.num_in_use;
382 | metaslab->slab[slab_index] = slab;
383 | if (slab.num_in_use == 0) {
384 | free_slab(metaslab, slab_index); // TODO: free_slab is not implemented in this file yet
385 | }
386 | }
387 | 
388 | #else//USE_SYSTEM_MALLOC
389 | #include <stdlib.h>
390 | 
391 | void mem_init (void) {
392 | return;
393 | }
394 | 
395 | void nbd_free (void *x) {
396 | TRACE("m1", "nbd_free: %p", x, 0);
397 | #ifndef NDEBUG
398 | memset(x, 0xcd, sizeof(void *)); // bear trap
399 | #endif//NDEBUG
400 | free(x);
401 | return;
402 | }
403 | 
404 | void *nbd_malloc (size_t n) {
405 | TRACE("m1", "nbd_malloc: request size %llu", n, 0);
406 | void *x = malloc(n);
407 | TRACE("m1", "nbd_malloc: returning %p", x, 0);
408 | return x;
409 | }
410 | #endif//USE_SYSTEM_MALLOC
411 | 
--------------------------------------------------------------------------------
/runtime/mem_class_calc.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <assert.h>
4 | 
5 | typedef unsigned char uint8_t;
6 | typedef unsigned short uint16_t;
7 | typedef unsigned int uint32_t;
8 | 
9 | #define CACHE_LINE_SCALE 6
10 | 
11 | // Return the expected fraction of bytes wasted per slab.
12 | //
13 | // The internal fragmentation due to using size classes is biased by including the space required
14 | // for a pointer to each block.
15 | double calc_frag(int slab_size, int block_size, int delta)
16 | {
17 | double quant = (double)delta / 2 / block_size;
18 | assert(quant >= 0.0);
19 | int blocks_per_slab = (int)(slab_size / block_size);
20 | 
21 | // internal fragmentation that comes from tiling non-power-of-2 sized blocks in slabs
22 | int extra_space = slab_size - blocks_per_slab * block_size;
23 | assert(extra_space < block_size);
24 | 
25 | // number of different cache line colors needed to evenly distribute cache line accesses
26 | int num_colors = block_size >> CACHE_LINE_SCALE;
27 | if (num_colors <= 1)
28 | return (double)extra_space/slab_size + quant;
29 | 
30 | int num_overflow = num_colors - 1 - (extra_space >> CACHE_LINE_SCALE);
31 | if (num_overflow <= 0)
32 | return (double)extra_space/slab_size + quant;
33 | 
34 | double coloring = (double)num_overflow * block_size / num_colors;
35 | return ((double)extra_space + coloring)/slab_size + quant;
36 | }
37 | 
38 | // size classes for various alignments, max 6% expected internal fragmentation
39 | 
40 | // 2B-128B blocks, 4k slab
41 | static uint8_t A1_4kB[] = { 2, 3, 5, 7, 9, 11, 14, 17, 20, 24, 28, 33, 39, 46, 53, 62, 70, 80, 91, 105, 120, 128 };
42 | static uint8_t A2_4kB[] = { 2, 4, 6, 8, 10, 14, 18, 22, 28, 34, 40, 48, 56, 66, 74, 84, 94, 104, 120, 128 };
43 | static uint8_t A4_4kB[] = { 4, 8, 12, 16, 20, 24, 32, 40, 48, 56, 68, 80, 92, 104, 120, 128 };
44 | static uint8_t A8_4kB[] = { 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 120, 128 };
45 | static uint8_t A16_4kB[] = { 16, 32, 48, 64, 80, 96, 112, 128 };
46 | 
47 | // 128B-1kB blocks, 32k slab
48 | static uint16_t A1_32kB[] = { 137, 156, 178, 201, 227, 256, 288, 323, 361, 402, 447, 494, 545, 598, 654, 712, 771, 832, 895, 958, 1022 };
49 | static uint16_t A8_32kB[] = { 144, 168, 192, 224, 256, 296, 336, 376, 424, 472, 528, 584, 640, 704, 768, 832, 896, 960, 1024 };
50 | static uint16_t A16_32kB[] = { 144, 176, 208, 240, 272, 320, 368, 416, 464, 512, 576, 640, 704, 768, 832, 896, 960, 1024 };
51 | 
52 | // 1kB-8kB blocks, 256k slab
53 | static uint16_t A1_256kB[] = { 1152, 1297, 1458, 1636, 1832, 2048, 2284, 2541, 2820, 3124, 3550, 3904, 4280, 4676, 5092, 5525, 5974, 6435, 6906, 7380, 7856 };
54 | static uint16_t A8_256kB[] = { 1152, 1288, 1440, 1608, 1792, 2000, 2224, 2472, 2744, 3032, 3344, 3680, 4040, 4416, 4816, 5232, 5664, 6112, 6568, 7032, 7504, 7976 };
55 | static uint16_t A64_256kB[] = { 1152, 1280, 1408, 1536, 1664, 1856, 2048, 2240, 2432, 2688, 2944, 3200, 3520, 3840, 4160, 4544, 4928, 5312, 5696, 6144, 6592, 7040, 7488, 7936 };
56 | 
57 | // 8kB-100kB blocks, 2MB slab
58 | static uint32_t A64_2MB[] = {
59 | 8896, 9984, 11200, 12544, 14016, 15616, 17408, 19328, 21440, 23744, 26176, 28800, 31616, 34624, 37760, 41024,
60 | 44416, 47936, 51584, 55296, 59008, 62784, 66496, 70208, 73856, 77376, 80832, 84160, 87360, 90368, 93248, 95936,
61 | 98496, 100864
62 | };
63 | 
64 | int main (void) {
65 | 
66 | double x = 100864;
67 | int n;
68 | for (n = 0; n < 40 && x < (1 << 21); ++n) {
69 | x *= 1.1;
70 | x = (uint32_t)x & ~63;
71 | printf("%u, ", (uint32_t)x);
72 | }
73 | printf("\n%d\n", n);
74 | return 0; // NOTE: this early return makes the size-class search below unreachable; remove it to run that code
75 | const int start1 = 120832;
76 | const int start2 = 1408;
77 | const int alignment = 64;
78 | #define ischosen(x) \
79 | (x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
80 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
81 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
82 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
83 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
84 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
85 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
86 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || \
87 | x == 0 || x == 0 || x == 0 || x == 0 || x == 0 || x == 0)
88 | 
89 | const int slab_size = 1 << 21;
90 | const double thresh = .06;
91 | int block_size;
92 | int i = 0;
93 | for (block_size = start1; i < 87 && block_size < (slab_size >> 3); ++i, block_size += alignment) {
94 | printf("%5d ", block_size);
95 | 
96 | int d;
97 | double min = 1;
98 | int ch = block_size + alignment;
99 | for (d = block_size; d >= alignment; d-=alignment) {
100 | int x = block_size - d;
101 | if (ischosen(x)) {
102 | double f = calc_frag(slab_size, block_size, d);
103 | if (f < thresh && f < min) { min = f; ch = d; }
104 | }
105 | }
106 | 
107 | for (d = start2; d > start2 - 1024; d-=alignment) {
108 | if (d <= block_size && d <= ch) {
109 | double f = calc_frag(slab_size, block_size, d);
110 | if (f < thresh) {
111 | if (d == ch) {
112 | printf(" *%3.1f%% ", f*100);
113 | } else {
114 | printf(" %4.1f%% ", f*100);
115 | }
116 | continue;
117 | }
118 | }
119 | if (d-1 <= block_size && d-alignment <= ch && calc_frag(slab_size, block_size, d - alignment) < thresh) {
120 | printf("%6d ", block_size);
121 | continue;
122 | }
123 | printf(" ");
124 | }
125 | 
126 | if (ischosen(block_size)) {
127 | printf("%5d*", block_size);
128 | } else {
129 | printf("%5d", block_size);
130 | }
131 | printf("\n");
132 | }
133 | return 0;
134 | }
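A worked instance (added for illustration, not in the original file) of the estimate calc_frag() above computes. For a 4kB slab of 128-byte blocks whose neighboring size class is 8 bytes smaller: quant = 8/2/128 = 0.03125, the blocks tile the slab exactly (extra_space = 0), num_colors = 128>>6 = 2, num_overflow = 2 - 1 - 0 = 1, and the coloring term is 1*128/2 = 64 bytes, giving 64/4096 + 0.03125, about 4.7% expected waste, under the 6% threshold used for the tables above.

static void calc_frag_example (void) {
    double f = calc_frag(1 << 12, 128, 8);        // slab_size, block_size, delta
    printf("expected waste: %.2f%%\n", f * 100);  // prints "expected waste: 4.69%"
}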
135 | 
--------------------------------------------------------------------------------
/runtime/random.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | #include <errno.h>
5 | #include <fcntl.h>
6 | #include <unistd.h>
7 | 
8 | #include "common.h"
9 | #include "runtime.h"
10 | 
11 | DECLARE_THREAD_LOCAL(rx_, uint32_t);
12 | DECLARE_THREAD_LOCAL(ry_, uint32_t);
13 | DECLARE_THREAD_LOCAL(rz_, uint32_t);
14 | DECLARE_THREAD_LOCAL(rc_, uint32_t);
15 | 
16 | void rnd_init (void) {
17 | INIT_THREAD_LOCAL(rx_);
18 | INIT_THREAD_LOCAL(ry_);
19 | INIT_THREAD_LOCAL(rz_);
20 | INIT_THREAD_LOCAL(rc_);
21 | }
22 | 
23 | // TODO: put a lock around this so that multiple threads initializing concurrently don't read
24 | // the same values from /dev/urandom
25 | void rnd_thread_init (void) {
26 | int fd = open("/dev/urandom", O_RDONLY);
27 | if (fd == -1) {
28 | perror("Error opening /dev/urandom");
29 | exit(1);
30 | }
31 | 
32 | char buf[16];
33 | 
34 | int n = read(fd, buf, sizeof(buf));
35 | if (n != 16) {
36 | if (n == -1) {
37 | perror("Error reading from /dev/urandom");
38 | }
39 | fprintf(stderr, "Could not read enough bytes from /dev/urandom");
40 | exit(1);
41 | }
42 | close(fd);
43 | uint32_t x, y, z, c;
44 | memcpy(&x, buf + 0, 4);
45 | memcpy(&y, buf + 4, 4);
46 | memcpy(&z, buf + 8, 4);
47 | memcpy(&c, buf + 12, 4);
48 | 
49 | SET_THREAD_LOCAL(rx_, x);
50 | SET_THREAD_LOCAL(ry_, y);
51 | SET_THREAD_LOCAL(rz_, z);
52 | SET_THREAD_LOCAL(rc_, c);
53 | }
54 | 
55 | // George Marsaglia's KISS generator
56 | //
57 | // Even though this returns 64 bits, this algorithm was only designed to generate 32 bits.
58 | // The upper 32 bits are going to be highly correlated with the lower 32 bits of the next call.
59 | uint64_t nbd_rand (void) {
60 | LOCALIZE_THREAD_LOCAL(rx_, unsigned);
61 | LOCALIZE_THREAD_LOCAL(ry_, unsigned);
62 | LOCALIZE_THREAD_LOCAL(rz_, unsigned);
63 | LOCALIZE_THREAD_LOCAL(rc_, unsigned);
64 | 
65 | uint32_t rx = 69069 * rx_ + 12345;
66 | uint32_t ry = ry_;
67 | ry ^= (ry << 13);
68 | ry ^= (ry >> 17);
69 | ry ^= (ry << 5);
70 | uint64_t t = rz_ * 698769069LL + rc_;
71 | uint64_t r = rx + ry + t;
72 | 
73 | SET_THREAD_LOCAL(rx_, rx);
74 | SET_THREAD_LOCAL(ry_, ry);
75 | SET_THREAD_LOCAL(rz_, t);
76 | SET_THREAD_LOCAL(rc_, t >> 32);
77 | 
78 | return r;
79 | }
80 | 
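A standalone restatement (added for illustration, not in the original file; kiss32_example is a hypothetical name) of the same KISS recurrence with Marsaglia's reference seeds, to make the three component generators explicit. Per the comment above, treat only the low 32 bits of each nbd_rand() result as random.

static inline uint32_t kiss32_example (void) {
    static uint32_t x = 123456789, y = 362436000, z = 521288629, c = 7654321;
    x = 69069 * x + 12345;                   // linear congruential component
    y ^= y << 13; y ^= y >> 17; y ^= y << 5; // xorshift component
    uint64_t t = z * 698769069ULL + c;       // multiply-with-carry component
    z = (uint32_t)t;
    c = (uint32_t)(t >> 32);
    return x + y + z;
}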
--------------------------------------------------------------------------------
/runtime/rcu.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Josh Dybnis and released to the public domain, as explained at
3 | * http://creativecommons.org/licenses/publicdomain
4 | *
5 | * safe memory reclamation using a simple technique from rcu
6 | *
7 | * WARNING: not robust enough for real-world use
8 | */
9 | #include <string.h>
10 | #include "common.h"
11 | #include "rlocal.h"
12 | #include "lwt.h"
13 | #include "mem.h"
14 | #include "tls.h"
15 | #include "rcu.h"
16 | 
17 | #define RCU_POST_THRESHOLD 10
18 | #define RCU_QUEUE_SCALE 20
19 | 
20 | typedef struct fifo {
21 | uint32_t head;
22 | uint32_t tail;
23 | uint32_t scale;
24 | void *x[0];
25 | } fifo_t;
26 | 
27 | #define MOD_SCALE(x, b) ((x) & MASK(b))
28 | static uint64_t rcu_[MAX_NUM_THREADS][MAX_NUM_THREADS] = {};
29 | static uint64_t rcu_last_posted_[MAX_NUM_THREADS][MAX_NUM_THREADS] = {};
30 | static fifo_t *pending_[MAX_NUM_THREADS] = {};
31 | static int num_threads_ = 0;
32 | 
33 | static fifo_t *fifo_alloc(int scale) {
34 | fifo_t *q = (fifo_t *)nbd_malloc(sizeof(fifo_t) + (1ULL << scale) * sizeof(void *));
35 | memset(q, 0, sizeof(fifo_t));
36 | q->scale = scale;
37 | q->head = 0;
38 | q->tail = 0;
39 | return q;
40 | }
41 | 
42 | void rcu_thread_init (void) {
43 | int thread_index = GET_THREAD_INDEX();
44 | if (pending_[thread_index] == NULL) {
45 | pending_[thread_index] = fifo_alloc(RCU_QUEUE_SCALE);
46 | (void)SYNC_ADD(&num_threads_, 1);
47 | }
48 | }
49 | 
50 | void rcu_update (void) {
51 | int thread_index = GET_THREAD_INDEX();
52 | int next_thread_index = (thread_index + 1) % num_threads_;
53 | TRACE("r1", "rcu_update: updating thread %llu", next_thread_index, 0);
54 | int i;
55 | for (i = 0; i < num_threads_; ++i) {
56 | if (i == thread_index)
57 | continue;
58 | 
59 | // No need to post an update if the value hasn't changed
60 | if (rcu_[thread_index][i] == rcu_last_posted_[thread_index][i])
61 | continue;
62 | 
63 | uint64_t x = rcu_[thread_index][i];
64 | rcu_[next_thread_index][i] = rcu_last_posted_[thread_index][i] = x;
65 | TRACE("r2", "rcu_update: posted updated value (%llu) for thread %llu", x, i);
66 | }
67 | 
68 | // free
69 | fifo_t *q = pending_[thread_index];
70 | while (q->tail != rcu_[thread_index][thread_index]) {
71 | uint32_t i = MOD_SCALE(q->tail, q->scale);
72 | TRACE("r0", "rcu_update: freeing %p from queue at position %llu", q->x[i], q->tail);
73 | nbd_free(q->x[i]);
74 | q->tail++;
75 | }
76 | }
77 | 
78 | void rcu_defer_free (void *x) {
79 | assert(x);
80 | int thread_index = GET_THREAD_INDEX();
81 | fifo_t *q = pending_[thread_index];
82 | assert(MOD_SCALE(q->head + 1, q->scale) != MOD_SCALE(q->tail, q->scale));
83 | uint32_t i = MOD_SCALE(q->head, q->scale);
84 | q->x[i] = x;
85 | TRACE("r0", "rcu_defer_free: put %p on queue at position %llu", x, q->head);
86 | q->head++;
87 | 
88 | if (pending_[thread_index]->head - rcu_last_posted_[thread_index][thread_index] >= RCU_POST_THRESHOLD) {
89 | TRACE("r0", "rcu_defer_free: posting %llu", pending_[thread_index]->head, 0);
90 | int next_thread_index = (thread_index + 1) % num_threads_;
91 | rcu_[next_thread_index][thread_index] = pending_[thread_index]->head;
92 | rcu_last_posted_[thread_index][thread_index] = pending_[thread_index]->head;
93 | }
94 | }
95 | 
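A usage sketch (added for illustration, not in the original file) of the calling pattern the API above expects; remove_from_shared_structure() is a hypothetical lock-free operation, and the real callers are the map tests under test/. Each thread initializes once, retires nodes it has unlinked with rcu_defer_free(), and calls rcu_update() only at points where it holds no references into the shared structure, i.e. a quiescent state.

extern void *remove_from_shared_structure (void); // hypothetical operation

void reader_thread_example (void) {
    rcu_thread_init();                 // once per thread, after nbd_thread_init()
    for (;;) {
        void *node = remove_from_shared_structure();
        if (node != NULL)
            rcu_defer_free(node);      // reclaimed after all threads pass a quiescent point
        rcu_update();                  // this thread holds no shared references here
    }
}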
--------------------------------------------------------------------------------
/runtime/rlocal.h:
--------------------------------------------------------------------------------
1 | #ifndef RLOCAL_H
2 | #define RLOCAL_H
3 | 
4 | #include "runtime.h"
5 | #include "tls.h"
6 | 
7 | extern DECLARE_THREAD_LOCAL(ThreadId, int);
8 | 
9 | #define GET_THREAD_INDEX() ({ LOCALIZE_THREAD_LOCAL(ThreadId, int); assert(ThreadId != 0); ThreadId - 1; })
10 | 
11 | void mem_init (void);
12 | void rnd_init (void);
13 | 
14 | void rnd_thread_init (void);
15 | void rcu_thread_init (void);
16 | void lwt_thread_init (void);
17 | 
18 | #endif//RLOCAL_H
19 | 
--------------------------------------------------------------------------------
/runtime/runtime.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Josh Dybnis and released to the public domain, as explained at
3 | * http://creativecommons.org/licenses/publicdomain
4 | */
5 | #include <stdlib.h>
6 | #include <pthread.h>
7 | #include "common.h"
8 | #include "runtime.h"
9 | #include "rlocal.h"
10 | #include "mem.h"
11 | #include "tls.h"
12 | 
13 | DECLARE_THREAD_LOCAL(ThreadId, int);
14 | 
15 | 
16 | static int MaxThreadId = 0;
17 | 
18 | __attribute__ ((constructor)) void nbd_init (void) {
19 | rnd_init();
20 | mem_init();
21 | }
22 | 
23 | void nbd_thread_init (void) {
24 | LOCALIZE_THREAD_LOCAL(ThreadId, int);
25 | 
26 | if (ThreadId == 0) {
27 | ++MaxThreadId; // TODO: reuse thread ids of threads that have been destroyed
28 | ASSERT(MaxThreadId <= MAX_NUM_THREADS);
29 | SET_THREAD_LOCAL(ThreadId, MaxThreadId);
30 | rnd_thread_init();
31 | }
32 | 
33 | lwt_thread_init();
34 | rcu_thread_init();
35 | }
36 | 
--------------------------------------------------------------------------------
/test/CuTest-license.txt:
--------------------------------------------------------------------------------
1 | NOTE
2 | 
3 | The license is based on the zlib/libpng license. For more details see
4 | http://www.opensource.org/licenses/zlib-license.html. The intent of the
5 | license is to:
6 | 
7 | - keep the license as simple as possible
8 | - encourage the use of CuTest in both free and commercial applications
9 | and libraries
10 | - keep the source code together
11 | - give credit to the CuTest contributors for their work
12 | 
13 | If you ship CuTest in source form with your source distribution, the
14 | following license document must be included with it in unaltered form.
15 | If you find CuTest useful we would like to hear about it.
16 | 
17 | LICENSE
18 | 
19 | Copyright (c) 2003 Asim Jalis
20 | 
21 | This software is provided 'as-is', without any express or implied
22 | warranty. In no event will the authors be held liable for any damages
23 | arising from the use of this software.
24 | 
25 | Permission is granted to anyone to use this software for any purpose,
26 | including commercial applications, and to alter it and redistribute it
27 | freely, subject to the following restrictions:
28 | 
29 | 1. The origin of this software must not be misrepresented; you must not
30 | claim that you wrote the original software. If you use this software in
31 | a product, an acknowledgment in the product documentation would be
32 | appreciated but is not required.
33 | 
34 | 2. Altered source versions must be plainly marked as such, and must not
35 | be misrepresented as being the original software.
36 | 
37 | 3. This notice may not be removed or altered from any source
38 | distribution.
39 | 
--------------------------------------------------------------------------------
/test/CuTest.c:
--------------------------------------------------------------------------------
1 | #include <assert.h>
2 | #include <setjmp.h>
3 | #include <stdio.h>
4 | #include <stdlib.h>
5 | #include <string.h>
6 | #include <math.h>
7 | 
8 | #include "CuTest.h"
9 | 
10 | /*-------------------------------------------------------------------------*
11 | * CuStr
12 | *-------------------------------------------------------------------------*/
13 | 
14 | char* CuStrAlloc(int size)
15 | {
16 | char* newStr = (char*) malloc( sizeof(char) * (size) );
17 | return newStr;
18 | }
19 | 
20 | char* CuStrCopy(const char* old)
21 | {
22 | int len = strlen(old);
23 | char* newStr = CuStrAlloc(len + 1);
24 | strcpy(newStr, old);
25 | return newStr;
26 | }
27 | 
28 | /*-------------------------------------------------------------------------*
29 | * CuString
30 | *-------------------------------------------------------------------------*/
31 | 
32 | void CuStringInit(CuString* str)
33 | {
34 | str->length = 0;
35 | str->size = STRING_MAX;
36 | str->buffer = (char*) malloc(sizeof(char) * str->size);
37 | str->buffer[0] = '\0';
38 | }
39 | 
40 | CuString* CuStringNew(void)
41 | {
42 | CuString* str = (CuString*) malloc(sizeof(CuString));
43 | str->length = 0;
44 | str->size = STRING_MAX;
45 | str->buffer = (char*) malloc(sizeof(char) * str->size);
46 | str->buffer[0] = '\0';
47 | return str;
48 | }
49 | 
50 | void CuStringResize(CuString* str, int newSize)
51 | {
52 | str->buffer = (char*) realloc(str->buffer, sizeof(char) * newSize);
53 | str->size = newSize;
54 | }
55 | 
56 | void CuStringAppend(CuString* str, const char* text)
57 | {
58 | int length;
59 | 
60 | if (text == NULL) {
61 | text = "NULL";
62 | }
63 | 
64 | length = strlen(text);
65 | if (str->length + length + 1 >= str->size)
66 | CuStringResize(str, str->length + length + 1 + STRING_INC);
67 | str->length += length;
68 | strcat(str->buffer, text);
69 | }
70 | 
71 | void CuStringAppendChar(CuString* str, char ch)
72 | {
73 | char text[2];
74 | text[0] = ch;
75 | text[1] = '\0';
76 | CuStringAppend(str, text);
77 | }
78 | 
79 | void CuStringAppendFormat(CuString* str, const char* format, ...)
80 | { 81 | va_list argp; 82 | char buf[HUGE_STRING_LEN]; 83 | va_start(argp, format); 84 | vsprintf(buf, format, argp); 85 | va_end(argp); 86 | CuStringAppend(str, buf); 87 | } 88 | 89 | void CuStringInsert(CuString* str, const char* text, int pos) 90 | { 91 | int length = strlen(text); 92 | if (pos > str->length) 93 | pos = str->length; 94 | if (str->length + length + 1 >= str->size) 95 | CuStringResize(str, str->length + length + 1 + STRING_INC); 96 | memmove(str->buffer + pos + length, str->buffer + pos, (str->length - pos) + 1); 97 | str->length += length; 98 | memcpy(str->buffer + pos, text, length); 99 | } 100 | 101 | /*-------------------------------------------------------------------------* 102 | * CuTest 103 | *-------------------------------------------------------------------------*/ 104 | 105 | void CuTestInit(CuTest* t, const char* name, TestFunction function) 106 | { 107 | t->name = CuStrCopy(name); 108 | t->failed = 0; 109 | t->ran = 0; 110 | t->message = NULL; 111 | t->function = function; 112 | t->jumpBuf = NULL; 113 | } 114 | 115 | CuTest* CuTestNew(const char* name, TestFunction function) 116 | { 117 | CuTest* tc = CU_ALLOC(CuTest); 118 | CuTestInit(tc, name, function); 119 | return tc; 120 | } 121 | 122 | void CuTestRun(CuTest* tc) 123 | { 124 | jmp_buf buf; 125 | tc->jumpBuf = &buf; 126 | if (setjmp(buf) == 0) 127 | { 128 | tc->ran = 1; 129 | (tc->function)(tc); 130 | } 131 | tc->jumpBuf = 0; 132 | } 133 | 134 | static void CuFailInternal(CuTest* tc, const char* file, int line, CuString* string) 135 | { 136 | char buf[HUGE_STRING_LEN]; 137 | 138 | sprintf(buf, "%s:%d: ", file, line); 139 | CuStringInsert(string, buf, 0); 140 | 141 | tc->failed = 1; 142 | tc->message = string->buffer; 143 | extern void lwt_halt(void); 144 | extern void lwt_dump(const char *); 145 | lwt_dump(tc->name); 146 | if (tc->jumpBuf != 0) longjmp(*(tc->jumpBuf), 0); 147 | } 148 | 149 | void CuFail_Line(CuTest* tc, const char* file, int line, const char* message2, const char* message) 150 | { 151 | CuString string; 152 | 153 | CuStringInit(&string); 154 | if (message2 != NULL) 155 | { 156 | CuStringAppend(&string, message2); 157 | CuStringAppend(&string, ": "); 158 | } 159 | CuStringAppend(&string, message); 160 | CuFailInternal(tc, file, line, &string); 161 | } 162 | 163 | void CuAssert_Line(CuTest* tc, const char* file, int line, const char* message, int condition) 164 | { 165 | if (condition) return; 166 | CuFail_Line(tc, file, line, NULL, message); 167 | } 168 | 169 | void CuAssertStrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 170 | const char* expected, const char* actual) 171 | { 172 | CuString string; 173 | if ((expected == NULL && actual == NULL) || 174 | (expected != NULL && actual != NULL && 175 | strcmp(expected, actual) == 0)) 176 | { 177 | return; 178 | } 179 | 180 | CuStringInit(&string); 181 | if (message != NULL) 182 | { 183 | CuStringAppend(&string, message); 184 | CuStringAppend(&string, ": "); 185 | } 186 | CuStringAppend(&string, "expected <"); 187 | CuStringAppend(&string, expected); 188 | CuStringAppend(&string, "> but was <"); 189 | CuStringAppend(&string, actual); 190 | CuStringAppend(&string, ">"); 191 | CuFailInternal(tc, file, line, &string); 192 | } 193 | 194 | void CuAssertIntEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 195 | int expected, int actual) 196 | { 197 | char buf[STRING_MAX]; 198 | if (expected == actual) return; 199 | sprintf(buf, "expected <%d> but was <%d>", expected, actual); 200 | 
CuFail_Line(tc, file, line, message, buf); 201 | } 202 | 203 | void CuAssertDblEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 204 | double expected, double actual, double delta) 205 | { 206 | char buf[STRING_MAX]; 207 | if (fabs(expected - actual) <= delta) return; 208 | sprintf(buf, "expected <%lf> but was <%lf>", expected, actual); 209 | CuFail_Line(tc, file, line, message, buf); 210 | } 211 | 212 | void CuAssertPtrEquals_LineMsg(CuTest* tc, const char* file, int line, const char* message, 213 | void* expected, void* actual) 214 | { 215 | char buf[STRING_MAX]; 216 | if (expected == actual) return; 217 | sprintf(buf, "expected pointer <0x%p> but was <0x%p>", expected, actual); 218 | CuFail_Line(tc, file, line, message, buf); 219 | } 220 | 221 | 222 | /*-------------------------------------------------------------------------* 223 | * CuSuite 224 | *-------------------------------------------------------------------------*/ 225 | 226 | void CuSuiteInit(CuSuite* testSuite) 227 | { 228 | testSuite->count = 0; 229 | testSuite->failCount = 0; 230 | } 231 | 232 | CuSuite* CuSuiteNew(void) 233 | { 234 | CuSuite* testSuite = CU_ALLOC(CuSuite); 235 | CuSuiteInit(testSuite); 236 | return testSuite; 237 | } 238 | 239 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase) 240 | { 241 | assert(testSuite->count < MAX_TEST_CASES); 242 | testSuite->list[testSuite->count] = testCase; 243 | testSuite->count++; 244 | } 245 | 246 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2) 247 | { 248 | int i; 249 | for (i = 0 ; i < testSuite2->count ; ++i) 250 | { 251 | CuTest* testCase = testSuite2->list[i]; 252 | CuSuiteAdd(testSuite, testCase); 253 | } 254 | } 255 | 256 | void CuSuiteRun(CuSuite* testSuite) 257 | { 258 | int i; 259 | for (i = 0 ; i < testSuite->count ; ++i) 260 | { 261 | CuTest* testCase = testSuite->list[i]; 262 | CuTestRun(testCase); 263 | if (testCase->failed) { testSuite->failCount += 1; } 264 | } 265 | } 266 | 267 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary) 268 | { 269 | int i; 270 | for (i = 0 ; i < testSuite->count ; ++i) 271 | { 272 | CuTest* testCase = testSuite->list[i]; 273 | CuStringAppend(summary, testCase->failed ? "F" : "."); 274 | } 275 | CuStringAppend(summary, "\n\n"); 276 | } 277 | 278 | void CuSuiteDetails(CuSuite* testSuite, CuString* details) 279 | { 280 | int i; 281 | int failCount = 0; 282 | 283 | if (testSuite->failCount == 0) 284 | { 285 | int passCount = testSuite->count - testSuite->failCount; 286 | const char* testWord = passCount == 1 ? 
"test" : "tests"; 287 | CuStringAppendFormat(details, "OK (%d %s)\n", passCount, testWord); 288 | } 289 | else 290 | { 291 | if (testSuite->failCount == 1) 292 | CuStringAppend(details, "There was 1 failure:\n"); 293 | else 294 | CuStringAppendFormat(details, "There were %d failures:\n", testSuite->failCount); 295 | 296 | for (i = 0 ; i < testSuite->count ; ++i) 297 | { 298 | CuTest* testCase = testSuite->list[i]; 299 | if (testCase->failed) 300 | { 301 | failCount++; 302 | CuStringAppendFormat(details, "%d) %s: %s\n", 303 | failCount, testCase->name, testCase->message); 304 | } 305 | } 306 | CuStringAppend(details, "\n!!!FAILURES!!!\n"); 307 | 308 | CuStringAppendFormat(details, "Runs: %d ", testSuite->count); 309 | CuStringAppendFormat(details, "Passes: %d ", testSuite->count - testSuite->failCount); 310 | CuStringAppendFormat(details, "Fails: %d\n", testSuite->failCount); 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /test/CuTest.h: -------------------------------------------------------------------------------- 1 | #ifndef CU_TEST_H 2 | #define CU_TEST_H 3 | 4 | #include 5 | #include 6 | 7 | /* CuString */ 8 | 9 | char* CuStrAlloc(int size); 10 | char* CuStrCopy(const char* old); 11 | 12 | #define CU_ALLOC(TYPE) ((TYPE*) malloc(sizeof(TYPE))) 13 | 14 | #define HUGE_STRING_LEN 8192 15 | #define STRING_MAX 256 16 | #define STRING_INC 256 17 | 18 | typedef struct 19 | { 20 | int length; 21 | int size; 22 | char* buffer; 23 | } CuString; 24 | 25 | void CuStringInit(CuString* str); 26 | CuString* CuStringNew(void); 27 | void CuStringRead(CuString* str, const char* path); 28 | void CuStringAppend(CuString* str, const char* text); 29 | void CuStringAppendChar(CuString* str, char ch); 30 | void CuStringAppendFormat(CuString* str, const char* format, ...); 31 | void CuStringInsert(CuString* str, const char* text, int pos); 32 | void CuStringResize(CuString* str, int newSize); 33 | 34 | /* CuTest */ 35 | 36 | typedef struct CuTest CuTest; 37 | 38 | typedef void (*TestFunction)(CuTest *); 39 | 40 | struct CuTest 41 | { 42 | const char* name; 43 | TestFunction function; 44 | int failed; 45 | int ran; 46 | const char* message; 47 | jmp_buf *jumpBuf; 48 | }; 49 | 50 | void CuTestInit(CuTest* t, const char* name, TestFunction function); 51 | CuTest* CuTestNew(const char* name, TestFunction function); 52 | void CuTestRun(CuTest* tc); 53 | 54 | /* Internal versions of assert functions -- use the public versions */ 55 | void CuFail_Line(CuTest* tc, 56 | const char* file, int line, const char* message2, const char* message); 57 | void CuAssert_Line(CuTest* tc, 58 | const char* file, int line, const char* message, int condition); 59 | void CuAssertStrEquals_LineMsg(CuTest* tc, 60 | const char* file, int line, const char* message, const char* expected, const char* actual); 61 | void CuAssertIntEquals_LineMsg(CuTest* tc, 62 | const char* file, int line, const char* message, int expected, int actual); 63 | void CuAssertDblEquals_LineMsg(CuTest* tc, 64 | const char* file, int line, const char* message, double expected, double actual, double delta); 65 | void CuAssertPtrEquals_LineMsg(CuTest* tc, 66 | const char* file, int line, const char* message, void* expected, void* actual); 67 | 68 | /* public assert functions */ 69 | 70 | #define CuFail(tc, ms) CuFail_Line( (tc), __FILE__, __LINE__, NULL, (ms)) 71 | #define CuAssert(tc, ms, cond) CuAssert_Line((tc), __FILE__, __LINE__, (ms), (cond)) 72 | #define CuAssertTrue(tc, cond) CuAssert_Line((tc), __FILE__, 
__LINE__, "assert failed", (cond))
73 | 
74 | #define CuAssertStrEquals(tc,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
75 | #define CuAssertStrEquals_Msg(tc,ms,ex,ac) CuAssertStrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
76 | #define CuAssertIntEquals(tc,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
77 | #define CuAssertIntEquals_Msg(tc,ms,ex,ac) CuAssertIntEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
78 | #define CuAssertDblEquals(tc,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac),(dl))
79 | #define CuAssertDblEquals_Msg(tc,ms,ex,ac,dl) CuAssertDblEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac),(dl))
80 | #define CuAssertPtrEquals(tc,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,NULL,(ex),(ac))
81 | #define CuAssertPtrEquals_Msg(tc,ms,ex,ac) CuAssertPtrEquals_LineMsg((tc),__FILE__,__LINE__,(ms),(ex),(ac))
82 | 
83 | #define CuAssertPtrNotNull(tc,p) CuAssert_Line((tc),__FILE__,__LINE__,"null pointer unexpected",(p != NULL))
84 | #define CuAssertPtrNotNullMsg(tc,msg,p) CuAssert_Line((tc),__FILE__,__LINE__,(msg),(p != NULL))
85 | 
86 | /* CuSuite */
87 | 
88 | #define MAX_TEST_CASES 1024
89 | 
90 | #define SUITE_ADD_TEST(SUITE,TEST) CuSuiteAdd(SUITE, CuTestNew(#TEST, TEST))
91 | 
92 | typedef struct
93 | {
94 | int count;
95 | CuTest* list[MAX_TEST_CASES];
96 | int failCount;
97 | 
98 | } CuSuite;
99 | 
100 | 
101 | void CuSuiteInit(CuSuite* testSuite);
102 | CuSuite* CuSuiteNew(void);
103 | void CuSuiteAdd(CuSuite* testSuite, CuTest *testCase);
104 | void CuSuiteAddSuite(CuSuite* testSuite, CuSuite* testSuite2);
105 | void CuSuiteRun(CuSuite* testSuite);
106 | void CuSuiteSummary(CuSuite* testSuite, CuString* summary);
107 | void CuSuiteDetails(CuSuite* testSuite, CuString* details);
108 | 
109 | #endif /* CU_TEST_H */
110 | 
--------------------------------------------------------------------------------
/test/haz_test.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Josh Dybnis and released to the public domain, as explained at
3 | * http://creativecommons.org/licenses/publicdomain
4 | *
5 | * hazard pointer test
6 | *
7 | */
8 | #include <stdio.h>
9 | #include <stdlib.h>
10 | #include <errno.h>
11 | #include <pthread.h>
12 | #include <string.h>
13 | #include <sys/time.h>
14 | #include "common.h"
15 | #include "mem.h"
16 | #include "runtime.h"
17 | #include "hazard.h"
18 | 
19 | #define NUM_ITERATIONS 10000000
20 | 
21 | typedef struct node {
22 | struct node *next;
23 | } node_t;
24 | 
25 | typedef struct lifo {
26 | node_t *head;
27 | } lifo_t;
28 | 
29 | static volatile int wait_;
30 | static lifo_t *stk_;
31 | 
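An illustrative helper (added, not part of the original test; protected_read_example is a hypothetical name) isolating the hazard-pointer read protocol the pop path below follows: publish the pointer, then re-read the shared location, and only trust the pointer once the two agree, so a concurrent haz_defer_free() cannot reclaim it out from under this thread.

static node_t *protected_read_example (haz_t *hp) {
    for (;;) {
        node_t *head = stk_->head;
        if (head == NULL)
            return NULL;
        haz_set(hp, head);                     // publish the hazard before validating
        if (head == VOLATILE_DEREF(stk_).head)
            return head;                       // still the head: safe to dereference
    }
}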
32 | void *worker (void *arg) {
33 | int id = (int)(size_t)arg;
34 | unsigned int r = (unsigned int)(id + 1) * 0x5bd1e995; // seed pseudo-random number generator
35 | haz_t *hp0 = haz_get_static(0);
36 | 
37 | // Wait for all the worker threads to be ready.
38 | (void)SYNC_ADD(&wait_, -1);
39 | do {} while (wait_);
40 | 
41 | int i;
42 | for (i = 0; i < NUM_ITERATIONS; ++ i) {
43 | r ^= r << 6; r ^= r >> 21; r ^= r << 7; // generate next pseudo-random number
44 | if (r & 0x1000) {
45 | // push
46 | node_t *new_head = (node_t *)nbd_malloc(sizeof(node_t));
47 | node_t *old_head = stk_->head;
48 | node_t *temp;
49 | do {
50 | temp = old_head;
51 | new_head->next = temp;
52 | } while ((old_head = SYNC_CAS(&stk_->head, temp, new_head)) != temp);
53 | } else {
54 | // pop
55 | node_t *temp;
56 | node_t *head = stk_->head;
57 | do {
58 | temp = head;
59 | if (temp == NULL)
60 | break;
61 | haz_set(hp0, temp);
62 | head = VOLATILE_DEREF(stk_).head;
63 | if (temp != head)
64 | continue;
65 | } while ((head = SYNC_CAS(&stk_->head, temp, temp->next)) != temp);
66 | 
67 | if (temp != NULL) {
68 | haz_defer_free(temp, nbd_free);
69 | }
70 | }
71 | }
72 | 
73 | return NULL;
74 | }
75 | 
76 | int main (int argc, char **argv) {
77 | //lwt_set_trace_level("m0r0");
78 | 
79 | int num_threads = MAX_NUM_THREADS;
80 | if (argc == 2)
81 | {
82 | errno = 0;
83 | num_threads = strtol(argv[1], NULL, 10);
84 | if (errno) {
85 | fprintf(stderr, "%s: Invalid argument for number of threads\n", argv[0]);
86 | return -1;
87 | }
88 | if (num_threads <= 0) {
89 | fprintf(stderr, "%s: Number of threads must be at least 1\n", argv[0]);
90 | return -1;
91 | }
92 | }
93 | 
94 | stk_ = (lifo_t *)nbd_malloc(sizeof(lifo_t));
95 | memset(stk_, 0, sizeof(lifo_t));
96 | 
97 | struct timeval tv1, tv2;
98 | gettimeofday(&tv1, NULL);
99 | wait_ = num_threads;
100 | 
101 | pthread_t thread[num_threads];
102 | for (int i = 0; i < num_threads; ++i) {
103 | int rc = nbd_thread_create(thread + i, i, worker, (void *)(size_t)i);
104 | if (rc != 0) { perror("pthread_create"); return rc; }
105 | }
106 | for (int i = 0; i < num_threads; ++i) {
107 | pthread_join(thread[i], NULL);
108 | }
109 | 
110 | gettimeofday(&tv2, NULL);
111 | int ms = (int)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000;
112 | printf("Th:%d Time:%dms\n\n", num_threads, ms);
113 | fflush(stdout);
114 | 
115 | return 0;
116 | }
117 | 
--------------------------------------------------------------------------------
/test/map_test1.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | 
6 | #include "common.h"
7 | #include "nstring.h"
8 | #include "runtime.h"
9 | #include "map.h"
10 | #include "rcu.h"
11 | #include "list.h"
12 | #include "skiplist.h"
13 | #include "hashtable.h"
14 | 
15 | #define NUM_ITERATIONS 10000000
16 | 
17 | //#define TEST_STRING_KEYS
18 | 
19 | static volatile int wait_;
20 | static long num_threads_;
21 | static map_t *map_;
22 | 
23 | void *worker (void *arg) {
24 | nbd_thread_init();
25 | 
26 | // Wait for all the worker threads to be ready.
27 | (void)SYNC_ADD(&wait_, -1); 28 | do {} while (wait_); 29 | 30 | #ifdef TEST_STRING_KEYS 31 | nstring_t *key_str = ns_alloc(10); 32 | #endif 33 | 34 | for (int i = 0; i < NUM_ITERATIONS/num_threads_; ++i) { 35 | unsigned r = nbd_rand(); 36 | int key = r & 0xF; 37 | #ifdef TEST_STRING_KEYS 38 | key_str->len = sprintf(key_str->data, "%X", key) + 1; 39 | assert(key_str->len <= 10); 40 | if (r & (1 << 8)) { 41 | map_set(map_, (map_key_t)key_str, 1); 42 | } else { 43 | map_remove(map_, (map_key_t)key_str); 44 | } 45 | #else 46 | if (r & (1 << 8)) { 47 | map_set(map_, (map_key_t)(key + 1), 1); 48 | } else { 49 | map_remove(map_, (map_key_t)(key + 1)); 50 | } 51 | #endif 52 | 53 | rcu_update(); 54 | } 55 | 56 | return NULL; 57 | } 58 | 59 | int main (int argc, char **argv) { 60 | nbd_thread_init(); 61 | lwt_set_trace_level("r0m3s3"); 62 | 63 | char* program_name = argv[0]; 64 | pthread_t thread[MAX_NUM_THREADS]; 65 | 66 | if (argc > 2) { 67 | fprintf(stderr, "Usage: %s num_threads\n", program_name); 68 | return -1; 69 | } 70 | 71 | num_threads_ = MAX_NUM_THREADS; 72 | if (argc == 2) 73 | { 74 | errno = 0; 75 | num_threads_ = strtol(argv[1], NULL, 10); 76 | if (errno) { 77 | fprintf(stderr, "%s: Invalid argument for number of threads\n", program_name); 78 | return -1; 79 | } 80 | if (num_threads_ <= 0) { 81 | fprintf(stderr, "%s: Number of threads must be at least 1\n", program_name); 82 | return -1; 83 | } 84 | if (num_threads_ > MAX_NUM_THREADS) { 85 | fprintf(stderr, "%s: Number of threads cannot be more than %d\n", program_name, MAX_NUM_THREADS); 86 | return -1; 87 | } 88 | } 89 | 90 | static const map_impl_t *map_types[] = { &MAP_IMPL_LL, &MAP_IMPL_SL, &MAP_IMPL_HT }; 91 | for (int i = 0; i < sizeof(map_types)/sizeof(*map_types); ++i) { 92 | #ifdef TEST_STRING_KEYS 93 | map_ = map_alloc(map_types[i], &DATATYPE_NSTRING); 94 | #else 95 | map_ = map_alloc(map_types[i], NULL); 96 | #endif 97 | 98 | struct timeval tv1, tv2; 99 | gettimeofday(&tv1, NULL); 100 | 101 | wait_ = num_threads_; 102 | 103 | for (int i = 0; i < num_threads_; ++i) { 104 | int rc = pthread_create(thread + i, NULL, worker, (void*)(size_t)i); 105 | if (rc != 0) { perror("pthread_create"); return rc; } 106 | } 107 | 108 | for (int i = 0; i < num_threads_; ++i) { 109 | pthread_join(thread[i], NULL); 110 | } 111 | 112 | gettimeofday(&tv2, NULL); 113 | int ms = (int)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000; 114 | map_print(map_, FALSE); 115 | printf("Th:%ld Time:%dms\n\n", num_threads_, ms); 116 | fflush(stdout); 117 | } 118 | 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /test/map_test2.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Josh Dybnis and released to the public domain, as explained at 3 | * http://creativecommons.org/licenses/publicdomain 4 | * 5 | * tests ported from high-scale-lib 6 | * http://sourceforge.net/projects/high-scale-lib 7 | */ 8 | #include 9 | #include 10 | #include 11 | 12 | #include "CuTest.h" 13 | 14 | #include "common.h" 15 | #include "runtime.h" 16 | #include "nstring.h" 17 | #include "map.h" 18 | #include "list.h" 19 | #include "skiplist.h" 20 | #include "hashtable.h" 21 | #include "lwt.h" 22 | #include "mem.h" 23 | #include "rcu.h" 24 | 25 | #define ASSERT_EQUAL(x, y) CuAssertIntEquals(tc, x, y) 26 | 27 | //#define TEST_STRING_KEYS 28 | 29 | typedef struct worker_data { 30 | int id; 31 | CuTest *tc; 32 | map_t *map; 33 | volatile int *wait; 
34 | } worker_data_t; 35 | 36 | static const map_impl_t *map_type_; 37 | 38 | static size_t iterator_size (map_t *map) { 39 | map_iter_t *iter = map_iter_begin(map, 0); 40 | size_t count = 0; 41 | while (map_iter_next(iter, NULL) != DOES_NOT_EXIST) { 42 | count++; 43 | } 44 | map_iter_free(iter); 45 | return count; 46 | } 47 | 48 | // Test some basic stuff; add a few keys, remove a few keys 49 | void basic_test (CuTest* tc) { 50 | 51 | #ifdef TEST_STRING_KEYS 52 | map_t *map = map_alloc(map_type_, &DATATYPE_NSTRING); 53 | nstring_t *s1 = ns_alloc(3); strcpy(s1->data, "k1"); 54 | nstring_t *s2 = ns_alloc(3); strcpy(s2->data, "k2"); 55 | nstring_t *s3 = ns_alloc(3); strcpy(s3->data, "k3"); 56 | nstring_t *s4 = ns_alloc(3); strcpy(s4->data, "k4"); 57 | map_key_t k1 = (map_key_t)s1; 58 | map_key_t k2 = (map_key_t)s2; 59 | map_key_t k3 = (map_key_t)s3; 60 | map_key_t k4 = (map_key_t)s4; 61 | #else 62 | map_t *map = map_alloc(map_type_, NULL); 63 | map_key_t k1 = (map_key_t)1; 64 | map_key_t k2 = (map_key_t)2; 65 | map_key_t k3 = (map_key_t)3; 66 | map_key_t k4 = (map_key_t)4; 67 | #endif 68 | 69 | ASSERT_EQUAL( 0, map_count (map) ); 70 | ASSERT_EQUAL( DOES_NOT_EXIST, map_add (map, k1,10) ); 71 | ASSERT_EQUAL( 1, map_count (map) ); 72 | ASSERT_EQUAL( 1, iterator_size(map) ); 73 | ASSERT_EQUAL( DOES_NOT_EXIST, map_add (map, k2,20) ); 74 | ASSERT_EQUAL( 2, map_count (map) ); 75 | ASSERT_EQUAL( 2, iterator_size(map) ); 76 | ASSERT_EQUAL( 20, map_get (map, k2) ); 77 | ASSERT_EQUAL( 10, map_set (map, k1,11) ); 78 | ASSERT_EQUAL( 20, map_set (map, k2,21) ); 79 | ASSERT_EQUAL( 2, map_count (map) ); 80 | ASSERT_EQUAL( 2, iterator_size(map) ); 81 | ASSERT_EQUAL( 21, map_add (map, k2,22) ); 82 | ASSERT_EQUAL( 11, map_remove (map, k1) ); 83 | ASSERT_EQUAL( DOES_NOT_EXIST, map_get (map, k1) ); 84 | ASSERT_EQUAL( 1, map_count (map) ); 85 | ASSERT_EQUAL( 1, iterator_size(map) ); 86 | ASSERT_EQUAL( DOES_NOT_EXIST, map_remove (map, k1) ); 87 | ASSERT_EQUAL( 21, map_remove (map, k2) ); 88 | ASSERT_EQUAL( 0, map_count (map) ); 89 | ASSERT_EQUAL( 0, iterator_size(map) ); 90 | ASSERT_EQUAL( DOES_NOT_EXIST, map_remove (map, k2) ); 91 | ASSERT_EQUAL( DOES_NOT_EXIST, map_remove (map, k3) ); 92 | ASSERT_EQUAL( 0, map_count (map) ); 93 | ASSERT_EQUAL( 0, iterator_size(map) ); 94 | 95 | ASSERT_EQUAL( DOES_NOT_EXIST, map_add (map, k4,40) ); 96 | ASSERT_EQUAL( 40, map_get (map, k4) ); 97 | ASSERT_EQUAL( 1, map_count (map) ); 98 | ASSERT_EQUAL( 1, iterator_size(map) ); 99 | ASSERT_EQUAL( 40, map_remove (map, k4) ); 100 | ASSERT_EQUAL( DOES_NOT_EXIST, map_get (map, k4) ); 101 | ASSERT_EQUAL( 0, map_count (map) ); 102 | ASSERT_EQUAL( 0, iterator_size(map) ); 103 | 104 | ASSERT_EQUAL( DOES_NOT_EXIST, map_replace(map, k4,10) ); 105 | ASSERT_EQUAL( DOES_NOT_EXIST, map_get (map, k4) ); 106 | ASSERT_EQUAL( DOES_NOT_EXIST, map_set (map, k4,40) ); 107 | ASSERT_EQUAL( 40, map_replace(map, k4,41) ); 108 | ASSERT_EQUAL( 41, map_get (map, k4) ); 109 | ASSERT_EQUAL( 41, map_remove (map, k4) ); 110 | ASSERT_EQUAL( DOES_NOT_EXIST, map_get (map, k4) ); 111 | ASSERT_EQUAL( 0, map_count (map) ); 112 | ASSERT_EQUAL( 0, iterator_size(map) ); 113 | 114 | ASSERT_EQUAL( DOES_NOT_EXIST, map_replace(map, k2,20) ); 115 | ASSERT_EQUAL( DOES_NOT_EXIST, map_get (map, k2) ); 116 | 117 | // In the end, all entries should be removed 118 | ASSERT_EQUAL( DOES_NOT_EXIST, map_set (map, k2,20) ); 119 | ASSERT_EQUAL( 20, map_replace(map, k2,21) ); 120 | ASSERT_EQUAL( 21, map_get (map, k2) ); 121 | ASSERT_EQUAL( 21, map_remove (map, k2) ); 122 | ASSERT_EQUAL( 
    ASSERT_EQUAL( 0, map_count (map) );
    ASSERT_EQUAL( 0, iterator_size(map) );

    map_free(map);

    rcu_update(); // In a quiescent state.
#ifdef TEST_STRING_KEYS
    nbd_free(s1); nbd_free(s2); nbd_free(s3); nbd_free(s4);
#endif
}

void *add_remove_worker (void *arg) {
    nbd_thread_init();

    worker_data_t *wd = (worker_data_t *)arg;
    map_t *map = wd->map;
    CuTest* tc = wd->tc;
    int d = wd->id;
    int iters = (map_type_ == &MAP_IMPL_LL ? 10000 : 100000);

    (void)SYNC_ADD(wd->wait, -1);
    do { } while (*wd->wait); // wait for all workers to be ready

    map_key_t key;
#ifdef TEST_STRING_KEYS
    nstring_t *s = ns_alloc(9);
    key = (map_key_t)s;
#endif

    for (int j = 0; j < 10; ++j) {
        for (int i = d+1; i < iters; i+=2) {
#ifdef TEST_STRING_KEYS
            s->len = 1 + snprintf(s->data, 9, "%u", i);
#else
            key = (map_key_t)i;
#endif
            TRACE("t0", "test map_add() iteration (%llu, %llu)", j, i);
            ASSERT_EQUAL(DOES_NOT_EXIST, map_add(map, key, d+1) );
            rcu_update(); // In a quiescent state.
        }
        for (int i = d+1; i < iters; i+=2) {
#ifdef TEST_STRING_KEYS
            s->len = 1 + snprintf(s->data, 9, "%u", i);
#else
            key = (map_key_t)i;
#endif
            TRACE("t0", "test map_remove() iteration (%llu, %llu)", j, i);
            ASSERT_EQUAL(d+1, map_remove(map, key) );
            rcu_update(); // In a quiescent state.
        }
    }
#ifdef TEST_STRING_KEYS
    nbd_free(s);
#endif
    return NULL;
}

// Do some simple concurrent testing
void concurrent_add_remove_test (CuTest* tc) {

    pthread_t thread[2];
    worker_data_t wd[2];
    volatile int wait = 2;
#ifdef TEST_STRING_KEYS
    map_t *map = map_alloc(map_type_, &DATATYPE_NSTRING);
#else
    map_t *map = map_alloc(map_type_, NULL);
#endif

    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);

    // In 2 threads, add & remove even & odd elements concurrently
    int i;
    for (i = 0; i < 2; ++i) {
        wd[i].id = i;
        wd[i].tc = tc;
        wd[i].map = map;
        wd[i].wait = &wait;
        int rc = pthread_create(thread + i, NULL, add_remove_worker, wd + i);
        if (rc != 0) { perror("pthread_create"); return; }
    }

    for (i = 0; i < 2; ++i) {
        pthread_join(thread[i], NULL);
    }

    gettimeofday(&tv2, NULL);
    int ms = (int)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000;
    map_print(map, FALSE);
    printf("Time:%dms\n", ms);
    fflush(stdout);

    // In the end, all members should be removed
    ASSERT_EQUAL( 0, map_count(map) );
    ASSERT_EQUAL( 0, iterator_size(map) );

    // In a quiescent state; it is safe to free.
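    // Both workers have been joined, so no other thread can still hold a
    // reference into the map, and the rcu_update() calls made inside the
    // workers let the runtime retire nodes handed to rcu_defer_free().
    // Freeing the map at this point is therefore race-free.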
    map_free(map);
}

void basic_iteration_test (CuTest* tc) {
#ifdef TEST_STRING_KEYS
    map_t *map = map_alloc(map_type_, &DATATYPE_NSTRING);
    nstring_t *s1 = ns_alloc(3); strcpy(s1->data, "k1");
    nstring_t *s2 = ns_alloc(3); strcpy(s2->data, "k2");
    map_key_t k1 = (map_key_t)s1;
    map_key_t k2 = (map_key_t)s2;
    nstring_t *x_k;
    nstring_t *y_k;
#else
    map_t *map = map_alloc(map_type_, NULL);
    map_key_t k1 = (map_key_t)1;
    map_key_t k2 = (map_key_t)2;
    map_key_t x_k;
    map_key_t y_k;
#endif

    ASSERT_EQUAL( DOES_NOT_EXIST, map_add (map, k1,1) );
    ASSERT_EQUAL( DOES_NOT_EXIST, map_add (map, k2,2) );

    map_val_t x_v, y_v;
    map_iter_t *iter = map_iter_begin(map, 0);
    x_v = map_iter_next(iter, (map_key_t *)&x_k);
    y_v = map_iter_next(iter, (map_key_t *)&y_k);
    ASSERT_EQUAL( DOES_NOT_EXIST, map_iter_next(iter, NULL) );
    map_iter_free(iter);
#ifdef TEST_STRING_KEYS
    ASSERT_EQUAL( TRUE, (ns_cmp(x_k, s1) == 0 && x_v == 1) || (ns_cmp(y_k, s1) == 0 && y_v == 1) );
    ASSERT_EQUAL( TRUE, (ns_cmp(x_k, s2) == 0 && x_v == 2) || (ns_cmp(y_k, s2) == 0 && y_v == 2) );
    nbd_free(s1);
    nbd_free(s2);
#else
    ASSERT_EQUAL( TRUE, (x_k == k1 && x_v == 1) || (y_k == k1 && y_v == 1) );
    ASSERT_EQUAL( TRUE, (x_k == k2 && x_v == 2) || (y_k == k2 && y_v == 2) );
#endif

    map_free(map);
}

void big_iteration_test (CuTest* tc) {
    static const int n = 10000;

#ifdef TEST_STRING_KEYS
    map_t *map = map_alloc(map_type_, &DATATYPE_NSTRING);
    nstring_t *s = ns_alloc(9);
    nstring_t *s3 = ns_alloc(3); strcpy(s3->data, "k3");
    nstring_t *s4 = ns_alloc(3); strcpy(s4->data, "k4");
    map_key_t k3 = (map_key_t)s3;
    map_key_t k4 = (map_key_t)s4;
    map_key_t key = (map_key_t)s;
#else
    map_t *map = map_alloc(map_type_, NULL);
    map_key_t k3 = (map_key_t)3;
    map_key_t k4 = (map_key_t)4;
    map_key_t key;
#endif

    for (int i = 1; i <= n; ++i) {
#ifdef TEST_STRING_KEYS
        s->len = 1 + snprintf(s->data, 9, "k%d", i);
#else
        key = (map_key_t)i;
#endif
        ASSERT_EQUAL( DOES_NOT_EXIST, map_get(map, key) );
        ASSERT_EQUAL( DOES_NOT_EXIST, map_set(map, key, i) );
        ASSERT_EQUAL( i, map_get(map, key) );
        rcu_update(); // In a quiescent state.
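        // Announcing a quiescent state once per insertion (the rcu_update()
        // above) lets the RCU runtime process pending deferred frees as the
        // loop runs, instead of letting the deferral backlog grow across all
        // n iterations.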
    }

    ASSERT_EQUAL( n, map_count(map) );
    ASSERT_EQUAL( n, iterator_size(map) );

    uint64_t sum = 0;
    map_val_t val;
    map_iter_t *iter = map_iter_begin(map, 0);
    while ((val = map_iter_next(iter, NULL)) != DOES_NOT_EXIST) {
        sum += val;
    }
    map_iter_free(iter);
    ASSERT_EQUAL(n*(n+1)/2, sum);
    ASSERT_EQUAL(3, map_remove(map, k3));
    ASSERT_EQUAL(4, map_remove(map, k4));
    sum = 0;
    iter = map_iter_begin(map, 0);
    while ((val = map_iter_next(iter, NULL)) != DOES_NOT_EXIST) {
        sum += val;
    }
    map_iter_free(iter);
    ASSERT_EQUAL(n*(n+1)/2 - (3+4), sum);

#ifdef TEST_STRING_KEYS
    nbd_free(s); nbd_free(s3); nbd_free(s4);
#endif
    map_free(map);
}

int main (void) {
    nbd_thread_init();
    lwt_set_trace_level("r0m3l2t0");

    static const map_impl_t *map_types[] = { &MAP_IMPL_LL, &MAP_IMPL_SL, &MAP_IMPL_HT };
    for (int i = 0; i < sizeof(map_types)/sizeof(*map_types); ++i) {
        map_type_ = map_types[i];

        // Create and run test suite
        CuString *output = CuStringNew();
        CuSuite* suite = CuSuiteNew();

        SUITE_ADD_TEST(suite, concurrent_add_remove_test);
//      SUITE_ADD_TEST(suite, basic_test);
//      SUITE_ADD_TEST(suite, basic_iteration_test);
//      SUITE_ADD_TEST(suite, big_iteration_test);

        CuSuiteRun(suite);
        CuSuiteDetails(suite, output);
        printf("%s\n", output->buffer);
    }

    return 0;
}
--------------------------------------------------------------------------------
/test/perf_test.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include <sys/time.h>
#include <unistd.h>

#include "common.h"
#include "nstring.h"
#include "runtime.h"
#include "map.h"
#include "rcu.h"
#include "mem.h"
#include "list.h"
#include "skiplist.h"
#include "hashtable.h"

//#define TEST_STRING_KEYS

static int num_threads_;
static volatile int start_, stop_, load_;
static map_t *map_;
static int get_range_, put_range_;
static size_t num_keys_;
static double load_time_;
static int duration_;

#define OP_SELECT_RANGE (1ULL << 20)

void *worker (void *arg) {
    nbd_thread_init();

    // Wait for all the worker threads to be ready.
    (void)SYNC_ADD(&load_, -1);
    do {} while (load_);

    // Pre-load map
    int n = num_keys_ / 2 / num_threads_;
    for (int i = 0; i < n; ++i) {
        map_key_t key = (nbd_rand() & (num_keys_ - 1)) + 1;
        map_set(map_, key, key);
    }

    // Wait for all the worker threads to be done loading.
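    // Countdown-barrier pattern used throughout these tests: run_test() sets
    // start_ to num_threads_ + 1, each worker decrements it once and spins,
    // and the main thread spins until the count reaches 1 (everyone has
    // checked in) before releasing all workers by writing 0:
    //
    //     worker:      SYNC_ADD(&start_, -1);  do {} while (start_);
    //     main thread: do {} while (start_ != 1);  start_ = 0;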
    (void)SYNC_ADD(&start_, -1);
    do {} while (start_);

    uint64_t ops = 0;
    while (!stop_) {
        ++ops;
        map_key_t key = (nbd_rand() & (num_keys_ - 1)) + 1;
        map_key_t x = nbd_rand() & (OP_SELECT_RANGE - 1);
        if (x < get_range_) {
#ifndef NDEBUG
            map_val_t val =
#endif
            map_get(map_, key);
#ifdef TEST_STRING_KEYS
            ASSERT(val == DOES_NOT_EXIST || ns_cmp((nstring_t *)key, (nstring_t *)val) == 0);
#else
            ASSERT(val == DOES_NOT_EXIST || key == val);
#endif
        } else if (x < put_range_) {
            map_add(map_, key, key);
        } else {
            map_remove(map_, key);
        }
        rcu_update();
    }

    return (void *)ops;
}

uint64_t run_test (void) {
    load_ = num_threads_ + 1;
    start_ = num_threads_ + 1;

    stop_ = 0;

    pthread_t thread[MAX_NUM_THREADS];
    for (int i = 0; i < num_threads_; ++i) {
        int rc = pthread_create(thread + i, NULL, worker, (void*)(size_t)i);
        if (rc != 0) { perror("pthread_create"); exit(rc); }
    }

    do { /* nothing */ } while (load_ != 1);
    load_ = 0;

    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);

    do { /* nothing */ } while (start_ != 1);

    gettimeofday(&tv2, NULL);
    load_time_ = (double)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000000;

    start_ = 0;
    sleep(duration_);
    stop_ = 1;

    uint64_t ops = 0;
    for (int i = 0; i < num_threads_; ++i) {
        void *count;
        pthread_join(thread[i], &count);
        ops += (size_t)count;
    }
    return ops;
}

int main (int argc, char **argv) {
    char* program_name = argv[0];

    if (argc > 3) {
        fprintf(stderr, "Usage: %s [num_threads] [table_scale]\n", program_name);
        return -1;
    }

    num_threads_ = 2;
    if (num_threads_ > MAX_NUM_THREADS) { num_threads_ = MAX_NUM_THREADS; }
    if (argc > 1)
    {
        errno = 0;
        num_threads_ = strtol(argv[1], NULL, 10);
        if (errno) {
            fprintf(stderr, "%s: Invalid argument for number of threads\n", program_name);
            return -1;
        }
        if (num_threads_ <= 0) {
            fprintf(stderr, "%s: Number of threads must be at least 1\n", program_name);
            return -1;
        }
    }
    if (num_threads_ > MAX_NUM_THREADS) {
        fprintf(stderr, "%s: Number of threads cannot be more than %d\n", program_name, MAX_NUM_THREADS);
        return -1;
    }

    int table_scale = 12;
    if (argc > 2) {
        errno = 0;
        table_scale = strtol(argv[2], NULL, 10);
        if (errno) {
            fprintf(stderr, "%s: Invalid argument for the scale of the collection\n", program_name);
            return -1;
        }
        if (table_scale < 0 || table_scale > 36) {
            fprintf(stderr, "%s: The scale of the collection must be between 0 and 36\n", program_name);
            return -1;
        }
    }

    int read_ratio = 90;
    int put_ratio = 50;
    get_range_ = (int)((double)OP_SELECT_RANGE / 100 * read_ratio);
    put_range_ = get_range_ + (int)(((double)OP_SELECT_RANGE - get_range_) / 100 * put_ratio);

    nbd_thread_init();
    static const map_impl_t *map_types[] = { &MAP_IMPL_HT };
    for (int i = 0; i < sizeof(map_types)/sizeof(*map_types); ++i) {
#ifdef TEST_STRING_KEYS
        map_ = map_alloc(map_types[i], &DATATYPE_NSTRING);
#else
        map_ = map_alloc(map_types[i], NULL);
#endif

        num_keys_ = 1ULL << table_scale;
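        // num_keys_ must remain a power of two: workers pick keys with
        // (nbd_rand() & (num_keys_ - 1)) + 1, and that mask only yields a
        // uniform range when num_keys_ - 1 is an all-ones bit pattern.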
        duration_ = 1 + table_scale/4;
        double mops_per_sec = (double)run_test() / 1000000.0 / duration_;

        printf("Threads:%-2d Size:2^%-2d load time:%-4.2f Mops/s:%-4.2f per-thread:%-4.2f ",
               num_threads_, table_scale, load_time_, mops_per_sec, mops_per_sec/num_threads_);
        map_print(map_, FALSE);
        fflush(stdout);

        map_free(map_);
    }

    return 0;
}
--------------------------------------------------------------------------------
/test/rcu_test.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <pthread.h>
#include <sys/time.h>
#include <unistd.h>
#include "common.h"
#include "runtime.h"
#include "mem.h"
#include "rcu.h"

#define NUM_ITERATIONS 10000000

typedef struct node {
    struct node *next;
} node_t;

typedef struct lifo {
    node_t *head;
} lifo_t;

static volatile int wait_;
static lifo_t *stk_;

static lifo_t *lifo_alloc (void) {
    lifo_t *stk = (lifo_t *)nbd_malloc(sizeof(lifo_t));
    memset(stk, 0, sizeof(lifo_t));
    return stk;
}

static void lifo_aba_push (lifo_t *stk, node_t *x) {
    node_t *head;
    do {
        head = VOLATILE_DEREF(stk).head;
        VOLATILE_DEREF(x).next = head;
    } while (SYNC_CAS(&stk->head, head, x) != head);
}

node_t *lifo_aba_pop (lifo_t *stk) {
    node_t *head;
    do {
        head = VOLATILE_DEREF(stk).head;
        if (head == NULL)
            return NULL;
    } while (SYNC_CAS(&stk->head, head, head->next) != head);
    head->next = NULL;
    return head;
}

node_t *node_alloc (void) {
    node_t *node = (node_t *)nbd_malloc(sizeof(node_t));
    memset(node, 0, sizeof(node_t));
    return node;
}

void *worker (void *arg) {
    nbd_thread_init();

    // Wait for all the worker threads to be ready.
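    // The loop below hammers a Treiber-style CAS stack. Pops are only safe
    // because reclamation is deferred: another thread may still be reading
    // head->next after this thread unlinks a node, so popped nodes go to
    // rcu_defer_free() rather than nbd_free(), and are reclaimed only after
    // every thread has passed through a quiescent state (rcu_update()).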
    (void)__sync_fetch_and_add(&wait_, -1);
    do {} while (wait_);

    int i;
    for (i = 0; i < NUM_ITERATIONS; ++ i) {
        int n = nbd_rand();
        if (n & 0x1) {
            lifo_aba_push(stk_, node_alloc());
        } else {
            node_t *x = lifo_aba_pop(stk_);
            if (x) {
                rcu_defer_free(x);
            }
        }
        rcu_update();
    }

    return NULL;
}

int main (int argc, char **argv) {
    nbd_thread_init();
    lwt_set_trace_level("m3r3");

    int num_threads = sysconf(_SC_NPROCESSORS_CONF);
    if (argc == 2)
    {
        errno = 0;
        num_threads = strtol(argv[1], NULL, 10);
        if (errno) {
            fprintf(stderr, "%s: Invalid argument for number of threads\n", argv[0]);
            return -1;
        }
        if (num_threads <= 0) {
            fprintf(stderr, "%s: Number of threads must be at least 1\n", argv[0]);
            return -1;
        }
    }

    stk_ = lifo_alloc();

    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);
    wait_ = num_threads;

    pthread_t thread[num_threads];
    for (int i = 0; i < num_threads; ++i) {
        int rc = pthread_create(thread + i, NULL, worker, (void *)(size_t)i);
        if (rc != 0) { perror("pthread_create"); return rc; }
    }
    for (int i = 0; i < num_threads; ++i) {
        pthread_join(thread[i], NULL);
    }

    gettimeofday(&tv2, NULL);
    int ms = (int)(1000000*(tv2.tv_sec - tv1.tv_sec) + tv2.tv_usec - tv1.tv_usec) / 1000;
    printf("Th:%d Time:%dms\n\n", num_threads, ms);
    fflush(stdout);

    return 0;
}
--------------------------------------------------------------------------------
/test/txn_test.c:
--------------------------------------------------------------------------------
#include <stdio.h>
#include "CuTest.h"

#include "common.h"
#include "runtime.h"
#include "txn.h"
#include "map.h"
#include "hashtable.h"

#define ASSERT_EQUAL(x, y) CuAssertIntEquals(tc, x, y)

void test1 (CuTest* tc) {
    map_t *map = map_alloc(&MAP_IMPL_HT, NULL);
    txn_t *t1 = txn_begin(map);
    txn_t *t2 = txn_begin(map);
    map_key_t k1 = (map_key_t)1;
    txn_map_set(t1, k1, 2);
    txn_map_set(t1, k1, 3);
    ASSERT_EQUAL( DOES_NOT_EXIST, txn_map_get(t2, k1) );
    txn_map_set(t2, k1, 4);
    ASSERT_EQUAL( 3, txn_map_get(t1, k1) );
    ASSERT_EQUAL( 4, txn_map_get(t2, k1) );
    ASSERT_EQUAL( TXN_VALIDATED, txn_commit(t2));
    ASSERT_EQUAL( TXN_ABORTED, txn_commit(t1));
}

int main (void) {
    nbd_thread_init();
    lwt_set_trace_level("x3h3");

    CuString *output = CuStringNew();
    CuSuite* suite = CuSuiteNew();
    SUITE_ADD_TEST(suite, test1);
    CuSuiteRun(suite);
    CuSuiteDetails(suite, output);
    printf("%s\n", output->buffer);

    return 0;
}
--------------------------------------------------------------------------------
/todo:
--------------------------------------------------------------------------------
memory reclamation
------------------
- allow threads to dynamically enter and exit rcu's token passing ring
- augment rcu with heartbeat manager to kill and recover from stalled threads
- make rcu try yielding when its buffer gets full
- use alternate memory reclamation schemes: hazard pointers and/or reference counting

quality
-------
- verify the memory management of keys in list, skiplist, and hashtable
- transaction tests
- validate function arguments in interface functions
- document usage
- document algorithms

optimization
------------
- investigate 16 byte CAS; ht can store GUIDs inline instead of pointers to actual keys
- txn write after write can just update the old update record instead of pushing a new one
- use a shared scan for write-set validation in txn, similar to ht copy logic
- experiment with the performance impact of not passing the hash between functions in ht
- experiment with embedding the nstring keys in the list/skiplist nodes
- lower skiplist's high_water when the actual number of levels in use drops
- non-power-of-2 sized hashtables for improved memory usage
- mem2

features
--------
- allow values of 0 to be inserted into maps (change DOES_NOT_EXIST to something other than 0)
- read-committed type transactions
- recycle free regions across size-classes and between threads
--------------------------------------------------------------------------------
/txn/txn.c:
--------------------------------------------------------------------------------
/*
 * Written by Josh Dybnis and released to the public domain, as explained at
 * http://creativecommons.org/licenses/publicdomain
 */
#include "common.h"
#include "txn.h"
#include "mem.h"
#include "rcu.h"
#include "lwt.h"
#include "skiplist.h"

#define UNDETERMINED_VERSION 0
#define ABORTED_VERSION TAG_VALUE(0, TAG1)
#define INITIAL_WRITES_SIZE 4
#define PTR_TO_VAL(x) ((size_t)(x) >> 2)
#define VAL_TO_PTR(x) ((update_t *)((x) << 2))

typedef struct update_rec update_t;
typedef map_key_t version_t;

struct update_rec {
    version_t version; // tagged versions are txn_t pointers, untagged are actual version numbers
    map_val_t value;
    map_val_t next;    // an earlier update
};

typedef struct write_rec {
    map_key_t key;
    update_t *rec;
} write_rec_t;

struct txn {
    version_t rv;
    version_t wv;
    map_t *map;
    write_rec_t *writes;
    size_t writes_size;
    size_t writes_count;
    size_t validate_scan;
    txn_state_e state;
};

static txn_state_e txn_validate (txn_t *txn);

static skiplist_t *active_ = NULL;

static version_t version_ = 1;

// Validate the updates for <txn>. Validation fails if there is a write-write conflict. That is,
// if after our read version another transaction committed a change to an entry we are also
// trying to change.
//
// If we encounter a potential conflict with a transaction that is in the process of validating,
// we help it complete validating. It must be finished before we can decide to rollback or commit.
//
static txn_state_e validate_key (txn_t *txn, map_key_t key) {
    assert(txn->state != TXN_RUNNING);

    map_val_t val = map_get(txn->map, key);
    update_t *update = NULL;
    for (; val != DOES_NOT_EXIST; val = update->next) {

        // If the update or its version is not tagged it means the update is committed.
        //
        // We can stop at the first committed record we find that is at least as old as our read
        // version. All the other committed records following it will be older. And all the
        // uncommitted records following it will eventually conflict with it and abort.
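        // Reminder of the tag encoding used below: a map value with TAG2 set
        // is a shifted pointer to an update_t (see PTR_TO_VAL/VAL_TO_PTR),
        // and an update's version field with TAG1 set is a shifted pointer to
        // the writing transaction's txn_t; an untagged version is a committed
        // version number.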
        if (!IS_TAGGED(val, TAG2))
            return TXN_VALIDATED;
        update = VAL_TO_PTR(val);
        if (!IS_TAGGED(update->version, TAG1))
            return (update->version <= txn->rv) ? TXN_VALIDATED : TXN_ABORTED;

        // If the update's version is tagged then either the update was aborted or the version
        // number is actually a pointer to a running transaction's txn_t.

        // Skip aborted transactions.
        if (EXPECT_FALSE(update->version == ABORTED_VERSION))
            continue;

        // The update's transaction is still in progress. Access its txn_t.
        txn_t *writer = (txn_t *)VAL_TO_PTR(update->version);
        if (writer == txn)
            continue; // Skip our own updates.
        txn_state_e writer_state = writer->state;

        // Any running transaction will only be able to acquire a wv greater than ours. A
        // transaction changes its state to validating before acquiring a wv. We can ignore an
        // unvalidated transaction if its version is greater than ours. See the next comment
        // below for the explanation why.
        if (writer_state == TXN_RUNNING)
            continue;

        // If <writer> has a later version than us we can safely ignore its updates. It will not
        // commit until we have completed validation (in order to remain non-blocking it will
        // help us validate if necessary). This protocol ensures a deterministic resolution to
        // every conflict and avoids infinite ping-ponging between validating two conflicting
        // transactions.
        if (writer_state == TXN_VALIDATING) {
            if (writer->wv > txn->wv)
                continue;
            // Help <writer> commit. We need to know if <writer> aborts or commits before we can
            // decide what to do. But we don't want to block, so we assist.
            writer_state = txn_validate(writer);
        }

        // Skip updates from aborted transactions.
        if (writer_state == TXN_ABORTED)
            continue;

        assert(writer_state == TXN_VALIDATED);
        return (writer->wv <= txn->rv) ? TXN_VALIDATED : TXN_ABORTED;
    }

    return TXN_VALIDATED;
}

static txn_state_e txn_validate (txn_t *txn) {
    assert(txn->state != TXN_RUNNING);
    switch (txn->state) {

        case TXN_VALIDATING:
            if (txn->wv == UNDETERMINED_VERSION) {
                version_t wv = SYNC_ADD(&version_, 1);
                (void)SYNC_CAS(&txn->wv, UNDETERMINED_VERSION, wv);
            }

            for (int i = 0; i < txn->writes_count; ++i) {
                txn_state_e s = validate_key(txn, txn->writes[i].key);
                if (s == TXN_ABORTED) {
                    txn->state = TXN_ABORTED;
                    break;
                }
                assert(s == TXN_VALIDATED);
            }
            if (txn->state == TXN_VALIDATING) {
                txn->state = TXN_VALIDATED;
            }
            break;

        case TXN_VALIDATED:
        case TXN_ABORTED:
            break;

        default:
            assert(FALSE);
    }

    return txn->state;
}

static update_t *alloc_update_rec (version_t ver, map_val_t val) {
    update_t *u = (update_t *)nbd_malloc(sizeof(update_t));
    u->version = ver;
    u->value = val;
    u->next = DOES_NOT_EXIST;
    return u;
}

txn_t *txn_begin (map_t *map) {
    TRACE("x1", "txn_begin: map %p", map, 0);
    txn_t *txn = (txn_t *)nbd_malloc(sizeof(txn_t));
    memset(txn, 0, sizeof(txn_t));
    txn->wv = UNDETERMINED_VERSION;
    txn->state = TXN_RUNNING;
    txn->map = map;
    txn->writes = nbd_malloc(sizeof(*txn->writes) * INITIAL_WRITES_SIZE);
    txn->writes_size = INITIAL_WRITES_SIZE;
    if (EXPECT_FALSE(active_ == NULL)) {
        skiplist_t *a = sl_alloc(NULL);
        if (SYNC_CAS(&active_, NULL, a) != NULL) {
            sl_free(a);
        }
    }

    // Acquire the read version for the txn. Must be careful to avoid a race: increment the
    // reference count for the read version in <active_>, then re-check that it is still the
    // current version; if another transaction committed in between, back the count out and
    // try again.
    do {
        txn->rv = version_;

        unsigned old_count;
        unsigned temp = 0;
        do {
            old_count = temp;
            temp = sl_cas(active_, (map_key_t)txn->rv, old_count, old_count + 1);
        } while (temp != old_count);

        if (txn->rv == version_)
            break;

        temp = 1;
        do {
            old_count = temp;
            temp = sl_cas(active_, (map_key_t)txn->rv, old_count, old_count - 1);
        } while (temp != old_count);
    } while (1);

    TRACE("x1", "txn_begin: returning new transaction %p (read version %p)", txn, txn->rv);
    return txn;
}

void txn_abort (txn_t *txn) {
    if (txn->state != TXN_RUNNING)
        return;

    int i;
    for (i = 0; i < txn->writes_count; ++i) {
        update_t *update = (update_t *)txn->writes[i].rec;
        update->version = ABORTED_VERSION;
    }

    rcu_defer_free(txn->writes);
    rcu_defer_free(txn);
}

txn_state_e txn_commit (txn_t *txn) {
    if (txn->state != TXN_RUNNING)
        return txn->state;

    assert(txn->state == TXN_RUNNING);
    txn->state = TXN_VALIDATING;
    txn_state_e state = txn_validate(txn);

    // Detach <txn> from its updates.
    version_t wv = (txn->state == TXN_ABORTED) ? ABORTED_VERSION : txn->wv;
    int i;
    for (i = 0; i < txn->writes_count; ++i) {
        update_t *update = txn->writes[i].rec;
        update->version = wv;
    }

    // Lower the reference count for <txn>'s read version
    unsigned temp = 2;
    unsigned old_count;
    do {
        old_count = temp;
        temp = sl_cas(active_, (map_key_t)txn->rv, old_count, old_count - 1);
        if (temp == 1 && txn->rv != version_) {
            sl_remove(active_, (map_key_t)txn->rv);
            break;
        }
    } while (old_count != temp);

    rcu_defer_free(txn->writes);
    rcu_defer_free(txn);

    return state;
}

// Get the most recent committed version prior to our read version.
map_val_t txn_map_get (txn_t *txn, map_key_t key) {
    TRACE("x1", "txn_map_get: txn %p map %p", txn, txn->map);
    TRACE("x1", "txn_map_get: key %p", key, 0);

    if (txn->state != TXN_RUNNING) {
        TRACE("x1", "txn_map_get: error txn not running (state %p)", txn->state, 0);
        return ERROR_TXN_NOT_RUNNING;
    }

    // Iterate through the update records to find the latest committed version prior to our
    // read version.
    map_val_t newest_val = map_get(txn->map, key);
    map_val_t val = newest_val;
    update_t *update;
    for ( ; (update = VAL_TO_PTR(val)) != NULL ; val = update->next) {

        // If TAG2 is set in <val> it indicates that <val> is an update record. Otherwise all of
        // the following are true: <val> is a literal value, it is older than any currently
        // active transaction, and it is the most recently set value for its key. Therefore it
        // is visible to <txn>.
        if (!IS_TAGGED(val, TAG2)) {
            TRACE("x1", "txn_map_get: found untagged value; returning %p", val, 0);
            return val;
        }

        // If the update's version is not tagged it means the update is committed.
        if (!IS_TAGGED(update->version, TAG1)) {
            if (update->version <= txn->rv) {
                TRACE("x2", "txn_map_get: found committed update %p (version %p)", update, update->version);
                break; // success
            }
            TRACE("x2", "txn_map_get: skipping update %p (version %p)", update, update->version);
            continue;
        }

        // If the update's version is tagged then either the update was aborted or the version
        // number is actually a pointer to a running transaction's txn_t.

        // Skip updates from aborted transactions.
        if (EXPECT_FALSE(update->version == ABORTED_VERSION)) {
            TRACE("x2", "txn_map_get: skipping aborted update %p", update, 0);
            continue;
        }

        // The update's transaction is still in progress. Access its txn_t.
        txn_t *writer = (txn_t *)VAL_TO_PTR(update->version);
        if (writer == txn) {
            TRACE("x2", "txn_map_get: found txn's own update %p", update, 0);
            break; // success
        }

        txn_state_e writer_state = writer->state;
        if (writer_state == TXN_RUNNING) {
            TRACE("x2", "txn_map_get: skipping update %p of in-progress transaction %p", update, writer);
            continue;
        }

        if (writer_state == TXN_VALIDATING) {
            TRACE("x2", "txn_map_get: update %p transaction %p validating", update, writer);
            if (writer->wv > txn->rv)
                continue;
            writer_state = txn_validate(writer);
        }

        // Skip updates from aborted transactions.
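        // Note the asymmetry with validate_key(): here a writer's wv is compared against this
        // transaction's read version (rv), because a committed write is visible only if it
        // happened before this transaction began; validation instead compares wv against wv to
        // order conflicting committers deterministically.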
        if (writer_state == TXN_ABORTED) {
            TRACE("x2", "txn_map_get: skipping aborted update %p", update, 0);
            continue;
        }

        assert(writer_state == TXN_VALIDATED);
        if (writer->wv > txn->rv) {
            TRACE("x2", "txn_map_get: skipping update %p (version %p)", update, update->version);
            continue;
        }
        break; // success
    }

    if (update == NULL) {
        TRACE("x1", "txn_map_get: key does not exist in map", key, 0);
        return DOES_NOT_EXIST;
    }

    map_val_t value = update->value;
    TRACE("x1", "txn_map_get: key found returning value %p", value, 0);

    // collect some garbage
    version_t min_active_version = UNDETERMINED_VERSION;
    update_t *next_update = NULL;
    if (IS_TAGGED(update->next, TAG2)) {
        next_update = VAL_TO_PTR(update->next);

        // If <next_update> (and all update records following it [except if it is aborted]) is
        // old enough that it is not visible to any active transaction we can safely free it.
        min_active_version = (version_t)sl_min_key(active_);
        if (next_update->version < min_active_version) {

            // If <next_update> is aborted, skip over it to look for more recent ones that may follow
            update_t *temp = next_update;
            while (temp->version == ABORTED_VERSION) {
                assert(!IS_TAGGED(temp->version, TAG1));
                map_val_t next = temp->next;
                if (!IS_TAGGED(next, TAG2))
                    break;

                // Bail out of garbage collection if we find a record that might still be
                // accessed by an ongoing transaction.
                if (VAL_TO_PTR(next)->version >= min_active_version)
                    return value;

                temp = VAL_TO_PTR(next);
            }

            // free the next update record and all the ones following it
            temp = next_update;
            map_val_t next;
            do {
                next = SYNC_SWAP(&temp->next, DOES_NOT_EXIST);

                // if we find ourselves in a race just back off and let the other thread take
                // care of it
                if (next == DOES_NOT_EXIST)
                    return value;

                nbd_free(temp);

                temp = VAL_TO_PTR(next);

            } while (IS_TAGGED(next, TAG2));
        }
    }

    // If there is one item left and it is visible to all active transactions we can merge it
    // into the map itself. There is no need for an update record.
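    // The merge below is a single CAS from the tagged update-record pointer back to the plain
    // literal value. If the CAS fails, another thread either installed a newer update or already
    // performed the same merge; both outcomes are acceptable, so the failure is simply ignored.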
    if (next_update == NULL && val == newest_val) {
        if (min_active_version == UNDETERMINED_VERSION) {
            min_active_version = (version_t)sl_min_key(active_);
        }
        if (update->version <= min_active_version) {
            if (map_cas(txn->map, key, TAG_VALUE(val, TAG2), value) == TAG_VALUE(val, TAG2)) {
                rcu_defer_free(update);
            }
        }
    }

    return value;
}

void txn_map_set (txn_t *txn, map_key_t key, map_val_t value) {
    TRACE("x1", "txn_map_set: txn %p map %p", txn, txn->map);
    TRACE("x1", "txn_map_set: key %p value %p", key, value);
    assert(!IS_TAGGED(value, TAG1) && !IS_TAGGED(value, TAG2));

    if (txn->state != TXN_RUNNING) {
        TRACE("x1", "txn_map_set: error txn not running (state %p)", txn->state, 0);
        return;
    }

    // create a new update record
    version_t ver = TAG_VALUE(PTR_TO_VAL(txn), TAG1); // tagged versions are txn_t pointers
    update_t *update = alloc_update_rec(ver, value);

    // push the new update record onto <key>'s update list
    map_val_t old_update = map_get(txn->map, key);
    TRACE("x2", "txn_map_set: old update %p new update record %p", old_update, update);
    do {
        update->next = old_update;
        map_val_t temp = map_cas(txn->map, key, old_update, TAG_VALUE(PTR_TO_VAL(update), TAG2));
        if (temp == old_update)
            break;

        TRACE("x1", "txn_map_set: cas failed; found %p expected %p", temp, old_update);
        old_update = temp;
    } while (1);

    // add to the write set for commit-time validation
    if (txn->writes_count == txn->writes_size) {
        write_rec_t *w = nbd_malloc(sizeof(write_rec_t) * txn->writes_size * 2);
        memcpy(w, txn->writes, txn->writes_size * sizeof(write_rec_t));
        txn->writes_size *= 2;
        nbd_free(txn->writes);
        txn->writes = w;
    }
    int i = txn->writes_count++;
    txn->writes[i].key = key;
    txn->writes[i].rec = update;
}
--------------------------------------------------------------------------------
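For orientation, here is a sketch of how the transactional map API above is meant to be composed, pieced together from txn_test.c and the txn.h entry points used in txn.c. The transfer() helper and its retry-on-abort loop are illustrative assumptions by the editor, not code from the repository; note that values of 0 cannot be stored (DOES_NOT_EXIST is 0) and each thread must call nbd_thread_init() before using the library.

#include "common.h"
#include "runtime.h"
#include "txn.h"
#include "map.h"
#include "hashtable.h"

// Hypothetical helper: atomically move one unit from key 1 to key 2,
// retrying if commit-time validation detects a write-write conflict.
static void transfer (map_t *map) {
    txn_state_e state;
    do {
        txn_t *t = txn_begin(map);
        map_val_t a = txn_map_get(t, (map_key_t)1); // latest version <= t's read version
        map_val_t b = txn_map_get(t, (map_key_t)2);
        txn_map_set(t, (map_key_t)1, a - 1); // pushes tagged update records
        txn_map_set(t, (map_key_t)2, b + 1);
        state = txn_commit(t); // may help a conflicting txn validate
    } while (state == TXN_ABORTED);
}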