├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bmarks ├── .gitignore ├── atomic_bench.c ├── bank_one.c ├── bank_simple.c ├── bank_th.c ├── htlock_test.c ├── individual_ops.c ├── measure_contention.c ├── stress_latency.c ├── stress_one.c ├── stress_test.c ├── test_array_alloc.c ├── test_correctness.c ├── test_trylock.c └── uncontended.c ├── include ├── .gitignore ├── alock.h ├── atomic_ops.h ├── clh.h ├── gl_lock.h ├── hclh.h ├── htlock.h ├── lock_if.h ├── mcs.h ├── platform_defs.h ├── rw_ttas.h ├── spinlock.h ├── ticket.h ├── ttas.h └── utils.h ├── samples ├── sample_generic.c └── sample_mcs.c ├── scripts ├── correctness.sh ├── correctness_array.sh ├── correctness_trylock.sh ├── make_all_versions.sh ├── run_on_cores.sh ├── run_on_range.sh └── test.sh └── src ├── alock.c ├── clh.c ├── gl_lock.c ├── hclh.c ├── htlock.c ├── mcs.c ├── rw_ttas.c ├── spinlock.c ├── ticket.c └── ttas.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *~ 3 | /#README.md# 4 | /atomic_bench 5 | /atomic_latency 6 | /atomic_success 7 | /atomic_test 8 | /bank 9 | /bank_arrays 10 | /bank_clhs 11 | /bank_hclhs 12 | /bank_htickets 13 | /bank_mcss 14 | /bank_mutexs 15 | /bank_one 16 | /bank_one_arrays 17 | /bank_one_clhs 18 | /bank_one_hclhs 19 | /bank_one_htickets 20 | /bank_one_mcss 21 | /bank_one_mutexs 22 | /bank_one_rws 23 | /bank_one_spinlocks 24 | /bank_one_tickets 25 | /bank_one_ttass 26 | /bank_rws 27 | /bank_simple 28 | /bank_simple_arrays 29 | /bank_simple_clhs 30 | /bank_simple_hclhs 31 | /bank_simple_htickets 32 | /bank_simple_mcss 33 | /bank_simple_mutexs 34 | /bank_simple_rws 35 | /bank_simple_spinlocks 36 | /bank_simple_tickets 37 | /bank_simple_ttass 38 | /bank_spinlocks 39 | /bank_tickets 40 | /bank_ttass 41 | /htlock_test 42 | /individual_ops 43 | /libsync.a 44 | /measure_contention 45 | /stress_latency 46 | /stress_latency_arrays 47 | /stress_latency_clhs 48 | /stress_latency_hclhs 49 | /stress_latency_htickets 
50 | /stress_latency_mcss 51 | /stress_latency_mutexs 52 | /stress_latency_rws 53 | /stress_latency_spinlocks 54 | /stress_latency_tickets 55 | /stress_latency_ttass 56 | /stress_one 57 | /stress_one_arrays 58 | /stress_one_clhs 59 | /stress_one_hclhs 60 | /stress_one_htickets 61 | /stress_one_mcss 62 | /stress_one_mutexs 63 | /stress_one_rws 64 | /stress_one_spinlocks 65 | /stress_one_tickets 66 | /stress_one_ttass 67 | /stress_test 68 | /stress_test_arrays 69 | /stress_test_clhs 70 | /stress_test_hclhs 71 | /stress_test_htickets 72 | /stress_test_mcss 73 | /stress_test_mutexs 74 | /stress_test_rws 75 | /stress_test_spinlocks 76 | /stress_test_tickets 77 | /stress_test_ttass 78 | /test_correctness 79 | /test_correctness_arrays 80 | /test_correctness_clhs 81 | /test_correctness_hclhs 82 | /test_correctness_htickets 83 | /test_correctness_mcss 84 | /test_correctness_mutexs 85 | /test_correctness_rws 86 | /test_correctness_spinlocks 87 | /test_correctness_tickets 88 | /test_correctness_ttass 89 | /trylock_test 90 | /uncontended 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Tudor Alexandru David 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PLATFORM_NUMA=1 2 | 3 | ifeq ($(DEBUG),1) 4 | DEBUG_FLAGS=-Wall -ggdb -DDEBUG 5 | COMPILE_FLAGS=-O0 -DADD_PADDING -fno-inline 6 | else 7 | DEBUG_FLAGS=-Wall 8 | COMPILE_FLAGS=-O3 -DADD_PADDING 9 | #COMPILE_FLAGS=-O3 -DADD_PADDING -DALTERNATE_CORES 10 | endif 11 | 12 | ifndef PLATFORM 13 | # PLATFORM=-DSPARC 14 | # PLATFORM=-DTILERA 15 | # PLATFORM=-DXEON 16 | # PLATFORM=-DOPTERON 17 | PLATFORM=-DDEFAULT 18 | endif 19 | 20 | ifeq ($(PLATFORM), -DDEFAULT) 21 | CORE_NUM := $(shell nproc) 22 | ifneq ($(CORE_SPEED_KHz), ) 23 | COMPILE_FLAGS += -DCORE_NUM=${CORE_NUM} 24 | else 25 | COMPILE_FLAGS += -DCORE_NUM=8 26 | endif 27 | $(info ********************************** Using as a default number of cores: $(CORE_NUM) on 1 socket) 28 | $(info ********************************** Is this correct? 
If not, fix it in platform_defs.h) 29 | endif 30 | 31 | ifeq ($(PLATFORM), -DOPTERON) #allow OPTERON_OPTIMIZE only for OPTERON platform 32 | OPTIMIZE=-DOPTERON_OPTIMIZE 33 | else 34 | OPTIMIZE= 35 | endif 36 | 37 | COMPILE_FLAGS += $(PLATFORM) 38 | COMPILE_FLAGS += $(OPTIMIZE) 39 | 40 | UNAME := $(shell uname) 41 | 42 | ifeq ($(PLATFORM),-DTILERA) 43 | GCC:=tile-gcc 44 | LIBS:=-lrt -lpthread -ltmc 45 | else 46 | ifeq ($(UNAME), Linux) 47 | GCC:=gcc 48 | LIBS := -lrt -lpthread -lnuma 49 | endif 50 | endif 51 | ifeq ($(UNAME), SunOS) 52 | GCC:=/opt/csw/bin/gcc 53 | LIBS := -lrt -lpthread 54 | COMPILE_FLAGS+= -m64 -mcpu=v9 -mtune=v9 55 | endif 56 | 57 | ifndef LOCK_VERSION 58 | # LOCK_VERSION=-DUSE_HCLH_LOCKS 59 | # LOCK_VERSION=-DUSE_TTAS_LOCKS 60 | LOCK_VERSION=-DUSE_SPINLOCK_LOCKS 61 | # LOCK_VERSION=-DUSE_MCS_LOCKS 62 | # LOCK_VERSION=-DUSE_ARRAY_LOCKS 63 | # LOCK_VERSION=-DUSE_RW_LOCKS 64 | # LOCK_VERSION=-DUSE_CLH_LOCKS 65 | # LOCK_VERSION=-DUSE_TICKET_LOCKS 66 | # LOCK_VERSION=-DUSE_MUTEX_LOCKS 67 | # LOCK_VERSION=-DUSE_HTICKET_LOCKS 68 | endif 69 | 70 | ifndef PRIMITIVE 71 | PRIMITIVE=-DTEST_CAS 72 | endif 73 | #ACCOUNT_PADDING=-DPAD_ACCOUNTS 74 | 75 | TOP := $(patsubst %/,%,$(dir $(lastword $(MAKEFILE_LIST)))) 76 | 77 | SRCPATH := $(TOP)/src 78 | MAININCLUDE := $(TOP)/include 79 | 80 | INCLUDES := -I$(MAININCLUDE) 81 | OBJ_FILES := mcs.o clh.o ttas.o spinlock.o rw_ttas.o ticket.o alock.o hclh.o gl_lock.o htlock.o 82 | 83 | 84 | all: bank bank_one bank_simple test_array_alloc test_trylock sample_generic sample_mcs test_correctness stress_one stress_test stress_latency atomic_bench individual_ops uncontended htlock_test measure_contention libsync.a 85 | @echo "############### Used: " $(LOCK_VERSION) " on " $(PLATFORM) " with " $(OPTIMIZE) 86 | 87 | libsync.a: ttas.o rw_ttas.o ticket.o clh.o mcs.o hclh.o alock.o htlock.o include/atomic_ops.h include/utils.h include/lock_if.h 88 | ar -r libsync.a ttas.o rw_ttas.o ticket.o clh.o mcs.o alock.o hclh.o htlock.o 
spinlock.o include/atomic_ops.h include/utils.h 89 | 90 | ttas.o: src/ttas.c 91 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/ttas.c $(LIBS) 92 | 93 | spinlock.o: src/spinlock.c 94 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/spinlock.c $(LIBS) 95 | 96 | rw_ttas.o: src/rw_ttas.c 97 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/rw_ttas.c $(LIBS) 98 | 99 | ticket.o: src/ticket.c 100 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/ticket.c $(LIBS) 101 | 102 | ticket_contention.o: src/ticket.c 103 | $(GCC) -D_GNU_SOURCE -DMEASURE_CONTENTION $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/ticket.c -o ticket_contention.o $(LIBS) 104 | 105 | gl_lock.o: src/gl_lock.c 106 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/gl_lock.c $(LIBS) 107 | 108 | mcs.o: src/mcs.c 109 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/mcs.c $(LIBS) 110 | 111 | clh.o: src/clh.c 112 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/clh.c $(LIBS) 113 | 114 | hclh.o: src/hclh.c 115 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/hclh.c $(LIBS) 116 | 117 | alock.o: src/alock.c 118 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/alock.c $(LIBS) 119 | 120 | htlock.o: src/htlock.c include/htlock.h 121 | $(GCC) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) -c src/htlock.c $(LIBS) 122 | bank: bmarks/bank_th.c $(OBJ_FILES) Makefile 123 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(ACCOUNT_PADDING) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/bank_th.c -o bank $(LIBS) 124 | 125 | bank_one: bmarks/bank_one.c $(OBJ_FILES) Makefile 126 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(ACCOUNT_PADDING) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/bank_one.c -o bank_one $(LIBS) 127 | 128 | 129 | 
bank_simple: bmarks/bank_simple.c $(OBJ_FILES) Makefile 130 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/bank_simple.c -o bank_simple $(LIBS) 131 | 132 | stress_test: bmarks/stress_test.c $(OBJ_FILES) Makefile 133 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/stress_test.c -o stress_test $(LIBS) 134 | 135 | measure_contention: bmarks/measure_contention.c $(OBJ_FILES) ticket_contention.o Makefile 136 | $(GCC) -DUSE_TICKET_LOCKS $(ALTERNATE_SOCKETS) $(NO_DELAYS) -DMEASURE_CONTENTION -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) ticket_contention.o bmarks/measure_contention.c -o measure_contention $(LIBS) 137 | 138 | stress_one: bmarks/stress_one.c $(OBJ_FILES) Makefile 139 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/stress_one.c -o stress_one $(LIBS) 140 | 141 | test_correctness: bmarks/test_correctness.c $(OBJ_FILES) Makefile 142 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/test_correctness.c -o test_correctness $(LIBS) 143 | 144 | sample_generic: samples/sample_generic.c $(OBJ_FILES) Makefile 145 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) samples/sample_generic.c -o sample_generic $(LIBS) 146 | 147 | sample_mcs: samples/sample_mcs.c $(OBJ_FILES) Makefile 148 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) samples/sample_mcs.c -o sample_mcs $(LIBS) 149 | 150 | 151 | test_trylock: bmarks/test_trylock.c $(OBJ_FILES) Makefile 152 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) 
$(INCLUDES) $(OBJ_FILES) bmarks/test_trylock.c -o test_trylock $(LIBS) 153 | 154 | 155 | test_array_alloc: bmarks/test_array_alloc.c $(OBJ_FILES) Makefile 156 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/test_array_alloc.c -o test_array_alloc $(LIBS) 157 | 158 | 159 | stress_latency: bmarks/stress_latency.c $(OBJ_FILES) Makefile 160 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/stress_latency.c -o stress_latency $(LIBS) 161 | 162 | individual_ops: bmarks/individual_ops.c $(OBJ_FILES) Makefile 163 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/individual_ops.c -o individual_ops $(LIBS) 164 | 165 | uncontended: bmarks/uncontended.c $(OBJ_FILES) Makefile 166 | $(GCC) $(LOCK_VERSION) $(ALTERNATE_SOCKETS) $(NO_DELAYS) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) $(OBJ_FILES) bmarks/uncontended.c -o uncontended $(LIBS) 167 | 168 | atomic_bench: bmarks/atomic_bench.c Makefile 169 | $(GCC) $(ALTERNATE_SOCKETS) $(PRIMITIVE) -D_GNU_SOURCE $(COMPILE_FLAGS) $(DEBUG_FLAGS) $(INCLUDES) bmarks/atomic_bench.c -o atomic_bench $(LIBS) 170 | 171 | htlock_test: htlock.o bmarks/htlock_test.c Makefile 172 | $(GCC) -O0 -D_GNU_SOURCE $(COMPILE_FLAGS) $(PLATFORM) $(DEBUG_FLAGS) $(INCLUDES) bmarks/htlock_test.c -o htlock_test htlock.o $(LIBS) 173 | 174 | clean: 175 | rm -f *.o locks mcs_test hclh_test bank_one bank_simple bank* stress_latency* test_array_alloc test_trylock sample_generic test_correctness stress_one stress_test* atomic_bench uncontended individual_ops trylock_test htlock_test measure_contention libsync.a 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Libslock 
2 | ======= 3 | 4 | This repository provides: 5 | - libslock, a cross-platform interface to atomic operations and other common operations 6 | - implementations of a number of well-known locking algorithms 7 | - benchmarks testing the performance of atomic operations and various locking algorithms 8 | 9 | The package has been tested on x86_64 Intel and AMD machines, Tilera and Sparc architectures. 10 | 11 | A version of this code was used in the paper **"Everything you always wanted to know about synchronization but were afraid to ask"** (accessible here: http://dl.acm.org/citation.cfm?doid=2517349.2522714). 12 | 13 | Makefile parameters: 14 | 15 | Locking Algorithm 16 | ----------------- 17 | Can be passed using `LOCK_VERSION` to the Makefile. `LOCK_VERSION` can take one of the following values: 18 | 19 | - `USE_TTAS_LOCKS` - use test-and-test-and-set locks 20 | - `USE_SPINLOCK_LOCKS` - use test-and-set spinlocks 21 | - `USE_TICKET_LOCKS` - use ticket locks 22 | - `USE_HTICKET_LOCKS` - use hierarchical ticket locks 23 | - `USE_MCS_LOCKS` - use MCS locks 24 | - `USE_CLH_LOCKS` - use CLH locks 25 | - `USE_HCLH_LOCKS` - use HCLH locks 26 | - `USE_ARRAY_LOCKS` - use array locks 27 | - `USE_RW_LOCKS` - use read-write locks (not used in paper, not optimized) 28 | - `USE_MUTEX_LOCKS` - use the pthread mutex 29 | 30 | 31 | Platform 32 | -------- 33 | Can be passed using `PLATFORM` to the Makefile; the settings are specific to the platforms we were using (topology, etc.); for other platforms the characteristics can be defined in `platform_defs.h`. The pre-defined platforms are: 34 | 35 | - `XEON` - 8 x 10-core Intel server 36 | - `OPTERON` - 8 x 6-core AMD server 37 | - `NIGARA` - 8-core SparcT2 machine 38 | - `TILERA` - 36-core Tilera machine 39 | 40 | Detailed descriptions of these platforms can be found in the paper. 41 | 42 | The `OPTERON_OPTIMIZE` option uses some of the Opteron-specific optimizations mentioned in the paper. 
43 | Atomic operation to be tested 44 | ----------------------------- 45 | For the benchmarks testing atomic operations, this parameter selects the desired operation. Can be passed to the Makefile using `PRIMITIVE`: 46 | 47 | - `TEST_FAI` - fetch-and-increment 48 | - `TEST_TAS` - test-and-set 49 | - `TEST_CAS` - compare-and-swap 50 | - `TEST_SWAP` - atomic swap 51 | - `TEST_CAS_FAI` - fetch-and-increment implemented using compare-and-swap 52 | 53 | `ALTERNATE_SOCKETS` is used for thread placement on the Niagara; if not set, hardware threads begin by being assigned to the same core; if set, threads are distributed evenly among the cores. 54 | -------------------------------------------------------------------------------- /bmarks/.gitignore: -------------------------------------------------------------------------------- 1 | /#stress_test.c# 2 | /.#stress_test.c 3 | /cscope.out 4 | -------------------------------------------------------------------------------- /bmarks/htlock_test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: htlock_test.c 3 | * Author: Vasileios Trigonakis 4 | * 5 | * Description: tests the hierarchical ticket lock (both normal 6 | * lock function and the try_lock) 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #ifndef __sparc__ 19 | #include 20 | #endif 21 | #include "utils.h" 22 | #include "htlock.h" 23 | 24 | #define XSTR(s) #s 25 | #define ALIGNMENT 26 | 27 | 28 | #define DEFAULT_NUM_ENTRIES 1024 29 | #define DEFAULT_NUM_THREADS 1 30 | #define DEFAULT_DURATION 2000 31 | #define DEFAULT_SEED 0 32 | #define DEFAULT_CORE_TRY_LOCK 8 33 | 34 | int num_entries; 35 | int num_threads; 36 | int duration; 37 | int num_cores_try_lock; 38 | 39 | __thread uint32_t phys_id; 40 | __thread uint32_t cluster_id; 41 | static volatile int stop; 42 | 43 | typedef struct barrier 44 | { 45 | pthread_cond_t complete; 46 
| pthread_mutex_t mutex; 47 | int count; 48 | int crossing; 49 | } barrier_t; 50 | 51 | void barrier_init(barrier_t *b, int n) 52 | { 53 | pthread_cond_init(&b->complete, NULL); 54 | pthread_mutex_init(&b->mutex, NULL); 55 | b->count = n; 56 | b->crossing = 0; 57 | } 58 | 59 | void barrier_cross(barrier_t *b) 60 | { 61 | pthread_mutex_lock(&b->mutex); 62 | /* One more thread through */ 63 | b->crossing++; 64 | /* If not all here, wait */ 65 | if (b->crossing < b->count) { 66 | pthread_cond_wait(&b->complete, &b->mutex); 67 | } else { 68 | pthread_cond_broadcast(&b->complete); 69 | /* Reset for next time */ 70 | b->crossing = 0; 71 | } 72 | pthread_mutex_unlock(&b->mutex); 73 | } 74 | 75 | typedef struct thread_data 76 | { 77 | union 78 | { 79 | struct { 80 | barrier_t *barrier; 81 | unsigned long num_operations; 82 | unsigned int seed; 83 | int id; 84 | htlock_t* locks; 85 | }; 86 | char padding[CACHE_LINE_SIZE]; 87 | }; 88 | } thread_data_t; 89 | 90 | uint32_t steps = 0; 91 | 92 | uint64_t atomic_counter[8] = {0}; 93 | 94 | void* 95 | test(void *data) 96 | { 97 | thread_data_t *d = (thread_data_t *)data; 98 | //#ifdef __sparc__ 99 | phys_id = the_cores[d->id]; 100 | cluster_id=get_cluster(phys_id); 101 | //#else 102 | // phys_id = d->id; 103 | //#endif 104 | 105 | init_thread_htlocks(phys_id); 106 | htlock_t* htls = d->locks; 107 | 108 | /* Init of local data if necessary */ 109 | /* Wait on barrier */ 110 | barrier_cross(d->barrier); 111 | 112 | uint8_t success = 1; 113 | 114 | while (stop == 0) 115 | { 116 | 117 | if (d->id < num_cores_try_lock) 118 | { 119 | success = htlock_trylock(htls); 120 | } 121 | else 122 | { 123 | htlock_lock(htls); 124 | } 125 | 126 | if (success) 127 | { 128 | asm volatile("NOP"); 129 | atomic_counter[0]++; 130 | asm volatile("NOP"); 131 | if (d->id < num_cores_try_lock) 132 | { 133 | htlock_release_try(htls); 134 | } 135 | else 136 | { 137 | htlock_release(htls); 138 | } 139 | d->num_operations++; 140 | } 141 | } 142 | 143 | if 
(d->id < num_cores_try_lock) 144 | { 145 | printf("[%02d] try_lock completed: %lu\n", d->id, d->num_operations); 146 | } 147 | 148 | /* Free any local data if necessary */ 149 | return NULL; 150 | } 151 | 152 | 153 | void catcher(int sig) 154 | { 155 | static int nb = 0; 156 | printf("CAUGHT SIGNAL %d\n", sig); 157 | if (++nb >= 3) 158 | exit(1); 159 | } 160 | 161 | 162 | int 163 | main(int argc, char* const argv[]) 164 | { 165 | set_cpu(the_cores[0]); 166 | struct option long_options[] = 167 | { 168 | // These options don't set a flag 169 | {"help", no_argument, NULL, 'h'}, 170 | {"entries", required_argument, NULL, 'e'}, 171 | {"duration", required_argument, NULL, 'd'}, 172 | {"num-threads", required_argument, NULL, 'n'}, 173 | {"try-lock", required_argument, NULL, 't'}, 174 | {NULL, 0, NULL, 0} 175 | }; 176 | 177 | #ifdef PRINT_OUTPUT 178 | printf("sizeof(htlock_global_t) = %lu\n", sizeof(htlock_global_t)); 179 | printf("sizeof(htlock_local_t) = %lu\n", sizeof(htlock_local_t)); 180 | printf("sizeof(htlock_t) = %lu\n", sizeof(htlock_t)); 181 | #endif 182 | int i, c; 183 | thread_data_t *data; 184 | pthread_t *threads; 185 | pthread_attr_t attr; 186 | barrier_t barrier; 187 | struct timeval start, end; 188 | struct timespec timeout; 189 | 190 | num_entries = DEFAULT_NUM_ENTRIES; 191 | num_threads = DEFAULT_NUM_THREADS; 192 | num_cores_try_lock = DEFAULT_CORE_TRY_LOCK; 193 | duration = DEFAULT_DURATION; 194 | sigset_t block_set; 195 | 196 | while(1) 197 | { 198 | i = 0; 199 | c = getopt_long(argc, argv, "he:d:n:t:", long_options, &i); 200 | 201 | if(c == -1) 202 | break; 203 | 204 | if(c == 0 && long_options[i].flag == 0) 205 | c = long_options[i].val; 206 | 207 | switch(c) { 208 | case 0: 209 | /* Flag is automatically set */ 210 | break; 211 | case 'h': 212 | printf("htlock test\n" 213 | "\n" 214 | "Usage:\n" 215 | " htlock_test [options...]\n" 216 | "\n" 217 | "Options:\n" 218 | " -h, --help\n" 219 | " Print this message\n" 220 | " -e, --entries \n" 221 | " 
Number of entries in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n" 222 | " -d, --duration \n" 223 | " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" 224 | " -n, --num-threads \n" 225 | " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" 226 | " -t, --try-lock \n" 227 | " Number of cores working with try-locks (" XSTR(DEFAULT_CORE_TRY_LOCK)")\n" 228 | ); 229 | exit(0); 230 | case 'e': 231 | num_entries = atoi(optarg); 232 | break; 233 | case 'd': 234 | duration = atoi(optarg); 235 | break; 236 | case 'n': 237 | num_threads = atoi(optarg); 238 | break; 239 | case 't': 240 | num_cores_try_lock = atoi(optarg); 241 | break; 242 | case '?': 243 | printf("Use -h or --help for help\n"); 244 | exit(0); 245 | default: 246 | exit(1); 247 | } 248 | } 249 | 250 | assert(duration >= 0); 251 | assert(num_entries >= 1); 252 | assert(num_threads > 0); 253 | 254 | #ifdef PRINT_OUTPUT 255 | printf("Number of entries : %d\n", num_entries); 256 | printf("Duration : %d\n", duration); 257 | printf("Number of threads : %d\n", num_threads); 258 | printf("Number of trylock threads : %d\n", num_cores_try_lock); 259 | printf("Type sizes : int=%d/long=%d/ptr=%d\n", 260 | (int)sizeof(int), 261 | (int)sizeof(long), 262 | (int)sizeof(void *)); 263 | #endif 264 | timeout.tv_sec = duration / 1000; 265 | timeout.tv_nsec = (duration % 1000) * 1000000; 266 | 267 | 268 | /* the_data = (data_t*) malloc(num_entries * sizeof(data_t)); */ 269 | /* for (i = 0; i < num_entries; i++) */ 270 | /* { */ 271 | /* the_data[i].data = 0; */ 272 | /* } */ 273 | 274 | if ((data = (thread_data_t *) malloc(num_threads * sizeof(thread_data_t))) == NULL) 275 | { 276 | perror("malloc"); 277 | exit(1); 278 | } 279 | 280 | if ((threads = (pthread_t *) malloc(num_threads * sizeof(pthread_t))) == NULL) 281 | { 282 | perror("malloc"); 283 | exit(1); 284 | } 285 | 286 | srand((int)time(NULL)); 287 | 288 | stop = 0; 289 | /* Access set from all threads */ 290 | 
barrier_init(&barrier, num_threads + 1); 291 | pthread_attr_init(&attr); 292 | 293 | 294 | /* initialize the locks */ 295 | 296 | htlock_t* htls = init_htlocks(1); 297 | 298 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 299 | #ifdef PRINT_OUTPUT 300 | printf("Creating threads: "); 301 | #endif 302 | for (i = 0; i < num_threads; i++) 303 | { 304 | #ifdef PRINT_OUTPUT 305 | printf("%d, ", i); 306 | #endif 307 | data[i].id = i; 308 | data[i].num_operations = 0; 309 | data[i].seed = rand(); 310 | data[i].barrier = &barrier; 311 | data[i].locks = htls; 312 | if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) 313 | { 314 | fprintf(stderr, "Error creating thread\n"); 315 | exit(1); 316 | } 317 | } 318 | #ifdef PRINT_OUTPUT 319 | printf("\n"); 320 | #endif 321 | pthread_attr_destroy(&attr); 322 | 323 | /* Catch some signals */ 324 | if (signal(SIGHUP, catcher) == SIG_ERR || 325 | signal(SIGINT, catcher) == SIG_ERR || 326 | signal(SIGTERM, catcher) == SIG_ERR) 327 | { 328 | perror("signal"); 329 | exit(1); 330 | } 331 | 332 | /* Start threads */ 333 | barrier_cross(&barrier); 334 | 335 | #ifdef PRINT_OUTPUT 336 | printf("STARTING...\n"); 337 | #endif 338 | gettimeofday(&start, NULL); 339 | if (duration > 0) 340 | { 341 | nanosleep(&timeout, NULL); 342 | } else 343 | { 344 | sigemptyset(&block_set); 345 | sigsuspend(&block_set); 346 | } 347 | stop = 1; 348 | gettimeofday(&end, NULL); 349 | #ifdef PRINT_OUTPUT 350 | printf("STOPPING...\n"); 351 | #endif 352 | 353 | /* Wait for thread completion */ 354 | for (i = 0; i < num_threads; i++) 355 | { 356 | if (pthread_join(threads[i], NULL) != 0) 357 | { 358 | fprintf(stderr, "Error waiting for thread completion\n"); 359 | exit(1); 360 | } 361 | } 362 | 363 | duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 364 | 365 | unsigned long operations = 0; 366 | for (i = 0; i < num_threads; i++) 367 | { 368 | #ifdef PRINT_OUTPUT 369 | printf("Thread 
%d\n", i); 370 | printf(" #operations : %lu\n", data[i].num_operations); 371 | #endif 372 | operations += data[i].num_operations; 373 | } 374 | printf("Duration : %d (ms)\n", duration); 375 | printf("#operations : %lu (%f / s)\n", operations, operations * 1000.0 / duration); 376 | printf("atomic count val: %llu\n", (long long unsigned) atomic_counter[0]); 377 | 378 | 379 | 380 | /* free(the_data); */ 381 | free(threads); 382 | free(data); 383 | 384 | return 0; 385 | } 386 | -------------------------------------------------------------------------------- /bmarks/individual_ops.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifndef __sparc__ 12 | #include 13 | #endif 14 | #include "gl_lock.h" 15 | #include "atomic_ops.h" 16 | #include "utils.h" 17 | #include "lock_if.h" 18 | 19 | #define XSTR(s) #s 20 | 21 | //number of concurres threads 22 | #define DEFAULT_NUM_THREADS 1 23 | //total number of locks 24 | #define DEFAULT_NUM_LOCKS 2 25 | //number of lock acquisitions in this test 26 | #define DEFAULT_NUM_ACQ 10000 27 | //delay between consecutive acquire attempts in cycles 28 | #define DEFAULT_ACQ_DELAY 100 29 | //delay between lock acquire and release in cycles 30 | #define DEFAULT_ACQ_DURATION 10 31 | //the total duration of a test 32 | #define DEFAULT_DURATION 10000 33 | 34 | static volatile int stop; 35 | 36 | __thread uint32_t phys_id; 37 | __thread uint32_t cluster_id; 38 | 39 | volatile uint32_t tail; 40 | volatile uint32_t head; 41 | 42 | volatile global_data the_locks; 43 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile local_data * local_th_data; 44 | 45 | typedef struct shared_data{ 46 | char the_data[64]; 47 | } shared_data; 48 | 49 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile shared_data * some_data; 50 | int duration; 51 | int num_locks; 52 | int num_threads; 53 | int 
acq_duration; 54 | int acq_delay; 55 | 56 | ticks correction; 57 | typedef struct barrier { 58 | pthread_cond_t complete; 59 | pthread_mutex_t mutex; 60 | int count; 61 | int crossing; 62 | } barrier_t; 63 | 64 | void barrier_init(barrier_t *b, int n) 65 | { 66 | pthread_cond_init(&b->complete, NULL); 67 | pthread_mutex_init(&b->mutex, NULL); 68 | b->count = n; 69 | b->crossing = 0; 70 | } 71 | 72 | void barrier_cross(barrier_t *b) 73 | { 74 | pthread_mutex_lock(&b->mutex); 75 | /* One more thread through */ 76 | b->crossing++; 77 | /* If not all here, wait */ 78 | if (b->crossing < b->count) { 79 | pthread_cond_wait(&b->complete, &b->mutex); 80 | } else { 81 | pthread_cond_broadcast(&b->complete); 82 | /* Reset for next time */ 83 | b->crossing = 0; 84 | } 85 | pthread_mutex_unlock(&b->mutex); 86 | } 87 | 88 | typedef struct thread_data { 89 | barrier_t *barrier; 90 | unsigned long num_acquires; 91 | ticks acquire_time; 92 | ticks release_time; 93 | int id; 94 | char padding[CACHE_LINE_SIZE]; 95 | } thread_data_t; 96 | 97 | void *test(void *data) 98 | { 99 | thread_data_t *d = (thread_data_t *)data; 100 | 101 | phys_id = the_cores[d->id]; 102 | cluster_id = get_cluster(phys_id); 103 | /* local initialization of locks */ 104 | local_th_data[d->id] = init_lock_array_local(phys_id, num_locks, the_locks); 105 | 106 | barrier_cross(d->barrier); 107 | ticks begin; 108 | ticks begin_release; 109 | 110 | local_data local_d = local_th_data[d->id]; 111 | while (stop == 0) { 112 | uint32_t my_ticket = IAF_U32(&tail); 113 | while (head != my_ticket) { 114 | PAUSE; 115 | } 116 | COMPILER_BARRIER; 117 | begin = getticks(); 118 | COMPILER_BARRIER; 119 | acquire_lock(&local_d[1],&the_locks[1]); 120 | COMPILER_BARRIER; 121 | ticks end = getticks() - begin - correction; 122 | d->acquire_time+=end; 123 | COMPILER_BARRIER; 124 | begin_release = getticks(); 125 | release_lock(&local_d[1],&the_locks[1]); 126 | MEM_BARRIER; 127 | COMPILER_BARRIER; 128 | d->release_time+=getticks() - 
begin_release - correction; 129 | 130 | #ifdef PRINT_OUTPUT 131 | fprintf(stderr, "%d %llu\n",d->id, (unsigned long long int) end); 132 | #endif 133 | #ifdef __tile__ 134 | MEM_BARRIER; 135 | #endif 136 | COMPILER_BARRIER; 137 | head++; 138 | d->num_acquires++; 139 | } 140 | /* Free locks */ 141 | free_lock_array_local(local_th_data[d->id], num_locks); 142 | if (acq_delay>0) { 143 | cpause(acq_delay); 144 | } 145 | 146 | return NULL; 147 | } 148 | 149 | 150 | void catcher(int sig) 151 | { 152 | static int nb = 0; 153 | printf("CAUGHT SIGNAL %d\n", sig); 154 | if (++nb >= 3) 155 | exit(1); 156 | } 157 | 158 | 159 | 160 | int main(int argc, char **argv) 161 | { 162 | set_cpu(the_cores[0]); 163 | struct option long_options[] = { 164 | // These options don't set a flag 165 | {"help", no_argument, NULL, 'h'}, 166 | {"locks", required_argument, NULL, 'l'}, 167 | {"duration", required_argument, NULL, 'd'}, 168 | {"num-threads", required_argument, NULL, 'n'}, 169 | {"acquire", required_argument, NULL, 'a'}, 170 | {"pause", required_argument, NULL, 'p'}, 171 | {NULL, 0, NULL, 0} 172 | }; 173 | 174 | correction = getticks_correction_calc(); 175 | some_data = (shared_data*)malloc(4 * sizeof(shared_data)); 176 | int i, c; 177 | thread_data_t *data; 178 | pthread_t *threads; 179 | pthread_attr_t attr; 180 | barrier_t barrier; 181 | struct timeval start, end; 182 | struct timespec timeout; 183 | duration = DEFAULT_DURATION; 184 | num_locks = DEFAULT_NUM_LOCKS; 185 | num_threads = DEFAULT_NUM_THREADS; 186 | acq_duration = DEFAULT_ACQ_DURATION; 187 | acq_delay = DEFAULT_ACQ_DELAY; 188 | 189 | head=1; 190 | tail=0; 191 | 192 | sigset_t block_set; 193 | 194 | while(1) { 195 | i = 0; 196 | c = getopt_long(argc, argv, "hl:d:n:a:p:", long_options, &i); 197 | 198 | if(c == -1) 199 | break; 200 | 201 | if(c == 0 && long_options[i].flag == 0) 202 | c = long_options[i].val; 203 | 204 | switch(c) { 205 | case 0: 206 | /* Flag is automatically set */ 207 | break; 208 | case 'h': 209 | 
printf("lock stress test\n" 210 | "\n" 211 | "Usage:\n" 212 | " stress_test [options...]\n" 213 | "\n" 214 | "Options:\n" 215 | " -h, --help\n" 216 | " Print this message\n" 217 | " -l, --lcoks \n" 218 | " Number of locks in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n" 219 | " -d, --duration \n" 220 | " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" 221 | " -n, --num-threads \n" 222 | " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" 223 | " -a, --acquire \n" 224 | " Number of cycles a lock is held (default=" XSTR(DEFAULT_ACQ_DURATION) ")\n" 225 | " -p, --pause \n" 226 | " Number of cycles between a lock release and the next acquire (default=" XSTR(DEFAULT_ACQ_DELAY) ")\n" 227 | ); 228 | exit(0); 229 | case 'l': 230 | num_locks = atoi(optarg); 231 | break; 232 | case 'd': 233 | duration = atoi(optarg); 234 | break; 235 | case 'n': 236 | num_threads = atoi(optarg); 237 | break; 238 | case 'a': 239 | acq_duration = atoi(optarg); 240 | break; 241 | case 'p': 242 | acq_delay = atoi(optarg); 243 | break; 244 | case '?': 245 | printf("Use -h or --help for help\n"); 246 | exit(0); 247 | default: 248 | exit(1); 249 | } 250 | } 251 | 252 | assert(duration >= 0); 253 | assert(num_locks >= 2); 254 | assert(num_threads > 0); 255 | assert(acq_duration >= 0); 256 | assert(acq_delay >= 0); 257 | acq_delay=acq_delay/NOP_DURATION; 258 | 259 | #ifdef PRINT_OUTPUT 260 | printf("Number of locks : %d\n", num_locks); 261 | printf("Duration : %d\n", duration); 262 | printf("Number of threads : %d\n", num_threads); 263 | printf("Lock is held for : %d\n", acq_duration); 264 | printf("Delay between locks : %d\n", acq_delay); 265 | printf("Type sizes : int=%d/long=%d/ptr=%d\n", 266 | (int)sizeof(int), 267 | (int)sizeof(long), 268 | (int)sizeof(void *)); 269 | #endif 270 | timeout.tv_sec = duration / 1000; 271 | timeout.tv_nsec = (duration % 1000) * 1000000; 272 | 273 | if ((data = (thread_data_t *)malloc(num_threads * 
sizeof(thread_data_t))) == NULL) { 274 | perror("malloc"); 275 | exit(1); 276 | } 277 | if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { 278 | perror("malloc"); 279 | exit(1); 280 | } 281 | 282 | local_th_data = (local_data *)malloc(num_threads*sizeof(local_data)); 283 | 284 | stop = 0; 285 | /* Init locks */ 286 | #ifdef PRINT_OUTPUT 287 | printf("Initializing locks\n"); 288 | #endif 289 | the_locks = init_lock_array_global(num_locks, num_threads); 290 | 291 | /* Access set from all threads */ 292 | barrier_init(&barrier, num_threads + 1); 293 | pthread_attr_init(&attr); 294 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 295 | for (i = 0; i < num_threads; i++) { 296 | #ifdef PRINT_OUTPUT 297 | printf("Creating thread %d\n", i); 298 | #endif 299 | data[i].id = i; 300 | data[i].num_acquires = 0; 301 | data[i].acquire_time = 0; 302 | data[i].release_time = 0; 303 | data[i].barrier = &barrier; 304 | if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) { 305 | fprintf(stderr, "Error creating thread\n"); 306 | exit(1); 307 | } 308 | } 309 | pthread_attr_destroy(&attr); 310 | 311 | /* Catch some signals */ 312 | if (signal(SIGHUP, catcher) == SIG_ERR || 313 | signal(SIGINT, catcher) == SIG_ERR || 314 | signal(SIGTERM, catcher) == SIG_ERR) { 315 | perror("signal"); 316 | exit(1); 317 | } 318 | 319 | /* Start threads */ 320 | barrier_cross(&barrier); 321 | 322 | #ifdef PRINT_OUTPUT 323 | printf("STARTING...\n"); 324 | #endif 325 | gettimeofday(&start, NULL); 326 | if (duration > 0) { 327 | nanosleep(&timeout, NULL); 328 | } else { 329 | sigemptyset(&block_set); 330 | sigsuspend(&block_set); 331 | } 332 | stop = 1; 333 | gettimeofday(&end, NULL); 334 | #ifdef PRINT_OUTPUT 335 | printf("STOPPING...\n"); 336 | #endif 337 | 338 | /* Wait for thread completion */ 339 | for (i = 0; i < num_threads; i++) { 340 | if (pthread_join(threads[i], NULL) != 0) { 341 | fprintf(stderr, "Error waiting for thread 
completion\n"); 342 | exit(1); 343 | } 344 | } 345 | 346 | #ifdef PRINT_OUTPUT 347 | fprintf(stderr, "%d %d %d %d\n",some_data[0].the_data[1],some_data[1].the_data[2],some_data[2].the_data[3],some_data[3].the_data[4]); 348 | #endif 349 | duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 350 | 351 | unsigned long acquires = 0; 352 | ticks total_acquire = 0; 353 | ticks total_release = 0; 354 | for (i = 0; i < num_threads; i++) { 355 | #ifdef PRINT_OUTPUT 356 | printf("Thread %d\n", i); 357 | printf(" #acquire : %lu\n", data[i].num_acquires); 358 | #endif 359 | total_acquire += data[i].acquire_time; 360 | total_release += data[i].release_time; 361 | acquires += data[i].num_acquires; 362 | 363 | } 364 | 365 | #ifdef PRINT_OUTPUT 366 | printf("Duration : %d (ms)\n", duration); 367 | printf("Average acquire duration: %lu (cycles)\n", total_acquire/acquires); 368 | printf("Acerage release duration: %lu(cycles)\n", total_release/acquires); 369 | printf("#acquires : %lu (%f / s)\n", acquires, acquires * 1000.0 / duration); 370 | 371 | #endif 372 | printf("%d %lu %lu\n",num_threads, total_acquire/acquires,total_release/acquires); 373 | /* Cleanup locks */ 374 | free_lock_array_global(the_locks, num_locks); 375 | 376 | free(threads); 377 | free(data); 378 | 379 | return 0; 380 | } 381 | -------------------------------------------------------------------------------- /bmarks/test_array_alloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: test_array_alloc.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Test which exposes bugs in lock algorithms and initialization methods; 7 | * By no means an exhaustive test, but generally exposes 8 | * a buggy algorithm; 9 | * Each thread continuously increments a global counter 10 | * protected by a lock; if the final counter value is not 11 | * equal to the sum of the increments by each thread, then 12 | * the lock algorithm 
has a bug. 13 | * This test works with lock array allocation methods; 14 | * 15 | * The MIT License (MIT) 16 | * 17 | * Copyright (c) 2013 Tudor David 18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 20 | * this software and associated documentation files (the "Software"), to deal in 21 | * the Software without restriction, including without limitation the rights to 22 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 23 | * the Software, and to permit persons to whom the Software is furnished to do so, 24 | * subject to the following conditions: 25 | * 26 | * The above copyright notice and this permission notice shall be included in all 27 | * copies or substantial portions of the Software. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 31 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 32 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 33 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
35 | */ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #ifndef __sparc__ 47 | #include 48 | #endif 49 | #include "gl_lock.h" 50 | #include "utils.h" 51 | #include "lock_if.h" 52 | #include "atomic_ops.h" 53 | 54 | uint64_t c[2] = {0, 0}; 55 | 56 | #define XSTR(s) #s 57 | 58 | //number of concurrent threads 59 | #define DEFAULT_NUM_THREADS 1 60 | //total duration of the test, in milliseconds 61 | #define DEFAULT_DURATION 10000 62 | 63 | #define DEFAULT_NUM_LOCKS 10 64 | 65 | static volatile int stop; 66 | 67 | __thread unsigned long* seeds; 68 | __thread uint32_t phys_id; 69 | __thread uint32_t cluster_id; 70 | global_data the_locks; 71 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile local_data* local_th_data; 72 | int num_locks; 73 | 74 | typedef struct shared_data{ 75 | volatile uint64_t counter; 76 | char padding[56]; 77 | } shared_data; 78 | 79 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile shared_data* protected_data; 80 | int duration; 81 | int num_threads; 82 | 83 | typedef struct barrier { 84 | pthread_cond_t complete; 85 | pthread_mutex_t mutex; 86 | int count; 87 | int crossing; 88 | } barrier_t; 89 | 90 | void barrier_init(barrier_t *b, int n) 91 | { 92 | pthread_cond_init(&b->complete, NULL); 93 | pthread_mutex_init(&b->mutex, NULL); 94 | b->count = n; 95 | b->crossing = 0; 96 | } 97 | 98 | void barrier_cross(barrier_t *b) 99 | { 100 | pthread_mutex_lock(&b->mutex); 101 | /* One more thread through */ 102 | b->crossing++; 103 | /* If not all here, wait */ 104 | if (b->crossing < b->count) { 105 | pthread_cond_wait(&b->complete, &b->mutex); 106 | } else { 107 | pthread_cond_broadcast(&b->complete); 108 | /* Reset for next time */ 109 | b->crossing = 0; 110 | } 111 | pthread_mutex_unlock(&b->mutex); 112 | } 113 | 114 | typedef struct thread_data { 115 | union 116 | { 117 | struct 118 | { 119 | barrier_t *barrier; 120 | unsigned long num_acquires; 121 | int 
id; 122 | }; 123 | char padding[CACHE_LINE_SIZE]; 124 | }; 125 | } thread_data_t; 126 | 127 | void *test_correctness(void *data) 128 | { 129 | thread_data_t *d = (thread_data_t *)data; 130 | phys_id = the_cores[d->id]; 131 | cluster_id = get_cluster(phys_id); 132 | 133 | local_th_data[d->id] = init_lock_array_local(phys_id, num_locks, the_locks); 134 | 135 | barrier_cross(d->barrier); 136 | 137 | local_data local_d = local_th_data[d->id]; 138 | while (stop == 0) { 139 | //just picked a random lock to acquire 140 | acquire_lock(&local_d[5],&the_locks[5]); 141 | protected_data->counter++; 142 | release_lock(&local_d[5],&the_locks[5]); 143 | d->num_acquires++; 144 | } 145 | 146 | free_lock_array_local(local_th_data[d->id], num_locks); 147 | return NULL; 148 | } 149 | 150 | 151 | void catcher(int sig) 152 | { 153 | static int nb = 0; 154 | printf("CAUGHT SIGNAL %d\n", sig); 155 | if (++nb >= 3) 156 | exit(1); 157 | } 158 | 159 | 160 | int main(int argc, char **argv) 161 | { 162 | set_cpu(the_cores[0]); 163 | struct option long_options[] = { 164 | // These options don't set a flag 165 | {"help", no_argument, NULL, 'h'}, 166 | {"duration", required_argument, NULL, 'd'}, 167 | {"num-threads", required_argument, NULL, 'n'}, 168 | {NULL, 0, NULL, 0} 169 | }; 170 | 171 | int i, c; 172 | thread_data_t *data; 173 | pthread_t *threads; 174 | pthread_attr_t attr; 175 | barrier_t barrier; 176 | struct timeval start, end; 177 | struct timespec timeout; 178 | duration = DEFAULT_DURATION; 179 | num_threads = DEFAULT_NUM_THREADS; 180 | num_locks = DEFAULT_NUM_LOCKS; 181 | sigset_t block_set; 182 | 183 | while(1) { 184 | i = 0; 185 | c = getopt_long(argc, argv, "h:d:n:", long_options, &i); 186 | 187 | if(c == -1) 188 | break; 189 | 190 | if(c == 0 && long_options[i].flag == 0) 191 | c = long_options[i].val; 192 | 193 | switch(c) { 194 | case 0: 195 | /* Flag is automatically set */ 196 | break; 197 | case 'h': 198 | printf("lock stress test\n" 199 | "\n" 200 | "Usage:\n" 201 | " 
stress_test [options...]\n" 202 | "\n" 203 | "Options:\n" 204 | " -h, --help\n" 205 | " Print this message\n" 206 | " -d, --duration \n" 207 | " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" 208 | " -n, --num-threads \n" 209 | " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" 210 | ); 211 | exit(0); 212 | case 'd': 213 | duration = atoi(optarg); 214 | break; 215 | case 'n': 216 | num_threads = atoi(optarg); 217 | break; 218 | case '?': 219 | printf("Use -h or --help for help\n"); 220 | exit(0); 221 | default: 222 | exit(1); 223 | } 224 | } 225 | assert(duration >= 0); 226 | assert(num_threads > 0); 227 | 228 | protected_data = (shared_data*) malloc(sizeof(shared_data)); 229 | protected_data->counter=0; 230 | #ifdef PRINT_OUTPUT 231 | printf("Duration : %d\n", duration); 232 | printf("Number of threads : %d\n", num_threads); 233 | #endif 234 | timeout.tv_sec = duration / 1000; 235 | timeout.tv_nsec = (duration % 1000) * 1000000; 236 | 237 | if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { 238 | perror("malloc"); 239 | exit(1); 240 | } 241 | if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { 242 | perror("malloc"); 243 | exit(1); 244 | } 245 | 246 | local_th_data = (local_data *)malloc(num_threads*sizeof(local_data)); 247 | 248 | stop = 0; 249 | /* Init locks */ 250 | #ifdef PRINT_OUTPUT 251 | printf("Initializing locks\n"); 252 | #endif 253 | the_locks = init_lock_array_global(num_locks, num_threads); 254 | 255 | /* Access set from all threads */ 256 | barrier_init(&barrier, num_threads + 1); 257 | pthread_attr_init(&attr); 258 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 259 | for (i = 0; i < num_threads; i++) { 260 | #ifdef PRINT_OUTPUT 261 | printf("Creating thread %d\n", i); 262 | #endif 263 | data[i].id = i; 264 | data[i].num_acquires = 0; 265 | data[i].barrier = &barrier; 266 | if (pthread_create(&threads[i], &attr, 
test_correctness, (void *)(&data[i])) != 0) { 267 | fprintf(stderr, "Error creating thread\n"); 268 | exit(1); 269 | } 270 | } 271 | pthread_attr_destroy(&attr); 272 | 273 | /* Catch some signals */ 274 | if (signal(SIGHUP, catcher) == SIG_ERR || 275 | signal(SIGINT, catcher) == SIG_ERR || 276 | signal(SIGTERM, catcher) == SIG_ERR) { 277 | perror("signal"); 278 | exit(1); 279 | } 280 | 281 | /* Start threads */ 282 | barrier_cross(&barrier); 283 | #ifdef PRINT_OUTPUT 284 | printf("STARTING...\n"); 285 | #endif 286 | gettimeofday(&start, NULL); 287 | if (duration > 0) { 288 | nanosleep(&timeout, NULL); 289 | } else { 290 | sigemptyset(&block_set); 291 | sigsuspend(&block_set); 292 | } 293 | stop = 1; 294 | gettimeofday(&end, NULL); 295 | #ifdef PRINT_OUTPUT 296 | printf("STOPPING...\n"); 297 | #endif 298 | /* Wait for thread completion */ 299 | for (i = 0; i < num_threads; i++) { 300 | if (pthread_join(threads[i], NULL) != 0) { 301 | fprintf(stderr, "Error waiting for thread completion\n"); 302 | exit(1); 303 | } 304 | } 305 | 306 | duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 307 | 308 | uint64_t acquires = 0; 309 | for (i = 0; i < num_threads; i++) { 310 | #ifdef PRINT_OUTPUT 311 | printf("Thread %d\n", i); 312 | printf(" #acquire : %lu\n", data[i].num_acquires); 313 | #endif 314 | acquires += data[i].num_acquires; 315 | } 316 | #ifdef PRINT_OUTPUT 317 | printf("Duration : %d (ms)\n", duration); 318 | #endif 319 | printf("Counter total : %llu, Expected: %llu\n", (unsigned long long) protected_data->counter, (unsigned long long) acquires); 320 | if (protected_data->counter != acquires) { 321 | printf("Incorrect lock behavior!\n"); 322 | } 323 | 324 | /* Cleanup locks */ 325 | free_lock_array_global(the_locks,num_locks); 326 | 327 | free(threads); 328 | free(data); 329 | 330 | return 0; 331 | } 332 | -------------------------------------------------------------------------------- /bmarks/test_correctness.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * File: test_correctness.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Test which exposes bugs in lock algorithms; 7 | * By no means an exhaustive test, but generally exposes 8 | * a buggy algorithm; 9 | * Each thread continuously increments a global counter 10 | * protected by a lock; if the final counter value is not 11 | * equal to the sum of the increments by each thread, then 12 | * the lock algorithm has a bug. 13 | * 14 | * The MIT License (MIT) 15 | * 16 | * Copyright (c) 2013 Tudor David 17 | * 18 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 19 | * this software and associated documentation files (the "Software"), to deal in 20 | * the Software without restriction, including without limitation the rights to 21 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 22 | * the Software, and to permit persons to whom the Software is furnished to do so, 23 | * subject to the following conditions: 24 | * 25 | * The above copyright notice and this permission notice shall be included in all 26 | * copies or substantial portions of the Software. 27 | * 28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 30 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 31 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 32 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
34 | */ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #ifndef __sparc__ 46 | #include 47 | #endif 48 | #include "gl_lock.h" 49 | #include "utils.h" 50 | #include "lock_if.h" 51 | #include "atomic_ops.h" 52 | 53 | uint64_t c[2] = {0, 0}; 54 | 55 | #define XSTR(s) #s 56 | 57 | //number of concurrent threads 58 | #define DEFAULT_NUM_THREADS 1 59 | //total duration of the test, in milliseconds 60 | #define DEFAULT_DURATION 10000 61 | 62 | static volatile int stop; 63 | 64 | __thread unsigned long* seeds; 65 | __thread uint32_t phys_id; 66 | __thread uint32_t cluster_id; 67 | lock_global_data the_lock; 68 | __attribute__((aligned(CACHE_LINE_SIZE))) lock_local_data* local_th_data; 69 | 70 | typedef struct shared_data{ 71 | volatile uint64_t counter; 72 | char padding[56]; 73 | } shared_data; 74 | 75 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile shared_data* protected_data; 76 | int duration; 77 | int num_threads; 78 | 79 | typedef struct barrier { 80 | pthread_cond_t complete; 81 | pthread_mutex_t mutex; 82 | int count; 83 | int crossing; 84 | } barrier_t; 85 | 86 | void barrier_init(barrier_t *b, int n) 87 | { 88 | pthread_cond_init(&b->complete, NULL); 89 | pthread_mutex_init(&b->mutex, NULL); 90 | b->count = n; 91 | b->crossing = 0; 92 | } 93 | 94 | void barrier_cross(barrier_t *b) 95 | { 96 | pthread_mutex_lock(&b->mutex); 97 | /* One more thread through */ 98 | b->crossing++; 99 | /* If not all here, wait */ 100 | if (b->crossing < b->count) { 101 | pthread_cond_wait(&b->complete, &b->mutex); 102 | } else { 103 | pthread_cond_broadcast(&b->complete); 104 | /* Reset for next time */ 105 | b->crossing = 0; 106 | } 107 | pthread_mutex_unlock(&b->mutex); 108 | } 109 | 110 | typedef struct thread_data { 111 | union 112 | { 113 | struct 114 | { 115 | barrier_t *barrier; 116 | unsigned long num_acquires; 117 | int id; 118 | }; 119 | char padding[CACHE_LINE_SIZE]; 120 | }; 121 | 
} thread_data_t; 122 | 123 | void *test_correctness(void *data) 124 | { 125 | thread_data_t *d = (thread_data_t *)data; 126 | phys_id = the_cores[d->id]; 127 | cluster_id = get_cluster(phys_id); 128 | 129 | init_lock_local(phys_id, &the_lock, &(local_th_data[d->id])); 130 | 131 | barrier_cross(d->barrier); 132 | 133 | lock_local_data* local_d = &(local_th_data[d->id]); 134 | while (stop == 0) { 135 | acquire_lock(local_d,&the_lock); 136 | protected_data->counter++; 137 | release_lock(local_d,&the_lock); 138 | d->num_acquires++; 139 | } 140 | 141 | free_lock_local(local_th_data[d->id]); 142 | return NULL; 143 | } 144 | 145 | 146 | void catcher(int sig) 147 | { 148 | static int nb = 0; 149 | printf("CAUGHT SIGNAL %d\n", sig); 150 | if (++nb >= 3) 151 | exit(1); 152 | } 153 | 154 | 155 | int main(int argc, char **argv) 156 | { 157 | set_cpu(the_cores[0]); 158 | struct option long_options[] = { 159 | // These options don't set a flag 160 | {"help", no_argument, NULL, 'h'}, 161 | {"duration", required_argument, NULL, 'd'}, 162 | {"num-threads", required_argument, NULL, 'n'}, 163 | {NULL, 0, NULL, 0} 164 | }; 165 | 166 | int i, c; 167 | thread_data_t *data; 168 | pthread_t *threads; 169 | pthread_attr_t attr; 170 | barrier_t barrier; 171 | struct timeval start, end; 172 | struct timespec timeout; 173 | duration = DEFAULT_DURATION; 174 | num_threads = DEFAULT_NUM_THREADS; 175 | sigset_t block_set; 176 | 177 | while(1) { 178 | i = 0; 179 | c = getopt_long(argc, argv, "h:d:n:", long_options, &i); 180 | 181 | if(c == -1) 182 | break; 183 | 184 | if(c == 0 && long_options[i].flag == 0) 185 | c = long_options[i].val; 186 | 187 | switch(c) { 188 | case 0: 189 | /* Flag is automatically set */ 190 | break; 191 | case 'h': 192 | printf("lock stress test\n" 193 | "\n" 194 | "Usage:\n" 195 | " stress_test [options...]\n" 196 | "\n" 197 | "Options:\n" 198 | " -h, --help\n" 199 | " Print this message\n" 200 | " -d, --duration \n" 201 | " Test duration in milliseconds (0=infinite, 
default=" XSTR(DEFAULT_DURATION) ")\n" 202 | " -n, --num-threads \n" 203 | " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" 204 | ); 205 | exit(0); 206 | case 'd': 207 | duration = atoi(optarg); 208 | break; 209 | case 'n': 210 | num_threads = atoi(optarg); 211 | break; 212 | case '?': 213 | printf("Use -h or --help for help\n"); 214 | exit(0); 215 | default: 216 | exit(1); 217 | } 218 | } 219 | assert(duration >= 0); 220 | assert(num_threads > 0); 221 | 222 | protected_data = (shared_data*) malloc(sizeof(shared_data)); 223 | protected_data->counter=0; 224 | #ifdef PRINT_OUTPUT 225 | printf("Duration : %d\n", duration); 226 | printf("Number of threads : %d\n", num_threads); 227 | #endif 228 | timeout.tv_sec = duration / 1000; 229 | timeout.tv_nsec = (duration % 1000) * 1000000; 230 | 231 | if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { 232 | perror("malloc"); 233 | exit(1); 234 | } 235 | if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { 236 | perror("malloc"); 237 | exit(1); 238 | } 239 | 240 | local_th_data = (lock_local_data *)malloc(num_threads*sizeof(lock_local_data)); 241 | 242 | stop = 0; 243 | /* Init locks */ 244 | #ifdef PRINT_OUTPUT 245 | printf("Initializing locks\n"); 246 | #endif 247 | init_lock_global_nt(num_threads,&the_lock); 248 | 249 | /* Access set from all threads */ 250 | barrier_init(&barrier, num_threads + 1); 251 | pthread_attr_init(&attr); 252 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 253 | for (i = 0; i < num_threads; i++) { 254 | #ifdef PRINT_OUTPUT 255 | printf("Creating thread %d\n", i); 256 | #endif 257 | data[i].id = i; 258 | data[i].num_acquires = 0; 259 | data[i].barrier = &barrier; 260 | if (pthread_create(&threads[i], &attr, test_correctness, (void *)(&data[i])) != 0) { 261 | fprintf(stderr, "Error creating thread\n"); 262 | exit(1); 263 | } 264 | } 265 | pthread_attr_destroy(&attr); 266 | 267 | /* Catch some signals */ 268 | if 
(signal(SIGHUP, catcher) == SIG_ERR || 269 | signal(SIGINT, catcher) == SIG_ERR || 270 | signal(SIGTERM, catcher) == SIG_ERR) { 271 | perror("signal"); 272 | exit(1); 273 | } 274 | 275 | /* Start threads */ 276 | barrier_cross(&barrier); 277 | #ifdef PRINT_OUTPUT 278 | printf("STARTING...\n"); 279 | #endif 280 | gettimeofday(&start, NULL); 281 | if (duration > 0) { 282 | nanosleep(&timeout, NULL); 283 | } else { 284 | sigemptyset(&block_set); 285 | sigsuspend(&block_set); 286 | } 287 | stop = 1; 288 | gettimeofday(&end, NULL); 289 | #ifdef PRINT_OUTPUT 290 | printf("STOPPING...\n"); 291 | #endif 292 | /* Wait for thread completion */ 293 | for (i = 0; i < num_threads; i++) { 294 | if (pthread_join(threads[i], NULL) != 0) { 295 | fprintf(stderr, "Error waiting for thread completion\n"); 296 | exit(1); 297 | } 298 | } 299 | 300 | duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 301 | 302 | uint64_t acquires = 0; 303 | for (i = 0; i < num_threads; i++) { 304 | #ifdef PRINT_OUTPUT 305 | printf("Thread %d\n", i); 306 | printf(" #acquire : %lu\n", data[i].num_acquires); 307 | #endif 308 | acquires += data[i].num_acquires; 309 | } 310 | #ifdef PRINT_OUTPUT 311 | printf("Duration : %d (ms)\n", duration); 312 | #endif 313 | printf("Counter total : %llu, Expected: %llu\n", (unsigned long long) protected_data->counter, (unsigned long long) acquires); 314 | if (protected_data->counter != acquires) { 315 | printf("Incorrect lock behavior!\n"); 316 | } 317 | 318 | /* Cleanup locks */ 319 | free_lock_global(the_lock); 320 | 321 | free(threads); 322 | free(data); 323 | 324 | return 0; 325 | } 326 | -------------------------------------------------------------------------------- /bmarks/test_trylock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: test_trylock.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Test which exposes bugs in trylock methods of the lock 
algorithms; 7 | * By no means an exhaustive test, but generally exposes 8 | * a buggy algorithm; 9 | * Each thread continuously increments a global counter 10 | * protected by a lock; if the final counter value is not 11 | * equal to the sum of the increments by each thread, then 12 | * the lock algorithm has a bug. 13 | * 14 | * The MIT License (MIT) 15 | * 16 | * Copyright (c) 2013 Tudor David 17 | * 18 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 19 | * this software and associated documentation files (the "Software"), to deal in 20 | * the Software without restriction, including without limitation the rights to 21 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 22 | * the Software, and to permit persons to whom the Software is furnished to do so, 23 | * subject to the following conditions: 24 | * 25 | * The above copyright notice and this permission notice shall be included in all 26 | * copies or substantial portions of the Software. 27 | * 28 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 29 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 30 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 31 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 32 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
34 | */ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #ifndef __sparc__ 46 | #include 47 | #endif 48 | #include "gl_lock.h" 49 | #include "utils.h" 50 | #include "lock_if.h" 51 | #include "atomic_ops.h" 52 | 53 | uint64_t c[2] = {0, 0}; 54 | 55 | #define XSTR(s) #s 56 | 57 | //number of concurrent threads 58 | #define DEFAULT_NUM_THREADS 1 59 | //total duration of the test, in milliseconds 60 | #define DEFAULT_DURATION 10000 61 | 62 | static volatile int stop; 63 | 64 | __thread unsigned long* seeds; 65 | __thread uint32_t phys_id; 66 | __thread uint32_t cluster_id; 67 | lock_global_data the_lock; 68 | __attribute__((aligned(CACHE_LINE_SIZE))) lock_local_data* local_th_data; 69 | 70 | typedef struct shared_data{ 71 | volatile uint64_t counter; 72 | char padding[56]; 73 | } shared_data; 74 | 75 | __attribute__((aligned(CACHE_LINE_SIZE))) volatile shared_data* protected_data; 76 | int duration; 77 | int num_threads; 78 | 79 | typedef struct barrier { 80 | pthread_cond_t complete; 81 | pthread_mutex_t mutex; 82 | int count; 83 | int crossing; 84 | } barrier_t; 85 | 86 | void barrier_init(barrier_t *b, int n) 87 | { 88 | pthread_cond_init(&b->complete, NULL); 89 | pthread_mutex_init(&b->mutex, NULL); 90 | b->count = n; 91 | b->crossing = 0; 92 | } 93 | 94 | void barrier_cross(barrier_t *b) 95 | { 96 | pthread_mutex_lock(&b->mutex); 97 | /* One more thread through */ 98 | b->crossing++; 99 | /* If not all here, wait */ 100 | if (b->crossing < b->count) { 101 | pthread_cond_wait(&b->complete, &b->mutex); 102 | } else { 103 | pthread_cond_broadcast(&b->complete); 104 | /* Reset for next time */ 105 | b->crossing = 0; 106 | } 107 | pthread_mutex_unlock(&b->mutex); 108 | } 109 | 110 | typedef struct thread_data { 111 | union 112 | { 113 | struct 114 | { 115 | barrier_t *barrier; 116 | unsigned long num_acquires; 117 | int id; 118 | }; 119 | char padding[CACHE_LINE_SIZE]; 120 | }; 121 | 
} thread_data_t; 122 | 123 | void *test_correctness(void *data) 124 | { 125 | thread_data_t *d = (thread_data_t *)data; 126 | phys_id = the_cores[d->id]; 127 | cluster_id = get_cluster(phys_id); 128 | 129 | init_lock_local(phys_id, &the_lock, &(local_th_data[d->id])); 130 | 131 | barrier_cross(d->barrier); 132 | 133 | lock_local_data* local_d = &(local_th_data[d->id]); 134 | while (stop == 0) { 135 | while (acquire_trylock(local_d,&the_lock) !=0) { 136 | PAUSE; 137 | } 138 | protected_data->counter++; 139 | release_trylock(local_d,&the_lock); 140 | d->num_acquires++; 141 | } 142 | 143 | free_lock_local(local_th_data[d->id]); 144 | return NULL; 145 | } 146 | 147 | 148 | void catcher(int sig) 149 | { 150 | static int nb = 0; 151 | printf("CAUGHT SIGNAL %d\n", sig); 152 | if (++nb >= 3) 153 | exit(1); 154 | } 155 | 156 | 157 | int main(int argc, char **argv) 158 | { 159 | set_cpu(the_cores[0]); 160 | struct option long_options[] = { 161 | // These options don't set a flag 162 | {"help", no_argument, NULL, 'h'}, 163 | {"duration", required_argument, NULL, 'd'}, 164 | {"num-threads", required_argument, NULL, 'n'}, 165 | {NULL, 0, NULL, 0} 166 | }; 167 | 168 | int i, c; 169 | thread_data_t *data; 170 | pthread_t *threads; 171 | pthread_attr_t attr; 172 | barrier_t barrier; 173 | struct timeval start, end; 174 | struct timespec timeout; 175 | duration = DEFAULT_DURATION; 176 | num_threads = DEFAULT_NUM_THREADS; 177 | sigset_t block_set; 178 | 179 | while(1) { 180 | i = 0; 181 | c = getopt_long(argc, argv, "h:d:n:", long_options, &i); 182 | 183 | if(c == -1) 184 | break; 185 | 186 | if(c == 0 && long_options[i].flag == 0) 187 | c = long_options[i].val; 188 | 189 | switch(c) { 190 | case 0: 191 | /* Flag is automatically set */ 192 | break; 193 | case 'h': 194 | printf("lock stress test\n" 195 | "\n" 196 | "Usage:\n" 197 | " stress_test [options...]\n" 198 | "\n" 199 | "Options:\n" 200 | " -h, --help\n" 201 | " Print this message\n" 202 | " -d, --duration \n" 203 | " Test 
duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" 204 | " -n, --num-threads \n" 205 | " Number of threads (default=" XSTR(DEFAULT_NUM_THREADS) ")\n" 206 | ); 207 | exit(0); 208 | case 'd': 209 | duration = atoi(optarg); 210 | break; 211 | case 'n': 212 | num_threads = atoi(optarg); 213 | break; 214 | case '?': 215 | printf("Use -h or --help for help\n"); 216 | exit(0); 217 | default: 218 | exit(1); 219 | } 220 | } 221 | assert(duration >= 0); 222 | assert(num_threads > 0); 223 | 224 | protected_data = (shared_data*) malloc(sizeof(shared_data)); 225 | protected_data->counter=0; 226 | #ifdef PRINT_OUTPUT 227 | printf("Duration : %d\n", duration); 228 | printf("Number of threads : %d\n", num_threads); 229 | #endif 230 | timeout.tv_sec = duration / 1000; 231 | timeout.tv_nsec = (duration % 1000) * 1000000; 232 | 233 | if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { 234 | perror("malloc"); 235 | exit(1); 236 | } 237 | if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { 238 | perror("malloc"); 239 | exit(1); 240 | } 241 | 242 | local_th_data = (lock_local_data *)malloc(num_threads*sizeof(lock_local_data)); 243 | 244 | stop = 0; 245 | /* Init locks */ 246 | #ifdef PRINT_OUTPUT 247 | printf("Initializing locks\n"); 248 | #endif 249 | init_lock_global_nt(num_threads,&the_lock); 250 | 251 | /* Access set from all threads */ 252 | barrier_init(&barrier, num_threads + 1); 253 | pthread_attr_init(&attr); 254 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 255 | for (i = 0; i < num_threads; i++) { 256 | #ifdef PRINT_OUTPUT 257 | printf("Creating thread %d\n", i); 258 | #endif 259 | data[i].id = i; 260 | data[i].num_acquires = 0; 261 | data[i].barrier = &barrier; 262 | if (pthread_create(&threads[i], &attr, test_correctness, (void *)(&data[i])) != 0) { 263 | fprintf(stderr, "Error creating thread\n"); 264 | exit(1); 265 | } 266 | } 267 | pthread_attr_destroy(&attr); 268 | 
269 | /* Catch some signals */ 270 | if (signal(SIGHUP, catcher) == SIG_ERR || 271 | signal(SIGINT, catcher) == SIG_ERR || 272 | signal(SIGTERM, catcher) == SIG_ERR) { 273 | perror("signal"); 274 | exit(1); 275 | } 276 | 277 | /* Start threads */ 278 | barrier_cross(&barrier); 279 | #ifdef PRINT_OUTPUT 280 | printf("STARTING...\n"); 281 | #endif 282 | gettimeofday(&start, NULL); 283 | if (duration > 0) { 284 | nanosleep(&timeout, NULL); 285 | } else { 286 | sigemptyset(&block_set); 287 | sigsuspend(&block_set); 288 | } 289 | stop = 1; 290 | gettimeofday(&end, NULL); 291 | #ifdef PRINT_OUTPUT 292 | printf("STOPPING...\n"); 293 | #endif 294 | /* Wait for thread completion */ 295 | for (i = 0; i < num_threads; i++) { 296 | if (pthread_join(threads[i], NULL) != 0) { 297 | fprintf(stderr, "Error waiting for thread completion\n"); 298 | exit(1); 299 | } 300 | } 301 | 302 | duration = (end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 303 | 304 | uint64_t acquires = 0; 305 | for (i = 0; i < num_threads; i++) { 306 | printf("Thread %d\n", i); 307 | printf(" # acquires : %lu\n", data[i].num_acquires); 308 | acquires += data[i].num_acquires; 309 | } 310 | #ifdef PRINT_OUTPUT 311 | printf("Duration : %d (ms)\n", duration); 312 | #endif 313 | printf("Counter total : %llu, Expected: %llu\n", (unsigned long long) protected_data->counter, (unsigned long long) acquires); 314 | if (protected_data->counter != acquires) { 315 | printf("Incorrect lock behavior!\n"); 316 | } 317 | 318 | /* Cleanup locks */ 319 | free_lock_global(the_lock); 320 | 321 | free(threads); 322 | free(data); 323 | 324 | return 0; 325 | } 326 | -------------------------------------------------------------------------------- /bmarks/uncontended.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifndef __sparc__ 12 | 
/*
 * Reusable counting barrier built on a mutex and a condition variable.
 *
 * count      - number of threads that must arrive before anyone proceeds
 * crossing   - number of threads that have arrived in the current round
 * generation - round counter, bumped by the last arriver; waiters sleep
 *              until it changes, which makes them immune to spurious
 *              wakeups and to early arrivals of the next round
 */
typedef struct barrier {
    pthread_cond_t complete;
    pthread_mutex_t mutex;
    int count;
    int crossing;
    unsigned int generation;
} barrier_t;

/*
 * Prepare barrier b for rounds of n participating threads.
 */
void barrier_init(barrier_t *b, int n)
{
    pthread_cond_init(&b->complete, NULL);
    pthread_mutex_init(&b->mutex, NULL);
    b->count = n;
    b->crossing = 0;
    b->generation = 0;
}

/*
 * Block the caller until b->count threads have called barrier_cross().
 * The last thread to arrive resets the barrier and wakes everyone else.
 */
void barrier_cross(barrier_t *b)
{
    pthread_mutex_lock(&b->mutex);
    /* One more thread through */
    b->crossing++;
    if (b->crossing < b->count) {
        /*
         * Not all here yet: wait for the round to complete.
         * pthread_cond_wait() may return spuriously, so re-check the
         * round counter instead of trusting a single wakeup.
         */
        unsigned int my_generation = b->generation;
        while (my_generation == b->generation) {
            pthread_cond_wait(&b->complete, &b->mutex);
        }
    } else {
        /* Last arriver: reset for next time, then release the waiters */
        b->crossing = 0;
        b->generation++;
        pthread_cond_broadcast(&b->complete);
    }
    pthread_mutex_unlock(&b->mutex);
}
int nb = 0; 163 | printf("CAUGHT SIGNAL %d\n", sig); 164 | if (++nb >= 3) 165 | exit(1); 166 | } 167 | 168 | 169 | int main(int argc, char **argv) 170 | { 171 | set_cpu(the_cores[0]); 172 | struct option long_options[] = { 173 | // These options don't set a flag 174 | {"help", no_argument, NULL, 'h'}, 175 | {"locks", required_argument, NULL, 'l'}, 176 | {"duration", required_argument, NULL, 'd'}, 177 | {"remote-core", required_argument, NULL, 'r'}, 178 | {"acquire", required_argument, NULL, 'a'}, 179 | {"pause", required_argument, NULL, 'p'}, 180 | {NULL, 0, NULL, 0} 181 | }; 182 | 183 | correction = getticks_correction_calc(); 184 | some_data = (shared_data*)malloc(4 * sizeof(shared_data)); 185 | int i, c; 186 | thread_data_t *data; 187 | pthread_t *threads; 188 | pthread_attr_t attr; 189 | barrier_t barrier; 190 | struct timeval start, end; 191 | struct timespec timeout; 192 | duration = DEFAULT_DURATION; 193 | num_locks = DEFAULT_NUM_LOCKS; 194 | num_threads = DEFAULT_NUM_THREADS; 195 | acq_duration = DEFAULT_ACQ_DURATION; 196 | acq_delay = DEFAULT_ACQ_DELAY; 197 | home_core = the_cores[DEFAULT_HOME_CORE]; 198 | remote_core = DEFAULT_REMOTE_CORE; 199 | 200 | head=1; 201 | tail=0; 202 | 203 | sigset_t block_set; 204 | 205 | while(1) { 206 | i = 0; 207 | c = getopt_long(argc, argv, "hl:d:a:r:p:", long_options, &i); 208 | 209 | if(c == -1) 210 | break; 211 | 212 | if(c == 0 && long_options[i].flag == 0) 213 | c = long_options[i].val; 214 | 215 | switch(c) { 216 | case 0: 217 | /* Flag is automatically set */ 218 | break; 219 | case 'h': 220 | printf("lock stress test\n" 221 | "\n" 222 | "Usage:\n" 223 | " stress_test [options...]\n" 224 | "\n" 225 | "Options:\n" 226 | " -h, --help\n" 227 | " Print this message\n" 228 | " -l, --locks \n" 229 | " Number of locks in the test (default=" XSTR(DEFAULT_NUM_LOCKS) ")\n" 230 | " -d, --duration \n" 231 | " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" 232 | " -r, --remote-core \n" 233 | " 
Remote core (default=" XSTR(DEFAULT_REMOTE_CORE) ")\n" 234 | " -a, --acquire \n" 235 | " Number of cycles a lock is held (default=" XSTR(DEFAULT_ACQ_DURATION) ")\n" 236 | " -p, --pause \n" 237 | " Number of cycles between a lock release and the next acquire (default=" XSTR(DEFAULT_ACQ_DELAY) ")\n" 238 | ); 239 | exit(0); 240 | case 'l': 241 | num_locks = atoi(optarg); 242 | break; 243 | case 'r': 244 | remote_core = atoi(optarg); 245 | break; 246 | 247 | case 'd': 248 | duration = atoi(optarg); 249 | break; 250 | case 'a': 251 | acq_duration = atoi(optarg); 252 | break; 253 | case 'p': 254 | acq_delay = atoi(optarg); 255 | break; 256 | case '?': 257 | printf("Use -h or --help for help\n"); 258 | exit(0); 259 | default: 260 | exit(1); 261 | } 262 | } 263 | 264 | assert(duration >= 0); 265 | assert(num_locks >= 2); 266 | assert(num_threads > 0); 267 | assert(acq_duration >= 0); 268 | assert(acq_delay >= 0); 269 | 270 | #ifdef PRINT_OUTPUT 271 | printf("Number of locks : %d\n", num_locks); 272 | printf("Duration : %d\n", duration); 273 | printf("Home core : %d\n", home_core); 274 | printf("Remote core : %d\n", remote_core); 275 | printf("Number of threads : %d\n", num_threads); 276 | printf("Lock is held for : %d\n", acq_duration); 277 | printf("Delay between locks : %d\n", acq_delay); 278 | printf("Type sizes : int=%d/long=%d/ptr=%d\n", 279 | (int)sizeof(int), 280 | (int)sizeof(long), 281 | (int)sizeof(void *)); 282 | #endif 283 | timeout.tv_sec = duration / 1000; 284 | timeout.tv_nsec = (duration % 1000) * 1000000; 285 | 286 | if (home_core==remote_core) num_threads=1; 287 | 288 | if ((data = (thread_data_t *)malloc(num_threads * sizeof(thread_data_t))) == NULL) { 289 | perror("malloc"); 290 | exit(1); 291 | } 292 | if ((threads = (pthread_t *)malloc(num_threads * sizeof(pthread_t))) == NULL) { 293 | perror("malloc"); 294 | exit(1); 295 | } 296 | 297 | local_th_data = (local_data *)malloc(num_threads*sizeof(local_data)); 298 | 299 | stop = 0; 300 | /* Init locks */ 
301 | #ifdef PRINT_OUTPUT 302 | printf("Initializing locks\n"); 303 | #endif 304 | the_locks = init_lock_array_global(num_locks, num_threads); 305 | 306 | /* Access set from all threads */ 307 | barrier_init(&barrier, num_threads + 1); 308 | pthread_attr_init(&attr); 309 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 310 | for (i = 0; i < num_threads; i++) { 311 | #ifdef PRINT_OUTPUT 312 | printf("Creating thread %d\n", i); 313 | #endif 314 | data[i].id = i; 315 | if (i==0) data[i].the_core = home_core; 316 | else if (i==1) data[i].the_core = remote_core; 317 | data[i].num_acquires = 0; 318 | data[i].acquire_time = 0; 319 | data[i].release_time = 0; 320 | data[i].barrier = &barrier; 321 | if (pthread_create(&threads[i], &attr, test, (void *)(&data[i])) != 0) { 322 | fprintf(stderr, "Error creating thread\n"); 323 | exit(1); 324 | } 325 | } 326 | pthread_attr_destroy(&attr); 327 | 328 | /* Catch some signals */ 329 | if (signal(SIGHUP, catcher) == SIG_ERR || 330 | signal(SIGINT, catcher) == SIG_ERR || 331 | signal(SIGTERM, catcher) == SIG_ERR) { 332 | perror("signal"); 333 | exit(1); 334 | } 335 | 336 | /* Start threads */ 337 | barrier_cross(&barrier); 338 | 339 | #ifdef PRINT_OUTPUT 340 | printf("STARTING...\n"); 341 | #endif 342 | gettimeofday(&start, NULL); 343 | if (duration > 0) { 344 | nanosleep(&timeout, NULL); 345 | } else { 346 | sigemptyset(&block_set); 347 | sigsuspend(&block_set); 348 | } 349 | stop = 1; 350 | gettimeofday(&end, NULL); 351 | #ifdef PRINT_OUTPUT 352 | printf("STOPPING...\n"); 353 | #endif 354 | 355 | /* Wait for thread completion */ 356 | for (i = 0; i < num_threads; i++) { 357 | if (pthread_join(threads[i], NULL) != 0) { 358 | fprintf(stderr, "Error waiting for thread completion\n"); 359 | exit(1); 360 | } 361 | } 362 | 363 | #ifdef PRINT_OUTPUT 364 | fprintf(stderr, "%d %d %d %d\n",some_data[0].the_data[1],some_data[1].the_data[2],some_data[2].the_data[3],some_data[3].the_data[4]); 365 | #endif 366 | duration = 
(end.tv_sec * 1000 + end.tv_usec / 1000) - (start.tv_sec * 1000 + start.tv_usec / 1000); 367 | 368 | unsigned long acquires = 0; 369 | ticks total_acquire = 0; 370 | ticks total_release = 0; 371 | for (i = 0; i < num_threads; i++) { 372 | #ifdef PRINT_OUTPUT 373 | printf("Thread %d\n", i); 374 | printf(" #acquire : %lu\n", data[i].num_acquires); 375 | #endif 376 | total_acquire += data[i].acquire_time; 377 | total_release += data[i].release_time; 378 | acquires += data[i].num_acquires; 379 | 380 | } 381 | 382 | #ifdef PRINT_OUTPUT 383 | printf("Duration : %d (ms)\n", duration); 384 | printf("Average acquire duration: %lu (cycles)\n", total_acquire/acquires); 385 | printf("Acerage release duration: %lu(cycles)\n", total_release/acquires); 386 | printf("#acquires : %lu (%f / s)\n", acquires, acquires * 1000.0 / duration); 387 | 388 | #endif 389 | printf("%d %lu %lu\n",get_cluster(remote_core), total_acquire/acquires,total_release/acquires); 390 | /* Cleanup locks */ 391 | free_lock_array_global(the_locks, num_locks); 392 | 393 | free(threads); 394 | free(data); 395 | 396 | return 0; 397 | } 398 | -------------------------------------------------------------------------------- /include/.gitignore: -------------------------------------------------------------------------------- 1 | /cscope.out 2 | -------------------------------------------------------------------------------- /include/alock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: alock.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * array based lock, as described in Herlihy and Shavit's "Art of Multiprocessor Programming" 7 | * somewhat similar to clh, but requires more space, and needs an upper bound on the possible 8 | * number of processes 9 | * 10 | * The MIT License (MIT) 11 | * 12 | * Copyright (c) 2013 Tudor David 13 | * 14 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 15 | * this software and 
associated documentation files (the "Software"), to deal in 16 | * the Software without restriction, including without limitation the rights to 17 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 18 | * the Software, and to permit persons to whom the Software is furnished to do so, 19 | * subject to the following conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be included in all 22 | * copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 26 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 27 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 28 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 30 | */ 31 | 32 | #ifndef _ALOCK_H_ 33 | #define _ALOCK_H_ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #ifndef __sparc__ 43 | #include 44 | #endif 45 | #include 46 | #include "utils.h" 47 | #include "atomic_ops.h" 48 | 49 | /* 50 | * this lock needs to know the maximum number of processes it can handle 51 | */ 52 | //TODO set this to a predefined value independent of the architecture? 
/*
 * Per-platform upper bound on the number of processes one array lock can
 * serve; it is the length of the flags[] array in lock_shared_t.
 */
#ifdef __sparc__
#define MAX_NUM_PROCESSES 64
#elif defined(__tile__)
#define MAX_NUM_PROCESSES 36
#elif defined(OPTERON)
#define MAX_NUM_PROCESSES 48
#else
#define MAX_NUM_PROCESSES 80
#endif

/*
 * One slot of the lock's flag array. With ADD_PADDING the slot is grown to
 * CACHE_LINE_SIZE bytes (2-byte flag + padding).
 */
typedef struct flag_line {
    volatile uint16_t flag;
#ifdef ADD_PADDING
    uint8_t padding[CACHE_LINE_SIZE-2];
#endif
} flag_t;

/*
 * State of one array lock that is shared by all threads: a tail counter,
 * a size field and one flag slot per possible process.
 */
typedef struct lock_shared {
    volatile uint32_t tail;
    uint32_t size;
    flag_t flags[MAX_NUM_PROCESSES];
} lock_shared_t;

/*
 * Per-thread handle on an array lock: an index plus a pointer to the
 * shared state. NOTE(review): my_index is presumably the flags[] slot the
 * thread currently owns -- confirm against src/alock.c.
 */
typedef struct lock {
    uint32_t my_index;
    lock_shared_t* shared_data;
} array_lock_t;


/*
 * lock array initialization and destruction
 */
lock_shared_t* init_alock_array_global(uint32_t num_locks, uint32_t num_processes);

array_lock_t* init_alock_array_local(uint32_t thread_num, uint32_t num_locks, lock_shared_t* the_locks);

void end_alock_array_local(array_lock_t* local_locks, uint32_t size);

void end_alock_array_global(lock_shared_t* the_locks, uint32_t size);

/*
 * single lock initialization and destruction
 */
int init_alock_global(uint32_t num_processes, lock_shared_t* the_lock);

int init_alock_local(uint32_t thread_num, lock_shared_t* the_lock, array_lock_t* my_lock);

void end_alock_local(array_lock_t local_lock);

void end_alock_global(lock_shared_t the_lock);


/*
 * Lock manipulation functions
 */
void alock_lock(array_lock_t* lock);

void alock_unlock(array_lock_t* lock);

int alock_trylock(array_lock_t* local_lock);

int is_free_alock(lock_shared_t* the_lock);
| * 5 | * Description: 6 | * Cross-platform interface to common atomic operations 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
//test-and-set uint8_t
/*
 * Atomically stores 0xff into *addr (ldstub) and returns the byte that was
 * there before; a return value of 0 means the caller set the flag.
 */
static inline uint8_t tas_uint8(volatile uint8_t *addr) {
    uint8_t oldval;
    __asm__ __volatile__("ldstub %1,%0"
            : "=r"(oldval), "=m"(*addr)
            : "m"(*addr) : "memory");
    return oldval;
}


/*
 * Atomic 32-bit exchange built from a cas retry loop: stores val into *m
 * and returns the value *m held before the store.
 */
static inline unsigned long xchg32(volatile unsigned int *m, unsigned int val)
{
    unsigned long tmp1, tmp2;

    /*
     * tmp1 keeps a copy of val; each iteration loads *m into tmp2 and tries
     * cas(*m, tmp2, val). On failure the annulled delay slot restores val
     * from tmp1 before retrying.
     */
    __asm__ __volatile__(
            "       mov             %0, %1\n"
            "1:     lduw            [%4], %2\n"
            "       cas             [%4], %2, %0\n"
            "       cmp             %2, %0\n"
            "       bne,a,pn        %%icc, 1b\n"
            "        mov            %1, %0\n"
            : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
            : "0" (val), "r" (m)
            : "cc", "memory");
    return val;
}

/*
 * 64-bit counterpart of xchg32(), using ldx/casx and the %xcc condition codes.
 */
static inline unsigned long xchg64(volatile unsigned long *m, unsigned long val)
{
    unsigned long tmp1, tmp2;

    __asm__ __volatile__(
            "       mov             %0, %1\n"
            "1:     ldx             [%4], %2\n"
            "       casx            [%4], %2, %0\n"
            "       cmp             %2, %0\n"
            "       bne,a,pn        %%xcc, 1b\n"
            "        mov            %1, %0\n"
            : "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
            : "0" (val), "r" (m)
            : "cc", "memory");
    return val;
}


//Compare-and-swap
#define CAS_PTR(a,b,c) atomic_cas_ptr(a,b,c)
#define CAS_U8(a,b,c) atomic_cas_8(a,b,c)
#define CAS_U16(a,b,c) atomic_cas_16(a,b,c)
#define CAS_U32(a,b,c) atomic_cas_32(a,b,c)
#define CAS_U64(a,b,c) atomic_cas_64(a,b,c)
//Swap
#define SWAP_PTR(a,b) atomic_swap_ptr(a,b)
#define SWAP_U8(a,b) atomic_swap_8(a,b)
#define SWAP_U16(a,b) atomic_swap_16(a,b)
#define SWAP_U32(a,b) xchg32(a,b)
#define SWAP_U64(a,b) atomic_swap_64(a,b)
//Fetch-and-increment (the *_nv calls return the new value, hence the -1)
#define FAI_U8(a) (atomic_inc_8_nv(a)-1)
#define FAI_U16(a) (atomic_inc_16_nv(a)-1)
#define FAI_U32(a) (atomic_inc_32_nv(a)-1)
#define FAI_U64(a) (atomic_inc_64_nv(a)-1)
//Fetch-and-decrement (the *_nv calls return the new value, hence the +1)
#define FAD_U8(a) (atomic_dec_8_nv(a)+1)
#define FAD_U16(a) (atomic_dec_16_nv(a)+1)
#define FAD_U32(a) (atomic_dec_32_nv(a)+1)
#define FAD_U64(a) (atomic_dec_64_nv(a)+1)
//Increment-and-fetch
#define IAF_U8(a) atomic_inc_8_nv(a)
#define IAF_U16(a) atomic_inc_16_nv(a)
#define IAF_U32(a) atomic_inc_32_nv(a)
#define IAF_U64(a) atomic_inc_64_nv(a)
//Decrement-and-fetch
#define DAF_U8(a) atomic_dec_8_nv(a)
#define DAF_U16(a) atomic_dec_16_nv(a)
#define DAF_U32(a) atomic_dec_32_nv(a)
#define DAF_U64(a) atomic_dec_64_nv(a)
//Test-and-set
#define TAS_U8(a) tas_uint8(a)
//Memory barrier (note: unlike the other platforms, this expansion already ends in ';')
#define MEM_BARRIER asm volatile("membar #LoadLoad | #LoadStore | #StoreLoad | #StoreStore");
//end of sparc code
/*
 * Atomic exchange helpers for x86 / x86-64.
 *
 * xchg with a memory operand is implicitly locked, so each helper is a full
 * atomic swap. The value register and the memory location are both declared
 * read-write ("+") operands and are passed as plain lvalues: casts used as
 * asm outputs are a GNU extension that other compilers reject.
 */

//Swap pointers
/*
 * Atomically stores x into the pointer-sized slot at ptr and returns the
 * pointer that was stored there before.
 */
static inline void* swap_pointer(volatile void* ptr, void *x) {
    /* No size suffix: the assembler derives xchgl/xchgq from the register width */
    __asm__ __volatile__("xchg %0,%1"
            : "+r" (x), "+m" (*(void * volatile *)ptr)
            :
            : "memory");

    return x;
}

//Swap uint64_t
/* Atomically stores x into *target and returns the previous value. */
static inline uint64_t swap_uint64(volatile uint64_t* target,  uint64_t x) {
    __asm__ __volatile__("xchgq %0,%1"
            : "+r" (x), "+m" (*target)
            :
            : "memory");

    return x;
}

//Swap uint32_t
/* Atomically stores x into *target and returns the previous value. */
static inline uint32_t swap_uint32(volatile uint32_t* target,  uint32_t x) {
    __asm__ __volatile__("xchgl %0,%1"
            : "+r" (x), "+m" (*target)
            :
            : "memory");

    return x;
}

//Swap uint16_t
/* Atomically stores x into *target and returns the previous value. */
static inline uint16_t swap_uint16(volatile uint16_t* target,  uint16_t x) {
    __asm__ __volatile__("xchgw %0,%1"
            : "+r" (x), "+m" (*target)
            :
            : "memory");

    return x;
}

//Swap uint8_t
/* Atomically stores x into *target and returns the previous value. */
static inline uint8_t swap_uint8(volatile uint8_t* target,  uint8_t x) {
    /* "q" restricts x to a register that has a byte form, as tas_uint8 does */
    __asm__ __volatile__("xchgb %0,%1"
            : "+q" (x), "+m" (*target)
            :
            : "memory");

    return x;
}

//test-and-set uint8_t
/*
 * Atomically stores 0xff into *addr and returns the previous byte; a return
 * value of 0 means the caller is the one that set the flag.
 */
static inline uint8_t tas_uint8(volatile uint8_t *addr) {
    uint8_t oldval = 0xff;
    __asm__ __volatile__("xchgb %0,%1"
            : "+q" (oldval), "+m" (*addr)
            :
            : "memory");
    return oldval;
}
FAD_U32(a) __sync_fetch_and_sub(a,1) 272 | #define FAD_U64(a) __sync_fetch_and_sub(a,1) 273 | //Increment-and-fetch 274 | #define IAF_U8(a) __sync_add_and_fetch(a,1) 275 | #define IAF_U16(a) __sync_add_and_fetch(a,1) 276 | #define IAF_U32(a) __sync_add_and_fetch(a,1) 277 | #define IAF_U64(a) __sync_add_and_fetch(a,1) 278 | //Decrement-and-fetch 279 | #define DAF_U8(a) __sync_sub_and_fetch(a,1) 280 | #define DAF_U16(a) __sync_sub_and_fetch(a,1) 281 | #define DAF_U32(a) __sync_sub_and_fetch(a,1) 282 | #define DAF_U64(a) __sync_sub_and_fetch(a,1) 283 | //Test-and-set 284 | #define TAS_U8(a) tas_uint8(a) 285 | //Memory barrier 286 | #define MEM_BARRIER __sync_synchronize() 287 | //Relax CPU 288 | //#define PAUSE _mm_pause() 289 | 290 | /*End of x86 code*/ 291 | #endif 292 | 293 | 294 | #endif 295 | 296 | 297 | 298 | -------------------------------------------------------------------------------- /include/clh.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: clh.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Implementation of a CLH lock 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 
/*
 * Queue node of a CLH lock: a single "locked" byte, optionally padded to a
 * full cache line when ADD_PADDING is defined.
 */
typedef struct clh_qnode {
    volatile uint8_t locked;
#ifdef ADD_PADDING
    uint8_t padding[CACHE_LINE_SIZE - 1];
#endif
} clh_qnode;

typedef volatile clh_qnode *clh_qnode_ptr;
/* The lock itself is a pointer to a queue node */
typedef clh_qnode_ptr clh_lock;

/*
 * Per-thread state for one CLH lock: the thread's own node and a
 * predecessor node pointer.
 */
typedef struct clh_local_params {
    clh_qnode* my_qnode;
    clh_qnode* my_pred;
} clh_local_params;


/*
 * Shared state for one CLH lock. With ADD_PADDING the struct is grown to
 * CACHE_LINE_SIZE bytes; the padding is derived from the pointer size so
 * the total is right on both 32-bit and 64-bit targets.
 */
typedef struct clh_global_params {
    clh_lock* the_lock;
#ifdef ADD_PADDING
    uint8_t padding[CACHE_LINE_SIZE - sizeof(clh_lock*)];
#endif
} clh_global_params;

/*
 * lock array creation and destruction methods
 */
clh_global_params* init_clh_array_global(uint32_t num_locks);

clh_local_params* init_clh_array_local(uint32_t thread_num, uint32_t num_locks);

void end_clh_array_local(clh_local_params* the_params, uint32_t size);

void end_clh_array_global(clh_global_params* the_locks, uint32_t size);

/*
 * single lock creation and destruction methods
 */
int init_clh_global(clh_global_params* the_lock);

int init_clh_local(uint32_t thread_num, clh_local_params* local_d);

void end_clh_local(clh_local_params the_params);

void end_clh_global(clh_global_params the_lock);

/*
 * Lock manipulation methods
 */
volatile clh_qnode* clh_acquire(clh_lock* the_lock, clh_qnode* my_qnode);

clh_qnode* clh_release(clh_qnode* my_qnode, clh_qnode* my_pred);

int clh_trylock(clh_lock * L, clh_qnode_ptr I);
/*
 * View of the lock word as four one-byte fields: read and write flags for
 * the local and the global level.
 */
typedef struct glock{
    volatile unsigned char local_read;
    volatile unsigned char local_write;
    volatile unsigned char global_read;
    volatile unsigned char global_write;
} glock;

/*
 * View of the same lock word as two 16-bit halves (local and global).
 */
typedef struct glock_2{
    volatile unsigned short local_lock;
    volatile unsigned short global_lock;
} glock_2;

/*
 * The lock object: an anonymous union overlaying the whole-word view, the
 * two-halves view and the four-bytes view. The padding member makes the
 * object at least CACHE_LINE_SIZE bytes.
 */
typedef struct global_lock {
    union {
        volatile unsigned int lock_data;
        glock_2 lock_short;
        glock lock;
        volatile unsigned char padding[CACHE_LINE_SIZE];
    };
} global_lock;


/*
 * Lock operations; all take the lock by pointer.
 * NOTE(review): judging by the names, the local_* calls work on the local
 * half and the global_* calls on the global half of the lock word, each in
 * a read and a write flavour -- confirm against src/gl_lock.c.
 */
void local_lock_write(global_lock* gl);

void local_unlock_write(global_lock* gl);

void local_lock_read(global_lock* gl);

void local_unlock_read(global_lock* gl);

void global_acquire_write(global_lock* gl);

void global_acquire_read(global_lock* gl);

void global_unlock_write(global_lock* gl);

void global_unlock_read(global_lock* gl);
persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | #ifndef _HCLH_H_ 31 | #define _HCLH_H_ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #ifndef __sparc__ 41 | #include 42 | #endif 43 | #include 44 | #include "utils.h" 45 | #include "atomic_ops.h" 46 | 47 | typedef struct node_fields { 48 | volatile uint8_t successor_must_wait; 49 | volatile uint8_t tail_when_spliced; 50 | volatile uint8_t cluster_id; 51 | } node_fields; 52 | 53 | typedef struct qnode { 54 | union { 55 | volatile uint32_t data; 56 | node_fields fields; 57 | #ifdef ADD_PADDING 58 | volatile uint8_t padding[CACHE_LINE_SIZE]; 59 | #endif 60 | }; 61 | } qnode; 62 | 63 | typedef volatile qnode *qnode_ptr; 64 | typedef qnode_ptr local_queue; 65 | typedef qnode_ptr global_queue; 66 | 67 | //global parameters needed to oerate with a lock 68 | typedef struct hclh_global_params { 69 | global_queue* shared_queue; 70 | local_queue** local_queues; 71 | volatile uint32_t* init_done; 72 | #ifdef ADD_PADDING 73 | #if CACHE_LINE_SIZE == 16 74 | #else 75 | volatile uint8_t padding[CACHE_LINE_SIZE-20]; 76 | #endif 77 | #endif 78 | 79 | } hclh_global_params; 80 | 81 | //thread local parameters 82 | typedef struct hclh_local_params { 83 | qnode* 
my_qnode; 84 | qnode* my_pred; 85 | local_queue* my_queue; 86 | } hclh_local_params; 87 | 88 | 89 | 90 | /* 91 | * Methods aiding with array of locks manipulation 92 | */ 93 | 94 | hclh_global_params* init_hclh_array_global(uint32_t num_locks); 95 | 96 | 97 | hclh_local_params* init_hclh_array_local(uint32_t thread_num, uint32_t num_locks, hclh_global_params* the_params); 98 | 99 | 100 | void end_hclh_array_local(hclh_local_params* local_params, uint32_t size); 101 | 102 | 103 | void end_hclh_array_global(hclh_global_params* global_params, uint32_t size); 104 | 105 | /* 106 | * single lock initialization and destruction 107 | */ 108 | int init_hclh_global(hclh_global_params* the_lock); 109 | 110 | 111 | int init_hclh_local(uint32_t thread_num, hclh_global_params* the_params, hclh_local_params* local_d); 112 | 113 | 114 | void end_hclh_local(hclh_local_params local_params); 115 | 116 | 117 | void end_hclh_global(hclh_global_params global_params); 118 | 119 | /* 120 | * Lock manipulation methods 121 | */ 122 | 123 | volatile qnode * hclh_acquire(local_queue *lq, global_queue *gq, qnode *my_qnode); 124 | 125 | qnode * hclh_release(qnode *my_qnode, qnode * my_pred); 126 | 127 | 128 | int is_free_hclh(local_queue *lq, global_queue *gq, qnode *my_qnode); 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /include/htlock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: htlock.h 3 | * Author: Vasileios Trigonakis 4 | * 5 | * Description: a NUMA-aware hierarchical ticket lock 6 | * The htlock contains N local ticket locks (N = number of memory 7 | * nodes) and 1 global ticket lock. A thread always tries to acquire 8 | * the local ticket lock first. 
If there isn't any (local) available, 9 | * it enqueues for acquiring the global ticket lock and at the same 10 | * time it "gives" NB_TICKETS_LOCAL tickets to the local ticket lock, 11 | * so that if more threads from the same socket try to acquire the lock, 12 | * they will enqueue on the local lock, without even accessing the 13 | * global one. 14 | * 15 | * The MIT License (MIT) 16 | * 17 | * Copyright (c) 2013 Vasileios Trigonakis 18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 20 | * this software and associated documentation files (the "Software"), to deal in 21 | * the Software without restriction, including without limitation the rights to 22 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 23 | * the Software, and to permit persons to whom the Software is furnished to do so, 24 | * subject to the following conditions: 25 | * 26 | * The above copyright notice and this permission notice shall be included in all 27 | * copies or substantial portions of the Software. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 31 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 32 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 33 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
35 | */ 36 | 37 | #ifndef _HTICKET_H_ 38 | #define _HTICKET_H_ 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #ifndef __sparc__ 49 | # ifndef __tile__ 50 | # include 51 | # include 52 | # endif 53 | #endif 54 | #include 55 | #include 56 | #include "utils.h" 57 | #include "atomic_ops.h" 58 | 59 | #define NB_TICKETS_LOCAL 128 /* max number of local tickets handed out 60 | before releasing global */ 61 | /* Global ticket lock: two 32-bit counters (nxt, cur) padded out to CACHE_LINE_SIZE bytes. */ 62 | typedef struct htlock_global 63 | { 64 | volatile uint32_t nxt; 65 | volatile uint32_t cur; 66 | uint8_t padding[CACHE_LINE_SIZE - 8]; 67 | } htlock_global_t; 68 | /* Local ticket lock: same layout as the global one, but with signed counters. */ 69 | typedef struct htlock_local 70 | { 71 | volatile int32_t nxt; 72 | volatile int32_t cur; 73 | uint8_t padding[CACHE_LINE_SIZE - 8]; 74 | } htlock_local_t; 75 | /* An htlock: a pointer to the global lock plus one local-lock pointer per socket (NUMBER_OF_SOCKETS entries). */ 76 | typedef struct ALIGNED(CACHE_LINE_SIZE) htlock 77 | { 78 | htlock_global_t* global; 79 | htlock_local_t* local[NUMBER_OF_SOCKETS]; 80 | } htlock_t; 81 | 82 | extern int create_htlock(htlock_t* htl); 83 | extern void init_htlock(htlock_t* htl); /* initializes an htlock */ 84 | extern void init_thread_htlocks(uint32_t thread_num); 85 | extern htlock_t* init_htlocks(uint32_t num_locks); 86 | extern void free_htlocks(htlock_t* locks); 87 | 88 | 89 | extern uint32_t is_free_hticket(htlock_t* htl); 90 | extern void htlock_lock(htlock_t* l); 91 | extern uint32_t htlock_trylock(htlock_t* l); 92 | 93 | extern void htlock_release(htlock_t* l); 94 | extern inline void htlock_release_try(htlock_t* l); /* trylock rls */ 95 | /* wait_cycles: busy-waits for roughly `cycles` cycles. Requests below 256 spin on PAUSE cycles/6 times; larger requests poll getticks() until it reaches start + cycles - 130. NOTE(review): the 6 and 130 look like calibration constants for PAUSE cost and timer overhead - confirm. */ 96 | static inline void 97 | wait_cycles(uint64_t cycles) 98 | { 99 | if (cycles < 256) 100 | { 101 | cycles /= 6; 102 | while (cycles--) 103 | { 104 | PAUSE; 105 | } 106 | } 107 | else 108 | { 109 | ticks _start_ticks = getticks(); 110 | ticks _end_ticks = _start_ticks + cycles - 130; 111 | while (getticks() < _end_ticks); 112 | } 113 | } 114 | 115 | #endif /* _HTICKET_H_ */ 116 | 117 | 118 | 
-------------------------------------------------------------------------------- /include/mcs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: mcs.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Implementation of an MCS lock 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | 31 | 32 | 33 | #ifndef _MCS_H_ 34 | #define _MCS_H_ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #ifndef __sparc__ 44 | #include 45 | #endif 46 | #include 47 | #include "utils.h" 48 | #include "atomic_ops.h" 49 | 50 | typedef struct mcs_qnode { 51 | volatile uint8_t waiting; 52 | volatile struct mcs_qnode *volatile next; 53 | #ifdef ADD_PADDING 54 | #if CACHE_LINE_SIZE == 16 55 | #else 56 | uint8_t padding[CACHE_LINE_SIZE - 16]; 57 | #endif 58 | #endif 59 | } mcs_qnode; 60 | 61 | typedef volatile mcs_qnode *mcs_qnode_ptr; 62 | typedef mcs_qnode_ptr mcs_lock; //initialized to NULL 63 | 64 | typedef mcs_qnode* mcs_local_params; 65 | 66 | typedef struct mcs_global_params { 67 | mcs_lock* the_lock; 68 | #ifdef ADD_PADDING 69 | uint8_t padding[CACHE_LINE_SIZE - 8]; 70 | #endif 71 | } mcs_global_params; 72 | 73 | 74 | /* 75 | Methods for easy lock array manipulation 76 | */ 77 | 78 | mcs_global_params* init_mcs_array_global(uint32_t num_locks); 79 | 80 | mcs_qnode** init_mcs_array_local(uint32_t thread_num, uint32_t num_locks); 81 | 82 | void end_mcs_array_local(mcs_qnode** the_qnodes, uint32_t size); 83 | 84 | void end_mcs_array_global(mcs_global_params* the_locks, uint32_t size); 85 | /* 86 | single lock manipulation 87 | */ 88 | 89 | int init_mcs_global(mcs_global_params* the_lock); 90 | 91 | int init_mcs_local(uint32_t thread_num, mcs_qnode** the_qnode); 92 | 93 | void end_mcs_local(mcs_qnode* the_qnodes); 94 | 95 | void end_mcs_global(mcs_global_params the_locks); 96 | 97 | /* 98 | * Acquire and release methods 99 | */ 100 | 101 | void mcs_acquire(mcs_lock *the_lock, mcs_qnode_ptr I); 102 | 103 | void mcs_release(mcs_lock *the_lock, mcs_qnode_ptr I); 104 | 105 | int is_free_mcs(mcs_lock *L ); 106 | 107 | int mcs_trylock(mcs_lock *L, mcs_qnode_ptr I); 108 | #endif 109 | -------------------------------------------------------------------------------- /include/platform_defs.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * File: platform_defs.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Platform specific definitions and parameters 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | 31 | 32 | 33 | #ifndef _PLATFORM_DEFS_H_INCLUDED_ 34 | #define _PLATFORM_DEFS_H_INCLUDED_ 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | /* 40 | * For each machine that is used, one needs to define 41 | * NUMBER_OF_SOCKETS: the number of sockets the machine has 42 | * CORES_PER_SOCKET: the number of cores per socket 43 | * CACHE_LINE_SIZE 44 | * NOP_DURATION: the duration in cycles of a noop instruction (generally 1 cycle on most small machines) 45 | * the_cores - a mapping from the core ids as configured in the OS to physical cores (the OS might not always be configured correctly) 46 | * get_cluster - a function that given a core id returns the socket number it belongs to 47 | */ 48 | 49 | 50 | #ifdef DEFAULT 51 | # define NUMBER_OF_SOCKETS 1 52 | # define CORES_PER_SOCKET CORE_NUM 53 | # define CACHE_LINE_SIZE 64 54 | # define NOP_DURATION 2 55 | static uint8_t __attribute__ ((unused)) the_cores[] = { 56 | 0, 1, 2, 3, 4, 5, 6, 7, 57 | 8, 9, 10, 11, 12, 13, 14, 15, 58 | 16, 17, 18, 19, 20, 21, 22, 23, 59 | 24, 25, 26, 27, 28, 29, 30, 31, 60 | 32, 33, 34, 35, 36, 37, 38, 39, 61 | 40, 41, 42, 43, 44, 45, 46, 47 62 | }; 63 | #endif 64 | 65 | #ifdef SPARC 66 | # define NUMBER_OF_SOCKETS 8 67 | # define CORES_PER_SOCKET 8 68 | # define CACHE_LINE_SIZE 64 69 | # define NOP_DURATION 9 70 | 71 | #define ALTERNATE_SOCKETS 72 | #ifdef ALTERNATE_SOCKETS 73 | static uint8_t __attribute__ ((unused)) the_cores[] = { 74 | 0, 8, 16, 24, 32, 40, 48, 56, 75 | 1, 9, 17, 25, 33, 41, 49, 57, 76 | 2, 10, 18, 26, 34, 42, 50, 58, 77 | 3, 11, 19, 27, 35, 43, 51, 59, 78 | 4, 12, 20, 28, 36, 44, 52, 60, 79 | 5, 13, 21, 29, 37, 45, 53, 61, 80 | 6, 14, 22, 30, 38, 46, 54, 62, 81 | 7, 15, 23, 31, 39, 47, 55, 63 82 | }; 83 | 84 | static uint8_t the_sockets[] = { 85 | 0, 0, 0, 0, 0, 0, 0, 0, 86 | 1, 1, 1, 1, 1, 1, 1, 1, 87 | 2, 2, 2, 2, 2, 2, 2, 2, 88 | 3, 3, 3, 3, 3, 3, 3, 3, 89 | 4, 4, 4, 4, 4, 4, 4, 4, 90 | 5, 5, 5, 5, 5, 5, 5, 5, 91 | 6, 6, 6, 6, 6, 6, 6, 
6, 92 | 7, 7, 7, 7, 7, 7, 7, 7 93 | }; 94 | 95 | #else 96 | static uint8_t __attribute__ ((unused)) the_cores[] = { 97 | 0, 1, 2, 3, 4, 5, 6, 7, 98 | 8, 9, 10, 11, 12, 13, 14, 15, 99 | 16, 17, 18, 19, 20, 21, 22, 23, 100 | 24, 25, 26, 27, 28, 29, 30, 31, 101 | 32, 33, 34, 35, 36, 37, 38, 39, 102 | 40, 41, 42, 43, 44, 45, 46, 47, 103 | 48, 49, 50, 51, 52, 53, 54, 55, 104 | 56, 57, 58, 59, 60, 61, 62, 63 105 | }; 106 | static uint8_t the_sockets[] = { 107 | 0, 0, 0, 0, 0, 0, 0, 0, 108 | 1, 1, 1, 1, 1, 1, 1, 1, 109 | 2, 2, 2, 2, 2, 2, 2, 2, 110 | 3, 3, 3, 3, 3, 3, 3, 3, 111 | 4, 4, 4, 4, 4, 4, 4, 4, 112 | 5, 5, 5, 5, 5, 5, 5, 5, 113 | 6, 6, 6, 6, 6, 6, 6, 6, 114 | 7, 7, 7, 7, 7, 7, 7, 7 115 | }; 116 | 117 | #endif 118 | #elif defined __tile__ 119 | #define NUMBER_OF_SOCKETS 1 120 | #define CORES_PER_SOCKET 36 121 | #define CACHE_LINE_SIZE 64 122 | # define NOP_DURATION 4 123 | static uint8_t __attribute__ ((unused)) the_cores[] = { 124 | 0, 1, 2, 3, 4, 5, 6, 7, 125 | 8, 9, 10, 11, 12, 13, 14, 15, 126 | 16, 17, 18, 19, 20, 21, 22, 23, 127 | 24, 25, 26, 27, 28, 29, 30, 31, 128 | 32, 33, 34, 35 129 | }; 130 | 131 | #elif defined(OPTERON) 132 | # define NUMBER_OF_SOCKETS 8 133 | # define CORES_PER_SOCKET 6 134 | # define CACHE_LINE_SIZE 64 135 | # define NOP_DURATION 2 136 | static uint8_t __attribute__ ((unused)) the_cores[] = { 137 | 0, 1, 2, 3, 4, 5, 6, 7, 138 | 8, 9, 10, 11, 12, 13, 14, 15, 139 | 16, 17, 18, 19, 20, 21, 22, 23, 140 | 24, 25, 26, 27, 28, 29, 30, 31, 141 | 32, 33, 34, 35, 36, 37, 38, 39, 142 | 40, 41, 42, 43, 44, 45, 46, 47 143 | }; 144 | 145 | #elif defined(XEON) 146 | # define NUMBER_OF_SOCKETS 8 147 | # define CORES_PER_SOCKET 10 148 | # define CACHE_LINE_SIZE 64 149 | # define NOP_DURATION 1 150 | static uint8_t __attribute__ ((unused)) the_cores[] = { 151 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 152 | 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 153 | 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 154 | 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 155 | 0, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 156 | 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 157 | 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 158 | 70, 71, 72, 73, 74, 75, 76, 77, 78, 79 159 | }; 160 | static uint8_t the_sockets[] = 161 | { 162 | 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 163 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 164 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 165 | 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 166 | 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 167 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 168 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 169 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 170 | }; 171 | 172 | #endif 173 | 174 | #if defined(OPTERON) 175 | # define PREFETCHW(x) asm volatile("prefetchw %0" :: "m" (*(unsigned long *)x)) 176 | #elif defined(__sparc__) 177 | # define PREFETCHW(x) 178 | #elif defined(XEON) 179 | # define PREFETCHW(x) 180 | #else 181 | # define PREFETCHW(x) 182 | #endif 183 | /* get_cluster: returns the socket index for thread/core id `thread_id`. XEON builds look the id up in the_sockets[]; Tile builds always return 0; every other build returns thread_id/CORES_PER_SOCKET. The __solaris__ and XEON branches report an out-of-range id with perror() and return socket 0. */ 184 | static inline int get_cluster(int thread_id) { 185 | #ifdef __solaris__ 186 | if (thread_id>=64){ /* valid ids are 0..63; id 64 would map to a socket index of 8 */ 187 | perror("Thread id too high"); 188 | return 0; 189 | } 190 | return thread_id/CORES_PER_SOCKET; 191 | #elif defined(XEON) 192 | if (thread_id<0 || thread_id>=80){ /* the_sockets[] has exactly 80 entries; never index outside it */ 193 | perror("Thread id too high"); 194 | return 0; 195 | } 196 | return the_sockets[thread_id]; 197 | #elif defined(__tile__) 198 | return 0; 199 | #else 200 | return thread_id/CORES_PER_SOCKET; 201 | #endif 202 | } 203 | 204 | #ifdef __cplusplus 205 | } 206 | 207 | #endif 208 | 209 | 210 | #endif 211 | -------------------------------------------------------------------------------- /include/rw_ttas.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: rw_ttas.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Implementation of a test-and-test-and-set read-write-lock 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, 
including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | #ifndef _RWTTAS_H_ 31 | #define _RWTTAS_H_ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #ifndef __sparc__ 41 | #include 42 | #endif 43 | #include 44 | #include "utils.h" 45 | #include "atomic_ops.h" 46 | 47 | #define MAX_DELAY 1000 48 | 49 | #ifdef __tile__ 50 | #define MAX_RW UINT32_MAX 51 | #define W_MASK 0x100000000 52 | typedef uint32_t rw_data_t; 53 | typedef uint64_t all_data_t; 54 | #else 55 | #define MAX_RW UINT8_MAX 56 | #define W_MASK 0x100 57 | typedef uint8_t rw_data_t; 58 | typedef uint16_t all_data_t; 59 | #endif 60 | 61 | typedef struct rw_ttas_data { 62 | volatile rw_data_t read_lock; 63 | volatile rw_data_t write_lock; 64 | } rw_ttas_data; 65 | 66 | 67 | typedef struct rw_ttas { 68 | union { 69 | rw_ttas_data rw; 70 | volatile all_data_t lock_data; 71 | #ifdef ADD_PADDING 72 | uint8_t padding[CACHE_LINE_SIZE]; 73 | #endif 74 | }; 75 | } rw_ttas; 76 | 77 | rw_ttas* init_rw_ttas_array_global(uint32_t num_locks); 78 | 79 | uint32_t* init_rw_ttas_array_local(uint32_t 
thread_num, uint32_t size); 80 | 81 | void end_rw_ttas_array_local(uint32_t* limits); 82 | 83 | void end_rw_ttas_array_global(rw_ttas* the_locks); 84 | 85 | int init_rw_ttas_global(rw_ttas* the_lock); 86 | 87 | int init_rw_ttas_local(uint32_t thread_num, uint32_t* limit); 88 | 89 | void end_rw_ttas_local(); 90 | 91 | void end_rw_ttas_global(); 92 | 93 | 94 | void read_acquire(rw_ttas* lock, uint32_t * limit); 95 | 96 | void read_release(rw_ttas * lock); 97 | 98 | void write_acquire(rw_ttas* lock, uint32_t * limit); 99 | 100 | int rw_trylock(rw_ttas* lock, uint32_t* limit); 101 | void write_release(rw_ttas * lock); 102 | 103 | int is_free_rw(rw_ttas* lock); 104 | 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /include/spinlock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: spinlock.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Implementation of a simple test-and-set spinlock 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | 33 | #ifndef _SPINLOCK_H_ 34 | #define _SPINLOCK_H_ 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #ifndef __sparc__ 44 | #include 45 | #endif 46 | #include 47 | #include "atomic_ops.h" 48 | #include "utils.h" 49 | 50 | typedef volatile uint32_t spinlock_index_t; 51 | #ifdef __tile__ 52 | typedef uint32_t spinlock_lock_data_t; 53 | #else 54 | typedef uint8_t spinlock_lock_data_t; 55 | #endif 56 | 57 | typedef struct spinlock_lock_t 58 | { 59 | union 60 | { 61 | spinlock_lock_data_t lock; 62 | #ifdef ADD_PADDING 63 | uint8_t padding[CACHE_LINE_SIZE]; 64 | #else 65 | uint8_t padding; 66 | #endif 67 | }; 68 | } spinlock_lock_t; 69 | 70 | 71 | /* 72 | * Lock manipulation methods 73 | */ 74 | 75 | void spinlock_lock(spinlock_lock_t* the_lock, uint32_t* limits); 76 | 77 | int spinlock_trylock(spinlock_lock_t* the_locks, uint32_t* limits); 78 | 79 | void spinlock_unlock(spinlock_lock_t* the_locks); 80 | 81 | int is_free_spinlock(spinlock_lock_t * the_lock); 82 | 83 | /* 84 | Some methods for easy lock array manipluation 85 | */ 86 | 87 | spinlock_lock_t* init_spinlock_array_global(uint32_t num_locks); 88 | 89 | uint32_t* init_spinlock_array_local(uint32_t thread_num, uint32_t size); 90 | 91 | void end_spinlock_array_local(uint32_t* limits); 92 | 93 | void end_spinlock_array_global(spinlock_lock_t* the_locks); 94 | 95 | /* 96 | * Methods for single lock manipulation 97 | */ 98 | 99 | int init_spinlock_global(spinlock_lock_t* the_lock); 100 | 101 | int init_spinlock_local(uint32_t thread_num, uint32_t* limit); 102 | 103 | void end_spinlock_local(); 104 | 105 | void end_spinlock_global(); 106 | 107 | #endif 108 | 
109 | 110 | -------------------------------------------------------------------------------- /include/ticket.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ticket.h 3 | * Author: Tudor David , Vasileios Trigonakis 4 | * 5 | * Description: 6 | * An implementation of a ticket lock with: 7 | * - proportional back-off optimization 8 | * - prefetchw (prefetch for write) optimization for the AMD Opteron 9 | * Magny-Cours processors 10 | * 11 | * The MIT License (MIT) 12 | * 13 | * Copyright (c) 2013 Tudor David, Vasileios Trigonakis 14 | * 15 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 16 | * this software and associated documentation files (the "Software"), to deal in 17 | * the Software without restriction, including without limitation the rights to 18 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 19 | * the Software, and to permit persons to whom the Software is furnished to do so, 20 | * subject to the following conditions: 21 | * 22 | * The above copyright notice and this permission notice shall be included in all 23 | * copies or substantial portions of the Software. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 27 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 28 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 29 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
31 | */ 32 | 33 | 34 | #ifndef _TICKET_H_ 35 | #define _TICKET_H_ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #if defined(PLATFORM_NUMA) 45 | # include 46 | #endif 47 | #include 48 | #include "utils.h" 49 | #include "atomic_ops.h" 50 | 51 | /* setting of the back-off based on the length of the queue */ 52 | #define TICKET_BASE_WAIT 512 53 | #define TICKET_MAX_WAIT 4095 54 | #define TICKET_WAIT_NEXT 128 55 | /* NOTE(review): the macro name below is spelled with a zero ("TW0"), not the letter O; keep that spelling when referring to it. */ 56 | #define TICKET_ON_TW0_CLS 0 /* Put the head and the tail on separate 57 | cache lines (0: not, 1: do)*/ 58 | /* Ticket lock state: 32-bit head and tail counters. padding0 is compiled in only when TICKET_ON_TW0_CLS == 1, which places tail CACHE_LINE_SIZE bytes after head; padding1 (and padding2) are compiled in only when ADD_PADDING is defined. */ typedef struct ticketlock_t 59 | { 60 | volatile uint32_t head; 61 | #if TICKET_ON_TW0_CLS == 1 62 | uint8_t padding0[CACHE_LINE_SIZE - 4]; 63 | #endif 64 | volatile uint32_t tail; 65 | #ifdef ADD_PADDING 66 | uint8_t padding1[CACHE_LINE_SIZE - 8]; 67 | # if TICKET_ON_TW0_CLS == 1 68 | uint8_t padding2[4]; 69 | # endif 70 | #endif 71 | } ticketlock_t; 72 | 73 | /* Lock operations, followed by creation/teardown helpers; the definitions are not part of this header. */ 74 | 75 | int ticket_trylock(ticketlock_t* lock); 76 | void ticket_acquire(ticketlock_t* lock); 77 | void ticket_release(ticketlock_t* lock); 78 | int is_free_ticket(ticketlock_t* t); 79 | 80 | int create_ticketlock(ticketlock_t* the_lock); 81 | ticketlock_t* init_ticketlocks(uint32_t num_locks); 82 | void init_thread_ticketlocks(uint32_t thread_num); 83 | void free_ticketlocks(ticketlock_t* the_locks); 84 | 85 | #if defined(MEASURE_CONTENTION) 86 | extern void ticket_print_contention_stats(void); 87 | double ticket_avg_queue(void); 88 | #endif /* MEASURE_CONTENTION */ 89 | 90 | #endif 91 | 92 | 93 | -------------------------------------------------------------------------------- /include/ttas.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ttas.h 3 | * Author: Tudor David , Vasileios Trigonakis 4 | * 5 | * Description: 6 | * Implementation of a test-and-test-and-set lock with back-off 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David, Vasileios 
Trigonakis 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | 31 | #ifndef _TTAS_H_ 32 | #define _TTAS_H_ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #ifndef __sparc__ 42 | #include 43 | #endif 44 | #include 45 | #include "atomic_ops.h" 46 | #include "utils.h" 47 | 48 | 49 | #define MIN_DELAY 100 50 | #define MAX_DELAY 1000 51 | 52 | typedef volatile uint32_t ttas_index_t; 53 | #ifdef __tile__ 54 | typedef uint32_t ttas_lock_data_t; 55 | #else 56 | typedef uint8_t ttas_lock_data_t; 57 | #endif 58 | 59 | typedef struct ttas_lock_t { 60 | union { 61 | ttas_lock_data_t lock; 62 | #ifdef ADD_PADDING 63 | uint8_t padding[CACHE_LINE_SIZE]; 64 | #else 65 | uint8_t padding; 66 | #endif 67 | }; 68 | }ttas_lock_t; 69 | 70 | 71 | static inline uint32_t backoff(uint32_t limit) { 72 | uint32_t delay = rand()%limit; 73 | limit = MAX_DELAY > 2*limit ? 2*limit : MAX_DELAY; 74 | cdelay(delay); 75 | return limit; 76 | 77 | } 78 | 79 | /* 80 | * Lock acquire and release methods 81 | */ 82 | 83 | void ttas_lock(ttas_lock_t* the_lock, uint32_t* limit); 84 | 85 | int ttas_trylock(ttas_lock_t* the_lock, uint32_t* limit); 86 | 87 | void ttas_unlock(ttas_lock_t* the_lock); 88 | 89 | int is_free_ttas(ttas_lock_t * the_lock); 90 | /* 91 | Some methods for easy lock array manipluation 92 | */ 93 | 94 | ttas_lock_t* init_ttas_array_global(uint32_t num_locks); 95 | 96 | 97 | uint32_t* init_ttas_array_local(uint32_t thread_num, uint32_t size); 98 | 99 | 100 | void end_ttas_array_local(uint32_t* limits); 101 | 102 | 103 | void end_ttas_array_global(ttas_lock_t* the_locks); 104 | 105 | /* 106 | * Single lock initialization and destruction 107 | */ 108 | 109 | int init_ttas_global(ttas_lock_t* the_lock); 110 | 111 | int init_ttas_local(uint32_t thread_num, uint32_t* limit); 112 | 113 | void end_ttas_local(); 114 | 115 | void end_ttas_global(); 116 | 117 | 118 | #endif 119 | 120 | 121 | -------------------------------------------------------------------------------- 
/include/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: utils.h 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Some utility functions 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | 31 | 32 | #ifndef _UTILS_H_INCLUDED_ 33 | #define _UTILS_H_INCLUDED_ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #ifdef __sparc__ 45 | # include 46 | # include 47 | # include 48 | #elif defined(__tile__) 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #else 56 | # include 57 | # include 58 | # include 59 | #endif 60 | #include 61 | 62 | #include "platform_defs.h" 63 | 64 | #ifdef __cplusplus 65 | extern "C" { 66 | #endif 67 | 68 | #define ALIGNED(N) __attribute__ ((aligned (N))) 69 | 70 | #ifdef __sparc__ 71 | # define PAUSE asm volatile("rd %%ccr, %%g0\n\t" \ 72 | ::: "memory") 73 | 74 | #elif defined(__tile__) 75 | #define PAUSE cycle_relax() 76 | #else 77 | #define PAUSE _mm_pause() 78 | #endif 79 | static inline void 80 | pause_rep(uint32_t num_reps) 81 | { 82 | uint32_t i; 83 | for (i = 0; i < num_reps; i++) 84 | { 85 | PAUSE; 86 | /* PAUSE; */ 87 | /* asm volatile ("NOP"); */ 88 | } 89 | } 90 | 91 | static inline void 92 | nop_rep(uint32_t num_reps) 93 | { 94 | uint32_t i; 95 | for (i = 0; i < num_reps; i++) 96 | { 97 | asm volatile ("NOP"); 98 | } 99 | } 100 | 101 | 102 | 103 | 104 | //debugging functions 105 | #ifdef DEBUG 106 | # define DPRINT(args...) fprintf(stderr,args); 107 | # define DDPRINT(fmt, args...) printf("%s:%s:%d: "fmt, __FILE__, __FUNCTION__, __LINE__, args) 108 | #else 109 | # define DPRINT(...) 110 | # define DDPRINT(fmt, ...) 
111 | #endif 112 | 113 | 114 | 115 | 116 | typedef uint64_t ticks; 117 | 118 | static inline double wtime(void) 119 | { 120 | struct timeval t; 121 | gettimeofday(&t,NULL); 122 | return (double)t.tv_sec + ((double)t.tv_usec)/1000000.0; 123 | } 124 | 125 | static inline void set_cpu(int cpu) { 126 | #ifdef __sparc__ 127 | processor_bind(P_LWPID,P_MYID, cpu, NULL); 128 | #elif defined(__tile__) 129 | if (cpu>=tmc_cpus_grid_total()) { 130 | perror("Thread id too high"); 131 | } 132 | // cput_set_t cpus; 133 | if (tmc_cpus_set_my_cpu(cpu)<0) { 134 | tmc_task_die("tmc_cpus_set_my_cpu() failed."); 135 | } 136 | #else 137 | cpu_set_t mask; 138 | CPU_ZERO(&mask); 139 | CPU_SET(cpu, &mask); 140 | numa_set_preferred(get_cluster(cpu)); 141 | pthread_t thread = pthread_self(); 142 | if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &mask) != 0) { 143 | fprintf(stderr, "Error setting thread affinity\n"); 144 | } 145 | #endif 146 | } 147 | 148 | #if defined(__i386__) 149 | static inline ticks getticks(void) { 150 | ticks ret; 151 | 152 | __asm__ __volatile__("rdtsc" : "=A" (ret)); 153 | return ret; 154 | } 155 | #elif defined(__x86_64__) 156 | static inline ticks getticks(void) 157 | { 158 | unsigned hi, lo; 159 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 160 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 161 | } 162 | #elif defined(__sparc__) 163 | static inline ticks getticks(){ 164 | ticks ret; 165 | __asm__ __volatile__ ("rd %%tick, %0" : "=r" (ret) : "0" (ret)); 166 | return ret; 167 | } 168 | #elif defined(__tile__) 169 | static inline ticks getticks(){ 170 | return get_cycle_count(); 171 | } 172 | #endif 173 | 174 | static inline void cdelay(ticks cycles){ 175 | ticks __ts_end = getticks() + (ticks) cycles; 176 | while (getticks() < __ts_end); 177 | } 178 | 179 | static inline void cpause(ticks cycles){ 180 | #if defined(XEON) 181 | cycles >>= 3; 182 | ticks i; 183 | for (i=0;i> 1; 238 | x |= x >> 2; 239 | x |= x >> 4; 240 | x |= x >> 
8; 241 | x |= x >> 16; 242 | return x+1; 243 | } 244 | #define my_random xorshf96 245 | 246 | /* 247 | * Returns a pseudo-random value in [1;range). 248 | * Depending on the symbolic constant RAND_MAX>=32767 defined in stdlib.h, 249 | * the granularity of rand() could be lower-bounded by the 32767^th which might 250 | * be too high for given values of range and initial. 251 | */ 252 | static inline long rand_range(long r) { 253 | int m = RAND_MAX; 254 | long d, v = 0; 255 | 256 | do { 257 | d = (m > r ? r : m); 258 | v += 1 + (long) (d * ((double) rand() / ((double) (m) + 1.0))); 259 | r -= m; 260 | } while (r > 0); 261 | return v; 262 | } 263 | 264 | //fast but weak random number generator for the sparc machine 265 | static inline uint32_t fast_rand() { 266 | return ((getticks()&4294967295)>>4); 267 | } 268 | 269 | 270 | static inline unsigned long* seed_rand() { 271 | unsigned long* seeds; 272 | int num_seeds = CACHE_LINE_SIZE/sizeof(unsigned long); 273 | if (num_seeds<3) num_seeds=3; 274 | seeds = (unsigned long*) memalign(CACHE_LINE_SIZE, num_seeds * sizeof(unsigned long)); 275 | seeds[0] = getticks() % 123456789; 276 | seeds[1] = getticks() % 362436069; 277 | seeds[2] = getticks() % 521288629; 278 | return seeds; 279 | } 280 | 281 | //Marsaglia's xorshf generator 282 | static inline unsigned long xorshf96(unsigned long* x, unsigned long* y, unsigned long* z) { //period 2^96-1 283 | unsigned long t; 284 | (*x) ^= (*x) << 16; 285 | (*x) ^= (*x) >> 5; 286 | (*x) ^= (*x) << 1; 287 | 288 | t = *x; 289 | (*x) = *y; 290 | (*y) = *z; 291 | (*z) = t ^ (*x) ^ (*y); 292 | 293 | return *z; 294 | } 295 | 296 | #ifdef __cplusplus 297 | } 298 | 299 | #endif 300 | 301 | 302 | #endif 303 | -------------------------------------------------------------------------------- /samples/sample_generic.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: sample_generic.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Simple 
example of how to use the generic lock interface. 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #define NUM_THREADS 4 34 | 35 | #include "lock_if.h" 36 | #include "atomic_ops.h" 37 | 38 | /* global data */ 39 | lock_global_data the_lock; 40 | 41 | void *do_something(void *id) 42 | { 43 | int* my_core = (int*) id; 44 | /* local data */ 45 | lock_local_data my_data; 46 | /*initialize this thread's local data*/ 47 | init_lock_local(*my_core, &the_lock, &my_data); 48 | MEM_BARRIER; 49 | 50 | 51 | /*acquire the lock*/ 52 | acquire_lock(&my_data,&the_lock); 53 | printf("I have the lock\n"); 54 | /*release the lock*/ 55 | release_lock(&my_data,&the_lock); 56 | 57 | 58 | MEM_BARRIER; 59 | /*free internal memory structures which may have been allocated for the local data*/ 60 | free_lock_local(my_data); 61 | 62 | return NULL; 63 | 64 | } 65 | 66 | int main(int argc, char *argv[]) 67 | { 68 | pthread_t threads[NUM_THREADS]; 69 | long t; 70 | 71 | /*initialize the global data*/ 72 | init_lock_global(&the_lock); 73 | int ids[]={0,1,2,3}; 74 | 75 | MEM_BARRIER; 76 | 77 | for(t=0;t 4 | * 5 | * Description: 6 | * Simple sample showing how the interface of a particular lock can be used. 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 
21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #define NUM_THREADS 4 34 | 35 | #include "atomic_ops.h" //the memory barriers are defined there 36 | #include "mcs.h" 37 | 38 | /* global data */ 39 | mcs_global_params the_lock; 40 | 41 | void *do_something(void *id) 42 | { 43 | int* my_core = (int*) id; 44 | /* local data */ 45 | mcs_local_params my_data; 46 | /*initialize this thread's local data*/ 47 | init_mcs_local(*my_core, &my_data); 48 | MEM_BARRIER; 49 | 50 | 51 | /*acquire the lock*/ 52 | mcs_acquire(the_lock.the_lock,my_data); 53 | printf("I have the lock\n"); 54 | /*release the lock*/ 55 | mcs_release(the_lock.the_lock,my_data); 56 | 57 | 58 | MEM_BARRIER; 59 | /*free internal memory structures which may have been allocated for the local data*/ 60 | end_mcs_local(my_data); 61 | 62 | return NULL; 63 | 64 | } 65 | 66 | int main(int argc, char *argv[]) 67 | { 68 | pthread_t threads[NUM_THREADS]; 69 | long t; 70 | 71 | /*initialize the global data*/ 72 | init_mcs_global(&the_lock); 73 | int ids[]={0,1,2,3}; 74 | 75 | MEM_BARRIER; 76 | 77 | for(t=0;t> correctness.out 61 | ${prog_prefix}test_correctness -n ${num_cores} -d 1000 >> correctness.out 62 | done 63 | 64 | -------------------------------------------------------------------------------- /scripts/correctness_array.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | case "$1" in 3 | opteron) echo "running tests on opteron" 4 | 
THE_LOCKS="HCLH TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK CLH" 5 | num_cores=48 6 | platform_def="-DOPTERON" 7 | make="make" 8 | freq=2100000000 9 | platform=opteron 10 | prog_prefix="numactl --physcpubind=0 ../" 11 | ;; 12 | opteron_optimize) echo "running tests on opteron" 13 | THE_LOCKS="HCLH TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK CLH" 14 | num_cores=48 15 | optimize="-DOPTERON_OPTIMIZE" 16 | platform_def="-DOPTERON" 17 | make="make" 18 | freq=2100000000 19 | platform=opteron 20 | prog_prefix="numactl --physcpubind=0 ../" 21 | ;; 22 | xeon) echo "running tests on xeon" 23 | THE_LOCKS="HCLH TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK CLH" 24 | num_cores=80 25 | platform_def="-DXEON" 26 | freq=2130000000 27 | make="make" 28 | platform=xeon 29 | prog_prefix="numactl --physcpubind=1 ../" 30 | ;; 31 | niagara) echo "running tests on niagara" 32 | THE_LOCKS="TTAS ARRAY MCS TICKET MUTEX SPINLOCK CLH" 33 | ALTERNATE=-DALTERNATE_SOCKETS 34 | num_cores=64 35 | platform_def="-DSPARC" 36 | freq=1200000000 37 | make="make" 38 | platform=niagara 39 | prog_prefix="../" 40 | ;; 41 | tilera) echo "running tests on tilera" 42 | THE_LOCKS="TTAS ARRAY MCS TICKET MUTEX SPINLOCK CLH" 43 | num_cores=36 44 | platform_def="-DTILERA" 45 | freq=1200000000 46 | make="make" 47 | platform=tilera 48 | prog_prefix="../run ../" 49 | ;; 50 | *) echo "Program format ./run_all platform, where plafrom in opteron, xeon, niagara, tilera" 51 | exit; 52 | ;; 53 | esac 54 | 55 | rm correctness_array.out 56 | 57 | for prefix in ${THE_LOCKS} 58 | do 59 | cd ..; LOCK_VERSION=-DUSE_${prefix}_LOCKS PRIMITIVE=-DTEST_CAS OPTIMIZE=${optimize} PLATFORM=${platform_def} ${make} clean all; cd scripts; 60 | echo ${prefix} >> correctness_array.out 61 | ${prog_prefix}test_array_alloc -n ${num_cores} -d 1000 >> correctness_array.out 62 | done 63 | 64 | -------------------------------------------------------------------------------- /scripts/correctness_trylock.sh: 
--------------------------------------------------------------------------------
#!/bin/bash
# Rebuilds the project once per lock type available on the chosen platform and
# appends the output of test_trylock for each of them to
# correctness_trylock.out. The script changes to the parent directory to build,
# so it is meant to be started from the scripts directory.
#
# Usage: ./correctness_trylock.sh <opteron|opteron_optimize|xeon|niagara|tilera>

case "$1" in
    opteron)
        echo "running tests on opteron"
        THE_LOCKS="TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK"
        num_cores=48
        platform_def="-DOPTERON"
        make="make"
        freq=2100000000
        platform=opteron
        prog_prefix="numactl --physcpubind=0 ../"
        ;;
    opteron_optimize)
        echo "running tests on opteron"
        THE_LOCKS="TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK"
        num_cores=48
        optimize="-DOPTERON_OPTIMIZE"
        platform_def="-DOPTERON"
        make="make"
        freq=2100000000
        platform=opteron
        prog_prefix="numactl --physcpubind=0 ../"
        ;;
    xeon)
        echo "running tests on xeon"
        THE_LOCKS="TTAS ARRAY MCS TICKET HTICKET MUTEX SPINLOCK"
        num_cores=80
        platform_def="-DXEON"
        freq=2130000000
        make="make"
        platform=xeon
        prog_prefix="numactl --physcpubind=1 ../"
        ;;
    niagara)
        echo "running tests on niagara"
        THE_LOCKS="TTAS ARRAY MCS TICKET MUTEX SPINLOCK"
        ALTERNATE=-DALTERNATE_SOCKETS
        num_cores=64
        platform_def="-DSPARC"
        freq=1200000000
        make="make"
        platform=niagara
        prog_prefix="../"
        ;;
    tilera)
        echo "running tests on tilera"
        THE_LOCKS="TTAS ARRAY MCS TICKET MUTEX SPINLOCK"
        num_cores=36
        platform_def="-DTILERA"
        freq=1200000000
        make="make"
        platform=tilera
        prog_prefix="../run ../"
        ;;
    *)
        # An unknown or missing platform is an error: report it on stderr
        # and exit with a non-zero status.
        echo "Usage: $0 platform, where platform is one of opteron, opteron_optimize, xeon, niagara, tilera" >&2
        exit 1
        ;;
esac

# -f: the output file does not exist on the first run.
rm -f correctness_trylock.out

for prefix in ${THE_LOCKS}
do
    # Build in a subshell so that a failing cd or make can neither leave the
    # script in the wrong directory nor let a stale binary be tested.
    if ! ( cd .. && LOCK_VERSION="-DUSE_${prefix}_LOCKS" PRIMITIVE=-DTEST_CAS OPTIMIZE="${optimize}" PLATFORM="${platform_def}" ${make} clean all ); then
        echo "${prefix}: build failed, test skipped" >> correctness_trylock.out
        continue
    fi
    echo "${prefix}" >> correctness_trylock.out
    # prog_prefix stays unquoted on purpose: it may hold a launcher command
    # and its options in front of the path prefix.
    ${prog_prefix}test_trylock -n ${num_cores} -d 1000 >> correctness_trylock.out
done

-------------------------------------------------------------------------------- /scripts/make_all_versions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LOCKS="USE_HCLH_LOCKS USE_SPINLOCK_LOCKS USE_TTAS_LOCKS USE_MCS_LOCKS USE_CLH_LOCKS USE_ARRAY_LOCKS USE_RW_LOCKS USE_TICKET_LOCKS USE_MUTEX_LOCKS USE_HTICKET_LOCKS" 4 | 5 | MAKE=""; 6 | UNAME=`uname`; 7 | if [ $UNAME = "Linux" ]; 8 | then 9 | MAKE=make; 10 | # jda() { cd $(pwd | sed "s/\(\/$@\/\).*/\1/g"); } 11 | # jda primitives 12 | else 13 | MAKE=gmake; 14 | fi; 15 | 16 | 17 | 18 | usage() 19 | { 20 | echo "$0 [-v] [-s suffix]"; 21 | echo " -v verbose"; 22 | echo " -s suffix suffix the executable with suffix"; 23 | } 24 | 25 | 26 | USUFFIX=""; 27 | VERBOSE=0; 28 | while getopts "hs:v" OPTION 29 | do 30 | case $OPTION in 31 | h) 32 | usage; 33 | exit 1 34 | ;; 35 | s) 36 | USUFFIX="_$OPTARG" 37 | echo "Using suffix: $USUFFIX" 38 | ;; 39 | v) 40 | VERBOSE=1 41 | ;; 42 | ?) 43 | usage; 44 | exit; 45 | ;; 46 | esac 47 | done 48 | 49 | for lock in $LOCKS 50 | do 51 | echo "Building: $lock"; 52 | touch Makefile; 53 | if [ $VERBOSE -eq 1 ]; then 54 | $MAKE all LOCK_VERSION=-D$lock 55 | else 56 | $MAKE all LOCK_VERSION=-D$lock > /dev/null; 57 | fi 58 | suffix=`echo $lock | sed -e "s/USE_//g" -e "s/_LOCK\?//g" | tr "[:upper:]" "[:lower:]"`; 59 | mv bank bank_$suffix$USUFFIX; 60 | mv bank_one bank_one_$suffix$USUFFIX; 61 | mv bank_simple bank_simple_$suffix$USUFFIX; 62 | mv stress_test stress_test_$suffix$USUFFIX; 63 | mv stress_one stress_one_$suffix$USUFFIX; 64 | mv stress_latency stress_latency_$suffix$USUFFIX; 65 | mv test_correctness test_correctness_$suffix$USUFFIX; 66 | done; 67 | -------------------------------------------------------------------------------- /scripts/run_on_cores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | if [ $# -lt 4 ]; 5 | then 6 | echo "Usage: ./$@ \"RANGE\" 
APPLICATION [PARAMETERS]"; 7 | echo " e.g., ./$@ \"1 6 12 18\" stress_test -l1 -a0 -d1000"; 8 | exit; 9 | fi; 10 | 11 | cores="$1"; 12 | shift; 13 | app=$1; 14 | shift; 15 | 16 | for c in $cores; 17 | do 18 | printf "%-4d" $c; 19 | ./$app $@ -n$c 20 | done; 21 | -------------------------------------------------------------------------------- /scripts/run_on_range.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | if [ $# -lt 4 ]; 5 | then 6 | echo "Usage: ./$@ FROM_NUM_CORES STEP_NUM_CORES TO_NUM_CORES APPLICATION [PARAMETERS]"; 7 | exit; 8 | fi; 9 | 10 | lc=$1; 11 | shift; 12 | step=$1; 13 | shift; 14 | hc=$1; 15 | shift; 16 | app=$1; 17 | shift; 18 | 19 | for c in $(seq $lc $step $hc); 20 | do 21 | printf "%-4d" $c; 22 | ./$app $@ -n$c 23 | done; 24 | -------------------------------------------------------------------------------- /src/alock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: alock.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Array lock implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 
21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | 33 | #include "alock.h" 34 | 35 | int is_free_alock(lock_shared_t* the_lock) { 36 | if ((the_lock->flags[(the_lock->tail) % the_lock->size].flag) == (uint32_t)1) return 1; 37 | return 0; 38 | } 39 | 40 | int alock_trylock(array_lock_t* local_lock) { 41 | lock_shared_t *lock = local_lock->shared_data; 42 | uint32_t tail = lock->tail; 43 | if (lock->flags[tail % lock->size].flag==1) { 44 | if (CAS_U32(&(lock->tail), tail, tail+1)==tail) { 45 | local_lock->my_index = tail % lock->size; 46 | return 0; 47 | } 48 | } 49 | return 1; 50 | } 51 | 52 | void alock_lock(array_lock_t* local_lock) 53 | { 54 | #if defined(OPTERON_OPTIMIZE) 55 | PREFETCHW(local_lock); 56 | PREFETCHW(local_lock->shared_data); 57 | #endif /* OPTERON_OPTIMIZE */ 58 | lock_shared_t *lock = local_lock->shared_data; 59 | #ifdef __tile__ 60 | MEM_BARRIER; 61 | #endif 62 | uint32_t slot = FAI_U32(&(lock->tail)) % lock->size; 63 | local_lock->my_index = slot; 64 | 65 | volatile uint16_t* flag = &lock->flags[slot].flag; 66 | #ifdef __tile__ 67 | MEM_BARRIER; 68 | #endif 69 | #if defined(OPTERON_OPTIMIZE) 70 | PREFETCHW(flag); 71 | #endif /* OPTERON_OPTIMIZE */ 72 | while (*flag == 0) 73 | { 74 | PAUSE; 75 | #if defined(OPTERON_OPTIMIZE) 76 | pause_rep(23); 77 | PREFETCHW(flag); 78 | #endif /* OPTERON_OPTIMIZE */ 79 | } 80 | } 81 | 82 | void alock_unlock(array_lock_t* local_lock) 83 | { 84 | #if defined(OPTERON_OPTIMIZE) 85 | PREFETCHW(local_lock); 86 | 
PREFETCHW(local_lock->shared_data); 87 | #endif /* OPTERON_OPTIMIZE */ 88 | lock_shared_t *lock = local_lock->shared_data; 89 | uint32_t slot = local_lock->my_index; 90 | lock->flags[slot].flag = 0; 91 | #ifdef __tile__ 92 | MEM_BARRIER; 93 | #endif 94 | COMPILER_BARRIER; 95 | lock->flags[(slot + 1)%lock->size].flag = 1; 96 | } 97 | 98 | /* 99 | * Methods for array of locks manipulation 100 | */ 101 | lock_shared_t* init_alock_array_global(uint32_t num_locks, uint32_t num_processes) { 102 | uint32_t i; 103 | lock_shared_t* the_locks = (lock_shared_t*) calloc(num_locks, sizeof(lock_shared_t)); 104 | for (i = 0; i < num_locks; i++) { 105 | // the_locks[i]=(lock_shared_t*)malloc(sizeof(lock_shared_t)); 106 | // bzero((void*)the_locks[i],sizeof(lock_shared_t)); 107 | the_locks[i].size = num_processes; 108 | the_locks[i].flags[0].flag=1; 109 | the_locks[i].tail=0; 110 | } 111 | MEM_BARRIER; 112 | return the_locks; 113 | } 114 | 115 | array_lock_t* init_alock_array_local(uint32_t thread_num, uint32_t num_locks, lock_shared_t* the_locks) { 116 | //assign the thread to the correct core 117 | set_cpu(thread_num); 118 | 119 | uint32_t i; 120 | array_lock_t* local_locks = (array_lock_t*) malloc(num_locks * sizeof(array_lock_t)); 121 | for (i = 0; i < num_locks; i++) { 122 | // local_locks[i]=(array_lock_t*) malloc(sizeof(array_lock_t)); 123 | local_locks[i].my_index=0; 124 | local_locks[i].shared_data = &(the_locks[i]); 125 | } 126 | MEM_BARRIER; 127 | return local_locks; 128 | } 129 | 130 | int init_alock_global(uint32_t num_processes, lock_shared_t* the_lock) { 131 | bzero((void*)the_lock,sizeof(lock_shared_t)); 132 | the_lock->size = num_processes; 133 | the_lock->flags[0].flag=1; 134 | the_lock->tail=0; 135 | MEM_BARRIER; 136 | return 0; 137 | } 138 | 139 | int init_alock_local(uint32_t thread_num, lock_shared_t* the_lock, array_lock_t* local_lock) { 140 | //assign the thread to the correct core 141 | set_cpu(thread_num); 142 | 143 | local_lock->my_index=0; 144 | 
local_lock->shared_data = the_lock; 145 | MEM_BARRIER; 146 | return 0; 147 | } 148 | 149 | void end_alock_array_local(array_lock_t* local_locks, uint32_t size) { 150 | //uint32_t i; 151 | //for (i = 0; i < size; i++) { 152 | // free(local_locks[i]); 153 | //} 154 | free(local_locks); 155 | } 156 | 157 | void end_alock_array_global(lock_shared_t* the_locks, uint32_t size) { 158 | //uint32_t i; 159 | //for (i = 0; i < size; i++) { 160 | // free(the_locks[i]); 161 | //} 162 | free(the_locks); 163 | } 164 | 165 | void end_alock_local(array_lock_t local_lock) { 166 | //free(local_lock); 167 | } 168 | 169 | void end_alock_global(lock_shared_t the_lock) { 170 | //free(the_lock); 171 | } 172 | 173 | -------------------------------------------------------------------------------- /src/clh.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: clh.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Clh lock implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | #include "clh.h" 33 | 34 | int clh_trylock(clh_lock * L, clh_qnode_ptr I) { 35 | return 1; 36 | } 37 | 38 | 39 | volatile clh_qnode* clh_acquire(clh_lock *L, clh_qnode* I ) 40 | { 41 | I->locked=1; 42 | #ifndef __tile__ 43 | clh_qnode_ptr pred = (clh_qnode*) SWAP_PTR((volatile void*) (L), (void*) I); 44 | #else 45 | MEM_BARRIER; 46 | clh_qnode_ptr pred = (clh_qnode*) SWAP_PTR( L, I); 47 | #endif 48 | if (pred == NULL) /* lock was free */ 49 | return NULL; 50 | #if defined(OPTERON_OPTIMIZE) 51 | PREFETCHW(pred); 52 | #endif /* OPTERON_OPTIMIZE */ 53 | while (pred->locked != 0) 54 | { 55 | PAUSE; 56 | #if defined(OPTERON_OPTIMIZE) 57 | pause_rep(23); 58 | PREFETCHW(pred); 59 | #endif /* OPTERON_OPTIMIZE */ 60 | } 61 | 62 | return pred; 63 | } 64 | 65 | clh_qnode* clh_release(clh_qnode *my_qnode, clh_qnode * my_pred) { 66 | COMPILER_BARRIER; 67 | #ifdef __tile__ 68 | MEM_BARRIER; 69 | #endif 70 | my_qnode->locked=0; 71 | return my_pred; 72 | } 73 | 74 | clh_global_params* init_clh_array_global(uint32_t num_locks) { 75 | clh_global_params* the_params; 76 | the_params = (clh_global_params*)malloc(num_locks * sizeof(clh_global_params)); 77 | uint32_t i; 78 | for (i=0;ilocked=0; 82 | *(the_params[i].the_lock) = a_node; 83 | } 84 | MEM_BARRIER; 85 | return the_params; 86 | } 87 | 88 | clh_local_params* init_clh_array_local(uint32_t thread_num, uint32_t num_locks) { 89 | set_cpu(thread_num); 90 | 91 | //init its qnodes 92 | uint32_t i; 93 | clh_local_params* local_params = (clh_local_params*)malloc(num_locks * sizeof(clh_local_params)); 94 | for (i=0;ilocked=0; 97 | local_params[i].my_pred = NULL; 98 | } 99 | MEM_BARRIER; 100 | return local_params; 101 | 102 | } 103 | 104 
| void end_clh_array_local(clh_local_params* the_params, uint32_t size){ 105 | free(the_params); 106 | } 107 | 108 | void end_clh_array_global(clh_global_params* the_locks, uint32_t size) { 109 | uint32_t i; 110 | for (i = 0; i < size; i++) { 111 | free(the_locks[i].the_lock); 112 | } 113 | free(the_locks); 114 | } 115 | 116 | int init_clh_global(clh_global_params* the_params) { 117 | the_params->the_lock=(clh_lock*)malloc(sizeof(clh_lock)); 118 | clh_qnode * a_node = (clh_qnode *) malloc(sizeof(clh_qnode)); 119 | a_node->locked=0; 120 | *(the_params->the_lock) = a_node; 121 | MEM_BARRIER; 122 | return 0; 123 | } 124 | 125 | int init_clh_local(uint32_t thread_num, clh_local_params* local_params) { 126 | set_cpu(thread_num); 127 | 128 | //init its qnodes 129 | local_params->my_qnode = (clh_qnode*) malloc(sizeof(clh_qnode)); 130 | local_params->my_qnode->locked=0; 131 | local_params->my_pred = NULL; 132 | MEM_BARRIER; 133 | return 0; 134 | 135 | } 136 | 137 | void end_clh_local(clh_local_params the_params){ 138 | //empty method 139 | } 140 | 141 | void end_clh_global(clh_global_params the_lock) { 142 | free(the_lock.the_lock); 143 | } 144 | 145 | -------------------------------------------------------------------------------- /src/gl_lock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: gl_lock.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Global read-write lock implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to 
the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | #include "gl_lock.h" 33 | 34 | void local_lock_write(global_lock* gl) { 35 | while(1) { 36 | while (gl->lock_short.global_lock != 0) {} 37 | unsigned int aux = (unsigned int) gl->lock_short.local_lock; 38 | if (__sync_val_compare_and_swap(&gl->lock_data, aux,aux+0x100) == (aux)) { 39 | return; 40 | } 41 | } 42 | } 43 | 44 | void local_unlock_write(global_lock* gl){ 45 | __sync_sub_and_fetch(&(gl->lock.local_write),1); 46 | } 47 | 48 | void local_lock_read(global_lock* gl) { 49 | while(1) { 50 | while (gl->lock.global_write != 0) {} 51 | unsigned int aux = (unsigned int) gl->lock_data & 0x00ffffff; 52 | if (__sync_val_compare_and_swap(&gl->lock_data, aux,aux+1) == (aux)) { 53 | return; 54 | } 55 | } 56 | } 57 | 58 | void local_unlock_read(global_lock* gl){ 59 | __sync_sub_and_fetch(&(gl->lock.local_read),1); 60 | } 61 | 62 | 63 | void global_acquire_write(global_lock* gl) { 64 | while(1) { 65 | while (gl->lock_data != 0) {} 66 | unsigned short aux = (unsigned short) 0x1000000; 67 | if (__sync_val_compare_and_swap(&gl->lock_data, 0, aux) == 0) { 68 | return; 69 | } 70 | } 71 | } 72 | 73 | 74 | void global_unlock_write(global_lock* gl) { 75 | COMPILER_BARRIER; 76 | #ifdef __tile__ 77 | MEM_BARRIER; 78 | #endif 79 | gl->lock_data = 0; 80 | } 81 | 82 | void 
global_acquire_read(global_lock* gl) { 83 | while(1) { 84 | while ((gl->lock.global_write != 0) || (gl->lock.local_write != 0)) {} 85 | unsigned int aux = (unsigned int) gl->lock_data & 0x00ff00ff; 86 | if (__sync_val_compare_and_swap(&gl->lock_data, aux,aux+0x10000) == (aux)) { 87 | return; 88 | } 89 | } 90 | } 91 | 92 | void global_unlock_read(global_lock* gl){ 93 | __sync_sub_and_fetch(&(gl->lock.global_read),1); 94 | } 95 | 96 | 97 | -------------------------------------------------------------------------------- /src/hclh.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: hclh.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Hierarchical CLH lock implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | #include "hclh.h" 31 | 32 | __thread uint32_t hclh_node_mine; 33 | 34 | uint16_t wait_for_grant_or_cluster_master(volatile qnode *q, uint8_t my_cluster) { 35 | qnode aux; 36 | while(1) 37 | { 38 | aux.data=q->data; 39 | if ((aux.fields.cluster_id==my_cluster) && 40 | (aux.fields.tail_when_spliced==0) && 41 | (aux.fields.successor_must_wait==0)) 42 | return 1; 43 | if (aux.fields.tail_when_spliced==1) 44 | return 0; 45 | if (aux.fields.cluster_id!=my_cluster) 46 | return 0; 47 | PAUSE; 48 | } 49 | } 50 | 51 | volatile qnode * hclh_acquire(local_queue *lq, global_queue *gq, qnode *my_qnode) { 52 | volatile qnode* my_pred; 53 | do 54 | { 55 | #if defined(OPTERON_OPTIMIZE) 56 | PREFETCHW(lq); 57 | #endif /* OPTERON_OPTIMIZE */ 58 | my_pred = *lq; 59 | } while (CAS_PTR(lq, my_pred, my_qnode)!=my_pred); 60 | 61 | if (my_pred != NULL) 62 | { 63 | uint16_t i_own_lock = wait_for_grant_or_cluster_master(my_pred, my_qnode->fields.cluster_id); 64 | if (i_own_lock) 65 | { 66 | return my_pred; 67 | } 68 | } 69 | PAUSE; PAUSE; 70 | 71 | volatile qnode * local_tail; 72 | do 73 | { 74 | #if defined(OPTERON_OPTIMIZE) 75 | PREFETCHW(gq); 76 | PREFETCHW(lq); 77 | #endif /* OPTERON_OPTIMIZE */ 78 | my_pred = *gq; 79 | local_tail = *lq; 80 | PAUSE; 81 | } while(CAS_PTR(gq, my_pred, local_tail)!=my_pred); 82 | 83 | local_tail->fields.tail_when_spliced = 1; 84 | #if defined(OPTERON_OPTIMIZE) 85 | PREFETCHW(my_pred); 86 | #endif /* OPTERON_OPTIMIZE */ 87 | while (my_pred->fields.successor_must_wait) { 88 | PAUSE; 89 | #if defined(OPTERON_OPTIMIZE) 90 | pause_rep(23); 91 | PREFETCHW(my_pred); 92 | #endif /* OPTERON_OPTIMIZE */ 93 | } 94 | return my_pred; 95 | } 96 | 97 | int is_free_hclh(local_queue *lq, global_queue *gq, qnode *my_qnode) { 98 | if ((*lq)!=NULL) { 99 | qnode aux; 100 | aux.data=(*lq)->data; 101 | if ((aux.fields.cluster_id==my_qnode->fields.cluster_id) && 102 | (aux.fields.tail_when_spliced==0) && 103 | (aux.fields.successor_must_wait==0)) 104 | return 1; 
105 | } 106 | if ((*gq)->fields.successor_must_wait==0) return 1; 107 | return 0; 108 | } 109 | 110 | qnode* hclh_release(qnode *my_qnode, qnode * my_pred) { 111 | my_qnode->fields.successor_must_wait = 0; 112 | qnode* pr = my_pred; 113 | qnode new_node; 114 | new_node.data=0; 115 | new_node.fields.cluster_id=hclh_node_mine; 116 | new_node.fields.successor_must_wait = 1; 117 | new_node.fields.tail_when_spliced=0; 118 | 119 | #if defined(OPTERON_OPTIMIZE) 120 | PREFETCHW(pr); 121 | #endif /* OPTERON_OPTIMIZE */ 122 | uint32_t old_data = pr->data; 123 | while (CAS_U32(&pr->data,old_data,new_node.data)!=old_data) 124 | { 125 | old_data=pr->data; 126 | PAUSE; 127 | #if defined(OPTERON_OPTIMIZE) 128 | PREFETCHW(pr); 129 | #endif /* OPTERON_OPTIMIZE */ 130 | } 131 | my_qnode=pr; 132 | return my_qnode; 133 | } 134 | 135 | /* 136 | * Methods aiding with array of locks manipulation 137 | */ 138 | 139 | #define INIT_VAL 123 140 | 141 | hclh_global_params* init_hclh_array_global(uint32_t num_locks) { 142 | hclh_global_params* the_params; 143 | the_params = (hclh_global_params*)malloc(num_locks * sizeof(hclh_global_params)); 144 | uint32_t i; 145 | for (i=0;idata=0; 152 | a_node->fields.cluster_id = NUMBER_OF_SOCKETS+1; 153 | *(the_params[i].shared_queue) = a_node; 154 | } 155 | MEM_BARRIER; 156 | return the_params; 157 | } 158 | 159 | 160 | hclh_local_params* init_hclh_array_local(uint32_t phys_core, uint32_t num_locks, hclh_global_params* the_params) { 161 | //assign the thread to the correct core 162 | set_cpu(phys_core); 163 | hclh_local_params* local_params; 164 | local_params = (hclh_local_params*)malloc(num_locks * sizeof(hclh_local_params)); 165 | uint32_t i; 166 | #ifdef XEON 167 | MEM_BARRIER; 168 | uint32_t real_core_num = 0; 169 | for (i = 0; i < (NUMBER_OF_SOCKETS * CORES_PER_SOCKET); i++) { 170 | if (the_cores[i]==phys_core) { 171 | real_core_num = i; 172 | break; 173 | } 174 | } 175 | phys_core=real_core_num; 176 | MEM_BARRIER; 177 | #endif 178 | hclh_node_mine 
= phys_core/CORES_PER_SOCKET; 179 | for (i = 0; i < num_locks; i++) { 180 | //local_params[i]=(hclh_local_params*) malloc(sizeof(hclh_local_params)); 181 | local_params[i].my_qnode = (qnode*) malloc(sizeof(qnode)); 182 | local_params[i].my_qnode->data = 0; 183 | local_params[i].my_qnode->fields.cluster_id = phys_core/CORES_PER_SOCKET; 184 | local_params[i].my_qnode->fields.successor_must_wait=1; 185 | local_params[i].my_pred = NULL; 186 | if (phys_core%CORES_PER_SOCKET==0) { 187 | the_params[i].local_queues[phys_core/CORES_PER_SOCKET] = (local_queue*)malloc(sizeof(local_queue)); 188 | *(the_params[i].local_queues[phys_core/CORES_PER_SOCKET]) = NULL; 189 | #ifdef __tile__ 190 | MEM_BARRIER; 191 | #endif 192 | the_params[i].init_done[phys_core/CORES_PER_SOCKET]=INIT_VAL; 193 | } 194 | while(the_params[i].init_done[phys_core/CORES_PER_SOCKET]!=INIT_VAL) {} 195 | local_params[i].my_queue = the_params[i].local_queues[phys_core/CORES_PER_SOCKET]; 196 | } 197 | MEM_BARRIER; 198 | return local_params; 199 | } 200 | 201 | void end_hclh_array_local(hclh_local_params* local_params, uint32_t size) { 202 | uint32_t i; 203 | for (i = 0; i < size; i++) { 204 | free(local_params[i].my_qnode); 205 | } 206 | free(local_params); 207 | } 208 | 209 | void end_hclh_array_global(hclh_global_params* global_params, uint32_t size) { 210 | uint32_t i; 211 | for (i = 0; i < size; i++) { 212 | free(global_params[i].shared_queue); 213 | free(global_params[i].local_queues); 214 | } 215 | free(global_params); 216 | } 217 | 218 | int init_hclh_global(hclh_global_params* the_params) { 219 | // hclh_global_params* the_params; 220 | // the_params=(hclh_global_params*)malloc(sizeof(hclh_global_params)); 221 | the_params->local_queues = (local_queue**)malloc(NUMBER_OF_SOCKETS*sizeof(local_queue*)); 222 | the_params->init_done=(uint32_t*)malloc(NUMBER_OF_SOCKETS * sizeof(uint32_t)); 223 | the_params->shared_queue = (global_queue*)malloc(sizeof(global_queue)); 224 | qnode * a_node = (qnode *) 
malloc(sizeof(qnode)); 225 | a_node->data=0; 226 | a_node->fields.cluster_id = NUMBER_OF_SOCKETS+1; 227 | *(the_params->shared_queue) = a_node; 228 | MEM_BARRIER; 229 | return 0; 230 | } 231 | 232 | 233 | int init_hclh_local(uint32_t phys_core, hclh_global_params* the_params, hclh_local_params* local_params) { 234 | //assign the thread to the correct core 235 | set_cpu(phys_core); 236 | #ifdef XEON 237 | MEM_BARRIER; 238 | uint32_t real_core_num = 0; 239 | int i; 240 | for (i = 0; i < (NUMBER_OF_SOCKETS * CORES_PER_SOCKET); i++) { 241 | if (the_cores[i]==phys_core) { 242 | real_core_num = i; 243 | break; 244 | } 245 | } 246 | phys_core=real_core_num; 247 | MEM_BARRIER; 248 | #endif 249 | 250 | hclh_node_mine = phys_core/CORES_PER_SOCKET; 251 | // local_params=(hclh_local_params*) malloc(sizeof(hclh_local_params)); 252 | local_params->my_qnode = (qnode*) malloc(sizeof(qnode)); 253 | local_params->my_qnode->data = 0; 254 | local_params->my_qnode->fields.cluster_id = phys_core/CORES_PER_SOCKET; 255 | local_params->my_qnode->fields.successor_must_wait=1; 256 | local_params->my_pred = NULL; 257 | if (phys_core%CORES_PER_SOCKET==0) { 258 | the_params->local_queues[phys_core/CORES_PER_SOCKET] = (local_queue*)malloc(sizeof(local_queue)); 259 | *(the_params->local_queues[phys_core/CORES_PER_SOCKET]) = NULL; 260 | #ifdef __tile__ 261 | MEM_BARRIER; 262 | #endif 263 | the_params->init_done[phys_core/CORES_PER_SOCKET]=INIT_VAL; 264 | } 265 | while(the_params->init_done[phys_core/CORES_PER_SOCKET]!=INIT_VAL) {} 266 | local_params->my_queue = the_params->local_queues[phys_core/CORES_PER_SOCKET]; 267 | MEM_BARRIER; 268 | return 0; 269 | } 270 | 271 | void end_hclh_local(hclh_local_params local_params) { 272 | free(local_params.my_qnode); 273 | } 274 | 275 | void end_hclh_global(hclh_global_params global_params) { 276 | free(global_params.shared_queue); 277 | int i; 278 | for (i=0;i 4 | * 5 | * Description: an numa-aware hierarchical ticket lock 6 | * The htlock contains N local 
ticket locks (N = number of memory 7 | * nodes) and 1 global ticket lock. A thread always tries to acquire 8 | * the local ticket lock first. If there isn't any (local) available, 9 | * it enqueues for acquiring the global ticket lock and at the same 10 | * time it "gives" NB_TICKETS_LOCAL tickets to the local ticket lock, 11 | * so that if more threads from the same socket try to acquire the lock, 12 | * they will enqueue on the local lock, without even accessing the 13 | * global one. 14 | * 15 | * The MIT License (MIT) 16 | * 17 | * Copyright (c) 2013 Vasileios Trigonakis 18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 20 | * this software and associated documentation files (the "Software"), to deal in 21 | * the Software without restriction, including without limitation the rights to 22 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 23 | * the Software, and to permit persons to whom the Software is furnished to do so, 24 | * subject to the following conditions: 25 | * 26 | * The above copyright notice and this permission notice shall be included in all 27 | * copies or substantial portions of the Software. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 31 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 32 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 33 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
35 | */ 36 | 37 | #include "htlock.h" 38 | 39 | __thread uint32_t htlock_node_mine, htlock_id_mine; 40 | 41 | int create_htlock(htlock_t* htl) 42 | { 43 | // htlock_t* htl; 44 | // htl = memalign(CACHE_LINE_SIZE, sizeof(htlock_t)); 45 | // if (htl == NULL) 46 | // { 47 | // fprintf(stderr,"Error @ memalign : create htlock\n"); 48 | // } 49 | assert(htl != NULL); 50 | 51 | htl->global = memalign(CACHE_LINE_SIZE, sizeof(htlock_global_t)); 52 | if (htl == NULL) 53 | { 54 | fprintf(stderr,"Error @ memalign : create htlock\n"); 55 | } 56 | assert(htl->global != NULL); 57 | 58 | uint32_t s; 59 | for (s = 0; s < NUMBER_OF_SOCKETS; s++) 60 | { 61 | #if defined(PLATFORM_NUMA) 62 | numa_set_preferred(s); 63 | htl->local[s] = (htlock_local_t*) numa_alloc_onnode(sizeof(htlock_local_t), s); 64 | #else 65 | htl->local[s] = (htlock_local_t*) malloc(sizeof(htlock_local_t)); 66 | #endif 67 | htl->local[s]->cur = NB_TICKETS_LOCAL; 68 | htl->local[s]->nxt = 0; 69 | assert(htl->local != NULL); 70 | } 71 | 72 | #if defined(PLATFORM_NUMA) 73 | numa_set_preferred(htlock_node_mine); 74 | #endif 75 | 76 | htl->global->cur = 0; 77 | htl->global->nxt = 0; 78 | 79 | MEM_BARRIER; 80 | return 0; 81 | } 82 | 83 | 84 | void 85 | init_htlock(htlock_t* htl) 86 | { 87 | assert(htl != NULL); 88 | htl->global->cur = 0; 89 | htl->global->nxt = 0; 90 | uint32_t n; 91 | for (n = 0; n < NUMBER_OF_SOCKETS; n++) 92 | { 93 | htl->local[n]->cur = NB_TICKETS_LOCAL; 94 | htl->local[n]->nxt = 0; 95 | } 96 | MEM_BARRIER; 97 | } 98 | 99 | void 100 | init_thread_htlocks(uint32_t phys_core) 101 | { 102 | set_cpu(phys_core); 103 | 104 | #if defined(XEON) 105 | uint32_t real_core_num = 0; 106 | uint32_t i; 107 | for (i = 0; i < (NUMBER_OF_SOCKETS * CORES_PER_SOCKET); i++) 108 | { 109 | if (the_cores[i]==phys_core) 110 | { 111 | real_core_num = i; 112 | break; 113 | } 114 | } 115 | htlock_id_mine = real_core_num; 116 | htlock_node_mine = get_cluster(phys_core); 117 | #else 118 | htlock_id_mine = phys_core; 119 | 
htlock_node_mine = get_cluster(phys_core); 120 | #endif 121 | /* printf("core %02d / node %3d\n", phys_core, htlock_node_mine); */ 122 | MEM_BARRIER; 123 | } 124 | 125 | uint32_t 126 | is_free_hticket(htlock_t* htl) 127 | { 128 | htlock_global_t* glb = htl->global; 129 | #if defined(OPTERON_OPTIMIZE) 130 | PREFETCHW(glb); 131 | #endif 132 | if (glb->cur == glb->nxt) 133 | { 134 | return 1; 135 | } 136 | return 0; 137 | } 138 | 139 | static htlock_t* 140 | create_htlock_no_alloc(htlock_t* htl, htlock_local_t* locals[NUMBER_OF_SOCKETS], size_t offset) 141 | { 142 | htl->global = memalign(CACHE_LINE_SIZE, sizeof(htlock_global_t)); 143 | if (htl == NULL) 144 | { 145 | fprintf(stderr,"Error @ memalign : create htlock\n"); 146 | } 147 | assert(htl->global != NULL); 148 | 149 | uint32_t s; 150 | for (s = 0; s < NUMBER_OF_SOCKETS; s++) 151 | { 152 | htl->local[s] = locals[s] + offset; 153 | } 154 | 155 | htl->global->cur = 0; 156 | htl->global->nxt = 0; 157 | uint32_t n; 158 | for (n = 0; n < NUMBER_OF_SOCKETS; n++) 159 | { 160 | htl->local[n]->cur = NB_TICKETS_LOCAL; 161 | htl->local[n]->nxt = 0; 162 | } 163 | 164 | MEM_BARRIER; 165 | return htl; 166 | } 167 | 168 | htlock_t* 169 | init_htlocks(uint32_t num_locks) 170 | { 171 | htlock_t* htls; 172 | htls = memalign(CACHE_LINE_SIZE, num_locks * sizeof(htlock_t)); 173 | if (htls == NULL) 174 | { 175 | fprintf(stderr, "Error @ memalign : init_htlocks\n"); 176 | } 177 | assert(htls != NULL); 178 | 179 | 180 | size_t alloc_locks = (num_locks < 64) ? 
64 : num_locks; 181 | 182 | htlock_local_t* locals[NUMBER_OF_SOCKETS]; 183 | uint32_t n; 184 | for (n = 0; n < NUMBER_OF_SOCKETS; n++) 185 | { 186 | #if defined(PLATFORM_NUMA) 187 | numa_set_preferred(n); 188 | #endif 189 | locals[n] = (htlock_local_t*) calloc(alloc_locks, sizeof(htlock_local_t)); 190 | assert(locals[n] != NULL); /* verify the allocation before writing through it (the write used to come first) */ 191 | *((volatile int*) locals[n]) = 33; 192 | } 193 | 194 | #if defined(OPTERON) || defined(XEON) 195 | numa_set_preferred(htlock_node_mine); 196 | #endif 197 | 198 | uint32_t i; 199 | for (i = 0; i < num_locks; i++) 200 | { 201 | create_htlock_no_alloc(htls + i, locals, i); 202 | } 203 | 204 | MEM_BARRIER; 205 | return htls; 206 | } 207 | 208 | /* NOTE(review): frees only the htlock_t array itself; the per-socket arrays calloc'ed in init_htlocks and each lock's `global` block are not released here. */ 209 | void 210 | free_htlocks(htlock_t* locks) 211 | { 212 | free(locks); 213 | } 214 | 215 | static inline uint32_t 216 | sub_abs(const uint32_t a, const uint32_t b) 217 | { 218 | if (a > b) 219 | { 220 | return a - b; 221 | } 222 | else 223 | { 224 | return b - a; 225 | } 226 | } 227 | 228 | 229 | #define TICKET_BASE_WAIT 512 230 | #define TICKET_MAX_WAIT 4095 231 | #define TICKET_WAIT_NEXT 64 232 | 233 | 234 | static inline void 235 | htlock_wait_ticket(htlock_local_t* lock, const uint32_t ticket) 236 | { 237 | 238 | #if defined(OPTERON_OPTIMIZE) 239 | uint32_t wait = TICKET_BASE_WAIT; 240 | uint32_t distance_prev = 1; 241 | 242 | while (1) 243 | { 244 | PREFETCHW(lock); 245 | int32_t lock_cur = lock->cur; 246 | if (lock_cur == ticket) 247 | { 248 | break; 249 | } 250 | uint32_t distance = sub_abs(lock->cur, ticket); 251 | if (distance > 1) 252 | { 253 | if (distance != distance_prev) 254 | { 255 | distance_prev = distance; 256 | wait = TICKET_BASE_WAIT; 257 | } 258 | 259 | nop_rep(distance * wait); 260 | wait = (wait + TICKET_BASE_WAIT) & TICKET_MAX_WAIT; 261 | } 262 | else 263 | { 264 | nop_rep(TICKET_WAIT_NEXT); 265 | } 266 | } 267 | #else 268 | while (lock->cur != ticket) 269 | { 270 | uint32_t distance = sub_abs(lock->cur, ticket); 271 | if (distance > 1) 272 | { 273 | nop_rep(distance *
TICKET_BASE_WAIT); 274 | } 275 | else 276 | { 277 | PAUSE; 278 | } 279 | } 280 | #endif /* OPTERON_OPTIMIZE */ 281 | } 282 | 283 | static inline void 284 | htlock_wait_global(htlock_local_t* lock, const uint32_t ticket) 285 | { 286 | while (lock->cur != ticket) 287 | { 288 | uint32_t distance = sub_abs(lock->cur, ticket); 289 | if (distance > 1) 290 | { 291 | wait_cycles(distance * 256); 292 | } 293 | else 294 | { 295 | PAUSE; 296 | } 297 | } 298 | } 299 | 300 | void 301 | htlock_lock(htlock_t* l) 302 | { 303 | htlock_local_t* localp = l->local[htlock_node_mine]; 304 | int32_t local_ticket; 305 | 306 | again_local: 307 | local_ticket = DAF_U32(&localp->nxt); 308 | if (local_ticket < -1) 309 | { 310 | PAUSE; 311 | wait_cycles(-local_ticket * 120); 312 | PAUSE; 313 | goto again_local; 314 | } 315 | 316 | if (local_ticket >= 0) /* local grabing successful */ 317 | { 318 | htlock_wait_ticket((htlock_local_t*) localp, local_ticket); 319 | } 320 | else /* no local ticket available */ 321 | { 322 | do 323 | { 324 | #if defined(OPTERON_OPTIMIZE) 325 | PREFETCHW(localp); 326 | #endif 327 | } while (localp->cur != NB_TICKETS_LOCAL); 328 | localp->nxt = NB_TICKETS_LOCAL; /* give tickets to the local neighbors */ 329 | 330 | htlock_global_t* globalp = l->global; 331 | uint32_t global_ticket = FAI_U32(&globalp->nxt); 332 | 333 | htlock_wait_global((htlock_local_t*) globalp, global_ticket); 334 | } 335 | } 336 | 337 | void 338 | htlock_release(htlock_t* l) 339 | { 340 | htlock_local_t* localp = l->local[htlock_node_mine]; 341 | #if defined(OPTERON_OPTIMIZE) 342 | PREFETCHW(localp); 343 | #endif 344 | int32_t local_cur = localp->cur; 345 | int32_t local_nxt = CAS_U32(&localp->nxt, local_cur, 0); 346 | if (local_cur == 0 || local_cur == local_nxt) /* global */ 347 | { 348 | #if defined(OPTERON_OPTIMIZE) 349 | PREFETCHW((l->global)); 350 | PREFETCHW(localp); 351 | #endif 352 | localp->cur = NB_TICKETS_LOCAL; 353 | l->global->cur++; 354 | } 355 | else /* local */ 356 | { 357 | #if 
defined(OPTERON_OPTIMIZE) 358 | PREFETCHW(localp); 359 | #endif 360 | localp->cur = local_cur - 1; 361 | } 362 | } 363 | 364 | uint32_t 365 | htlock_trylock(htlock_t* l) 366 | { 367 | htlock_global_t* globalp = l->global; 368 | PREFETCHW(globalp); 369 | uint32_t global_nxt = globalp->nxt; 370 | 371 | htlock_global_t tmp = 372 | { 373 | .nxt = global_nxt, 374 | .cur = global_nxt 375 | }; 376 | htlock_global_t tmp_new = 377 | { 378 | .nxt = global_nxt + 1, 379 | .cur = global_nxt 380 | }; 381 | 382 | uint64_t tmp64 = *(uint64_t*) &tmp; 383 | uint64_t tmp_new64 = *(uint64_t*) &tmp_new; 384 | 385 | if (CAS_U64((uint64_t*) globalp, tmp64, tmp_new64) == tmp64) 386 | { 387 | return 1; 388 | } 389 | 390 | return 0; 391 | } 392 | 393 | 394 | inline void 395 | htlock_release_try(htlock_t* l) /* trylock rls */ 396 | { 397 | PREFETCHW((l->global)); 398 | l->global->cur++; 399 | } 400 | 401 | -------------------------------------------------------------------------------- /src/mcs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: mcs.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * MCS lock implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 
21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | 33 | #include "mcs.h" 34 | 35 | int mcs_trylock(mcs_lock *L, mcs_qnode_ptr I) { 36 | I->next=NULL; 37 | #ifndef __tile__ 38 | if (CAS_PTR(L, NULL, I)==NULL) return 0; 39 | return 1; 40 | #else 41 | MEM_BARRIER; 42 | if (CAS_PTR( L, NULL, I)==NULL) return 0; 43 | return 1; 44 | #endif 45 | 46 | } 47 | 48 | void mcs_acquire(mcs_lock *L, mcs_qnode_ptr I) 49 | { 50 | I->next = NULL; 51 | #ifndef __tile__ 52 | mcs_qnode_ptr pred = (mcs_qnode*) SWAP_PTR((volatile void*) L, (void*) I); 53 | #else 54 | MEM_BARRIER; 55 | mcs_qnode_ptr pred = (mcs_qnode*) SWAP_PTR( L, I); 56 | #endif 57 | if (pred == NULL) /* lock was free */ 58 | return; 59 | I->waiting = 1; // word on which to spin 60 | MEM_BARRIER; 61 | pred->next = I; // make pred point to me 62 | 63 | #if defined(OPTERON_OPTIMIZE) 64 | PREFETCHW(I); 65 | #endif /* OPTERON_OPTIMIZE */ 66 | while (I->waiting != 0) 67 | { 68 | PAUSE; 69 | #if defined(OPTERON_OPTIMIZE) 70 | pause_rep(23); 71 | PREFETCHW(I); 72 | #endif /* OPTERON_OPTIMIZE */ 73 | } 74 | 75 | } 76 | 77 | void mcs_release(mcs_lock *L, mcs_qnode_ptr I) 78 | { 79 | #ifdef __tile__ 80 | MEM_BARRIER; 81 | #endif 82 | 83 | mcs_qnode_ptr succ; 84 | #if defined(OPTERON_OPTIMIZE) 85 | PREFETCHW(I); 86 | #endif /* OPTERON_OPTIMIZE */ 87 | if (!(succ = I->next)) /* I seem to have no succ. 
*/ 88 | { 89 | /* try to fix global pointer */ 90 | if (CAS_PTR(L, I, NULL) == I) 91 | return; 92 | do { 93 | succ = I->next; 94 | PAUSE; 95 | } while (!succ); // wait for successor 96 | } 97 | succ->waiting = 0; 98 | } 99 | 100 | int is_free_mcs(mcs_lock *L ){ 101 | if ((*L) == NULL) return 1; 102 | return 0; 103 | } 104 | 105 | /* 106 | Methods for easy lock array manipulation 107 | */ 108 | 109 | mcs_global_params* init_mcs_array_global(uint32_t num_locks) { 110 | uint32_t i; 111 | mcs_global_params* the_locks = (mcs_global_params*)malloc(num_locks * sizeof(mcs_global_params)); 112 | for (i=0;ithe_lock=(mcs_lock*)malloc(sizeof(mcs_lock)); 153 | *(the_lock->the_lock)=0; 154 | MEM_BARRIER; 155 | return 0; 156 | } 157 | 158 | 159 | int init_mcs_local(uint32_t thread_num, mcs_qnode** the_qnode) { 160 | set_cpu(thread_num); 161 | 162 | (*the_qnode)=(mcs_qnode*)malloc(sizeof(mcs_qnode)); 163 | 164 | MEM_BARRIER; 165 | return 0; 166 | 167 | } 168 | 169 | void end_mcs_local(mcs_qnode* the_qnodes) { 170 | free(the_qnodes); 171 | } 172 | 173 | void end_mcs_global(mcs_global_params the_locks) { 174 | free(the_locks.the_lock); 175 | } 176 | 177 | -------------------------------------------------------------------------------- /src/rw_ttas.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: rw_ttas.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Read-write test-and-test-and set implementation 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject 
to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | 32 | #include "rw_ttas.h" 33 | 34 | __thread unsigned long * rw_seeds; 35 | 36 | int rw_trylock(rw_ttas* lock, uint32_t* limit) { 37 | if (CAS_U16(&lock->lock_data,0,W_MASK)==0) return 0; 38 | return 1; 39 | 40 | } 41 | 42 | 43 | void read_acquire(rw_ttas* lock, uint32_t* limit) { 44 | uint32_t delay; 45 | while (1) 46 | { 47 | rw_data_t aux; 48 | #if defined(OPTERON_OPTIMIZE) 49 | // uint32_t t = 512; 50 | PREFETCHW(lock); 51 | #endif /* OPTERON_OPTIMIZE */ 52 | while ((aux=lock->lock_data)>MAX_RW) 53 | { 54 | #if defined(OPTERON_OPTIMIZE) 55 | // uint32_t wt = (my_random(&(rw_seeds[0]),&(rw_seeds[1]),&(rw_seeds[2])) % t) + 1; 56 | // pause_rep(wt); 57 | PREFETCHW(lock); 58 | /* t+=16; */ 59 | // t *= 4; 60 | // if (t > 102400) 61 | // { 62 | // t = 102400; 63 | // } 64 | PREFETCHW(lock); 65 | #endif /* OPTERON_OPTIMIZE */ 66 | } 67 | //uint16_t aux = (uint16_t) lock->lock_data; 68 | if (CAS_U16(&lock->lock_data,aux,aux+1)==aux) { 69 | return; 70 | } 71 | else 72 | { 73 | delay = my_random(&(rw_seeds[0]),&(rw_seeds[1]),&(rw_seeds[2]))%(*limit); 74 | *limit = MAX_DELAY > 2*(*limit) ? 
2*(*limit) : MAX_DELAY; 75 | cdelay(delay); 76 | } 77 | } 78 | } 79 | 80 | void read_release(rw_ttas* lock) { 81 | DAF_U16(&(lock->lock_data)); 82 | } 83 | 84 | void write_acquire(rw_ttas* lock, uint32_t* limit) { 85 | uint32_t delay; 86 | while (1) 87 | { 88 | #if defined(OPTERON_OPTIMIZE) 89 | // uint32_t t = 512; 90 | PREFETCHW(lock); 91 | #endif /* OPTERON_OPTIMIZE */ 92 | while (lock->lock_data!=0) 93 | { 94 | #if defined(OPTERON_OPTIMIZE) 95 | // uint32_t wt = (my_random(&(rw_seeds[0]),&(rw_seeds[1]),&(rw_seeds[2])) % t) + 1; 96 | // pause_rep(wt); 97 | PREFETCHW(lock); 98 | // t *= 4; 99 | // if (t > 102400) 100 | // { 101 | // t = 102400; 102 | // } 103 | // PREFETCHW(lock); 104 | #endif /* OPTERON_OPTIMIZE */ 105 | } 106 | if (CAS_U16(&lock->lock_data,0,W_MASK)==0) { 107 | return; 108 | } 109 | else { 110 | delay = my_random(&(rw_seeds[0]),&(rw_seeds[1]),&(rw_seeds[2]))%(*limit); 111 | *limit = MAX_DELAY > 2*(*limit) ? 2*(*limit) : MAX_DELAY; 112 | cdelay(delay); 113 | } 114 | 115 | } 116 | } 117 | 118 | void write_release(rw_ttas* lock) { 119 | COMPILER_BARRIER; 120 | #ifdef __tile__ 121 | MEM_BARRIER; 122 | #endif 123 | 124 | lock->lock_data = 0; 125 | } 126 | 127 | int is_free_rw(rw_ttas* lock){ 128 | if (lock->lock_data==0) return 1; 129 | return 0; 130 | } 131 | 132 | /* 133 | * Some methods for easy lock array manipulation 134 | */ 135 | rw_ttas* init_rw_ttas_array_global(uint32_t num_locks) { 136 | rw_ttas* the_locks; 137 | the_locks = (rw_ttas*) malloc (num_locks * sizeof(rw_ttas)); 138 | uint32_t i; 139 | for (i = 0; i < num_locks; i++) { 140 | the_locks[i].lock_data = 0; 141 | } 142 | MEM_BARRIER; 143 | return the_locks; 144 | } 145 | 146 | uint32_t* init_rw_ttas_array_local(uint32_t thread_num, uint32_t size){ 147 | set_cpu(thread_num); 148 | rw_seeds = seed_rand(); 149 | uint32_t* limits; 150 | limits = (uint32_t*)malloc(size * sizeof(uint32_t)); 151 | uint32_t i; 152 | for (i = 0; i < size; i++) { 153 | limits[i]=1; 154 | } 155 | MEM_BARRIER; 
156 | return limits; 157 | } 158 | 159 | void end_rw_ttas_array_local(uint32_t* limits) { 160 | free(limits); 161 | } 162 | 163 | void end_rw_ttas_array_global(rw_ttas* the_locks) { 164 | free(the_locks); 165 | } 166 | 167 | int init_rw_ttas_global(rw_ttas* the_lock) { 168 | the_lock->lock_data=0; 169 | MEM_BARRIER; 170 | return 0; 171 | } 172 | 173 | int init_rw_ttas_local(uint32_t thread_num, uint32_t * limit){ 174 | set_cpu(thread_num); 175 | *limit = 1; 176 | rw_seeds = seed_rand(); 177 | MEM_BARRIER; 178 | return 0; 179 | } 180 | 181 | void end_rw_ttas_local() { 182 | //method not needed 183 | } 184 | 185 | void end_rw_ttas_global() { 186 | //method not needed 187 | } 188 | 189 | -------------------------------------------------------------------------------- /src/spinlock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: spinlock.c 3 | * Author: Tudor David 4 | * 5 | * Description: 6 | * Simple test-and-set spinlock 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | 31 | #include "spinlock.h" 32 | 33 | #define UNLOCKED 0 34 | #define LOCKED 1 35 | 36 | __thread unsigned long* spinlock_seeds; 37 | 38 | int spinlock_trylock(spinlock_lock_t* the_lock, uint32_t* limits) { 39 | if (TAS_U8(&(the_lock->lock))==0) return 0; 40 | return 1; 41 | } 42 | void 43 | spinlock_lock(spinlock_lock_t* the_lock, uint32_t* limits) 44 | { 45 | volatile spinlock_lock_data_t* l = &(the_lock->lock); 46 | while (TAS_U8(l)) 47 | { 48 | PAUSE; 49 | } 50 | } 51 | 52 | void 53 | spinlock_unlock(spinlock_lock_t *the_lock) 54 | { 55 | COMPILER_BARRIER; 56 | #ifdef __tile__ 57 | MEM_BARRIER; 58 | #endif 59 | the_lock->lock = UNLOCKED; 60 | } 61 | 62 | int is_free_spinlock(spinlock_lock_t * the_lock){ 63 | if (the_lock->lock==UNLOCKED) return 1; 64 | return 0; 65 | } 66 | 67 | /* 68 | Some methods for easy lock array manipulation 69 | */ 70 | 71 | 72 | spinlock_lock_t* init_spinlock_array_global(uint32_t num_locks) 73 | { 74 | spinlock_lock_t* the_locks; 75 | the_locks = (spinlock_lock_t*)malloc(num_locks * sizeof(spinlock_lock_t)); 76 | uint32_t i; 77 | for (i = 0; i < num_locks; i++) 78 | { 79 | the_locks[i].lock = UNLOCKED; 80 | } 81 | 82 | MEM_BARRIER; 83 | return the_locks; 84 | } 85 | 86 | uint32_t* init_spinlock_array_local(uint32_t thread_num, uint32_t size) 87 | { 88 | //assign the thread to the correct core 89 | set_cpu(thread_num); 90 | spinlock_seeds = seed_rand(); 91 | 92 | uint32_t* limits; 93 | limits = (uint32_t*)malloc(size * sizeof(uint32_t)); 94 | uint32_t i; 95 | for (i = 0; i < size; i++) 96 | { 97 | limits[i] = 1; 98 | } 99 | MEM_BARRIER; 100 | return limits; 101 | } 102 | 103 | void end_spinlock_array_local(uint32_t* limits) 104 | { 105 | 
free(limits); 106 | } 107 | 108 | void end_spinlock_array_global(spinlock_lock_t* the_locks) 109 | { 110 | free(the_locks); 111 | } 112 | 113 | int init_spinlock_global(spinlock_lock_t* the_lock) 114 | { 115 | the_lock->lock = UNLOCKED; 116 | MEM_BARRIER; 117 | return 0; 118 | } 119 | 120 | int init_spinlock_local(uint32_t thread_num, uint32_t* limit) 121 | { 122 | //assign the thread to the correct core 123 | set_cpu(thread_num); 124 | *limit = 1; 125 | spinlock_seeds = seed_rand(); 126 | MEM_BARRIER; 127 | return 0; 128 | } 129 | 130 | void end_spinlock_local() 131 | { 132 | //function not needed 133 | } 134 | 135 | void end_spinlock_global() 136 | { 137 | //function not needed 138 | } 139 | 140 | -------------------------------------------------------------------------------- /src/ticket.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ticket.c 3 | * Author: Tudor David , Vasileios Trigonakis 4 | * 5 | * Description: 6 | * An implementation of a ticket lock with: 7 | * - proportional back-off optimization 8 | * - prefetchw for write optimization for the AMD Opteron 9 | * Magny-Cours processors 10 | * 11 | * The MIT License (MIT) 12 | * 13 | * Copyright (c) 2013 Tudor David, Vasileios Trigonakis 14 | * 15 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 16 | * this software and associated documentation files (the "Software"), to deal in 17 | * the Software without restriction, including without limitation the rights to 18 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 19 | * the Software, and to permit persons to whom the Software is furnished to do so, 20 | * subject to the following conditions: 21 | * 22 | * The above copyright notice and this permission notice shall be included in all 23 | * copies or substantial portions of the Software.
24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 27 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 28 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 29 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 31 | */ 32 | 33 | #include "ticket.h" 34 | 35 | /* enable measure contantion to collect statistics about the 36 | average queuing per lock acquisition */ 37 | #if defined(MEASURE_CONTENTION) 38 | __thread uint64_t ticket_queued_total = 0; 39 | __thread uint64_t ticket_acquires = 0; 40 | #endif 41 | 42 | static inline uint32_t 43 | sub_abs(const uint32_t a, const uint32_t b) 44 | { 45 | if (a > b) 46 | { 47 | return a - b; 48 | } 49 | else 50 | { 51 | return b - a; 52 | } 53 | } 54 | 55 | int 56 | ticket_trylock(ticketlock_t* lock) 57 | { 58 | uint32_t me = lock->tail; 59 | uint32_t me_new = me + 1; 60 | uint64_t cmp = ((uint64_t) me << 32) + me_new; 61 | uint64_t cmp_new = ((uint64_t) me_new << 32) + me_new; 62 | uint64_t* la = (uint64_t*) lock; 63 | if (CAS_U64(la, cmp, cmp_new) == cmp) 64 | { 65 | return 0; 66 | } 67 | return 1; 68 | } 69 | 70 | void 71 | ticket_acquire(ticketlock_t* lock) 72 | { 73 | uint32_t my_ticket = IAF_U32(&(lock->tail)); 74 | 75 | 76 | #if defined(OPTERON_OPTIMIZE) 77 | uint32_t wait = TICKET_BASE_WAIT; 78 | uint32_t distance_prev = 1; 79 | # if defined(MEASURE_CONTENTION) 80 | uint8_t once = 1; 81 | ticket_acquires++; 82 | # endif 83 | 84 | while (1) 85 | { 86 | PREFETCHW(lock); 87 | uint32_t cur = lock->head; 88 | if (cur == my_ticket) 89 | { 90 | break; 91 | } 92 | uint32_t distance = sub_abs(cur, my_ticket); 93 | 94 | # if defined(MEASURE_CONTENTION) 95 | if (once) 96 | { 97 | ticket_queued_total += distance; 98 | once = 0; 99 | } 100 
| # endif 101 | 102 | if (distance > 1) 103 | { 104 | if (distance != distance_prev) 105 | { 106 | distance_prev = distance; 107 | wait = TICKET_BASE_WAIT; 108 | } 109 | 110 | nop_rep(distance * wait); 111 | /* wait = (wait + TICKET_BASE_WAIT) & TICKET_MAX_WAIT; */ 112 | } 113 | else 114 | { 115 | nop_rep(TICKET_WAIT_NEXT); 116 | } 117 | 118 | if (distance > 20) 119 | { 120 | sched_yield(); 121 | /* pthread_yield(); */ 122 | } 123 | } 124 | 125 | #else /* !OPTERON_OPTIMIZE */ 126 | /* backoff proportional to the distance would make sense even without the PREFETCHW */ 127 | /* however, I did some tests on the Niagara and it performed worse */ 128 | 129 | # if defined(__x86_64__) 130 | # if defined(MEASURE_CONTENTION) 131 | uint8_t once = 1; 132 | ticket_acquires++; 133 | # endif 134 | 135 | uint32_t wait = TICKET_BASE_WAIT; 136 | uint32_t distance_prev = 1; 137 | 138 | while (1) 139 | { 140 | uint32_t cur = lock->head; 141 | if (cur == my_ticket) 142 | { 143 | break; 144 | } 145 | uint32_t distance = sub_abs(cur, my_ticket); 146 | 147 | # if defined(MEASURE_CONTENTION) 148 | if (once) 149 | { 150 | ticket_queued_total += distance; 151 | once = 0; 152 | } 153 | # endif 154 | 155 | if (distance > 1) 156 | { 157 | if (distance != distance_prev) 158 | { 159 | distance_prev = distance; 160 | wait = TICKET_BASE_WAIT; 161 | } 162 | 163 | nop_rep(distance * wait); 164 | } 165 | else 166 | { 167 | nop_rep(TICKET_WAIT_NEXT); 168 | } 169 | 170 | if (distance > 20) 171 | { 172 | sched_yield(); 173 | } 174 | } 175 | # else 176 | while (lock->head != my_ticket) 177 | { 178 | PAUSE; 179 | } 180 | # endif 181 | #endif /* OPTERON_OPTIMIZE */ 182 | } 183 | 184 | void 185 | ticket_release(ticketlock_t* lock) 186 | { 187 | #ifdef __tile__ 188 | MEM_BARRIER; 189 | #endif 190 | #if defined(OPTERON_OPTIMIZE) 191 | PREFETCHW(lock); 192 | #endif /* OPTERON */ 193 | COMPILER_BARRIER; 194 | lock->head++; 195 | } 196 | 197 | 198 | int create_ticketlock(ticketlock_t* the_lock) 199 | { 200 | 
the_lock->head=1; 201 | the_lock->tail=0; 202 | MEM_BARRIER; 203 | return 0; 204 | } 205 | 206 | 207 | int is_free_ticket(ticketlock_t* t) 208 | { 209 | if ((t->head - t->tail) == 1) 210 | { 211 | return 1; 212 | } 213 | return 0; 214 | } 215 | 216 | void init_thread_ticketlocks(uint32_t thread_num) 217 | { 218 | set_cpu(thread_num); 219 | } 220 | 221 | ticketlock_t* 222 | init_ticketlocks(uint32_t num_locks) 223 | { 224 | ticketlock_t* the_locks; 225 | the_locks = (ticketlock_t*) malloc(num_locks * sizeof(ticketlock_t)); 226 | uint32_t i; 227 | for (i = 0; i < num_locks; i++) 228 | { 229 | the_locks[i].head=1; 230 | the_locks[i].tail=0; 231 | } 232 | MEM_BARRIER; 233 | return the_locks; 234 | } 235 | 236 | void 237 | free_ticketlocks(ticketlock_t* the_locks) 238 | { 239 | free(the_locks); 240 | } 241 | 242 | 243 | #if defined(MEASURE_CONTENTION) 244 | void 245 | ticket_print_contention_stats() 246 | { 247 | double avg_q = ticket_queued_total / (double) ticket_acquires; 248 | printf("#Acquires: %10llu / #Total queuing: %10llu / Avg. 
queuing: %.3f\n", 249 | (long long unsigned) ticket_acquires, (long long unsigned) ticket_queued_total, avg_q); 250 | } 251 | 252 | double 253 | ticket_avg_queue() 254 | { 255 | double avg_q = ticket_queued_total / (double) ticket_acquires; 256 | return avg_q; 257 | } 258 | 259 | #endif /* MEASURE_CONTENTION */ 260 | -------------------------------------------------------------------------------- /src/ttas.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ttas.c 3 | * Author: Tudor David , Vasileios Trigonakis 4 | * 5 | * Description: 6 | * Implementation of a test-and-test-and-set lock with back-off 7 | * 8 | * The MIT License (MIT) 9 | * 10 | * Copyright (c) 2013 Tudor David, Vasileios Trigonakis 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | * this software and associated documentation files (the "Software"), to deal in 14 | * the Software without restriction, including without limitation the rights to 15 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 16 | * the Software, and to permit persons to whom the Software is furnished to do so, 17 | * subject to the following conditions: 18 | * 19 | * The above copyright notice and this permission notice shall be included in all 20 | * copies or substantial portions of the Software. 21 | * 22 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 24 | * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 25 | * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 26 | * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 27 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | 31 | #include "ttas.h" 32 | 33 | #define UNLOCKED 0 34 | #define LOCKED 1 35 | 36 | __thread unsigned long * ttas_seeds; 37 | 38 | 39 | int ttas_trylock(ttas_lock_t * the_lock, uint32_t * limits) { 40 | if (TAS_U8(&(the_lock->lock))==0) return 0; 41 | return 1; 42 | } 43 | 44 | void ttas_lock(ttas_lock_t * the_lock, uint32_t* limit) { 45 | #if defined(OPTERON_OPTIMIZE) 46 | volatile ttas_lock_data_t* l = &(the_lock->lock); 47 | uint32_t delay; 48 | while (1){ 49 | PREFETCHW(l); 50 | while ((*l)==1) { 51 | PREFETCHW(l); 52 | } 53 | if (TAS_U8(&(the_lock->lock))==UNLOCKED) { 54 | return; 55 | } else { 56 | //backoff 57 | delay = my_random(&(ttas_seeds[0]),&(ttas_seeds[1]),&(ttas_seeds[2]))%(*limit); 58 | *limit = MAX_DELAY > 2*(*limit) ? 2*(*limit) : MAX_DELAY; 59 | cdelay(delay); 60 | } 61 | } 62 | 63 | #else /* !OPTERON_OPTIMIZE */ 64 | uint32_t delay; 65 | volatile ttas_lock_data_t* l = &(the_lock->lock); 66 | while (1){ 67 | while ((*l)==1) {} 68 | if (TAS_U8(l)==UNLOCKED) { 69 | return; 70 | } else { 71 | //backoff 72 | delay = my_random(&(ttas_seeds[0]),&(ttas_seeds[1]),&(ttas_seeds[2]))%(*limit); 73 | *limit = MAX_DELAY > 2*(*limit) ? 
2*(*limit) : MAX_DELAY; 74 | cdelay(delay); 75 | } 76 | } 77 | #endif /* OPTERON_OPTIMIZE */ 78 | } 79 | 80 | 81 | int is_free_ttas(ttas_lock_t * the_lock){ 82 | if (the_lock->lock==UNLOCKED) return 1; 83 | return 0; 84 | } 85 | 86 | void ttas_unlock(ttas_lock_t *the_lock) 87 | { 88 | #ifdef __tile__ 89 | MEM_BARRIER; 90 | #endif 91 | COMPILER_BARRIER; 92 | the_lock->lock=0; 93 | } 94 | 95 | 96 | /* 97 | Some methods for easy lock array manipulation 98 | */ 99 | 100 | 101 | //ttas 102 | ttas_lock_t* init_ttas_array_global(uint32_t num_locks) { 103 | 104 | ttas_lock_t* the_locks; 105 | the_locks = (ttas_lock_t*)malloc(num_locks * sizeof(ttas_lock_t)); 106 | uint32_t i; 107 | for (i = 0; i < num_locks; i++) { 108 | the_locks[i].lock=0; 109 | } 110 | MEM_BARRIER; 111 | return the_locks; 112 | } 113 | 114 | uint32_t* init_ttas_array_local(uint32_t thread_num, uint32_t size){ 115 | //assign the thread to the correct core 116 | set_cpu(thread_num); 117 | ttas_seeds = seed_rand(); 118 | 119 | uint32_t* limits; 120 | limits = (uint32_t*)malloc(size * sizeof(uint32_t)); 121 | uint32_t i; 122 | for (i = 0; i < size; i++) { 123 | limits[i]=1; 124 | } 125 | MEM_BARRIER; 126 | return limits; 127 | } 128 | 129 | void end_ttas_array_local(uint32_t* limits) { 130 | free(limits); 131 | } 132 | 133 | void end_ttas_array_global(ttas_lock_t* the_locks) { 134 | free(the_locks); 135 | } 136 | 137 | int init_ttas_global(ttas_lock_t* the_lock) { 138 | the_lock->lock=0; 139 | MEM_BARRIER; 140 | return 0; 141 | } 142 | 143 | int init_ttas_local(uint32_t thread_num , uint32_t* limit){ 144 | //assign the thread to the correct core 145 | set_cpu(thread_num); 146 | *limit=1; 147 | ttas_seeds = seed_rand(); 148 | MEM_BARRIER; 149 | return 0; 150 | } 151 | 152 | void end_ttas_local() { 153 | //function not needed 154 | } 155 | 156 | void end_ttas_global() { 157 | //function not needed 158 | } 159 | 160 | --------------------------------------------------------------------------------