├── qd_library ├── .gitignore ├── threadid.cpp ├── util │ ├── pause.hpp │ └── type_tools.hpp ├── locks │ ├── tatas_lock.hpp │ ├── mutex_lock.hpp │ ├── pthreads_lock.hpp │ └── waitable_lock.hpp ├── readindicator │ └── reader_groups.hpp ├── waiting_future.hpp ├── threadid.hpp ├── qd.hpp ├── queues │ ├── simple_locked_queue.hpp │ └── entry_queue.hpp ├── qd_condition_variable.hpp └── padded.hpp ├── src ├── datastructures_bench │ ├── PR │ │ ├── .gitignore │ │ ├── perf_meas │ │ ├── gc │ │ │ ├── random.h │ │ │ ├── ptst.h │ │ │ ├── gc.h │ │ │ ├── portable_defns.h │ │ │ ├── ptst.c │ │ │ └── intel_defns.h │ │ ├── Makefile │ │ ├── README.md │ │ ├── prioq.h │ │ ├── common.c │ │ ├── gdb_skiplist_print.py │ │ └── common.h │ ├── synch_algorithms │ │ ├── glue_mcs_starve.cpp │ │ ├── glue_qd.cpp │ │ ├── glue_qd_starve.cpp │ │ ├── glue_qd_cas.cpp │ │ ├── glue_tatas.cpp │ │ ├── glue_qd_nodetach.cpp │ │ ├── glue_hqd.cpp │ │ ├── synch_algs_types.h │ │ ├── glue_mcs.cpp │ │ ├── synch_algs_system.h │ │ ├── glue.cpp.static │ │ ├── cpplock.cpp │ │ ├── glue.cpp │ │ ├── cpplock_nodetach.cpp │ │ ├── cpplock.h │ │ ├── clh.h │ │ ├── synch_algs_config.h │ │ └── qdlock.c │ └── datastructures │ │ └── pairingheap │ │ ├── dxlocked_pairingheap.h │ │ └── test_pairingheap.c ├── lock │ ├── common_lock_constants.h │ ├── rglue_hqd.cpp │ ├── rglue_qd.cpp │ ├── cpprdx.h │ ├── ticket_lock.h │ ├── aticket_lock.h │ ├── cpprdx.cpp │ ├── tts_rdx_lock.h │ ├── tatas_lock.h │ ├── wprw_lock.h │ ├── ticket_lock.c │ ├── simple_delayed_writers_lock.h │ ├── agnostic_dx_lock.c │ ├── all_equal_rdx_lock.h │ ├── tatas_lock.c │ ├── aticket_lock.c │ ├── mcs_lock.h │ ├── flat_comb_rdx_lock.h │ ├── rcpp_lock.cpp │ ├── agnostic_rdx_lock.h │ ├── cohort_lock.h │ ├── extract_numa_structure.py │ ├── mcs_lock.c │ ├── rhqd_lock.c │ ├── rcpp_lock.h │ ├── wprw_lock.c │ ├── cohort_lock.c │ ├── tts_rdx_lock.c │ └── agnostic_fdx_lock.h ├── utils │ ├── numa_node_info_support.c │ ├── thread_identifier.h │ ├── thread_identifier.c │ ├── numa_node_info_support.h │ ├── support_many_non_zero_indicator_types.h │ └── smp_utils.h ├── datastructures │ ├── dr_multi_writers_queue.c │ ├── numa_ingress_egress_nzi.c │ ├── multi_writers_queue.c │ ├── opti_multi_writers_queue.c │ ├── reader_groups_nzi.h │ ├── numa_ingress_egress_nzi.h │ ├── padded.hpp │ └── opti_multi_writers_queue.h ├── new_rep │ ├── misc │ │ ├── debug.h │ │ ├── random.h │ │ ├── thread_includes.h │ │ └── padded_types.h │ ├── tests │ │ ├── test_framework.h │ │ └── test_qd_queue.c │ └── locks │ │ ├── qd_lock.h │ │ ├── tatas_lock.h │ │ └── locks.h ├── benchmark │ ├── skiplist │ │ ├── skiplist.h │ │ └── kvset.h │ ├── run_benchmarks_on_intel_i7.py │ ├── run_benchmarks_on_amd_fx_6100.py │ ├── run_benchmarks_on_sandy.py │ ├── compare_benchmarks.py │ ├── produce_graphs_template.py │ ├── pairingheap │ │ ├── dxlocked_pairingheap.h │ │ └── test_pairingheap.c │ ├── perf_magic │ ├── perf_magic_simple │ ├── benchmark_lock.py │ ├── cache_benchmark_lock_simple.py │ ├── benchmark_lockXOpDist.py │ ├── benchmark_lock_XNonCW.py │ ├── cache_benchmark_lock.py │ ├── cache_benchmark_lockXOpDist.py │ └── cache_benchmark_lock_XNonCW.py ├── tests │ ├── test_framework.h │ └── test_multi_writers_queue.c └── profile │ └── profile_perf.py ├── .gitignore └── SConstruct /qd_library/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.dat 2 | *.o -------------------------------------------------------------------------------- /src/lock/common_lock_constants.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_LOCK_CONSTANTS_H 2 | #define COMMON_LOCK_CONSTANTS_H 3 | 4 | #endif 5 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/perf_meas: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parapluu/lock_benchmarking/HEAD/src/datastructures_bench/PR/perf_meas -------------------------------------------------------------------------------- /src/utils/numa_node_info_support.c: -------------------------------------------------------------------------------- 1 | #include "numa_node_info_support.h" 2 | 3 | CPUToNodeMapWrapper CPUToNodeMap __attribute__((aligned(64))); 4 | -------------------------------------------------------------------------------- /src/datastructures/dr_multi_writers_queue.c: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> 2 | 3 | #include "dr_multi_writers_queue.h" 4 | #include "utils/smp_utils.h" 5 | 6 | -------------------------------------------------------------------------------- /src/datastructures/numa_ingress_egress_nzi.c: -------------------------------------------------------------------------------- 1 | #include "numa_ingress_egress_nzi.h" 2 | 3 | __thread CacheLinePaddedInt myIngressEgressArriveNumaNode __attribute__((aligned(64))); 4 | -------------------------------------------------------------------------------- /qd_library/threadid.cpp: -------------------------------------------------------------------------------- 1 | #include "threadid.hpp" 2 | 3 | unsigned long thread_id_store::max_id = 0; 4 | std::set<unsigned long> thread_id_store::orphans; 5 | std::mutex thread_id_store::mutex; 6 | 7 | thread_local thread_id_t thread_id; 8 | -------------------------------------------------------------------------------- /src/new_rep/misc/debug.h: -------------------------------------------------------------------------------- 1 | for(int i = index; i < messageEndOffset; i++){ 2 | printf("%02x", ((unsigned char)(q->buffer[i])) ); 3 | //printf("%2X", ((unsigned char *) q->buffer)[i] ); 4 | } 5 | -------------------------------------------------------------------------------- /src/utils/thread_identifier.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_IDENTIFIER_H 2 | #define THREAD_IDENTIFIER_H 3 | 4 | #include "smp_utils.h" 5 | 6 | extern __thread CacheLinePaddedInt myId; 7 | extern int myIdCounter; 8 | 9 | void assign_id_to_thread(); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /src/new_rep/misc/random.h: -------------------------------------------------------------------------------- 1 | #ifndef RANDOM_H 2 | #define RANDOM_H 3 | 4 | #include <stdlib.h> 5 | 6 | 7 | double random_double(unsigned int *seed_ptr){ 8 | double randomDouble = (double)rand_r(seed_ptr); 9 | return randomDouble/RAND_MAX; 10 | } 11 | 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/utils/thread_identifier.c: -------------------------------------------------------------------------------- 1 | #include "thread_identifier.h" 2 | 3 | __thread CacheLinePaddedInt myId
__attribute__((aligned(128))); 4 | int myIdCounter __attribute__((aligned(128))) = 0; 5 | 6 | void assign_id_to_thread(){ 7 | myId.value = __sync_fetch_and_add(&myIdCounter, 1); 8 | } 9 | -------------------------------------------------------------------------------- /qd_library/util/pause.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_pause_hpp 2 | #define qd_pause_hpp qd_pause_hpp 3 | 4 | namespace qd { 5 | 6 | static inline void pause() { 7 | //__sync_synchronize(); 8 | __asm__ __volatile__("pause"); 9 | // std::this_thread::yield(); 10 | } 11 | 12 | } /* namespace qd */ 13 | 14 | #endif /* qd_pause_hpp */ 15 | -------------------------------------------------------------------------------- /src/new_rep/misc/thread_includes.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_INCLUDES_H 2 | #define THREAD_INCLUDES_H 3 | 4 | #include 5 | #include 6 | #include //Until c11 threads.h is available 7 | #include 8 | #include 9 | 10 | static inline void thread_yield(){ 11 | sched_yield(); 12 | } 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /src/datastructures/multi_writers_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "multi_writers_queue.h" 4 | #include "utils/smp_utils.h" 5 | 6 | MWQueue * mwqueue_create(){ 7 | MWQueue * queue = malloc(sizeof(MWQueue)); 8 | return mwqueue_initialize(queue); 9 | } 10 | 11 | 12 | void mwqueue_free(MWQueue * queue){ 13 | free(queue); 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_mcs_starve.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::may_starve>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_qd.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_futex_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::starvation_free>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_qd_starve.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_futex_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::may_starve>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_qd_cas.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_futex_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::starvation_free>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- 
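The glue_*.cpp units above (and glue_tatas.cpp, glue_qd_nodetach.cpp and glue_hqd.cpp below) all follow one pattern: two using-aliases select the internal lock and the qdlock_impl instantiation, and the C-callable wrapper is produced by textually including cpplock.cpp inside an extern "C" block, so each translation unit exports the same C API backed by a different lock variant. A minimal sketch of how a further variant would be added, assuming the same pattern; the file name, the queue type buffer_queue and its size parameter are illustrative placeholders only (the real template arguments of qdlock_impl are defined in qd.hpp and do not survive in this dump):

// glue_myvariant.cpp (hypothetical file name, following the pattern above)
#include "qd.hpp"

using intlock = mcs_futex_lock;                       // internal mutual-exclusion lock
using locktype = qdlock_impl<intlock,                 // assumed parameter order
                             buffer_queue<16384>,     // placeholder delegation queue type/size
                             starvation_policy_t::starvation_free>;

extern "C" {
#include "cpplock.h"    // C-visible lock API declarations
#include "cpplock.cpp"  // definitions instantiated against 'locktype'
} // extern "C"

Because cpplock.cpp is included rather than compiled on its own, every glue unit yields an independent instantiation of the same C interface, and a benchmark binary selects its lock variant purely at link time.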
/src/datastructures_bench/synch_algorithms/glue_tatas.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = extended_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::may_starve>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_qd_nodetach.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_futex_lock; 4 | using locktype = qdlock_impl, starvation_policy_t::starvation_free>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock_nodetach.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_hqd.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_lock; 4 | using locktype = hqdlock_impl, pinning_policy_t::pinned_threads, starvation_policy_t::starvation_free>; 5 | 6 | extern "C" { 7 | #include "cpplock.h" 8 | #include "cpplock.cpp" 9 | } // extern "C" 10 | -------------------------------------------------------------------------------- /src/benchmark/skiplist/skiplist.h: -------------------------------------------------------------------------------- 1 | #ifndef __SKIPLIST_H__ 2 | #define __SKIPLIST_H__ 3 | 4 | #include "kvset.h" 5 | #include "stdlib.h" 6 | 7 | KVSet * new_skiplist(int (*compare_function)(void *, void *), 8 | void (*free_function)(void *), 9 | void *(*malloc_function)(size_t), 10 | unsigned int key_offset); 11 | 12 | KVSet * new_skiplist_default(void); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *~ 3 | *.png 4 | *.dat 5 | *.pyc 6 | \#*# 7 | test_multi_writers_queue 8 | rw_bench_clone_aer 9 | rw_bench_clone_sdw 10 | rw_bench_clone_mcs 11 | rw_bench_clone_drmcs 12 | rw_bench_clone_aticket 13 | rw_bench_clone_ticket 14 | rw_bench_clone_cohort 15 | rw_bench_clone_wprwcohort 16 | test_aer 17 | test_aticket 18 | test_cohort 19 | test_drmcs 20 | test_mcs 21 | test_sdw 22 | test_ticket 23 | test_wprwcohort 24 | .sconsign.dblite 25 | bin/ 26 | bin_debug/ 27 | TAGS 28 | core 29 | bench_results 30 | bin_profile 31 | perf_data 32 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/random.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * random.h 3 | * 4 | * A really simple random-number generator. Crappy linear congruential 5 | * taken from glibc, but has at least a 2^32 period. 
6 | */ 7 | 8 | #ifndef __RANDOM_H__ 9 | #define __RANDOM_H__ 10 | 11 | typedef unsigned long rand_t; 12 | 13 | #define rand_init(_ptst) \ 14 | ((_ptst)->rand = RDTICK()) 15 | 16 | #define rand_next(_ptst) \ 17 | ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345) 18 | 19 | #endif /* __RANDOM_H__ */ 20 | -------------------------------------------------------------------------------- /src/utils/numa_node_info_support.h: -------------------------------------------------------------------------------- 1 | #ifndef NUMA_NODE_INFO_SUPPORT_H 2 | #define NUMA_NODE_INFO_SUPPORT_H 3 | 4 | #include <sched.h> 5 | #include "smp_utils.h" 6 | 7 | typedef union CPUToNodeMapWrapperImpl { 8 | char padding[64]; 9 | char value[NUMBER_OF_NUMA_NODES * NUMBER_OF_CPUS_PER_NODE]; 10 | char pad[64 - ((sizeof(char) * NUMBER_OF_NUMA_NODES * NUMBER_OF_CPUS_PER_NODE) % 64)]; 11 | } CPUToNodeMapWrapper; 12 | 13 | extern CPUToNodeMapWrapper CPUToNodeMap __attribute__((aligned(64))); 14 | 15 | static inline 16 | int numa_node_id(){ 17 | return CPUToNodeMap.value[sched_getcpu()]; 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/tests/test_framework.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #ifndef __TEST_FRAMEWORK_H__ 5 | #define __TEST_FRAMEWORK_H__ 6 | 7 | 8 | #define TO_VP(intValue) (void *)(intValue) 9 | 10 | #define T(testFunCall, testName) \ 11 | printf("STARTING TEST: "); \ 12 | test(testFunCall, testName); 13 | 14 | void test(int success, char msg[]){ 15 | 16 | if(success){ 17 | printf("\033[32m -- SUCCESS! -- \033[m"); 18 | }else{ 19 | printf("\033[31m -- FAIL! -- \033[m"); 20 | } 21 | 22 | printf("TEST: %s\n", msg); 23 | 24 | } 25 | 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/new_rep/tests/test_framework.h: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #ifndef __TEST_FRAMEWORK_H__ 5 | #define __TEST_FRAMEWORK_H__ 6 | 7 | 8 | #define TO_VP(intValue) (void *)(intValue) 9 | 10 | #define T(testFunCall, testName) \ 11 | printf("STARTING TEST: "); \ 12 | test(testFunCall, testName); 13 | 14 | void test(int success, char msg[]){ 15 | 16 | if(success){ 17 | printf("\033[32m -- SUCCESS! -- \033[m"); 18 | }else{ 19 | printf("\033[31m -- FAIL!
-- \033[m"); 20 | } 21 | 22 | printf("TEST: %s\n", msg); 23 | 24 | } 25 | 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/synch_algs_types.h: -------------------------------------------------------------------------------- 1 | #ifndef _TYPES_H_ 2 | #define _TYPES_H_ 3 | 4 | #include "synch_algs_system.h" 5 | 6 | typedef union int_aligned32_t { 7 | int32_t v CACHE_ALIGN; 8 | char pad[CACHE_LINE_SIZE]; 9 | } int_aligned32_t; 10 | 11 | typedef union int_aligned64_t { 12 | int64_t v CACHE_ALIGN; 13 | char pad[CACHE_LINE_SIZE]; 14 | } int_aligned64_t; 15 | 16 | #define null NULL 17 | #include 18 | //#define bool int32_t 19 | #define true 1 20 | #define false 0 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/datastructures/opti_multi_writers_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "opti_multi_writers_queue.h" 4 | #include "utils/smp_utils.h" 5 | 6 | 7 | OptiMWQueue * omwqueue_create(){ 8 | OptiMWQueue * queue = malloc(sizeof(OptiMWQueue)); 9 | return omwqueue_initialize(queue); 10 | } 11 | 12 | OptiMWQueue * omwqueue_initialize(OptiMWQueue * queue){ 13 | for(int i = 0; i < MWQ_CAPACITY; i++){ 14 | queue->elements[i] = NULL; 15 | } 16 | queue->elementCount.value = MWQ_CAPACITY; 17 | queue->closed.value = true; 18 | __sync_synchronize(); 19 | return queue; 20 | } 21 | 22 | void omwqueue_free(OptiMWQueue * queue){ 23 | free(queue); 24 | } 25 | 26 | -------------------------------------------------------------------------------- /qd_library/util/type_tools.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_type_tools_hpp 2 | #define qd_type_tools_hpp qd_type_tools_hpp 3 | 4 | template 5 | struct sumsizes; 6 | template 7 | struct sumsizes { 8 | static constexpr long size = sizeof(T) + sumsizes::size; 9 | }; 10 | template<> 11 | struct sumsizes<> { 12 | static constexpr long size = 0; 13 | }; 14 | 15 | /* structure to create lists of types */ 16 | template 17 | class types; 18 | template 19 | class types { 20 | public: 21 | typedef T type; 22 | typedef types tail; 23 | }; 24 | template<> 25 | class types<> {}; 26 | 27 | #endif /* qd_type_tools_hpp */ 28 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/Makefile: -------------------------------------------------------------------------------- 1 | CC := gcc 2 | CFLAGS := -O3 -DINTEL -Wall -std=c99 3 | LDFLAGS := -lpthread `pkg-config --libs gsl` 4 | 5 | OS := $(shell uname -s) 6 | ifeq ($(OS),Linux) 7 | CFLAGS += -DCACHE_LINE_SIZE=`getconf LEVEL1_DCACHE_LINESIZE` 8 | LDFLAGS += -lrt 9 | endif 10 | ifeq ($(OS),Darwin) 11 | CFLAGS += -DCACHE_LINE_SIZE=`sysctl -n hw.cachelinesize` 12 | endif 13 | 14 | VPATH := gc 15 | DEPS += Makefile $(wildcard *.h) $(wildcard gc/*.h) 16 | TARGETS := perf_meas 17 | 18 | all: $(TARGETS) 19 | 20 | clean: 21 | rm -f $(TARGETS) core *.o 22 | 23 | %.o: %.c $(DEPS) 24 | $(CC) $(CFLAGS) -c -o $@ $< 25 | 26 | $(TARGETS): %: %.o ptst.o gc.o prioq.o common.o 27 | $(CC) -o $@ $^ $(LDFLAGS) 28 | 29 | 30 | .PHONY: all clean 31 | -------------------------------------------------------------------------------- /src/lock/rglue_hqd.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | #include "threadid.cpp" 3 | 4 | using intlock = 
mcs_lock; 5 | //using locktype = mrqdlock_impl, reader_groups<64>, 65536, starvation_policy_t::starvation_free>; 6 | //using locktype = mrqdlock_impl, reader_groups<64>, 65536, starvation_policy_t::may_starve>; 7 | using locktype = mrhqdlock_impl, reader_groups<64>, 65536, pinning_policy_t::pinned_threads, starvation_policy_t::may_starve>; 8 | 9 | extern "C" { 10 | #include "rcpp_lock.h" 11 | #include "rcpp_lock.cpp" 12 | } // extern "C" 13 | -------------------------------------------------------------------------------- /src/lock/rglue_qd.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | #include "threadid.cpp" 3 | 4 | //using intlock = extended_lock; 5 | using intlock = mcs_futex_lock; 6 | //using locktype = mrqdlock_impl, reader_groups<64>, 65536, starvation_policy_t::starvation_free>; 7 | //using locktype = mrqdlock_impl, reader_groups<64>, 65536, starvation_policy_t::may_starve>; 8 | using locktype = mrqdlock_impl, reader_groups<64>, 65536, starvation_policy_t::starvation_free>; 9 | 10 | extern "C" { 11 | #include "rcpp_lock.h" 12 | #include "rcpp_lock.cpp" 13 | } // extern "C" 14 | -------------------------------------------------------------------------------- /src/lock/cpprdx.h: -------------------------------------------------------------------------------- 1 | #ifndef cpprdx_h 2 | #define cpprdx_h cpprdx_h 3 | 4 | #ifdef __cplusplus 5 | #include "rdx.hpp" 6 | extern "C" { 7 | #endif 8 | 9 | typedef struct CPPRDXLockImpl { 10 | void (*writer)(void *); 11 | #ifdef __cplusplus 12 | RDX_Lock lock; 13 | #else 14 | char lock[16000]; 15 | #endif 16 | } CPPRDXLock; 17 | CPPRDXLock* cpprdx_create(void (*writer)(void *)); 18 | void cpprdx_free(CPPRDXLock* lock); 19 | void cpprdx_initialize(CPPRDXLock* lock, void (*writer)(void *)); 20 | void cpprdx_register_this_thread(); 21 | void cpprdx_write(CPPRDXLock* lock, void* writeInfo); 22 | void cpprdx_write_read_lock(CPPRDXLock* lock); 23 | void cpprdx_write_read_unlock(CPPRDXLock* lock); 24 | void cpprdx_read_lock(CPPRDXLock* lock); 25 | void cpprdx_read_unlock(CPPRDXLock* lock); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue_mcs.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using intlock = mcs_lock; 4 | //using locktype = qdlock_impl>; 5 | //using locktype = qdlock_impl>; 6 | //using locktype = qdlock_impl>; 7 | //using locktype = qdlock_impl>; 8 | 9 | //using locktype = qdlock_impl>; 10 | using locktype = qdlock_impl, starvation_policy_t::starvation_free>; 11 | 12 | 13 | //using locktype = qdlock_impl>; 14 | //using locktype = qdlock_impl>; 15 | 16 | extern "C" { 17 | #include "cpplock.h" 18 | 19 | #include "cpplock.cpp" 20 | 21 | } // extern "C" 22 | -------------------------------------------------------------------------------- /qd_library/locks/tatas_lock.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_tatas_lock_hpp 2 | #define qd_tatas_lock_hpp qd_tatas_lock_hpp 3 | 4 | #include <atomic> 5 | 6 | #include "util/pause.hpp" 7 | 8 | /** @brief a test-and-test-and-set lock */ 9 | class tatas_lock { 10 | std::atomic<bool> locked; /* TODO can std::atomic_flag be used? */ 11 | public: 12 | tatas_lock() : locked(false) {}; 13 | tatas_lock(tatas_lock&) = delete; /* TODO?
*/ 14 | bool try_lock() { 15 | if(is_locked()) return false; 16 | return !locked.exchange(true, std::memory_order_acq_rel); 17 | } 18 | void unlock() { 19 | locked.store(false, std::memory_order_release); 20 | } 21 | bool is_locked() { 22 | return locked.load(std::memory_order_acquire); 23 | } 24 | void lock() { 25 | while(!try_lock()) { 26 | qd::pause(); 27 | } 28 | } 29 | void wake() {} 30 | }; 31 | 32 | #endif /* qd_tatas_lock_hpp */ 33 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/synch_algs_system.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYSTEM_H_ 2 | #define _SYSTEM_H_ 3 | 4 | #ifndef CACHE_LINE_SIZE 5 | # define CACHE_LINE_SIZE 64 6 | #endif 7 | 8 | #ifdef __GNUC__ 9 | # define CACHE_ALIGN __attribute__ ((aligned (CACHE_LINE_SIZE))) 10 | # define VAR_ALIGN __attribute__ ((aligned (16))) 11 | #elif defined(MSVC) 12 | # define CACHE_ALIGN __declspec(align(CACHE_LINE_SIZE)) 13 | # define VAR_ALIGN __declspec(align(16)) 14 | #else 15 | # define CACHE_ALIGN 16 | #endif 17 | 18 | 19 | #define PAD_CACHE(A) ((CACHE_LINE_SIZE - (A % CACHE_LINE_SIZE))/sizeof(int32_t)) 20 | 21 | 22 | #ifndef USE_CPUS 23 | # if defined(linux) 24 | # define USE_CPUS sysconf(_SC_NPROCESSORS_ONLN) 25 | # else 26 | # define USE_CPUS 1 27 | # endif 28 | #endif 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /qd_library/readindicator/reader_groups.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_reader_groups_hpp 2 | #define qd_reader_groups_hpp qd_reader_groups_hpp 3 | 4 | #include "threadid.hpp" 5 | 6 | template<int GROUPS> 7 | class reader_groups { 8 | struct alignas(64) counter_t { 9 | char pad1[64]; 10 | std::atomic<long> cnt; 11 | char pad2[64]; 12 | counter_t() : cnt(0) {} 13 | }; 14 | std::array<counter_t, GROUPS> counters; 15 | public: 16 | reader_groups() { 17 | for(int i = 0; i < GROUPS; i++) { 18 | counters[i].cnt.store(0, std::memory_order_release); 19 | } 20 | } 21 | bool query() { 22 | for(counter_t& counter : counters) 23 | if(counter.cnt.load(std::memory_order_acquire) > 0) return true; 24 | return false; 25 | } 26 | void arrive() { 27 | counters[thread_id % GROUPS].cnt.fetch_add(1, std::memory_order_release); 28 | } 29 | void depart() { 30 | counters[thread_id % GROUPS].cnt.fetch_sub(1, std::memory_order_release); 31 | } 32 | }; 33 | 34 | #endif /* qd_reader_groups_hpp */ 35 | -------------------------------------------------------------------------------- /src/lock/ticket_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "common_lock_constants.h" 4 | #include "mcs_lock.h" 5 | 6 | 7 | #ifndef TICKET_LOCK_H 8 | #define TICKET_LOCK_H 9 | 10 | typedef struct TicketLockImpl { 11 | char pad1[64]; 12 | void (*writer)(void *, void **); 13 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 14 | CacheLinePaddedInt inCounter; 15 | CacheLinePaddedInt outCounter; 16 | } TicketLock; 17 | 18 | 19 | TicketLock * ticketlock_create(void (*writer)(void *, void **)); 20 | void ticketlock_free(TicketLock * lock); 21 | void ticketlock_initialize(TicketLock * lock, void (*writer)(void *, void **)); 22 | void ticketlock_register_this_thread(); 23 | void ticketlock_write(TicketLock *lock, void * writeInfo); 24 | void ticketlock_write_read_lock(TicketLock *lock); 25 | void ticketlock_write_read_unlock(TicketLock * lock); 26 | 
void ticketlock_read_lock(TicketLock *lock); 27 | void ticketlock_read_unlock(TicketLock *lock); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /qd_library/waiting_future.hpp: -------------------------------------------------------------------------------- 1 | #ifndef waiting_future_hpp 2 | #define waiting_future_hpp waiting_future_hpp 3 | 4 | #include <future> 5 | 6 | template<typename T> 7 | class waiting_future : public std::future<T> { 8 | public: 9 | waiting_future() {} 10 | waiting_future(waiting_future& rhs) : std::future<T>(rhs) {} 11 | waiting_future(waiting_future&& rhs) : std::future<T>(std::move(rhs)) {} 12 | waiting_future(std::future<T>& rhs) : std::future<T>(rhs) {} 13 | waiting_future(std::future<T>&& rhs) : std::future<T>(std::move(rhs)) {} 14 | ~waiting_future() { 15 | if(this->valid()) { 16 | this->wait(); 17 | } 18 | } 19 | waiting_future& operator=(waiting_future& rhs) { 20 | std::future<T>::operator=(rhs); 21 | return *this; 22 | } 23 | waiting_future& operator=(waiting_future&& rhs) { 24 | std::future<T>::operator=(std::move(rhs)); 25 | return *this; 26 | } 27 | void discard() { 28 | std::future<T> tmp; 29 | std::swap(tmp, *this); 30 | } 31 | }; 32 | 33 | #endif // waiting_future_hpp 34 | -------------------------------------------------------------------------------- /src/benchmark/skiplist/kvset.h: -------------------------------------------------------------------------------- 1 | #ifndef __KVSET_H__ 2 | #define __KVSET_H__ 3 | 4 | struct kv_set; 5 | 6 | typedef struct kv_set_functions 7 | { 8 | void (*delete_table)(struct kv_set * kv_set, 9 | void (*element_free_function)(void *context, void* element), 10 | void * context); 11 | void * (*put)(struct kv_set * kv_set, void * key_value); 12 | int (*put_new)(struct kv_set * kv_set, void * key_value); 13 | void * (*remove)(struct kv_set * kv_set, void * key); 14 | void * (*lookup)(struct kv_set * kv_set, void * key); 15 | int (*member)(struct kv_set * kv_set, void * key); 16 | void * (*first)(struct kv_set * kv_set); 17 | void * (*last)(struct kv_set * kv_set); 18 | void * (*next)(struct kv_set * kv_set, void * key); 19 | void * (*previous)(struct kv_set * kv_set, void * key); 20 | } KVSetFunctions; 21 | 22 | 23 | typedef struct kv_set 24 | { 25 | KVSetFunctions funs; 26 | unsigned int key_offset; 27 | void * type_specific_data; 28 | } KVSet; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/new_rep/misc/padded_types.h: -------------------------------------------------------------------------------- 1 | #ifndef PADDED_TYPES_H 2 | #define PADDED_TYPES_H 3 | 4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available 5 | 6 | #define CACHE_LINE_SIZE 64 7 | 8 | typedef union { 9 | volatile atomic_flag value; 10 | char padding[CACHE_LINE_SIZE]; 11 | } LLPaddedFlag; 12 | 13 | typedef union { 14 | volatile atomic_bool value; 15 | char padding[CACHE_LINE_SIZE]; 16 | } LLPaddedBool; 17 | 18 | typedef union { 19 | volatile atomic_int value; 20 | char padding[CACHE_LINE_SIZE]; 21 | } LLPaddedInt; 22 | 23 | typedef union { 24 | volatile atomic_uint value; 25 | char padding[CACHE_LINE_SIZE]; 26 | } LLPaddedUInt; 27 | 28 | typedef union { 29 | volatile atomic_ulong value; 30 | char padding[CACHE_LINE_SIZE]; 31 | } LLPaddedULong; 32 | 33 | typedef union { 34 | volatile atomic_intptr_t value; 35 | char padding[CACHE_LINE_SIZE]; 36 | } LLPaddedPointer; 37 | 38 | typedef union { 39 | volatile double value; 40 | char padding[CACHE_LINE_SIZE]; 41 | }
LLPaddedDouble; 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/lock/aticket_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "common_lock_constants.h" 4 | #include "mcs_lock.h" 5 | 6 | 7 | #ifndef ATICKET_LOCK_H 8 | #define ATICKET_LOCK_H 9 | 10 | typedef struct ATicketLockImpl { 11 | char pad1[64]; 12 | void (*writer)(void *, void **); 13 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 14 | CacheLinePaddedInt inCounter; 15 | CacheLinePaddedInt outCounter; 16 | CacheLinePaddedInt spinAreas[ARRAY_SIZE]; 17 | } ATicketLock; 18 | 19 | 20 | ATicketLock * aticketlock_create(void (*writer)(void *, void **)); 21 | void aticketlock_free(ATicketLock * lock); 22 | void aticketlock_initialize(ATicketLock * lock, void (*writer)(void *, void **)); 23 | void aticketlock_register_this_thread(); 24 | void aticketlock_write(ATicketLock *lock, void * writeInfo); 25 | void aticketlock_write_read_lock(ATicketLock *lock); 26 | void aticketlock_write_read_unlock(ATicketLock * lock); 27 | void aticketlock_read_lock(ATicketLock *lock); 28 | void aticketlock_read_unlock(ATicketLock *lock); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/lock/cpprdx.cpp: -------------------------------------------------------------------------------- 1 | #include "rdx.hpp" 2 | #include "cpprdx.h" 3 | 4 | CPPRDXLock* cpprdx_create(void (*writer)(void *)) { 5 | auto lock = new CPPRDXLock; 6 | lock->writer = writer; 7 | return lock; 8 | } 9 | 10 | void cpprdx_free(CPPRDXLock* lock) { 11 | delete lock; 12 | } 13 | 14 | void cpprdx_initialize(CPPRDXLock* lock, void (*writer)(void *)) { 15 | new (&lock->lock) RDX_Lock; 16 | lock->writer = writer; 17 | } 18 | 19 | void cpprdx_register_this_thread() { 20 | // NOP 21 | } 22 | 23 | void cpprdx_write(CPPRDXLock* lock, void* writeInfo) { 24 | void (*f)(void *) = lock->writer; 25 | lock->lock.lock_delegate(std::function( [f, writeInfo] () { (*f)(writeInfo); } )); 26 | } 27 | 28 | void cpprdx_write_read_lock(CPPRDXLock* lock) { 29 | lock->lock.lock_exclusive(); 30 | } 31 | 32 | void cpprdx_write_read_unlock(CPPRDXLock* lock) { 33 | lock->lock.unlock_exclusive(); 34 | } 35 | 36 | void cpprdx_read_lock(CPPRDXLock* lock) { 37 | lock->lock.lock_read(); 38 | } 39 | 40 | void cpprdx_read_unlock(CPPRDXLock* lock) { 41 | lock->lock.unlock_read(); 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/README.md: -------------------------------------------------------------------------------- 1 | PR 2 | == 3 | 4 | A skiplist-based lock-free priority queue implementation that minimizes 5 | the amount of coherence traffic. Adapted from an implementation of 6 | Keir Fraser's skiplist 7 | (http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf). 8 | 9 | For more information about the priority queue, see 10 | http://user.it.uu.se/~jonli208/priorityqueue. 11 | 12 | ### Build 13 | 14 | make perf_meas 15 | 16 | ### Usage 17 | 18 | Run the benchmark application as: 19 | 20 | ./perf_meas -n 8 -t 27 -o 64 21 | 22 | This will start a benchmark run with 8 threads, uniformly distributed 23 | keys, an initial queue length of 2^15 elements, and the offset parameter of 24 | the algorithm set to 64, with operations (deletemin, 
26 | 27 | Run 28 | 29 | ./perf_meas -h 30 | 31 | for more information about the available parameters. 32 | 33 | ### Build Dependencies 34 | 35 | gsl 36 | 37 | ### Extras 38 | 39 | A SPIN model is included, with linearizability checks of the 40 | operations. The -O flag has to be used (if SPIN version >= 6), the 41 | model is using the old scope rules. -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/ptst.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * ptst.h 3 | * 4 | * Per-thread state management. 5 | * 6 | * 7 | * Copyright (c) 2013, Jonatan Linden 8 | * Copyright (c) 2002-2003, K A Fraser 9 | */ 10 | 11 | #ifndef __PTST_H__ 12 | #define __PTST_H__ 13 | 14 | typedef struct ptst_st ptst_t; 15 | 16 | #include 17 | 18 | #include "gc.h" 19 | 20 | struct ptst_st 21 | { 22 | /* Thread id */ 23 | unsigned int id; 24 | /* State management */ 25 | ptst_t *next; 26 | unsigned int count; 27 | 28 | /* Utility structures */ 29 | gc_t *gc; 30 | char pad[56]; 31 | unsigned int rand; 32 | }; 33 | 34 | /* 35 | * Enter/leave a critical region. A thread gets a state handle for 36 | * use during critical regions. 37 | */ 38 | 39 | void critical_enter(void ); 40 | 41 | #define critical_exit() gc_exit(ptst) 42 | 43 | /* Iterators */ 44 | extern ptst_t *ptst_list; 45 | 46 | #define ptst_first() (ptst_list) 47 | #define ptst_next(_p) ((_p)->next) 48 | 49 | 50 | 51 | #endif /* __PTST_H__ */ 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /src/lock/tts_rdx_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "datastructures/opti_multi_writers_queue.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_non_zero_indicator_types.h" 5 | 6 | #ifndef TTS_RDX_LOCK_H 7 | #define TTS_RDX_LOCK_H 8 | 9 | 10 | typedef struct TTSRDXLockImpl { 11 | OptiMWQueue writeQueue; 12 | char pad1[64]; 13 | void (*writer)(void *, void **); 14 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 15 | char pad3[64]; 16 | CacheLinePaddedInt writeBarrier; 17 | CacheLinePaddedBool lockWord; 18 | char pad4[64]; 19 | NZI_DATATYPE_NAME nonZeroIndicator; 20 | } TTSRDXLock; 21 | 22 | 23 | 24 | TTSRDXLock * ttsalock_create(void (*writer)(void *, void **)); 25 | void ttsalock_free(TTSRDXLock * lock); 26 | void ttsalock_initialize(TTSRDXLock * lock, void (*writer)(void *, void **)); 27 | void ttsalock_register_this_thread(); 28 | void ttsalock_write(TTSRDXLock *lock, void * writeInfo); 29 | void ttsalock_write_read_lock(TTSRDXLock *lock); 30 | void ttsalock_write_read_unlock(TTSRDXLock * lock); 31 | void ttsalock_read_lock(TTSRDXLock *lock); 32 | void ttsalock_read_unlock(TTSRDXLock *lock); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /qd_library/threadid.hpp: -------------------------------------------------------------------------------- 1 | #ifndef threadid_hpp 2 | #define threadid_hpp threadid_hpp 3 | 4 | #include 5 | #include 6 | 7 | class thread_id_store { 8 | static unsigned long max_id; 9 | static std::set orphans; 10 | static std::mutex mutex; 11 | typedef std::lock_guard scoped_lock; 12 | public: 13 | static unsigned long get() { 14 | scoped_lock lock(mutex); 15 | if(orphans.empty()) { 16 | max_id++; 17 | return max_id; 18 | } else { 19 | auto first = orphans.begin(); 
20 | auto result = *first; 21 | orphans.erase(first); 22 | return result; 23 | } 24 | } 25 | static void free(unsigned long idx) { 26 | scoped_lock lock(mutex); 27 | if(idx == max_id) { 28 | max_id--; 29 | while(orphans.erase(max_id)) { 30 | max_id--; 31 | } 32 | } else { 33 | orphans.insert(idx); 34 | } 35 | } 36 | }; 37 | 38 | class thread_id_t { 39 | unsigned long id; 40 | public: 41 | operator unsigned long() { 42 | return id; 43 | } 44 | thread_id_t() : id(thread_id_store::get()) {} 45 | ~thread_id_t() { 46 | thread_id_store::free(id); 47 | } 48 | }; 49 | 50 | extern thread_local thread_id_t thread_id; 51 | 52 | #endif // threadid_hpp 53 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | 2 | #Locks and lock benchmarks 3 | 4 | AddOption('--cpp_locks', 5 | action='store_true', 6 | dest='cpp_locks', 7 | default=False) 8 | 9 | AddOption('--llvm', 10 | action='store_true', 11 | dest='use_llvm', 12 | default=False) 13 | 14 | AddOption('--use_cas_fetch_and_add', 15 | action='store_true', 16 | dest='use_cas_fetch_and_add', 17 | default=False) 18 | 19 | AddOption('--use_pinning', 20 | action='store_true', 21 | dest='use_pinning', 22 | default=False) 23 | 24 | AddOption('--use_queue_stats', 25 | action='store_true', 26 | dest='use_queue_stats', 27 | default=False) 28 | 29 | AddOption('--use_print_thread_queue_stats', 30 | action='store_true', 31 | dest='use_print_thread_queue_stats', 32 | default=False) 33 | 34 | mode = 'release' 35 | 36 | SConscript('SConscript.py', variant_dir='bin', duplicate=0, exports='mode') 37 | 38 | mode = 'debug' 39 | 40 | SConscript('SConscript.py', variant_dir='bin_debug', duplicate=0, exports='mode') 41 | 42 | mode = 'profile' 43 | 44 | SConscript('SConscript.py', variant_dir='bin_profile', duplicate=0, exports='mode') 45 | -------------------------------------------------------------------------------- /src/lock/tatas_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common_lock_constants.h" 3 | #include "utils/smp_utils.h" 4 | 5 | #ifndef TATAS_LOCK_H 6 | #define TATAS_LOCK_H 7 | 8 | 9 | typedef struct TATASLockImpl { 10 | char pad1[64]; 11 | void (*writer)(void *, void **); 12 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 13 | char pad3[64]; 14 | CacheLinePaddedBool lockWord; 15 | char pad4[64]; 16 | } TATASLock; 17 | 18 | 19 | 20 | TATASLock * tataslock_create(void (*writer)(void *, void **)); 21 | void tataslock_free(TATASLock * lock); 22 | void tataslock_initialize(TATASLock * lock, void (*writer)(void *, void **)); 23 | void tataslock_register_this_thread(); 24 | void tataslock_write(TATASLock *lock, void * writeInfo); 25 | void tataslock_write_read_lock(TATASLock *lock); 26 | void tataslock_write_read_unlock(TATASLock * lock); 27 | void tataslock_read_lock(TATASLock *lock); 28 | void tataslock_read_unlock(TATASLock *lock); 29 | 30 | static inline 31 | bool tataslock_is_locked(TATASLock *lock){ 32 | bool locked; 33 | load_acq(locked, lock->lockWord.value); 34 | return locked; 35 | } 36 | 37 | static inline 38 | bool tataslock_try_write_read_lock(TATASLock *lock) { 39 | return !__sync_lock_test_and_set(&lock->lockWord.value, true); 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/gc.h: -------------------------------------------------------------------------------- 1 | 
#ifndef __GC_H__ 2 | #define __GC_H__ 3 | 4 | typedef struct gc_st gc_t; 5 | 6 | /* Most of these functions peek into a per-thread state struct. */ 7 | #include "ptst.h" 8 | 9 | /* Initialise GC section of given per-thread state structure. */ 10 | gc_t *gc_init(void); 11 | 12 | int gc_add_allocator(int alloc_size); 13 | void gc_remove_allocator(int alloc_id); 14 | 15 | /* 16 | * Memory allocate/free. An unsafe free can be used when an object was 17 | * not made visible to other processes. 18 | */ 19 | void *gc_alloc(ptst_t *ptst, int alloc_id); 20 | void gc_free(ptst_t *ptst, void *p, int alloc_id); 21 | void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id); 22 | 23 | /* 24 | * Hook registry. Allows users to hook in their own per-epoch delay 25 | * lists. 26 | */ 27 | typedef void (*hook_fn_t)(ptst_t *, void *); 28 | int gc_add_hook(hook_fn_t fn); 29 | void gc_remove_hook(int hook_id); 30 | void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id); 31 | 32 | /* Per-thread entry/exit from critical regions */ 33 | void gc_enter(ptst_t *ptst); 34 | void gc_exit(ptst_t *ptst); 35 | 36 | /* Start-of-day initialisation of garbage collector. */ 37 | void _init_gc_subsystem(void); 38 | void _destroy_gc_subsystem(void); 39 | 40 | #endif /* __GC_H__ */ 41 | -------------------------------------------------------------------------------- /src/benchmark/run_benchmarks_on_intel_i7.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 8 | 9 | command = [ 10 | os.path.join(bin_dir_path, 'benchmark_lock.py'), 11 | #number of iterations 12 | '5', 13 | #Output dir (standard means a dir in bench_result based on the git 14 | #commit id and the date) 15 | 'standard', 16 | #benchmark prefixes (comma separated list) 17 | 'pairing_heap_bench', 18 | #locks to benchmark (comma separated list) 19 | 'qdlock,hqdlock,ccsynch,flatcomb,clh', 20 | #use pinning to NUMA nodes (comma separated list) 21 | 'no', 22 | #Benchmark number of threads (comma separated list) 23 | '1,2,3,4,5,6,7,8', 24 | #Percentage dequeue (comma separated list) 25 | '0.5', 26 | #Seconds to run the benchmark (comma separated list) 27 | '1', 28 | #Number of work items performed in write-critical section (comma 29 | #separated list) 30 | '2', 31 | #Number of work items performed in read-critical section (comma 32 | #separated list) 33 | '0', 34 | #Number of work items performed in non-critical section (comma 35 | #separated list) 36 | '0,32,64'] 37 | 38 | process = subprocess.Popen(command) 39 | process.wait() 40 | -------------------------------------------------------------------------------- /src/benchmark/run_benchmarks_on_amd_fx_6100.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 8 | 9 | command = [ 10 | os.path.join(bin_dir_path, 'benchmark_lock.py'), 11 | #number of iterations 12 | '5', 13 | #Output dir (standard means a dir in bench_result based on the git 14 | #commit id and the date) 15 | 'standard', 16 | #benchmark prefixes (comma separated list) 17 | 'rw_bench_clone', 18 | #locks to benchmark (comma separated list) 19 | 'aer_rgnzi,drmcs_rgnzi', 20 | #use pinning to NUMA nodes (comma separated list) 21 | 'no', 22 | #Benchmark number of threads (comma separated list) 23 | 
'1,2,3,4,5,6', 24 | #Percentage reads (comma separated list) 25 | '0.0,0.25,0.5,0.8,0.9,0.95,0.99,1.0', 26 | #Seconds to run the benchmark (comma separated list) 27 | '1', 28 | #Number of work items performed in write-critical section (comma 29 | #separated list) 30 | '4', 31 | #Number of work items performed in read-critical section (comma 32 | #separated list) 33 | '4', 34 | #Number of work items performed in non-critical section (comma 35 | #separated list) 36 | '0,64'] 37 | 38 | process = subprocess.Popen(command) 39 | process.wait() 40 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/prioq.h: -------------------------------------------------------------------------------- 1 | #ifndef PRIOQ_H 2 | #define PRIOQ_H 3 | #include <limits.h> 4 | #include "common.h" 5 | 6 | typedef int pkey_t; 7 | typedef void *pval_t; 8 | 9 | #define KEY_NULL 0 10 | #define NUM_LEVELS 32 11 | /* Internal key values with special meanings. */ 12 | #define SENTINEL_KEYMIN ( INT_MIN ) /* Key value of first dummy node. */ 13 | #define SENTINEL_KEYMAX ( INT_MAX ) /* Key value of last dummy node. */ 14 | 15 | 16 | typedef struct node_s 17 | { 18 | pkey_t k; 19 | int level; 20 | int inserting; //char pad2[4]; 21 | pval_t v; 22 | struct node_s *next[1]; 23 | } node_t; 24 | 25 | typedef struct 26 | { 27 | int max_offset; 28 | int max_level; 29 | int nthreads; 30 | node_t *head; 31 | node_t *tail; 32 | char pad[128]; 33 | } pq_t; 34 | 35 | #define get_marked_ref(_p) ((void *)(((uintptr_t)(_p)) | 1)) 36 | #define get_unmarked_ref(_p) ((void *)(((uintptr_t)(_p)) & ~1)) 37 | #define is_marked_ref(_p) (((uintptr_t)(_p)) & 1) 38 | 39 | 40 | /* Interface */ 41 | 42 | extern pq_t *pq_init(int max_offset); 43 | 44 | extern void pq_destroy(pq_t *pq); 45 | 46 | extern void insertq(pq_t *pq, pkey_t k, pval_t v); 47 | 48 | extern pval_t deletemin(pq_t *pq); 49 | 50 | extern void sequential_length(pq_t *pq); 51 | 52 | #endif // PRIOQ_H 53 | -------------------------------------------------------------------------------- /src/benchmark/run_benchmarks_on_sandy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import subprocess 6 | 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 8 | 9 | command = [ 10 | os.path.join(bin_dir_path, 'benchmark_lock.py'), 11 | #number of iterations 12 | '5', 13 | #Output dir (standard means a dir in bench_result based on the git 14 | #commit id and the date) 15 | 'standard', 16 | #benchmark prefixes (comma separated list) 17 | 'rw_bench_clone', 18 | #locks to benchmark (comma separated list) 19 | 'aer_rgnzi,drmcs_rgnzi,cohort,wprwcohort_rgnzi', 20 | #use pinning to NUMA nodes (comma separated list) 21 | 'no,yes', 22 | #Benchmark number of threads (comma separated list) 23 | '1,2,4,8,12,16,24,32,48,62,64', 24 | #Percentage reads (comma separated list) 25 | '0.0,0.25,0.5,0.8,0.9,0.95,0.99,1.0', 26 | #Seconds to run the benchmark (comma separated list) 27 | '1', 28 | #Number of work items performed in write-critical section (comma 29 | #separated list) 30 | '4', 31 | #Number of work items performed in read-critical section (comma 32 | #separated list) 33 | '4', 34 | #Number of work items performed in non-critical section (comma 35 | #separated list) 36 | '0,64'] 37 | 38 | process = subprocess.Popen(command) 39 | process.wait() 40 | -------------------------------------------------------------------------------- /src/new_rep/locks/qd_lock.h: 
-------------------------------------------------------------------------------- 1 | #ifndef QD_LOCK_H 2 | #define QD_LOCK_H 3 | 4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available 5 | #include "misc/thread_includes.h"//Until c11 thread.h is available 6 | #include <stdbool.h> 7 | 8 | #include "misc/padded_types.h" 9 | #include "locks/tatas_lock.h" 10 | #include "qd_queues/qd_queue.h" 11 | 12 | /* Queue Delegation Lock */ 13 | 14 | typedef struct QDLockImpl { 15 | TATASLock mutexLock; 16 | QDQueue queue; 17 | } QDLock; 18 | 19 | void qd_initialize(QDLock * lock){ 20 | tatas_initialize(&lock->mutexLock); 21 | qdq_initialize(&lock->queue); 22 | } 23 | 24 | void qd_delegate(QDLock* l, 25 | void (*funPtr)(unsigned int, void *), 26 | unsigned int messageSize, 27 | void * messageAddress) { 28 | while(true) { 29 | if(tatas_try_lock(&l->mutexLock)) { 30 | qdq_open(&l->queue); 31 | funPtr(messageSize, messageAddress); 32 | qdq_flush(&l->queue); 33 | tatas_unlock(&l->mutexLock); 34 | return; 35 | } else if(qdq_enqueue(&l->queue, 36 | funPtr, 37 | messageSize, 38 | messageAddress)){ 39 | return; 40 | } 41 | thread_yield(); 42 | } 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/common.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "common.h" 3 | 4 | #if defined(__linux__) 5 | pid_t 6 | gettid(void) 7 | { 8 | return (pid_t) syscall(SYS_gettid); 9 | } 10 | 11 | void 12 | pina(pid_t t, int cpu) 13 | { 14 | cpu_set_t cpuset; 15 | CPU_ZERO(&cpuset); 16 | CPU_SET(cpu, &cpuset); 17 | E_en(sched_setaffinity(t, sizeof(cpu_set_t), &cpuset)); 18 | } 19 | 20 | void 21 | gettime(struct timespec *ts) 22 | { 23 | E(clock_gettime(CLOCK_MONOTONIC, ts)); 24 | } 25 | 26 | #endif 27 | 28 | #if defined(__APPLE__) 29 | void 30 | gettime(struct timespec *ts) 31 | { 32 | uint64_t time = mach_absolute_time(); 33 | 34 | static mach_timebase_info_data_t info = {0,0}; 35 | 36 | if (info.denom == 0) { 37 | mach_timebase_info(&info); 38 | } 39 | 40 | uint64_t elapsed = time * (info.numer / info.denom); 41 | 42 | ts->tv_sec = elapsed * 1e-9; 43 | ts->tv_nsec = elapsed - (ts->tv_sec * 1e9); 44 | } 45 | #endif 46 | 47 | 48 | 49 | 50 | struct timespec 51 | timediff (struct timespec begin, struct timespec end) 52 | { 53 | struct timespec tmp; 54 | if ((end.tv_nsec - begin.tv_nsec) < 0) { 55 | tmp.tv_sec = end.tv_sec - begin.tv_sec - 1; 56 | tmp.tv_nsec = 1000000000 + end.tv_nsec - begin.tv_nsec; 57 | } else { 58 | tmp.tv_sec = end.tv_sec - begin.tv_sec; 59 | tmp.tv_nsec = end.tv_nsec - begin.tv_nsec; 60 | } 61 | return tmp; 62 | } 63 | -------------------------------------------------------------------------------- /qd_library/locks/mutex_lock.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_mutex_lock_hpp 2 | #define qd_mutex_lock_hpp qd_mutex_lock_hpp 3 | 4 | #include <atomic> 5 | #include <mutex> 6 | 7 | /** @brief a std::mutex based lock */ 8 | class mutex_lock { 9 | std::atomic<bool> locked; 10 | std::mutex mutex; 11 | public: 12 | mutex_lock() : locked(false), mutex() {}; 13 | mutex_lock(mutex_lock&) = delete; /* TODO?
*/ 14 | bool try_lock() { 15 | if(!is_locked() && mutex.try_lock()) { 16 | locked.store(true, std::memory_order_release); 17 | return true; 18 | } else { 19 | return false; 20 | } 21 | } 22 | void unlock() { 23 | locked.store(false, std::memory_order_release); 24 | mutex.unlock(); 25 | } 26 | bool is_locked() { 27 | /* This may sometimes return false when the lock is already acquired. 28 | * This is safe, because the locking call that acquired the lock in 29 | * that case has not yet returned (it needs to set the locked flag first), 30 | * so this is concurrent with calling is_locked first and then locking the lock. 31 | * 32 | * This may also sometimes return false when the lock is still locked, but 33 | * about to be unlocked. This is safe, because of a similar argument as above. 34 | */ 35 | return locked.load(std::memory_order_acquire); 36 | } 37 | void lock() { 38 | mutex.lock(); 39 | locked.store(true, std::memory_order_release); 40 | } 41 | }; 42 | 43 | #endif /* qd_mutex_lock_hpp */ 44 | -------------------------------------------------------------------------------- /src/benchmark/compare_benchmarks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | from os import listdir 5 | from os.path import join 6 | from os.path import dirname 7 | from os import mkdir 8 | from subprocess import Popen 9 | from subprocess import PIPE 10 | from shutil import copy 11 | sys.argv.pop(0) 12 | 13 | if len(sys.argv) < 2: 14 | print """Not enough parameters: 15 | 16 | Optional flag -matplotlib (must be first) 17 | 18 | The first parameter is the output dir or output file if -matplotlib is specified (where the graphs are placed). 19 | 20 | The rest of the parameters are benchmark output dirs of benchmark that 21 | shall be compared. The benchmark results are produced by the 22 | bin/benchmark_lock.py script. 23 | 24 | """ 25 | sys.exit() 26 | 27 | output_dir_or_file = sys.argv.pop(0) 28 | 29 | compare_dirs = sys.argv 30 | 31 | dat_files = [f for f in listdir(compare_dirs[0]) if f.endswith(".dat")] 32 | 33 | output_file = output_dir_or_file 34 | copy(join(dirname(__file__), '../src/benchmark/produce_graphs_template.py'), output_file) 35 | with open(output_file, "a") as f: 36 | for in_file_name in dat_files: 37 | f.write('set_up_figure("%s")\n' % in_file_name) 38 | for in_dir in compare_dirs: 39 | f.write('plot_file("%s", "%s")\n' % (join(in_dir,in_file_name), in_dir.split('#')[2])) 40 | f.write('complete_figure("%s")\n' % in_file_name) 41 | f.write("\n") 42 | 43 | 44 | FORMAT='png' 45 | execfile(output_file) 46 | -------------------------------------------------------------------------------- /src/new_rep/locks/tatas_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef TATAS_LOCK_H 2 | #define TATAS_LOCK_H 3 | 4 | #include "misc/padded_types.h" 5 | 6 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available 7 | #include "misc/thread_includes.h"//Until c11 thread.h is available 8 | #include 9 | 10 | 11 | typedef struct TATASLockImpl { 12 | LLPaddedFlag lockFlag; 13 | } TATASLock; 14 | 15 | void tatas_initialize(TATASLock * lock){ 16 | atomic_init( &lock->lockFlag.value, false ); 17 | } 18 | 19 | void tatas_lock(TATASLock *lock) { 20 | while(true){ 21 | while(atomic_load_explicit(&lock->lockFlag.value, 22 | memory_order_acquire)){ 23 | thread_yield(); 24 | } 25 | if( ! 
atomic_flag_test_and_set_explicit(&lock->lockFlag.value, 26 | memory_order_acquire)){ 27 | return; 28 | } 29 | } 30 | } 31 | 32 | void tatas_unlock(TATASLock * lock) { 33 | atomic_flag_clear_explicit(&lock->lockFlag.value, memory_order_release); 34 | } 35 | 36 | bool tatas_is_locked(TATASLock *lock){ 37 | return atomic_load_explicit(&lock->lockFlag.value, memory_order_acquire); 38 | } 39 | 40 | bool tatas_try_lock(TATASLock *lock) { 41 | if(!atomic_load_explicit(&lock->lockFlag.value, memory_order_acquire)){ 42 | return !atomic_flag_test_and_set_explicit(&lock->lockFlag.value, memory_order_acquire); 43 | } else { 44 | return false; 45 | } 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/utils/support_many_non_zero_indicator_types.h: -------------------------------------------------------------------------------- 1 | #ifndef SUPPORT_MANY_NON_ZERO_INDICATOR_TYPES_H 2 | #define SUPPORT_MANY_NON_ZERO_INDICATOR_TYPES_H 3 | 4 | #ifdef NZI_TYPE_ReaderGroups 5 | //*********************************** 6 | //ReaderGroups 7 | //*********************************** 8 | #include "datastructures/reader_groups_nzi.h" 9 | 10 | #define NZI_DATATYPE_NAME ReaderGroupsNZI 11 | #define NZI_FUN_PREFIX rgnzi 12 | 13 | #elif defined (NZI_TYPE_NUMAIngressEgressCounter) 14 | //*********************************** 15 | //NUMA Ingress Egress Counter 16 | //*********************************** 17 | #include "datastructures/numa_ingress_egress_nzi.h" 18 | 19 | #define NZI_DATATYPE_NAME NUMAIngressEgress 20 | #define NZI_FUN_PREFIX nienzi 21 | 22 | #else 23 | 24 | #define NZI_DATATYPE_NAME NoNZIDatatypeSpecified 25 | #define NZI_FUN_PREFIX no_such_nzi_type_prefix 26 | 27 | #endif 28 | 29 | #ifdef NZI_FUN_PREFIX 30 | 31 | #define MY_NZI_CONCAT(a,b) a ## _ ## b 32 | #define MY_NZI_EVAL_CONCAT(a,b) MY_NZI_CONCAT(a,b) 33 | #define MY_NZI_FUN(name) MY_NZI_EVAL_CONCAT(NZI_FUN_PREFIX, name) 34 | 35 | #define NZI_INITIALIZE(nzi) MY_NZI_FUN(initialize)(nzi) 36 | #define NZI_ARRIVE(nzi) MY_NZI_FUN(arrive)(nzi) 37 | #define NZI_DEPART(nzi) MY_NZI_FUN(depart)(nzi) 38 | #define NZI_QUERY(nzi) MY_NZI_FUN(query)(nzi) 39 | #define NZI_WAIT_UNIL_EMPTY(nzi) MY_NZI_FUN(wait_unil_empty)(nzi) 40 | 41 | #endif 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /qd_library/locks/pthreads_lock.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_pthreads_lock_hpp 2 | #define qd_pthreads_lock_hpp qd_pthreads_lock_hpp 3 | 4 | #include <atomic> 5 | #include <pthread.h> 6 | 7 | /** @brief a pthreads based lock */ 8 | class pthreads_lock { 9 | std::atomic<bool> locked; 10 | pthread_mutex_t mutex; 11 | public: 12 | pthreads_lock() : locked(false), mutex(PTHREAD_MUTEX_INITIALIZER) {}; 13 | pthreads_lock(pthreads_lock&) = delete; /* TODO? */ 14 | bool try_lock() { 15 | if(!is_locked() && !pthread_mutex_trylock(&mutex)) { 16 | locked.store(true, std::memory_order_release); 17 | return true; 18 | } else { 19 | return false; 20 | } 21 | } 22 | void unlock() { 23 | locked.store(false, std::memory_order_release); 24 | pthread_mutex_unlock(&mutex); 25 | } 26 | bool is_locked() { 27 | /* This may sometimes return false when the lock is already acquired. 28 | * This is safe, because the locking call that acquired the lock in 29 | * that case has not yet returned (it needs to set the locked flag first), 30 | * so this is concurrent with calling is_locked first and then locking the lock. 
31 | * 32 | * This may also sometimes return false when the lock is still locked, but 33 | * about to be unlocked. This is safe, because of a similar argument as above. 34 | */ 35 | return locked.load(std::memory_order_acquire); 36 | } 37 | void lock() { 38 | pthread_mutex_lock(&mutex); 39 | locked.store(true, std::memory_order_release); 40 | } 41 | }; 42 | 43 | #endif /* qd_pthreads_lock_hpp */ 44 | -------------------------------------------------------------------------------- /src/datastructures/reader_groups_nzi.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "utils/thread_identifier.h" 4 | 5 | #ifndef READER_GROUPS_NZI_H 6 | #define READER_GROUPS_NZI_H 7 | 8 | typedef struct ReaderGroupsNZIImpl { 9 | CacheLinePaddedInt readerGroups[NUMBER_OF_READER_GROUPS]; 10 | } ReaderGroupsNZI; 11 | 12 | static inline 13 | void rgnzi_initialize(ReaderGroupsNZI * nzi){ 14 | for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){ 15 | nzi->readerGroups[i].value = 0; 16 | } 17 | __sync_synchronize(); 18 | } 19 | 20 | static inline 21 | void rgnzi_arrive(ReaderGroupsNZI * nzi){ 22 | __sync_fetch_and_add(&nzi->readerGroups[myId.value % NUMBER_OF_READER_GROUPS].value, 1); 23 | } 24 | 25 | static inline 26 | void rgnzi_depart(ReaderGroupsNZI * nzi){ 27 | __sync_fetch_and_sub(&nzi->readerGroups[myId.value % NUMBER_OF_READER_GROUPS].value, 1); 28 | } 29 | 30 | 31 | static inline 32 | bool rgnzi_query(ReaderGroupsNZI * nzi){ 33 | for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){ 34 | if(ACCESS_ONCE(nzi->readerGroups[i].value) > 0){ 35 | return false; 36 | } 37 | } 38 | return true; 39 | } 40 | 41 | static inline 42 | void rgnzi_wait_unil_empty(ReaderGroupsNZI * nzi){ 43 | int count; 44 | for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){ 45 | load_acq(count, nzi->readerGroups[i].value); 46 | while(count > 0){ 47 | __sync_synchronize(); 48 | load_acq(count, nzi->readerGroups[i].value); 49 | } 50 | } 51 | } 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/lock/wprw_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_lock_types.h" 5 | #include "utils/support_many_non_zero_indicator_types.h" 6 | 7 | #ifndef WPRW_LOCK_H 8 | #define WPRW_LOCK_H 9 | 10 | #ifdef LOCK_TYPE_WPRW_MCSLock 11 | //*********************************** 12 | //MCSLock 13 | //*********************************** 14 | #include "mcs_lock.h" 15 | 16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock 17 | 18 | #elif defined (LOCK_TYPE_WPRW_CohortLock) 19 | //*********************************** 20 | //CohortLock 21 | //*********************************** 22 | #include "cohort_lock.h" 23 | 24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock 25 | 26 | #else 27 | 28 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified 29 | 30 | #endif 31 | 32 | typedef struct WPRWLockImpl { 33 | char pad1[64]; 34 | LOCK_DATATYPE_NAME_WPRW lock; 35 | CacheLinePaddedInt writeBarrier; 36 | NZI_DATATYPE_NAME nonZeroIndicator; 37 | // CacheLinePaddedInt readLocks[NUMBER_OF_READER_GROUPS]; 38 | } WPRWLock; 39 | 40 | WPRWLock * wprwlock_create(void (*writer)(void *, void **)); 41 | void wprwlock_free(WPRWLock * lock); 42 | void wprwlock_initialize(WPRWLock * lock, void (*writer)(void *, void **)); 43 | void wprwlock_register_this_thread(); 44 | void wprwlock_write(WPRWLock *lock, void * 
writeInfo); 45 | void wprwlock_write_read_lock(WPRWLock *lock); 46 | void wprwlock_write_read_unlock(WPRWLock * lock); 47 | void wprwlock_read_lock(WPRWLock *lock); 48 | void wprwlock_read_unlock(WPRWLock *lock); 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/lock/ticket_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ticket_lock.h" 6 | 7 | 8 | TicketLock * ticketlock_create(void (*writer)(void *, void **)){ 9 | TicketLock * lock = malloc(sizeof(TicketLock)); 10 | ticketlock_initialize(lock, writer); 11 | return lock; 12 | } 13 | 14 | void ticketlock_initialize(TicketLock * lock, void (*writer)(void *, void **)){ 15 | lock->writer = writer; 16 | lock->inCounter.value = 0; 17 | lock->outCounter.value = 0; 18 | __sync_synchronize(); 19 | } 20 | 21 | void ticketlock_free(TicketLock * lock){ 22 | free(lock); 23 | } 24 | 25 | 26 | void ticketlock_register_this_thread(){ 27 | } 28 | 29 | void ticketlock_write(TicketLock *lock, void * writeInfo) { 30 | ticketlock_write_read_lock(lock); 31 | lock->writer(writeInfo, NULL); 32 | ticketlock_write_read_unlock(lock); 33 | } 34 | 35 | void ticketlock_write_read_lock(TicketLock *lock) { 36 | int outCounter; 37 | int myTicket = __sync_fetch_and_add(&lock->inCounter.value, 1); 38 | load_acq(outCounter, lock->outCounter.value); 39 | while(outCounter != myTicket){ 40 | load_acq(outCounter, lock->outCounter.value); 41 | __sync_synchronize(); 42 | } 43 | } 44 | 45 | void ticketlock_write_read_unlock(TicketLock * lock) { 46 | __sync_fetch_and_add(&lock->outCounter.value, 1); 47 | } 48 | 49 | void ticketlock_read_lock(TicketLock *lock) { 50 | ticketlock_write_read_lock(lock); 51 | } 52 | 53 | void ticketlock_read_unlock(TicketLock *lock) { 54 | ticketlock_write_read_unlock(lock); 55 | } 56 | -------------------------------------------------------------------------------- /src/lock/simple_delayed_writers_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "datastructures/multi_writers_queue.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_non_zero_indicator_types.h" 5 | 6 | #ifndef SIMPLE_DELAYED_WRITERS_LOCK_H 7 | #define SIMPLE_DELAYED_WRITERS_LOCK_H 8 | 9 | struct NodeImpl; 10 | 11 | typedef union CacheLinePaddedNodePtrImpl { 12 | struct NodeImpl * value; 13 | char padding[64]; 14 | } CacheLinePaddedNodePtr; 15 | 16 | typedef struct NodeImpl { 17 | MWQueue writeQueue; 18 | CacheLinePaddedNodePtr next; 19 | CacheLinePaddedBool locked; 20 | bool readLockIsWriteLock; 21 | char pad[64 - ((sizeof(bool)) % 64)]; 22 | } Node; 23 | 24 | typedef struct SimpleDelayedWritesLockImpl { 25 | char pad1[64]; 26 | void (*writer)(void *, void **); 27 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 28 | CacheLinePaddedNodePtr endOfQueue; 29 | NZI_DATATYPE_NAME nonZeroIndicator; 30 | } SimpleDelayedWritesLock; 31 | 32 | 33 | 34 | SimpleDelayedWritesLock * sdwlock_create(void (*writer)(void *, void **)); 35 | void sdwlock_free(SimpleDelayedWritesLock * lock); 36 | void sdwlock_initialize(SimpleDelayedWritesLock * lock, void (*writer)(void *, void **)); 37 | void sdwlock_register_this_thread(); 38 | void sdwlock_write(SimpleDelayedWritesLock *lock, void * writeInfo); 39 | void sdwlock_write_read_lock(SimpleDelayedWritesLock *lock); 40 | void sdwlock_write_read_unlock(SimpleDelayedWritesLock * lock); 41 | void 
sdwlock_read_lock(SimpleDelayedWritesLock *lock); 42 | void sdwlock_read_unlock(SimpleDelayedWritesLock *lock); 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/lock/agnostic_dx_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "agnostic_dx_lock.h" 7 | #include "utils/smp_utils.h" 8 | 9 | #define READ_PATIENCE_LIMIT 130000 10 | 11 | AgnosticDXLock * adxlock_create(void (*writer)(void *, void **)){ 12 | AgnosticDXLock * lock = malloc(sizeof(AgnosticDXLock)); 13 | adxlock_initialize(lock, writer); 14 | return lock; 15 | } 16 | 17 | void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(void *, void **)){ 18 | //TODO check if the following typecast is fine 19 | lock->defaultWriter = defaultWriter; 20 | LOCK_INITIALIZE(&lock->lock, defaultWriter); 21 | drmvqueue_initialize(&lock->writeQueue); 22 | __sync_synchronize(); 23 | } 24 | 25 | void adxlock_free(AgnosticDXLock * lock){ 26 | free(lock); 27 | } 28 | 29 | void adxlock_register_this_thread(){ 30 | } 31 | 32 | 33 | void adxlock_write(AgnosticDXLock *lock, void * writeInfo) { 34 | adxlock_delegate(lock, lock->defaultWriter, writeInfo); 35 | } 36 | 37 | void adxlock_write_read_lock(AgnosticDXLock *lock) { 38 | LOCK_WRITE_READ_LOCK(&lock->lock); 39 | drmvqueue_reset_fully_read(&lock->writeQueue); 40 | __sync_synchronize();//Flush 41 | } 42 | 43 | void adxlock_write_read_unlock(AgnosticDXLock * lock) { 44 | drmvqueue_flush(&lock->writeQueue); 45 | LOCK_WRITE_READ_UNLOCK(&lock->lock); 46 | } 47 | 48 | void adxlock_read_lock(AgnosticDXLock *lock) { 49 | adxlock_write_read_lock(lock); 50 | } 51 | 52 | void adxlock_read_unlock(AgnosticDXLock *lock) { 53 | adxlock_write_read_unlock(lock); 54 | } 55 | -------------------------------------------------------------------------------- /qd_library/qd.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_qd_hpp 2 | #define qd_qd_hpp qd_qd_hpp 3 | 4 | #include "locks/waitable_lock.hpp" 5 | #include "locks/tatas_lock.hpp" 6 | #include "locks/mutex_lock.hpp" 7 | #include "locks/futex_lock.hpp" 8 | #include "locks/mcs_futex_lock.hpp" 9 | #include "locks/mcs_lock.hpp" 10 | #include "locks/ticket_futex_lock.hpp" 11 | 12 | #include "queues/buffer_queue.hpp" 13 | #include "queues/dual_buffer_queue.hpp" 14 | #include "queues/entry_queue.hpp" 15 | #include "queues/simple_locked_queue.hpp" 16 | 17 | #include "qdlock.hpp" 18 | #include "hqdlock.hpp" 19 | #include "mrqdlock.hpp" 20 | 21 | #include "qd_condition_variable.hpp" 22 | 23 | template 24 | class extended_lock : public Lock { 25 | public: 26 | bool try_lock_or_wait() { 27 | return this->try_lock(); 28 | } 29 | }; 30 | 31 | using internal_lock = mcs_futex_lock; 32 | using qdlock = qdlock_impl>; 33 | using mrqdlock = mrqdlock_impl, reader_groups<64>, 65536>; 34 | using qd_condition_variable = qd_condition_variable_impl; 35 | 36 | #define DELEGATE_F(function, ...) template delegate_f(__VA_ARGS__) 37 | #define DELEGATE_N(function, ...) template delegate_n(__VA_ARGS__) 38 | #define DELEGATE_P(function, ...) template delegate_p(__VA_ARGS__) 39 | #define DELEGATE_FP(function, ...) template delegate_fp(__VA_ARGS__) 40 | #define WAIT_REDELEGATE_P(function, ...) 
template wait_redelegate_p(__VA_ARGS__) 41 | 42 | #endif /* qd_qd_hpp */ 43 | -------------------------------------------------------------------------------- /src/lock/all_equal_rdx_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "datastructures/multi_writers_queue.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_non_zero_indicator_types.h" 5 | 6 | #ifndef ALL_EQUAL_RDX_LOCK_H 7 | #define ALL_EQUAL_RDX_LOCK_H 8 | 9 | struct NodeImpl; 10 | 11 | typedef union CacheLinePaddedNodePtrImpl { 12 | struct NodeImpl * value; 13 | char padding[64]; 14 | } CacheLinePaddedNodePtr; 15 | 16 | 17 | typedef struct NodeImpl { 18 | MWQueue writeQueue; 19 | CacheLinePaddedNodePtr next; 20 | CacheLinePaddedBool locked; 21 | CacheLinePaddedBool readSpinningEnabled; 22 | NZI_DATATYPE_NAME nonZeroIndicator; 23 | bool readLockIsWriteLock; 24 | bool readLockIsSpinningOnNode; 25 | struct NodeImpl * readLockSpinningNode; 26 | char pad[64 - ((sizeof(bool)*2 + sizeof(struct NodeImpl *)) % 64)]; 27 | } Node; 28 | 29 | typedef struct AllEqualRDXLockImpl { 30 | char pad1[64]; 31 | void (*writer)(void *, void **); 32 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 33 | CacheLinePaddedNodePtr endOfQueue; 34 | NZI_DATATYPE_NAME nonZeroIndicator; 35 | } AllEqualRDXLock; 36 | 37 | 38 | 39 | AllEqualRDXLock * aerlock_create(void (*writer)(void *, void **)); 40 | void aerlock_free(AllEqualRDXLock * lock); 41 | void aerlock_initialize(AllEqualRDXLock * lock, void (*writer)(void *, void **)); 42 | void aerlock_register_this_thread(); 43 | void aerlock_write(AllEqualRDXLock *lock, void * writeInfo); 44 | void aerlock_write_read_lock(AllEqualRDXLock *lock); 45 | void aerlock_write_read_unlock(AllEqualRDXLock * lock); 46 | void aerlock_read_lock(AllEqualRDXLock *lock); 47 | void aerlock_read_unlock(AllEqualRDXLock *lock); 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/lock/tatas_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tatas_lock.h" 7 | 8 | 9 | TATASLock * tataslock_create(void (*writer)(void *, void **)){ 10 | TATASLock * lock = malloc(sizeof(TATASLock)); 11 | tataslock_initialize(lock, writer); 12 | return lock; 13 | } 14 | 15 | void tataslock_initialize(TATASLock * lock, void (*writer)(void *, void **)){ 16 | lock->writer = writer; 17 | lock->lockWord.value = 0; 18 | __sync_synchronize(); 19 | } 20 | 21 | void tataslock_free(TATASLock * lock){ 22 | free(lock); 23 | } 24 | 25 | void tataslock_register_this_thread(){ 26 | } 27 | 28 | void tataslock_write(TATASLock *lock, void * writeInfo) { 29 | tataslock_write_read_lock(lock); 30 | lock->writer(writeInfo, NULL); 31 | tataslock_write_read_unlock(lock); 32 | } 33 | 34 | void tataslock_write_read_lock(TATASLock *lock) { 35 | bool currentlylocked; 36 | while(true){ 37 | load_acq(currentlylocked, lock->lockWord.value); 38 | while(currentlylocked){ 39 | load_acq(currentlylocked, lock->lockWord.value); 40 | } 41 | currentlylocked = __sync_lock_test_and_set(&lock->lockWord.value, true); 42 | if(!currentlylocked){ 43 | //Was not locked before operation 44 | return; 45 | } 46 | __sync_synchronize();//Pause instruction? 
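/* Note on the question above: the full fence is most likely not needed for
 * correctness (the test-and-set and the acquire loads already order this
 * loop); what is usually wanted in a TATAS retry loop is a CPU pause hint.
 * A minimal sketch, assuming x86 and GCC, where the builtin below exists:
 *
 *     static inline void spin_pause(void) {
 *         __builtin_ia32_pause();
 *     }
 *
 * Calling spin_pause() here instead of the fence reduces pipeline flushes
 * and power draw while the lock is contended. */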
47 | } 48 | } 49 | 50 | void tataslock_write_read_unlock(TATASLock * lock) { 51 | __sync_lock_release(&lock->lockWord.value); 52 | } 53 | 54 | void tataslock_read_lock(TATASLock *lock) { 55 | tataslock_write_read_lock(lock); 56 | } 57 | 58 | void tataslock_read_unlock(TATASLock *lock) { 59 | tataslock_write_read_unlock(lock); 60 | } 61 | -------------------------------------------------------------------------------- /src/benchmark/produce_graphs_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import matplotlib 4 | matplotlib.use('Agg') 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | try: 9 | Format = FORMAT 10 | except NameError: 11 | Format = 'pdf' 12 | 13 | def read_dat_file(the_file): 14 | with open(the_file, 'r') as f: 15 | lines = f.readlines() 16 | x = [] 17 | y = [] 18 | for line in lines: 19 | p = line.split() 20 | x.append(float(p[0])) 21 | y.append(float(p[2])/float(p[1])) 22 | return (x, y) 23 | 24 | from itertools import cycle 25 | markers = None 26 | def set_up_figure(title): 27 | markers = cycle(['o', 's', 'd', '^', 'v', '<', '>', 'D', 'h']) 28 | plt.figure() 29 | plt.autoscale(enable=True, tight=False) 30 | plt.xlabel('Number of Threads') 31 | plt.ylabel('Operations / Microsecond') 32 | plt.title(title) 33 | 34 | 35 | def plot_file(the_file, title): 36 | (x_list, y_list) = read_dat_file(the_file) 37 | mapped = [(a, [b for (comp_a, b) in zip(x_list, y_list) if a == comp_a]) for a in x_list] 38 | mapped.sort() 39 | x,y_vals = zip(*mapped) 40 | y = map(lambda v : sum(v) / float(len(v)), y_vals) 41 | emin = map(lambda (v, avg) : avg - min(v), zip(y_vals, y)) 42 | emax = map(lambda (v, avg) : max(v) - avg, zip(y_vals, y)) 43 | plt.errorbar(x, y, [emin, emax], label=title, linewidth=2, elinewidth=1, marker='o') 44 | #plt.plot(x, y, label=title, linewidth=2) 45 | 46 | 47 | def complete_figure(save_file_name): 48 | plt.axis(xmin=0) 49 | plt.axis(ymin=0) 50 | plt.tight_layout() 51 | plt.legend(loc='best') 52 | plt.savefig(save_file_name + '.' + Format, bbox_inches='tight', dpi=400) 53 | print save_file_name + '.' 
+ Format 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/benchmark/pairingheap/dxlocked_pairingheap.h: -------------------------------------------------------------------------------- 1 | #include "pairingheap.h" 2 | #include "utils/support_many_lock_types.h" 3 | 4 | #ifndef DXLOCKED_PAIRINGHEAP_H 5 | #define DXLOCKED_PAIRINGHEAP_H 6 | 7 | typedef struct DXPriorityQueueImpl{ 8 | char pad1[128]; 9 | struct node* value; 10 | char pad2[128 - (sizeof(struct node*))]; 11 | } DXPriorityQueue; 12 | 13 | DXPriorityQueue dx_pq_ph_datastructure __attribute__((aligned(64))); 14 | 15 | LOCK_DATATYPE_NAME dx_pq_ph_lock __attribute__((aligned(64))); 16 | 17 | 18 | void dx_pq_ph_init(){ 19 | LOCK_INITIALIZE(&dx_pq_ph_lock, NULL);//Default write function not used 20 | dx_pq_ph_datastructure.value = NULL; 21 | } 22 | 23 | void dx_pq_ph_destroy(){ 24 | destroy_heap(dx_pq_ph_datastructure.value); 25 | } 26 | 27 | void dx_pq_ph_enqueue_critical_section(void * enqueueValue, void ** notUsed){ 28 | dx_pq_ph_datastructure.value = 29 | insert(dx_pq_ph_datastructure.value, (int)(long)enqueueValue); 30 | } 31 | 32 | void dx_pq_ph_enqueue(int value){ 33 | LOCK_DELEGATE(&dx_pq_ph_lock, &dx_pq_ph_enqueue_critical_section, (void*)(long)value); 34 | } 35 | 36 | void dx_pq_ph_dequeue_critical_section(void * notUsed, void ** resultLocationPtr){ 37 | int * resultLocation = (int*)resultLocationPtr; 38 | if(dx_pq_ph_datastructure.value != NULL){ 39 | *resultLocation = top(dx_pq_ph_datastructure.value); 40 | dx_pq_ph_datastructure.value = pop(dx_pq_ph_datastructure.value); 41 | }else{ 42 | *resultLocation = -1; 43 | } 44 | } 45 | 46 | int dx_pq_ph_dequeue(){ 47 | return (int)(long)LOCK_DELEGATE_RETURN_BLOCK(&dx_pq_ph_lock, 48 | &dx_pq_ph_dequeue_critical_section, 49 | NULL); 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/datastructures_bench/datastructures/pairingheap/dxlocked_pairingheap.h: -------------------------------------------------------------------------------- 1 | #include "pairingheap.h" 2 | #include "support_many_lock_types.h" 3 | 4 | #ifndef DXLOCKED_PAIRINGHEAP_H 5 | #define DXLOCKED_PAIRINGHEAP_H 6 | 7 | typedef struct DXPriorityQueueImpl{ 8 | char pad1[128]; 9 | struct node* value; 10 | char pad2[128 - (sizeof(struct node*))]; 11 | } DXPriorityQueue; 12 | 13 | DXPriorityQueue dx_pq_ph_datastructure __attribute__((aligned(64))); 14 | 15 | LOCK_DATATYPE_NAME dx_pq_ph_lock __attribute__((aligned(64))); 16 | 17 | 18 | void dx_pq_ph_init(){ 19 | LOCK_INITIALIZE(&dx_pq_ph_lock, NULL);//Default write function not used 20 | dx_pq_ph_datastructure.value = NULL; 21 | } 22 | 23 | void dx_pq_ph_destroy(){ 24 | destroy_heap(dx_pq_ph_datastructure.value); 25 | } 26 | 27 | void dx_pq_ph_enqueue_critical_section(void * enqueueValue, void ** notUsed){ 28 | dx_pq_ph_datastructure.value = 29 | insert(dx_pq_ph_datastructure.value, (int)(long)enqueueValue); 30 | } 31 | 32 | void dx_pq_ph_enqueue(int value){ 33 | LOCK_DELEGATE(&dx_pq_ph_lock, &dx_pq_ph_enqueue_critical_section, (void*)(long)value); 34 | } 35 | 36 | void dx_pq_ph_dequeue_critical_section(void * notUsed, void ** resultLocationPtr){ 37 | int * resultLocation = (int*)resultLocationPtr; 38 | if(dx_pq_ph_datastructure.value != NULL){ 39 | *resultLocation = top(dx_pq_ph_datastructure.value); 40 | dx_pq_ph_datastructure.value = pop(dx_pq_ph_datastructure.value); 41 | }else{ 42 | *resultLocation = -1; 43 | } 44 | } 45 | 46 | int dx_pq_ph_dequeue(){ 47 | 
return (int)(long)LOCK_DELEGATE_RETURN_BLOCK(&dx_pq_ph_lock, 48 | &dx_pq_ph_dequeue_critical_section, 49 | NULL); 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gdb_skiplist_print.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import gdb 4 | 5 | class SkiplistPrintCommand(gdb.Command): 6 | """Iterate and print a list. 7 | 8 | skip [MAX] 9 | 10 | Given a list EXPR, iterate through the list nodes' ->next pointers, printing 11 | each node iterated. We will iterate through MAX list nodes, to prevent 12 | infinite loops with corrupt lists. If MAX is zero, we will iterate the 13 | entire list. 14 | 15 | List node types are expected to have a member named "next". List types 16 | may be the same as node types, or a separate type with an explicit 17 | head node, called "head".""" 18 | 19 | MAX_ITER = 10 20 | 21 | def __init__(self): 22 | super(SkiplistPrintCommand, self).__init__("skiplist-print", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL) 23 | 24 | def invoke(self, _args, from_tty): 25 | args = gdb.string_to_argv(_args) 26 | start_node = args[0] 27 | 28 | if len(args) > 1: 29 | max_iter = int(args[1]) 30 | else: 31 | max_iter = self.MAX_ITER 32 | 33 | if len(args) > 2: 34 | lvl = int(args[2]) 35 | else: 36 | lvl = 0 37 | 38 | p_node_t = gdb.lookup_type('node_t').pointer() 39 | long_t = gdb.lookup_type('long') 40 | node = gdb.parse_and_eval(start_node) 41 | print node 42 | 43 | for i in xrange(max_iter): 44 | nexts = node['next'] 45 | nxt = gdb.Value(nexts[lvl]).cast(long_t) 46 | nxt = nxt & ~1 47 | node = gdb.Value(nxt).cast(p_node_t).dereference() 48 | nexts = node['next'] 49 | print node['k'], node['level'], node['inserting'], 50 | k = 0 51 | while k < node['level']: 52 | print(nexts[k]), 53 | k+=1 54 | print("") 55 | 56 | SkiplistPrintCommand() 57 | --------------------------------------------------------------------------------
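The file that follows, aticket_lock.c, implements an array-based ticket lock: each waiter spins on its own cache-line-padded slot, selected by its ticket modulo ARRAY_SIZE, so waiters do not invalidate each other's cache lines the way they do on the shared outCounter of ticket_lock.c. A minimal self-contained C11 sketch of the same idea; the names (ArrayTicketLock, atl_lock, SLOTS) are illustrative, not the repo's API, and it assumes at most SLOTS threads contend at once:

    #include <stdatomic.h>
    #include <stdio.h>

    #define SLOTS 64 /* assumed upper bound on simultaneous waiters */

    typedef struct {
        _Atomic unsigned turn;
        char pad[64 - sizeof(unsigned)]; /* one slot per cache line */
    } Slot;

    typedef struct {
        _Atomic unsigned in; /* next ticket to hand out */
        unsigned out;        /* only written by the current lock holder */
        Slot slots[SLOTS];
    } ArrayTicketLock;

    static void atl_lock(ArrayTicketLock *l) {
        unsigned t = atomic_fetch_add(&l->in, 1);
        while (atomic_load_explicit(&l->slots[t % SLOTS].turn,
                                    memory_order_acquire) != t)
            ; /* each waiter spins on its own line */
    }

    static void atl_unlock(ArrayTicketLock *l) {
        unsigned next = ++l->out;
        atomic_store_explicit(&l->slots[next % SLOTS].turn, next,
                              memory_order_release);
    }

    int main(void) {
        static ArrayTicketLock l; /* zero-initialized: ticket 0 may enter at once */
        atl_lock(&l);
        puts("in the critical section");
        atl_unlock(&l);
        return 0;
    }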
/src/lock/aticket_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "aticket_lock.h" 6 | 7 | 8 | ATicketLock * aticketlock_create(void (*writer)(void *, void **)){ 9 | ATicketLock * lock = malloc(sizeof(ATicketLock)); 10 | aticketlock_initialize(lock, writer); 11 | return lock; 12 | } 13 | 14 | void aticketlock_initialize(ATicketLock * lock, void (*writer)(void *, void **)){ 15 | lock->writer = writer; 16 | lock->inCounter.value = 0; 17 | lock->outCounter.value = 0; 18 | for(int i = 0; i < ARRAY_SIZE; i++){ 19 | lock->spinAreas[i].value = 0; 20 | } 21 | __sync_synchronize(); 22 | } 23 | 24 | void aticketlock_free(ATicketLock * lock){ 25 | free(lock); 26 | } 27 | 28 | 29 | void aticketlock_register_this_thread(){ 30 | } 31 | 32 | void aticketlock_write(ATicketLock *lock, void * writeInfo) { 33 | aticketlock_write_read_lock(lock); 34 | lock->writer(writeInfo, NULL); 35 | aticketlock_write_read_unlock(lock); 36 | } 37 | 38 | void aticketlock_write_read_lock(ATicketLock *lock) { 39 | int waitTicket; 40 | int myTicket = __sync_fetch_and_add(&lock->inCounter.value, 1); 41 | int spinPosition = myTicket % ARRAY_SIZE; 42 | load_acq(waitTicket, lock->spinAreas[spinPosition].value); 43 | while(waitTicket != myTicket){ 44 | __sync_synchronize(); 45 | load_acq(waitTicket, lock->spinAreas[spinPosition].value); 46 | } 47 | } 48 | 49 | void aticketlock_write_read_unlock(ATicketLock * lock) { 50 | lock->outCounter.value = lock->outCounter.value + 1; 51 | int nextPosition = lock->outCounter.value % ARRAY_SIZE; 52 | store_rel(lock->spinAreas[nextPosition].value, lock->outCounter.value); 53 | __sync_synchronize();//Push change 54 | } 55 | 56 | void aticketlock_read_lock(ATicketLock *lock) { 57 | aticketlock_write_read_lock(lock); 58 | } 59 | 60 | void aticketlock_read_unlock(ATicketLock *lock) { 61 | aticketlock_write_read_unlock(lock); 62 | } 63 | -------------------------------------------------------------------------------- /src/lock/mcs_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "common_lock_constants.h" 4 | 5 | #ifndef MCS_LOCK_H 6 | #define MCS_LOCK_H 7 | 8 | struct MCSNodeImpl; 9 | 10 | typedef union CacheLinePaddedMCSNodePtrImpl { 11 | struct MCSNodeImpl * value; 12 | char padding[64]; 13 | } CacheLinePaddedMCSNodePtr; 14 | 15 | typedef struct MCSNodeImpl { 16 | char pad1[64]; 17 | CacheLinePaddedMCSNodePtr next; 18 | CacheLinePaddedBool locked; 19 | } MCSNode; 20 | 21 | typedef struct MCSLockImpl { 22 | char pad1[64]; 23 | void (*writer)(void *, void **); 24 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 25 | CacheLinePaddedMCSNodePtr endOfQueue; 26 | } MCSLock; 27 | 28 | 29 | 30 | MCSLock * mcslock_create(void (*writer)(void *, void **)); 31 | void mcslock_free(MCSLock * lock); 32 | void mcslock_initialize(MCSLock * lock, void (*writer)(void *, void **)); 33 | void mcslock_register_this_thread(); 34 | void mcslock_write(MCSLock *lock, void * writeInfo); 35 | bool mcslock_write_read_lock(MCSLock *lock); 36 | void mcslock_write_read_unlock(MCSLock * lock); 37 | void mcslock_read_lock(MCSLock *lock); 38 | void mcslock_read_unlock(MCSLock *lock); 39 | 40 | static inline 41 | bool mcslock_is_locked(MCSLock *lock){ 42 | MCSNode * endOfQueue; 43 | load_acq(endOfQueue, lock->endOfQueue.value); 44 | return endOfQueue != NULL; 45 | } 46 | 47 | extern __thread MCSNode myMCSNode __attribute__((aligned(64))); 48 | 49 | static inline 50 | bool set_if_null_ptr(MCSNode ** pointerToOldValue, MCSNode * newValue){ 51 | return __sync_bool_compare_and_swap(pointerToOldValue, NULL, newValue); 52 | } 53 | 54 | static inline 55 | bool mcslock_try_write_read_lock(MCSLock *lock) { 56 | MCSNode * node = &myMCSNode; 57 | if(ACCESS_ONCE(lock->endOfQueue.value) != NULL){ 58 | return false; 59 | }else{ 60 | node->next.value = NULL; 61 | return set_if_null_ptr(&lock->endOfQueue.value, node); 62 | } 63 | } 64 | 65 | #endif 66 | --------------------------------------------------------------------------------
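mcs_lock.h above only exposes the queue tail (endOfQueue), a CAS-based try-lock and an is_locked check; the acquire/release paths live in mcs_lock.c. For orientation, here is a minimal textbook MCS lock in C11 atomics. This is a standalone sketch with illustrative names (mcs_acquire, mcs_release), not the repo's exact implementation, which uses __sync builtins and cache-line-padded node types:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct mcs_node {
        struct mcs_node * _Atomic next;
        _Atomic int waiting;
    } mcs_node;

    typedef struct { mcs_node * _Atomic tail; } mcs_lock_t;

    static void mcs_acquire(mcs_lock_t *l, mcs_node *me) {
        atomic_store(&me->next, NULL);
        mcs_node *prev = atomic_exchange(&l->tail, me); /* enqueue self */
        if (prev != NULL) {
            atomic_store(&me->waiting, 1);
            atomic_store(&prev->next, me); /* link in behind predecessor */
            while (atomic_load_explicit(&me->waiting, memory_order_acquire))
                ; /* spin on own node only */
        }
    }

    static void mcs_release(mcs_lock_t *l, mcs_node *me) {
        mcs_node *succ = atomic_load(&me->next);
        if (succ == NULL) {
            mcs_node *expect = me;
            if (atomic_compare_exchange_strong(&l->tail, &expect, NULL))
                return; /* no successor: queue is empty again */
            while ((succ = atomic_load(&me->next)) == NULL)
                ; /* a successor exists but has not linked in yet */
        }
        atomic_store_explicit(&succ->waiting, 0, memory_order_release);
    }

    int main(void) {
        mcs_lock_t l = { NULL };
        mcs_node me;
        mcs_acquire(&l, &me);
        mcs_release(&l, &me);
        return 0;
    }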
/src/lock/flat_comb_rdx_lock.h: -------------------------------------------------------------------------------- 1 | #include "utils/smp_utils.h" 2 | #include 3 | #include "utils/support_many_non_zero_indicator_types.h" 4 | 5 | #ifndef FLAT_COMB_RDX_LOCK_H 6 | #define FLAT_COMB_RDX_LOCK_H 7 | 8 | struct FCMCSNodeImpl; 9 | 10 | typedef union CacheLinePaddedFCMCSNodePtrImpl { 11 | struct FCMCSNodeImpl * value; 12 | char padding[64]; 13 | } CacheLinePaddedFCMCSNodePtr; 14 | 15 | typedef struct FCMCSNodeImpl { 16 | char pad1[64]; 17 | CacheLinePaddedFCMCSNodePtr next; 18 | CacheLinePaddedBool locked; 19 | } FCMCSNode; 20 | 21 | struct FlatCombNodeImpl; 22 | 23 | typedef union CacheLinePaddedFlatCombNodePtrImpl { 24 | struct FlatCombNodeImpl * value; 25 | char padding[64]; 26 | } CacheLinePaddedFlatCombNodePtr; 27 | 28 | typedef struct FlatCombNodeImpl { 29 | char pad1[64]; 30 | struct FlatCombNodeImpl * next; 31 | void * request; 32 | unsigned long last_used; 33 | char pad2[64 - (2 * sizeof(void *) + sizeof(unsigned long)) % 64]; 34 | CacheLinePaddedBool active; 35 | char pad3[64]; 36 | } FlatCombNode; 37 | 38 | typedef struct FlatCombRDXLockImpl { 39 | char pad1[64]; 40 | NZI_DATATYPE_NAME nonZeroIndicator; 41 | CacheLinePaddedInt writeBarrier; 42 | CacheLinePaddedFCMCSNodePtr endOfMCSQueue; 43 | CacheLinePaddedFlatCombNodePtr combine_list; 44 | void (*writer)(void *, void **); 45 | unsigned long combine_count; 46 | } FlatCombRDXLock; 47 | 48 | FlatCombRDXLock * fcrdxlock_create(void (*writer)(void *, void **)); 49 | void fcrdxlock_initialize(FlatCombRDXLock * lock, void (*writer)(void *, void **)); 50 | void fcrdxlock_free(FlatCombRDXLock * lock); 51 | 52 | void fcrdxlock_register_this_thread(); 53 | 54 | void fcrdxlock_write(FlatCombRDXLock *lock, void * writeInfo); 55 | 56 | void fcrdxlock_write_read_lock(FlatCombRDXLock *lock); 57 | void fcrdxlock_write_read_unlock(FlatCombRDXLock * lock); 58 | 59 | void fcrdxlock_read_lock(FlatCombRDXLock *lock); 60 | void fcrdxlock_read_unlock(FlatCombRDXLock *lock); 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /qd_library/locks/waitable_lock.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_waitable_lock_hpp 2 | #define qd_waitable_lock_hpp qd_waitable_lock_hpp 3 | 4 | #include 5 | 6 | /** 7 | * @brief lock class wrapper to add wait/notify functionality 8 | * @tparam Lock a locking class 9 | * @details This wrapper adds functionality to wait on each instance of a class 10 | * without requiring a lock to be taken/released. This is useful when 11 | * implementing another lock, so that spinning can be avoided. 12 | * @warning This implementation relies on std::condition_variable_any not actually needing a lock, 13 | * which violates its preconditions. 14 | * @remarks This is likely not the most efficient way of implementing waiting. 15 | * @todo The private inheritance is used to get a memory layout in which 16 | * clang++-3.4 spills less than if these structures appeared in the opposite 17 | * order. This "optimization" might not be the best solution. 18 | */ 19 | template 20 | class waitable_lock : private std::condition_variable_any, public Lock { 21 | /** 22 | * @brief a dummy lock class 23 | * @warning This lock does not provide locking. 24 | * @details This class is not intended for use as a lock, but as 25 | * std::condition_variable_any requires a lock class, 26 | * this provides it.
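* @note std::condition_variable_any::wait() calls unlock() and lock() on
* the lock object it is given; with null_lock both calls are intentionally
* empty, so all synchronization happens inside the condition variable.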
27 | */ 28 | struct null_lock { 29 | void lock() {} 30 | void unlock() {} 31 | }; 32 | 33 | /** @brief an associated dummy lock for the std::condition_variable_any */ 34 | null_lock not_a_lock; 35 | 36 | public: 37 | 38 | /** @brief wait until notified */ 39 | void wait() { 40 | std::condition_variable_any::wait(not_a_lock); 41 | } 42 | 43 | /** @brief notify (at least) one waiting thread */ 44 | void notify_one() { 45 | std::condition_variable_any::notify_one(); 46 | } 47 | 48 | /** @brief notify all waiting threads */ 49 | void notify_all() { 50 | std::condition_variable_any::notify_all(); 51 | } 52 | }; 53 | 54 | #endif /* qd_waitable_lock_hpp */ 55 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue.cpp.static: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using locktype = qdlock; 4 | 5 | extern "C" { 6 | #include "cpplock.h" 7 | 8 | AgnosticDXLock* cpplock_new() { 9 | AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024); 10 | new (&x->lock) locktype; 11 | return x; 12 | } 13 | 14 | void cpplock_init(AgnosticDXLock* x) { 15 | locktype* l = reinterpret_cast(&x->lock); 16 | new (l) locktype; 17 | } 18 | void cpplock_free(AgnosticDXLock* x) { 19 | locktype* l = reinterpret_cast(&x->lock); 20 | l->~locktype(); 21 | std::free(x); 22 | } 23 | 24 | void delegate_wrapper(void (*fun)(int, int *), int d) { 25 | fun(d, nullptr); 26 | } 27 | void delegate_and_wait_wrapper(void (*fun)(int, int *), int d , int* r, std::atomic* f) { 28 | fun(d, r); 29 | f->store(true, std::memory_order_release); 30 | } 31 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 32 | locktype* l = reinterpret_cast(&x->lock); 33 | l->DELEGATE_N(delegate_wrapper, delgateFun, data); 34 | } 35 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 36 | locktype* l = reinterpret_cast(&x->lock); 37 | int resp; 38 | std::atomic flag(false); 39 | l->DELEGATE_N(delegate_and_wait_wrapper, delgateFun, data, &resp, &flag); 40 | while(!flag.load(std::memory_order_acquire)) { 41 | qd::pause(); 42 | } 43 | return resp; 44 | } 45 | void cpplock_lock(AgnosticDXLock* x) { 46 | locktype* l = reinterpret_cast(&x->lock); 47 | l->lock(); 48 | } 49 | void cpplock_unlock(AgnosticDXLock* x) { 50 | locktype* l = reinterpret_cast(&x->lock); 51 | l->unlock(); 52 | } 53 | //void cpplock_rlock(AgnosticDXLock* x) { 54 | // locktype* l = reinterpret_cast(&x->lock); 55 | // l->rlock(); 56 | //} 57 | //void cpplock_runlock(AgnosticDXLock* x) { 58 | // locktype* l = reinterpret_cast(&x->lock); 59 | // l->runlock(); 60 | //} 61 | 62 | } // extern "C" 63 | -------------------------------------------------------------------------------- /src/benchmark/pairingheap/test_pairingheap.c: -------------------------------------------------------------------------------- 1 | #include "pairingheap.h" 2 | 3 | /* For verification purpose only. 
*/ 4 | #include 5 | 6 | #define BIG_RAND() (rand() % 10000) 7 | /* End of verification purpose only part */ 8 | 9 | void heap_sort(int* xs, int n){ 10 | int i; 11 | struct node* h = NULL; 12 | for(i=0; i0) 33 | x ^= xs[--n]; 34 | return x; 35 | } 36 | 37 | void test_heap_sort(){ 38 | int m = 1000; 39 | int i, n, c, *xs; 40 | while(m--){ 41 | n = 1 + BIG_RAND(); 42 | xs = (int*)malloc(sizeof(int)*n); 43 | for(i=0; i 5 | 6 | #define BIG_RAND() (rand() % 10000) 7 | /* End of verification purpose only part */ 8 | 9 | void heap_sort(int* xs, int n){ 10 | int i; 11 | struct node* h = NULL; 12 | for(i=0; i0) 33 | x ^= xs[--n]; 34 | return x; 35 | } 36 | 37 | void test_heap_sort(){ 38 | int m = 1000; 39 | int i, n, c, *xs; 40 | while(m--){ 41 | n = 1 + BIG_RAND(); 42 | xs = (int*)malloc(sizeof(int)*n); 43 | for(i=0; ilock) locktype; 4 | return x; 5 | } 6 | 7 | void rcpplock_init(RCPPLock* x) { 8 | locktype* l = reinterpret_cast(&x->lock); 9 | new (l) locktype; 10 | } 11 | void rcpplock_free(RCPPLock* x) { 12 | locktype* l = reinterpret_cast(&x->lock); 13 | l->~locktype(); 14 | std::free(x); 15 | } 16 | 17 | void rcpplock_delegate(RCPPLock* x, void (*delgateFun)(void*, void* *), void* data) { 18 | locktype* l = reinterpret_cast(&x->lock); 19 | l->delegate_n([](void (*fun)(void*, void* *), void* d) {fun(d, nullptr);}, delgateFun, data); 20 | } 21 | #if 1 22 | void* rcpplock_delegate_and_wait(RCPPLock* x, void (*delgateFun)(void*, void* *), void* data) { 23 | locktype* l = reinterpret_cast(&x->lock); 24 | void* resp; 25 | std::atomic flag(false); 26 | l->delegate_n([](void (*fun)(void*, void* *), void* d , void** r, std::atomic* f) { fun(d, r); f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag); 27 | while(!flag.load(std::memory_order_acquire)) { 28 | qd::pause(); 29 | } 30 | return resp; 31 | } 32 | #endif 33 | #if 0 34 | int rcpplock_delegate_and_wait(RCPPLock* x, void (*delgateFun)(int, int *), int data) { 35 | locktype* l = reinterpret_cast(&x->lock); 36 | static const int reserved = -999999; 37 | std::atomic resp(reserved); 38 | l->delegate_n([](void (*fun)(int, int *), int d , std::atomic* r) { int v = -1; fun(d, &v); r->store(v, std::memory_order_release);}, delgateFun, data, &resp); 39 | while(resp.load(std::memory_order_acquire) == reserved) { 40 | qd::pause(); 41 | } 42 | return resp; 43 | } 44 | #endif 45 | void rcpplock_lock(RCPPLock* x) { 46 | locktype* l = reinterpret_cast(&x->lock); 47 | l->lock(); 48 | } 49 | void rcpplock_unlock(RCPPLock* x) { 50 | locktype* l = reinterpret_cast(&x->lock); 51 | l->unlock(); 52 | } 53 | void rcpplock_rlock(RCPPLock* x) { 54 | locktype* l = reinterpret_cast(&x->lock); 55 | l->rlock(); 56 | } 57 | void rcpplock_runlock(RCPPLock* x) { 58 | locktype* l = reinterpret_cast(&x->lock); 59 | l->runlock(); 60 | } 61 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/portable_defns.h: -------------------------------------------------------------------------------- 1 | #ifndef __PORTABLE_DEFNS_H__ 2 | #define __PORTABLE_DEFNS_H__ 3 | 4 | #define MAX_THREADS 128 /* Nobody will ever have more! 
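(Note: machines with 128 or more hardware threads exist these days; raise this limit before running with more threads than that.)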
*/ 5 | #define INTEL 1 6 | #define CACHE_LINE_SIZE 64 7 | #if defined(SPARC) 8 | #include "sparc_defns.h" 9 | #elif defined(INTEL) 10 | #include "intel_defns.h" 11 | #elif defined(PPC) 12 | #include "ppc_defns.h" 13 | #elif defined(IA64) 14 | #include "ia64_defns.h" 15 | #elif defined(MIPS) 16 | #include "mips_defns.h" 17 | #elif defined(ALPHA) 18 | #include "alpha_defns.h" 19 | #else 20 | #error "A valid architecture has not been defined" 21 | #endif 22 | 23 | #include 24 | 25 | #ifndef MB_NEAR_CAS 26 | #define RMB_NEAR_CAS() RMB() 27 | #define WMB_NEAR_CAS() WMB() 28 | #define MB_NEAR_CAS() MB() 29 | #endif 30 | 31 | typedef unsigned long int_addr_t; 32 | 33 | typedef int bool_t; 34 | #define FALSE 0 35 | #define TRUE 1 36 | 37 | #define ADD_TO(_v,_x) \ 38 | do { \ 39 | int __val = (_v), __newval; \ 40 | while ( (__newval = CASIO(&(_v),__val,__val+(_x))) != __val ) \ 41 | __val = __newval; \ 42 | } while ( 0 ) 43 | 44 | /* 45 | * Allow us to efficiently align and pad structures so that shared fields 46 | * don't cause contention on thread-local or read-only fields. 47 | */ 48 | #define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE] 49 | #define ALIGNED_ALLOC(_s) \ 50 | ((void *)(((unsigned long)malloc((_s)+CACHE_LINE_SIZE*2) + \ 51 | CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE-1))) 52 | 53 | 54 | /* 55 | * POINTER MARKING 56 | */ 57 | #define get_marked_ref(_p) ((void *)(((unsigned long)(_p)) | 1)) 58 | #define get_unmarked_ref(_p) ((void *)(((unsigned long)(_p)) & ~1)) 59 | #define is_marked_ref(_p) (((unsigned long)(_p)) & 1) 60 | 61 | 62 | 63 | /* Read field @_f into variable @_x. */ 64 | #define READ_FIELD(_x,_f) ((_x) = (_f)) 65 | 66 | #define WEAK_DEP_ORDER_RMB() ((void)0) 67 | #define WEAK_DEP_ORDER_WMB() ((void)0) 68 | #define WEAK_DEP_ORDER_MB() ((void)0) 69 | 70 | 71 | 72 | #endif /* __PORTABLE_DEFNS_H__ */ 73 | -------------------------------------------------------------------------------- /src/lock/agnostic_rdx_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "datastructures/dr_multi_writers_queue.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_non_zero_indicator_types.h" 5 | #include "utils/support_many_lock_types.h" 6 | 7 | #ifndef AGNOSTIC_RDX_LOCK_H 8 | #define AGNOSTIC_RDX_LOCK_H 9 | 10 | #ifdef LOCK_TYPE_WPRW_MCSLock 11 | //*********************************** 12 | //MCSLock 13 | //*********************************** 14 | #include "mcs_lock.h" 15 | 16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock 17 | 18 | #elif defined (LOCK_TYPE_WPRW_CohortLock) 19 | //*********************************** 20 | //CohortLock 21 | //*********************************** 22 | #include "cohort_lock.h" 23 | 24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock 25 | 26 | #elif defined (LOCK_TYPE_WPRW_TATASLock) 27 | //*********************************** 28 | //TATASLock 29 | //*********************************** 30 | #include "tatas_lock.h" 31 | 32 | #define LOCK_DATATYPE_NAME_WPRW TATASLock 33 | 34 | #else 35 | 36 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified 37 | 38 | #endif 39 | 40 | 41 | typedef struct AgnosticRDXLockImpl { 42 | DRMWQueue writeQueue; 43 | char pad1[64]; 44 | void (*writer)(void *, void **); 45 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 46 | char pad3[64]; 47 | CacheLinePaddedInt writeBarrier; 48 | LOCK_DATATYPE_NAME_WPRW lock; 49 | char pad4[64]; 50 | NZI_DATATYPE_NAME nonZeroIndicator; 51 | } AgnosticRDXLock; 52 | 53 | 54 | 55 | AgnosticRDXLock * ardxlock_create(void 
(*writer)(void *, void **)); 56 | void ardxlock_free(AgnosticRDXLock * lock); 57 | void ardxlock_initialize(AgnosticRDXLock * lock, void (*writer)(void *, void **)); 58 | void ardxlock_register_this_thread(); 59 | void ardxlock_write_with_response(AgnosticRDXLock *lock, void (*delgateFun)(void *, void **), void * data, void ** responseLocation); 60 | void ardxlock_delegate(AgnosticRDXLock *lock, void (*delgateFun)(void *, void**), void * data); 61 | void ardxlock_write(AgnosticRDXLock *lock, void * writeInfo); 62 | void ardxlock_write_read_lock(AgnosticRDXLock *lock); 63 | void ardxlock_write_read_unlock(AgnosticRDXLock * lock); 64 | void ardxlock_read_lock(AgnosticRDXLock *lock); 65 | void ardxlock_read_unlock(AgnosticRDXLock *lock); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/new_rep/locks/locks.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCKS_H 2 | #define LOCKS_H 3 | 4 | #include "locks/tatas_lock.h" 5 | #include "locks/qd_lock.h" 6 | 7 | typedef enum {TATAS_LOCK, QD_LOCK} LL_lock_type_name; 8 | 9 | #define LL_initialize(X) _Generic((X), \ 10 | TATASLock * : tatas_initialize((TATASLock *)X), \ 11 | QDLock * : qd_initialize((QDLock *)X) \ 12 | ) 13 | 14 | void * LL_create(LL_lock_type_name llLockType){ 15 | if(TATAS_LOCK == llLockType){ 16 | TATASLock * l = aligned_alloc(CACHE_LINE_SIZE, sizeof(TATASLock)); 17 | LL_initialize(l); 18 | return l; 19 | } else if (QD_LOCK == llLockType){ 20 | QDLock * l = aligned_alloc(CACHE_LINE_SIZE, sizeof(QDLock)); 21 | LL_initialize(l); 22 | return l; 23 | } 24 | return NULL;/* Should not be reachable */ 25 | } 26 | 27 | #define LL_free(X) _Generic((X),\ 28 | default : free(X) \ 29 | ) 30 | 31 | #define LL_lock(X) _Generic((X), \ 32 | TATASLock *: tatas_lock((TATASLock *)X), \ 33 | QDLock * : tatas_lock(&((QDLock *)X)->mutexLock) \ 34 | ) 35 | 36 | #define LL_unlock(X) _Generic((X), \ 37 | TATASLock *: tatas_unlock((TATASLock *)X), \ 38 | QDLock * : tatas_unlock(&((QDLock *)X)->mutexLock) \ 39 | ) 40 | 41 | #define LL_is_locked(X) _Generic((X), \ 42 | TATASLock *: tatas_is_locked((TATASLock *)X), \ 43 | QDLock * : tatas_is_locked(&((QDLock *)X)->mutexLock) \ 44 | ) 45 | 46 | #define LL_try_lock(X) _Generic((X), \ 47 | TATASLock *: tatas_try_lock(X), \ 48 | QDLock * : tatas_try_lock(&((QDLock *)X)->mutexLock) \ 49 | ) 50 | 51 | void ________TATAS_DELEGATE(TATASLock* l, 52 | void (*funPtr)(unsigned int, void *), 53 | unsigned int messageSize, 54 | void * messageAddress){ 55 | tatas_lock(l); 56 | funPtr(messageSize, messageAddress); 57 | tatas_unlock(l); 58 | } 59 | 60 | #define LL_delegate(X, funPtr, messageSize, messageAddress) _Generic((X), \ 61 | TATASLock *: ________TATAS_DELEGATE((TATASLock *)X, funPtr, messageSize, messageAddress), \ 62 | QDLock * : qd_delegate((QDLock *)X, funPtr, messageSize, messageAddress) \ 63 | ) 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/cpplock.cpp: -------------------------------------------------------------------------------- 1 | AgnosticDXLock* cpplock_new() { 2 | AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024); 3 | new (&x->lock) locktype; 4 | return x; 5 | } 6 | 7 | void cpplock_init(AgnosticDXLock* x) { 8 | locktype* l = reinterpret_cast(&x->lock); 9 | new (l) locktype; 10 | } 11 | void cpplock_free(AgnosticDXLock* x) { 12 | locktype* l = 
reinterpret_cast(&x->lock); 13 | l->~locktype(); 14 | std::free(x); 15 | } 16 | 17 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 18 | locktype* l = reinterpret_cast(&x->lock); 19 | l->delegate_n([](void (*fun)(int, int *), int d) {fun(d, nullptr);}, delgateFun, data); 20 | } 21 | #if 1 22 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 23 | locktype* l = reinterpret_cast(&x->lock); 24 | int resp; 25 | std::atomic flag(false); 26 | l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic* f) { fun(d, r); f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag); 27 | while(!flag.load(std::memory_order_acquire)) { 28 | qd::pause(); 29 | } 30 | return resp; 31 | } 32 | #endif 33 | #if 0 34 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 35 | locktype* l = reinterpret_cast(&x->lock); 36 | static const int reserved = -999999; 37 | std::atomic resp(reserved); 38 | l->delegate_n([](void (*fun)(int, int *), int d , std::atomic* r) { int v = -1; fun(d, &v); r->store(v, std::memory_order_release);}, delgateFun, data, &resp); 39 | while(resp.load(std::memory_order_acquire) == reserved) { 40 | qd::pause(); 41 | } 42 | return resp; 43 | } 44 | #endif 45 | void cpplock_lock(AgnosticDXLock* x) { 46 | locktype* l = reinterpret_cast(&x->lock); 47 | l->lock(); 48 | } 49 | void cpplock_unlock(AgnosticDXLock* x) { 50 | locktype* l = reinterpret_cast(&x->lock); 51 | l->unlock(); 52 | } 53 | //void cpplock_rlock(AgnosticDXLock* x) { 54 | // locktype* l = reinterpret_cast(&x->lock); 55 | // l->rlock(); 56 | //} 57 | //void cpplock_runlock(AgnosticDXLock* x) { 58 | // locktype* l = reinterpret_cast(&x->lock); 59 | // l->runlock(); 60 | //} 61 | -------------------------------------------------------------------------------- /src/tests/test_multi_writers_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "datastructures/multi_writers_queue.h" 5 | #include "test_framework.h" 6 | 7 | 8 | int test_create(){ 9 | 10 | MWQueue * test = mwqueue_create(); 11 | mwqueue_free(test); 12 | return 1; 13 | 14 | } 15 | 16 | int test_offer(){ 17 | { 18 | MWQueue * queue = mwqueue_create(); 19 | mwqueue_reset_fully_read(queue); 20 | for(void * i = NULL; i < (void*)(MWQ_CAPACITY/2); i++){ 21 | mwqueue_offer(queue, i); 22 | } 23 | 24 | mwqueue_free(queue); 25 | } 26 | { 27 | MWQueue * queue = mwqueue_create(); 28 | mwqueue_reset_fully_read(queue); 29 | for(void * i = NULL; i < (void*)(MWQ_CAPACITY*2); i++){ 30 | mwqueue_offer(queue, i); 31 | } 32 | 33 | mwqueue_free(queue); 34 | } 35 | return 1; 36 | } 37 | 38 | 39 | int test_offer_and_take(){ 40 | { 41 | MWQueue * queue = mwqueue_create(); 42 | mwqueue_reset_fully_read(queue); 43 | for(void * i = (void*)1; i <= (void*)(MWQ_CAPACITY/2); i++){ 44 | mwqueue_offer(queue, i); 45 | } 46 | 47 | for(int i = 1; i <= (MWQ_CAPACITY/2); i++){ 48 | assert(NULL != mwqueue_take(queue)); 49 | } 50 | 51 | assert(NULL == mwqueue_take(queue)); 52 | 53 | mwqueue_free(queue); 54 | } 55 | { 56 | MWQueue * queue = mwqueue_create(); 57 | mwqueue_reset_fully_read(queue); 58 | for(void * i = (void*)1; i <= (void*)(MWQ_CAPACITY * 2); i++){ 59 | mwqueue_offer(queue, i); 60 | } 61 | 62 | for(void * i = 0; i < TO_VP(MWQ_CAPACITY); i++){ 63 | assert(NULL != mwqueue_take(queue)); 64 | } 65 | 66 | assert(NULL == mwqueue_take(queue)); 67 | 68 | mwqueue_free(queue); 69 | 
} 70 | return 1; 71 | } 72 | 73 | 74 | int main(int argc, char **argv){ 75 | 76 | printf("\n\n\n\033[32m ### STARTING MULTI WRITERS QUEUE TESTS! -- \033[m\n\n\n"); 77 | 78 | T(test_create(), "test_create()"); 79 | 80 | T(test_offer(), "test_offer()"); 81 | 82 | T(test_offer_and_take(), "test_offer_and_take()"); 83 | 84 | printf("\n\n\n\033[32m ### MULTI WRITERS QUEUE TESTS COMPLETED! -- \033[m\n\n\n"); 85 | 86 | exit(0); 87 | 88 | } 89 | 90 | -------------------------------------------------------------------------------- /qd_library/queues/simple_locked_queue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_simple_locked_queue_hpp 2 | #define qd_simple_locked_queue_hpp qd_simple_locked_queue_hpp 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | class simple_locked_queue { 10 | std::mutex lock; 11 | std::queue> queue; 12 | typedef std::lock_guard scoped_guard; 13 | 14 | typedef void(*ftype)(char*); 15 | 16 | /* some constants */ 17 | static const bool CLOSED = false; 18 | static const bool SUCCESS = true; 19 | 20 | void forwardall(char*, long i) { 21 | assert(i <= 120); 22 | if(i > 120) throw "up"; 23 | }; 24 | template 25 | void forwardall(char* buffer, long offset, P&& p, Ts&&... ts) { 26 | assert(offset <= 120); 27 | auto ptr = reinterpret_cast(&buffer[offset]); 28 | new (ptr) P(std::forward
<P>
(p)); 29 | forwardall(buffer, offset+sizeof(p), std::forward(ts)...); 30 | } 31 | public: 32 | void open() { 33 | /* TODO this function should not even be here */ 34 | /* no-op as this is an "infinite" queue that always accepts more data */ 35 | } 36 | /** 37 | * @brief enqueues an entry 38 | * @tparam P return type of associated function 39 | * @param op wrapper function for associated function 40 | * @return SUCCESS on successful storing in queue, CLOSED otherwise 41 | */ 42 | template 43 | bool enqueue(ftype op, Ps*... ps) { 44 | std::array val; 45 | scoped_guard l(lock); 46 | queue.push(val); 47 | forwardall(queue.back().data(), 0, std::move(op), std::move(*ps)...); 48 | return SUCCESS; 49 | } 50 | 51 | /** execute all stored operations */ 52 | void flush() { 53 | scoped_guard l(lock); 54 | while(!queue.empty()) { 55 | auto operation = queue.front(); 56 | char* ptr = operation.data(); 57 | ftype* fun = reinterpret_cast(ptr); 58 | ptr += sizeof(ftype*); 59 | (*fun)(ptr); 60 | queue.pop(); 61 | } 62 | } 63 | /** execute one stored operation */ 64 | void flush_one() { 65 | scoped_guard l(lock); 66 | if(!queue.empty()) { 67 | char* ptr = queue.front().data(); 68 | ftype* fun = reinterpret_cast(ptr); 69 | ptr += sizeof(ftype); 70 | (*fun)(ptr); 71 | queue.pop(); 72 | } 73 | } 74 | }; 75 | 76 | #endif /* qd_simple_locked_queue_hpp */ 77 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/glue.cpp: -------------------------------------------------------------------------------- 1 | #include "qd.hpp" 2 | 3 | using locktype = qdlock; 4 | 5 | extern "C" { 6 | #include "cpplock.h" 7 | 8 | AgnosticDXLock* cpplock_new() { 9 | AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024); 10 | new (&x->lock) locktype; 11 | return x; 12 | } 13 | 14 | void cpplock_init(AgnosticDXLock* x) { 15 | locktype* l = reinterpret_cast(&x->lock); 16 | new (l) locktype; 17 | } 18 | void cpplock_free(AgnosticDXLock* x) { 19 | locktype* l = reinterpret_cast(&x->lock); 20 | l->~locktype(); 21 | std::free(x); 22 | } 23 | 24 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 25 | locktype* l = reinterpret_cast(&x->lock); 26 | l->delegate_n([](void (*fun)(int, int *), int d) {fun(d, nullptr);}, delgateFun, data); 27 | } 28 | #if 0 29 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 30 | locktype* l = reinterpret_cast(&x->lock); 31 | int resp; 32 | std::atomic flag(false); 33 | l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic* f) { fun(d, r); f->store(true, std::memory_order_release);}, delgateFun, data, &resp, &flag); 34 | while(!flag.load(std::memory_order_acquire)) { 35 | qd::pause(); 36 | } 37 | return resp; 38 | } 39 | #endif 40 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 41 | locktype* l = reinterpret_cast(&x->lock); 42 | static const int reserved = -999999; 43 | std::atomic resp(reserved); 44 | l->delegate_n([](void (*fun)(int, int *), int d , std::atomic* r) { int v; fun(d, &v); r->store(v, std::memory_order_release);}, delgateFun, data, &resp); 45 | while(resp.load(std::memory_order_acquire) == reserved) { 46 | qd::pause(); 47 | } 48 | return resp; 49 | } 50 | void cpplock_lock(AgnosticDXLock* x) { 51 | locktype* l = reinterpret_cast(&x->lock); 52 | l->lock(); 53 | } 54 | void cpplock_unlock(AgnosticDXLock* x) { 55 | locktype* l = 
reinterpret_cast(&x->lock); 56 | l->unlock(); 57 | } 58 | //void cpplock_rlock(AgnosticDXLock* x) { 59 | // locktype* l = reinterpret_cast(&x->lock); 60 | // l->rlock(); 61 | //} 62 | //void cpplock_runlock(AgnosticDXLock* x) { 63 | // locktype* l = reinterpret_cast(&x->lock); 64 | // l->runlock(); 65 | //} 66 | 67 | } // extern "C" 68 | -------------------------------------------------------------------------------- /src/datastructures/numa_ingress_egress_nzi.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "utils/numa_node_info_support.h" 4 | 5 | #ifndef NUMA_INGRESS_EGRESS_NZI_H 6 | #define NUMA_INGRESS_EGRESS_NZI_H 7 | 8 | #define INGRESS_EGRESS_PADDING 32 9 | 10 | extern __thread CacheLinePaddedInt myIngressEgressArriveNumaNode __attribute__((aligned(64))); 11 | 12 | typedef struct IngressEgressCounterImpl { 13 | unsigned long ingress; 14 | char pad1[INGRESS_EGRESS_PADDING - sizeof(unsigned long) % INGRESS_EGRESS_PADDING]; 15 | unsigned long egress; 16 | char pad2[INGRESS_EGRESS_PADDING - sizeof(unsigned long) % INGRESS_EGRESS_PADDING]; 17 | char pad3[64]; 18 | } IngressEgressCounter; 19 | 20 | typedef struct NUMAIngressEgressNZIImpl { 21 | IngressEgressCounter readerCounters[NUMBER_OF_NUMA_NODES]; 22 | } NUMAIngressEgress; 23 | 24 | static inline 25 | void nienzi_initialize(NUMAIngressEgress * nzi){ 26 | for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){ 27 | nzi->readerCounters[i].ingress = 0; 28 | nzi->readerCounters[i].egress = 0; 29 | } 30 | __sync_synchronize(); 31 | } 32 | 33 | static inline 34 | void nienzi_arrive(NUMAIngressEgress * nzi){ 35 | int myNumaNode = numa_node_id(); 36 | myIngressEgressArriveNumaNode.value = myNumaNode; 37 | __sync_fetch_and_add(&nzi->readerCounters[myNumaNode].ingress, 1); 38 | } 39 | 40 | static inline 41 | void nienzi_depart(NUMAIngressEgress * nzi){ 42 | int myNumaNode = myIngressEgressArriveNumaNode.value; 43 | __sync_fetch_and_add(&nzi->readerCounters[myNumaNode].egress, 1); 44 | } 45 | 46 | 47 | static inline 48 | bool nienzi_query(NUMAIngressEgress * nzi){ 49 | for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){ 50 | if(ACCESS_ONCE(nzi->readerCounters[i].ingress) != 51 | ACCESS_ONCE(nzi->readerCounters[i].egress)){ 52 | return false; 53 | } 54 | } 55 | return true; 56 | } 57 | 58 | static inline 59 | void nienzi_wait_unil_empty(NUMAIngressEgress * nzi){ 60 | int ingressCount; 61 | int egressCount; 62 | for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){ 63 | load_acq(ingressCount, nzi->readerCounters[i].ingress); 64 | load_acq(egressCount, nzi->readerCounters[i].egress); 65 | while(ingressCount != egressCount){ 66 | __sync_synchronize(); 67 | load_acq(ingressCount, nzi->readerCounters[i].ingress); 68 | load_acq(egressCount, nzi->readerCounters[i].egress); 69 | } 70 | } 71 | } 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/cpplock_nodetach.cpp: -------------------------------------------------------------------------------- 1 | AgnosticDXLock* cpplock_new() { 2 | AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024); 3 | new (&x->lock) locktype; 4 | return x; 5 | } 6 | 7 | void cpplock_init(AgnosticDXLock* x) { 8 | locktype* l = reinterpret_cast(&x->lock); 9 | new (l) locktype; 10 | } 11 | void cpplock_free(AgnosticDXLock* x) { 12 | locktype* l = reinterpret_cast(&x->lock); 13 | l->~locktype(); 14 | std::free(x); 15 | } 16 | 
void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 18 | locktype* l = reinterpret_cast(&x->lock); 19 | std::atomic flag(false); 20 | l->delegate_n([](void (*fun)(int, int *), int d , std::atomic* f) { fun(d, nullptr); f->store(true, std::memory_order_release); }, delgateFun, data, &flag); 21 | while(!flag.load(std::memory_order_acquire)) { 22 | qd::pause(); 23 | } 24 | } 25 | #if 1 26 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 27 | locktype* l = reinterpret_cast(&x->lock); 28 | int resp; 29 | std::atomic flag(false); 30 | l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic* f) { fun(d, r); f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag); 31 | while(!flag.load(std::memory_order_acquire)) { 32 | qd::pause(); 33 | } 34 | return resp; 35 | } 36 | #endif 37 | #if 0 38 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) { 39 | locktype* l = reinterpret_cast(&x->lock); 40 | static const int reserved = -999999; 41 | std::atomic resp(reserved); 42 | l->delegate_n([](void (*fun)(int, int *), int d , std::atomic* r) { int v = -1; fun(d, &v); r->store(v, std::memory_order_release);}, delgateFun, data, &resp); 43 | while(resp.load(std::memory_order_acquire) == reserved) { 44 | qd::pause(); 45 | } 46 | return resp; 47 | } 48 | #endif 49 | void cpplock_lock(AgnosticDXLock* x) { 50 | locktype* l = reinterpret_cast(&x->lock); 51 | l->lock(); 52 | } 53 | void cpplock_unlock(AgnosticDXLock* x) { 54 | locktype* l = reinterpret_cast(&x->lock); 55 | l->unlock(); 56 | } 57 | //void cpplock_rlock(AgnosticDXLock* x) { 58 | // locktype* l = reinterpret_cast(&x->lock); 59 | // l->rlock(); 60 | //} 61 | //void cpplock_runlock(AgnosticDXLock* x) { 62 | // locktype* l = reinterpret_cast(&x->lock); 63 | // l->runlock(); 64 | //} 65 | -------------------------------------------------------------------------------- /src/lock/cohort_lock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils/smp_utils.h" 3 | #include "common_lock_constants.h" 4 | #include "ticket_lock.h" 5 | #include "aticket_lock.h" 6 | 7 | 8 | #ifndef COHORT_LOCK_H 9 | #define COHORT_LOCK_H 10 | 11 | #define MAXIMUM_NUMBER_OF_HAND_OVERS 64 12 | 13 | typedef struct NodeLocalLockDataImpl { 14 | char pad1[64]; 15 | TicketLock lock; 16 | CacheLinePaddedInt numberOfHandOvers; 17 | CacheLinePaddedBool needToTakeGlobalLock; 18 | } NodeLocalLockData; 19 | 20 | typedef struct CohortLockImpl { 21 | char pad1[64]; 22 | void (*writer)(void *, void **); 23 | char pad2[64 - sizeof(void (*)(void*)) % 64]; 24 | ATicketLock globalLock; 25 | NodeLocalLockData localLockData[NUMBER_OF_NUMA_NODES]; 26 | } CohortLock; 27 | 28 | 29 | CohortLock * cohortlock_create(void (*writer)(void *, void **)); 30 | void cohortlock_free(CohortLock * lock); 31 | void cohortlock_initialize(CohortLock * lock, void (*writer)(void *, void **)); 32 | void cohortlock_register_this_thread(); 33 | void cohortlock_write(CohortLock *lock, void * writeInfo); 34 | bool cohortlock_write_read_lock(CohortLock *lock); 35 | void cohortlock_write_read_unlock(CohortLock * lock); 36 | void cohortlock_read_lock(CohortLock *lock); 37 | void cohortlock_read_unlock(CohortLock *lock); 38 | 39 | static inline 40 | bool cohortlock_is_locked(CohortLock *lock){ 41 | int inCounter; 42 | int outCounter; 43 | load_acq(inCounter, lock->globalLock.inCounter.value); 44 | load_acq(outCounter, 
lock->globalLock.outCounter.value); 45 | return (inCounter != outCounter); 46 | } 47 | 48 | extern __thread CacheLinePaddedInt myLocalNode __attribute__((aligned(64))); 49 | 50 | #ifdef PINNING 51 | extern __thread CacheLinePaddedInt numa_node; 52 | #endif 53 | 54 | static inline 55 | bool cohortlock_is_local_locked(CohortLock *lock){ 56 | int inCounter; 57 | int outCounter; 58 | #ifdef PINNING 59 | NodeLocalLockData * localData = &lock->localLockData[numa_node.value]; 60 | #else 61 | NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value]; 62 | #endif 63 | load_acq(inCounter, localData->lock.inCounter.value); 64 | load_acq(outCounter, localData->lock.outCounter.value); 65 | return (inCounter != outCounter); 66 | } 67 | 68 | static inline 69 | bool cohortlock_try_write_read_lock(CohortLock *lock) { 70 | if(!cohortlock_is_locked(lock) && 71 | !cohortlock_is_local_locked(lock)){ 72 | cohortlock_write_read_lock(lock); 73 | return true; 74 | }else{ 75 | return false; 76 | } 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /src/lock/extract_numa_structure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | from os import listdir 5 | from os.path import join 6 | from os import mkdir 7 | from subprocess import Popen 8 | from subprocess import PIPE 9 | import re 10 | import socket 11 | 12 | def numa_structure(): 13 | lscpu_pipe = Popen("lscpu",stdout = PIPE).stdout 14 | number_of_numa_nodes = 0 15 | cpus_per_node = 0 16 | not_ready = True 17 | outputString = "" 18 | coresNodeList = [] 19 | while not_ready: 20 | line = lscpu_pipe.readline() 21 | if line: 22 | matchObject = re.search("NUMA node\d CPU\(s\):(.*)", line, re.M) 23 | if matchObject: 24 | number_of_numa_nodes = number_of_numa_nodes + 1 25 | cpusString = matchObject.group(1).strip() 26 | rangeMatchObject = re.search("(\d+)-(\d+)", cpusString, re.M) 27 | coreListString = "" 28 | if rangeMatchObject: 29 | start = int(rangeMatchObject.group(1).strip()) 30 | end = int(rangeMatchObject.group(2).strip()) 31 | cpus_per_node = end - start + 1 32 | coreList = [] 33 | for i in range(start, end+1): 34 | coreList.append(str(i)) 35 | coreListString = ",".join(coreList) 36 | else: 37 | cpus_per_node = len(cpusString.split(",")) 38 | coreListString = re.sub(r' ', "", cpusString) 39 | coresNodeList.append(coreListString) 40 | else: 41 | not_ready = False 42 | 43 | #Hack because of bug in cpuinfo for bulldozer 44 | if socket.gethostname()=="bulldozer": 45 | newCoresNodeList = [] 46 | for i in range(0, number_of_numa_nodes, 2): 47 | newCoresNodeList.append(coresNodeList[i] + "," + coresNodeList[i+1]) 48 | coresNodeList = newCoresNodeList 49 | number_of_numa_nodes = number_of_numa_nodes / 2 50 | cpus_per_node = cpus_per_node * 2 51 | lscpu_pipe.close() 52 | coresNodeList = ["{" + x + "}" for x in coresNodeList] 53 | numaStructure = "{" + ",".join(coresNodeList) + "}" 54 | return(number_of_numa_nodes, cpus_per_node, numaStructure) 55 | 56 | def numa_structure_defines(): 57 | number_of_numa_nodes, cpus_per_node, numaStructure = numa_structure() 58 | return [('NUMBER_OF_NUMA_NODES', str(number_of_numa_nodes)), 59 | ('NUMBER_OF_CPUS_PER_NODE', str(cpus_per_node)), 60 | ('NUMA_STRUCTURE', numaStructure), 61 | ('_GNU_SOURCE', '1')] 62 | 63 | number_of_numa_nodes, cpus_per_node, numaStructure = numa_structure() 64 | 65 | --------------------------------------------------------------------------------
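The delegate-and-wait functions in cpplock_nodetach.cpp above all follow the same completion-flag protocol: the caller hands the lock a closure plus the address of an std::atomic<bool>, the helper thread that currently holds the lock runs the closure and publishes completion with a release store, and the caller spins on an acquire load. Below is a minimal sketch of just that handshake, not code from this repository: a plain std::thread stands in for the lock's helper thread, and std::this_thread::yield() stands in for qd::pause().

#include <atomic>
#include <cstdio>
#include <thread>

// The delegated operation: writes its result through the response pointer.
static void add_one_op(int data, int* response) {
    if (response != nullptr) { *response = data + 1; }
}

int main() {
    int resp = 0;
    std::atomic<bool> flag(false);
    // Stand-in for the helper: run the operation, then publish
    // completion with a release store (as the delegate_n lambdas do above).
    std::thread helper([&resp, &flag] {
        add_one_op(41, &resp);
        flag.store(true, std::memory_order_release);
    });
    // The delegating thread spins until the acquire load pairs with the
    // release store; only then is it safe to read resp.
    while (!flag.load(std::memory_order_acquire)) {
        std::this_thread::yield();
    }
    std::printf("response: %d\n", resp);
    helper.join();
    return 0;
}

The release/acquire pair is what makes reading the plain int resp safe: everything the helper wrote before the release store is visible once the acquire load observes true.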
/src/datastructures_bench/synch_algorithms/cpplock.h: -------------------------------------------------------------------------------- 1 | #ifndef CPPLOCK_H 2 | #define CPPLOCK_H 3 | 4 | typedef struct AgnosticDXLockImpl { 5 | void (*defaultWriter)(int, int *); 6 | char pad2[64 - (sizeof(void * (*)(void*)) % 64)]; 7 | char lock[256*1024*1024]; 8 | } AgnosticDXLock; 9 | 10 | AgnosticDXLock* cpplock_new(); 11 | void cpplock_free(AgnosticDXLock*); 12 | void cpplock_init(AgnosticDXLock*); 13 | void cpplock_delegate(AgnosticDXLock* lock, void (*delgateFun)(int, int *), int data); 14 | int cpplock_delegate_and_wait (AgnosticDXLock* lock, void (*delgateFun)(int, int *), int data); 15 | void cpplock_lock(AgnosticDXLock*); 16 | void cpplock_unlock(AgnosticDXLock*); 17 | void cpplock_rlock(AgnosticDXLock*); 18 | void cpplock_runlock(AgnosticDXLock*); 19 | 20 | static void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *)); 21 | static inline AgnosticDXLock * adxlock_create(void (*writer)(int, int *)){ 22 | AgnosticDXLock * lock = cpplock_new(); 23 | return lock; 24 | } 25 | 26 | static inline void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *)){ 27 | //TODO check if the following typecast is fine 28 | lock->defaultWriter = defaultWriter; 29 | cpplock_init(lock); 30 | __sync_synchronize(); 31 | } 32 | 33 | static inline void adxlock_free(AgnosticDXLock * lock){ 34 | cpplock_free(lock); 35 | } 36 | 37 | static inline void adxlock_register_this_thread(){ 38 | } 39 | 40 | //int delegate_cpp(void (*delgateFun)(int, int *), int data, int* resp) { 41 | // int response = cpplock_delegate_wrapper(delegateFun); 42 | 43 | 44 | 45 | static inline 46 | int adxlock_write_with_response_block(AgnosticDXLock *lock, 47 | void (*delgateFun)(int, int *), 48 | int data){ 49 | return cpplock_delegate_and_wait(lock, delgateFun, data); 50 | } 51 | static inline 52 | void adxlock_delegate(AgnosticDXLock *lock, 53 | void (*delgateFun)(int, int *), 54 | int data) { 55 | cpplock_delegate(lock, delgateFun, data); 56 | } 57 | 58 | static inline 59 | void adxlock_write(AgnosticDXLock *lock, int writeInfo) { 60 | adxlock_delegate(lock, lock->defaultWriter, writeInfo); 61 | } 62 | 63 | static inline 64 | void adxlock_write_read_lock(AgnosticDXLock *lock) { 65 | cpplock_lock(lock); 66 | } 67 | 68 | static inline 69 | void adxlock_write_read_unlock(AgnosticDXLock * lock) { 70 | cpplock_unlock(lock); 71 | } 72 | 73 | //void adxlock_read_lock(AgnosticDXLock *lock) { 74 | // cpplock_rlock(lock); 75 | //} 76 | 77 | //void adxlock_read_unlock(AgnosticDXLock *lock) { 78 | // cpplock_runlock(lock); 79 | //} 80 | 81 | 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /src/benchmark/perf_magic: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HUMAN_READABLE= 4 | 5 | FILE=$@ 6 | if [ "$FILE" == "-" ] || [ "$FILE" == "" ] 7 | then 8 | index=0 9 | while read line 10 | do 11 | input=$(echo $line | egrep " r....:u" | sed -e 's/,//g;s/r....:u//g;s/[[:space:]]\+/ /g') 12 | if [ "$input" == "" ] 13 | then 14 | continue 15 | fi 16 | ARR[$index]=$input 17 | index=$((index+1)) 18 | done 19 | elif ! [ -f $FILE ] 20 | then 21 | echo "file '$FILE' not found." 
22 | exit 23 | else 24 | ARR=($(egrep " r....:u" $FILE | sed -e 'N;N;N;s/,//g;s/r....:u//g;s/[[:space:]]\+/ /g')) 25 | fi 26 | 27 | rate() { 28 | if [ $HUMAN_READABLE ] 29 | then 30 | echo $(dc -e "2 k $1 100 * $2 / p")% 31 | else 32 | echo $(dc -e "10 k $1 $2 / p") 33 | fi 34 | } 35 | 36 | amount() { 37 | if [ $HUMAN_READABLE ] 38 | then 39 | if [ $1 -gt 1000000000 ] 40 | then 41 | echo $(dc -e "2 k $1 1000000000 / p") x10^9 42 | elif [ $1 -gt 1000000 ] 43 | then 44 | echo $(dc -e "2 k $1 1000000 / p") x10^6 45 | elif [ $1 -gt 1000 ] 46 | then 47 | echo $(dc -e "2 k $1 1000 / p") x10^3 48 | else 49 | echo $1 50 | fi 51 | else 52 | echo $1 53 | fi 54 | } 55 | 56 | L1=${ARR[0]} 57 | L2=${ARR[1]} 58 | L3=${ARR[2]} 59 | ALL=${ARR[3]} 60 | 61 | L1_MISS=$(($ALL - $L1)) 62 | L1_REFS=$ALL 63 | L1_HIT_RATE=$(rate ${L1} ${L1_REFS}) 64 | L1_MISS_RATE=$(rate ${L1_MISS} ${L1_REFS}) 65 | 66 | L2_MISS=$(($ALL - $L2 - $L1)) 67 | L2_REFS=$(($ALL - $L1)) 68 | L2_HIT_RATE=$(rate ${L2} ${L2_REFS}) 69 | L2_MISS_RATE=$(rate ${L2_MISS} ${L2_REFS}) 70 | 71 | L3_MISS=$(($ALL - $L3 - $L2 - $L1)) 72 | L3_REFS=$(($ALL - $L2 - $L1)) 73 | L3_HIT_RATE=$(rate ${L3} ${L3_REFS}) 74 | L3_MISS_RATE=$(rate ${L3_MISS} ${L3_REFS}) 75 | 76 | if [ $HUMAN_READABLE ] 77 | then 78 | echo "memory loads: $(amount $ALL)" 79 | echo 80 | echo "L1 hits: $(amount $L1)" 81 | echo "L1 misses: $(amount $L1_MISS)" 82 | echo "L1 hit rate: ${L1_HIT_RATE}" 83 | echo "L1 miss rate: ${L1_MISS_RATE}" 84 | echo 85 | echo "L2 hits: $(amount $L2)" 86 | echo "L2 misses: $(amount $L2_MISS)" 87 | echo "L2 hit rate: ${L2_HIT_RATE}" 88 | echo "L2 miss rate: ${L2_MISS_RATE}" 89 | echo 90 | echo "L3 hits: $(amount $L3)" 91 | echo "L3 misses: $(amount $L3_MISS)" 92 | echo "L3 hit rate: ${L3_HIT_RATE}" 93 | echo "L3 miss rate: ${L3_MISS_RATE}" 94 | 95 | else 96 | echo -n " $(amount $ALL)" 97 | echo -n " $(amount $L1)" 98 | echo -n " $(amount $L1_MISS)" 99 | echo -n " ${L1_HIT_RATE}" 100 | echo -n " ${L1_MISS_RATE}" 101 | echo -n " $(amount $L2)" 102 | echo -n " $(amount $L2_MISS)" 103 | echo -n " ${L2_HIT_RATE}" 104 | echo -n " ${L2_MISS_RATE}" 105 | echo -n " $(amount $L3)" 106 | echo -n " $(amount $L3_MISS)" 107 | echo -n " ${L3_HIT_RATE}" 108 | echo -n " ${L3_MISS_RATE}" 109 | 110 | fi 111 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | #define _GNU_SOURCE 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #if defined(__linux__) 15 | #include 16 | #include 17 | #include 18 | #endif 19 | 20 | #if defined(__APPLE__) 21 | #include 22 | #endif 23 | 24 | 25 | 26 | #define DCL_ALIGN __attribute__((aligned (2*CACHE_LINE_SIZE))) 27 | #define CACHELINE __attribute__((aligned (1*CACHE_LINE_SIZE))) 28 | 29 | #define ATPAGESIZE __attribute__((aligned (PAGESIZE))) 30 | 31 | #define SQR(x) (x)*(x) 32 | 33 | #define max(a,b) \ 34 | ({ __typeof__ (a) _a = (a); \ 35 | __typeof__ (b) _b = (b); \ 36 | _a > _b ? _a : _b; }) 37 | 38 | #define min(a,b) \ 39 | ({ __typeof__ (a) _a = (a); \ 40 | __typeof__ (b) _b = (b); \ 41 | _a < _b ? 
_a : _b; }) 42 | 43 | 44 | typedef struct thread_args_s 45 | { 46 | pthread_t thread; 47 | int id; 48 | gsl_rng *rng; 49 | int measure; 50 | int cycles; 51 | char pad[128]; 52 | } thread_args_t; 53 | 54 | 55 | #define E(c) \ 56 | do { \ 57 | int _c = (c); \ 58 | if (_c < 0) { \ 59 | fprintf(stderr, "E: %s: %d: %s\n", \ 60 | __FILE__, __LINE__, #c); \ 61 | } \ 62 | } while (0) 63 | 64 | #define E_en(c) \ 65 | do { \ 66 | int _c = (c); \ 67 | if (_c != 0) { \ 68 | fprintf(stderr, "%s\n", strerror(_c)); \ 69 | } \ 70 | } while (0) 71 | 72 | #define E_NULL(c) \ 73 | do { \ 74 | if ((c) == NULL) { \ 75 | perror("E_NULL"); \ 76 | } \ 77 | } while (0) 78 | 79 | 80 | #if defined(__x86_64__) 81 | /* accurate time measurements on recent CPUs */ 82 | static inline uint64_t __attribute__((always_inline)) 83 | read_tsc_p() 84 | { 85 | uint64_t tsc; 86 | __asm__ __volatile__ ("rdtscp\n" 87 | "shl $32, %%rdx\n" 88 | "or %%rdx, %%rax" 89 | : "=a"(tsc) 90 | : 91 | : "%rcx", "%rdx"); 92 | return tsc; 93 | } 94 | 95 | #define CB() __asm__ __volatile__("":::"memory") 96 | #define IMB() __asm__ __volatile__("mfence":::"memory") 97 | #define IRMB() __asm__ __volatile__("lfence":::"memory") 98 | #define IWMB() __asm__ __volatile__("sfence":::"memory") 99 | 100 | #else 101 | #error Unsupported architecture 102 | #endif // __x86_64__ 103 | 104 | 105 | #if defined(__linux__) 106 | extern pid_t gettid(void); 107 | extern void pina(pid_t t, int cpu); 108 | #endif 109 | 110 | extern void gettime(struct timespec *t); 111 | extern struct timespec timediff(struct timespec, struct timespec); 112 | 113 | 114 | #endif 115 | 116 | -------------------------------------------------------------------------------- /src/benchmark/perf_magic_simple: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HUMAN_READABLE= 4 | 5 | echo RUNNING 6 | 7 | FILE=$@ 8 | if [ "$FILE" == "-" ] || [ "$FILE" == "" ] 9 | then 10 | index=0 11 | while read line 12 | do 13 | echo line 14 | input=$(echo $line | egrep " r....:u" | sed -e 's/,//g;s/r....:u//g;s/[[:space:]]\+/ /g') 15 | echo "HEJ" 16 | echo $input 17 | if [ "$input" == "" ] 18 | then 19 | continue 20 | fi 21 | ARR[$index]=$input 22 | index=$((index+1)) 23 | done 24 | elif ! [ -f $FILE ] 25 | then 26 | echo "file '$FILE' not found." 
27 | exit 28 | else 29 | ARR=($(egrep " r....:u" $FILE | sed -e 'N;N;N;s/,//g;s/r....:u//g;s/[[:space:]]\+/ /g')) 30 | fi 31 | 32 | rate() { 33 | if [ $HUMAN_READABLE ] 34 | then 35 | echo $(dc -e "2 k $1 100 * $2 / p")% 36 | else 37 | echo $(dc -e "10 k $1 $2 / p") 38 | fi 39 | } 40 | 41 | amount() { 42 | if [ $HUMAN_READABLE ] 43 | then 44 | if [ $1 -gt 1000000000 ] 45 | then 46 | echo $(dc -e "2 k $1 1000000000 / p") x10^9 47 | elif [ $1 -gt 1000000 ] 48 | then 49 | echo $(dc -e "2 k $1 1000000 / p") x10^6 50 | elif [ $1 -gt 1000 ] 51 | then 52 | echo $(dc -e "2 k $1 1000 / p") x10^3 53 | else 54 | echo $1 55 | fi 56 | else 57 | echo $1 58 | fi 59 | } 60 | 61 | L1=${ARR[0]} 62 | L2=${ARR[1]} 63 | L3=${ARR[2]} 64 | ALL=${ARR[3]} 65 | 66 | L1_MISS=$(($ALL - $L1)) 67 | L1_REFS=$ALL 68 | L1_HIT_RATE=$(rate ${L1} ${L1_REFS}) 69 | L1_MISS_RATE=$(rate ${L1_MISS} ${L1_REFS}) 70 | 71 | L2_MISS=$(($ALL - $L2 - $L1)) 72 | L2_REFS=$(($ALL - $L1)) 73 | L2_HIT_RATE=$(rate ${L2} ${L2_REFS}) 74 | L2_MISS_RATE=$(rate ${L2_MISS} ${L2_REFS}) 75 | 76 | L3_MISS=$(($ALL - $L3 - $L2 - $L1)) 77 | L3_REFS=$(($ALL - $L2 - $L1)) 78 | L3_HIT_RATE=$(rate ${L3} ${L3_REFS}) 79 | L3_MISS_RATE=$(rate ${L3_MISS} ${L3_REFS}) 80 | 81 | if [ $HUMAN_READABLE ] 82 | then 83 | echo "memory loads: $(amount $ALL)" 84 | echo 85 | echo "L1 hits: $(amount $L1)" 86 | echo "L1 misses: $(amount $L1_MISS)" 87 | echo "L1 hit rate: ${L1_HIT_RATE}" 88 | echo "L1 miss rate: ${L1_MISS_RATE}" 89 | echo 90 | echo "L2 hits: $(amount $L2)" 91 | echo "L2 misses: $(amount $L2_MISS)" 92 | echo "L2 hit rate: ${L2_HIT_RATE}" 93 | echo "L2 miss rate: ${L2_MISS_RATE}" 94 | echo 95 | echo "L3 hits: $(amount $L3)" 96 | echo "L3 misses: $(amount $L3_MISS)" 97 | echo "L3 hit rate: ${L3_HIT_RATE}" 98 | echo "L3 miss rate: ${L3_MISS_RATE}" 99 | 100 | else 101 | echo -n " $(amount $ALL)" 102 | echo -n " $(amount $L1)" 103 | echo -n " $(amount $L1_MISS)" 104 | echo -n " ${L1_HIT_RATE}" 105 | echo -n " ${L1_MISS_RATE}" 106 | echo -n " $(amount $L2)" 107 | echo -n " $(amount $L2_MISS)" 108 | echo -n " ${L2_HIT_RATE}" 109 | echo -n " ${L2_MISS_RATE}" 110 | echo -n " $(amount $L3)" 111 | echo -n " $(amount $L3_MISS)" 112 | echo -n " ${L3_HIT_RATE}" 113 | echo -n " ${L3_MISS_RATE}" 114 | 115 | fi 116 | -------------------------------------------------------------------------------- /src/lock/mcs_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "mcs_lock.h" 6 | 7 | __thread MCSNode myMCSNode __attribute__((aligned(64))); 8 | 9 | static inline 10 | MCSNode * get_and_set_node_ptr(MCSNode ** pointerToOldValue, MCSNode * newValue){ 11 | MCSNode * x = ACCESS_ONCE(*pointerToOldValue); 12 | while (true) { 13 | if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue)) 14 | return x; 15 | x = ACCESS_ONCE(*pointerToOldValue); 16 | } 17 | } 18 | 19 | MCSLock * mcslock_create(void (*writer)(void *, void **)){ 20 | MCSLock * lock = malloc(sizeof(MCSLock)); 21 | mcslock_initialize(lock, writer); 22 | return lock; 23 | } 24 | 25 | void mcslock_initialize(MCSLock * lock, void (*writer)(void *, void **)){ 26 | lock->writer = writer; 27 | lock->endOfQueue.value = NULL; 28 | __sync_synchronize(); 29 | } 30 | 31 | void mcslock_free(MCSLock * lock){ 32 | free(lock); 33 | } 34 | 35 | void mcslock_register_this_thread(){ 36 | MCSNode * node = &myMCSNode; 37 | node->locked.value = false; 38 | node->next.value = NULL; 39 | } 40 | 41 | void mcslock_write(MCSLock *lock, void * writeInfo) 
{ 42 | mcslock_write_read_lock(lock); 43 | lock->writer(writeInfo, NULL); 44 | mcslock_write_read_unlock(lock); 45 | 46 | } 47 | 48 | //Returns true if it is taken over from another writer and false otherwise 49 | bool mcslock_write_read_lock(MCSLock *lock) { 50 | bool isNodeLocked; 51 | MCSNode * node = &myMCSNode; 52 | node->next.value = NULL; 53 | MCSNode * predecessor = get_and_set_node_ptr(&lock->endOfQueue.value, node); 54 | if (predecessor != NULL) { 55 | store_rel(node->locked.value, true); 56 | store_rel(predecessor->next.value, node); 57 | load_acq(isNodeLocked, node->locked.value); 58 | //Wait 59 | while (isNodeLocked) { 60 | __sync_synchronize(); 61 | load_acq(isNodeLocked, node->locked.value); 62 | } 63 | return true; 64 | }else{ 65 | return false; 66 | } 67 | } 68 | 69 | void mcslock_write_read_unlock(MCSLock * lock) { 70 | MCSNode * nextNode; 71 | MCSNode * node = &myMCSNode; 72 | load_acq(nextNode, node->next.value); 73 | if (nextNode == NULL) { 74 | if (__sync_bool_compare_and_swap(&lock->endOfQueue.value, node, NULL)){ 75 | return; 76 | } 77 | //wait 78 | load_acq(nextNode, node->next.value); 79 | while (nextNode == NULL) { 80 | __sync_synchronize(); 81 | load_acq(nextNode, node->next.value); 82 | } 83 | } 84 | store_rel(node->next.value->locked.value, false); 85 | __sync_synchronize();//Push change 86 | } 87 | 88 | void mcslock_read_lock(MCSLock *lock) { 89 | mcslock_write_read_lock(lock); 90 | } 91 | 92 | void mcslock_read_unlock(MCSLock *lock) { 93 | mcslock_write_read_unlock(lock); 94 | } 95 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/ptst.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * ptst.c 3 | * 4 | * Per-thread state management. Essentially the state management parts 5 | * of MB's garbage-collection code have been pulled out and placed 6 | * here, for the use of other utility routines. 7 | * 8 | * Copyright (c) 2013, Jonatan Linden 9 | * Copyright (c) 2002-2003, K A Fraser 10 | * 11 | * All rights reserved. 12 | * 13 | * Redistribution and use in source and binary forms, with or without 14 | * modification, are permitted provided that the following conditions 15 | * are met: 16 | * 17 | * * Redistributions of source code must retain the above copyright 18 | * notice, this list of conditions and the following disclaimer. 19 | * 20 | * * Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials provided 23 | * with the distribution. 24 | * 25 | * * The name of the author may not be used to endorse or promote 26 | * products derived from this software without specific prior 27 | * written permission. 28 | * 29 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 30 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 31 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 33 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 35 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 37 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 38 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 | */ 41 | 42 | #include 43 | #include 44 | #include 45 | #include "random.h" 46 | #include "portable_defns.h" 47 | #include "ptst.h" 48 | 49 | ptst_t *ptst_list = NULL; 50 | extern __thread ptst_t *ptst; 51 | static unsigned int next_id = 0; 52 | 53 | void 54 | critical_enter() 55 | { 56 | ptst_t *next, *new_next; 57 | 58 | if ( ptst == NULL ) 59 | { 60 | ptst = (ptst_t *) ALIGNED_ALLOC(sizeof(ptst_t)); 61 | if ( ptst == NULL ) exit(1); 62 | 63 | memset(ptst, 0, sizeof(ptst_t)); 64 | ptst->gc = gc_init(); 65 | ptst->count = 1; 66 | ptst->id = __sync_fetch_and_add(&next_id, 1); 67 | rand_init(ptst); 68 | new_next = ptst_list; 69 | do { 70 | ptst->next = next = new_next; 71 | } 72 | while ( (new_next = __sync_val_compare_and_swap(&ptst_list, next, ptst)) != next ); 73 | } 74 | 75 | gc_enter(ptst); 76 | return; 77 | } 78 | 79 | 80 | 81 | static void ptst_destructor(ptst_t *ptst) 82 | { 83 | ptst->count = 0; 84 | } 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/lock/rhqd_lock.c: -------------------------------------------------------------------------------- 1 | #define READ_PATIENCE_LIMIT 130000 2 | #include "rhqd_lock.h" 3 | 4 | QDLock * qdlock_create(void (*defaultWriter)(void *, void **)){ 5 | QDLock * lock = (QDLock *)malloc(sizeof(QDLock)); 6 | qdlock_initialize(lock, defaultWriter); 7 | return lock; 8 | } 9 | 10 | void qdlock_initialize(QDLock * lock, void (*defaultWriter)(void *, void **)){ 11 | //TODO check if the following typecast is fine 12 | lock->defaultWriter = defaultWriter; 13 | tataslock_initialize(&lock->lock, defaultWriter); 14 | drmvqueue_initialize(&lock->writeQueue); 15 | __sync_synchronize(); 16 | } 17 | 18 | void qdlock_free(QDLock * lock){ 19 | free(lock); 20 | } 21 | 22 | //******* 23 | //rhqdlock 24 | //******* 25 | 26 | void rhqdlock_initialize(RHQDLock * lock, void (*defaultWriter)(void *, void **)); 27 | RHQDLock * rhqdlock_create(void (*writer)(void *, void **)){ 28 | RHQDLock * lock = (RHQDLock *)malloc(sizeof(RHQDLock)); 29 | rhqdlock_initialize(lock, writer); 30 | return lock; 31 | } 32 | 33 | void rhqdlock_initialize(RHQDLock * lock, void (*defaultWriter)(void *, void **)){ 34 | for(int n = 0; n < NUMBER_OF_NUMA_NODES; n++){ 35 | qdlock_initialize(&lock->localLocks[n], defaultWriter); 36 | } 37 | mcslock_initialize(&lock->globalLock, defaultWriter); 38 | NZI_INITIALIZE(&lock->nonZeroIndicator); 39 | lock->writeBarrier.value = 0; 40 | __sync_synchronize(); 41 | } 42 | 43 | void rhqdlock_free(RHQDLock * lock){ 44 | free(lock); 45 | } 46 | 47 | void rhqdlock_register_this_thread(){ 48 | assign_id_to_thread(); 49 | mcslock_register_this_thread(); 50 | } 51 | 52 | 53 | 54 | void rhqdlock_write(RHQDLock *lock, void * writeInfo) { 55 | rhqdlock_delegate(lock, lock->localLocks[0].defaultWriter, writeInfo); 56 | } 57 | 58 | void rhqdlock_write_read_lock(RHQDLock *lock) { 59 | waitUntilWriteBarrierOff(lock); 60 | mcslock_write_read_lock(&lock->globalLock); 61 | 
NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator); 62 | } 63 | 64 | void rhqdlock_write_read_unlock(RHQDLock * lock) { 65 | mcslock_write_read_unlock(&lock->globalLock); 66 | } 67 | 68 | void rhqdlock_read_lock(RHQDLock *lock) { 69 | bool bRaised = false; 70 | int readPatience = 0; 71 | start: 72 | NZI_ARRIVE(&lock->nonZeroIndicator); 73 | if(mcslock_is_locked(&lock->globalLock)){ 74 | NZI_DEPART(&lock->nonZeroIndicator); 75 | while(mcslock_is_locked(&lock->globalLock)){ 76 | __sync_synchronize();//Pause (pause instruction might be better) 77 | if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){ 78 | __sync_fetch_and_add(&lock->writeBarrier.value, 1); 79 | bRaised = true; 80 | } 81 | readPatience = readPatience + 1; 82 | } 83 | goto start; 84 | } 85 | if(bRaised){ 86 | __sync_fetch_and_sub(&lock->writeBarrier.value, 1); 87 | } 88 | } 89 | 90 | void rhqdlock_read_unlock(RHQDLock *lock) { 91 | NZI_DEPART(&lock->nonZeroIndicator); 92 | } 93 | -------------------------------------------------------------------------------- /qd_library/qd_condition_variable.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_condition_variable_hpp 2 | #define qd_condition_variable_hpp qd_condition_variable_hpp 3 | 4 | #include "util/pause.hpp" 5 | #include "qdlock_base.hpp" 6 | 7 | template 8 | class qd_condition_variable_impl : private qdlock_base { 9 | typedef qdlock_base base; 10 | public: 11 | qd_condition_variable_impl() { 12 | this->delegation_queue.open(); 13 | } 14 | qd_condition_variable_impl(const qd_condition_variable_impl&) = delete; 15 | qd_condition_variable_impl& operator=(const qd_condition_variable_impl&) = delete; 16 | 17 | /* TODO: these notify implementations / flush implementations run at risk of deadlocking 18 | * when the notifying thread becomes a helper and also needs to perform additional 19 | * synchronization steps. 20 | */ 21 | void notify_one() { 22 | this->mutex_lock.lock(); 23 | this->delegation_queue.flush_one(); 24 | this->mutex_lock.unlock(); 25 | } 26 | void notify_all() { 27 | this->mutex_lock.lock(); 28 | this->delegation_queue.flush(); 29 | this->mutex_lock.unlock(); 30 | } 31 | 32 | /* interface _p functions: User provides a promise, which is used explicitly by the delegated (void) function */ 33 | template 34 | auto wait_redelegate_p(Lock* l, Promise&& result, Ps&&... ps) 35 | -> void 36 | { 37 | wait_redelegate(l, std::forward(result), std::forward(ps)...); 38 | } 39 | template 40 | auto wait_redelegate_p(Function&& f, Lock* l, Promise&& result, Ps&&... ps) 41 | -> void 42 | { 43 | /* type of functor/function ptr stored in f, set template function pointer to NULL */ 44 | wait_redelegate(l, std::forward(result), std::forward(f), std::forward(ps)...); 45 | } 46 | private: 47 | 48 | 49 | template 50 | auto wait_redelegate(Lock* l, Promise&& result, Ps&&... ps) 51 | -> void 52 | { 53 | while(true) { 54 | /* TODO enqueue a function that re-delegates the provided function with its parameter to Lock l TODO */ 55 | std::nullptr_t no_promise; 56 | if(this->template enqueue>(&no_promise, &l, &result, (&ps)...)) { 57 | return; 58 | } 59 | qd::pause(); 60 | } 61 | } 62 | template 63 | static void redelegate(Lock* l, Promise&& p, Ps&&... 
ps) { 64 | using no_promise = typename base::no_promise::promise; 65 | l->template delegate(nullptr, std::forward(p), std::forward(ps)...); 66 | } 67 | 68 | /* TODO: wait_for and wait_until for time-based waiting */ 69 | 70 | }; 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /qd_library/padded.hpp: -------------------------------------------------------------------------------- 1 | #ifndef padded_hpp 2 | #define padded_hpp 3 | 4 | template 5 | class padded_base; 6 | 7 | template 8 | class padded_base { 9 | T value; 10 | char padding[PADSIZE - (sizeof(T)%PADSIZE)]; 11 | typedef padded_base P_type; 12 | typedef typename std::remove_pointer::type T_dereferenced; 13 | friend class padded_base; 14 | public: 15 | padded_base() {} /* this is a basic type, it is NOT initialized to 0 */ 16 | padded_base(const T v) : value(v) {} 17 | padded_base(const P_type& v) : value(v) {} 18 | padded_base(padded_base* const v) : value(v) {} 19 | padded_base(padded_base* const v) : value(&v->value) {} 20 | 21 | operator T&() { 22 | return value; 23 | } 24 | operator const T&() const { 25 | return value; 26 | } 27 | P_type& operator=(P_type other) { 28 | swap(*this, other); 29 | return *this; 30 | } 31 | P_type& operator=(T other) { 32 | using std::swap; 33 | swap(value, other); 34 | return *this; 35 | } 36 | 37 | bool operator==(const T other) { 38 | return value == other; 39 | } 40 | bool operator!=(const T other) { 41 | return !(*this == other); 42 | } 43 | T_dereferenced& operator*() { 44 | return *value; 45 | } 46 | const T_dereferenced& operator*() const { 47 | return *value; 48 | } 49 | T_dereferenced* operator->() { 50 | return value; 51 | } 52 | const T_dereferenced* operator->() const { 53 | return value; 54 | } 55 | 56 | T& get() { 57 | return value; 58 | } 59 | 60 | const T& get() const { 61 | return value; 62 | } 63 | 64 | friend void swap(P_type& first, P_type& second) { 65 | using std::swap; 66 | swap(first.value, second.value); 67 | } 68 | }; 69 | 70 | template 71 | class padded_base : public T { 72 | char padding[PADSIZE - (sizeof(T)%PADSIZE)]; 73 | //typedef padded_base P_type; 74 | //typedef typename std::remove_pointer::type T_dereferenced; 75 | //friend class padded_base; 76 | public: 77 | using T::T; 78 | using T::operator=; 79 | // T_dereferenced& operator*() { 80 | // return **this; 81 | // } 82 | // const T_dereferenced& operator*() const { 83 | // return *value; 84 | // } 85 | // T_dereferenced* operator->() { 86 | // return value; 87 | // } 88 | // const T_dereferenced* operator->() const { 89 | // return value; 90 | // } 91 | // 92 | T& get() { 93 | return *this; 94 | } 95 | 96 | const T& get() const { 97 | return *this; 98 | } 99 | }; 100 | 101 | template 102 | class padded : public padded_base::value> { 103 | typedef padded_base::value> base_type; 104 | public: 105 | using base_type::base_type; 106 | using base_type::operator=; 107 | }; 108 | 109 | #endif // padded_hpp 110 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/clh.h: -------------------------------------------------------------------------------- 1 | #ifndef _CLH_H_ 2 | 3 | #define _CLH_H_ 4 | 5 | #include 6 | #include 7 | #include "synch_algs_config.h" 8 | #include "synch_algs_primitives.h" 9 | 10 | #ifdef POSIX_LOCKS 11 | 12 | typedef pthread_mutex_t CLHLockStruct; 13 | 14 | 15 | static inline void clhLock(LockStruct *l, int pid) { 16 | pthread_mutex_lock(l); 17 | } 18 | 19 | static inline 
void clhUnlock(LockStruct *l, int pid) { 20 | pthread_mutex_unlock(l); 21 | } 22 | 23 | LockStruct *clhLockInit(void) { 24 | LockStruct *l, tmp = PTHREAD_MUTEX_INITIALIZER; 25 | int error; 26 | 27 | error = posix_memalign((void *)&l, CACHE_LINE_SIZE, sizeof(CLHLockStruct)); 28 | *l = tmp; 29 | return l; 30 | } 31 | 32 | #else 33 | typedef union CLHLockNode { 34 | bool locked; 35 | char align[CACHE_LINE_SIZE]; 36 | } CLHLockNode; 37 | 38 | typedef struct CLHLockStruct { 39 | volatile CLHLockNode *Tail CACHE_ALIGN; 40 | char pad1[128]; 41 | // volatile CLHLockNode *MyNode[N_THREADS] CACHE_ALIGN; 42 | // volatile CLHLockNode *MyPred[N_THREADS] CACHE_ALIGN; 43 | } CLHLockStruct; 44 | 45 | typedef struct CLHThreadLocalData { 46 | char pad1[128]; 47 | volatile CLHLockNode *MyNode CACHE_ALIGN; 48 | volatile CLHLockNode *MyPred CACHE_ALIGN; 49 | char pad2[128 - 2*sizeof(CLHLockNode *)]; 50 | } CLHThreadLocalData; 51 | 52 | __thread CLHThreadLocalData threadLocalData __attribute__((aligned(64))); 53 | 54 | 55 | static inline void clhLock(CLHLockStruct *l, int pid) { 56 | threadLocalData.MyNode->locked = true; 57 | threadLocalData.MyPred = (CLHLockNode *)__SWAP(&l->Tail, (void *)threadLocalData.MyNode); 58 | while (threadLocalData.MyPred->locked == true) { 59 | #if N_THREADS > USE_CPUS 60 | sched_yield(); 61 | #else 62 | ; 63 | #endif 64 | } 65 | } 66 | 67 | static inline void clhUnlock(CLHLockStruct *l, int pid) { 68 | threadLocalData.MyNode->locked = false; 69 | threadLocalData.MyNode = threadLocalData.MyPred; 70 | #ifdef sparc 71 | StoreFence(); 72 | #endif 73 | } 74 | 75 | void clhThreadLocalInit(){ 76 | threadLocalData.MyNode = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode)); 77 | threadLocalData.MyPred = null; 78 | } 79 | 80 | CLHLockStruct *clhLockInit(void) { 81 | CLHLockStruct *l; 82 | // int j; 83 | 84 | l = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockStruct)); 85 | l->Tail = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode)); 86 | l->Tail->locked = false; 87 | 88 | // for (j = 0; j < N_THREADS; j++) { 89 | // l->MyNode[j] = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode)); 90 | // l->MyPred[j] = null; 91 | //} 92 | 93 | return l; 94 | } 95 | 96 | void clhLockInitExisting(CLHLockStruct * l) { 97 | l->Tail = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode)); 98 | l->Tail->locked = false; 99 | } 100 | 101 | #endif 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /src/datastructures/padded.hpp: -------------------------------------------------------------------------------- 1 | #ifndef padded_hpp 2 | #define padded_hpp 3 | 4 | template 5 | class padded_base; 6 | 7 | template 8 | class padded_base { 9 | T value; 10 | char padding[PADSIZE - (sizeof(T)%PADSIZE)]; 11 | typedef padded_base P_type; 12 | typedef typename std::remove_pointer::type T_dereferenced; 13 | friend class padded_base; 14 | public: 15 | padded_base() {} /* this is a basic type, it is NOT initialized to 0 */ 16 | padded_base(const T v) : value(v) {} 17 | padded_base(const P_type& v) : value(v) {} 18 | padded_base(padded_base* const v) : value(v) {} 19 | padded_base(padded_base* const v) : value(&v->value) {} 20 | 21 | operator T&() { 22 | return value; 23 | } 24 | operator const T&() const { 25 | return value; 26 | } 27 | P_type& operator=(P_type other) { 28 | swap(*this, other); 29 | return *this; 30 | } 31 | P_type& operator=(T other) { 32 | using std::swap; 33 | swap(value, other); 34 | return *this; 35 | } 36 | 37 | bool operator==(const T 
other) { 38 | return value == other; 39 | } 40 | bool operator!=(const T other) { 41 | return !(*this == other); 42 | } 43 | T_dereferenced& operator*() { 44 | return *value; 45 | } 46 | const T_dereferenced& operator*() const { 47 | return *value; 48 | } 49 | T_dereferenced* operator->() { 50 | return value; 51 | } 52 | const T_dereferenced* operator->() const { 53 | return value; 54 | } 55 | 56 | T& get() { 57 | return value; 58 | } 59 | 60 | const T& get() const { 61 | return value; 62 | } 63 | 64 | friend void swap(P_type& first, P_type& second) { 65 | using std::swap; 66 | swap(first.value, second.value); 67 | } 68 | }; 69 | 70 | template 71 | class padded_base : public T { 72 | char padding[PADSIZE - (sizeof(T)%PADSIZE)]; 73 | //typedef padded_base P_type; 74 | //typedef typename std::remove_pointer::type T_dereferenced; 75 | //friend class padded_base; 76 | public: 77 | using T::T; 78 | using T::operator=; 79 | // T_dereferenced& operator*() { 80 | // return **this; 81 | // } 82 | // const T_dereferenced& operator*() const { 83 | // return *value; 84 | // } 85 | // T_dereferenced* operator->() { 86 | // return value; 87 | // } 88 | // const T_dereferenced* operator->() const { 89 | // return value; 90 | // } 91 | // 92 | T& get() { 93 | return *this; 94 | } 95 | 96 | const T& get() const { 97 | return *this; 98 | } 99 | }; 100 | 101 | template 102 | class padded : public padded_base::value> { 103 | typedef padded_base::value> base_type; 104 | public: 105 | using base_type::base_type; 106 | using base_type::operator=; 107 | }; 108 | 109 | #endif // padded_hpp 110 | -------------------------------------------------------------------------------- /src/lock/rcpp_lock.h: -------------------------------------------------------------------------------- 1 | #ifndef RCPPLOCK_H 2 | #define RCPPLOCK_H 3 | 4 | //RHQDLock 5 | 6 | typedef struct RCPPLockImpl { 7 | void (*defaultWriter)(void*, void **); 8 | char pad2[64 - (sizeof(void * (*)(void*)) % 64)]; 9 | char lock[256*1024*1024]; 10 | } RCPPLock; 11 | 12 | RCPPLock* rcpplock_new(); 13 | void rcpplock_free(RCPPLock*); 14 | void rcpplock_init(RCPPLock*); 15 | void rcpplock_delegate(RCPPLock* lock, void (*delgateFun)(void *, void **), void * data); 16 | static void* rcpplock_delegate_and_wait (RCPPLock* lock, void (*delgateFun)(void *, void **), void * data); 17 | void rcpplock_lock(RCPPLock*); 18 | void rcpplock_unlock(RCPPLock*); 19 | void rcpplock_rlock(RCPPLock*); 20 | void rcpplock_runlock(RCPPLock*); 21 | 22 | static inline void rcpplock_initialize(RCPPLock * lock, void (*defaultWriter)(void *, void **)) { 23 | lock->defaultWriter = defaultWriter; 24 | rcpplock_init(lock); 25 | __sync_synchronize(); 26 | } 27 | static inline RCPPLock * rcpplock_create(void (*writer)(void *, void **)) { 28 | (void)writer; 29 | RCPPLock* lock = rcpplock_new(); 30 | return lock; 31 | } 32 | 33 | static inline void rcpplock_register_this_thread() {} 34 | 35 | //static void rcpplock_write_with_response(RCPPLock *rcpplock, 36 | // void (*delgateFun)(void *, void **), 37 | // void * data, 38 | // void ** responseLocation); 39 | static void * rcpplock_write_with_response_block(RCPPLock *lock, 40 | void (*delgateFun)(void *, void **), 41 | void * data); 42 | void rcpplock_delegate(RCPPLock *lock, 43 | void (*delgateFun)(void *, void **), 44 | void * data); 45 | static inline void rcpplock_write(RCPPLock *lock, void * writeInfo) { 46 | rcpplock_delegate(lock, lock->defaultWriter, writeInfo); 47 | } 48 | 49 | static inline void rcpplock_write_read_lock(RCPPLock 
*lock) { 50 | rcpplock_lock(lock); 51 | } 52 | static inline void rcpplock_write_read_unlock(RCPPLock * lock) { 53 | rcpplock_unlock(lock); 54 | } 55 | static inline void rcpplock_read_lock(RCPPLock *lock) { 56 | rcpplock_rlock(lock); 57 | } 58 | static inline void rcpplock_read_unlock(RCPPLock *lock) { 59 | rcpplock_runlock(lock); 60 | } 61 | 62 | 63 | #if 0 64 | static inline 65 | void rcpplock_write_with_response(RHQDLock *rcpplock, 66 | void (*delgateFun)(void *, void **), 67 | void * data, 68 | void ** responseLocation){ 69 | } 70 | static inline 71 | 72 | void rcpplock_delegate(RHQDLock *lock, 73 | void (*delgateFun)(void *, void **), 74 | void * data) { 75 | rcpplock_write_with_response(lock, delgateFun, data, NULL); 76 | #endif 77 | 78 | static inline 79 | void * rcpplock_write_with_response_block(RCPPLock *lock, 80 | void (*delgateFun)(void *, void **), 81 | void * data){ 82 | return rcpplock_delegate_and_wait(lock, delgateFun, data); 83 | } 84 | 85 | 86 | 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/utils/smp_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef SMP_UTILS_H 2 | #define SMP_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | //Make sure compiler does not optimize away memory access 8 | #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) 9 | 10 | //Atomic get 11 | #define GET(value_ptr) __sync_fetch_and_add(value_ptr, 0) 12 | 13 | //Compiller barrier 14 | #define barrier() __asm__ __volatile__("": : :"memory") 15 | 16 | //See the following URL for explanation of acquire and release semantics: 17 | //http://preshing.com/20120913/acquire-and-release-semantics 18 | 19 | //Load with acquire barrier 20 | #if __x86_64__ 21 | #define load_acq(assign_to,load_from) \ 22 | assign_to = ACCESS_ONCE(load_from) 23 | #else 24 | #define load_acq(assign_to,load_from) \ 25 | do { \ 26 | barrier(); \ 27 | assign_to = ACCESS_ONCE(load_from); \ 28 | __sync_synchronize(); \ 29 | } while(0) 30 | #endif 31 | 32 | 33 | //Store with release barrier 34 | #if __x86_64__ 35 | #define store_rel(store_to,store_value) \ 36 | do{ \ 37 | barrier(); \ 38 | store_to = store_value; \ 39 | barrier(); \ 40 | }while(0); 41 | #else 42 | #define store_rel(store_to,store_value) \ 43 | do{ \ 44 | __sync_synchronize(); \ 45 | store_to = store_value; \ 46 | barrier(); \ 47 | }while(0); 48 | #endif 49 | 50 | //Intel pause instruction 51 | #if __x86_64__ 52 | #define pause_instruction() \ 53 | __asm volatile ("pause") 54 | #else 55 | #define pause_instruction() \ 56 | __sync_synchronize() 57 | #endif 58 | 59 | static inline 60 | int get_and_set_int(int * pointerToOldValue, int newValue){ 61 | int x = ACCESS_ONCE(*pointerToOldValue); 62 | while (true) { 63 | if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue)) 64 | return x; 65 | x = ACCESS_ONCE(*pointerToOldValue); 66 | } 67 | } 68 | 69 | static inline 70 | unsigned long get_and_set_ulong(unsigned long * pointerToOldValue, unsigned long newValue){ 71 | unsigned long x = ACCESS_ONCE(*pointerToOldValue); 72 | while (true) { 73 | if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue)) 74 | return x; 75 | x = ACCESS_ONCE(*pointerToOldValue); 76 | } 77 | } 78 | 79 | typedef union CacheLinePaddedBoolImpl { 80 | bool value; 81 | char padding[64]; 82 | } CacheLinePaddedBool; 83 | 84 | typedef union CacheLinePaddedIntImpl { 85 | int value; 86 | char padding[128]; 87 | } CacheLinePaddedInt; 88 | 89 | 90 | typedef union CacheLinePaddedULongImpl { 
91 | unsigned long value; 92 | char padding[128]; 93 | } CacheLinePaddedULong; 94 | 95 | typedef union CacheLinePaddedDoubleImpl { 96 | double value; 97 | char padding[128]; 98 | } CacheLinePaddedDouble; 99 | 100 | typedef union CacheLinePaddedPointerImpl { 101 | void * value; 102 | char padding[64]; 103 | } CacheLinePaddedPointer; 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /src/lock/wprw_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "wprw_lock.h" 8 | #include "utils/support_many_lock_types.h" 9 | #include "utils/smp_utils.h" 10 | #include "utils/thread_identifier.h" 11 | 12 | 13 | #define READ_PATIENCE_LIMIT 130000 14 | 15 | static inline 16 | bool isWriteLocked(WPRWLock * lock){ 17 | #ifdef LOCK_TYPE_MCSLock 18 | MCSNode * endOfQueue; 19 | load_acq(endOfQueue, lock->lock.endOfQueue.value); 20 | return endOfQueue != NULL; 21 | #elif defined (LOCK_TYPE_CohortLock) 22 | int inCounter; 23 | int outCounter; 24 | load_acq(inCounter, lock->lock.globalLock.inCounter.value); 25 | load_acq(outCounter, lock->lock.globalLock.outCounter.value); 26 | return (inCounter != outCounter); 27 | #else 28 | printf("WPRW LOCK: Unsupported mutual exclusion lock\n"); 29 | assert(false); 30 | return false; 31 | #endif 32 | } 33 | 34 | WPRWLock * wprwlock_create(void (*writer)(void *, void **)){ 35 | WPRWLock * lock = malloc(sizeof(WPRWLock)); 36 | wprwlock_initialize(lock, writer); 37 | return lock; 38 | } 39 | 40 | void wprwlock_initialize(WPRWLock * lock, void (*writer)(void *, void **)){ 41 | LOCK_INITIALIZE(&lock->lock, writer); 42 | lock->writeBarrier.value = 0; 43 | NZI_INITIALIZE(&lock->nonZeroIndicator); 44 | __sync_synchronize(); 45 | } 46 | 47 | void wprwlock_free(WPRWLock * lock){ 48 | free(lock); 49 | } 50 | 51 | 52 | void wprwlock_register_this_thread(){ 53 | LOCK_REGISTER_THIS_THREAD(); 54 | assign_id_to_thread(); 55 | } 56 | 57 | void wprwlock_write(WPRWLock *lock, void * writeInfo) { 58 | wprwlock_write_read_lock(lock); 59 | lock->lock.writer(writeInfo, NULL); 60 | wprwlock_write_read_unlock(lock); 61 | } 62 | 63 | void wprwlock_write_read_lock(WPRWLock *lock) { 64 | bool writeBarrierOn; 65 | load_acq(writeBarrierOn, lock->writeBarrier.value); 66 | while(writeBarrierOn){ 67 | __sync_synchronize(); 68 | load_acq(writeBarrierOn, lock->writeBarrier.value); 69 | } 70 | if(!LOCK_WRITE_READ_LOCK(&lock->lock)){ 71 | NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator); 72 | } 73 | } 74 | 75 | void wprwlock_write_read_unlock(WPRWLock * lock) { 76 | LOCK_WRITE_READ_UNLOCK(&lock->lock); 77 | } 78 | 79 | void wprwlock_read_lock(WPRWLock *lock) { 80 | bool bRaised = false; 81 | int readPatience = 0; 82 | start: 83 | NZI_ARRIVE(&lock->nonZeroIndicator); 84 | if(isWriteLocked(lock)){ 85 | NZI_DEPART(&lock->nonZeroIndicator); 86 | while(isWriteLocked(lock)){ 87 | __sync_synchronize();//Pause (pause instruction might be better) 88 | if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){ 89 | __sync_fetch_and_add(&lock->writeBarrier.value, 1); 90 | bRaised = true; 91 | } 92 | readPatience = readPatience + 1; 93 | } 94 | goto start; 95 | } 96 | if(bRaised){ 97 | __sync_fetch_and_sub(&lock->writeBarrier.value, 1); 98 | } 99 | } 100 | 101 | void wprwlock_read_unlock(WPRWLock *lock) { 102 | NZI_DEPART(&lock->nonZeroIndicator); 103 | } 104 | --------------------------------------------------------------------------------
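The load_acq and store_rel macros from smp_utils.h above correspond to C++11 acquire loads and release stores, and the reader loops in wprw_lock.c rely on exactly that pairing. Below is a small sketch of the same in/out-counter check written against std::atomic, for comparison only; this is not code from the repository:

#include <atomic>
#include <cassert>

static std::atomic<int> inCounter(0);
static std::atomic<int> outCounter(0);

// Equivalent of load_acq(x, counter.value): on x86-64 this compiles to a
// plain load, matching the macro's fast path.
static int load_acq_cpp(const std::atomic<int>& v) {
    return v.load(std::memory_order_acquire);
}

// Equivalent of store_rel(counter.value, x): also a plain store on x86-64.
static void store_rel_cpp(std::atomic<int>& v, int x) {
    v.store(x, std::memory_order_release);
}

int main() {
    // The ticket-style "is locked" test used by isWriteLocked above:
    // the lock is held exactly when the two counters differ.
    store_rel_cpp(inCounter, 1);   // a writer entered
    assert(load_acq_cpp(inCounter) != load_acq_cpp(outCounter));
    store_rel_cpp(outCounter, 1);  // the writer left
    assert(load_acq_cpp(inCounter) == load_acq_cpp(outCounter));
    return 0;
}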
/src/datastructures_bench/synch_algorithms/synch_algs_config.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONFIG_H_ 2 | 3 | #define _CONFIG_H_ 4 | 5 | // Definition: USE_CPUS 6 | // -------------------- 7 | // Define the number of processing cores that your computation 8 | // system offers, or the maximum number of cores that you would like to use. 9 | #ifndef USE_CPUS 10 | # define USE_CPUS 64 11 | #endif 12 | 13 | // Definition: N_THREADS 14 | // --------------------- 15 | // Define the number of threads that you would like to run experiments with. 16 | // In case N_THREADS > USE_CPUS, two or more threads may run on 17 | // any processing core. 18 | #ifndef N_THREADS 19 | # define N_THREADS 64 20 | #endif 21 | 22 | // Definition: MAX_WORK 23 | // -------------------- 24 | // Define the maximum local work that each thread executes 25 | // between two calls of some simulated shared object's 26 | // operation. A zero value means no work between two calls. 27 | // The exact value depends on the speed of the processing cores. 28 | // Try not to use big values (which make contention unrealistically rare) 29 | // or small values (which lead to long runs and 30 | // unrealistic cache-miss ratios). 31 | #define MAX_WORK 64 32 | 33 | // Definition: RUNS 34 | // ---------------- 35 | // Define the total number of calls of the object's 36 | // operations that will be executed. 37 | #define RUNS (10000000 / N_THREADS) 38 | 39 | // Definition: DEBUG 40 | // ----------------- 41 | // Enable this definition in case you would like to debug some 42 | // parts of the code. It usually leads to performance loss. 43 | // This way of debugging is deprecated. It is better to 44 | // compile your code with the debug option. 45 | // See Readme for more details. 46 | //#define DEBUG 47 | 48 | // Definition: OBJECT_SIZE 49 | // ---------------------- 50 | // This definition is only used in the lfobject.c, simopt.c 51 | // and luobject.c experiments. In any other case it is 52 | // ignored. Its default value is 1. It is used for simulating 53 | // an atomic array of Fetch&Multiply objects with 54 | // OBJECT_SIZE elements. All elements are updated 55 | // simultaneously. 56 | #ifndef OBJECT_SIZE 57 | # define OBJECT_SIZE 1 58 | #endif 59 | 60 | // Definition: DISABLE_BACKOFF 61 | // --------------------------- 62 | // By defining this, any backoff scheme in any algorithm 63 | // is disabled. Be careful: upper and lower bounds must 64 | // still be passed as experiment arguments, but they are ignored. 65 | //#define DISABLE_BACKOFF 66 | 67 | 68 | #define Object int32_t 69 | 70 | // Definition: RetVal 71 | // ------------------ 72 | // Define the type of the return value that simulated 73 | // atomic objects must return. Be careful: this type 74 | // must be read/written atomically by the target machine. 75 | // Usually this is 32 or 64 bits (in some cases, e.g. x86_64, 76 | // 128-bit types are supported). In case you 77 | // need a larger type, use indirection. 78 | #define RetVal int32_t 79 | 80 | // Definition: ArgVal 81 | // ------------------ 82 | // Define the type of the argument value of atomic objects. 83 | // All atomic objects have the same argument type. In case 84 | // you would like to use a different argument type in some 85 | // atomic object, redefine it in the object's source file. 
86 | #define ArgVal int32_t 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/datastructures_bench/synch_algorithms/qdlock.c: -------------------------------------------------------------------------------- 1 | #define MAX_NUM_OF_HELPED_OPS 4096 2 | #include "qdlock.h" 3 | 4 | 5 | AgnosticDXLock * adxlock_create(void (*writer)(int, int *)){ 6 | AgnosticDXLock * lock = (AgnosticDXLock *)malloc(sizeof(AgnosticDXLock)); 7 | adxlock_initialize(lock, writer); 8 | return lock; 9 | } 10 | void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *)){ 11 | //TODO check if the following typecast is fine 12 | lock->defaultWriter = defaultWriter; 13 | tataslock_initialize(&lock->lock, defaultWriter); 14 | drmvqueue_initialize(&lock->writeQueue); 15 | __sync_synchronize(); 16 | } 17 | void adxlock_write_with_response(AgnosticDXLock *lock, 18 | void (*delgateFun)(int, int *), 19 | int data, 20 | int * responseLocation){ 21 | int counter = 0; 22 | DelegateRequestEntry e; 23 | e.request = delgateFun; 24 | e.data = data; 25 | e.responseLocation = responseLocation; 26 | do{ 27 | if(!tataslock_is_locked(&lock->lock)){ 28 | if(tataslock_try_write_read_lock(&lock->lock)){ 29 | #ifdef ACTIVATE_NO_CONTENTION_OPT 30 | if(counter > 0){ 31 | #endif 32 | drmvqueue_reset_fully_read(&lock->writeQueue); 33 | delgateFun(data, responseLocation); 34 | drmvqueue_flush(&lock->writeQueue); 35 | tataslock_write_read_unlock(&lock->lock); 36 | return; 37 | #ifdef ACTIVATE_NO_CONTENTION_OPT 38 | }else{ 39 | delgateFun(data, responseLocation); 40 | tataslock_write_read_unlock(&lock->lock); 41 | return; 42 | } 43 | #endif 44 | } 45 | }else{ 46 | while(tataslock_is_locked(&lock->lock)){ 47 | if(drmvqueue_offer(&lock->writeQueue, e)){ 48 | return; 49 | }else{ 50 | __sync_synchronize(); 51 | __sync_synchronize(); 52 | } 53 | } 54 | } 55 | if((counter & 7) == 0){ 56 | #ifdef USE_YIELD 57 | sched_yield(); 58 | #endif 59 | } 60 | counter = counter + 1; 61 | sched_yield(); 62 | }while(true); 63 | } 64 | 65 | int adxlock_write_with_response_block(AgnosticDXLock *lock, 66 | void (*delgateFun)(int, int *), 67 | int data){ 68 | int counter = 0; 69 | int returnValue = INT_MIN; 70 | int currentValue; 71 | adxlock_write_with_response(lock, delgateFun, data, &returnValue); 72 | load_acq(currentValue, returnValue); 73 | while(currentValue == INT_MIN){ 74 | if((counter & 7) == 0){ 75 | #ifdef USE_YIELD 76 | sched_yield(); 77 | #endif 78 | }else{ 79 | __sync_synchronize(); 80 | } 81 | counter = counter + 1; 82 | load_acq(currentValue, returnValue); 83 | } 84 | return currentValue; 85 | } 86 | void adxlock_delegate(AgnosticDXLock *lock, 87 | void (*delgateFun)(int, int *), 88 | int data) { 89 | adxlock_write_with_response(lock, delgateFun, data, NULL); 90 | } 91 | void adxlock_write(AgnosticDXLock *lock, int writeInfo) { 92 | adxlock_delegate(lock, lock->defaultWriter, writeInfo); 93 | } 94 | void adxlock_write_read_unlock(AgnosticDXLock * lock) { 95 | drmvqueue_flush(&lock->writeQueue); 96 | tataslock_write_read_unlock(&lock->lock); 97 | } 98 | -------------------------------------------------------------------------------- /src/benchmark/benchmark_lock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | 
sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 29 | 30 | """ 31 | sys.exit() 32 | 33 | 34 | iterations = int(parameters.pop(0)) 35 | 36 | 37 | output_dir_base = parameters.pop(0) 38 | 39 | if output_dir_base=='standard': 40 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) #subprocess.check_output(['git', 'rev-parse', 'HEAD'])[-11:-1] 41 | 42 | benchmark_names = parameters.pop(0).split(',') 43 | 44 | lock_ids = parameters.pop(0).split(',') 45 | 46 | pinning_settings = parameters.pop(0).split(',') 47 | 48 | thread_counts = parameters.pop(0).split(",") 49 | 50 | #* Percentage read 51 | #* Number of seconds to benchmark 52 | #* Iterations spent in write critical section 53 | #* Iterations spent in read critical section 54 | #* Iterations spent in non critical section 55 | 56 | 57 | percentages_reads = parameters.pop(0).split(',') 58 | 59 | run_times_seconds = parameters.pop(0).split(',') 60 | 61 | iterations_wcs = parameters.pop(0).split(',') 62 | 63 | iterations_rcs = parameters.pop(0).split(',') 64 | 65 | iterations_ncs = parameters.pop(0).split(',') 66 | 67 | for iteration in range(iterations): 68 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 69 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 70 | for benchmark_name in benchmark_names 71 | for lock_id in lock_ids]: 72 | for settings in [[pr,rts,iw,ir,incs] 73 | for pr in percentages_reads 74 | for rts in run_times_seconds 75 | for iw in iterations_wcs 76 | for ir in iterations_rcs 77 | for incs in iterations_ncs]: 78 | for pinning in pinning_settings: 79 | output_file_dir_str = ('bench_results/' + 80 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 81 | if not os.path.exists(output_file_dir_str): 82 | os.makedirs(output_file_dir_str) 83 | output_file_str = (output_file_dir_str + 84 | 'b_' + pinning + '_' + '_'.join(settings) + '.dat') 85 | with open(output_file_str, "a") as outfile: 86 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n" 87 | for thread_count in thread_counts: 88 | command = [bin_dir_path + '/' + benchmark_id, thread_count] + settings 89 | if pinning=='no': 90 | process = subprocess.Popen(command, stdout=outfile) 91 | process.wait() 92 | else: 93 | max_node_id = (int(thread_count)-1) / num_of_cpus_per_node 94 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 95 | process = subprocess.Popen(nomactrl + command, stdout=outfile) 96 | process.wait() 97 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! 
-- \033[m\n\n" 98 | 99 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 100 | -------------------------------------------------------------------------------- /src/benchmark/cache_benchmark_lock_simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[pr,rts,iw,ir,incs] 72 | for pr in percentages_reads 73 | for rts in run_times_seconds 74 | for iw in iterations_wcs 75 | for ir in iterations_rcs 76 | for incs in iterations_ncs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'b_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! 
-- \033[m\n\n" 86 | for thread_count in thread_counts: 87 | realcmd = [bin_dir_path + '/' + benchmark_id, thread_count] + settings 88 | perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u'] 89 | command = perfcmd + realcmd 90 | (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 91 | print (bin_dir_path + '/' + 'perf_magic_simple') 92 | cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic_simple', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0] 93 | outfile.write(outString.rstrip('\n') + cachedata + '\n') 94 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n" 95 | 96 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 97 | -------------------------------------------------------------------------------- /src/benchmark/benchmark_lockXOpDist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[tc,rts,iw,ir,ncsw] 72 | for tc in thread_counts 73 | for rts in run_times_seconds 74 | for iw in iterations_wcs 75 | for ir in iterations_rcs 76 | for ncsw in iterations_ncs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'xodi_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | 
print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n" 86 | for pr in percentages_reads: 87 | [tc,rts,iw,ir,ncsw] = settings 88 | command = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,ncsw] 89 | print command 90 | if pinning=='no': 91 | outString = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0] 92 | outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:])) 93 | else: 94 | max_node_id = (int(tc)-1) / num_of_cpus_per_node 95 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 96 | outString = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE).communicate()[0] 97 | outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:])) 98 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n" 99 | 100 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 101 | 102 | 103 | -------------------------------------------------------------------------------- /src/benchmark/benchmark_lock_XNonCW.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 
29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[tc,pr,rts,iw,ir] 72 | for tc in thread_counts 73 | for pr in percentages_reads 74 | for rts in run_times_seconds 75 | for iw in iterations_wcs 76 | for ir in iterations_rcs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'xncw_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n" 86 | for non_cs_work in iterations_ncs: 87 | [tc,pr,rts,iw,ir] = settings 88 | command = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,non_cs_work] 89 | print command 90 | if pinning=='no': 91 | outString = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0] 92 | outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:])) 93 | else: 94 | max_node_id = (int(tc)-1) / num_of_cpus_per_node 95 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 96 | outString = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE).communicate()[0] 97 | outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:])) 98 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! 
-- \033[m\n\n" 99 | 100 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 101 | -------------------------------------------------------------------------------- /src/lock/cohort_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "cohort_lock.h" 8 | #include "utils/numa_node_info_support.h" 9 | 10 | 11 | __thread CacheLinePaddedInt myLocalNode __attribute__((aligned(64))); 12 | 13 | 14 | static inline 15 | bool nodeHasWaitingThreads(TicketLock * localLock){ 16 | int localLockInCounter; 17 | int localLockOutCounter; 18 | load_acq(localLockInCounter, localLock->inCounter.value); 19 | load_acq(localLockOutCounter, localLock->outCounter.value); 20 | return (localLockInCounter - localLockOutCounter) > 1; 21 | } 22 | 23 | CohortLock * cohortlock_create(void (*writer)(void *, void **)){ 24 | CohortLock * lock = malloc(sizeof(CohortLock)); 25 | cohortlock_initialize(lock, writer); 26 | return lock; 27 | } 28 | 29 | void cohortlock_initialize(CohortLock * lock, void (*writer)(void *, void **)){ 30 | lock->writer = writer; 31 | aticketlock_initialize(&lock->globalLock, writer); 32 | for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){ 33 | ticketlock_initialize(&lock->localLockData[i].lock, writer); 34 | lock->localLockData[i].numberOfHandOvers.value = 0; 35 | lock->localLockData[i].needToTakeGlobalLock.value = true; 36 | } 37 | //Initialize CPUToNodeMap 38 | int numaStructure[NUMBER_OF_NUMA_NODES][NUMBER_OF_CPUS_PER_NODE] = NUMA_STRUCTURE; 39 | for(char node = 0; node < NUMBER_OF_NUMA_NODES; node++){ 40 | for(int i = 0; i < NUMBER_OF_CPUS_PER_NODE; i++){ 41 | CPUToNodeMap.value[numaStructure[(int)node][i]] = node; 42 | } 43 | } 44 | 45 | __sync_synchronize(); 46 | } 47 | 48 | void cohortlock_free(CohortLock * lock){ 49 | free(lock); 50 | } 51 | 52 | 53 | void cohortlock_register_this_thread(){ 54 | } 55 | 56 | void cohortlock_write(CohortLock *lock, void * writeInfo) { 57 | cohortlock_write_read_lock(lock); 58 | lock->writer(writeInfo, NULL); 59 | cohortlock_write_read_unlock(lock); 60 | } 61 | 62 | 63 | 64 | //Returns true if it is taken over from another writer and false otherwise 65 | bool cohortlock_write_read_lock(CohortLock *lock) { 66 | #ifdef PINNING 67 | NodeLocalLockData * localData = &lock->localLockData[numa_node.value]; 68 | #else 69 | myLocalNode.value = numa_node_id(); 70 | NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value]; 71 | #endif 72 | ticketlock_write_read_lock(&localData->lock); 73 | if(localData->needToTakeGlobalLock.value){ 74 | aticketlock_write_read_lock(&lock->globalLock); 75 | return false; 76 | }else{ 77 | return true; 78 | } 79 | } 80 | 81 | void cohortlock_write_read_unlock(CohortLock * lock) { 82 | #ifdef PINNING 83 | NodeLocalLockData * localData = &lock->localLockData[numa_node.value]; 84 | #else 85 | NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value]; 86 | #endif 87 | if(nodeHasWaitingThreads(&localData->lock) && 88 | (localData->numberOfHandOvers.value < MAXIMUM_NUMBER_OF_HAND_OVERS)){ 89 | localData->needToTakeGlobalLock.value = false; 90 | localData->numberOfHandOvers.value++; 91 | ticketlock_write_read_unlock(&localData->lock); 92 | 93 | }else{ 94 | localData->needToTakeGlobalLock.value = true; 95 | localData->numberOfHandOvers.value = 0; 96 | aticketlock_write_read_unlock(&lock->globalLock); 97 | ticketlock_write_read_unlock(&localData->lock); 98 | } 99 | } 100 | 
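/* Illustrative usage sketch (not part of the original file): the cohort
 * lock runs all delegated writes through the writer callback given to
 * cohortlock_create()/cohortlock_initialize(), while direct critical
 * sections use the lock/unlock pair above. The shared_counter and
 * example_* names are hypothetical. */
#ifdef COHORT_LOCK_USAGE_EXAMPLE
static unsigned long shared_counter = 0;

static void example_writer(void * writeInfo, void ** responseLocation){
    (void)writeInfo;
    (void)responseLocation;
    shared_counter++; /* executed while the cohort lock is held */
}

/* assumes the lock was created with cohortlock_create(example_writer) */
static void example_thread_work(CohortLock * lock){
    cohortlock_register_this_thread();
    /* delegated write: the lock invokes example_writer for us */
    cohortlock_write(lock, NULL);
    /* direct critical section around arbitrary code */
    cohortlock_write_read_lock(lock);
    shared_counter++;
    cohortlock_write_read_unlock(lock);
}
#endif /* COHORT_LOCK_USAGE_EXAMPLE */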
101 | void cohortlock_read_lock(CohortLock *lock) { 102 | cohortlock_write_read_lock(lock); 103 | } 104 | 105 | void cohortlock_read_unlock(CohortLock *lock) { 106 | cohortlock_write_read_unlock(lock); 107 | } 108 | -------------------------------------------------------------------------------- /src/benchmark/cache_benchmark_lock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[pr,rts,iw,ir,incs] 72 | for pr in percentages_reads 73 | for rts in run_times_seconds 74 | for iw in iterations_wcs 75 | for ir in iterations_rcs 76 | for incs in iterations_ncs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'b_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! 
-- \033[m\n\n" 86 | for thread_count in thread_counts: 87 | realcmd = [bin_dir_path + '/' + benchmark_id, thread_count] + settings 88 | perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u'] 89 | command = perfcmd + realcmd 90 | if pinning=='no': 91 | (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 92 | else: 93 | max_node_id = (int(thread_count)-1) / num_of_cpus_per_node 94 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 95 | (outString, outErr) = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 96 | cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0] 97 | outfile.write(outString.rstrip('\n') + cachedata + '\n') 98 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n" 99 | 100 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 101 | -------------------------------------------------------------------------------- /src/benchmark/cache_benchmark_lockXOpDist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 
29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[tc,rts,iw,ir,ncsw] 72 | for tc in thread_counts 73 | for rts in run_times_seconds 74 | for iw in iterations_wcs 75 | for ir in iterations_rcs 76 | for ncsw in iterations_ncs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'xodi_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n" 86 | for pr in percentages_reads: 87 | [tc,rts,iw,ir,ncsw] = settings 88 | realcmd = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,ncsw] 89 | perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u'] 90 | command = perfcmd + realcmd 91 | print command 92 | if pinning=='no': 93 | (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 94 | else: 95 | max_node_id = (int(tc)-1) / num_of_cpus_per_node 96 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 97 | (outString, outErr) = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 98 | cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0] 99 | outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:]).rstrip('\n') + cachedata + '\n') 100 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! 
-- \033[m\n\n" 101 | 102 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 103 | -------------------------------------------------------------------------------- /src/datastructures_bench/PR/gc/intel_defns.h: -------------------------------------------------------------------------------- 1 | #ifndef __INTEL_DEFNS_H__ 2 | #define __INTEL_DEFNS_H__ 3 | 4 | #include <pthread.h> 5 | #include <sched.h> 6 | 7 | #ifndef INTEL 8 | #define INTEL 9 | #endif 10 | 11 | #if 0 12 | #define pthread_mutex_init(_m,_i) \ 13 | ({ pthread_mutex_init(_m,_i); (_m)->__m_kind = PTHREAD_MUTEX_ADAPTIVE_NP; }) 14 | #endif 15 | 16 | 17 | /* 18 | * I. Compare-and-swap. 19 | */ 20 | 21 | /* 22 | * This is a strong barrier! Reads cannot be delayed beyond a later store. 23 | * Reads cannot be hoisted beyond a LOCK prefix. Stores always in-order. 24 | */ 25 | #define CAS(_a, _o, _n) \ 26 | ({ __typeof__(_o) __o = _o; \ 27 | __asm__ __volatile__( \ 28 | "lock cmpxchg %3,%1" \ 29 | : "=a" (__o), "=m" (*(volatile unsigned int *)(_a)) \ 30 | : "0" (__o), "r" (_n) ); \ 31 | __o; \ 32 | }) 33 | 34 | #define FAS(_a, _n) \ 35 | ({ __typeof__(_n) __o; \ 36 | __asm__ __volatile__( \ 37 | "lock xchg %0,%1" \ 38 | : "=r" (__o), "=m" (*(volatile unsigned int *)(_a)) \ 39 | : "0" (_n) ); \ 40 | __o; \ 41 | }) 42 | 43 | #define CAS64(_a, _o, _n) \ 44 | ({ __typeof__(_o) __o = _o; \ 45 | __asm__ __volatile__( \ 46 | "movl %3, %%ecx;" \ 47 | "movl %4, %%ebx;" \ 48 | "lock cmpxchg8b %1" \ 49 | : "=A" (__o), "=m" (*(volatile unsigned long long *)(_a)) \ 50 | : "0" (__o), "m" (_n >> 32), "m" (_n) \ 51 | : "ebx", "ecx" ); \ 52 | __o; \ 53 | }) 54 | 55 | /* Update Integer location, return Old value. */ 56 | #define CASIO CAS 57 | #define FASIO FAS 58 | /* Update Pointer location, return Old value. */ 59 | #define CASPO CAS 60 | #define FASPO FAS 61 | /* Update 32/64-bit location, return Old value. */ 62 | #define CAS32O CAS 63 | #define CAS64O CAS64 64 | 65 | /* 66 | * II. Memory barriers. 67 | * WMB(): All preceding write operations must commit before any later writes. 68 | * RMB(): All preceding read operations must commit before any later reads. 69 | * MB(): All preceding memory accesses must commit before any later accesses. 70 | * 71 | * If the compiler does not observe these barriers (but any sane compiler 72 | * will!), then VOLATILE should be defined as 'volatile'. 73 | */ 74 | 75 | #define MB() __sync_synchronize() 76 | #define WMB() __asm__ __volatile__ ("" : : : "memory") 77 | #define RMB() MB() 78 | #define VOLATILE /*volatile*/ 79 | 80 | /* On Intel, CAS is a strong barrier, but not a compile barrier. */ 81 | #define RMB_NEAR_CAS() WMB() 82 | #define WMB_NEAR_CAS() WMB() 83 | #define MB_NEAR_CAS() WMB() 84 | 85 | 86 | /* 87 | * III. Cycle counter access. 88 | */ 89 | 90 | typedef unsigned long long tick_t; 91 | 92 | static inline tick_t __attribute__((always_inline)) 93 | RDTICK() 94 | { tick_t __t; 95 | __asm__ __volatile__("rdtsc\n" 96 | "shl $32,%%rdx\n" 97 | "or %%rdx,%%rax" 98 | : "=a"(__t) 99 | : 100 | : "%rcx", "%rdx"); 101 | return __t; 102 | } 103 | 104 | 105 | 106 | 107 | /* 108 | * IV. Types. 109 | */ 110 | 111 | typedef unsigned char _u8; 112 | typedef unsigned short _u16; 113 | typedef unsigned int _u32; 114 | typedef unsigned long long _u64; 115 | 
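/* Illustrative sketch (not part of the original header): CAS(_a, _o, _n)
 * evaluates to the value observed at _a, so an update succeeded exactly
 * when the returned value equals the expected old value. A hypothetical
 * fetch-and-increment built on CASIO: */
#ifdef INTEL_DEFNS_USAGE_EXAMPLE
static inline unsigned int example_fetch_and_inc(unsigned int *counter)
{
    unsigned int old, seen;
    do {
        old  = *counter;
        seen = CASIO(counter, old, old + 1);
    } while ( seen != old ); /* another thread raced us; retry */
    return old;
}
#endif /* INTEL_DEFNS_USAGE_EXAMPLE */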
116 | #endif /* __INTEL_DEFNS_H__ */ 117 | -------------------------------------------------------------------------------- /src/benchmark/cache_benchmark_lock_XNonCW.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import os 5 | import datetime 6 | import subprocess 7 | import re 8 | 9 | import sys 10 | 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock')) 14 | 15 | from extract_numa_structure import numa_structure 16 | 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure() 18 | 19 | parameters = sys.argv 20 | 21 | parameters.pop(0) 22 | 23 | if len(parameters) < 11: 24 | print """Not enough parameters: 25 | 26 | Look at bin/run_benchmarks_on_intel_i7.py and 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of 28 | parameters. 29 | 30 | """ 31 | sys.exit() 32 | 33 | iterations = int(parameters.pop(0)) 34 | 35 | output_dir_base = parameters.pop(0) 36 | 37 | if output_dir_base=='standard': 38 | output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) 39 | 40 | benchmark_names = parameters.pop(0).split(',') 41 | 42 | lock_ids = parameters.pop(0).split(',') 43 | 44 | pinning_settings = parameters.pop(0).split(',') 45 | 46 | thread_counts = parameters.pop(0).split(",") 47 | 48 | #* Percentage read 49 | #* Number of seconds to benchmark 50 | #* Iterations spent in write critical section 51 | #* Iterations spent in read critical section 52 | #* Iterations spent in non critical section 53 | 54 | 55 | percentages_reads = parameters.pop(0).split(',') 56 | 57 | run_times_seconds = parameters.pop(0).split(',') 58 | 59 | iterations_wcs = parameters.pop(0).split(',') 60 | 61 | iterations_rcs = parameters.pop(0).split(',') 62 | 63 | iterations_ncs = parameters.pop(0).split(',') 64 | 65 | 66 | for iteration in range(iterations): 67 | print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n" 68 | for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id) 69 | for benchmark_name in benchmark_names 70 | for lock_id in lock_ids]: 71 | for settings in [[tc,pr,rts,iw,ir] 72 | for tc in thread_counts 73 | for pr in percentages_reads 74 | for rts in run_times_seconds 75 | for iw in iterations_wcs 76 | for ir in iterations_rcs]: 77 | for pinning in pinning_settings: 78 | output_file_dir_str = ('bench_results/' + 79 | benchmark_id + '#' + output_dir_base + '#' + lock_id + '/') 80 | if not os.path.exists(output_file_dir_str): 81 | os.makedirs(output_file_dir_str) 82 | output_file_str = (output_file_dir_str + 83 | 'xncw_' + pinning + '_' + '_'.join(settings) + '.dat') 84 | with open(output_file_str, "a") as outfile: 85 | print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! 
-- \033[m\n\n" 86 | for non_cs_work in iterations_ncs: 87 | [tc,pr,rts,iw,ir] = settings 88 | realcmd = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,non_cs_work] 89 | perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u'] 90 | command = perfcmd + realcmd 91 | print command 92 | if pinning=='no': 93 | (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 94 | else: 95 | max_node_id = (int(tc)-1) / num_of_cpus_per_node 96 | nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])] 97 | (outString, outErr) = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 98 | cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0] 99 | outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:]).rstrip('\n') + cachedata + '\n') 100 | print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n" 101 | 102 | print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n" 103 | 104 | -------------------------------------------------------------------------------- /src/lock/tts_rdx_lock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tts_rdx_lock.h" 7 | #include "utils/smp_utils.h" 8 | #include "utils/thread_identifier.h" 9 | 10 | #define READ_PATIENCE_LIMIT 130000 11 | 12 | TTSRDXLock * ttsalock_create(void (*writer)(void *, void **)){ 13 | TTSRDXLock * lock = malloc(sizeof(TTSRDXLock)); 14 | ttsalock_initialize(lock, writer); 15 | return lock; 16 | } 17 | 18 | void ttsalock_initialize(TTSRDXLock * lock, void (*writer)(void *, void **)){ 19 | lock->writer = writer; 20 | lock->lockWord.value = 0; 21 | NZI_INITIALIZE(&lock->nonZeroIndicator); 22 | omwqueue_initialize(&lock->writeQueue); 23 | __sync_synchronize(); 24 | } 25 | 26 | void ttsalock_free(TTSRDXLock * lock){ 27 | free(lock); 28 | } 29 | 30 | void ttsalock_register_this_thread(){ 31 | assign_id_to_thread(); 32 | } 33 | 34 | static inline 35 | void waitUntilWriteBarrierOff(TTSRDXLock *lock) { 36 | bool writeBarrierOn; 37 | load_acq(writeBarrierOn, lock->writeBarrier.value); 38 | while(writeBarrierOn){ 39 | __sync_synchronize(); 40 | load_acq(writeBarrierOn, lock->writeBarrier.value); 41 | } 42 | } 43 | 44 | void ttsalock_write(TTSRDXLock *lock, void * writeInfo) { 45 | bool currentlylocked; 46 | waitUntilWriteBarrierOff(lock); 47 | while(!omwqueue_offer(&lock->writeQueue, writeInfo)){ 48 | load_acq(currentlylocked, lock->lockWord.value); 49 | if(!currentlylocked){ 50 | currentlylocked = __sync_lock_test_and_set(&lock->lockWord.value, true); 51 | if(!currentlylocked){ 52 | //Was not locked before operation 53 | omwqueue_reset_fully_read(&lock->writeQueue); 54 | NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator); 55 | lock->writer(writeInfo, NULL); 56 | ttsalock_write_read_unlock(lock); 57 | return; 58 | } 59 | } 60 | //A __sync_synchronize(); or a pause instruction 61 | //is probably necessary here to make it perform on 62 | //sandy 63 | } 64 | } 65 | 66 | void ttsalock_write_read_lock(TTSRDXLock *lock) { 67 | bool currentlylocked; 68 | waitUntilWriteBarrierOff(lock); 69 | while(true){ 70 | load_acq(currentlylocked, lock->lockWord.value); 71 | while(currentlylocked){ 72 | load_acq(currentlylocked, lock->lockWord.value); 73 | } 74 | currentlylocked = 
__sync_lock_test_and_set(&lock->lockWord.value, true); 75 | if(!currentlylocked){ 76 | //Was not locked before operation 77 | omwqueue_reset_fully_read(&lock->writeQueue); 78 | __sync_synchronize();//Flush 79 | NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator); 80 | return; 81 | } 82 | } 83 | } 84 | 85 | void ttsalock_write_read_unlock(TTSRDXLock * lock) { 86 | omwqueue_flush(&lock->writeQueue, lock->writer); 87 | __sync_lock_release(&lock->lockWord.value); 88 | } 89 | 90 | void ttsalock_read_lock(TTSRDXLock *lock) { 91 | bool bRaised = false; 92 | int readPatience = 0; 93 | start: 94 | NZI_ARRIVE(&lock->nonZeroIndicator); 95 | if(lock->lockWord.value){ 96 | NZI_DEPART(&lock->nonZeroIndicator); 97 | while(lock->lockWord.value){ 98 | __sync_synchronize();//Pause (pause instruction might be better) 99 | if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){ 100 | __sync_fetch_and_add(&lock->writeBarrier.value, 1); 101 | bRaised = true; 102 | } 103 | readPatience = readPatience + 1; 104 | } 105 | goto start; 106 | } 107 | if(bRaised){ 108 | __sync_fetch_and_sub(&lock->writeBarrier.value, 1); 109 | } 110 | } 111 | 112 | void ttsalock_read_unlock(TTSRDXLock *lock) { 113 | NZI_DEPART(&lock->nonZeroIndicator); 114 | } 115 | -------------------------------------------------------------------------------- /src/new_rep/tests/test_qd_queue.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available 5 | 6 | #include "qd_queues/qd_queue.h" 7 | #include "tests/test_framework.h" 8 | #include "misc/random.h" 9 | 10 | int test_initialize(){ 11 | 12 | QDQueue test; 13 | qdq_initialize(&test); 14 | return 1; 15 | 16 | } 17 | volatile atomic_ulong counter = ATOMIC_VAR_INIT(0); 18 | void critical_section(unsigned int messageSize, void * message){ 19 | if(messageSize == 0 && message != NULL){ /* Prevent warning */ 20 | atomic_fetch_add(&counter, 1); 21 | }else{ 22 | assert(false); 23 | } 24 | } 25 | int test_enqueue(int nrOfEnqueues){ 26 | QDQueue queue; 27 | qdq_initialize(&queue); 28 | qdq_open(&queue); 29 | for(int i = 0; i < nrOfEnqueues; i++){ 30 | qdq_enqueue(&queue, critical_section, 0, NULL); 31 | } 32 | return 1; 33 | } 34 | 35 | int test_enqueue_and_flush(int nrOfEnqueues){ 36 | atomic_store(&counter, 0); 37 | QDQueue queue; 38 | qdq_initialize(&queue); 39 | qdq_open(&queue); 40 | unsigned long enqueueCounter = 0; 41 | for(int i = 0; i < nrOfEnqueues; i++){ 42 | if(qdq_enqueue(&queue, critical_section, 0, NULL)){ 43 | enqueueCounter = enqueueCounter + 1; 44 | } 45 | } 46 | qdq_flush(&queue); 47 | assert(atomic_load(&counter) == enqueueCounter); 48 | return 1; 49 | } 50 | 51 | void variable_message_size_cs(unsigned int messageSize, void * message){ 52 | unsigned char * messageBytes = (unsigned char *)message; 53 | for(unsigned int i = 0; i < messageSize; i++){ 54 | assert(((unsigned int)messageBytes[i]) == messageSize); 55 | } 56 | atomic_fetch_add(&counter, 1); 57 | } 58 | int test_variable_message_sizes(int nrOfEnqueues){ 59 | atomic_store(&counter, 0); 60 | QDQueue queue; 61 | qdq_initialize(&queue); 62 | qdq_open(&queue); 63 | unsigned int seed = 0; 64 | unsigned long enqueueCounter = 0; 65 | for(int i = 0; i < nrOfEnqueues; i++){ 66 | unsigned int messageSize = (unsigned int)(15.0*random_double(&seed)); 67 | char messageBuffer[messageSize]; 68 | for(unsigned int i = 0; i < messageSize; i++){ 69 | messageBuffer[i] = (unsigned char)messageSize; 70 | } 71 | if(qdq_enqueue(&queue, 
variable_message_size_cs, messageSize, messageBuffer)){ 72 | enqueueCounter = enqueueCounter + 1; 73 | } 74 | } 75 | qdq_flush(&queue); 76 | assert(atomic_load(&counter) == enqueueCounter); 77 | return 1; 78 | } 79 | 80 | int main(/*int argc, char **argv*/){ 81 | 82 | printf("\n\n\n\033[32m ### STARTING QD QUEUE TESTS! -- \033[m\n\n\n"); 83 | 84 | T(test_initialize(), "test_initialize()"); 85 | 86 | T(test_enqueue(1), "test_enqueue(nrOfEnqueues = 1)"); 87 | T(test_enqueue(2), "test_enqueue(nrOfEnqueues = 2)"); 88 | T(test_enqueue(15), "test_enqueue(nrOfEnqueues = 15)"); 89 | T(test_enqueue(QD_QUEUE_BUFFER_SIZE*2), "test_enqueue(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)"); 90 | 91 | T(test_enqueue_and_flush(1), "test_enqueue_and_flush(nrOfEnqueues = 1)"); 92 | T(test_enqueue_and_flush(2), "test_enqueue_and_flush(nrOfEnqueues = 2)"); 93 | T(test_enqueue_and_flush(15), "test_enqueue_and_flush(nrOfEnqueues = 15)"); 94 | T(test_enqueue_and_flush(QD_QUEUE_BUFFER_SIZE*2), "test_enqueue_and_flush(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)"); 95 | 96 | T(test_variable_message_sizes(1), "test_variable_message_sizes(nrOfEnqueues = 1)"); 97 | T(test_variable_message_sizes(2), "test_variable_message_sizes(nrOfEnqueues = 2)"); 98 | T(test_variable_message_sizes(15), "test_variable_message_sizes(nrOfEnqueues = 15)"); 99 | T(test_variable_message_sizes(QD_QUEUE_BUFFER_SIZE*2), "test_variable_message_sizes(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)"); 100 | 101 | printf("\n\n\n\033[32m ### QD QUEUE TESTS COMPLETED! -- \033[m\n\n\n"); 102 | 103 | exit(0); 104 | 105 | } 106 | 
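/* Sketch of how this queue is meant to be used in a concurrent QD-locking
 * setting (not part of the original tests): a thread first tries to
 * delegate its critical section with qdq_enqueue(); if that fails it tries
 * to become the helper itself. The try_lock/unlock function pointers are
 * hypothetical stand-ins for a real mutual-exclusion lock. */
#ifdef QD_QUEUE_USAGE_SKETCH
static void delegate_or_help(QDQueue * queue,
                             void (*criticalSection)(unsigned int, void *),
                             unsigned int messageSize,
                             void * message,
                             bool (*try_lock)(void),
                             void (*unlock)(void)){
    while(true){
        if(qdq_enqueue(queue, criticalSection, messageSize, message)){
            return; /* delegated: the current helper will run it */
        }
        if(try_lock()){
            qdq_open(queue);                       /* accept new delegations */
            criticalSection(messageSize, message); /* run our own operation */
            qdq_flush(queue);                      /* help: run delegated work, close queue */
            unlock();
            return;
        }
        /* queue closed and lock busy: the helper is still flushing, retry */
    }
}
#endif /* QD_QUEUE_USAGE_SKETCH */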
-------------------------------------------------------------------------------- /src/datastructures/opti_multi_writers_queue.h: -------------------------------------------------------------------------------- 1 | #include <stdbool.h> 2 | #include "utils/smp_utils.h" 3 | 4 | #ifndef MULTI_WRITES_QUEUE_H 5 | #define MULTI_WRITES_QUEUE_H 6 | 7 | #define MWQ_CAPACITY 4048 8 | typedef void * entry; 9 | typedef struct OptiMWQImpl { 10 | char padd1[64]; 11 | CacheLinePaddedBool closed; 12 | char padd2[64]; 13 | CacheLinePaddedULong elementCount; 14 | entry elements[MWQ_CAPACITY]; 15 | char padd3[64 - ((sizeof(entry)*MWQ_CAPACITY) % 64)]; 16 | } OptiMWQueue; 17 | 18 | 19 | 20 | OptiMWQueue * omwqueue_create(); 21 | OptiMWQueue * omwqueue_initialize(OptiMWQueue * queue); 22 | void omwqueue_free(OptiMWQueue * queue); 23 | static bool omwqueue_offer(OptiMWQueue * queue, entry e); 24 | static void omwqueue_flush(OptiMWQueue * queue, void (*writer)(void *, void **)); 25 | static void omwqueue_reset_fully_read(OptiMWQueue * queue); 26 | 27 | static inline 28 | unsigned long min(unsigned long i1, unsigned long i2){ 29 | return i1 < i2 ? i1 : i2; 30 | } 31 | static inline 32 | bool omwqueue_offer(OptiMWQueue * queue, entry e){ 33 | bool closed; 34 | load_acq(closed, queue->closed.value); 35 | if(!closed){ 36 | int index = __sync_fetch_and_add(&queue->elementCount.value, 1); 37 | if(index < MWQ_CAPACITY){ 38 | store_rel(queue->elements[index], e); 39 | __sync_synchronize();//Flush 40 | return true; 41 | }else{ 42 | store_rel(queue->closed.value, true); 43 | __sync_synchronize();//Flush 44 | return false; 45 | } 46 | }else{ 47 | return false; 48 | } 49 | } 50 | 51 | static inline 52 | void omwqueue_flush(OptiMWQueue * queue, void (*writer)(void *, void **)){ 53 | unsigned long numOfElementsToRead; 54 | unsigned long newNumOfElementsToRead; 55 | unsigned long currentElementIndex = 0; 56 | bool closed = false; 57 | load_acq(numOfElementsToRead, queue->elementCount.value); 58 | if(numOfElementsToRead >= MWQ_CAPACITY){ 59 | closed = true; 60 | numOfElementsToRead = MWQ_CAPACITY; 61 | } 62 | 63 | while(true){ 64 | if(currentElementIndex < numOfElementsToRead){ 65 | //There is definitely an element that we should read 66 | entry theElement; 67 | load_acq(theElement, queue->elements[currentElementIndex]); 68 | while(theElement == NULL) { 69 | __sync_synchronize(); 70 | load_acq(theElement, queue->elements[currentElementIndex]); 71 | } 72 | store_rel(queue->elements[currentElementIndex], NULL); 73 | currentElementIndex = currentElementIndex + 1; 74 | writer(theElement, NULL); 75 | }else if (closed){ 76 | //The queue is closed and there are no more elements that need to be read: 77 | return; 78 | }else{ 79 | //Seems like there are no elements that should be read and the queue is 80 | //not closed. Check again if there are still no more elements that should 81 | //be read before closing the queue 82 | load_acq(newNumOfElementsToRead, queue->elementCount.value); 83 | if(newNumOfElementsToRead == numOfElementsToRead){ 84 | //numOfElementsToRead has not changed. Close the queue. 
85 | numOfElementsToRead = 86 | min(get_and_set_ulong(&queue->elementCount.value, MWQ_CAPACITY + 1), 87 | MWQ_CAPACITY); 88 | closed = true; 89 | }else if(newNumOfElementsToRead < MWQ_CAPACITY){ 90 | numOfElementsToRead = newNumOfElementsToRead; 91 | }else{ 92 | closed = true; 93 | numOfElementsToRead = MWQ_CAPACITY; 94 | } 95 | } 96 | } 97 | } 98 | 99 | static inline 100 | void omwqueue_reset_fully_read(OptiMWQueue * queue){ 101 | store_rel(queue->elementCount.value, 0); 102 | store_rel(queue->closed.value, false); 103 | } 104 | #endif 105 | -------------------------------------------------------------------------------- /src/lock/agnostic_fdx_lock.h: -------------------------------------------------------------------------------- 1 | #include <stdbool.h> 2 | #include "datastructures/dr_multi_writers_queue.h" 3 | #include "common_lock_constants.h" 4 | #include "utils/support_many_non_zero_indicator_types.h" 5 | #include "utils/support_many_lock_types.h" 6 | 7 | #ifndef AGNOSTIC_FDX_LOCK_H 8 | #define AGNOSTIC_FDX_LOCK_H 9 | 10 | #ifdef LOCK_TYPE_WPRW_MCSLock 11 | //*********************************** 12 | //MCSLock 13 | //*********************************** 14 | #include "mcs_lock.h" 15 | 16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock 17 | 18 | #elif defined (LOCK_TYPE_WPRW_CohortLock) 19 | //*********************************** 20 | //CohortLock 21 | //*********************************** 22 | #include "cohort_lock.h" 23 | 24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock 25 | 26 | #elif defined (LOCK_TYPE_WPRW_TATASLock) 27 | //*********************************** 28 | //TATASLock 29 | //*********************************** 30 | #include "tatas_lock.h" 31 | 32 | #define LOCK_DATATYPE_NAME_WPRW TATASLock 33 | 34 | #else 35 | 36 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified 37 | 38 | #endif 39 | 40 | struct FlatCombNodeImpl; 41 | 42 | typedef union CacheLinePaddedFlatCombNodePtrImpl { 43 | struct FlatCombNodeImpl * value; 44 | char padding[64]; 45 | } CacheLinePaddedFlatCombNodePtr; 46 | 47 | typedef struct FlatCombNodeImpl { 48 | char pad1[128]; 49 | struct FlatCombNodeImpl * next; 50 | void * data; 51 | void ** responseLocation; 52 | unsigned long last_used; 53 | char pad2[128 - (3 * sizeof(void *) + sizeof(unsigned long)) % 64]; 54 | void (*request)(void *, void **); 55 | char pad3[128 - (sizeof(void *)) % 64]; 56 | CacheLinePaddedBool active; 57 | char pad4[128]; 58 | } FlatCombNode; 59 | 60 | 61 | typedef struct AgnosticFDXLockImpl { 62 | CacheLinePaddedFlatCombNodePtr combineList; 63 | unsigned long combineCount; 64 | char pad1[64 - sizeof(unsigned long) % 64]; 65 | char pad2[64]; 66 | void (*defaultWriter)(void *, void**); 67 | char pad3[64 - sizeof(void * (*)(void*)) % 64]; 68 | char pad4[128]; 69 | LOCK_DATATYPE_NAME_WPRW lock; 70 | char pad5[64]; 71 | } AgnosticFDXLock; 72 | 73 | 74 | 75 | AgnosticFDXLock * afdxlock_create(void (*writer)(void *, void **)); 76 | void afdxlock_free(AgnosticFDXLock * lock); 77 | void afdxlock_initialize(AgnosticFDXLock * lock, void (*writer)(void *, void **)); 78 | void afdxlock_register_this_thread(); 79 | void afdxlock_write(AgnosticFDXLock *lock, void * writeInfo); 80 | void afdxlock_write_with_response(AgnosticFDXLock *lock, 81 | void (*writer)(void *, void **), 82 | void * data, 83 | void ** responseLocation); 84 | void * afdxlock_write_with_response_block(AgnosticFDXLock *lock, 85 | void (*delegateFun)(void *, void **), 86 | void * data); 87 | static void afdxlock_delegate(AgnosticFDXLock *lock, 88 | void (*delegateFun)(void *, void **), 89 | void * data); 90 | void afdxlock_write_read_lock(AgnosticFDXLock *lock); 91 | void afdxlock_write_read_unlock(AgnosticFDXLock * lock); 92 | void afdxlock_read_lock(AgnosticFDXLock *lock); 93 | void afdxlock_read_unlock(AgnosticFDXLock *lock); 94 | 
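/* Illustrative usage sketch (not part of the original header): delegation
 * hands the (function, argument) pair to the current combiner, which may
 * execute it on this thread's behalf. The example_* names are
 * hypothetical. */
#ifdef AGNOSTIC_FDX_USAGE_EXAMPLE
static void example_op(void * data, void ** responseLocation){
    int * value = (int *)data;
    *value = *value + 1;
    if(responseLocation != NULL){
        *responseLocation = data; /* publish a response when one is expected */
    }
}

static void example_use(AgnosticFDXLock * lock, int * example_data){
    afdxlock_register_this_thread();
    /* fire-and-forget delegation */
    afdxlock_delegate(lock, example_op, example_data);
    /* delegation that blocks until the response is available */
    void * response = afdxlock_write_with_response_block(lock, example_op, example_data);
    (void)response;
}
#endif /* AGNOSTIC_FDX_USAGE_EXAMPLE */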
95 | static inline 96 | void afdxlock_delegate(AgnosticFDXLock *lock, void (*delegateFun)(void *, void **), void * data) { 97 | afdxlock_write_with_response(lock, delegateFun, data, NULL); 98 | } 99 | static inline 100 | void activateFCNode(AgnosticFDXLock *lock, FlatCombNode * fcNode){ 101 | fcNode->active.value = true; 102 | FlatCombNode ** pointerToOldValue = &lock->combineList.value; 103 | FlatCombNode * oldValue = ACCESS_ONCE(*pointerToOldValue); 104 | while (true) { 105 | fcNode->next = oldValue; 106 | if (__sync_bool_compare_and_swap(pointerToOldValue, oldValue, fcNode)) 107 | return; 108 | oldValue = ACCESS_ONCE(*pointerToOldValue); 109 | } 110 | } 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /qd_library/queues/entry_queue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef qd_entry_queue_hpp 2 | #define qd_entry_queue_hpp qd_entry_queue_hpp 3 | 4 | #include <array> 5 | #include <atomic> 6 | #include <new> 7 | #include <utility> 8 | #include "util/type_tools.hpp" 9 | 10 | /** 11 | * @brief a buffer-based tantrum queue with fixed-size entries 12 | * @tparam ENTRIES the number of entries 13 | */ 14 | template<long ENTRIES, long BUFFER_SIZE> 15 | class entry_queue { 16 | /** type for the size field for queue entries, loads must not be optimized away in flush */ 17 | typedef std::atomic<long> sizetype; 18 | 19 | /** type for function pointers to be stored in this queue */ 20 | typedef void(*ftype)(char*); 21 | 22 | struct entry_t { 23 | std::atomic<ftype> fun; 24 | char buf[BUFFER_SIZE]; 25 | }; 26 | void forwardall(long, long) {}; 27 | template<typename P, typename... Ts> 28 | void forwardall(long idx, long offset, P&& p, Ts&&... ts) { 29 | auto ptr = reinterpret_cast<P*>(&entry_array[idx].buf[offset]); 30 | new (ptr) P(std::forward<P>(p)); 31 | forwardall(idx, offset+sizeof(p), std::forward<Ts>(ts)...); 32 | } 33 | public: 34 | /** constants for current state of the queue */ 35 | enum class status : long { OPEN=0, SUCCESS=0, FULL, CLOSED }; 36 | 37 | entry_queue() : counter(ENTRIES), closed(status::CLOSED) {} 38 | /** opens the queue */ 39 | void open() { 40 | counter.store(0, std::memory_order_relaxed); 41 | closed.store(status::OPEN, std::memory_order_relaxed); 42 | } 43 | 44 | /** 45 | * @brief enqueues an entry 46 | * @tparam Ps types of the parameters stored for the associated function 47 | * @param op wrapper function for associated function 48 | * @return SUCCESS on successful storing in queue, FULL if the queue is full and CLOSED if the queue is closed explicitly 49 | */ 50 | template<typename... Ps> 51 | status enqueue(void (*op)(char*), Ps*... ps) { 52 | auto current_status = closed.load(std::memory_order_relaxed); 53 | if(current_status != status::OPEN) { 54 | return current_status; 55 | } 56 | /* entry size = size of size + size of wrapper functor + size of promise + size of all parameters*/ 57 | constexpr long size = sumsizes<Ps...>::size; 58 | /* get memory in buffer */ 59 | long index = counter.fetch_add(1, std::memory_order_relaxed); 60 | if(index < ENTRIES) { 61 | static_assert(size <= BUFFER_SIZE, "entry_queue buffer per entry too small."); 62 | /* entry available: move op, p and parameters to buffer, then set size of entry */ 63 | forwardall(index, 0, std::move(*ps)...); 64 | entry_array[index].fun.store(op, std::memory_order_release); 65 | return status::SUCCESS; 66 | } else { 67 | return status::FULL; 68 | } 69 | } 70 | 71 | /** execute all stored operations, leave queue in closed state */ 72 | void flush() { 73 | long todo = 0; 74 | bool open = true; 75 | while(open) { 76 | long done = todo; 77 | todo = counter.load(std::memory_order_relaxed); 78 | if(todo == done) { /* close queue */ 79 | todo = counter.exchange(ENTRIES, std::memory_order_relaxed); 80 | closed.store(status::CLOSED, std::memory_order_relaxed); 81 | open = false; 82 | } 83 | if(todo >= static_cast<long>(ENTRIES)) { /* queue closed */ 84 | todo = ENTRIES; 85 | closed.store(status::CLOSED, std::memory_order_relaxed); 86 | open = false; 87 | } 88 | for(long index = done; index < todo; index++) { 89 | /* synchronization on entry size field: 0 until entry available */ 90 | ftype fun = nullptr; 91 | do { 92 | fun = entry_array[index].fun.load(std::memory_order_acquire); 93 | } while(!fun); 94 | 95 | /* call functor with pointer to promise (of unknown type) */ 96 | fun(&entry_array[index].buf[0]); 97 | 98 | /* cleanup: call destructor of (now empty) functor and clear buffer area */ 99 | // fun->~ftype(); 100 | entry_array[index].fun.store(nullptr, std::memory_order_relaxed); 101 | } 102 | } 103 | } 104 | private: 105 | /** counter for how many entries are already in use */ 106 | std::atomic<long> counter; 107 | char pad[128]; 108 | /** optimization flag: no writes when queue in known-closed state */ 109 | std::atomic<status> closed; 110 | char pad2[128]; 111 | /** the buffer for entries to this queue */ 112 | std::array<entry_t, ENTRIES> entry_array; 113 | }; 114 | 115 | #endif /* qd_buffer_queue_hpp */ 116 | --------------------------------------------------------------------------------
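A usage sketch for entry_queue (illustrative only; the template arguments and the int-printing wrapper are assumptions, and sumsizes is taken to sum the sizes of the parameter types): the caller enqueues a wrapper function together with pointers to arguments that are moved into the entry buffer; flush() then executes every wrapper on its buffered arguments and leaves the queue closed.

#include <cstdio>

static void print_int(char* buf) {
    int* v = reinterpret_cast<int*>(buf);
    std::printf("%d\n", *v);
    v->~int(); /* destroy the buffered copy created by enqueue */
}

int main() {
    entry_queue<64, 64> queue; /* 64 entries with 64-byte buffers (assumed sizes) */
    queue.open();
    int value = 42;
    if (queue.enqueue(print_int, &value) == entry_queue<64, 64>::status::SUCCESS) {
        queue.flush(); /* runs print_int on the buffered copy of value */
    }
    return 0;
}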