├── qd_library
    ├── .gitignore
    ├── threadid.cpp
    ├── util
    │   ├── pause.hpp
    │   └── type_tools.hpp
    ├── locks
    │   ├── tatas_lock.hpp
    │   ├── mutex_lock.hpp
    │   ├── pthreads_lock.hpp
    │   └── waitable_lock.hpp
    ├── readindicator
    │   └── reader_groups.hpp
    ├── waiting_future.hpp
    ├── threadid.hpp
    ├── qd.hpp
    ├── queues
    │   ├── simple_locked_queue.hpp
    │   └── entry_queue.hpp
    ├── qd_condition_variable.hpp
    └── padded.hpp
├── src
    ├── datastructures_bench
    │   ├── PR
    │   │   ├── .gitignore
    │   │   ├── perf_meas
    │   │   ├── gc
    │   │   │   ├── random.h
    │   │   │   ├── ptst.h
    │   │   │   ├── gc.h
    │   │   │   ├── portable_defns.h
    │   │   │   ├── ptst.c
    │   │   │   └── intel_defns.h
    │   │   ├── Makefile
    │   │   ├── README.md
    │   │   ├── prioq.h
    │   │   ├── common.c
    │   │   ├── gdb_skiplist_print.py
    │   │   └── common.h
    │   ├── synch_algorithms
    │   │   ├── glue_mcs_starve.cpp
    │   │   ├── glue_qd.cpp
    │   │   ├── glue_qd_starve.cpp
    │   │   ├── glue_qd_cas.cpp
    │   │   ├── glue_tatas.cpp
    │   │   ├── glue_qd_nodetach.cpp
    │   │   ├── glue_hqd.cpp
    │   │   ├── synch_algs_types.h
    │   │   ├── glue_mcs.cpp
    │   │   ├── synch_algs_system.h
    │   │   ├── glue.cpp.static
    │   │   ├── cpplock.cpp
    │   │   ├── glue.cpp
    │   │   ├── cpplock_nodetach.cpp
    │   │   ├── cpplock.h
    │   │   ├── clh.h
    │   │   ├── synch_algs_config.h
    │   │   └── qdlock.c
    │   └── datastructures
    │   │   └── pairingheap
    │   │       ├── dxlocked_pairingheap.h
    │   │       └── test_pairingheap.c
    ├── lock
    │   ├── common_lock_constants.h
    │   ├── rglue_hqd.cpp
    │   ├── rglue_qd.cpp
    │   ├── cpprdx.h
    │   ├── ticket_lock.h
    │   ├── aticket_lock.h
    │   ├── cpprdx.cpp
    │   ├── tts_rdx_lock.h
    │   ├── tatas_lock.h
    │   ├── wprw_lock.h
    │   ├── ticket_lock.c
    │   ├── simple_delayed_writers_lock.h
    │   ├── agnostic_dx_lock.c
    │   ├── all_equal_rdx_lock.h
    │   ├── tatas_lock.c
    │   ├── aticket_lock.c
    │   ├── mcs_lock.h
    │   ├── flat_comb_rdx_lock.h
    │   ├── rcpp_lock.cpp
    │   ├── agnostic_rdx_lock.h
    │   ├── cohort_lock.h
    │   ├── extract_numa_structure.py
    │   ├── mcs_lock.c
    │   ├── rhqd_lock.c
    │   ├── rcpp_lock.h
    │   ├── wprw_lock.c
    │   ├── cohort_lock.c
    │   ├── tts_rdx_lock.c
    │   └── agnostic_fdx_lock.h
    ├── utils
    │   ├── numa_node_info_support.c
    │   ├── thread_identifier.h
    │   ├── thread_identifier.c
    │   ├── numa_node_info_support.h
    │   ├── support_many_non_zero_indicator_types.h
    │   └── smp_utils.h
    ├── datastructures
    │   ├── dr_multi_writers_queue.c
    │   ├── numa_ingress_egress_nzi.c
    │   ├── multi_writers_queue.c
    │   ├── opti_multi_writers_queue.c
    │   ├── reader_groups_nzi.h
    │   ├── numa_ingress_egress_nzi.h
    │   ├── padded.hpp
    │   └── opti_multi_writers_queue.h
    ├── new_rep
    │   ├── misc
    │   │   ├── debug.h
    │   │   ├── random.h
    │   │   ├── thread_includes.h
    │   │   └── padded_types.h
    │   ├── tests
    │   │   ├── test_framework.h
    │   │   └── test_qd_queue.c
    │   └── locks
    │   │   ├── qd_lock.h
    │   │   ├── tatas_lock.h
    │   │   └── locks.h
    ├── benchmark
    │   ├── skiplist
    │   │   ├── skiplist.h
    │   │   └── kvset.h
    │   ├── run_benchmarks_on_intel_i7.py
    │   ├── run_benchmarks_on_amd_fx_6100.py
    │   ├── run_benchmarks_on_sandy.py
    │   ├── compare_benchmarks.py
    │   ├── produce_graphs_template.py
    │   ├── pairingheap
    │   │   ├── dxlocked_pairingheap.h
    │   │   └── test_pairingheap.c
    │   ├── perf_magic
    │   ├── perf_magic_simple
    │   ├── benchmark_lock.py
    │   ├── cache_benchmark_lock_simple.py
    │   ├── benchmark_lockXOpDist.py
    │   ├── benchmark_lock_XNonCW.py
    │   ├── cache_benchmark_lock.py
    │   ├── cache_benchmark_lockXOpDist.py
    │   └── cache_benchmark_lock_XNonCW.py
    ├── tests
    │   ├── test_framework.h
    │   └── test_multi_writers_queue.c
    └── profile
    │   └── profile_perf.py
├── .gitignore
└── SConstruct


/qd_library/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/.gitignore:
--------------------------------------------------------------------------------
1 | *.dat
2 | *.o


--------------------------------------------------------------------------------
/src/lock/common_lock_constants.h:
--------------------------------------------------------------------------------
1 | #ifndef COMMON_LOCK_CONSTANTS_H
2 | #define COMMON_LOCK_CONSTANTS_H
3 | 
4 | #endif
5 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/perf_meas:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/parapluu/lock_benchmarking/HEAD/src/datastructures_bench/PR/perf_meas


--------------------------------------------------------------------------------
/src/utils/numa_node_info_support.c:
--------------------------------------------------------------------------------
1 | #include "numa_node_info_support.h"
2 | 
3 | CPUToNodeMapWrapper CPUToNodeMap __attribute__((aligned(64)));
4 | 


--------------------------------------------------------------------------------
/src/datastructures/dr_multi_writers_queue.c:
--------------------------------------------------------------------------------
1 | #include <stdlib.h>
2 | 
3 | #include "dr_multi_writers_queue.h"
4 | #include "utils/smp_utils.h"
5 | 
6 | 


--------------------------------------------------------------------------------
/src/datastructures/numa_ingress_egress_nzi.c:
--------------------------------------------------------------------------------
1 | #include "numa_ingress_egress_nzi.h"
2 | 
3 | __thread CacheLinePaddedInt myIngressEgressArriveNumaNode __attribute__((aligned(64)));
4 | 


--------------------------------------------------------------------------------
/qd_library/threadid.cpp:
--------------------------------------------------------------------------------
1 | #include "threadid.hpp"
2 | 
3 | unsigned long thread_id_store::max_id = 0;
4 | std::set<unsigned long> thread_id_store::orphans;
5 | std::mutex thread_id_store::mutex;
6 | 
7 | thread_local thread_id_t thread_id;
8 | 


--------------------------------------------------------------------------------
/src/new_rep/misc/debug.h:
--------------------------------------------------------------------------------
1 |             for(int i = index; i < messageEndOffset; i++){
2 |                 printf("%02x", ((unsigned char)(q->buffer[i])) );
3 |             //printf("%2X", ((unsigned char *) q->buffer)[i] );
4 |             }
5 | 


--------------------------------------------------------------------------------
/src/utils/thread_identifier.h:
--------------------------------------------------------------------------------
 1 | #ifndef THREAD_IDENTIFIER_H
 2 | #define THREAD_IDENTIFIER_H
 3 | 
 4 | #include "smp_utils.h"
 5 | 
 6 | extern __thread CacheLinePaddedInt myId;
 7 | extern int myIdCounter;
 8 | 
 9 | void assign_id_to_thread();
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/src/new_rep/misc/random.h:
--------------------------------------------------------------------------------
 1 | #ifndef RANDOM_H
 2 | #define RANDOM_H
 3 | 
 4 | #include "stdlib.h"
 5 | 
 6 | 
 7 | double random_double(unsigned int *seed_ptr){
 8 |     double randomDouble = (double)rand_r(seed_ptr);
 9 |     return randomDouble/RAND_MAX;
10 | }
11 | 
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/src/utils/thread_identifier.c:
--------------------------------------------------------------------------------
1 | #include "thread_identifier.h"
2 | 
3 | __thread CacheLinePaddedInt myId __attribute__((aligned(128)));
4 | int myIdCounter __attribute__((aligned(128))) = 0;
5 | 
6 | void assign_id_to_thread(){
7 |     myId.value = __sync_fetch_and_add(&myIdCounter, 1);
8 | }
9 | 


--------------------------------------------------------------------------------
/qd_library/util/pause.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_pause_hpp
 2 | #define qd_pause_hpp qd_pause_hpp
 3 | 
 4 | namespace qd {
 5 | 
 6 | static inline void pause() {
 7 | 	//__sync_synchronize();
 8 | 	__asm__ __volatile__("pause");
 9 | //	std::this_thread::yield();
10 | }
11 | 
12 | } /* namespace qd */
13 | 
14 | #endif /* qd_pause_hpp */
15 | 


--------------------------------------------------------------------------------
/src/new_rep/misc/thread_includes.h:
--------------------------------------------------------------------------------
 1 | #ifndef THREAD_INCLUDES_H
 2 | #define THREAD_INCLUDES_H
 3 | 
 4 | #include <time.h>
 5 | #include <stdlib.h>
 6 | #include <pthread.h>//Until c11 threads.h is available
 7 | #include <time.h>
 8 | #include <sched.h>
 9 | 
10 | static inline void thread_yield(){
11 |     sched_yield();
12 | }
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/src/datastructures/multi_writers_queue.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include "multi_writers_queue.h"
 4 | #include "utils/smp_utils.h"
 5 | 
 6 | MWQueue * mwqueue_create(){
 7 |     MWQueue * queue = malloc(sizeof(MWQueue));
 8 |     return mwqueue_initialize(queue);
 9 | }
10 | 
11 | 
12 | void mwqueue_free(MWQueue * queue){
13 |     free(queue);
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_mcs_starve.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_lock;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::may_starve>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_qd.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_futex_lock;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::starvation_free>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_qd_starve.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_futex_lock;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::may_starve>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_qd_cas.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_futex_lock;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_compare_and_swap>, starvation_policy_t::starvation_free>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_tatas.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = extended_lock<tatas_lock>;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::may_starve>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_qd_nodetach.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_futex_lock;
 4 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::starvation_free>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock_nodetach.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_hqd.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_lock;
 4 | using locktype = hqdlock_impl<intlock, intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, pinning_policy_t::pinned_threads, starvation_policy_t::starvation_free>;
 5 | 
 6 | extern "C" {
 7 | #include "cpplock.h"
 8 | #include "cpplock.cpp"
 9 | } // extern "C"
10 | 


--------------------------------------------------------------------------------
/src/benchmark/skiplist/skiplist.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SKIPLIST_H__
 2 | #define __SKIPLIST_H__
 3 | 
 4 | #include "kvset.h"
 5 | #include "stdlib.h"
 6 | 
 7 | KVSet * new_skiplist(int (*compare_function)(void *, void *), 
 8 |                      void (*free_function)(void *),
 9 |                      void *(*malloc_function)(size_t),
10 |                      unsigned int key_offset); 
11 | 
12 | KVSet * new_skiplist_default(void);
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *~
 3 | *.png
 4 | *.dat
 5 | *.pyc
 6 | \#*#
 7 | test_multi_writers_queue
 8 | rw_bench_clone_aer
 9 | rw_bench_clone_sdw
10 | rw_bench_clone_mcs
11 | rw_bench_clone_drmcs
12 | rw_bench_clone_aticket
13 | rw_bench_clone_ticket
14 | rw_bench_clone_cohort
15 | rw_bench_clone_wprwcohort
16 | test_aer
17 | test_aticket
18 | test_cohort
19 | test_drmcs
20 | test_mcs
21 | test_sdw
22 | test_ticket
23 | test_wprwcohort
24 | .sconsign.dblite
25 | bin/
26 | bin_debug/
27 | TAGS
28 | core
29 | bench_results
30 | bin_profile
31 | perf_data
32 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/random.h:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * random.h
 3 |  * 
 4 |  * A really simple random-number generator. Crappy linear congruential
 5 |  * taken from glibc, but has at least a 2^32 period.
 6 |  */
 7 | 
 8 | #ifndef __RANDOM_H__
 9 | #define __RANDOM_H__
10 | 
11 | typedef unsigned long rand_t;
12 | 
13 | #define rand_init(_ptst) \
14 |     ((_ptst)->rand = RDTICK())
15 | 
16 | #define rand_next(_ptst) \
17 |     ((_ptst)->rand = ((_ptst)->rand * 1103515245) + 12345)
18 | 
19 | #endif /* __RANDOM_H__ */
20 | 


--------------------------------------------------------------------------------
/src/utils/numa_node_info_support.h:
--------------------------------------------------------------------------------
 1 | #ifndef NUMA_NODE_INFO_SUPPORT_H
 2 | #define NUMA_NODE_INFO_SUPPORT_H
 3 | 
 4 | #include <sched.h>
 5 | #include "smp_utils.h"
 6 | 
 7 | typedef union CPUToNodeMapWrapperImpl {
 8 |     char padding[64];
 9 |     char value[NUMBER_OF_NUMA_NODES * NUMBER_OF_CPUS_PER_NODE];
10 |     char pad[64 - ((sizeof(char) * NUMBER_OF_NUMA_NODES * NUMBER_OF_CPUS_PER_NODE) % 64)];
11 | } CPUToNodeMapWrapper;
12 | 
13 | extern CPUToNodeMapWrapper CPUToNodeMap __attribute__((aligned(64)));
14 | 
15 | static inline
16 | int numa_node_id(){
17 |     return CPUToNodeMap.value[sched_getcpu()];
18 | }
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/src/tests/test_framework.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <assert.h>
 3 | 
 4 | #ifndef __TEST_FRAMEWORK_H__
 5 | #define __TEST_FRAMEWORK_H__
 6 | 
 7 | 
 8 | #define TO_VP(intValue) (void *)(intValue)
 9 | 
10 | #define T(testFunCall, tesName)                 \
11 |     printf("STARTING TEST: ");                  \
12 |     test(testFunCall, tesName);
13 | 
14 | void test(int success, char msg[]){
15 | 
16 |     if(success){
17 |         printf("\033[32m -- SUCCESS! -- \033[m");
18 |     }else{
19 |         printf("\033[31m -- FAIL! -- \033[m");
20 |     }
21 | 
22 |     printf("TEST: %s\n", msg);
23 | 
24 | }
25 | 
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/src/new_rep/tests/test_framework.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <assert.h>
 3 | 
 4 | #ifndef __TEST_FRAMEWORK_H__
 5 | #define __TEST_FRAMEWORK_H__
 6 | 
 7 | 
 8 | #define TO_VP(intValue) (void *)(intValue)
 9 | 
10 | #define T(testFunCall, tesName)                 \
11 |     printf("STARTING TEST: ");                  \
12 |     test(testFunCall, tesName);
13 | 
14 | void test(int success, char msg[]){
15 | 
16 |     if(success){
17 |         printf("\033[32m -- SUCCESS! -- \033[m");
18 |     }else{
19 |         printf("\033[31m -- FAIL! -- \033[m");
20 |     }
21 | 
22 |     printf("TEST: %s\n", msg);
23 | 
24 | }
25 | 
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/synch_algs_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef _TYPES_H_
 2 | #define _TYPES_H_
 3 | 
 4 | #include "synch_algs_system.h"
 5 | 
 6 | typedef union int_aligned32_t {
 7 |    int32_t v CACHE_ALIGN;
 8 |    char pad[CACHE_LINE_SIZE];
 9 | }  int_aligned32_t;
10 | 
11 | typedef union int_aligned64_t {
12 |    int64_t v CACHE_ALIGN;
13 |    char pad[CACHE_LINE_SIZE];
14 | }  int_aligned64_t;
15 | 
16 | #define null                           NULL
17 | #include <stdbool.h>
18 | //#define bool                           int32_t
19 | #define true                           1
20 | #define false                          0
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/datastructures/opti_multi_writers_queue.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include "opti_multi_writers_queue.h"
 4 | #include "utils/smp_utils.h"
 5 | 
 6 | 
 7 | OptiMWQueue * omwqueue_create(){
 8 |     OptiMWQueue * queue = malloc(sizeof(OptiMWQueue));
 9 |     return omwqueue_initialize(queue);
10 | }
11 | 
12 | OptiMWQueue * omwqueue_initialize(OptiMWQueue * queue){
13 |     for(int i = 0; i < MWQ_CAPACITY; i++){
14 |         queue->elements[i] = NULL;
15 |     }
16 |     queue->elementCount.value = MWQ_CAPACITY;
17 |     queue->closed.value = true;
18 |     __sync_synchronize();
19 |     return queue;
20 | }
21 | 
22 | void omwqueue_free(OptiMWQueue * queue){
23 |     free(queue);
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/qd_library/util/type_tools.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_type_tools_hpp
 2 | #define qd_type_tools_hpp qd_type_tools_hpp
 3 | 
 4 | template<typename... Ps>
 5 | struct sumsizes;
 6 | template<typename T, typename... Ps>
 7 | struct sumsizes<T, Ps...> {
 8 | 	static constexpr long size = sizeof(T) + sumsizes<Ps...>::size;
 9 | };
10 | template<>
11 | struct sumsizes<> {
12 | 	static constexpr long size = 0;
13 | };
14 | 
15 | /* structure to create lists of types */
16 | template<typename... Ts>
17 | class types;
18 | template<typename T, typename... Ts>
19 | class types<T, Ts...> {
20 | 	public:
21 | 		typedef T type;
22 | 		typedef types<Ts...> tail;
23 | };
24 | template<>
25 | class types<> {};
26 | 
27 | #endif /* qd_type_tools_hpp */
28 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/Makefile:
--------------------------------------------------------------------------------
 1 | CC	:= gcc 
 2 | CFLAGS	:= -O3 -DINTEL -Wall -std=c99
 3 | LDFLAGS	:= -lpthread `pkg-config --libs gsl`
 4 | 
 5 | OS	:= $(shell uname -s)
 6 |     ifeq ($(OS),Linux)
 7 | 	CFLAGS  += -DCACHE_LINE_SIZE=`getconf LEVEL1_DCACHE_LINESIZE`
 8 |         LDFLAGS += -lrt
 9 |     endif
10 |     ifeq ($(OS),Darwin)
11 | 	CFLAGS += -DCACHE_LINE_SIZE=`sysctl -n hw.cachelinesize`
12 |     endif
13 | 
14 | VPATH	:= gc
15 | DEPS	+= Makefile $(wildcard *.h) $(wildcard gc/*.h)
16 | TARGETS := perf_meas
17 | 
18 | all: 	$(TARGETS)
19 | 
20 | clean:
21 | 	rm -f $(TARGETS) core *.o 
22 | 
23 | %.o: %.c $(DEPS)
24 | 	$(CC) $(CFLAGS) -c -o $@ $<
25 | 
26 | $(TARGETS): %: %.o ptst.o gc.o prioq.o common.o
27 | 	$(CC) -o $@ $^ $(LDFLAGS)
28 | 
29 | 
30 | .PHONY: all clean
31 | 


--------------------------------------------------------------------------------
/src/lock/rglue_hqd.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | #include "threadid.cpp"
 3 | 
 4 | using intlock = mcs_lock;
 5 | //using locktype = mrqdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, starvation_policy_t::starvation_free>;
 6 | //using locktype = mrqdlock_impl<intlock, dual_buffer_queue<4096, 24, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, starvation_policy_t::may_starve>;
 7 | using locktype = mrhqdlock_impl<intlock, intlock, dual_buffer_queue<4096, 24, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, pinning_policy_t::pinned_threads, starvation_policy_t::may_starve>;
 8 | 
 9 | extern "C" {
10 | #include "rcpp_lock.h"
11 | #include "rcpp_lock.cpp"
12 | } // extern "C"
13 | 


--------------------------------------------------------------------------------
/src/lock/rglue_qd.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | #include "threadid.cpp"
 3 | 
 4 | //using intlock = extended_lock<tatas_lock>;
 5 | using intlock = mcs_futex_lock;
 6 | //using locktype = mrqdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, starvation_policy_t::starvation_free>;
 7 | //using locktype = mrqdlock_impl<intlock, dual_buffer_queue<4096, 24, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, starvation_policy_t::may_starve>;
 8 | using locktype = mrqdlock_impl<intlock, dual_buffer_queue<4096, 24, atomic_instruction_policy_t::use_fetch_and_add>, reader_groups<64>, 65536, starvation_policy_t::starvation_free>;
 9 | 
10 | extern "C" {
11 | #include "rcpp_lock.h"
12 | #include "rcpp_lock.cpp"
13 | } // extern "C"
14 | 


--------------------------------------------------------------------------------
/src/lock/cpprdx.h:
--------------------------------------------------------------------------------
 1 | #ifndef cpprdx_h
 2 | #define cpprdx_h cpprdx_h
 3 | 
 4 | #ifdef __cplusplus
 5 | #include "rdx.hpp"
 6 | extern "C" {
 7 | #endif
 8 | 
 9 | typedef struct CPPRDXLockImpl {
10 | 	void (*writer)(void *);
11 | #ifdef __cplusplus
12 | 	RDX_Lock lock;
13 | #else
14 | 	char lock[16000];
15 | #endif
16 | } CPPRDXLock;
17 | CPPRDXLock* cpprdx_create(void (*writer)(void *));
18 | void cpprdx_free(CPPRDXLock* lock);
19 | void cpprdx_initialize(CPPRDXLock* lock, void (*writer)(void *));
20 | void cpprdx_register_this_thread();
21 | void cpprdx_write(CPPRDXLock* lock, void* writeInfo);
22 | void cpprdx_write_read_lock(CPPRDXLock* lock);
23 | void cpprdx_write_read_unlock(CPPRDXLock* lock);
24 | void cpprdx_read_lock(CPPRDXLock* lock);
25 | void cpprdx_read_unlock(CPPRDXLock* lock);
26 | 
27 | #ifdef __cplusplus
28 | }
29 | #endif
30 | 
31 | #endif
32 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue_mcs.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using intlock = mcs_lock;
 4 | //using locktype = qdlock_impl<intlock, buffer_queue<262139>>;
 5 | //using locktype = qdlock_impl<intlock, buffer_queue<4096*40>>;
 6 | //using locktype = qdlock_impl<intlock, dual_buffer_queue<12288, 8>>;
 7 | //using locktype = qdlock_impl<intlock, dual_buffer_queue<2048, 64>>;
 8 | 
 9 | //using locktype = qdlock_impl<intlock, dual_buffer_queue<4096, 32>>;
10 | using locktype = qdlock_impl<intlock, dual_buffer_queue<6144, 16, atomic_instruction_policy_t::use_fetch_and_add>, starvation_policy_t::starvation_free>;
11 | 
12 | 
13 | //using locktype = qdlock_impl<intlock, dual_buffer_queue<24576, 4>>;
14 | //using locktype = qdlock_impl<intlock, entry_queue<4096,96>>;
15 | 
16 | extern "C" {
17 | #include "cpplock.h"
18 | 
19 | #include "cpplock.cpp"
20 | 
21 | } // extern "C"
22 | 


--------------------------------------------------------------------------------
/qd_library/locks/tatas_lock.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_tatas_lock_hpp
 2 | #define qd_tatas_lock_hpp qd_tatas_lock_hpp
 3 | 
 4 | #include<atomic>
 5 | 
 6 | #include "util/pause.hpp"
 7 | 
 8 | /** @brief a test-and-test-and-set lock */
 9 | class tatas_lock {
10 | 	std::atomic<bool> locked; /* TODO can std::atomic_flag be used? */
11 | 	public:
12 | 		tatas_lock() : locked(false) {};
13 | 		tatas_lock(tatas_lock&) = delete; /* TODO? */
14 | 		bool try_lock() {
15 | 			if(is_locked()) return false;
16 | 			return !locked.exchange(true, std::memory_order_acq_rel);
17 | 		}
18 | 		void unlock() {
19 | 			locked.store(false, std::memory_order_release);
20 | 		}
21 | 		bool is_locked() {
22 | 			return locked.load(std::memory_order_acquire);
23 | 		}
24 | 		void lock() {
25 | 			while(!try_lock()) {
26 | 				qd::pause();
27 | 			}
28 | 		}
29 | 		void wake() {}
30 | };
31 | 
32 | #endif /* qd_tatas_lock_hpp */
33 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/synch_algs_system.h:
--------------------------------------------------------------------------------
 1 | #ifndef _SYSTEM_H_
 2 | #define _SYSTEM_H_
 3 | 
 4 | #ifndef CACHE_LINE_SIZE
 5 | #    define CACHE_LINE_SIZE            64
 6 | #endif
 7 | 
 8 | #ifdef __GNUC__
 9 | #    define CACHE_ALIGN                __attribute__ ((aligned (CACHE_LINE_SIZE)))
10 | #    define VAR_ALIGN                  __attribute__ ((aligned (16)))
11 | #elif defined(MSVC)
12 | #    define CACHE_ALIGN                __declspec(align(CACHE_LINE_SIZE)) 
13 | #    define VAR_ALIGN                  __declspec(align(16)) 
14 | #else
15 | #    define CACHE_ALIGN
16 | #endif
17 | 
18 | 
19 | #define PAD_CACHE(A)                  ((CACHE_LINE_SIZE - (A % CACHE_LINE_SIZE))/sizeof(int32_t))
20 | 
21 | 
22 | #ifndef USE_CPUS
23 | #    if defined(linux)
24 | #        define USE_CPUS               sysconf(_SC_NPROCESSORS_ONLN)
25 | #    else
26 | #        define USE_CPUS               1
27 | #    endif
28 | #endif
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/qd_library/readindicator/reader_groups.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_reader_groups_hpp
 2 | #define qd_reader_groups_hpp qd_reader_groups_hpp
 3 | 
 4 | #include "threadid.hpp"
 5 | 
 6 | template<int GROUPS>
 7 | class reader_groups {
 8 | 	struct alignas(64) counter_t {
 9 | 		char pad1[64];
10 | 		std::atomic<int> cnt;
11 | 		char pad2[64];
12 | 		counter_t() : cnt(0) {}
13 | 	};
14 | 	std::array<counter_t, GROUPS> counters;
15 | 	public:
16 | 		reader_groups() {
17 | 			for(int i = 0; i < GROUPS; i++) {
18 | 				counters[i].cnt.store(0, std::memory_order_release);
19 | 			}
20 | 		}
21 | 		bool query() {
22 | 			for(counter_t& counter : counters)
23 | 				if(counter.cnt.load(std::memory_order_acquire) > 0) return true;
24 | 			return false;
25 | 		}
26 | 		void arrive() {
27 | 			counters[thread_id % GROUPS].cnt.fetch_add(1, std::memory_order_release);
28 | 		}
29 | 		void depart() {
30 | 			counters[thread_id % GROUPS].cnt.fetch_sub(1, std::memory_order_release);
31 | 		}
32 | };
33 | 
34 | #endif /* qd_reader_groups_hpp */
35 | 


--------------------------------------------------------------------------------
/src/lock/ticket_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "common_lock_constants.h"
 4 | #include "mcs_lock.h"
 5 | 
 6 | 
 7 | #ifndef TICKET_LOCK_H
 8 | #define TICKET_LOCK_H
 9 | 
10 | typedef struct TicketLockImpl {
11 |     char pad1[64];
12 |     void (*writer)(void *, void **);
13 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
14 |     CacheLinePaddedInt inCounter;
15 |     CacheLinePaddedInt outCounter;
16 | } TicketLock;
17 | 
18 | 
19 | TicketLock * ticketlock_create(void (*writer)(void *, void **));
20 | void ticketlock_free(TicketLock * lock);
21 | void ticketlock_initialize(TicketLock * lock, void (*writer)(void *, void **));
22 | void ticketlock_register_this_thread();
23 | void ticketlock_write(TicketLock *lock, void * writeInfo);
24 | void ticketlock_write_read_lock(TicketLock *lock);
25 | void ticketlock_write_read_unlock(TicketLock * lock);
26 | void ticketlock_read_lock(TicketLock *lock);
27 | void ticketlock_read_unlock(TicketLock *lock);
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/qd_library/waiting_future.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef waiting_future_hpp
 2 | #define waiting_future_hpp waiting_future_hpp
 3 | 
 4 | #include<future>
 5 | 
 6 | template<typename T>
 7 | class waiting_future : public std::future<T> {
 8 | 	public:
 9 | 		waiting_future() {}
10 | 		waiting_future(waiting_future& rhs) : std::future<T>(rhs) {}
11 | 		waiting_future(waiting_future&& rhs) : std::future<T>(std::move(rhs)) {}
12 | 		waiting_future(std::future<T>& rhs) : std::future<T>(rhs) {}
13 | 		waiting_future(std::future<T>&& rhs) : std::future<T>(std::move(rhs)) {}
14 | 		~waiting_future() {
15 | 			if(this->valid()) {
16 | 				this->wait();
17 | 			}
18 | 		}
19 | 		waiting_future& operator=(waiting_future& rhs) {
20 | 			std::future<T>::operator=(rhs);
21 | 			return *this;
22 | 		}
23 | 		waiting_future& operator=(waiting_future&& rhs) {
24 | 			std::future<T>::operator=(std::move(rhs));
25 | 			return *this;
26 | 		}
27 | 		void discard() {
28 | 			std::future<T> tmp;
29 | 			std::swap(tmp, *this);
30 | 		}
31 | };
32 | 
33 | #endif // waiting_future_hpp
34 | 


--------------------------------------------------------------------------------
/src/benchmark/skiplist/kvset.h:
--------------------------------------------------------------------------------
 1 | #ifndef __KVSET_H__
 2 | #define __KVSET_H__
 3 | 
 4 | struct kv_set;
 5 | 
 6 | typedef struct kv_set_functions
 7 | {   
 8 |     void (*delete_table)(struct kv_set * kv_set,
 9 |                          void (*element_free_function)(void *context, void* element),
10 |                          void * context);
11 |     void * (*put)(struct kv_set * kv_set, void * key_value);
12 |     int (*put_new)(struct kv_set * kv_set, void * key_value);
13 |     void * (*remove)(struct kv_set * kv_set, void * key);
14 |     void * (*lookup)(struct kv_set * kv_set, void * key);
15 |     int (*member)(struct kv_set * kv_set, void * key);
16 |     void * (*first)(struct kv_set * kv_set);
17 |     void * (*last)(struct kv_set * kv_set);    
18 |     void * (*next)(struct kv_set * kv_set, void * key);
19 |     void * (*previous)(struct kv_set * kv_set, void * key);
20 | } KVSetFunctions;
21 | 
22 | 
23 | typedef struct kv_set
24 | {
25 |     KVSetFunctions funs;
26 |     unsigned int key_offset;
27 |     void * type_specific_data;
28 | } KVSet;
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/src/new_rep/misc/padded_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef PADDED_TYPES_H
 2 | #define PADDED_TYPES_H
 3 | 
 4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatoic.h is available
 5 | 
 6 | #define CACHE_LINE_SIZE 64
 7 | 
 8 | typedef union {
 9 |     volatile atomic_flag value;
10 |     char padding[CACHE_LINE_SIZE];
11 | } LLPaddedFlag;
12 | 
13 | typedef union {
14 |     volatile atomic_bool value;
15 |     char padding[CACHE_LINE_SIZE];
16 | } LLPaddedBool;
17 | 
18 | typedef union {
19 |     volatile atomic_int value;
20 |     char padding[CACHE_LINE_SIZE];
21 | } LLPaddedInt;
22 | 
23 | typedef union {
24 |     volatile atomic_uint value;
25 |     char padding[CACHE_LINE_SIZE];
26 | } LLPaddedUInt;
27 | 
28 | typedef union {
29 |     volatile atomic_ulong value;
30 |     char padding[CACHE_LINE_SIZE];
31 | } LLPaddedULong;
32 | 
33 | typedef union {
34 |     volatile atomic_intptr_t value;
35 |     char padding[CACHE_LINE_SIZE];
36 | } LLPaddedPointer;
37 | 
38 | typedef union {
39 |     volatile double value;
40 |     char padding[CACHE_LINE_SIZE];
41 | } LLPaddedDouble;
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/lock/aticket_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "common_lock_constants.h"
 4 | #include "mcs_lock.h"
 5 | 
 6 | 
 7 | #ifndef ATICKET_LOCK_H
 8 | #define ATICKET_LOCK_H
 9 | 
10 | typedef struct ATicketLockImpl {
11 |     char pad1[64];
12 |     void (*writer)(void *, void **);
13 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
14 |     CacheLinePaddedInt inCounter;
15 |     CacheLinePaddedInt outCounter;
16 |     CacheLinePaddedInt spinAreas[ARRAY_SIZE];
17 | } ATicketLock;
18 | 
19 | 
20 | ATicketLock * aticketlock_create(void (*writer)(void *, void **));
21 | void aticketlock_free(ATicketLock * lock);
22 | void aticketlock_initialize(ATicketLock * lock, void (*writer)(void *, void **));
23 | void aticketlock_register_this_thread();
24 | void aticketlock_write(ATicketLock *lock, void * writeInfo);
25 | void aticketlock_write_read_lock(ATicketLock *lock);
26 | void aticketlock_write_read_unlock(ATicketLock * lock);
27 | void aticketlock_read_lock(ATicketLock *lock);
28 | void aticketlock_read_unlock(ATicketLock *lock);
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/src/lock/cpprdx.cpp:
--------------------------------------------------------------------------------
 1 | #include "rdx.hpp"
 2 | #include "cpprdx.h"
 3 | 
 4 | CPPRDXLock* cpprdx_create(void (*writer)(void *)) {
 5 | 	auto lock = new CPPRDXLock;
 6 | 	lock->writer = writer;
 7 | 	return lock;
 8 | }
 9 | 
10 | void cpprdx_free(CPPRDXLock* lock) {
11 | 	delete lock;
12 | }
13 | 
14 | void cpprdx_initialize(CPPRDXLock* lock, void (*writer)(void *)) {
15 | 	new (&lock->lock) RDX_Lock;
16 | 	lock->writer = writer;
17 | }
18 | 
19 | void cpprdx_register_this_thread() {
20 | 	// NOP
21 | }
22 | 
23 | void cpprdx_write(CPPRDXLock* lock, void* writeInfo) {
24 | 	void (*f)(void *) = lock->writer;
25 | 	lock->lock.lock_delegate(std::function<void()>( [f, writeInfo] () { (*f)(writeInfo); } ));
26 | }
27 | 
28 | void cpprdx_write_read_lock(CPPRDXLock* lock) {
29 | 	lock->lock.lock_exclusive();
30 | }
31 | 
32 | void cpprdx_write_read_unlock(CPPRDXLock* lock) {
33 | 	lock->lock.unlock_exclusive();
34 | }
35 | 
36 | void cpprdx_read_lock(CPPRDXLock* lock) {
37 | 	lock->lock.lock_read();
38 | }
39 | 
40 | void cpprdx_read_unlock(CPPRDXLock* lock) {
41 | 	lock->lock.unlock_read();
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/README.md:
--------------------------------------------------------------------------------
 1 | PR
 2 | ==
 3 | 
 4 | A skiplist based lock-free priority queue implementation minimizing
 5 | the amount of coherence traffic. Adapted from an implementation of
 6 | Keir Fraser's skiplist
 7 | (http://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf).
 8 | 
 9 | For more information about the priority queue, see
10 | http://user.it.uu.se/~jonli208/priorityqueue.
11 | 
12 | ### Build
13 | 
14 |     make perf_meas
15 | 
16 | ### Usage
17 | 
18 | Run the benchmark application as:
19 | 
20 |     ./perf_meas -n 8 -t 27 -o 64 
21 |     
22 | This will start a benchmark run with 8 threads, uniformly distributed
23 | keys, initial queue length of 2^15 elements, the offset parameter of
24 | the algorithm will be set to 64, and random operation (deletemin,
25 | insert).
26 | 
27 | Run 
28 | 
29 |     ./perf_meas -h
30 | 
31 | for more information about the available parameters.
32 | 
33 | ### Build Dependencies
34 | 
35 |     gsl
36 | 
37 | ### Extras
38 | 
39 | A SPIN model is included, with linearizability checks of the
40 | operations. The -O flag has to be used (if SPIN version >= 6), the
41 | model is using the old scope rules.


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/ptst.h:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * ptst.h
 3 |  * 
 4 |  * Per-thread state management.
 5 |  *
 6 |  *
 7 |  * Copyright (c) 2013, Jonatan Linden
 8 |  * Copyright (c) 2002-2003, K A Fraser
 9 |  */
10 | 
11 | #ifndef __PTST_H__
12 | #define __PTST_H__
13 | 
14 | typedef struct ptst_st ptst_t;
15 | 
16 | #include <gsl/gsl_rng.h>
17 | 
18 | #include "gc.h"
19 | 
20 | struct ptst_st
21 | {
22 |     /* Thread id */
23 |     unsigned int id;
24 |     /* State management */
25 |     ptst_t      *next;
26 |     unsigned int count;
27 | 
28 |     /* Utility structures */
29 |     gc_t        *gc;
30 |     char pad[56];
31 |     unsigned int rand;
32 | };
33 | 
34 |  /*
35 |  * Enter/leave a critical region. A thread gets a state handle for
36 |  * use during critical regions.
37 |  */
38 | 
39 | void critical_enter(void );
40 | 
41 | #define critical_exit() gc_exit(ptst)
42 | 
43 | /* Iterators */
44 | extern ptst_t *ptst_list;
45 | 
46 | #define ptst_first()  (ptst_list)
47 | #define ptst_next(_p) ((_p)->next)
48 | 
49 | 
50 | 
51 | #endif /* __PTST_H__ */
52 | 
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/src/lock/tts_rdx_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "datastructures/opti_multi_writers_queue.h"
 3 | #include "common_lock_constants.h"
 4 | #include "utils/support_many_non_zero_indicator_types.h"
 5 | 
 6 | #ifndef TTS_RDX_LOCK_H
 7 | #define TTS_RDX_LOCK_H
 8 | 
 9 | 
10 | typedef struct TTSRDXLockImpl {
11 |     OptiMWQueue writeQueue;
12 |     char pad1[64];
13 |     void (*writer)(void *, void **);
14 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
15 |     char pad3[64];
16 |     CacheLinePaddedInt writeBarrier;
17 |     CacheLinePaddedBool lockWord;
18 |     char pad4[64];
19 |     NZI_DATATYPE_NAME nonZeroIndicator;
20 | } TTSRDXLock;
21 | 
22 | 
23 | 
24 | TTSRDXLock * ttsalock_create(void (*writer)(void *, void **));
25 | void ttsalock_free(TTSRDXLock * lock);
26 | void ttsalock_initialize(TTSRDXLock * lock, void (*writer)(void *, void **));
27 | void ttsalock_register_this_thread();
28 | void ttsalock_write(TTSRDXLock *lock, void * writeInfo);
29 | void ttsalock_write_read_lock(TTSRDXLock *lock);
30 | void ttsalock_write_read_unlock(TTSRDXLock * lock);
31 | void ttsalock_read_lock(TTSRDXLock *lock);
32 | void ttsalock_read_unlock(TTSRDXLock *lock);
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/qd_library/threadid.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef threadid_hpp
 2 | #define threadid_hpp threadid_hpp
 3 | 
 4 | #include <mutex>
 5 | #include <set>
 6 | 
 7 | class thread_id_store {
 8 | 	static unsigned long max_id;
 9 | 	static std::set<unsigned long> orphans;
10 | 	static std::mutex mutex;
11 | 	typedef std::lock_guard<std::mutex> scoped_lock;
12 | 	public:
13 | 		static unsigned long get() {
14 | 			scoped_lock lock(mutex);
15 | 			if(orphans.empty()) {
16 | 				max_id++;
17 | 				return max_id;
18 | 			} else {
19 | 				auto first = orphans.begin();
20 | 				auto result = *first;
21 | 				orphans.erase(first);
22 | 				return result;
23 | 			}
24 | 		}
25 | 		static void free(unsigned long idx) {
26 | 			scoped_lock lock(mutex);
27 | 			if(idx == max_id) {
28 | 				max_id--;
29 | 				while(orphans.erase(max_id)) {
30 | 					max_id--;
31 | 				}
32 | 			} else {
33 | 				orphans.insert(idx);
34 | 			}
35 | 		}
36 | };
37 | 
38 | class thread_id_t {
39 | 	unsigned long id;
40 | 	public:
41 | 		operator unsigned long() {
42 | 			return id;
43 | 		}
44 | 		thread_id_t() : id(thread_id_store::get()) {}
45 | 		~thread_id_t() {
46 | 			thread_id_store::free(id);
47 | 		}
48 | };
49 | 
50 | extern thread_local thread_id_t thread_id;
51 | 
52 | #endif // threadid_hpp
53 | 


--------------------------------------------------------------------------------
/SConstruct:
--------------------------------------------------------------------------------
 1 | 
 2 | #Locks and lock benchmarks
 3 | 
 4 | AddOption('--cpp_locks',
 5 |           action='store_true',
 6 |           dest='cpp_locks',
 7 |           default=False)
 8 | 
 9 | AddOption('--llvm',
10 |           action='store_true',
11 |           dest='use_llvm',
12 |           default=False)
13 | 
14 | AddOption('--use_cas_fetch_and_add',
15 |           action='store_true',
16 |           dest='use_cas_fetch_and_add',
17 |           default=False)
18 | 
19 | AddOption('--use_pinning',
20 |           action='store_true',
21 |           dest='use_pinning',
22 |           default=False)
23 | 
24 | AddOption('--use_queue_stats',
25 |           action='store_true',
26 |           dest='use_queue_stats',
27 |           default=False)
28 | 
29 | AddOption('--use_print_thread_queue_stats',
30 |           action='store_true',
31 |           dest='use_print_thread_queue_stats',
32 |           default=False)
33 | 
34 | mode = 'release'
35 | 
36 | SConscript('SConscript.py', variant_dir='bin', duplicate=0, exports='mode')
37 | 
38 | mode = 'debug'
39 | 
40 | SConscript('SConscript.py', variant_dir='bin_debug', duplicate=0, exports='mode')
41 | 
42 | mode = 'profile'
43 | 
44 | SConscript('SConscript.py', variant_dir='bin_profile', duplicate=0, exports='mode')
45 | 


--------------------------------------------------------------------------------
/src/lock/tatas_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "common_lock_constants.h"
 3 | #include "utils/smp_utils.h"
 4 | 
 5 | #ifndef TATAS_LOCK_H
 6 | #define TATAS_LOCK_H
 7 | 
 8 | 
 9 | typedef struct TATASLockImpl {
10 |     char pad1[64];
11 |     void (*writer)(void *, void **);
12 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
13 |     char pad3[64];
14 |     CacheLinePaddedBool lockWord;
15 |     char pad4[64];
16 | } TATASLock;
17 | 
18 | 
19 | 
20 | TATASLock * tataslock_create(void (*writer)(void *, void **));
21 | void tataslock_free(TATASLock * lock);
22 | void tataslock_initialize(TATASLock * lock, void (*writer)(void *, void **));
23 | void tataslock_register_this_thread();
24 | void tataslock_write(TATASLock *lock, void * writeInfo);
25 | void tataslock_write_read_lock(TATASLock *lock);
26 | void tataslock_write_read_unlock(TATASLock * lock);
27 | void tataslock_read_lock(TATASLock *lock);
28 | void tataslock_read_unlock(TATASLock *lock);
29 | 
30 | static inline
31 | bool tataslock_is_locked(TATASLock *lock){
32 |     bool locked;
33 |     load_acq(locked, lock->lockWord.value);
34 |     return locked;
35 | }
36 | 
37 | static inline
38 | bool tataslock_try_write_read_lock(TATASLock *lock) {
39 |     return !__sync_lock_test_and_set(&lock->lockWord.value, true);
40 | }
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/gc.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GC_H__
 2 | #define __GC_H__
 3 | 
 4 | typedef struct gc_st gc_t;
 5 | 
 6 | /* Most of these functions peek into a per-thread state struct. */
 7 | #include "ptst.h"
 8 | 
 9 | /* Initialise GC section of given per-thread state structure. */
10 | gc_t *gc_init(void);
11 | 
12 | int gc_add_allocator(int alloc_size);
13 | void gc_remove_allocator(int alloc_id);
14 | 
15 | /*
16 |  * Memory allocate/free. An unsafe free can be used when an object was
17 |  * not made visible to other processes.
18 |  */
19 | void *gc_alloc(ptst_t *ptst, int alloc_id);
20 | void gc_free(ptst_t *ptst, void *p, int alloc_id);
21 | void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id);
22 | 
23 | /*
24 |  * Hook registry. Allows users to hook in their own per-epoch delay
25 |  * lists.
26 |  */
27 | typedef void (*hook_fn_t)(ptst_t *, void *);
28 | int gc_add_hook(hook_fn_t fn);
29 | void gc_remove_hook(int hook_id);
30 | void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id);
31 | 
32 | /* Per-thread entry/exit from critical regions */
33 | void gc_enter(ptst_t *ptst);
34 | void gc_exit(ptst_t *ptst);
35 | 
36 | /* Start-of-day initialisation of garbage collector. */
37 | void _init_gc_subsystem(void);
38 | void _destroy_gc_subsystem(void);
39 | 
40 | #endif /* __GC_H__ */
41 | 


--------------------------------------------------------------------------------
/src/benchmark/run_benchmarks_on_intel_i7.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | import subprocess
 6 | 
 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 8 | 
 9 | command = [
10 |     os.path.join(bin_dir_path, 'benchmark_lock.py'),
11 |     #number of iterations
12 |     '5',
13 |     #Output dir (standard means a dir in bench_result based on the git
14 |     #commit id and the date)
15 |     'standard', 
16 |     #benchmark prefixes (comma separated list)
17 |     'pairing_heap_bench',
18 |     #locks to benchmark (comma separated list)
19 |     'qdlock,hqdlock,ccsynch,flatcomb,clh',
20 |     #use pinning to NUMA nodes (comma separated list)
21 |     'no',
22 |     #Benchmark number of threads (comma separated list)
23 |     '1,2,3,4,5,6,7,8',
24 |     #Percentage dequeue (comma separated list)
25 |     '0.5',
26 |     #Seconds to run the benchmark (comma separated list)
27 |     '1',
28 |     #Number of work items performed in write-critical section (comma
29 |     #separated list)
30 |     '2',
31 |     #Number of work items performed in read-critical section (comma
32 |     #separated list)
33 |     '0',
34 |     #Number of work items performed in non-critical section (comma
35 |     #separated list)
36 |     '0,32,64']
37 | 
38 | process = subprocess.Popen(command)
39 | process.wait()
40 | 


--------------------------------------------------------------------------------
/src/benchmark/run_benchmarks_on_amd_fx_6100.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | import subprocess
 6 | 
 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 8 | 
 9 | command = [
10 |     os.path.join(bin_dir_path, 'benchmark_lock.py'),
11 |     #number of iterations
12 |     '5',
13 |     #Output dir (standard means a dir in bench_result based on the git
14 |     #commit id and the date)
15 |     'standard', 
16 |     #benchmark prefixes (comma separated list)
17 |     'rw_bench_clone',
18 |     #locks to benchmark (comma separated list)
19 |     'aer_rgnzi,drmcs_rgnzi',
20 |     #use pinning to NUMA nodes (comma separated list)
21 |     'no',
22 |     #Benchmark number of threads (comma separated list)
23 |     '1,2,3,4,5,6',
24 |     #Procentage reads (comma separated list)
25 |     '0.0,0.25,0.5,0.8,0.9,0.95,0.99,1.0',
26 |     #Seconds to run the benchmark (comma separated list)
27 |     '1',
28 |     #Numper of work items performed in write-critical seciton (comma
29 |     #separated list)
30 |     '4',
31 |     #Numper of work items performed in read-critical seciton (comma
32 |     #separated list)
33 |     '4',
34 |     #Numper of work items performed in non-critical seciton (comma
35 |     #separated list)
36 |     '0,64']
37 | 
38 | process = subprocess.Popen(command)
39 | process.wait()
40 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/prioq.h:
--------------------------------------------------------------------------------
 1 | #ifndef PRIOQ_H
 2 | #define PRIOQ_H
 3 | #include <limits.h>
 4 | #include "common.h"
 5 | 
 6 | typedef int pkey_t;
 7 | typedef void *pval_t;
 8 | 
 9 | #define KEY_NULL 0
10 | #define NUM_LEVELS 32
11 | /* Internal key values with special meanings. */
12 | #define SENTINEL_KEYMIN ( INT_MIN ) /* Key value of first dummy node. */
13 | #define SENTINEL_KEYMAX ( INT_MAX ) /* Key value of last dummy node.  */
14 | 
15 | 
16 | typedef struct node_s
17 | {
18 |     pkey_t    k;
19 |     int       level;
20 |     int       inserting; //char pad2[4];
21 |     pval_t    v;
22 |     struct node_s *next[1];
23 | } node_t;
24 | 
25 | typedef struct
26 | {
27 |     int    max_offset;
28 |     int    max_level;
29 |     int    nthreads;
30 |     node_t *head;
31 |     node_t *tail;
32 |     char   pad[128];
33 | } pq_t;
34 | 
35 | #define get_marked_ref(_p)      ((void *)(((uintptr_t)(_p)) | 1))
36 | #define get_unmarked_ref(_p)    ((void *)(((uintptr_t)(_p)) & ~1))
37 | #define is_marked_ref(_p)       (((uintptr_t)(_p)) & 1)
38 | 
39 | 
40 | /* Interface */
41 | 
42 | extern pq_t *pq_init(int max_offset);
43 | 
44 | extern void pq_destroy(pq_t *pq);
45 | 
46 | extern void insertq(pq_t *pq, pkey_t k, pval_t v);
47 | 
48 | extern pval_t deletemin(pq_t *pq);
49 | 
50 | extern void sequential_length(pq_t *pq);
51 | 
52 | #endif // PRIOQ_H
53 | 


--------------------------------------------------------------------------------
/src/benchmark/run_benchmarks_on_sandy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | import subprocess
 6 | 
 7 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 8 | 
 9 | command = [
10 |     os.path.join(bin_dir_path, 'benchmark_lock.py'),
11 |     #number of iterations
12 |     '5',
13 |     #Output dir (standard menas a dir in bench_result based on the git
14 |     #commit id and the date)
15 |     'standard',
16 |     #benchmark prefixes (comma separated list)
17 |     'rw_bench_clone',
18 |     #locks to benchmark (comma separated list)
19 |     'aer_rgnzi,drmcs_rgnzi,cohort,wprwcohort_rgnzi',
20 |     #use pinning to NUMA nodes (comma separated list)
21 |     'no,yes',
22 |     #Benchmark number of threads (comma separated list)
23 |     '1,2,4,8,12,16,24,32,48,62,64',
24 |     #Procentage reads (comma separated list)
25 |     '0.0,0.25,0.5,0.8,0.9,0.95,0.99,1.0',
26 |     #Seconds to run the benchmark (comma separated list)
27 |     '1',
28 |     #Number of work items performed in write-critical section (comma
29 |     #separated list)
30 |     '4',
31 |     #Number of work items performed in read-critical section (comma
32 |     #separated list)
33 |     '4',
34 |     #Number of work items performed in non-critical section (comma
35 |     #separated list)
36 |     '0,64']
37 | 
38 | process = subprocess.Popen(command)
39 | process.wait()
40 | 


--------------------------------------------------------------------------------
/src/new_rep/locks/qd_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef QD_LOCK_H
 2 | #define QD_LOCK_H
 3 | 
 4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available
 5 | #include "misc/thread_includes.h"//Until c11 thread.h is available
 6 | #include <stdbool.h>
 7 | 
 8 | #include "misc/padded_types.h"
 9 | #include "locks/tatas_lock.h"
10 | #include "qd_queues/qd_queue.h"
11 | 
12 | /* Queue Delegation Lock */
13 | 
14 | typedef struct QDLockImpl {
15 |     TATASLock mutexLock;
16 |     QDQueue queue;
17 | } QDLock;
18 | 
19 | void qd_initialize(QDLock * lock){
20 |     tatas_initialize(&lock->mutexLock);
21 |     qdq_initialize(&lock->queue);
22 | }
23 | 
24 | void qd_delegate(QDLock* l,
25 |                  void (*funPtr)(unsigned int, void *), 
26 |                  unsigned int messageSize,
27 |                  void * messageAddress) {
28 |     while(true) {
29 |         if(tatas_try_lock(&l->mutexLock)) {
30 |             qdq_open(&l->queue);
31 |             funPtr(messageSize, messageAddress);
32 |             qdq_flush(&l->queue);
33 |             tatas_unlock(&l->mutexLock);
34 |             return;
35 |         } else if(qdq_enqueue(&l->queue,
36 |                               funPtr,
37 |                               messageSize,
38 |                               messageAddress)){
39 |             return;
40 |         }
41 |         thread_yield();
42 |     }
43 | }
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/common.c:
--------------------------------------------------------------------------------
 1 | #define _GNU_SOURCE
 2 | #include "common.h"
 3 | 
 4 | #if defined(__linux__)
 5 | pid_t 
 6 | gettid(void) 
 7 | {
 8 |     return (pid_t) syscall(SYS_gettid);
 9 | }
10 | 
11 | void
12 | pina(pid_t t, int cpu) 
13 | {
14 |     cpu_set_t cpuset;
15 |     CPU_ZERO(&cpuset);
16 |     CPU_SET(cpu, &cpuset);
17 |     E_en(sched_setaffinity(t, sizeof(cpu_set_t), &cpuset));
18 | }
19 | 
20 | void
21 | gettime(struct timespec *ts)
22 | {
23 |     E(clock_gettime(CLOCK_MONOTONIC, ts));
24 | }
25 | 
26 | #endif
27 | 
28 | #if defined(__APPLE__)
29 | void
30 | gettime(struct timespec *ts)
31 | {
32 |     uint64_t time = mach_absolute_time();
33 | 
34 |     static mach_timebase_info_data_t info = {0,0};
35 | 
36 |     if (info.denom == 0)  {
37 | 	mach_timebase_info(&info);
38 |     }
39 | 
40 |     uint64_t elapsed = time * (info.numer / info.denom);
41 | 
42 |     ts->tv_sec = elapsed * 1e-9;
43 |     ts->tv_nsec = elapsed - (ts->tv_sec * 1e9);
44 | }
45 | #endif
46 | 
47 | 
48 | 
49 | 
50 | struct timespec
51 | timediff (struct timespec begin, struct timespec end)
52 | {
53 |     struct timespec tmp;
54 |     if ((end.tv_nsec - begin.tv_nsec) < 0) {
55 | 	tmp.tv_sec = end.tv_sec - begin.tv_sec - 1;
56 | 	tmp.tv_nsec = 1000000000 + end.tv_nsec - begin.tv_nsec;
57 |     } else {
58 | 	tmp.tv_sec = end.tv_sec - begin.tv_sec;
59 | 	tmp.tv_nsec = end.tv_nsec - begin.tv_nsec;
60 |     }
61 |     return tmp;
62 | }
63 | 


--------------------------------------------------------------------------------
/qd_library/locks/mutex_lock.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_mutex_lock_hpp
 2 | #define qd_mutex_lock_hpp qd_mutex_lock_hpp
 3 | 
 4 | #include<atomic>
 5 | #include<mutex>
 6 | 
 7 | /** @brief a std::mutex based lock */
 8 | class mutex_lock {
 9 | 	std::atomic<bool> locked;
10 | 	std::mutex mutex;
11 | 	public:
12 | 		mutex_lock() : locked(false), mutex() {};
13 | 		mutex_lock(mutex_lock&) = delete; /* TODO? */
14 | 		bool try_lock() {
15 | 			if(!is_locked() && mutex.try_lock()) {
16 | 				locked.store(true, std::memory_order_release);
17 | 				return true;
18 | 			} else {
19 | 				return false;
20 | 			}
21 | 		}
22 | 		void unlock() {
23 | 			locked.store(false, std::memory_order_release);
24 | 			mutex.unlock();
25 | 		}
26 | 		bool is_locked() {
27 | 			/* This may sometimes return false when the lock is already acquired.
28 | 			 * This is safe, because the locking call that acquired the lock in
29 | 			 * that case has not yet returned (it needs to set the locked flag first),
30 | 			 * so this is concurrent with calling is_locked first and then locking the lock.
31 | 			 * 
32 | 			 * This may also sometimes return false when the lock is still locked, but
33 | 			 * about to be unlocked. This is safe, because of a similar argument as above.
34 | 			 */
35 | 			return locked.load(std::memory_order_acquire);
36 | 		}
37 | 		void lock() {
38 | 			mutex.lock();
39 | 			locked.store(true, std::memory_order_release);
40 | 		}
41 | };
42 | 
43 | #endif /* qd_mutex_lock_hpp */
44 | 


--------------------------------------------------------------------------------
/src/benchmark/compare_benchmarks.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | from os import listdir
 5 | from os.path import join
 6 | from os.path import dirname
 7 | from os import mkdir
 8 | from subprocess import Popen
 9 | from subprocess import PIPE 
10 | from shutil import copy
11 | sys.argv.pop(0)
12 | 
13 | if len(sys.argv) < 2:
14 |     print """Not enough parameters:
15 | 
16 | Optional flag -matplotlib (must be first)
17 | 
18 | The first parameter is the output dir or output file if -matplotlib is specified (where the graphs are placed).
19 | 
20 | The rest of the parameters are benchmark output dirs of benchmark that
21 | shall be compared. The benchmark results are produced by the
22 | bin/benchmark_lock.py script.
23 | 
24 | """
25 |     sys.exit()
26 | 
27 | output_dir_or_file = sys.argv.pop(0)
28 | 
29 | compare_dirs = sys.argv
30 | 
31 | dat_files = [f for f in listdir(compare_dirs[0]) if f.endswith(".dat")]
32 | 
33 | output_file = output_dir_or_file
34 | copy(join(dirname(__file__), '../src/benchmark/produce_graphs_template.py'), output_file)
35 | with open(output_file, "a") as f:
36 | 	for in_file_name in dat_files:
37 | 	    f.write('set_up_figure("%s")\n' % in_file_name)
38 | 	    for in_dir in compare_dirs:
39 | 		f.write('plot_file("%s", "%s")\n' % (join(in_dir,in_file_name), in_dir.split('#')[2]))
40 | 	    f.write('complete_figure("%s")\n' % in_file_name)
41 | 	    f.write("\n")
42 | 
43 | 
44 | FORMAT='png'
45 | execfile(output_file)
46 | 


--------------------------------------------------------------------------------
/src/new_rep/locks/tatas_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef TATAS_LOCK_H
 2 | #define TATAS_LOCK_H
 3 | 
 4 | #include "misc/padded_types.h"
 5 | 
 6 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available
 7 | #include "misc/thread_includes.h"//Until c11 thread.h is available
 8 | #include <stdbool.h>
 9 | 
10 | 
11 | typedef struct TATASLockImpl {
12 |     LLPaddedFlag lockFlag;
13 | } TATASLock;
14 | 
15 | void tatas_initialize(TATASLock * lock){
16 |     atomic_init( &lock->lockFlag.value, false );
17 | }
18 | 
19 | void tatas_lock(TATASLock *lock) {
20 |     while(true){
21 |         while(atomic_load_explicit(&lock->lockFlag.value, 
22 |                                    memory_order_acquire)){
23 |             thread_yield();
24 |         }
25 |         if( ! atomic_flag_test_and_set_explicit(&lock->lockFlag.value,
26 |                                                 memory_order_acquire)){
27 |             return;
28 |         }
29 |    }
30 | }
31 | 
32 | void tatas_unlock(TATASLock * lock) {
33 |     atomic_flag_clear_explicit(&lock->lockFlag.value, memory_order_release);
34 | }
35 | 
36 | bool tatas_is_locked(TATASLock *lock){
37 |     return atomic_load_explicit(&lock->lockFlag.value, memory_order_acquire);
38 | }
39 | 
40 | bool tatas_try_lock(TATASLock *lock) {
41 |     if(!atomic_load_explicit(&lock->lockFlag.value, memory_order_acquire)){
42 |         return !atomic_flag_test_and_set_explicit(&lock->lockFlag.value, memory_order_acquire);
43 |     } else {
44 |         return false;
45 |     }
46 | }
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/src/utils/support_many_non_zero_indicator_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef SUPPORT_MANY_NON_ZERO_INDICATOR_TYPES_H
 2 | #define SUPPORT_MANY_NON_ZERO_INDICATOR_TYPES_H
 3 | 
 4 | #ifdef NZI_TYPE_ReaderGroups
 5 | //***********************************
 6 | //ReaderGroups
 7 | //***********************************
 8 | #include "datastructures/reader_groups_nzi.h"
 9 | 
10 | #define NZI_DATATYPE_NAME ReaderGroupsNZI
11 | #define NZI_FUN_PREFIX rgnzi
12 | 
13 | #elif defined (NZI_TYPE_NUMAIngressEgressCounter)
14 | //***********************************
15 | //NUMA Ingress Egress Counter
16 | //***********************************
17 | #include "datastructures/numa_ingress_egress_nzi.h"
18 | 
19 | #define NZI_DATATYPE_NAME NUMAIngressEgress
20 | #define NZI_FUN_PREFIX nienzi
21 | 
22 | #else
23 | 
24 | #define NZI_DATATYPE_NAME NoNZIDatatypeSpecified
25 | #define NZI_FUN_PREFIX no_such_nzi_type_prefix
26 | 
27 | #endif
28 | 
29 | #ifdef NZI_FUN_PREFIX
30 | 
31 | #define MY_NZI_CONCAT(a,b) a ## _ ## b                                                
32 | #define MY_NZI_EVAL_CONCAT(a,b) MY_NZI_CONCAT(a,b)                                        
33 | #define MY_NZI_FUN(name) MY_NZI_EVAL_CONCAT(NZI_FUN_PREFIX, name)                              
34 | 
35 | #define NZI_INITIALIZE(nzi) MY_NZI_FUN(initialize)(nzi)
36 | #define NZI_ARRIVE(nzi) MY_NZI_FUN(arrive)(nzi)
37 | #define NZI_DEPART(nzi) MY_NZI_FUN(depart)(nzi)
38 | #define NZI_QUERY(nzi) MY_NZI_FUN(query)(nzi)
39 | #define NZI_WAIT_UNIL_EMPTY(nzi) MY_NZI_FUN(wait_unil_empty)(nzi)
40 | 
41 | #endif
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/qd_library/locks/pthreads_lock.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_pthreads_lock_hpp
 2 | #define qd_pthreads_lock_hpp qd_pthreads_lock_hpp
 3 | 
 4 | #include<atomic>
 5 | #include<pthread.h>
 6 | 
 7 | /** @brief a pthreads based lock */
 8 | class pthreads_lock {
 9 | 	std::atomic<bool> locked;
10 | 	pthread_mutex_t mutex;
11 | 	public:
12 | 		pthreads_lock() : locked(false), mutex(PTHREAD_MUTEX_INITIALIZER) {};
13 | 		pthreads_lock(pthreads_lock&) = delete; /* TODO? */
14 | 		bool try_lock() {
15 | 			if(!is_locked() && !pthread_mutex_trylock(&mutex)) {
16 | 				locked.store(true, std::memory_order_release);
17 | 				return true;
18 | 			} else {
19 | 				return false;
20 | 			}
21 | 		}
22 | 		void unlock() {
23 | 			locked.store(false, std::memory_order_release);
24 | 			pthread_mutex_unlock(&mutex);
25 | 		}
26 | 		bool is_locked() {
27 | 			/* This may sometimes return false when the lock is already acquired.
28 | 			 * This is safe, because the locking call that acquired the lock in
29 | 			 * that case has not yet returned (it needs to set the locked flag first),
30 | 			 * so this is concurrent with calling is_locked first and then locking the lock.
31 | 			 * 
32 | 			 * This may also sometimes return false when the lock is still locked, but
33 | 			 * about to be unlocked. This is safe, because of a similar argument as above.
34 | 			 */
35 | 			return locked.load(std::memory_order_acquire);
36 | 		}
37 | 		void lock() {
38 | 			pthread_mutex_lock(&mutex);
39 | 			locked.store(true, std::memory_order_release);
40 | 		}
41 | };
42 | 
43 | #endif /* qd_pthreads_lock_hpp */
44 | 


--------------------------------------------------------------------------------
/src/datastructures/reader_groups_nzi.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "utils/thread_identifier.h"
 4 | 
 5 | #ifndef READER_GROUPS_NZI_H
 6 | #define READER_GROUPS_NZI_H
 7 | 
 8 | typedef struct ReaderGroupsNZIImpl {
 9 |     CacheLinePaddedInt readerGroups[NUMBER_OF_READER_GROUPS];
10 | } ReaderGroupsNZI;
11 | 
12 | static inline
13 | void rgnzi_initialize(ReaderGroupsNZI * nzi){
14 |     for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){
15 |         nzi->readerGroups[i].value = 0;
16 |     }
17 |     __sync_synchronize();
18 | }
19 | 
20 | static inline
21 | void rgnzi_arrive(ReaderGroupsNZI * nzi){
22 |     __sync_fetch_and_add(&nzi->readerGroups[myId.value % NUMBER_OF_READER_GROUPS].value, 1);
23 | }
24 | 
25 | static inline
26 | void rgnzi_depart(ReaderGroupsNZI * nzi){
27 |     __sync_fetch_and_sub(&nzi->readerGroups[myId.value % NUMBER_OF_READER_GROUPS].value, 1);
28 | }
29 | 
30 | 
31 | static inline
32 | bool rgnzi_query(ReaderGroupsNZI * nzi){
33 |     for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){
34 |         if(ACCESS_ONCE(nzi->readerGroups[i].value) > 0){
35 |             return false;
36 |         }
37 |     }
38 |     return true;
39 | }
40 | 
41 | static inline
42 | void rgnzi_wait_unil_empty(ReaderGroupsNZI * nzi){
43 |     int count;
44 |     for(int i = 0; i < NUMBER_OF_READER_GROUPS; i++){
45 |         load_acq(count, nzi->readerGroups[i].value); 
46 |         while(count > 0){
47 |             __sync_synchronize();
48 |             load_acq(count, nzi->readerGroups[i].value);
49 |         }
50 |     }
51 | }
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/src/lock/wprw_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "common_lock_constants.h"
 4 | #include "utils/support_many_lock_types.h"
 5 | #include "utils/support_many_non_zero_indicator_types.h"
 6 | 
 7 | #ifndef WPRW_LOCK_H
 8 | #define WPRW_LOCK_H
 9 | 
10 | #ifdef LOCK_TYPE_WPRW_MCSLock
11 | //***********************************
12 | //MCSLock
13 | //***********************************
14 | #include "mcs_lock.h"
15 | 
16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock
17 | 
18 | #elif defined (LOCK_TYPE_WPRW_CohortLock)
19 | //***********************************
20 | //CohortLock
21 | //***********************************
22 | #include "cohort_lock.h"
23 | 
24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock
25 | 
26 | #else
27 | 
28 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified
29 | 
30 | #endif
31 | 
32 | typedef struct WPRWLockImpl {
33 |     char pad1[64];
34 |     LOCK_DATATYPE_NAME_WPRW lock;
35 |     CacheLinePaddedInt writeBarrier;
36 |     NZI_DATATYPE_NAME nonZeroIndicator;
37 |     //    CacheLinePaddedInt readLocks[NUMBER_OF_READER_GROUPS];
38 | } WPRWLock;
39 | 
40 | WPRWLock * wprwlock_create(void (*writer)(void *, void **));
41 | void wprwlock_free(WPRWLock * lock);
42 | void wprwlock_initialize(WPRWLock * lock, void (*writer)(void *, void **));
43 | void wprwlock_register_this_thread();
44 | void wprwlock_write(WPRWLock *lock, void * writeInfo);
45 | void wprwlock_write_read_lock(WPRWLock *lock);
46 | void wprwlock_write_read_unlock(WPRWLock * lock);
47 | void wprwlock_read_lock(WPRWLock *lock);
48 | void wprwlock_read_unlock(WPRWLock *lock);
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/src/lock/ticket_lock.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <pthread.h>
 4 | 
 5 | #include "ticket_lock.h"
 6 | 
 7 |  
 8 | TicketLock * ticketlock_create(void (*writer)(void *, void **)){
 9 |     TicketLock * lock = malloc(sizeof(TicketLock));
10 |     ticketlock_initialize(lock, writer);
11 |     return lock;
12 | }
13 | 
14 | void ticketlock_initialize(TicketLock * lock, void (*writer)(void *, void **)){
15 |     lock->writer = writer;
16 |     lock->inCounter.value = 0;
17 |     lock->outCounter.value = 0;
18 |     __sync_synchronize();
19 | }
20 | 
21 | void ticketlock_free(TicketLock * lock){
22 |     free(lock);
23 | }
24 | 
25 | 
26 | void ticketlock_register_this_thread(){
27 | }
28 | 
29 | void ticketlock_write(TicketLock *lock, void * writeInfo) {
30 |     ticketlock_write_read_lock(lock);
31 |     lock->writer(writeInfo, NULL);
32 |     ticketlock_write_read_unlock(lock);
33 | }
34 | 
35 | void ticketlock_write_read_lock(TicketLock *lock) {
36 |     int outCounter;
37 |     int myTicket = __sync_fetch_and_add(&lock->inCounter.value, 1);
38 |     load_acq(outCounter, lock->outCounter.value);
39 |     while(outCounter != myTicket){
40 |         load_acq(outCounter, lock->outCounter.value);
41 |         __sync_synchronize();
42 |     }
43 | }
44 | 
45 | void ticketlock_write_read_unlock(TicketLock * lock) {
46 |     __sync_fetch_and_add(&lock->outCounter.value, 1);
47 | }
48 | 
49 | void ticketlock_read_lock(TicketLock *lock) {
50 |     ticketlock_write_read_lock(lock);
51 | }
52 | 
53 | void ticketlock_read_unlock(TicketLock *lock) {
54 |     ticketlock_write_read_unlock(lock);
55 | }
56 | 


--------------------------------------------------------------------------------
/src/lock/simple_delayed_writers_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "datastructures/multi_writers_queue.h"
 3 | #include "common_lock_constants.h"
 4 | #include "utils/support_many_non_zero_indicator_types.h"
 5 | 
 6 | #ifndef SIMPLE_DELAYED_WRITERS_LOCK_H
 7 | #define SIMPLE_DELAYED_WRITERS_LOCK_H
 8 | 
 9 | struct NodeImpl;
10 | 
11 | typedef union CacheLinePaddedNodePtrImpl {
12 |     struct NodeImpl * value;
13 |     char padding[64];
14 | } CacheLinePaddedNodePtr;
15 | 
16 | typedef struct NodeImpl {
17 |     MWQueue writeQueue;
18 |     CacheLinePaddedNodePtr next;
19 |     CacheLinePaddedBool locked;
20 |     bool readLockIsWriteLock;
21 |     char pad[64 - ((sizeof(bool)) % 64)];
22 | } Node;
23 | 
24 | typedef struct SimpleDelayedWritesLockImpl {
25 |     char pad1[64];
26 |     void (*writer)(void *, void **);
27 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
28 |     CacheLinePaddedNodePtr endOfQueue;
29 |     NZI_DATATYPE_NAME nonZeroIndicator;
30 | } SimpleDelayedWritesLock;
31 | 
32 | 
33 | 
34 | SimpleDelayedWritesLock * sdwlock_create(void (*writer)(void *, void **));
35 | void sdwlock_free(SimpleDelayedWritesLock * lock);
36 | void sdwlock_initialize(SimpleDelayedWritesLock * lock, void (*writer)(void *, void **));
37 | void sdwlock_register_this_thread();
38 | void sdwlock_write(SimpleDelayedWritesLock *lock, void * writeInfo);
39 | void sdwlock_write_read_lock(SimpleDelayedWritesLock *lock);
40 | void sdwlock_write_read_unlock(SimpleDelayedWritesLock * lock);
41 | void sdwlock_read_lock(SimpleDelayedWritesLock *lock);
42 | void sdwlock_read_unlock(SimpleDelayedWritesLock *lock);
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/src/lock/agnostic_dx_lock.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <pthread.h>
 4 | #include <limits.h>
 5 | #include <stdio.h>
 6 | #include "agnostic_dx_lock.h"
 7 | #include "utils/smp_utils.h"
 8 | 
 9 | #define READ_PATIENCE_LIMIT 130000
10 |  
11 | AgnosticDXLock * adxlock_create(void (*writer)(void *, void **)){
12 |     AgnosticDXLock * lock = malloc(sizeof(AgnosticDXLock));
13 |     adxlock_initialize(lock, writer);
14 |     return lock;
15 | }
16 | 
17 | void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(void *, void **)){
18 |     //TODO check if the following typecast is fine
19 |     lock->defaultWriter = defaultWriter;
20 |     LOCK_INITIALIZE(&lock->lock, defaultWriter);
21 |     drmvqueue_initialize(&lock->writeQueue);
22 |     __sync_synchronize();
23 | }
24 | 
25 | void adxlock_free(AgnosticDXLock * lock){
26 |     free(lock);
27 | }
28 | 
29 | void adxlock_register_this_thread(){
30 | }
31 | 
32 | 
33 | void adxlock_write(AgnosticDXLock *lock, void * writeInfo) {
34 |     adxlock_delegate(lock, lock->defaultWriter, writeInfo);
35 | }
36 | 
37 | void adxlock_write_read_lock(AgnosticDXLock *lock) {
38 |     LOCK_WRITE_READ_LOCK(&lock->lock);    
39 |     drmvqueue_reset_fully_read(&lock->writeQueue);
40 |     __sync_synchronize();//Flush
41 | }
42 | 
43 | void adxlock_write_read_unlock(AgnosticDXLock * lock) {
44 |     drmvqueue_flush(&lock->writeQueue);
45 |     LOCK_WRITE_READ_UNLOCK(&lock->lock);
46 | }
47 | 
48 | void adxlock_read_lock(AgnosticDXLock *lock) {
49 |     adxlock_write_read_lock(lock);
50 | }
51 | 
52 | void adxlock_read_unlock(AgnosticDXLock *lock) {
53 |     adxlock_write_read_unlock(lock);
54 | }
55 | 


--------------------------------------------------------------------------------
/qd_library/qd.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_qd_hpp
 2 | #define qd_qd_hpp qd_qd_hpp
 3 | 
 4 | #include "locks/waitable_lock.hpp"
 5 | #include "locks/tatas_lock.hpp"
 6 | #include "locks/mutex_lock.hpp"
 7 | #include "locks/futex_lock.hpp"
 8 | #include "locks/mcs_futex_lock.hpp"
 9 | #include "locks/mcs_lock.hpp"
10 | #include "locks/ticket_futex_lock.hpp"
11 | 
12 | #include "queues/buffer_queue.hpp"
13 | #include "queues/dual_buffer_queue.hpp"
14 | #include "queues/entry_queue.hpp"
15 | #include "queues/simple_locked_queue.hpp"
16 | 
17 | #include "qdlock.hpp"
18 | #include "hqdlock.hpp"
19 | #include "mrqdlock.hpp"
20 | 
21 | #include "qd_condition_variable.hpp"
22 | 
23 | template<typename Lock>
24 | class extended_lock : public Lock {
25 | 	public:
26 | 		bool try_lock_or_wait() {
27 | 			return this->try_lock();
28 | 		}
29 | };
30 | 
31 | using internal_lock = mcs_futex_lock;
32 | using qdlock = qdlock_impl<internal_lock, buffer_queue<262139>>;
33 | using mrqdlock = mrqdlock_impl<internal_lock, buffer_queue<16384>, reader_groups<64>, 65536>;
34 | using qd_condition_variable = qd_condition_variable_impl<mutex_lock, simple_locked_queue>;
35 | 
36 | #define DELEGATE_F(function, ...) template delegate_f<decltype(function), function>(__VA_ARGS__)
37 | #define DELEGATE_N(function, ...) template delegate_n<decltype(function), function>(__VA_ARGS__)
38 | #define DELEGATE_P(function, ...) template delegate_p<decltype(function), function>(__VA_ARGS__)
39 | #define DELEGATE_FP(function, ...) template delegate_fp<decltype(function), function>(__VA_ARGS__)
40 | #define WAIT_REDELEGATE_P(function, ...) template wait_redelegate_p<decltype(function), function>(__VA_ARGS__)
41 | 
42 | #endif /* qd_qd_hpp */
43 | 


--------------------------------------------------------------------------------
/src/lock/all_equal_rdx_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "datastructures/multi_writers_queue.h"
 3 | #include "common_lock_constants.h"
 4 | #include "utils/support_many_non_zero_indicator_types.h"
 5 | 
 6 | #ifndef ALL_EQUAL_RDX_LOCK_H
 7 | #define ALL_EQUAL_RDX_LOCK_H
 8 | 
 9 | struct NodeImpl;
10 | 
11 | typedef union CacheLinePaddedNodePtrImpl {
12 |     struct NodeImpl * value;
13 |     char padding[64];
14 | } CacheLinePaddedNodePtr;
15 | 
16 | 
17 | typedef struct NodeImpl {
18 |     MWQueue writeQueue;
19 |     CacheLinePaddedNodePtr next;
20 |     CacheLinePaddedBool locked;
21 |     CacheLinePaddedBool readSpinningEnabled;
22 |     NZI_DATATYPE_NAME nonZeroIndicator;
23 |     bool readLockIsWriteLock;
24 |     bool readLockIsSpinningOnNode;
25 |     struct NodeImpl * readLockSpinningNode;
26 |     char pad[64 - ((sizeof(bool)*2 + sizeof(struct NodeImpl *)) % 64)];
27 | } Node;
28 | 
29 | typedef struct AllEqualRDXLockImpl {
30 |     char pad1[64];
31 |     void (*writer)(void *, void **);
32 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
33 |     CacheLinePaddedNodePtr endOfQueue;
34 |     NZI_DATATYPE_NAME nonZeroIndicator;
35 | } AllEqualRDXLock;
36 | 
37 | 
38 | 
39 | AllEqualRDXLock * aerlock_create(void (*writer)(void *, void **));
40 | void aerlock_free(AllEqualRDXLock * lock);
41 | void aerlock_initialize(AllEqualRDXLock * lock, void (*writer)(void *, void **));
42 | void aerlock_register_this_thread();
43 | void aerlock_write(AllEqualRDXLock *lock, void * writeInfo);
44 | void aerlock_write_read_lock(AllEqualRDXLock *lock);
45 | void aerlock_write_read_unlock(AllEqualRDXLock * lock);
46 | void aerlock_read_lock(AllEqualRDXLock *lock);
47 | void aerlock_read_unlock(AllEqualRDXLock *lock);
48 | 
49 | #endif
50 | 


--------------------------------------------------------------------------------
/src/lock/tatas_lock.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <pthread.h>
 4 | #include <limits.h>
 5 | #include <stdio.h>
 6 | #include "tatas_lock.h"
 7 | 
 8 | 
 9 | TATASLock * tataslock_create(void (*writer)(void *, void **)){
10 |     TATASLock * lock = malloc(sizeof(TATASLock));
11 |     tataslock_initialize(lock, writer);
12 |     return lock;
13 | }
14 | 
15 | void tataslock_initialize(TATASLock * lock, void (*writer)(void *, void **)){
16 |     lock->writer = writer;
17 |     lock->lockWord.value = 0;
18 |     __sync_synchronize();
19 | }
20 | 
21 | void tataslock_free(TATASLock * lock){
22 |     free(lock);
23 | }
24 | 
25 | void tataslock_register_this_thread(){
26 | }
27 | 
28 | void tataslock_write(TATASLock *lock, void * writeInfo) {
29 |     tataslock_write_read_lock(lock);
30 |     lock->writer(writeInfo, NULL);
31 |     tataslock_write_read_unlock(lock);
32 | }
33 | 
34 | void tataslock_write_read_lock(TATASLock *lock) {
35 |     bool currentlylocked;
36 |     while(true){
37 |         load_acq(currentlylocked, lock->lockWord.value);
38 |         while(currentlylocked){
39 |             load_acq(currentlylocked, lock->lockWord.value);
40 |         }
41 |         currentlylocked = __sync_lock_test_and_set(&lock->lockWord.value, true);
42 |         if(!currentlylocked){
43 |             //Was not locked before operation
44 |             return;
45 |         }
46 |         __sync_synchronize();//Pause instruction?
47 |     }
48 | }
49 | 
50 | void tataslock_write_read_unlock(TATASLock * lock) {
51 |     __sync_lock_release(&lock->lockWord.value);
52 | }
53 | 
54 | void tataslock_read_lock(TATASLock *lock) {
55 |     tataslock_write_read_lock(lock);
56 | }
57 | 
58 | void tataslock_read_unlock(TATASLock *lock) {
59 |     tataslock_write_read_unlock(lock);
60 | }
61 | 


--------------------------------------------------------------------------------
/src/benchmark/produce_graphs_template.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import matplotlib
 4 | matplotlib.use('Agg')
 5 | 
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | try:
 9 | 	Format = FORMAT
10 | except NameError:
11 | 	Format = 'pdf'
12 | 
13 | def read_dat_file(the_file):
14 |     with open(the_file, 'r') as f:
15 |         lines = f.readlines()
16 |         x = []
17 |         y = []
18 |         for line in lines:
19 |             p = line.split()
20 |             x.append(float(p[0]))
21 |             y.append(float(p[2])/float(p[1]))
22 |         return (x, y)
23 | 
24 | from itertools import cycle
25 | markers = None
26 | def set_up_figure(title):
27 |     markers = cycle(['o', 's', 'd', '^', 'v', '<', '>', 'D', 'h'])
28 |     plt.figure()
29 |     plt.autoscale(enable=True, tight=False)
30 |     plt.xlabel('Number of Threads')
31 |     plt.ylabel('Operations / Microsecond') 
32 |     plt.title(title)
33 | 
34 | 
35 | def plot_file(the_file, title):
36 |     (x_list, y_list) = read_dat_file(the_file)
37 |     mapped = [(a, [b for (comp_a, b) in zip(x_list, y_list) if a == comp_a]) for a in x_list]
38 |     mapped.sort()
39 |     x,y_vals = zip(*mapped)
40 |     y = map(lambda v : sum(v) / float(len(v)), y_vals)
41 |     emin = map(lambda (v, avg) : avg - min(v), zip(y_vals, y))
42 |     emax = map(lambda (v, avg) : max(v) - avg, zip(y_vals, y))
43 |     plt.errorbar(x, y, [emin, emax], label=title, linewidth=2, elinewidth=1, marker='o')
44 |     #plt.plot(x, y, label=title, linewidth=2)
45 | 
46 | 
47 | def complete_figure(save_file_name):
48 |     plt.axis(xmin=0)
49 |     plt.axis(ymin=0)
50 |     plt.tight_layout()
51 |     plt.legend(loc='best')
52 |     plt.savefig(save_file_name + '.' + Format, bbox_inches='tight', dpi=400)
53 |     print save_file_name + '.' + Format
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/src/benchmark/pairingheap/dxlocked_pairingheap.h:
--------------------------------------------------------------------------------
 1 | #include "pairingheap.h"
 2 | #include "utils/support_many_lock_types.h"
 3 | 
 4 | #ifndef DXLOCKED_PAIRINGHEAP_H
 5 | #define DXLOCKED_PAIRINGHEAP_H
 6 | 
 7 | typedef struct DXPriorityQueueImpl{
 8 |     char pad1[128];
 9 |     struct node* value;
10 |     char pad2[128 - (sizeof(struct node*))];
11 | } DXPriorityQueue;
12 | 
13 | DXPriorityQueue dx_pq_ph_datastructure __attribute__((aligned(64)));
14 | 
15 | LOCK_DATATYPE_NAME dx_pq_ph_lock __attribute__((aligned(64)));
16 | 
17 | 
18 | void dx_pq_ph_init(){
19 |     LOCK_INITIALIZE(&dx_pq_ph_lock, NULL);//Default write function not used
20 |     dx_pq_ph_datastructure.value = NULL;
21 | }
22 | 
23 | void dx_pq_ph_destroy(){
24 |     destroy_heap(dx_pq_ph_datastructure.value);
25 | }
26 | 
27 | void dx_pq_ph_enqueue_critical_section(void * enqueueValue, void ** notUsed){
28 |     dx_pq_ph_datastructure.value = 
29 |         insert(dx_pq_ph_datastructure.value, (int)(long)enqueueValue);    
30 | }
31 | 
32 | void dx_pq_ph_enqueue(int value){
33 |     LOCK_DELEGATE(&dx_pq_ph_lock, &dx_pq_ph_enqueue_critical_section, (void*)(long)value); 
34 | }
35 | 
36 | void dx_pq_ph_dequeue_critical_section(void * notUsed, void ** resultLocationPtr){
37 |     int * resultLocation = (int*)resultLocationPtr;
38 |     if(dx_pq_ph_datastructure.value != NULL){
39 |         *resultLocation = top(dx_pq_ph_datastructure.value);
40 |         dx_pq_ph_datastructure.value = pop(dx_pq_ph_datastructure.value);
41 |     }else{
42 |         *resultLocation = -1;
43 |     }
44 | }
45 | 
46 | int dx_pq_ph_dequeue(){
47 |    return (int)(long)LOCK_DELEGATE_RETURN_BLOCK(&dx_pq_ph_lock, 
48 |                                                 &dx_pq_ph_dequeue_critical_section, 
49 |                                                 NULL);
50 | }
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/datastructures/pairingheap/dxlocked_pairingheap.h:
--------------------------------------------------------------------------------
 1 | #include "pairingheap.h"
 2 | #include "support_many_lock_types.h"
 3 | 
 4 | #ifndef DXLOCKED_PAIRINGHEAP_H
 5 | #define DXLOCKED_PAIRINGHEAP_H
 6 | 
 7 | typedef struct DXPriorityQueueImpl{
 8 |     char pad1[128];
 9 |     struct node* value;
10 |     char pad2[128 - (sizeof(struct node*))];
11 | } DXPriorityQueue;
12 | 
13 | DXPriorityQueue dx_pq_ph_datastructure __attribute__((aligned(64)));
14 | 
15 | LOCK_DATATYPE_NAME dx_pq_ph_lock __attribute__((aligned(64)));
16 | 
17 | 
18 | void dx_pq_ph_init(){
19 |     LOCK_INITIALIZE(&dx_pq_ph_lock, NULL);//Default write function not used
20 |     dx_pq_ph_datastructure.value = NULL;
21 | }
22 | 
23 | void dx_pq_ph_destroy(){
24 |     destroy_heap(dx_pq_ph_datastructure.value);
25 | }
26 | 
27 | void dx_pq_ph_enqueue_critical_section(void * enqueueValue, void ** notUsed){
28 |     dx_pq_ph_datastructure.value = 
29 |         insert(dx_pq_ph_datastructure.value, (int)(long)enqueueValue);    
30 | }
31 | 
32 | void dx_pq_ph_enqueue(int value){
33 |     LOCK_DELEGATE(&dx_pq_ph_lock, &dx_pq_ph_enqueue_critical_section, (void*)(long)value); 
34 | }
35 | 
36 | void dx_pq_ph_dequeue_critical_section(void * notUsed, void ** resultLocationPtr){
37 |     int * resultLocation = (int*)resultLocationPtr;
38 |     if(dx_pq_ph_datastructure.value != NULL){
39 |         *resultLocation = top(dx_pq_ph_datastructure.value);
40 |         dx_pq_ph_datastructure.value = pop(dx_pq_ph_datastructure.value);
41 |     }else{
42 |         *resultLocation = -1;
43 |     }
44 | }
45 | 
46 | int dx_pq_ph_dequeue(){
47 |    return (int)(long)LOCK_DELEGATE_RETURN_BLOCK(&dx_pq_ph_lock, 
48 |                                                 &dx_pq_ph_dequeue_critical_section, 
49 |                                                 NULL);
50 | }
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gdb_skiplist_print.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import gdb
 4 | 
 5 | class SkiplistPrintCommand(gdb.Command):
 6 |     """Iterate and print a list.
 7 | 
 8 | skip <EXPR> [MAX]
 9 | 
10 | Given a list EXPR, iterate though the list nodes' ->next pointers, printing
11 | each node iterated. We will iterate thorugh MAX list nodes, to prevent
12 | infinite loops with corrupt lists. If MAX is zero, we will iterate the
13 | entire list.
14 | 
15 | List nodes types are expected to have a member named "next". List types
16 | may be the same as node types, or a separate type with an explicit
17 | head node, called "head"."""
18 | 
19 |     MAX_ITER = 10
20 | 
21 |     def __init__(self):
22 |         super(SkiplistPrintCommand, self).__init__("skiplist-print", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL)
23 | 
24 |     def invoke(self, _args, from_tty):
25 |         args = gdb.string_to_argv(_args)
26 |         start_node = args[0]
27 | 
28 |         if len(args) > 1:
29 |             max_iter = int(args[1])
30 |         else:
31 |             max_iter = self.MAX_ITER
32 | 
33 |         if len(args) > 2:
34 |             lvl = int(args[2])
35 |         else:
36 |             lvl = 0
37 |         
38 |         p_node_t = gdb.lookup_type('node_t').pointer()
39 |         long_t = gdb.lookup_type('long')
40 |         node = gdb.parse_and_eval(start_node)
41 |         print node
42 | 
43 |         for i in xrange(max_iter):
44 |             nexts = node['next']
45 |             nxt = gdb.Value(nexts[lvl]).cast(long_t)
46 |             nxt = nxt & ~1
47 |             node = gdb.Value(nxt).cast(p_node_t).dereference()
48 |             nexts = node['next']
49 |             print node['k'], node['level'], node['inserting'],
50 |             k = 0
51 |             while k < node['level']:
52 |                 print(nexts[k]),
53 |                 k+=1
54 |             print("")
55 | 
56 | SkiplistPrintCommand()
57 | 


--------------------------------------------------------------------------------
/src/lock/aticket_lock.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <pthread.h>
 4 | 
 5 | #include "aticket_lock.h"
 6 | 
 7 |  
 8 | ATicketLock * aticketlock_create(void (*writer)(void *, void **)){
 9 |     ATicketLock * lock = malloc(sizeof(ATicketLock));
10 |     aticketlock_initialize(lock, writer);
11 |     return lock;
12 | }
13 | 
14 | void aticketlock_initialize(ATicketLock * lock, void (*writer)(void *, void **)){
15 |     lock->writer = writer;
16 |     lock->inCounter.value = 0;
17 |     lock->outCounter.value = 0;
18 |     for(int i = 0; i < ARRAY_SIZE; i++){
19 |         lock->spinAreas[i].value = 0;
20 |     }
21 |     __sync_synchronize();
22 | }
23 | 
24 | void aticketlock_free(ATicketLock * lock){
25 |     free(lock);
26 | }
27 | 
28 | 
29 | void aticketlock_register_this_thread(){
30 | }
31 | 
32 | void aticketlock_write(ATicketLock *lock, void * writeInfo) {
33 |     aticketlock_write_read_lock(lock);
34 |     lock->writer(writeInfo, NULL);
35 |     aticketlock_write_read_unlock(lock);
36 | }
37 | 
38 | void aticketlock_write_read_lock(ATicketLock *lock) {
39 |     int waitTicket;
40 |     int myTicket = __sync_fetch_and_add(&lock->inCounter.value, 1);
41 |     int spinPosition = myTicket % ARRAY_SIZE;
42 |     load_acq(waitTicket, lock->spinAreas[spinPosition].value);
43 |     while(waitTicket != myTicket){
44 |         __sync_synchronize();
45 |         load_acq(waitTicket, lock->spinAreas[spinPosition].value);
46 |     }
47 | }
48 | 
49 | void aticketlock_write_read_unlock(ATicketLock * lock) {
50 |     lock->outCounter.value = lock->outCounter.value + 1;
51 |     int nextPosition = lock->outCounter.value % ARRAY_SIZE;
52 |     store_rel(lock->spinAreas[nextPosition].value, lock->outCounter.value);
53 |     __sync_synchronize();//Push change
54 | }
55 | 
56 | void aticketlock_read_lock(ATicketLock *lock) {
57 |     aticketlock_write_read_lock(lock);
58 | }
59 | 
60 | void aticketlock_read_unlock(ATicketLock *lock) {
61 |     aticketlock_write_read_unlock(lock);
62 | }
63 | 


--------------------------------------------------------------------------------
/src/lock/mcs_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "common_lock_constants.h"
 4 | 
 5 | #ifndef MCS_LOCK_H
 6 | #define MCS_LOCK_H
 7 | 
 8 | struct MCSNodeImpl;
 9 | 
10 | typedef union CacheLinePaddedMCSNodePtrImpl {
11 |     struct MCSNodeImpl * value;
12 |     char padding[64];
13 | } CacheLinePaddedMCSNodePtr;
14 | 
15 | typedef struct MCSNodeImpl {
16 |     char pad1[64];
17 |     CacheLinePaddedMCSNodePtr next;
18 |     CacheLinePaddedBool locked;
19 | } MCSNode;
20 | 
21 | typedef struct MCSLockImpl {
22 |     char pad1[64];
23 |     void (*writer)(void *, void **);
24 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
25 |     CacheLinePaddedMCSNodePtr endOfQueue;
26 | } MCSLock;
27 | 
28 | 
29 | 
30 | MCSLock * mcslock_create(void (*writer)(void *, void **));
31 | void mcslock_free(MCSLock * lock);
32 | void mcslock_initialize(MCSLock * lock, void (*writer)(void *, void **));
33 | void mcslock_register_this_thread();
34 | void mcslock_write(MCSLock *lock, void * writeInfo);
35 | bool mcslock_write_read_lock(MCSLock *lock);
36 | void mcslock_write_read_unlock(MCSLock * lock);
37 | void mcslock_read_lock(MCSLock *lock);
38 | void mcslock_read_unlock(MCSLock *lock);
39 | 
40 | static inline
41 | bool mcslock_is_locked(MCSLock *lock){
42 |     MCSNode * endOfQueue;
43 |     load_acq(endOfQueue, lock->endOfQueue.value);
44 |     return endOfQueue != NULL;
45 | }
46 | 
47 | extern __thread MCSNode myMCSNode __attribute__((aligned(64)));
48 | 
49 | static inline
50 | bool set_if_null_ptr(MCSNode ** pointerToOldValue, MCSNode * newValue){
51 |     return __sync_bool_compare_and_swap(pointerToOldValue, NULL, newValue);
52 | }
53 | 
54 | static inline
55 | bool mcslock_try_write_read_lock(MCSLock *lock) {
56 |     MCSNode * node = &myMCSNode;
57 |     if(ACCESS_ONCE(lock->endOfQueue.value) != NULL){
58 |         return false;
59 |     }else{
60 |         node->next.value = NULL;
61 |         return set_if_null_ptr(&lock->endOfQueue.value, node);
62 |     }
63 | }
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/src/lock/flat_comb_rdx_lock.h:
--------------------------------------------------------------------------------
 1 | #include "utils/smp_utils.h"
 2 | #include <stdbool.h>
 3 | #include "utils/support_many_non_zero_indicator_types.h"
 4 | 
 5 | #ifndef ALL_EQUAL_RDX_LOCK_H
 6 | #define ALL_EQUAL_RDX_LOCK_H
 7 | 
 8 | struct FCMCSNodeImpl;
 9 | 
10 | typedef union CacheLinePaddedFCMCSNodePtrImpl {
11 |     struct FCMCSNodeImpl * value;
12 |     char padding[64];
13 | } CacheLinePaddedFCMCSNodePtr;
14 | 
15 | typedef struct FCMCSNodeImpl {
16 |     char pad1[64];
17 |     CacheLinePaddedFCMCSNodePtr next;
18 |     CacheLinePaddedBool locked;
19 | } FCMCSNode;
20 | 
21 | struct FlatCombNodeImpl;
22 | 
23 | typedef union CacheLinePaddedFlatCombNodePtrImpl {
24 |     struct FlatCombNodeImpl * value;
25 |     char padding[64];
26 | } CacheLinePaddedFlatCombNodePtr;
27 | 
28 | typedef struct FlatCombNodeImpl {
29 |     char pad1[64];
30 |     struct FlatCombNodeImpl * next;
31 |     void * request;
32 |     unsigned long last_used;
33 |     char pad2[64 - (2 * sizeof(void *) + sizeof(unsigned long)) % 64];
34 |     CacheLinePaddedBool active;
35 |     char pad3[64];
36 | } FlatCombNode;
37 | 
38 | typedef struct FlatCombRDXLockImpl {
39 |     char pad1[64];
40 |     NZI_DATATYPE_NAME nonZeroIndicator;
41 |     CacheLinePaddedInt writeBarrier;
42 |     CacheLinePaddedFCMCSNodePtr endOfMCSQueue;
43 |     CacheLinePaddedFlatCombNodePtr combine_list;
44 |     void (*writer)(void *, void **);
45 |     unsigned long combine_count;
46 | } FlatCombRDXLock;
47 | 
48 | FlatCombRDXLock * fcrdxlock_create(void (*writer)(void *, void **));
49 | void fcrdxlock_initialize(FlatCombRDXLock * lock, void (*writer)(void *, void **));
50 | void fcrdxlock_free(FlatCombRDXLock * lock);
51 | 
52 | void fcrdxlock_register_this_thread();
53 | 
54 | void fcrdxlock_write(FlatCombRDXLock *lock, void * writeInfo);
55 | 
56 | void fcrdxlock_write_read_lock(FlatCombRDXLock *lock);
57 | void fcrdxlock_write_read_unlock(FlatCombRDXLock * lock);
58 | 
59 | void fcrdxlock_read_lock(FlatCombRDXLock *lock);
60 | void fcrdxlock_read_unlock(FlatCombRDXLock *lock);
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/qd_library/locks/waitable_lock.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_waitable_lock_hpp
 2 | #define qd_waitable_lock_hpp qd_waitable_lock_hpp
 3 | 
 4 | #include<condition_variable>
 5 | 
 6 | /**
 7 |  * @brief lock class wrapper to add wait/notify functionality
 8 |  * @tparam Lock a locking class
 9 |  * @details This wrapper adds functionality to wait on each instance of a class
10 |  *          without requireing a lock to be taken/released. This is useful when
11 |  *          implementing another lock, so that spinning can be avoided.
12 |  * @warning  This implementation relies on std::condition_variable_any not actually needing a lock,
13 |  *       which violates its preconditions.
14 |  * @remarks This is likely not the most efficient way of implementing waiting.
15 |  * @todo The private inheritance is used to get a memory layout in which
16 |  *       clang++-3.4 spills less than if these structures appeared in the opposite
17 |  *       order. This "optimization" might not be the best solution.
18 |  */
19 | template<typename Lock>
20 | class waitable_lock : private std::condition_variable_any, public Lock {
21 | 	/**
22 | 	 * @brief a dummy lock class
23 | 	 * @warning This lock does not provide locking.
24 | 	 * @details This class is not intended for use as a lock, but as
25 | 	 *          std::condition_variable_any requires a lock class,
26 | 	 *          this provides it.
27 | 	 */
28 | 	struct null_lock {
29 | 		void lock() {}
30 | 		void unlock() {}
31 | 	};
32 | 
33 | 	/** @brief an associated dummy lock for the std::condition_variable_any */
34 | 	null_lock not_a_lock;
35 | 
36 | 	public:
37 | 
38 | 		/** @brief wait until notified */
39 | 		void wait() {
40 | 			std::condition_variable_any::wait(not_a_lock);
41 | 		}
42 | 
43 | 		/** @brief notify (at least) one waiting thread */
44 | 		void notify_one() {
45 | 			std::condition_variable_any::notify_one();
46 | 		}
47 | 
48 | 		/** @brief notify all waiting threads */
49 | 		void notify_all() {
50 | 			std::condition_variable_any::notify_all();
51 | 		}
52 | };
53 | 
54 | #endif /* qd_waitable_lock_hpp */
55 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue.cpp.static:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using locktype = qdlock;
 4 | 
 5 | extern "C" {
 6 | #include "cpplock.h"
 7 | 
 8 | AgnosticDXLock* cpplock_new() {
 9 | 	AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024);
10 | 	new (&x->lock) locktype;
11 | 	return x;
12 | }
13 | 
14 | void cpplock_init(AgnosticDXLock* x) {
15 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
16 | 	new (l) locktype;
17 | }
18 | void cpplock_free(AgnosticDXLock* x) {
19 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
20 | 	l->~locktype();
21 | 	std::free(x);
22 | }
23 | 
24 | void delegate_wrapper(void (*fun)(int, int *), int d) {
25 | 	fun(d, nullptr);
26 | }
27 | void delegate_and_wait_wrapper(void (*fun)(int, int *), int d , int* r, std::atomic<bool>* f) {
28 | 	fun(d, r);
29 | 	f->store(true, std::memory_order_release);
30 | }
31 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
32 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
33 | 	l->DELEGATE_N(delegate_wrapper, delgateFun, data);
34 | }
35 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
36 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
37 | 	int resp;
38 | 	std::atomic<bool> flag(false);
39 | 	l->DELEGATE_N(delegate_and_wait_wrapper, delgateFun, data, &resp, &flag);
40 | 	while(!flag.load(std::memory_order_acquire)) {
41 | 		qd::pause();
42 | 	}
43 | 	return resp;
44 | }
45 | void cpplock_lock(AgnosticDXLock* x) {
46 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
47 | 	l->lock();
48 | }
49 | void cpplock_unlock(AgnosticDXLock* x) {
50 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
51 | 	l->unlock();
52 | }
53 | //void cpplock_rlock(AgnosticDXLock* x) {
54 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
55 | //	l->rlock();
56 | //}
57 | //void cpplock_runlock(AgnosticDXLock* x) {
58 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
59 | //	l->runlock();
60 | //}
61 | 
62 | } // extern "C"
63 | 


--------------------------------------------------------------------------------
/src/benchmark/pairingheap/test_pairingheap.c:
--------------------------------------------------------------------------------
 1 | #include "pairingheap.h"
 2 | 
 3 | /* For verification purpose only. */
 4 | #include <assert.h> 
 5 | 
 6 | #define BIG_RAND()  (rand() % 10000) 
 7 | /* End of verification purpose only part */
 8 | 
 9 | void heap_sort(int* xs, int n){
10 |   int i;
11 |   struct node* h = NULL;
12 |   for(i=0; i<n; ++i)
13 |     h = insert(h, xs[i]);
14 |   for(i=0; i<n; ++i){
15 |     xs[i] = top(h);
16 |     h = pop(h);
17 |   }
18 |   destroy_heap(h);
19 | }
20 | 
21 | /* Testing */
22 | int sorted(const int* xs, int n){
23 |   int i;
24 |   for(i=0; i<n-1; ++i)
25 |     if(xs[i+1] < xs[i])
26 |       return 0;
27 |   return 1;
28 | }
29 | 
30 | int check_sum(const int* xs, int n){
31 |   int x = 0;
32 |   while(n>0)
33 |     x ^= xs[--n];
34 |   return x;
35 | }
36 | 
37 | void test_heap_sort(){
38 |   int m = 1000;
39 |   int i, n, c, *xs;
40 |   while(m--){
41 |     n = 1 + BIG_RAND();
42 |     xs = (int*)malloc(sizeof(int)*n);
43 |     for(i=0; i<n; ++i)
44 |       xs[i] = BIG_RAND();
45 |     c = check_sum(xs, n);
46 |     heap_sort(xs, n);
47 |     assert(sorted(xs, n));
48 |     assert(c == check_sum(xs, n));
49 |     free(xs);
50 |   }
51 | }
52 | 
53 | void test_decrease_key(){
54 |   int m = 100;
55 |   int i, n, c, *xs;
56 |   struct node* h;
57 |   struct node** ns;
58 |   while(m--){
59 |     h = NULL;
60 |     n = 1 + BIG_RAND();
61 |     xs = (int*)malloc(sizeof(int)*n);
62 |     ns = (struct node**)malloc(sizeof(struct node*)*n);
63 |     for(i=0; i<n; ++i){
64 |       xs[i] = BIG_RAND();
65 |       ns[i] = singleton(xs[i]);
66 |       h = insert_node(h, ns[i]);
67 |     }
68 |     for(i=0; i<n; ++i){
69 |       xs[i] -= BIG_RAND();
70 |       h = decrease_key(h, ns[i], xs[i]);
71 |     }
72 |     c = check_sum(xs, n);
73 |     for(i=0; i<n; ++i){
74 |       xs[i] = top(h);
75 |       h = pop(h);
76 |     }
77 |     assert(sorted(xs, n));
78 |     assert(c == check_sum(xs, n));
79 |     free(xs);
80 |     free(ns);
81 |     destroy_heap(h);
82 |   }
83 | }
84 | 
85 | int main(){
86 |   test_heap_sort();
87 |   test_decrease_key();
88 |   return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/datastructures/pairingheap/test_pairingheap.c:
--------------------------------------------------------------------------------
 1 | #include "pairingheap.h"
 2 | 
 3 | /* For verification purpose only. */
 4 | #include <assert.h> 
 5 | 
 6 | #define BIG_RAND()  (rand() % 10000) 
 7 | /* End of verification purpose only part */
 8 | 
 9 | void heap_sort(int* xs, int n){
10 |   int i;
11 |   struct node* h = NULL;
12 |   for(i=0; i<n; ++i)
13 |     h = insert(h, xs[i]);
14 |   for(i=0; i<n; ++i){
15 |     xs[i] = top(h);
16 |     h = pop(h);
17 |   }
18 |   destroy_heap(h);
19 | }
20 | 
21 | /* Testing */
22 | int sorted(const int* xs, int n){
23 |   int i;
24 |   for(i=0; i<n-1; ++i)
25 |     if(xs[i+1] < xs[i])
26 |       return 0;
27 |   return 1;
28 | }
29 | 
30 | int check_sum(const int* xs, int n){
31 |   int x = 0;
32 |   while(n>0)
33 |     x ^= xs[--n];
34 |   return x;
35 | }
36 | 
37 | void test_heap_sort(){
38 |   int m = 1000;
39 |   int i, n, c, *xs;
40 |   while(m--){
41 |     n = 1 + BIG_RAND();
42 |     xs = (int*)malloc(sizeof(int)*n);
43 |     for(i=0; i<n; ++i)
44 |       xs[i] = BIG_RAND();
45 |     c = check_sum(xs, n);
46 |     heap_sort(xs, n);
47 |     assert(sorted(xs, n));
48 |     assert(c == check_sum(xs, n));
49 |     free(xs);
50 |   }
51 | }
52 | 
53 | void test_decrease_key(){
54 |   int m = 100;
55 |   int i, n, c, *xs;
56 |   struct node* h;
57 |   struct node** ns;
58 |   while(m--){
59 |     h = NULL;
60 |     n = 1 + BIG_RAND();
61 |     xs = (int*)malloc(sizeof(int)*n);
62 |     ns = (struct node**)malloc(sizeof(struct node*)*n);
63 |     for(i=0; i<n; ++i){
64 |       xs[i] = BIG_RAND();
65 |       ns[i] = singleton(xs[i]);
66 |       h = insert_node(h, ns[i]);
67 |     }
68 |     for(i=0; i<n; ++i){
69 |       xs[i] -= BIG_RAND();
70 |       h = decrease_key(h, ns[i], xs[i]);
71 |     }
72 |     c = check_sum(xs, n);
73 |     for(i=0; i<n; ++i){
74 |       xs[i] = top(h);
75 |       h = pop(h);
76 |     }
77 |     assert(sorted(xs, n));
78 |     assert(c == check_sum(xs, n));
79 |     free(xs);
80 |     free(ns);
81 |     destroy_heap(h);
82 |   }
83 | }
84 | 
85 | int main(){
86 |   test_heap_sort();
87 |   test_decrease_key();
88 |   return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/src/profile/profile_perf.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | import datetime
 6 | import subprocess
 7 | import re
 8 | import sys
 9 | from os.path import join
10 | import shutil
11 | import multiprocessing
12 | 
13 | 
14 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
15 | 
16 | sys.argv.pop(0)
17 | 
18 | if len(sys.argv) == 0:
19 |     print """
20 | profile with perf script
21 | ========================
22 | 
23 | Usage:
24 | 
25 | ./bin_profile/profile_perf.py locksubfix [An optional list of rw_bench parameters. Default parameters will be used if not specified.]"""
26 |     exit(0)
27 | 
28 | lock_subfix = sys.argv.pop(0)
29 | 
30 | result = ''
31 | 
32 | print lock_subfix
33 | 
34 | print sys.argv
35 | 
36 | command = None
37 | 
38 | if len(sys.argv) == 0:
39 |     command = ['perf',
40 |                'record',
41 |                '-g',
42 |                join(bin_dir_path, 'rw_bench_clone_' + lock_subfix), 
43 |                str(multiprocessing.cpu_count()), 
44 |                '0.8',
45 |                '10',
46 |                '4',
47 |                '4',
48 |                '0']
49 | else:
50 |     command = ['perf',
51 |                'record',
52 |                '-g',
53 |                join(bin_dir_path, 'rw_bench_clone_' + lock_subfix)] + sys.argv
54 | 
55 | subprocess.call(command)
56 | 
57 | out_dir = 'perf_data'
58 | 
59 | if not os.path.exists(out_dir):
60 |     os.makedirs(out_dir)
61 | 
62 | the_date = datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")
63 | 
64 | out_file = join(out_dir, 'perf.data_' + the_date + result)
65 | 
66 | shutil.copy2('perf.data', out_file)
67 | 
68 | print ""
69 | 
70 | print "Info:"
71 | 
72 | print ""
73 | 
74 | print 'The command: ' + ' '.join(command)
75 | 
76 | print ""
77 | 
78 | print "The profile data can be found in the file: " + out_file
79 | 
80 | print ""
81 | 
82 | print "Analyze modules:  perf report --stdio -g none --sort comm,dso -i " + out_file
83 | 
84 | print ""
85 | 
86 | print "Analyze functions:  perf report --stdio -g none -i " + out_file + " | c++filt"
87 | 
88 | print ""
89 | 
90 | print "Analyze call graph:  perf report --stdio -g graph -i " + out_file + " | c++filt"
91 | 


--------------------------------------------------------------------------------
/src/lock/rcpp_lock.cpp:
--------------------------------------------------------------------------------
 1 | RCPPLock* rcpplock_new() {
 2 | 	RCPPLock* x = (RCPPLock*) std::malloc(sizeof(RCPPLock) + sizeof(locktype)-1+1024);
 3 | 	new (&x->lock) locktype;
 4 | 	return x;
 5 | }
 6 | 
 7 | void rcpplock_init(RCPPLock* x) {
 8 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
 9 | 	new (l) locktype;
10 | }
11 | void rcpplock_free(RCPPLock* x) {
12 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
13 | 	l->~locktype();
14 | 	std::free(x);
15 | }
16 | 	
17 | void rcpplock_delegate(RCPPLock* x, void (*delgateFun)(void*, void* *), void* data) {
18 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
19 | 	l->delegate_n([](void (*fun)(void*, void* *), void* d) {fun(d, nullptr);}, delgateFun, data);
20 | }
21 | #if 1
22 | void* rcpplock_delegate_and_wait(RCPPLock* x, void (*delgateFun)(void*, void* *), void* data) {
23 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
24 | 	void* resp;
25 | 	std::atomic<bool> flag(false);
26 | 	l->delegate_n([](void (*fun)(void*, void* *), void* d , void** r, std::atomic<bool>* f) { fun(d, r);  f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag);
27 | 	while(!flag.load(std::memory_order_acquire)) {
28 | 		qd::pause();
29 | 	}
30 | 	return resp;
31 | }
32 | #endif
33 | #if 0
34 | int rcpplock_delegate_and_wait(RCPPLock* x, void (*delgateFun)(int, int *), int data) {
35 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
36 | 	static const int reserved = -999999;
37 | 	std::atomic<int> resp(reserved);
38 | 	l->delegate_n([](void (*fun)(int, int *), int d , std::atomic<int>* r) { int v = -1; fun(d, &v);  r->store(v, std::memory_order_release);}, delgateFun, data, &resp);
39 | 	while(resp.load(std::memory_order_acquire) == reserved) {
40 | 		qd::pause();
41 | 	}
42 | 	return resp;
43 | }
44 | #endif
45 | void rcpplock_lock(RCPPLock* x) {
46 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
47 | 	l->lock();
48 | }
49 | void rcpplock_unlock(RCPPLock* x) {
50 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
51 | 	l->unlock();
52 | }
53 | void rcpplock_rlock(RCPPLock* x) {
54 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
55 | 	l->rlock();
56 | }
57 | void rcpplock_runlock(RCPPLock* x) {
58 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
59 | 	l->runlock();
60 | }
61 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/portable_defns.h:
--------------------------------------------------------------------------------
 1 | #ifndef __PORTABLE_DEFNS_H__
 2 | #define __PORTABLE_DEFNS_H__
 3 | 
 4 | #define MAX_THREADS 128 /* Nobody will ever have more! */
 5 | #define INTEL 1
 6 | #define CACHE_LINE_SIZE 64
 7 | #if defined(SPARC)
 8 | #include "sparc_defns.h"
 9 | #elif defined(INTEL)
10 | #include "intel_defns.h"
11 | #elif defined(PPC)
12 | #include "ppc_defns.h"
13 | #elif defined(IA64)
14 | #include "ia64_defns.h"
15 | #elif defined(MIPS)
16 | #include "mips_defns.h"
17 | #elif defined(ALPHA)
18 | #include "alpha_defns.h"
19 | #else
20 | #error "A valid architecture has not been defined"
21 | #endif
22 | 
23 | #include <string.h>
24 | 
25 | #ifndef MB_NEAR_CAS
26 | #define RMB_NEAR_CAS() RMB()
27 | #define WMB_NEAR_CAS() WMB()
28 | #define MB_NEAR_CAS()  MB()
29 | #endif
30 | 
31 | typedef unsigned long int_addr_t;
32 | 
33 | typedef int bool_t;
34 | #define FALSE 0
35 | #define TRUE  1
36 | 
37 | #define ADD_TO(_v,_x)                                                   \
38 | do {                                                                    \
39 |     int __val = (_v), __newval;                                         \
40 |     while ( (__newval = CASIO(&(_v),__val,__val+(_x))) != __val )       \
41 |         __val = __newval;                                               \
42 | } while ( 0 )
43 | 
44 | /*
45 |  * Allow us to efficiently align and pad structures so that shared fields
46 |  * don't cause contention on thread-local or read-only fields.
47 |  */
48 | #define CACHE_PAD(_n) char __pad ## _n [CACHE_LINE_SIZE]
49 | #define ALIGNED_ALLOC(_s)                                       \
50 |     ((void *)(((unsigned long)malloc((_s)+CACHE_LINE_SIZE*2) +  \
51 |         CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE-1)))
52 | 
53 | 
54 | /*
55 |  * POINTER MARKING
56 |  */
57 | #define get_marked_ref(_p)      ((void *)(((unsigned long)(_p)) | 1))
58 | #define get_unmarked_ref(_p)    ((void *)(((unsigned long)(_p)) & ~1))
59 | #define is_marked_ref(_p)       (((unsigned long)(_p)) & 1)
60 | 
61 | 
62 | 
63 | /* Read field @_f into variable @_x. */
64 | #define READ_FIELD(_x,_f) ((_x) = (_f))
65 | 
66 | #define WEAK_DEP_ORDER_RMB() ((void)0)
67 | #define WEAK_DEP_ORDER_WMB() ((void)0)
68 | #define WEAK_DEP_ORDER_MB()  ((void)0)
69 | 
70 | 
71 | 
72 | #endif /* __PORTABLE_DEFNS_H__ */
73 | 


--------------------------------------------------------------------------------
/src/lock/agnostic_rdx_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "datastructures/dr_multi_writers_queue.h"
 3 | #include "common_lock_constants.h"
 4 | #include "utils/support_many_non_zero_indicator_types.h"
 5 | #include "utils/support_many_lock_types.h"
 6 | 
 7 | #ifndef AGNOSTIC_RDX_LOCK_H
 8 | #define AGNOSTIC_RDX_LOCK_H
 9 | 
10 | #ifdef LOCK_TYPE_WPRW_MCSLock
11 | //***********************************
12 | //MCSLock
13 | //***********************************
14 | #include "mcs_lock.h"
15 | 
16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock
17 | 
18 | #elif defined (LOCK_TYPE_WPRW_CohortLock)
19 | //***********************************
20 | //CohortLock
21 | //***********************************
22 | #include "cohort_lock.h"
23 | 
24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock
25 | 
26 | #elif defined (LOCK_TYPE_WPRW_TATASLock)
27 | //***********************************
28 | //TATASLock
29 | //***********************************
30 | #include "tatas_lock.h"
31 | 
32 | #define LOCK_DATATYPE_NAME_WPRW TATASLock
33 | 
34 | #else
35 | 
36 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified
37 | 
38 | #endif
39 | 
40 | 
41 | typedef struct AgnosticRDXLockImpl {
42 |     DRMWQueue writeQueue;
43 |     char pad1[64];
44 |     void (*writer)(void *, void **);
45 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
46 |     char pad3[64];
47 |     CacheLinePaddedInt writeBarrier;
48 |     LOCK_DATATYPE_NAME_WPRW lock;
49 |     char pad4[64];
50 |     NZI_DATATYPE_NAME nonZeroIndicator;
51 | } AgnosticRDXLock;
52 | 
53 | 
54 | 
55 | AgnosticRDXLock * ardxlock_create(void (*writer)(void *, void **));
56 | void ardxlock_free(AgnosticRDXLock * lock);
57 | void ardxlock_initialize(AgnosticRDXLock * lock, void (*writer)(void *, void **));
58 | void ardxlock_register_this_thread();
59 | void ardxlock_write_with_response(AgnosticRDXLock *lock, void (*delgateFun)(void *, void **), void * data, void ** responseLocation);
60 | void ardxlock_delegate(AgnosticRDXLock *lock, void (*delgateFun)(void *, void**), void * data);
61 | void ardxlock_write(AgnosticRDXLock *lock, void * writeInfo);
62 | void ardxlock_write_read_lock(AgnosticRDXLock *lock);
63 | void ardxlock_write_read_unlock(AgnosticRDXLock * lock);
64 | void ardxlock_read_lock(AgnosticRDXLock *lock);
65 | void ardxlock_read_unlock(AgnosticRDXLock *lock);
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/src/new_rep/locks/locks.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOCKS_H
 2 | #define LOCKS_H
 3 | 
 4 | #include "locks/tatas_lock.h"
 5 | #include "locks/qd_lock.h"
 6 | 
 7 | typedef enum {TATAS_LOCK, QD_LOCK} LL_lock_type_name;
 8 | 
 9 | #define LL_initialize(X) _Generic((X),      \
10 |      TATASLock * : tatas_initialize((TATASLock *)X), \
11 |      QDLock * : qd_initialize((QDLock *)X)   \
12 |                                 )
13 | 
14 | void * LL_create(LL_lock_type_name llLockType){
15 |     if(TATAS_LOCK == llLockType){
16 |         TATASLock * l = aligned_alloc(CACHE_LINE_SIZE, sizeof(TATASLock));
17 |         LL_initialize(l);
18 |         return l;
19 |     } else if (QD_LOCK == llLockType){
20 |         QDLock * l = aligned_alloc(CACHE_LINE_SIZE, sizeof(QDLock));
21 |         LL_initialize(l);
22 |         return l;
23 |     }
24 |     return NULL;/* Should not be reachable */
25 | }
26 | 
27 | #define LL_free(X) _Generic((X),\
28 |      default : free(X) \
29 |      )
30 | 
31 | #define LL_lock(X) _Generic((X),         \
32 |      TATASLock *: tatas_lock((TATASLock *)X),     \
33 |      QDLock * : tatas_lock(&((QDLock *)X)->mutexLock)  \
34 |                               )
35 | 
36 | #define LL_unlock(X) _Generic((X),    \
37 |     TATASLock *: tatas_unlock((TATASLock *)X), \
38 |     QDLock * : tatas_unlock(&((QDLock *)X)->mutexLock) \
39 |     )
40 | 
41 | #define LL_is_locked(X) _Generic((X),    \
42 |     TATASLock *: tatas_is_locked((TATASLock *)X), \
43 |     QDLock * : tatas_is_locked(&((QDLock *)X)->mutexLock) \
44 |     )
45 | 
46 | #define LL_try_lock(X) _Generic((X),    \
47 |     TATASLock *: tatas_try_lock(X), \
48 |     QDLock * : tatas_try_lock(&((QDLock *)X)->mutexLock) \
49 |     )
50 | 
51 | void ________TATAS_DELEGATE(TATASLock* l,
52 |                             void (*funPtr)(unsigned int, void *), 
53 |                             unsigned int messageSize,
54 |                             void * messageAddress){
55 |     tatas_lock(l);
56 |     funPtr(messageSize, messageAddress);
57 |     tatas_unlock(l);
58 | }
59 | 
60 | #define LL_delegate(X, funPtr, messageSize, messageAddress) _Generic((X),      \
61 |     TATASLock *: ________TATAS_DELEGATE((TATASLock *)X, funPtr, messageSize, messageAddress), \
62 |     QDLock * : qd_delegate((QDLock *)X, funPtr, messageSize, messageAddress)  \
63 |     )
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/cpplock.cpp:
--------------------------------------------------------------------------------
 1 | AgnosticDXLock* cpplock_new() {
 2 | 	AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024);
 3 | 	new (&x->lock) locktype;
 4 | 	return x;
 5 | }
 6 | 
 7 | void cpplock_init(AgnosticDXLock* x) {
 8 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
 9 | 	new (l) locktype;
10 | }
11 | void cpplock_free(AgnosticDXLock* x) {
12 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
13 | 	l->~locktype();
14 | 	std::free(x);
15 | }
16 | 	
17 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
18 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
19 | 	l->delegate_n([](void (*fun)(int, int *), int d) {fun(d, nullptr);}, delgateFun, data);
20 | }
21 | #if 1
22 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
23 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
24 | 	int resp;
25 | 	std::atomic<bool> flag(false);
26 | 	l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic<bool>* f) { fun(d, r);  f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag);
27 | 	while(!flag.load(std::memory_order_acquire)) {
28 | 		qd::pause();
29 | 	}
30 | 	return resp;
31 | }
32 | #endif
33 | #if 0
34 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
35 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
36 | 	static const int reserved = -999999;
37 | 	std::atomic<int> resp(reserved);
38 | 	l->delegate_n([](void (*fun)(int, int *), int d , std::atomic<int>* r) { int v = -1; fun(d, &v);  r->store(v, std::memory_order_release);}, delgateFun, data, &resp);
39 | 	while(resp.load(std::memory_order_acquire) == reserved) {
40 | 		qd::pause();
41 | 	}
42 | 	return resp;
43 | }
44 | #endif
45 | void cpplock_lock(AgnosticDXLock* x) {
46 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
47 | 	l->lock();
48 | }
49 | void cpplock_unlock(AgnosticDXLock* x) {
50 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
51 | 	l->unlock();
52 | }
53 | //void cpplock_rlock(AgnosticDXLock* x) {
54 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
55 | //	l->rlock();
56 | //}
57 | //void cpplock_runlock(AgnosticDXLock* x) {
58 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
59 | //	l->runlock();
60 | //}
61 | 


--------------------------------------------------------------------------------
/src/tests/test_multi_writers_queue.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "datastructures/multi_writers_queue.h"
 5 | #include "test_framework.h"
 6 | 
 7 | 
 8 | int test_create(){
 9 | 
10 |     MWQueue * test = mwqueue_create();
11 |     mwqueue_free(test);
12 |     return 1;
13 | 
14 | }
15 | 
16 | int test_offer(){
17 |     {
18 |         MWQueue * queue = mwqueue_create();
19 |         mwqueue_reset_fully_read(queue);
20 |         for(void * i = NULL; i < (void*)(MWQ_CAPACITY/2); i++){
21 |             mwqueue_offer(queue, i);
22 |         } 
23 | 
24 |         mwqueue_free(queue);
25 |     }
26 |     {
27 |         MWQueue * queue = mwqueue_create();
28 |         mwqueue_reset_fully_read(queue);
29 |         for(void * i = NULL; i < (void*)(MWQ_CAPACITY*2); i++){
30 |             mwqueue_offer(queue, i);
31 |         }
32 | 
33 |         mwqueue_free(queue);
34 |     }
35 |     return 1;
36 | }
37 | 
38 | 
39 | int test_offer_and_take(){
40 |     {
41 |         MWQueue * queue = mwqueue_create();
42 |         mwqueue_reset_fully_read(queue);
43 |         for(void * i = (void*)1; i <= (void*)(MWQ_CAPACITY/2); i++){
44 |             mwqueue_offer(queue, i);
45 |         } 
46 | 
47 |         for(int i = 1; i <= (MWQ_CAPACITY/2); i++){
48 |             assert(NULL != mwqueue_take(queue));
49 |         }
50 | 
51 |         assert(NULL == mwqueue_take(queue));
52 | 
53 |         mwqueue_free(queue);
54 |     }
55 |     {
56 |         MWQueue * queue = mwqueue_create();
57 |         mwqueue_reset_fully_read(queue);
58 |         for(void * i = (void*)1; i <= (void*)(MWQ_CAPACITY * 2); i++){
59 |             mwqueue_offer(queue, i);
60 |         }
61 | 
62 |         for(void * i = 0; i < TO_VP(MWQ_CAPACITY); i++){
63 |             assert(NULL != mwqueue_take(queue));
64 |         } 
65 | 
66 |         assert(NULL == mwqueue_take(queue));
67 | 
68 |         mwqueue_free(queue);
69 |     }
70 |     return 1;
71 | }
72 | 
73 | 
74 | int main(int argc, char **argv){
75 |     
76 |     printf("\n\n\n\033[32m ### STARTING MULTI WRITES QUEUE TESTS! -- \033[m\n\n\n");
77 | 
78 |     T(test_create(), "test_create()");
79 | 
80 |     T(test_offer(), "test_offer()");
81 | 
82 |     T(test_offer_and_take(), "test_offer_and_take()");
83 | 
84 |     printf("\n\n\n\033[32m ### MULTI WRITES QUEUE COMPLETED! -- \033[m\n\n\n");
85 | 
86 |     exit(0);
87 | 
88 | }
89 | 
90 | 


--------------------------------------------------------------------------------
/qd_library/queues/simple_locked_queue.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_simple_locked_queue_hpp
 2 | #define qd_simple_locked_queue_hpp qd_simple_locked_queue_hpp
 3 | 
 4 | #include<array>
 5 | #include<cassert>
 6 | #include<mutex>
 7 | #include<queue>
 8 | 
 9 | class simple_locked_queue {
10 | 	std::mutex lock;
11 | 	std::queue<std::array<char, 128>> queue;
12 | 	typedef std::lock_guard<std::mutex> scoped_guard;
13 | 	
14 | 	typedef void(*ftype)(char*);
15 | 	
16 | 	/* some constants */
17 | 	static const bool CLOSED = false;
18 | 	static const bool SUCCESS = true;
19 | 
20 | 	void forwardall(char*, long i) {
21 | 		assert(i <= 120);
22 | 		if(i > 120) throw "up";
23 | 	};
24 | 	template<typename P, typename... Ts>
25 | 	void forwardall(char* buffer, long offset, P&& p, Ts&&... ts) {
26 | 		assert(offset <= 120);
27 | 		auto ptr = reinterpret_cast<P*>(&buffer[offset]);
28 | 		new (ptr) P(std::forward<P>(p));
29 | 		forwardall(buffer, offset+sizeof(p), std::forward<Ts>(ts)...);
30 | 	}
31 | 	public:
32 | 		void open() {
33 | 			/* TODO this function should not even be here */
34 | 			/* no-op as this is an "infinite" queue that always accepts more data */
35 | 		}
36 | 		/**
37 | 		 * @brief enqueues an entry
38 | 		 * @tparam P return type of associated function
39 | 		 * @param op wrapper function for associated function
40 | 		 * @return SUCCESS on successful storing in queue, CLOSED otherwise
41 | 		 */
42 | 		template<typename... Ps>
43 | 		bool enqueue(ftype op, Ps*... ps) {
44 | 			std::array<char, 128> val;
45 | 			scoped_guard l(lock);
46 | 			queue.push(val);
47 | 			forwardall(queue.back().data(), 0, std::move(op), std::move(*ps)...);
48 | 			return SUCCESS;
49 | 		}
50 | 
51 | 		/** execute all stored operations */
52 | 		void flush() {
53 | 			scoped_guard l(lock);
54 | 			while(!queue.empty()) {
55 | 				auto operation = queue.front();
56 | 				char* ptr = operation.data();
57 | 				ftype* fun = reinterpret_cast<ftype*>(ptr);
58 | 				ptr += sizeof(ftype*);
59 | 				(*fun)(ptr);
60 | 				queue.pop();
61 | 			}
62 | 		}
63 | 		/** execute one stored operation */
64 | 		void flush_one() {
65 | 			scoped_guard l(lock);
66 | 			if(!queue.empty()) {
67 | 				char* ptr = queue.front().data();
68 | 				ftype* fun = reinterpret_cast<ftype*>(ptr);
69 | 				ptr += sizeof(ftype);
70 | 				(*fun)(ptr);
71 | 				queue.pop();
72 | 			}
73 | 		}
74 | };
75 | 
76 | #endif /* qd_simple_locked_queue_hpp */
77 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/glue.cpp:
--------------------------------------------------------------------------------
 1 | #include "qd.hpp"
 2 | 
 3 | using locktype = qdlock;
 4 | 
 5 | extern "C" {
 6 | #include "cpplock.h"
 7 | 
 8 | AgnosticDXLock* cpplock_new() {
 9 | 	AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024);
10 | 	new (&x->lock) locktype;
11 | 	return x;
12 | }
13 | 
14 | void cpplock_init(AgnosticDXLock* x) {
15 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
16 | 	new (l) locktype;
17 | }
18 | void cpplock_free(AgnosticDXLock* x) {
19 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
20 | 	l->~locktype();
21 | 	std::free(x);
22 | }
23 | 	
24 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
25 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
26 | 	l->delegate_n([](void (*fun)(int, int *), int d) {fun(d, nullptr);}, delgateFun, data);
27 | }
28 | #if 0
29 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
30 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
31 | 	int resp;
32 | 	std::atomic<bool> flag(false);
33 | 	l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic<bool>* f) { fun(d, r);  f->store(true, std::memory_order_release);}, delgateFun, data, &resp, &flag);
34 | 	while(!flag.load(std::memory_order_acquire)) {
35 | 		qd::pause();
36 | 	}
37 | 	return resp;
38 | }
39 | #endif
40 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
41 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
42 | 	static const int reserved = -999999;
43 | 	std::atomic<int> resp(reserved);
44 | 	l->delegate_n([](void (*fun)(int, int *), int d , std::atomic<int>* r) { int v; fun(d, &v);  r->store(v, std::memory_order_release);}, delgateFun, data, &resp);
45 | 	while(resp.load(std::memory_order_acquire) == reserved) {
46 | 		qd::pause();
47 | 	}
48 | 	return resp;
49 | }
50 | void cpplock_lock(AgnosticDXLock* x) {
51 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
52 | 	l->lock();
53 | }
54 | void cpplock_unlock(AgnosticDXLock* x) {
55 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
56 | 	l->unlock();
57 | }
58 | //void cpplock_rlock(AgnosticDXLock* x) {
59 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
60 | //	l->rlock();
61 | //}
62 | //void cpplock_runlock(AgnosticDXLock* x) {
63 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
64 | //	l->runlock();
65 | //}
66 | 
67 | } // extern "C"
68 | 


--------------------------------------------------------------------------------
/src/datastructures/numa_ingress_egress_nzi.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "utils/numa_node_info_support.h"
 4 | 
 5 | #ifndef READER_GROUPS_NZI_H
 6 | #define READER_GROUPS_NZI_H
 7 | 
 8 | #define INGRESS_EGRESS_PADDING 32
 9 | 
10 | extern __thread CacheLinePaddedInt myIngressEgressArriveNumaNode __attribute__((aligned(64)));
11 | 
12 | typedef struct IngressEgressCounterImpl {
13 |     unsigned long ingress;
14 |     char pad1[INGRESS_EGRESS_PADDING - sizeof(unsigned long) % INGRESS_EGRESS_PADDING];
15 |     unsigned long egress;
16 |     char pad2[INGRESS_EGRESS_PADDING - sizeof(unsigned long) % INGRESS_EGRESS_PADDING];
17 |     char pad3[64];
18 | } IngressEgressCounter;
19 | 
20 | typedef struct NUMAIngressEgressNZIImpl {
21 |     IngressEgressCounter readerCounters[NUMBER_OF_NUMA_NODES];
22 | } NUMAIngressEgress;
23 | 
24 | static inline
25 | void nienzi_initialize(NUMAIngressEgress * nzi){
26 |     for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){
27 |         nzi->readerCounters[i].ingress = 0;
28 |         nzi->readerCounters[i].egress = 0;
29 |     }
30 |     __sync_synchronize();
31 | }
32 | 
33 | static inline
34 | void nienzi_arrive(NUMAIngressEgress * nzi){
35 |     int myNumaNode = numa_node_id();
36 |     myIngressEgressArriveNumaNode.value = myNumaNode;
37 |     __sync_fetch_and_add(&nzi->readerCounters[myNumaNode].ingress, 1);
38 | }
39 | 
40 | static inline
41 | void nienzi_depart(NUMAIngressEgress * nzi){
42 |     int myNumaNode = myIngressEgressArriveNumaNode.value;
43 |     __sync_fetch_and_add(&nzi->readerCounters[myNumaNode].egress, 1);
44 | }
45 | 
46 | 
47 | static inline
48 | bool nienzi_query(NUMAIngressEgress * nzi){
49 |     for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){
50 |         if(ACCESS_ONCE(nzi->readerCounters[i].ingress) !=
51 |            ACCESS_ONCE(nzi->readerCounters[i].egress)){
52 |             return false;
53 |         }
54 |     }
55 |     return true;
56 | }
57 | 
58 | static inline
59 | void nienzi_wait_unil_empty(NUMAIngressEgress * nzi){
60 |     int ingressCount;
61 |     int egressCount;
62 |     for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){
63 |         load_acq(ingressCount, nzi->readerCounters[i].ingress); 
64 |         load_acq(egressCount, nzi->readerCounters[i].egress);
65 |         while(ingressCount != egressCount){
66 |             __sync_synchronize();
67 |             load_acq(ingressCount, nzi->readerCounters[i].ingress); 
68 |             load_acq(egressCount, nzi->readerCounters[i].egress);
69 |         }
70 |     }
71 | }
72 | 
73 | #endif
74 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/cpplock_nodetach.cpp:
--------------------------------------------------------------------------------
 1 | AgnosticDXLock* cpplock_new() {
 2 | 	AgnosticDXLock* x = (AgnosticDXLock*) std::malloc(sizeof(AgnosticDXLock) + sizeof(locktype)-1+1024);
 3 | 	new (&x->lock) locktype;
 4 | 	return x;
 5 | }
 6 | 
 7 | void cpplock_init(AgnosticDXLock* x) {
 8 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
 9 | 	new (l) locktype;
10 | }
11 | void cpplock_free(AgnosticDXLock* x) {
12 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
13 | 	l->~locktype();
14 | 	std::free(x);
15 | }
16 | 	
17 | void cpplock_delegate(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
18 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
19 | 	std::atomic<bool> flag(false);
20 | 	l->delegate_n([](void (*fun)(int, int *), int d , std::atomic<bool>* f) { fun(d, nullptr);  f->store(true, std::memory_order_release); }, delgateFun, data, &flag);
21 | 	while(!flag.load(std::memory_order_acquire)) {
22 | 		qd::pause();
23 | 	}
24 | }
25 | #if 1
26 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
27 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
28 | 	int resp;
29 | 	std::atomic<bool> flag(false);
30 | 	l->delegate_n([](void (*fun)(int, int *), int d , int* r, std::atomic<bool>* f) { fun(d, r);  f->store(true, std::memory_order_release); }, delgateFun, data, &resp, &flag);
31 | 	while(!flag.load(std::memory_order_acquire)) {
32 | 		qd::pause();
33 | 	}
34 | 	return resp;
35 | }
36 | #endif
37 | #if 0
38 | int cpplock_delegate_and_wait(AgnosticDXLock* x, void (*delgateFun)(int, int *), int data) {
39 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
40 | 	static const int reserved = -999999;
41 | 	std::atomic<int> resp(reserved);
42 | 	l->delegate_n([](void (*fun)(int, int *), int d , std::atomic<int>* r) { int v = -1; fun(d, &v);  r->store(v, std::memory_order_release);}, delgateFun, data, &resp);
43 | 	while(resp.load(std::memory_order_acquire) == reserved) {
44 | 		qd::pause();
45 | 	}
46 | 	return resp;
47 | }
48 | #endif
49 | void cpplock_lock(AgnosticDXLock* x) {
50 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
51 | 	l->lock();
52 | }
53 | void cpplock_unlock(AgnosticDXLock* x) {
54 | 	locktype* l = reinterpret_cast<locktype*>(&x->lock);
55 | 	l->unlock();
56 | }
57 | //void cpplock_rlock(AgnosticDXLock* x) {
58 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
59 | //	l->rlock();
60 | //}
61 | //void cpplock_runlock(AgnosticDXLock* x) {
62 | //	locktype* l = reinterpret_cast<locktype*>(&x->lock);
63 | //	l->runlock();
64 | //}
65 | 


--------------------------------------------------------------------------------
/src/lock/cohort_lock.h:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include "utils/smp_utils.h"
 3 | #include "common_lock_constants.h"
 4 | #include "ticket_lock.h"
 5 | #include "aticket_lock.h"
 6 | 
 7 | 
 8 | #ifndef COHORT_LOCK_H
 9 | #define COHORT_LOCK_H
10 | 
11 | #define MAXIMUM_NUMBER_OF_HAND_OVERS 64
12 | 
13 | typedef struct NodeLocalLockDataImpl {
14 |     char pad1[64];
15 |     TicketLock lock;
16 |     CacheLinePaddedInt numberOfHandOvers;
17 |     CacheLinePaddedBool needToTakeGlobalLock;
18 | } NodeLocalLockData;
19 | 
20 | typedef struct CohortLockImpl {
21 |     char pad1[64];
22 |     void (*writer)(void *, void **);
23 |     char pad2[64 - sizeof(void (*)(void*)) % 64];
24 |     ATicketLock globalLock;
25 |     NodeLocalLockData localLockData[NUMBER_OF_NUMA_NODES];
26 | } CohortLock;
27 | 
28 | 
29 | CohortLock * cohortlock_create(void (*writer)(void *, void **));
30 | void cohortlock_free(CohortLock * lock);
31 | void cohortlock_initialize(CohortLock * lock, void (*writer)(void *, void **));
32 | void cohortlock_register_this_thread();
33 | void cohortlock_write(CohortLock *lock, void * writeInfo);
34 | bool cohortlock_write_read_lock(CohortLock *lock);
35 | void cohortlock_write_read_unlock(CohortLock * lock);
36 | void cohortlock_read_lock(CohortLock *lock);
37 | void cohortlock_read_unlock(CohortLock *lock);
38 | 
39 | static inline
40 | bool cohortlock_is_locked(CohortLock *lock){
41 |     int inCounter;
42 |     int outCounter;
43 |     load_acq(inCounter, lock->globalLock.inCounter.value);
44 |     load_acq(outCounter, lock->globalLock.outCounter.value);
45 |     return (inCounter != outCounter);
46 | }
47 | 
48 | extern __thread CacheLinePaddedInt myLocalNode __attribute__((aligned(64)));
49 | 
50 | #ifdef PINNING
51 | extern __thread CacheLinePaddedInt numa_node;
52 | #endif
53 | 
54 | static inline
55 | bool cohortlock_is_local_locked(CohortLock *lock){
56 |     int inCounter;
57 |     int outCounter;
58 | #ifdef PINNING
59 |     NodeLocalLockData * localData = &lock->localLockData[numa_node.value];
60 | #else
61 |     NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value];
62 | #endif 
63 |     load_acq(inCounter, localData->lock.inCounter.value);
64 |     load_acq(outCounter, localData->lock.outCounter.value);
65 |     return (inCounter != outCounter);
66 | }
67 | 
68 | static inline
69 | bool cohortlock_try_write_read_lock(CohortLock *lock) {
70 |     if(!cohortlock_is_locked(lock) && 
71 |        !cohortlock_is_local_locked(lock)){
72 |         cohortlock_write_read_lock(lock);
73 |         return true;
74 |     }else{
75 |         return false;
76 |     }
77 | }
78 | 
79 | #endif
80 | 


--------------------------------------------------------------------------------
/src/lock/extract_numa_structure.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | from os import listdir
 5 | from os.path import join
 6 | from os import mkdir
 7 | from subprocess import Popen
 8 | from subprocess import PIPE
 9 | import re 
10 | import socket
11 | 
12 | def numa_structure():
13 |     lscpu_pipe = Popen("lscpu",stdout = PIPE).stdout
14 |     number_of_numa_nodes = 0
15 |     cpus_per_node = 0
16 |     not_ready = True
17 |     outputString = ""
18 |     coresNodeList = []
19 |     while not_ready:
20 |         line = lscpu_pipe.readline()
21 |         if line:
22 |             matchObject = re.search("NUMA node\d CPU\(s\):(.*)", line, re.M)
23 |             if matchObject:
24 |                 number_of_numa_nodes = number_of_numa_nodes + 1
25 |                 cpusString = matchObject.group(1).strip()
26 |                 rangeMatchObject = re.search("(\d+)-(\d+)", cpusString, re.M)
27 |                 coreListString = ""
28 |                 if rangeMatchObject:
29 |                     start = int(rangeMatchObject.group(1).strip())
30 |                     end = int(rangeMatchObject.group(2).strip())
31 |                     cpus_per_node = end + 1
32 |                     coreList = []
33 |                     for i in range(start, end+1):
34 |                         coreList.append(str(i))
35 |                     coreListString = ",".join(coreList)
36 |                 else:
37 |                     cpus_per_node = len(cpusString.split(","))
38 |                     coreListString = re.sub(r' ', "", cpusString)
39 |                 coresNodeList.append(coreListString)
40 |         else:
41 |             not_ready = False
42 | 
43 |     #Hack because of bug in cpuinfo for bulldozer
44 |     if socket.gethostname()=="bulldozer":
45 |         newCoresNodeList = [] 
46 |         for i in range(0, number_of_numa_nodes, 2):
47 |             newCoresNodeList.append(coresNodeList[i] + "," + coresNodeList[i+1])
48 |         coresNodeList = newCoresNodeList
49 |         number_of_numa_nodes = number_of_numa_nodes / 2
50 |         cpus_per_node = cpus_per_node * 2
51 |     lscpu_pipe.close()
52 |     coresNodeList = ["{" + x + "}" for x in coresNodeList]
53 |     numaStructure = "{" + ",".join(coresNodeList) + "}"
54 |     return(number_of_numa_nodes, cpus_per_node, numaStructure)
55 | 
56 | def numa_structure_defines():
57 |     number_of_numa_nodes, cpus_per_node, numaStructure = numa_structure()
58 |     return [('NUMBER_OF_NUMA_NODES', str(number_of_numa_nodes)),
59 |             ('NUMBER_OF_CPUS_PER_NODE', str(cpus_per_node)),
60 |             ('NUMA_STRUCTURE', numaStructure),
61 | 	    ('_GNU_SOURCE', '1')]
62 | 
63 | number_of_numa_nodes, cpus_per_node, numaStructure = numa_structure()
64 |     
65 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/cpplock.h:
--------------------------------------------------------------------------------
 1 | #ifndef CPPLOCK_H
 2 | #define CPPLOCK_H
 3 | 
 4 | typedef struct AgnosticDXLockImpl {
 5 |     void (*defaultWriter)(int, int *);
 6 |     char pad2[64 - (sizeof(void * (*)(void*)) % 64)];
 7 |     char lock[256*1024*1024];
 8 | } AgnosticDXLock;
 9 | 
10 | AgnosticDXLock* cpplock_new();
11 | void cpplock_free(AgnosticDXLock*);
12 | void cpplock_init(AgnosticDXLock*);
13 | void cpplock_delegate(AgnosticDXLock* lock, void (*delgateFun)(int, int *), int data);
14 | int cpplock_delegate_and_wait (AgnosticDXLock* lock, void (*delgateFun)(int, int *), int data);
15 | void cpplock_lock(AgnosticDXLock*);
16 | void cpplock_unlock(AgnosticDXLock*);
17 | void cpplock_rlock(AgnosticDXLock*);
18 | void cpplock_runlock(AgnosticDXLock*);
19 | 
20 | static void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *));
21 | static inline AgnosticDXLock * adxlock_create(void (*writer)(int, int *)){
22 |     AgnosticDXLock * lock = cpplock_new();
23 |     return lock;
24 | }
25 | 
26 | static inline void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *)){
27 |     //TODO check if the following typecast is fine
28 |     lock->defaultWriter = defaultWriter;
29 |     cpplock_init(lock);
30 |     __sync_synchronize();
31 | }
32 | 
33 | static inline void adxlock_free(AgnosticDXLock * lock){
34 |     cpplock_free(lock);
35 | }
36 | 
37 | static inline void adxlock_register_this_thread(){
38 | }
39 | 
40 | //int delegate_cpp(void (*delgateFun)(int, int *), int data, int* resp) {
41 | //	int response = cpplock_delegate_wrapper(delegateFun);
42 | 
43 | 
44 | 
45 | static inline
46 | int adxlock_write_with_response_block(AgnosticDXLock *lock, 
47 |                                       void (*delgateFun)(int, int *), 
48 |                                       int data){
49 |     return cpplock_delegate_and_wait(lock, delgateFun, data);
50 | }
51 | static inline
52 | void adxlock_delegate(AgnosticDXLock *lock, 
53 |                       void (*delgateFun)(int, int *), 
54 |                       int data) {
55 |     cpplock_delegate(lock, delgateFun, data);
56 | }
57 | 
58 | static inline
59 | void adxlock_write(AgnosticDXLock *lock, int writeInfo) {
60 |     adxlock_delegate(lock, lock->defaultWriter, writeInfo);
61 | }
62 | 
63 | static inline
64 | void adxlock_write_read_lock(AgnosticDXLock *lock) {
65 |     cpplock_lock(lock);
66 | }
67 | 
68 | static inline
69 | void adxlock_write_read_unlock(AgnosticDXLock * lock) {
70 |     cpplock_unlock(lock);
71 | }
72 | 
73 | //void adxlock_read_lock(AgnosticDXLock *lock) {
74 | //    cpplock_rlock(lock);
75 | //}
76 | 
77 | //void adxlock_read_unlock(AgnosticDXLock *lock) {
78 | //    cpplock_runlock(lock);
79 | //}
80 | 
81 | 
82 | 
83 | #endif
84 | 


--------------------------------------------------------------------------------
/src/benchmark/perf_magic:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | HUMAN_READABLE=
  4 | 
  5 | FILE=$@
  6 | if [ "$FILE" == "-" ] || [ "$FILE" == "" ]
  7 | then
  8 | 	index=0
  9 | 	while read line
 10 | 	do
 11 | 		input=$(echo $line | egrep " r....:u" | sed -e 's/,//g;s/r....:u//g;s/[[:space:]]\+/ /g')
 12 | 		if [ "$input" == "" ]
 13 | 		then
 14 | 			continue
 15 | 		fi
 16 | 		ARR[$index]=$input
 17 | 		index=$((index+1))
 18 | 	done
 19 | elif ! [ -f $FILE ]
 20 | then
 21 | 	echo "file '$FILE' not found."
 22 | 	exit
 23 | else
 24 | 	ARR=($(egrep " r....:u" $FILE | sed -e 'N;N;N;s/,//g;s/r....:u//g;s/[[:space:]]\+/ /g'))
 25 | fi
 26 | 
 27 | rate() {
 28 | 	if [ $HUMAN_READABLE ]
 29 | 	then
 30 | 		echo $(dc -e "2 k $1 100 * $2 / p")%
 31 | 	else
 32 | 		echo $(dc -e "10 k $1 $2 / p")
 33 | 	fi
 34 | }
 35 | 
 36 | amount() {
 37 | 	if [ $HUMAN_READABLE ]
 38 | 	then
 39 | 		if [ $1 -gt 1000000000 ]
 40 | 		then
 41 | 			echo $(dc -e "2 k $1 1000000000 / p") x10^9
 42 | 		elif [ $1 -gt 1000000 ]
 43 | 		then
 44 | 			echo $(dc -e "2 k $1 1000000 / p") x10^6
 45 | 		elif [ $1 -gt 1000 ]
 46 | 		then
 47 | 			echo $(dc -e "2 k $1 1000 / p") x10^3
 48 | 		else
 49 | 			echo $1
 50 | 		fi
 51 | 	else
 52 | 		echo $1
 53 | 	fi
 54 | }
 55 | 
 56 | L1=${ARR[0]}
 57 | L2=${ARR[1]}
 58 | L3=${ARR[2]}
 59 | ALL=${ARR[3]}
 60 | 
 61 | L1_MISS=$(($ALL - $L1))
 62 | L1_REFS=$ALL
 63 | L1_HIT_RATE=$(rate ${L1} ${L1_REFS})
 64 | L1_MISS_RATE=$(rate ${L1_MISS} ${L1_REFS})
 65 | 
 66 | L2_MISS=$(($ALL - $L2 - $L1))
 67 | L2_REFS=$(($ALL - $L1))
 68 | L2_HIT_RATE=$(rate ${L2} ${L2_REFS})
 69 | L2_MISS_RATE=$(rate ${L2_MISS} ${L2_REFS})
 70 | 
 71 | L3_MISS=$(($ALL - $L3 - $L2 - $L1))
 72 | L3_REFS=$(($ALL - $L2 - $L1))
 73 | L3_HIT_RATE=$(rate ${L3} ${L3_REFS})
 74 | L3_MISS_RATE=$(rate ${L3_MISS} ${L3_REFS})
 75 | 
 76 | if [ $HUMAN_READABLE ]
 77 | then
 78 | 	echo "memory loads: $(amount $ALL)"
 79 | 	echo
 80 | 	echo "L1 hits: $(amount $L1)"
 81 | 	echo "L1 misses: $(amount $L1_MISS)"
 82 | 	echo "L1 hit rate: ${L1_HIT_RATE}"
 83 | 	echo "L1 miss rate: ${L1_MISS_RATE}"
 84 | 	echo 
 85 | 	echo "L2 hits: $(amount $L2)"
 86 | 	echo "L2 misses: $(amount $L2_MISS)"
 87 | 	echo "L2 hit rate: ${L2_HIT_RATE}"
 88 | 	echo "L2 miss rate: ${L2_MISS_RATE}"
 89 | 	echo
 90 | 	echo "L3 hits: $(amount $L3)"
 91 | 	echo "L3 misses: $(amount $L3_MISS)"
 92 | 	echo "L3 hit rate: ${L3_HIT_RATE}"
 93 | 	echo "L3 miss rate: ${L3_MISS_RATE}"
 94 | 
 95 | else
 96 | 	echo -n " $(amount $ALL)"
 97 | 	echo -n " $(amount $L1)"
 98 | 	echo -n " $(amount $L1_MISS)"
 99 | 	echo -n " ${L1_HIT_RATE}"
100 | 	echo -n " ${L1_MISS_RATE}"
101 | 	echo -n " $(amount $L2)"
102 | 	echo -n " $(amount $L2_MISS)"
103 | 	echo -n " ${L2_HIT_RATE}"
104 | 	echo -n " ${L2_MISS_RATE}"
105 | 	echo -n " $(amount $L3)"
106 | 	echo -n " $(amount $L3_MISS)"
107 | 	echo -n " ${L3_HIT_RATE}"
108 | 	echo -n " ${L3_MISS_RATE}"
109 | 
110 | fi
111 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/common.h:
--------------------------------------------------------------------------------
  1 | #ifndef COMMON_H
  2 | #define COMMON_H
  3 | #define _GNU_SOURCE
  4 | #include <inttypes.h>
  5 | #include <unistd.h>
  6 | #include <assert.h>
  7 | #include <errno.h>
  8 | #include <stdio.h>
  9 | #include <stdint.h>
 10 | #include <string.h>
 11 | #include <pthread.h>
 12 | #include <gsl/gsl_rng.h>
 13 | 
 14 | #if defined(__linux__)
 15 | #include <time.h>
 16 | #include <sched.h>
 17 | #include <sys/syscall.h>
 18 | #endif
 19 | 
 20 | #if defined(__APPLE__)
 21 | #include <mach/mach_time.h>
 22 | #endif
 23 | 
 24 | 
 25 | 
 26 | #define DCL_ALIGN __attribute__((aligned (2*CACHE_LINE_SIZE)))
 27 | #define CACHELINE  __attribute__((aligned (1*CACHE_LINE_SIZE)))
 28 | 
 29 | #define ATPAGESIZE   __attribute__((aligned (PAGESIZE)))
 30 | 
 31 | #define SQR(x) (x)*(x)
 32 | 
 33 | #define max(a,b)		 \
 34 |     ({ __typeof__ (a) _a = (a);	 \
 35 |        __typeof__ (b) _b = (b);	 \
 36 |        _a > _b ? _a : _b; })
 37 | 
 38 | #define min(a,b)		 \
 39 |     ({ __typeof__ (a) _a = (a);	 \
 40 |        __typeof__ (b) _b = (b);	 \
 41 |        _a < _b ? _a : _b; })
 42 | 
 43 | 
 44 | typedef struct thread_args_s
 45 | {
 46 |     pthread_t thread;
 47 |     int id;
 48 |     gsl_rng *rng;
 49 |     int measure;
 50 |     int cycles;
 51 |     char pad[128];
 52 | } thread_args_t;
 53 | 
 54 | 
 55 | #define E(c)					\
 56 |     do {					\
 57 | 	int _c = (c);				\
 58 | 	if (_c < 0) {				\
 59 | 	    fprintf(stderr, "E: %s: %d: %s\n",	\
 60 | 		    __FILE__, __LINE__, #c);	\
 61 | 	}					\
 62 |     } while (0)
 63 | 
 64 | #define E_en(c)					\
 65 |     do {					\
 66 | 	int _c = (c);				\
 67 | 	if (_c != 0) {				\
 68 | 	    fprintf(stderr, strerror(_c));	\
 69 | 	}					\
 70 |     } while (0)
 71 | 
 72 | #define E_NULL(c)				\
 73 |     do {					\
 74 | 	if ((c) == NULL) {			\
 75 | 	    perror("E_NULL");			\
 76 | 	}					\
 77 |     } while (0)
 78 | 
 79 | 
 80 | #if defined(__x86_64__)
 81 | /* accurate time measurements on late recent cpus */
 82 | static inline uint64_t __attribute__((always_inline))
 83 | read_tsc_p()
 84 | {
 85 |    uint64_t tsc;
 86 |    __asm__ __volatile__ ("rdtscp\n"
 87 | 	 "shl $32, %%rdx\n"
 88 | 	 "or %%rdx, %%rax"
 89 | 	 : "=a"(tsc)
 90 | 	 :
 91 | 	 : "%rcx", "%rdx");
 92 |    return tsc;
 93 | }
 94 | 
 95 | #define CB()     __asm__ __volatile__("":::"memory")
 96 | #define IMB()    __asm__ __volatile__("mfence":::"memory")
 97 | #define IRMB()   __asm__ __volatile__("lfence":::"memory")
 98 | #define IWMB()   __asm__ __volatile__("sfence":::"memory")
 99 | 
100 | #else
101 | #error Unsupported architecture
102 | #endif // __x86_64__
103 | 
104 | 
105 | #if defined(__linux__)
106 | extern pid_t gettid(void);
107 | extern void  pina(pid_t t, int cpu);
108 | #endif
109 | 
110 | extern void gettime(struct timespec *t);
111 | extern struct timespec timediff(struct timespec, struct timespec);
112 | 
113 | 
114 | #endif
115 | 
116 | 


--------------------------------------------------------------------------------
/src/benchmark/perf_magic_simple:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | HUMAN_READABLE=
  4 | 
  5 | echo RUNNING
  6 | 
  7 | FILE=$@
  8 | if [ "$FILE" == "-" ] || [ "$FILE" == "" ]
  9 | then
 10 | 	index=0
 11 | 	while read line
 12 | 	do
 13 | 		echo line 
 14 | 		input=$(echo $line | egrep " r....:u" | sed -e 's/,//g;s/r....:u//g;s/[[:space:]]\+/ /g')
 15 | 		echo "HEJ"
 16 | 		echo $input
 17 | 		if [ "$input" == "" ]
 18 | 		then
 19 | 			continue
 20 | 		fi
 21 | 		ARR[$index]=$input
 22 | 		index=$((index+1))
 23 | 	done
 24 | elif ! [ -f $FILE ]
 25 | then
 26 | 	echo "file '$FILE' not found."
 27 | 	exit
 28 | else
 29 | 	ARR=($(egrep " r....:u" $FILE | sed -e 'N;N;N;s/,//g;s/r....:u//g;s/[[:space:]]\+/ /g'))
 30 | fi
 31 | 
 32 | rate() {
 33 | 	if [ $HUMAN_READABLE ]
 34 | 	then
 35 | 		echo $(dc -e "2 k $1 100 * $2 / p")%
 36 | 	else
 37 | 		echo $(dc -e "10 k $1 $2 / p")
 38 | 	fi
 39 | }
 40 | 
 41 | amount() {
 42 | 	if [ $HUMAN_READABLE ]
 43 | 	then
 44 | 		if [ $1 -gt 1000000000 ]
 45 | 		then
 46 | 			echo $(dc -e "2 k $1 1000000000 / p") x10^9
 47 | 		elif [ $1 -gt 1000000 ]
 48 | 		then
 49 | 			echo $(dc -e "2 k $1 1000000 / p") x10^6
 50 | 		elif [ $1 -gt 1000 ]
 51 | 		then
 52 | 			echo $(dc -e "2 k $1 1000 / p") x10^3
 53 | 		else
 54 | 			echo $1
 55 | 		fi
 56 | 	else
 57 | 		echo $1
 58 | 	fi
 59 | }
 60 | 
 61 | L1=${ARR[0]}
 62 | L2=${ARR[1]}
 63 | L3=${ARR[2]}
 64 | ALL=${ARR[3]}
 65 | 
 66 | L1_MISS=$(($ALL - $L1))
 67 | L1_REFS=$ALL
 68 | L1_HIT_RATE=$(rate ${L1} ${L1_REFS})
 69 | L1_MISS_RATE=$(rate ${L1_MISS} ${L1_REFS})
 70 | 
 71 | L2_MISS=$(($ALL - $L2 - $L1))
 72 | L2_REFS=$(($ALL - $L1))
 73 | L2_HIT_RATE=$(rate ${L2} ${L2_REFS})
 74 | L2_MISS_RATE=$(rate ${L2_MISS} ${L2_REFS})
 75 | 
 76 | L3_MISS=$(($ALL - $L3 - $L2 - $L1))
 77 | L3_REFS=$(($ALL - $L2 - $L1))
 78 | L3_HIT_RATE=$(rate ${L3} ${L3_REFS})
 79 | L3_MISS_RATE=$(rate ${L3_MISS} ${L3_REFS})
 80 | 
 81 | if [ $HUMAN_READABLE ]
 82 | then
 83 | 	echo "memory loads: $(amount $ALL)"
 84 | 	echo
 85 | 	echo "L1 hits: $(amount $L1)"
 86 | 	echo "L1 misses: $(amount $L1_MISS)"
 87 | 	echo "L1 hit rate: ${L1_HIT_RATE}"
 88 | 	echo "L1 miss rate: ${L1_MISS_RATE}"
 89 | 	echo 
 90 | 	echo "L2 hits: $(amount $L2)"
 91 | 	echo "L2 misses: $(amount $L2_MISS)"
 92 | 	echo "L2 hit rate: ${L2_HIT_RATE}"
 93 | 	echo "L2 miss rate: ${L2_MISS_RATE}"
 94 | 	echo
 95 | 	echo "L3 hits: $(amount $L3)"
 96 | 	echo "L3 misses: $(amount $L3_MISS)"
 97 | 	echo "L3 hit rate: ${L3_HIT_RATE}"
 98 | 	echo "L3 miss rate: ${L3_MISS_RATE}"
 99 | 
100 | else
101 | 	echo -n " $(amount $ALL)"
102 | 	echo -n " $(amount $L1)"
103 | 	echo -n " $(amount $L1_MISS)"
104 | 	echo -n " ${L1_HIT_RATE}"
105 | 	echo -n " ${L1_MISS_RATE}"
106 | 	echo -n " $(amount $L2)"
107 | 	echo -n " $(amount $L2_MISS)"
108 | 	echo -n " ${L2_HIT_RATE}"
109 | 	echo -n " ${L2_MISS_RATE}"
110 | 	echo -n " $(amount $L3)"
111 | 	echo -n " $(amount $L3_MISS)"
112 | 	echo -n " ${L3_HIT_RATE}"
113 | 	echo -n " ${L3_MISS_RATE}"
114 | 
115 | fi
116 | 


--------------------------------------------------------------------------------
/src/lock/mcs_lock.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <pthread.h>
 4 | 
 5 | #include "mcs_lock.h"
 6 | 
 7 | __thread MCSNode myMCSNode __attribute__((aligned(64)));
 8 | 
 9 | static inline
10 | MCSNode * get_and_set_node_ptr(MCSNode ** pointerToOldValue, MCSNode * newValue){
11 |     MCSNode * x = ACCESS_ONCE(*pointerToOldValue);
12 |     while (true) {
13 |         if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue))
14 |             return x;
15 |         x = ACCESS_ONCE(*pointerToOldValue);
16 |     }
17 | }
18 |  
19 | MCSLock * mcslock_create(void (*writer)(void *, void **)){
20 |     MCSLock * lock = malloc(sizeof(MCSLock));
21 |     mcslock_initialize(lock, writer);
22 |     return lock;
23 | }
24 | 
25 | void mcslock_initialize(MCSLock * lock, void (*writer)(void *, void **)){
26 |     lock->writer = writer;
27 |     lock->endOfQueue.value = NULL;
28 |     __sync_synchronize();
29 | }
30 | 
31 | void mcslock_free(MCSLock * lock){
32 |     free(lock);
33 | }
34 | 
35 | void mcslock_register_this_thread(){
36 |     MCSNode * node = &myMCSNode;
37 |     node->locked.value = false;
38 |     node->next.value = NULL;
39 | }
40 | 
41 | void mcslock_write(MCSLock *lock, void * writeInfo) {
42 |     mcslock_write_read_lock(lock);
43 |     lock->writer(writeInfo, NULL);
44 |     mcslock_write_read_unlock(lock);
45 |     
46 | }
47 | 
48 | //Returns true if it is taken over from another writer and false otherwise
49 | bool mcslock_write_read_lock(MCSLock *lock) {
50 |     bool isNodeLocked;
51 |     MCSNode * node = &myMCSNode;
52 |     node->next.value = NULL;
53 |     MCSNode * predecessor = get_and_set_node_ptr(&lock->endOfQueue.value, node);
54 |     if (predecessor != NULL) {
55 |         store_rel(node->locked.value, true);
56 |         store_rel(predecessor->next.value, node);
57 |         load_acq(isNodeLocked, node->locked.value);
58 |         //Wait
59 |         while (isNodeLocked) {
60 |             __sync_synchronize();
61 |             load_acq(isNodeLocked, node->locked.value);
62 |         }
63 |         return true;
64 |     }else{
65 |         return false;
66 |     }
67 | }
68 | 
69 | void mcslock_write_read_unlock(MCSLock * lock) {
70 |     MCSNode * nextNode;
71 |     MCSNode * node = &myMCSNode;
72 |     load_acq(nextNode, node->next.value);
73 |     if (nextNode == NULL) {
74 |         if (__sync_bool_compare_and_swap(&lock->endOfQueue.value, node, NULL)){
75 |             return;
76 |         }
77 |         //wait
78 |         load_acq(nextNode, node->next.value);
79 |         while (nextNode == NULL) {
80 |              __sync_synchronize();
81 |             load_acq(nextNode, node->next.value);
82 |         }
83 |     }
84 |     store_rel(node->next.value->locked.value, false);
85 |     __sync_synchronize();//Push change
86 | }
87 | 
88 | void mcslock_read_lock(MCSLock *lock) {
89 |     mcslock_write_read_lock(lock);
90 | }
91 | 
92 | void mcslock_read_unlock(MCSLock *lock) {
93 |     mcslock_write_read_unlock(lock);
94 | }
95 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/ptst.c:
--------------------------------------------------------------------------------
 1 | /******************************************************************************
 2 |  * ptst.c
 3 |  * 
 4 |  * Per-thread state management. Essentially the state management parts
 5 |  * of MB's garbage-collection code have been pulled out and placed
 6 |  * here, for the use of other utility routines.
 7 |  * 
 8 |  * Copyright (c) 2013, Jonatan Linden
 9 |  * Copyright (c) 2002-2003, K A Fraser
10 |  * 
11 |  * All rights reserved.
12 |  * 
13 |  * Redistribution and use in source and binary forms, with or without
14 |  * modification, are permitted provided that the following conditions
15 |  * are met:
16 |  * 
17 |  *  * Redistributions of source code must retain the above copyright
18 |  *    notice, this list of conditions and the following disclaimer.
19 |  * 
20 |  *  * Redistributions in binary form must reproduce the above
21 |  *    copyright notice, this list of conditions and the following
22 |  *    disclaimer in the documentation and/or other materials provided
23 |  *    with the distribution.
24 |  * 
25 |  *  * The name of the author may not be used to endorse or promote
26 |  *    products derived from this software without specific prior
27 |  *    written permission.
28 |  * 
29 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
30 |  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
31 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
33 |  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
35 |  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
37 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
38 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
39 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 |  */
41 | 
42 | #include <stdio.h>
43 | #include <stdlib.h>
44 | #include <string.h>
45 | #include "random.h"
46 | #include "portable_defns.h"
47 | #include "ptst.h"
48 | 
49 | ptst_t *ptst_list = NULL;
50 | extern __thread ptst_t *ptst;
51 | static unsigned int next_id = 0;
52 | 
53 | void
54 | critical_enter()
55 | {
56 |     ptst_t *next, *new_next;
57 | 
58 |     if ( ptst == NULL ) 
59 |     {
60 | 	ptst = (ptst_t *) ALIGNED_ALLOC(sizeof(ptst_t));
61 | 	if ( ptst == NULL ) exit(1);
62 | 	    
63 | 	memset(ptst, 0, sizeof(ptst_t));
64 | 	ptst->gc = gc_init();
65 | 	ptst->count = 1;
66 | 	ptst->id = __sync_fetch_and_add(&next_id, 1);
67 | 	rand_init(ptst);
68 | 	new_next = ptst_list;
69 | 	do {
70 | 	    ptst->next = next = new_next;
71 | 	} 
72 | 	while ( (new_next = __sync_val_compare_and_swap(&ptst_list, next, ptst)) != next );
73 |     }
74 |     
75 |     gc_enter(ptst);
76 |     return;
77 | }
78 | 
79 | 
80 | 
81 | static void ptst_destructor(ptst_t *ptst) 
82 | {
83 |     ptst->count = 0;
84 | }
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/src/lock/rhqd_lock.c:
--------------------------------------------------------------------------------
 1 | #define READ_PATIENCE_LIMIT 130000
 2 | #include "rhqd_lock.h"
 3 | 
 4 | QDLock * qdlock_create(void (*defaultWriter)(void *, void **)){
 5 |     QDLock * lock = (QDLock *)malloc(sizeof(QDLock));
 6 |     qdlock_initialize(lock, defaultWriter);
 7 |     return lock;
 8 | }
 9 | 
10 | void qdlock_initialize(QDLock * lock, void (*defaultWriter)(void *, void **)){
11 |     //TODO check if the following typecast is fine
12 |     lock->defaultWriter = defaultWriter;
13 |     tataslock_initialize(&lock->lock, defaultWriter);
14 |     drmvqueue_initialize(&lock->writeQueue);
15 |     __sync_synchronize();
16 | }
17 | 
18 | void qdlock_free(QDLock * lock){
19 |     free(lock);
20 | }
21 | 
22 | //*******
23 | //rhqdlock
24 | //*******
25 | 
26 | void rhqdlock_initialize(RHQDLock * lock, void (*defaultWriter)(void *, void **));
27 | RHQDLock * rhqdlock_create(void (*writer)(void *, void **)){
28 |     RHQDLock * lock = (RHQDLock *)malloc(sizeof(RHQDLock));
29 |     rhqdlock_initialize(lock, writer);
30 |     return lock;
31 | }
32 | 
33 | void rhqdlock_initialize(RHQDLock * lock, void (*defaultWriter)(void *, void **)){
34 |     for(int n = 0; n < NUMBER_OF_NUMA_NODES; n++){
35 |         qdlock_initialize(&lock->localLocks[n], defaultWriter);
36 |     }
37 |     mcslock_initialize(&lock->globalLock, defaultWriter);
38 |     NZI_INITIALIZE(&lock->nonZeroIndicator);
39 |     lock->writeBarrier.value = 0;
40 |     __sync_synchronize();
41 | }
42 | 
43 | void rhqdlock_free(RHQDLock * lock){
44 |     free(lock);
45 | }
46 | 
47 | void rhqdlock_register_this_thread(){
48 |     assign_id_to_thread();
49 |     mcslock_register_this_thread();
50 | }
51 | 
52 | 
53 | 
54 | void rhqdlock_write(RHQDLock *lock, void * writeInfo) {
55 |     rhqdlock_delegate(lock, lock->localLocks[0].defaultWriter, writeInfo);
56 | }
57 | 
58 | void rhqdlock_write_read_lock(RHQDLock *lock) {
59 |     waitUntilWriteBarrierOff(lock);
60 |     mcslock_write_read_lock(&lock->globalLock);
61 |     NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator);
62 | }
63 | 
64 | void rhqdlock_write_read_unlock(RHQDLock * lock) {
65 |     mcslock_write_read_unlock(&lock->globalLock);
66 | }
67 | 
68 | void rhqdlock_read_lock(RHQDLock *lock) {
69 |     bool bRaised = false; 
70 |     int readPatience = 0;
71 |  start:
72 |     NZI_ARRIVE(&lock->nonZeroIndicator);
73 |     if(mcslock_is_locked(&lock->globalLock)){
74 |         NZI_DEPART(&lock->nonZeroIndicator);
75 |         while(mcslock_is_locked(&lock->globalLock)){
76 |             __sync_synchronize();//Pause (pause instruction might be better)
77 |             if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){
78 |                 __sync_fetch_and_add(&lock->writeBarrier.value, 1);
79 |                 bRaised = true;
80 |             }
81 |             readPatience = readPatience + 1;
82 |         }
83 |         goto start;
84 |     }
85 |     if(bRaised){
86 |         __sync_fetch_and_sub(&lock->writeBarrier.value, 1);
87 |     }
88 | }
89 | 
90 | void rhqdlock_read_unlock(RHQDLock *lock) {
91 |     NZI_DEPART(&lock->nonZeroIndicator);
92 | }
93 | 


--------------------------------------------------------------------------------
/qd_library/qd_condition_variable.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef qd_condition_variable_hpp
 2 | #define qd_condition_variable_hpp qd_condition_variable_hpp
 3 | 
 4 | #include "util/pause.hpp"
 5 | #include "qdlock_base.hpp"
 6 | 
 7 | template<class MLock, class DQueue>
 8 | class qd_condition_variable_impl : private qdlock_base<MLock, DQueue> {
 9 | 	typedef qdlock_base<MLock, DQueue> base;
10 | 	public:
11 | 		qd_condition_variable_impl() {
12 | 			this->delegation_queue.open();
13 | 		}
14 | 		qd_condition_variable_impl(const qd_condition_variable_impl&) = delete;
15 | 		qd_condition_variable_impl& operator=(const qd_condition_variable_impl&) = delete;
16 | 
17 | 		/* TODO: these notify implementations / flush implementations run at risk of deadlocking
18 | 		 *       when the notifying thread becomes a helper and also needs to perform additional
19 | 		 *       synchronization steps.
20 | 		 */
21 | 		void notify_one() {
22 | 			this->mutex_lock.lock();
23 | 			this->delegation_queue.flush_one();
24 | 			this->mutex_lock.unlock();
25 | 		}
26 | 		void notify_all() {
27 | 			this->mutex_lock.lock();
28 | 			this->delegation_queue.flush();
29 | 			this->mutex_lock.unlock();
30 | 		}
31 | 		
32 | 		/* interface _p functions: User provides a promise, which is used explicitly by the delegated (void) function */
33 | 		template<typename Function, Function f, typename Lock, typename Promise, typename... Ps>
34 | 		auto wait_redelegate_p(Lock* l, Promise&& result, Ps&&... ps)
35 | 		-> void
36 | 		{
37 | 			wait_redelegate<Function, f, Lock, Promise, Ps...>(l, std::forward<Promise>(result), std::forward<Ps>(ps)...);
38 | 		}
39 | 		template<typename Function, typename Lock, typename Promise, typename... Ps>
40 | 		auto wait_redelegate_p(Function&& f, Lock* l, Promise&& result, Ps&&... ps)
41 | 		-> void
42 | 		{
43 | 			/* type of functor/function ptr stored in f, set template function pointer to NULL */
44 | 			wait_redelegate<std::nullptr_t, nullptr, Lock, Promise, Function, Ps...>(l, std::forward<Promise>(result), std::forward<Function>(f), std::forward<Ps>(ps)...);
45 | 		}
46 | 	private:
47 | 		
48 | 		
49 | 		template<typename Function, Function f, typename Lock, typename Promise, typename... Ps>
50 | 		auto wait_redelegate(Lock* l, Promise&& result, Ps&&... ps)
51 | 		-> void
52 | 		{
53 | 			while(true) {
54 | 				/* TODO enqueue a function that re-delegates the provided function with its parameter to Lock l TODO */
55 | 				std::nullptr_t no_promise;
56 | 				if(this->template enqueue<void (*)(Lock*, typename Promise::promise&&, Ps&&...), redelegate<Lock, Function, f, Promise&&, Ps&&...>>(&no_promise, &l, &result, (&ps)...)) {
57 | 					return;
58 | 				}
59 | 				qd::pause();
60 | 			}
61 | 		}
62 | 		template<typename Lock, typename Function, Function f, typename Promise, typename... Ps>
63 | 		static void redelegate(Lock* l, Promise&& p, Ps&&... ps) {
64 | 			using no_promise = typename base::no_promise::promise;
65 | 			l->template delegate<Function, f, no_promise, typename Lock::reader_indicator_t, Promise, Ps...>(nullptr, std::forward<Promise>(p), std::forward<Ps>(ps)...);
66 | 	}
67 | 
68 | 	/* TODO: wait_for and wait_until for time-based waiting */
69 | 
70 | };
71 | 
72 | #endif
73 | 


--------------------------------------------------------------------------------
/qd_library/padded.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef padded_hpp
  2 | #define padded_hpp
  3 | 
  4 | template<typename T, unsigned int PADSIZE, bool T_is_class>
  5 | class padded_base;
  6 | 
  7 | template<typename T, unsigned int PADSIZE>
  8 | class padded_base<T, PADSIZE, false> {
  9 | 	T value;
 10 | 	char padding[PADSIZE - (sizeof(T)%PADSIZE)];
 11 | 	typedef padded_base<T, PADSIZE, false> P_type;
 12 | 	typedef typename std::remove_pointer<T>::type T_dereferenced;
 13 | 	friend class padded_base<T*, PADSIZE, false>;
 14 | 	public:
 15 | 		padded_base() {} /* this is a basic type, it is NOT initialized to 0 */
 16 | 		padded_base(const T v) : value(v) {}
 17 | 		padded_base(const P_type& v) : value(v) {}
 18 | 		padded_base(padded_base<T_dereferenced, PADSIZE, true>* const v) : value(v) {}
 19 | 		padded_base(padded_base<T_dereferenced, PADSIZE, false>* const v) : value(&v->value) {}
 20 | 
 21 | 		operator T&() {
 22 | 			return value;
 23 | 		}
 24 | 		operator const T&() const {
 25 | 			return value;
 26 | 		}
 27 | 		P_type& operator=(P_type other) {
 28 | 			swap(*this, other);
 29 | 			return *this;
 30 | 		}
 31 | 		P_type& operator=(T other) {
 32 | 			using std::swap;
 33 | 			swap(value, other);
 34 | 			return *this;
 35 | 		}
 36 | 
 37 | 		bool operator==(const T other) {
 38 | 			return value == other;
 39 | 		}
 40 | 		bool operator!=(const T other) {
 41 | 			return !(*this == other);
 42 | 		}
 43 | 		T_dereferenced& operator*() {
 44 | 			return *value;
 45 | 		}
 46 | 		const T_dereferenced& operator*() const {
 47 | 			return *value;
 48 | 		}
 49 | 		T_dereferenced* operator->() {
 50 | 			return value;
 51 | 		}
 52 | 		const T_dereferenced* operator->() const {
 53 | 			return value;
 54 | 		}
 55 | 
 56 | 		T& get() {
 57 | 			return value;
 58 | 		}
 59 | 
 60 | 		const T& get() const {
 61 | 			return value;
 62 | 		}
 63 | 
 64 | 		friend void swap(P_type& first, P_type& second) {
 65 | 			using std::swap;
 66 | 			swap(first.value, second.value);
 67 | 		}
 68 | };
 69 | 
 70 | template<typename T, unsigned int PADSIZE>
 71 | class padded_base<T, PADSIZE, true> : public T {
 72 | 	char padding[PADSIZE - (sizeof(T)%PADSIZE)];
 73 | 	//typedef padded_base<T, PADSIZE, true> P_type;
 74 | 	//typedef typename std::remove_pointer<T>::type T_dereferenced;
 75 | 	//friend class padded_base<T*, PADSIZE, false>;
 76 | 	public:
 77 | 		using T::T;
 78 | 		using T::operator=;
 79 | //		T_dereferenced& operator*() {
 80 | //			return **this;
 81 | //		}
 82 | //		const T_dereferenced& operator*() const {
 83 | //			return *value;
 84 | //		}
 85 | //		T_dereferenced* operator->() {
 86 | //			return value;
 87 | //		}
 88 | //		const T_dereferenced* operator->() const {
 89 | //			return value;
 90 | //		}
 91 | //
 92 | 		T& get() {
 93 | 			return *this;
 94 | 		}
 95 | 
 96 | 		const T& get() const {
 97 | 			return *this;
 98 | 		}
 99 | };
100 | 
101 | template<typename T, int PADSIZE = 128>
102 | class padded : public padded_base<T, PADSIZE, std::is_class<T>::value> {
103 | 	typedef padded_base<T, PADSIZE, std::is_class<T>::value> base_type;
104 | 	public:
105 | 		using base_type::base_type;
106 | 		using base_type::operator=;
107 | };
108 | 
109 | #endif // padded_hpp
110 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/clh.h:
--------------------------------------------------------------------------------
  1 | #ifndef _CLH_H_
  2 | 
  3 | #define _CLH_H_
  4 | 
  5 | #include <stdlib.h>
  6 | #include <pthread.h>
  7 | #include "synch_algs_config.h"
  8 | #include "synch_algs_primitives.h"
  9 | 
 10 | #ifdef POSIX_LOCKS
 11 | 
 12 | typedef pthread_mutex_t CLHLockStruct;
 13 | 
 14 | 
 15 | static inline void clhLock(LockStruct *l, int pid) {
 16 |     pthread_mutex_lock(l);
 17 | }
 18 | 
 19 | static inline void clhUnlock(LockStruct *l, int pid) {
 20 |     pthread_mutex_unlock(l);
 21 | }
 22 | 
 23 | LockStruct *clhLockInit(void) {
 24 |     LockStruct *l, tmp = PTHREAD_MUTEX_INITIALIZER;
 25 |     int error;
 26 | 
 27 |     error = posix_memalign((void *)&l, CACHE_LINE_SIZE, sizeof(CLHLockStruct));
 28 |     *l = tmp;
 29 |     return l;
 30 | }
 31 | 
 32 | #else
 33 | typedef union CLHLockNode {
 34 |     bool locked;
 35 |     char align[CACHE_LINE_SIZE];
 36 | } CLHLockNode;
 37 | 
 38 | typedef struct CLHLockStruct {
 39 |     volatile CLHLockNode *Tail CACHE_ALIGN;
 40 |     char pad1[128];
 41 |     //    volatile CLHLockNode *MyNode[N_THREADS] CACHE_ALIGN;
 42 |     //    volatile CLHLockNode *MyPred[N_THREADS] CACHE_ALIGN;
 43 | } CLHLockStruct;
 44 | 
 45 | typedef struct CLHThreadLocalData {
 46 |     char pad1[128];
 47 |     volatile CLHLockNode *MyNode CACHE_ALIGN;
 48 |     volatile CLHLockNode *MyPred CACHE_ALIGN;
 49 |     char pad2[128 - 2*sizeof(CLHLockNode *)];
 50 | } CLHThreadLocalData;
 51 | 
 52 | __thread CLHThreadLocalData threadLocalData __attribute__((aligned(64)));
 53 | 
 54 | 
 55 | static inline void clhLock(CLHLockStruct *l, int pid) {
 56 |     threadLocalData.MyNode->locked = true;
 57 |     threadLocalData.MyPred = (CLHLockNode *)__SWAP(&l->Tail, (void *)threadLocalData.MyNode);
 58 |     while (threadLocalData.MyPred->locked == true) {
 59 | #if N_THREADS > USE_CPUS
 60 |         sched_yield();
 61 | #else
 62 |         ;
 63 | #endif
 64 |     }
 65 | }
 66 | 
 67 | static inline void clhUnlock(CLHLockStruct *l, int pid) {
 68 |     threadLocalData.MyNode->locked = false;
 69 |     threadLocalData.MyNode = threadLocalData.MyPred;
 70 | #ifdef sparc
 71 |     StoreFence();
 72 | #endif
 73 | }
 74 | 
 75 | void clhThreadLocalInit(){
 76 |     threadLocalData.MyNode = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode));
 77 |     threadLocalData.MyPred = null;
 78 | }
 79 | 
 80 | CLHLockStruct *clhLockInit(void) {
 81 |     CLHLockStruct *l;
 82 |     //    int j;
 83 | 
 84 |     l = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockStruct));
 85 |     l->Tail = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode));
 86 |     l->Tail->locked = false;
 87 | 
 88 |     //    for (j = 0; j < N_THREADS; j++) {
 89 |     //    l->MyNode[j] = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode));
 90 |     //    l->MyPred[j] = null;
 91 |     //}
 92 | 
 93 |     return l;
 94 | }
 95 | 
 96 | void clhLockInitExisting(CLHLockStruct * l) {
 97 |     l->Tail = getAlignedMemory(CACHE_LINE_SIZE, sizeof(CLHLockNode));
 98 |     l->Tail->locked = false;
 99 | }
100 | 
101 | #endif
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/src/datastructures/padded.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef padded_hpp
  2 | #define padded_hpp
  3 | 
  4 | template<typename T, unsigned int PADSIZE, bool T_is_class>
  5 | class padded_base;
  6 | 
  7 | template<typename T, unsigned int PADSIZE>
  8 | class padded_base<T, PADSIZE, false> {
  9 | 	T value;
 10 | 	char padding[PADSIZE - (sizeof(T)%PADSIZE)];
 11 | 	typedef padded_base<T, PADSIZE, false> P_type;
 12 | 	typedef typename std::remove_pointer<T>::type T_dereferenced;
 13 | 	friend class padded_base<T*, PADSIZE, false>;
 14 | 	public:
 15 | 		padded_base() {} /* this is a basic type, it is NOT initialized to 0 */
 16 | 		padded_base(const T v) : value(v) {}
 17 | 		padded_base(const P_type& v) : value(v) {}
 18 | 		padded_base(padded_base<T_dereferenced, PADSIZE, true>* const v) : value(v) {}
 19 | 		padded_base(padded_base<T_dereferenced, PADSIZE, false>* const v) : value(&v->value) {}
 20 | 
 21 | 		operator T&() {
 22 | 			return value;
 23 | 		}
 24 | 		operator const T&() const {
 25 | 			return value;
 26 | 		}
 27 | 		P_type& operator=(P_type other) {
 28 | 			swap(*this, other);
 29 | 			return *this;
 30 | 		}
 31 | 		P_type& operator=(T other) {
 32 | 			using std::swap;
 33 | 			swap(value, other);
 34 | 			return *this;
 35 | 		}
 36 | 
 37 | 		bool operator==(const T other) {
 38 | 			return value == other;
 39 | 		}
 40 | 		bool operator!=(const T other) {
 41 | 			return !(*this == other);
 42 | 		}
 43 | 		T_dereferenced& operator*() {
 44 | 			return *value;
 45 | 		}
 46 | 		const T_dereferenced& operator*() const {
 47 | 			return *value;
 48 | 		}
 49 | 		T_dereferenced* operator->() {
 50 | 			return value;
 51 | 		}
 52 | 		const T_dereferenced* operator->() const {
 53 | 			return value;
 54 | 		}
 55 | 
 56 | 		T& get() {
 57 | 			return value;
 58 | 		}
 59 | 
 60 | 		const T& get() const {
 61 | 			return value;
 62 | 		}
 63 | 
 64 | 		friend void swap(P_type& first, P_type& second) {
 65 | 			using std::swap;
 66 | 			swap(first.value, second.value);
 67 | 		}
 68 | };
 69 | 
 70 | template<typename T, unsigned int PADSIZE>
 71 | class padded_base<T, PADSIZE, true> : public T {
 72 | 	char padding[PADSIZE - (sizeof(T)%PADSIZE)];
 73 | 	//typedef padded_base<T, PADSIZE, true> P_type;
 74 | 	//typedef typename std::remove_pointer<T>::type T_dereferenced;
 75 | 	//friend class padded_base<T*, PADSIZE, false>;
 76 | 	public:
 77 | 		using T::T;
 78 | 		using T::operator=;
 79 | //		T_dereferenced& operator*() {
 80 | //			return **this;
 81 | //		}
 82 | //		const T_dereferenced& operator*() const {
 83 | //			return *value;
 84 | //		}
 85 | //		T_dereferenced* operator->() {
 86 | //			return value;
 87 | //		}
 88 | //		const T_dereferenced* operator->() const {
 89 | //			return value;
 90 | //		}
 91 | //
 92 | 		T& get() {
 93 | 			return *this;
 94 | 		}
 95 | 
 96 | 		const T& get() const {
 97 | 			return *this;
 98 | 		}
 99 | };
100 | 
101 | template<typename T, int PADSIZE = 128>
102 | class padded : public padded_base<T, PADSIZE, std::is_class<T>::value> {
103 | 	typedef padded_base<T, PADSIZE, std::is_class<T>::value> base_type;
104 | 	public:
105 | 		using base_type::base_type;
106 | 		using base_type::operator=;
107 | };
108 | 
109 | #endif // padded_hpp
110 | 


--------------------------------------------------------------------------------
/src/lock/rcpp_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef RCPPLOCK_H
 2 | #define RCPPLOCK_H
 3 | 
 4 | //RHQDLock
 5 | 
 6 | typedef struct RCPPLockImpl {
 7 |     void (*defaultWriter)(void*, void **);
 8 |     char pad2[64 - (sizeof(void * (*)(void*)) % 64)];
 9 |     char lock[256*1024*1024];
10 | } RCPPLock;
11 | 
12 | RCPPLock* rcpplock_new();
13 | void rcpplock_free(RCPPLock*);
14 | void rcpplock_init(RCPPLock*);
15 | void rcpplock_delegate(RCPPLock* lock, void (*delgateFun)(void *, void **), void * data);
16 | static void* rcpplock_delegate_and_wait (RCPPLock* lock, void (*delgateFun)(void *, void **), void * data);
17 | void rcpplock_lock(RCPPLock*);
18 | void rcpplock_unlock(RCPPLock*);
19 | void rcpplock_rlock(RCPPLock*);
20 | void rcpplock_runlock(RCPPLock*);
21 | 
22 | static inline void rcpplock_initialize(RCPPLock * lock, void (*defaultWriter)(void *, void **)) {
23 |     lock->defaultWriter = defaultWriter;
24 |     rcpplock_init(lock);
25 |     __sync_synchronize();
26 | }
27 | static inline RCPPLock * rcpplock_create(void (*writer)(void *, void **)) {
28 | 	(void)writer;
29 | 	RCPPLock* lock = rcpplock_new();
30 | 	return lock;
31 | }
32 | 
33 | static inline void rcpplock_register_this_thread() {}
34 | 
35 | //static void rcpplock_write_with_response(RCPPLock *rcpplock, 
36 | //                                  void (*delgateFun)(void *, void **), 
37 | //                                  void * data, 
38 | //                                  void ** responseLocation);
39 | static void * rcpplock_write_with_response_block(RCPPLock *lock, 
40 |                                           void (*delgateFun)(void *, void **), 
41 |                                           void * data);
42 | void rcpplock_delegate(RCPPLock *lock, 
43 |                        void (*delgateFun)(void *, void **), 
44 |                        void * data);
45 | static inline void rcpplock_write(RCPPLock *lock, void * writeInfo) {
46 | 	rcpplock_delegate(lock, lock->defaultWriter, writeInfo);
47 | }
48 | 
49 | static inline void rcpplock_write_read_lock(RCPPLock *lock) {
50 | 	rcpplock_lock(lock);
51 | }
52 | static inline void rcpplock_write_read_unlock(RCPPLock * lock) {
53 | 	rcpplock_unlock(lock);
54 | }
55 | static inline void rcpplock_read_lock(RCPPLock *lock) {
56 | 	rcpplock_rlock(lock);
57 | }
58 | static inline void rcpplock_read_unlock(RCPPLock *lock) {
59 | 	rcpplock_runlock(lock);
60 | }
61 | 
62 | 
63 | #if 0
64 | static inline
65 | void rcpplock_write_with_response(RHQDLock *rcpplock, 
66 |                                  void (*delgateFun)(void *, void **), 
67 |                                  void * data, 
68 |                                  void ** responseLocation){
69 | }
70 | static inline
71 | 
72 | void rcpplock_delegate(RHQDLock *lock, 
73 |                       void (*delgateFun)(void *, void **), 
74 |                       void * data) {
75 |     rcpplock_write_with_response(lock, delgateFun, data, NULL);
76 | #endif
77 | 
78 | static inline
79 | void * rcpplock_write_with_response_block(RCPPLock *lock, 
80 |                                       void (*delgateFun)(void *, void **), 
81 |                                       void * data){
82 | 	return rcpplock_delegate_and_wait(lock, delgateFun, data);
83 | }
84 | 
85 | 
86 | 
87 | 
88 | #endif
89 | 


--------------------------------------------------------------------------------
/src/utils/smp_utils.h:
--------------------------------------------------------------------------------
  1 | #ifndef SMP_UTILS_H
  2 | #define SMP_UTILS_H
  3 | 
  4 | #include <stdbool.h>
  5 | #include <stdio.h>
  6 | 
  7 | //Make sure compiler does not optimize away memory access
  8 | #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  9 | 
 10 | //Atomic get
 11 | #define GET(value_ptr)  __sync_fetch_and_add(value_ptr, 0)
 12 | 
 13 | //Compiller barrier
 14 | #define barrier() __asm__ __volatile__("": : :"memory")
 15 | 
 16 | //See the following URL for explanation of acquire and release semantics:
 17 | //http://preshing.com/20120913/acquire-and-release-semantics
 18 | 
 19 | //Load with acquire barrier
 20 | #if __x86_64__
 21 | #define load_acq(assign_to,load_from) \
 22 |     assign_to = ACCESS_ONCE(load_from)
 23 | #else
 24 | #define load_acq(assign_to,load_from)           \
 25 |     do {                                        \
 26 |         barrier();                              \
 27 |         assign_to = ACCESS_ONCE(load_from);     \
 28 |         __sync_synchronize();                   \
 29 |     } while(0)
 30 | #endif
 31 | 
 32 | 
 33 | //Store with release barrier
 34 | #if __x86_64__
 35 | #define store_rel(store_to,store_value) \
 36 |     do{                                 \
 37 |         barrier();                      \
 38 |         store_to = store_value;        \
 39 |         barrier();                      \
 40 |     }while(0);
 41 | #else
 42 | #define store_rel(store_to,store_value) \
 43 |     do{                                 \
 44 |         __sync_synchronize();           \
 45 |         store_to = store_value;        \
 46 |         barrier();                      \
 47 |     }while(0);
 48 | #endif
 49 | 
 50 | //Intel pause instruction
 51 | #if __x86_64__
 52 | #define pause_instruction() \
 53 |   __asm volatile ("pause")
 54 | #else
 55 | #define pause_instruction() \
 56 |   __sync_synchronize()
 57 | #endif
 58 | 
 59 | static inline
 60 | int get_and_set_int(int * pointerToOldValue, int newValue){
 61 |     int x = ACCESS_ONCE(*pointerToOldValue);
 62 |     while (true) {
 63 |         if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue))
 64 |             return x;
 65 |         x = ACCESS_ONCE(*pointerToOldValue);
 66 |     }
 67 | }
 68 | 
 69 | static inline
 70 | unsigned long get_and_set_ulong(unsigned long * pointerToOldValue, unsigned long newValue){
 71 |     unsigned long x = ACCESS_ONCE(*pointerToOldValue);
 72 |     while (true) {
 73 |         if (__sync_bool_compare_and_swap(pointerToOldValue, x, newValue))
 74 |             return x;
 75 |         x = ACCESS_ONCE(*pointerToOldValue);
 76 |     }
 77 | }
 78 | 
 79 | typedef union CacheLinePaddedBoolImpl {
 80 |     bool value;
 81 |     char padding[64];
 82 | } CacheLinePaddedBool;
 83 | 
 84 | typedef union CacheLinePaddedIntImpl {
 85 |     int value;
 86 |     char padding[128];
 87 | } CacheLinePaddedInt;
 88 | 
 89 | 
 90 | typedef union CacheLinePaddedULongImpl {
 91 |     unsigned long value;
 92 |     char padding[128];
 93 | } CacheLinePaddedULong;
 94 | 
 95 | typedef union CacheLinePaddedDoubleImpl {
 96 |     double value;
 97 |     char padding[128];
 98 | } CacheLinePaddedDouble;
 99 | 
100 | typedef union CacheLinePaddedPointerImpl {
101 |     void * value;
102 |     char padding[64];
103 | } CacheLinePaddedPointer;
104 | 
105 | #endif
106 | 


--------------------------------------------------------------------------------
/src/lock/wprw_lock.c:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include <stdlib.h>
  3 | #include <stdio.h>
  4 | #include <pthread.h>
  5 | #include <assert.h>
  6 | 
  7 | #include "wprw_lock.h"
  8 | #include "utils/support_many_lock_types.h"
  9 | #include "utils/smp_utils.h"
 10 | #include "utils/thread_identifier.h"
 11 | 
 12 | 
 13 | #define READ_PATIENCE_LIMIT 130000
 14 | 
 15 | static inline
 16 | bool isWriteLocked(WPRWLock * lock){
 17 | #ifdef LOCK_TYPE_MCSLock
 18 |     MCSNode * endOfQueue;
 19 |     load_acq(endOfQueue, lock->lock.endOfQueue.value);
 20 |     return endOfQueue != NULL;
 21 | #elif defined (LOCK_TYPE_CohortLock)
 22 |     int inCounter;
 23 |     int outCounter;
 24 |     load_acq(inCounter, lock->lock.globalLock.inCounter.value);
 25 |     load_acq(outCounter, lock->lock.globalLock.outCounter.value);
 26 |     return (inCounter != outCounter);
 27 | #else
 28 |     printf("WPRW LOCK: Unsuported mutal exclusion lock\n");
 29 |     assert(false);
 30 |     return false;
 31 | #endif
 32 | }
 33 |  
 34 | WPRWLock * wprwlock_create(void (*writer)(void *, void **)){
 35 |     WPRWLock * lock = malloc(sizeof(WPRWLock));
 36 |     wprwlock_initialize(lock, writer);
 37 |     return lock;
 38 | }
 39 | 
 40 | void wprwlock_initialize(WPRWLock * lock, void (*writer)(void *, void **)){
 41 |     LOCK_INITIALIZE(&lock->lock, writer);
 42 |     lock->writeBarrier.value = 0;
 43 |     NZI_INITIALIZE(&lock->nonZeroIndicator);
 44 |     __sync_synchronize();
 45 | }
 46 | 
 47 | void wprwlock_free(WPRWLock * lock){
 48 |     free(lock);
 49 | }
 50 | 
 51 | 
 52 | void wprwlock_register_this_thread(){
 53 |     LOCK_REGISTER_THIS_THREAD();
 54 |     assign_id_to_thread();
 55 | }
 56 | 
 57 | void wprwlock_write(WPRWLock *lock, void * writeInfo) {
 58 |     wprwlock_write_read_lock(lock);
 59 |     lock->lock.writer(writeInfo, NULL);
 60 |     wprwlock_write_read_unlock(lock);
 61 | }
 62 | 
 63 | void wprwlock_write_read_lock(WPRWLock *lock) {
 64 |     bool writeBarrierOn;
 65 |     load_acq(writeBarrierOn, lock->writeBarrier.value);
 66 |     while(writeBarrierOn){
 67 |         __sync_synchronize();
 68 |         load_acq(writeBarrierOn, lock->writeBarrier.value);
 69 |     }
 70 |     if(!LOCK_WRITE_READ_LOCK(&lock->lock)){
 71 |         NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator);
 72 |     }
 73 | }
 74 | 
 75 | void wprwlock_write_read_unlock(WPRWLock * lock) {
 76 |     LOCK_WRITE_READ_UNLOCK(&lock->lock);
 77 | }
 78 | 
 79 | void wprwlock_read_lock(WPRWLock *lock) {
 80 |     bool bRaised = false; 
 81 |     int readPatience = 0;
 82 |  start:
 83 |     NZI_ARRIVE(&lock->nonZeroIndicator);
 84 |     if(isWriteLocked(lock)){
 85 |         NZI_DEPART(&lock->nonZeroIndicator);
 86 |         while(isWriteLocked(lock)){
 87 |             __sync_synchronize();//Pause (pause instruction might be better)
 88 |             if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){
 89 |                 __sync_fetch_and_add(&lock->writeBarrier.value, 1);
 90 |                 bRaised = true;
 91 |             }
 92 |             readPatience = readPatience + 1;
 93 |         }
 94 |         goto start;
 95 |     }
 96 |     if(bRaised){
 97 |         __sync_fetch_and_sub(&lock->writeBarrier.value, 1);
 98 |     }
 99 | }
100 | 
101 | void wprwlock_read_unlock(WPRWLock *lock) {
102 |     NZI_DEPART(&lock->nonZeroIndicator);
103 | }
104 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/synch_algs_config.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CONFIG_H_
 2 | 
 3 | #define _CONFIG_H_
 4 | 
 5 | // Definition: USE_CPUS
 6 | // --------------------
 7 | // Define the number of processing cores that your computation
 8 | // system offers or the maximum number of cores that you like to use.
 9 | #ifndef USE_CPUS
10 | #    define USE_CPUS               64
11 | #endif
12 | 
13 | // Definition: N_THREADS
14 | // ---------------------
15 | // Define the number of threads that you like to run experiments.
16 | // In case N_THREADS > USE_CPUS, two or more threads may run in
17 | // any processing core.
18 | #ifndef N_THREADS
19 | #    define N_THREADS              64
20 | #endif
21 | 
22 | // Definition: MAX_WORK
23 | // --------------------
24 | // Define the maximum local work that each thread executes 
25 | // between two calls of some simulated shared object's
26 | // operation. A zero value means no work between two calls.
27 | // The exact value depends on the speed of processing cores.
28 | // Try not to use big values (avoiding slow contention)
29 | // or not to use small values (avoiding long runs and
30 | // unrealistic cache misses ratios).
31 | #define MAX_WORK                   64
32 | 
33 | // definition: RUNS
34 | // ----------------
35 | // Define the total number of the calls of object's 
36 | // operations that will be executed.
37 | #define RUNS                       (10000000 / N_THREADS)
38 | 
39 | // Definition: DEBUG
40 | // -----------------
41 | // Enable this definition, in case you would like some
42 | // parts of the code. Usually leads to performance loss.
43 | // This way of debugging is deprecated. It is better to
44 | // compile your code with debug option.
45 | // See Readme for more details.
46 | //#define DEBUG
47 | 
48 | // Definition OBJECT_SIZE
49 | // ----------------------
50 | // This definition is only used in lfobject.c, simopt.c
51 | // and luobject.c experiments. In any other case it is
52 | // ignored. Its default value is 1. It is used for simulating
53 | // of an atomic array of Fetch&Multiply objects with
54 | // OBJECT_SIZE elements. All elements are updated 
55 | // simultaneously.
56 | #ifndef OBJECT_SIZE
57 | #    define OBJECT_SIZE            1
58 | #endif
59 | 
60 | // Definition: DISABLE_BACKOFF
61 | // ---------------------------
62 | // By defining this, any backoff scheme in any algorithm
63 | // is disabled. Be careful, upper an lower bounds must 
64 | // also used as experiments' arguments, but they are ignored.
65 | //#define DISABLE_BACKOFF
66 | 
67 | 
68 | #define Object                     int32_t
69 | 
70 | // Definition: RetVal
71 | // ------------------
72 | // Define the type of the return value that simulated 
73 | // atomic objects must return. Be careful, this type
74 | // must be read/written atomically by target machine.
75 | // Usually, 32 or 64 bit (in some cases i.e. x86_64
76 | // types of 128 bits are supported). In case that you
77 | // need a larger type use indirection.
78 | #define RetVal                     int32_t
79 | 
80 | // Definition: ArgVal
81 | // ------------------
82 | // Define the type of the argument value of atomic objects.
83 | // All atomic objects have same argument types. In case
84 | // that you 'd like to use different argument values in each
85 | // atomic object, redefine it in object's source file.
86 | #define ArgVal                     int32_t
87 | 
88 | #endif
89 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/synch_algorithms/qdlock.c:
--------------------------------------------------------------------------------
 1 | #define MAX_NUM_OF_HELPED_OPS 4096
 2 | #include "qdlock.h"
 3 | 
 4 | 
 5 | AgnosticDXLock * adxlock_create(void (*writer)(int, int *)){
 6 |     AgnosticDXLock * lock = (AgnosticDXLock *)malloc(sizeof(AgnosticDXLock));
 7 |     adxlock_initialize(lock, writer);
 8 |     return lock;
 9 | }
10 | void adxlock_initialize(AgnosticDXLock * lock, void (*defaultWriter)(int, int *)){
11 |     //TODO check if the following typecast is fine
12 |     lock->defaultWriter = defaultWriter;
13 |     tataslock_initialize(&lock->lock, defaultWriter);
14 |     drmvqueue_initialize(&lock->writeQueue);
15 |     __sync_synchronize();
16 | }
17 | void adxlock_write_with_response(AgnosticDXLock *lock, 
18 |                                  void (*delgateFun)(int, int *), 
19 |                                  int data, 
20 |                                  int * responseLocation){
21 |     int counter = 0;
22 |     DelegateRequestEntry e;
23 |     e.request = delgateFun;
24 |     e.data = data;
25 |     e.responseLocation = responseLocation;
26 |     do{
27 |         if(!tataslock_is_locked(&lock->lock)){
28 |             if(tataslock_try_write_read_lock(&lock->lock)){
29 | #ifdef ACTIVATE_NO_CONTENTION_OPT
30 | 	        if(counter > 0){
31 | #endif
32 |                     drmvqueue_reset_fully_read(&lock->writeQueue);
33 |                     delgateFun(data, responseLocation);
34 |                     drmvqueue_flush(&lock->writeQueue);
35 |                     tataslock_write_read_unlock(&lock->lock);
36 |                     return;
37 | #ifdef ACTIVATE_NO_CONTENTION_OPT
38 | 	        }else{
39 |                     delgateFun(data, responseLocation);
40 | 		    tataslock_write_read_unlock(&lock->lock);
41 | 		    return;
42 |                 }
43 | #endif
44 |             }
45 |         }else{
46 |             while(tataslock_is_locked(&lock->lock)){
47 |                 if(drmvqueue_offer(&lock->writeQueue, e)){
48 |                     return;
49 |                 }else{
50 |                     __sync_synchronize();
51 |                     __sync_synchronize();
52 |                 }
53 |             }
54 |         }
55 |         if((counter & 7) == 0){
56 | #ifdef USE_YIELD
57 |             sched_yield();
58 | #endif
59 |         }
60 |         counter = counter + 1;
61 |         sched_yield();
62 |     }while(true);
63 | }
64 | 
65 | int adxlock_write_with_response_block(AgnosticDXLock *lock, 
66 |                                       void (*delgateFun)(int, int *), 
67 |                                       int data){
68 |     int counter = 0;
69 |     int returnValue = INT_MIN;
70 |     int currentValue;
71 |     adxlock_write_with_response(lock, delgateFun, data, &returnValue);
72 |     load_acq(currentValue, returnValue);
73 |     while(currentValue == INT_MIN){
74 |         if((counter & 7) == 0){
75 | #ifdef USE_YIELD
76 |             sched_yield();
77 | #endif
78 |         }else{
79 |             __sync_synchronize();
80 |         }
81 |         counter = counter + 1;
82 |         load_acq(currentValue, returnValue);
83 |     }
84 |     return currentValue;
85 | }
86 | void adxlock_delegate(AgnosticDXLock *lock, 
87 |                       void (*delgateFun)(int, int *), 
88 |                       int data) {
89 |     adxlock_write_with_response(lock, delgateFun, data, NULL);
90 | }
91 | void adxlock_write(AgnosticDXLock *lock, int writeInfo) {
92 |     adxlock_delegate(lock, lock->defaultWriter, writeInfo);
93 | }
94 | void adxlock_write_read_unlock(AgnosticDXLock * lock) {
95 |     drmvqueue_flush(&lock->writeQueue);
96 |     tataslock_write_read_unlock(&lock->lock);
97 | }
98 | 


--------------------------------------------------------------------------------
/src/benchmark/benchmark_lock.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 11:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | 
 34 | iterations = int(parameters.pop(0))
 35 | 
 36 | 
 37 | output_dir_base = parameters.pop(0)
 38 | 
 39 | if output_dir_base=='standard':
 40 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S")) #subprocess.check_output(['git', 'rev-parse', 'HEAD'])[-11:-1] 
 41 | 
 42 | benchmark_names = parameters.pop(0).split(',')
 43 | 
 44 | lock_ids = parameters.pop(0).split(',')
 45 | 
 46 | pinning_settings = parameters.pop(0).split(',')
 47 | 
 48 | thread_counts = parameters.pop(0).split(",")
 49 | 
 50 | #* Percentage read
 51 | #* Number of seconds to benchmark
 52 | #* Iterations spent in write critical section
 53 | #* Iterations spent in read critical section
 54 | #* Iterations spent in non critical section
 55 | 
 56 | 
 57 | percentages_reads = parameters.pop(0).split(',')
 58 | 
 59 | run_times_seconds = parameters.pop(0).split(',')
 60 | 
 61 | iterations_wcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_rcs = parameters.pop(0).split(',')
 64 | 
 65 | iterations_ncs = parameters.pop(0).split(',')
 66 | 
 67 | for iteration in range(iterations):
 68 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 69 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 70 | 			     for benchmark_name in benchmark_names 
 71 | 			     for lock_id in lock_ids]:
 72 | 	    for settings in [[pr,rts,iw,ir,incs] 
 73 | 			     for pr in percentages_reads
 74 | 			     for rts in run_times_seconds
 75 | 			     for iw in iterations_wcs
 76 | 			     for ir in iterations_rcs
 77 | 			     for incs in iterations_ncs]:
 78 | 		for pinning in pinning_settings:
 79 | 		    output_file_dir_str = ('bench_results/' + 
 80 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 81 | 		    if not os.path.exists(output_file_dir_str):
 82 | 			os.makedirs(output_file_dir_str)
 83 | 		    output_file_str = (output_file_dir_str +
 84 | 				       'b_' + pinning + '_' +  '_'.join(settings) + '.dat')
 85 | 		    with open(output_file_str, "a") as outfile:
 86 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 87 | 			for thread_count in thread_counts:
 88 | 			    command = [bin_dir_path + '/' + benchmark_id, thread_count] + settings
 89 | 			    if pinning=='no':
 90 | 				process = subprocess.Popen(command, stdout=outfile)
 91 | 				process.wait()
 92 | 			    else:
 93 | 				max_node_id = (int(thread_count)-1) / num_of_cpus_per_node
 94 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 95 | 				process = subprocess.Popen(nomactrl + command, stdout=outfile)
 96 | 				process.wait()
 97 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
 98 | 
 99 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
100 | 


--------------------------------------------------------------------------------
/src/benchmark/cache_benchmark_lock_simple.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import sys
 4 | import os
 5 | import datetime
 6 | import subprocess
 7 | import re
 8 | 
 9 | import sys
10 | 
11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
12 | 
13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
14 | 
15 | from extract_numa_structure import numa_structure
16 | 
17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
18 | 
19 | parameters = sys.argv
20 | 
21 | parameters.pop(0)
22 | 
23 | if len(parameters) < 11:
24 |     print """Not enough parameters:
25 | 
26 | Look at bin/run_benchmarks_on_intel_i7.py and
27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
28 | parameters.
29 | 
30 | """
31 |     sys.exit()
32 | 
33 | iterations = int(parameters.pop(0))
34 | 
35 | output_dir_base = parameters.pop(0)
36 | 
37 | if output_dir_base=='standard':
38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
39 | 
40 | benchmark_names = parameters.pop(0).split(',')
41 | 
42 | lock_ids = parameters.pop(0).split(',')
43 | 
44 | pinning_settings = parameters.pop(0).split(',')
45 | 
46 | thread_counts = parameters.pop(0).split(",")
47 | 
48 | #* Percentage read
49 | #* Number of seconds to benchmark
50 | #* Iterations spent in write critical section
51 | #* Iterations spent in read critical section
52 | #* Iterations spent in non critical section
53 | 
54 | 
55 | percentages_reads = parameters.pop(0).split(',')
56 | 
57 | run_times_seconds = parameters.pop(0).split(',')
58 | 
59 | iterations_wcs = parameters.pop(0).split(',')
60 | 
61 | iterations_rcs = parameters.pop(0).split(',')
62 | 
63 | iterations_ncs = parameters.pop(0).split(',')
64 | 
65 | 
66 | for iteration in range(iterations):
67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
69 | 			     for benchmark_name in benchmark_names 
70 | 			     for lock_id in lock_ids]:
71 | 	    for settings in [[pr,rts,iw,ir,incs] 
72 | 			     for pr in percentages_reads
73 | 			     for rts in run_times_seconds
74 | 			     for iw in iterations_wcs
75 | 			     for ir in iterations_rcs
76 | 			     for incs in iterations_ncs]:
77 | 		for pinning in pinning_settings:
78 | 		    output_file_dir_str = ('bench_results/' + 
79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
80 | 		    if not os.path.exists(output_file_dir_str):
81 | 			os.makedirs(output_file_dir_str)
82 | 		    output_file_str = (output_file_dir_str +
83 | 				       'b_' + pinning + '_' +  '_'.join(settings) + '.dat')
84 | 		    with open(output_file_str, "a") as outfile:
85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
86 | 			for thread_count in thread_counts:
87 | 			    realcmd = [bin_dir_path + '/' + benchmark_id, thread_count] + settings
88 | 			    perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u']
89 | 			    command = perfcmd + realcmd
90 |                             (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
91 |                             print (bin_dir_path + '/' + 'perf_magic_simple')
92 |                             cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic_simple', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0]
93 | 			    outfile.write(outString.rstrip('\n') + cachedata + '\n')
94 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
95 | 	
96 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
97 | 


--------------------------------------------------------------------------------
/src/benchmark/benchmark_lockXOpDist.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 11:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | iterations = int(parameters.pop(0))
 34 | 
 35 | output_dir_base = parameters.pop(0)
 36 | 
 37 | if output_dir_base=='standard':
 38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
 39 | 
 40 | benchmark_names = parameters.pop(0).split(',')
 41 | 
 42 | lock_ids = parameters.pop(0).split(',')
 43 | 
 44 | pinning_settings = parameters.pop(0).split(',')
 45 | 
 46 | thread_counts = parameters.pop(0).split(",")
 47 | 
 48 | #* Percentage read
 49 | #* Number of seconds to benchmark
 50 | #* Iterations spent in write critical section
 51 | #* Iterations spent in read critical section
 52 | #* Iterations spent in non critical section
 53 | 
 54 | 
 55 | percentages_reads = parameters.pop(0).split(',')
 56 | 
 57 | run_times_seconds = parameters.pop(0).split(',')
 58 | 
 59 | iterations_wcs = parameters.pop(0).split(',')
 60 | 
 61 | iterations_rcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_ncs = parameters.pop(0).split(',')
 64 | 
 65 | 
 66 | for iteration in range(iterations):
 67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 69 | 			     for benchmark_name in benchmark_names 
 70 | 			     for lock_id in lock_ids]:
 71 | 	    for settings in [[tc,rts,iw,ir,ncsw] 
 72 | 			     for tc in thread_counts
 73 | 			     for rts in run_times_seconds
 74 | 			     for iw in iterations_wcs
 75 | 			     for ir in iterations_rcs
 76 | 			     for ncsw in iterations_ncs]:
 77 | 		for pinning in pinning_settings:
 78 | 		    output_file_dir_str = ('bench_results/' + 
 79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 80 | 		    if not os.path.exists(output_file_dir_str):
 81 | 			os.makedirs(output_file_dir_str)
 82 | 		    output_file_str = (output_file_dir_str +
 83 | 				       'xodi_' + pinning + '_' + '_'.join(settings) + '.dat')
 84 | 		    with open(output_file_str, "a") as outfile:
 85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 86 | 			for pr in percentages_reads:
 87 | 			    [tc,rts,iw,ir,ncsw] = settings
 88 | 			    command = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,ncsw]
 89 | 			    print command
 90 | 			    if pinning=='no':
 91 | 				outString = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
 92 | 				outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:]))
 93 | 			    else:
 94 | 				max_node_id = (int(tc)-1) / num_of_cpus_per_node
 95 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 96 | 				outString = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE).communicate()[0]
 97 | 				outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:]))
 98 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
 99 | 	
100 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/src/benchmark/benchmark_lock_XNonCW.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 11:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | iterations = int(parameters.pop(0))
 34 | 
 35 | output_dir_base = parameters.pop(0)
 36 | 
 37 | if output_dir_base=='standard':
 38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
 39 | 
 40 | benchmark_names = parameters.pop(0).split(',')
 41 | 
 42 | lock_ids = parameters.pop(0).split(',')
 43 | 
 44 | pinning_settings = parameters.pop(0).split(',')
 45 | 
 46 | thread_counts = parameters.pop(0).split(",")
 47 | 
 48 | #* Percentage read
 49 | #* Number of seconds to benchmark
 50 | #* Iterations spent in write critical section
 51 | #* Iterations spent in read critical section
 52 | #* Iterations spent in non critical section
 53 | 
 54 | 
 55 | percentages_reads = parameters.pop(0).split(',')
 56 | 
 57 | run_times_seconds = parameters.pop(0).split(',')
 58 | 
 59 | iterations_wcs = parameters.pop(0).split(',')
 60 | 
 61 | iterations_rcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_ncs = parameters.pop(0).split(',')
 64 | 
 65 | 
 66 | for iteration in range(iterations):
 67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 69 | 			     for benchmark_name in benchmark_names 
 70 | 			     for lock_id in lock_ids]:
 71 | 	    for settings in [[tc,pr,rts,iw,ir] 
 72 | 			     for tc in thread_counts
 73 | 			     for pr in percentages_reads
 74 | 			     for rts in run_times_seconds
 75 | 			     for iw in iterations_wcs
 76 | 			     for ir in iterations_rcs]:
 77 | 		for pinning in pinning_settings:
 78 | 		    output_file_dir_str = ('bench_results/' + 
 79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 80 | 		    if not os.path.exists(output_file_dir_str):
 81 | 			os.makedirs(output_file_dir_str)
 82 | 		    output_file_str = (output_file_dir_str +
 83 | 				       'xncw_' + pinning + '_' + '_'.join(settings) + '.dat')
 84 | 		    with open(output_file_str, "a") as outfile:
 85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 86 | 			for non_cs_work in iterations_ncs:
 87 | 			    [tc,pr,rts,iw,ir] = settings
 88 | 			    command = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,non_cs_work]
 89 | 			    print command
 90 | 			    if pinning=='no':
 91 | 				outString = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
 92 | 				outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:]))
 93 | 			    else:
 94 | 				max_node_id = (int(tc)-1) / num_of_cpus_per_node
 95 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 96 | 				outString = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE).communicate()[0]
 97 | 				outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:]))
 98 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
 99 | 	
100 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
101 | 


--------------------------------------------------------------------------------
/src/lock/cohort_lock.c:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include <stdlib.h>
  3 | #include <stdio.h>
  4 | #include <pthread.h>
  5 | #include <assert.h>
  6 | #include <sched.h>
  7 | #include "cohort_lock.h"
  8 | #include "utils/numa_node_info_support.h"
  9 | 
 10 | 
 11 | __thread CacheLinePaddedInt myLocalNode __attribute__((aligned(64)));
 12 | 
 13 | 
 14 | static inline
 15 | bool nodeHasWaitingThreads(TicketLock * localLock){
 16 |     int localLockInCounter;
 17 |     int localLockOutCounter;
 18 |     load_acq(localLockInCounter, localLock->inCounter.value); 
 19 |     load_acq(localLockOutCounter, localLock->outCounter.value);
 20 |     return (localLockInCounter - localLockOutCounter) > 1;
 21 | }
 22 |  
 23 | CohortLock * cohortlock_create(void (*writer)(void *, void **)){
 24 |     CohortLock * lock = malloc(sizeof(CohortLock));
 25 |     cohortlock_initialize(lock, writer);
 26 |     return lock;
 27 | }
 28 | 
 29 | void cohortlock_initialize(CohortLock * lock, void (*writer)(void *, void **)){
 30 |     lock->writer = writer;
 31 |     aticketlock_initialize(&lock->globalLock, writer);
 32 |     for(int i = 0; i < NUMBER_OF_NUMA_NODES; i++){
 33 |         ticketlock_initialize(&lock->localLockData[i].lock, writer);
 34 |         lock->localLockData[i].numberOfHandOvers.value = 0;
 35 |         lock->localLockData[i].needToTakeGlobalLock.value = true;
 36 |     }
 37 |     //Initialize CPUToNodeMap
 38 |     int numaStructure[NUMBER_OF_NUMA_NODES][NUMBER_OF_CPUS_PER_NODE] = NUMA_STRUCTURE;
 39 |     for(char node = 0; node < NUMBER_OF_NUMA_NODES; node++){
 40 |         for(int i = 0; i < NUMBER_OF_CPUS_PER_NODE; i++){
 41 |             CPUToNodeMap.value[numaStructure[(int)node][i]] = node;
 42 |         }
 43 |     }
 44 | 
 45 |     __sync_synchronize();
 46 | }
 47 | 
 48 | void cohortlock_free(CohortLock * lock){
 49 |     free(lock);
 50 | }
 51 | 
 52 | 
 53 | void cohortlock_register_this_thread(){
 54 | }
 55 | 
 56 | void cohortlock_write(CohortLock *lock, void * writeInfo) {
 57 |     cohortlock_write_read_lock(lock);
 58 |     lock->writer(writeInfo, NULL);
 59 |     cohortlock_write_read_unlock(lock);
 60 | }
 61 | 
 62 | 
 63 | 
 64 | //Returns true if it is taken over from another writer and false otherwise
 65 | bool cohortlock_write_read_lock(CohortLock *lock) {
 66 | #ifdef PINNING
 67 |     NodeLocalLockData * localData = &lock->localLockData[numa_node.value];
 68 | #else
 69 |     myLocalNode.value = numa_node_id();
 70 |     NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value];
 71 | #endif 
 72 |     ticketlock_write_read_lock(&localData->lock);
 73 |     if(localData->needToTakeGlobalLock.value){
 74 |         aticketlock_write_read_lock(&lock->globalLock);
 75 |         return false;
 76 |     }else{
 77 |         return true;
 78 |     }
 79 | }
 80 | 
 81 | void cohortlock_write_read_unlock(CohortLock * lock) {
 82 | #ifdef PINNING
 83 |     NodeLocalLockData * localData = &lock->localLockData[numa_node.value];
 84 | #else
 85 |     NodeLocalLockData * localData = &lock->localLockData[myLocalNode.value];
 86 | #endif
 87 |     if(nodeHasWaitingThreads(&localData->lock) && 
 88 |        (localData->numberOfHandOvers.value < MAXIMUM_NUMBER_OF_HAND_OVERS)){
 89 |         localData->needToTakeGlobalLock.value = false;
 90 |         localData->numberOfHandOvers.value++;
 91 |         ticketlock_write_read_unlock(&localData->lock);
 92 | 
 93 |     }else{
 94 |         localData->needToTakeGlobalLock.value = true;
 95 |         localData->numberOfHandOvers.value = 0;
 96 |         aticketlock_write_read_unlock(&lock->globalLock);
 97 |         ticketlock_write_read_unlock(&localData->lock);
 98 |     }
 99 | }
100 | 
101 | void cohortlock_read_lock(CohortLock *lock) {
102 |     cohortlock_write_read_lock(lock);
103 | }
104 | 
105 | void cohortlock_read_unlock(CohortLock *lock) {
106 |     cohortlock_write_read_unlock(lock);
107 | }
108 | 


--------------------------------------------------------------------------------
/src/benchmark/cache_benchmark_lock.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 11:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | iterations = int(parameters.pop(0))
 34 | 
 35 | output_dir_base = parameters.pop(0)
 36 | 
 37 | if output_dir_base=='standard':
 38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
 39 | 
 40 | benchmark_names = parameters.pop(0).split(',')
 41 | 
 42 | lock_ids = parameters.pop(0).split(',')
 43 | 
 44 | pinning_settings = parameters.pop(0).split(',')
 45 | 
 46 | thread_counts = parameters.pop(0).split(",")
 47 | 
 48 | #* Percentage read
 49 | #* Number of seconds to benchmark
 50 | #* Iterations spent in write critical section
 51 | #* Iterations spent in read critical section
 52 | #* Iterations spent in non critical section
 53 | 
 54 | 
 55 | percentages_reads = parameters.pop(0).split(',')
 56 | 
 57 | run_times_seconds = parameters.pop(0).split(',')
 58 | 
 59 | iterations_wcs = parameters.pop(0).split(',')
 60 | 
 61 | iterations_rcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_ncs = parameters.pop(0).split(',')
 64 | 
 65 | 
 66 | for iteration in range(iterations):
 67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 69 | 			     for benchmark_name in benchmark_names 
 70 | 			     for lock_id in lock_ids]:
 71 | 	    for settings in [[pr,rts,iw,ir,incs] 
 72 | 			     for pr in percentages_reads
 73 | 			     for rts in run_times_seconds
 74 | 			     for iw in iterations_wcs
 75 | 			     for ir in iterations_rcs
 76 | 			     for incs in iterations_ncs]:
 77 | 		for pinning in pinning_settings:
 78 | 		    output_file_dir_str = ('bench_results/' + 
 79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 80 | 		    if not os.path.exists(output_file_dir_str):
 81 | 			os.makedirs(output_file_dir_str)
 82 | 		    output_file_str = (output_file_dir_str +
 83 | 				       'b_' + pinning + '_' +  '_'.join(settings) + '.dat')
 84 | 		    with open(output_file_str, "a") as outfile:
 85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 86 | 			for thread_count in thread_counts:
 87 | 			    realcmd = [bin_dir_path + '/' + benchmark_id, thread_count] + settings
 88 | 			    perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u']
 89 | 			    command = perfcmd + realcmd
 90 | 			    if pinning=='no':
 91 | 				    (outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 92 | 			    else:
 93 | 				max_node_id = (int(thread_count)-1) / num_of_cpus_per_node
 94 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 95 | 				process = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 96 | 			    cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0]
 97 | 			    outfile.write(outString.rstrip('\n') + cachedata + '\n')
 98 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
 99 | 	
100 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
101 | 


--------------------------------------------------------------------------------
/src/benchmark/cache_benchmark_lockXOpDist.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 11:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | iterations = int(parameters.pop(0))
 34 | 
 35 | output_dir_base = parameters.pop(0)
 36 | 
 37 | if output_dir_base=='standard':
 38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
 39 | 
 40 | benchmark_names = parameters.pop(0).split(',')
 41 | 
 42 | lock_ids = parameters.pop(0).split(',')
 43 | 
 44 | pinning_settings = parameters.pop(0).split(',')
 45 | 
 46 | thread_counts = parameters.pop(0).split(",")
 47 | 
 48 | #* Percentage read
 49 | #* Number of seconds to benchmark
 50 | #* Iterations spent in write critical section
 51 | #* Iterations spent in read critical section
 52 | #* Iterations spent in non critical section
 53 | 
 54 | 
 55 | percentages_reads = parameters.pop(0).split(',')
 56 | 
 57 | run_times_seconds = parameters.pop(0).split(',')
 58 | 
 59 | iterations_wcs = parameters.pop(0).split(',')
 60 | 
 61 | iterations_rcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_ncs = parameters.pop(0).split(',')
 64 | 
 65 | 
 66 | for iteration in range(iterations):
 67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 69 | 			     for benchmark_name in benchmark_names 
 70 | 			     for lock_id in lock_ids]:
 71 | 	    for settings in [[tc,rts,iw,ir,ncsw] 
 72 | 			     for tc in thread_counts
 73 | 			     for rts in run_times_seconds
 74 | 			     for iw in iterations_wcs
 75 | 			     for ir in iterations_rcs
 76 | 			     for ncsw in iterations_ncs]:
 77 | 		for pinning in pinning_settings:
 78 | 		    output_file_dir_str = ('bench_results/' + 
 79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 80 | 		    if not os.path.exists(output_file_dir_str):
 81 | 			os.makedirs(output_file_dir_str)
 82 | 		    output_file_str = (output_file_dir_str +
 83 | 				       'xodi_' + pinning + '_' + '_'.join(settings) + '.dat')
 84 | 		    with open(output_file_str, "a") as outfile:
 85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 86 | 			for pr in percentages_reads:
 87 | 			    [tc,rts,iw,ir,ncsw] = settings
 88 | 			    realcmd = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,ncsw]
 89 | 			    perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u']
 90 | 			    command = perfcmd + realcmd
 91 | 			    print command
 92 | 			    if pinning=='no':
 93 | 				(outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 94 | 			    else:
 95 | 				max_node_id = (int(tc)-1) / num_of_cpus_per_node
 96 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 97 | 				(outString, outErr) = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 98 | 			    cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0]
 99 | 			    outfile.write(str(pr) + " " + ' '.join(outString.split(" ")[1:]).rstrip('\n') + cachedata + '\n')
100 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
101 | 	
102 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
103 | 


--------------------------------------------------------------------------------
/src/datastructures_bench/PR/gc/intel_defns.h:
--------------------------------------------------------------------------------
  1 | #ifndef __INTEL_DEFNS_H__
  2 | #define __INTEL_DEFNS_H__
  3 | 
  4 | #include <pthread.h>
  5 | #include <sched.h>
  6 | 
  7 | #ifndef INTEL
  8 | #define INTEL
  9 | #endif
 10 | 
 11 | #if 0
 12 | #define pthread_mutex_init(_m,_i) \
 13 | ({ pthread_mutex_init(_m,_i); (_m)->__m_kind = PTHREAD_MUTEX_ADAPTIVE_NP; })
 14 | #endif
 15 | 
 16 | 
 17 | /*
 18 |  * I. Compare-and-swap.
 19 |  */
 20 | 
 21 | /*
 22 |  * This is a strong barrier! Reads cannot be delayed beyond a later store.
 23 |  * Reads cannot be hoisted beyond a LOCK prefix. Stores always in-order.
 24 |  */
 25 | #define CAS(_a, _o, _n)                                    \
 26 | ({ __typeof__(_o) __o = _o;                                \
 27 |    __asm__ __volatile__(                                   \
 28 |        "lock cmpxchg %3,%1"                                \
 29 |        : "=a" (__o), "=m" (*(volatile unsigned int *)(_a)) \
 30 |        :  "0" (__o), "r" (_n) );                           \
 31 |    __o;                                                    \
 32 | })
 33 | 
 34 | #define FAS(_a, _n)                                        \
 35 | ({ __typeof__(_n) __o;                                     \
 36 |    __asm__ __volatile__(                                   \
 37 |        "lock xchg %0,%1"                                   \
 38 |        : "=r" (__o), "=m" (*(volatile unsigned int *)(_a)) \
 39 |        :  "0" (_n) );                                      \
 40 |    __o;                                                    \
 41 | })
 42 | 
 43 | #define CAS64(_a, _o, _n)                                        \
 44 | ({ __typeof__(_o) __o = _o;                                      \
 45 |    __asm__ __volatile__(                                         \
 46 |        "movl %3, %%ecx;"                                         \
 47 |        "movl %4, %%ebx;"                                         \
 48 |        "lock cmpxchg8b %1"                                       \
 49 |        : "=A" (__o), "=m" (*(volatile unsigned long long *)(_a)) \
 50 |        : "0" (__o), "m" (_n >> 32), "m" (_n)                     \
 51 |        : "ebx", "ecx" );                                         \
 52 |    __o;                                                          \
 53 | })
 54 | 
 55 | /* Update Integer location, return Old value. */
 56 | #define CASIO CAS
 57 | #define FASIO FAS
 58 | /* Update Pointer location, return Old value. */
 59 | #define CASPO CAS
 60 | #define FASPO FAS
 61 | /* Update 32/64-bit location, return Old value. */
 62 | #define CAS32O CAS
 63 | #define CAS64O CAS64
 64 | 
 65 | /*
 66 |  * II. Memory barriers. 
 67 |  *  WMB(): All preceding write operations must commit before any later writes.
 68 |  *  RMB(): All preceding read operations must commit before any later reads.
 69 |  *  MB():  All preceding memory accesses must commit before any later accesses.
 70 |  * 
 71 |  *  If the compiler does not observe these barriers (but any sane compiler
 72 |  *  will!), then VOLATILE should be defined as 'volatile'.
 73 |  */
 74 | 
 75 | #define MB()  __sync_synchronize()
 76 | #define WMB() __asm__ __volatile__ ("" : : : "memory")
 77 | #define RMB() MB()
 78 | #define VOLATILE /*volatile*/
 79 | 
 80 | /* On Intel, CAS is a strong barrier, but not a compile barrier. */
 81 | #define RMB_NEAR_CAS() WMB()
 82 | #define WMB_NEAR_CAS() WMB()
 83 | #define MB_NEAR_CAS()  WMB()
 84 | 
 85 | 
 86 | /*
 87 |  * III. Cycle counter access.
 88 |  */
 89 | 
 90 | typedef unsigned long long tick_t;
 91 | 
 92 | static inline tick_t __attribute__((always_inline)) 
 93 | RDTICK()
 94 | { tick_t __t;
 95 |     __asm__ __volatile__("rdtsc\n"
 96 | 			 "shl $32,%%rdx\n"
 97 | 			 "or %%rdx,%%rax"
 98 | 			 : "=a"(__t)
 99 | 			 :
100 | 			 : "%rcx", "%rdx");
101 |     return __t;
102 | }
103 | 
104 | 
105 | 
106 | 
107 | /*
108 |  * IV. Types.
109 |  */
110 | 
111 | typedef unsigned char      _u8;
112 | typedef unsigned short     _u16;
113 | typedef unsigned int       _u32;
114 | typedef unsigned long long _u64;
115 | 
116 | #endif /* __INTEL_DEFNS_H__ */
117 | 


--------------------------------------------------------------------------------
/src/benchmark/cache_benchmark_lock_XNonCW.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | import sys
  4 | import os
  5 | import datetime
  6 | import subprocess
  7 | import re
  8 | 
  9 | import sys
 10 | 
 11 | bin_dir_path = os.path.dirname(os.path.realpath(__file__))
 12 | 
 13 | sys.path.append(os.path.join(bin_dir_path, '..', 'src', 'lock'))
 14 | 
 15 | from extract_numa_structure import numa_structure
 16 | 
 17 | _ignore1, num_of_cpus_per_node,ignore2 = numa_structure()
 18 | 
 19 | parameters = sys.argv
 20 | 
 21 | parameters.pop(0)
 22 | 
 23 | if len(parameters) < 12:
 24 |     print """Not enough parameters:
 25 | 
 26 | Look at bin/run_benchmarks_on_intel_i7.py and
 27 | bin/run_benchmarks_on_sandy.py for examples and explanations of
 28 | parameters.
 29 | 
 30 | """
 31 |     sys.exit()
 32 | 
 33 | iterations = int(parameters.pop(0))
 34 | 
 35 | output_dir_base = parameters.pop(0)
 36 | 
 37 | if output_dir_base=='standard':
 38 |     output_dir_base = (datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S"))
 39 | 
 40 | benchmark_names = parameters.pop(0).split(',')
 41 | 
 42 | lock_ids = parameters.pop(0).split(',')
 43 | 
 44 | pinning_settings = parameters.pop(0).split(',')
 45 | 
 46 | thread_counts = parameters.pop(0).split(",")
 47 | 
 48 | #* Percentage read
 49 | #* Number of seconds to benchmark
 50 | #* Iterations spent in write critical section
 51 | #* Iterations spent in read critical section
 52 | #* Iterations spent in non critical section
 53 | 
 54 | 
 55 | percentages_reads = parameters.pop(0).split(',')
 56 | 
 57 | run_times_seconds = parameters.pop(0).split(',')
 58 | 
 59 | iterations_wcs = parameters.pop(0).split(',')
 60 | 
 61 | iterations_rcs = parameters.pop(0).split(',')
 62 | 
 63 | iterations_ncs = parameters.pop(0).split(',')
 64 | 
 65 | 
 66 | for iteration in range(iterations):
 67 | 	print "\n\nSTARTING ITERATION " + str(iteration+1) + " / " + str(iterations) + "\n\n"
 68 | 	for (benchmark_id, lock_id) in [(benchmark_name + "_" + lock_id, lock_id)
 69 | 			     for benchmark_name in benchmark_names 
 70 | 			     for lock_id in lock_ids]:
 71 | 	    for settings in [[tc,pr,rts,iw,ir] 
 72 | 			     for tc in thread_counts
 73 | 			     for pr in percentages_reads
 74 | 			     for rts in run_times_seconds
 75 | 			     for iw in iterations_wcs
 76 | 			     for ir in iterations_rcs]:
 77 | 		for pinning in pinning_settings:
 78 | 		    output_file_dir_str = ('bench_results/' + 
 79 | 					   benchmark_id + '#' + output_dir_base + '#' + lock_id +  '/')
 80 | 		    if not os.path.exists(output_file_dir_str):
 81 | 			os.makedirs(output_file_dir_str)
 82 | 		    output_file_str = (output_file_dir_str +
 83 | 				       'xncw_' + pinning + '_' + '_'.join(settings) + '.dat')
 84 | 		    with open(output_file_str, "a") as outfile:
 85 | 			print "\n\n\033[32m -- STARTING BENCHMARKS FOR " + output_file_str + "! -- \033[m\n\n"
 86 | 			for non_cs_work in iterations_ncs:
 87 | 			    [tc,pr,rts,iw,ir] = settings
 88 | 			    realcmd = [bin_dir_path + '/' + benchmark_id,tc,pr,rts,iw,ir,non_cs_work]
 89 | 			    perfcmd = ['perf', 'stat','-B', '-e', 'r01d1:u,r02d1:u,r04d1:u,r81d0:u']
 90 | 			    command = perfcmd + realcmd
 91 | 			    print command
 92 | 			    if pinning=='no':
 93 | 				(outString, outErr) = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 94 | 			    else:
 95 | 				max_node_id = (int(tc)-1) / num_of_cpus_per_node
 96 | 				nomactrl = ['numactl', '--cpunodebind=' + ",".join([str(x) for x in range(0,max_node_id+1)])]
 97 | 				(outString, outErr) = subprocess.Popen(nomactrl + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
 98 | 			    cachedata = subprocess.Popen(bin_dir_path + '/' + 'perf_magic', stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(outErr)[0]
 99 | 			    outfile.write(str(non_cs_work) + " " + ' '.join(outString.split(" ")[1:]).rstrip('\n') + cachedata + '\n')
100 | 			print "\n\n\033[32m -- BENCHMARKS FOR " + output_file_str + " COMPLETED! -- \033[m\n\n"
101 | 	
102 | 	print "\n\nITERATION " + str(iteration+1) + " / " + str(iterations) + " DONE!\n\n"
103 | 
104 | 


--------------------------------------------------------------------------------
/src/lock/tts_rdx_lock.c:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include <stdlib.h>
  3 | #include <pthread.h>
  4 | #include <limits.h>
  5 | #include <stdio.h>
  6 | #include "tts_rdx_lock.h"
  7 | #include "utils/smp_utils.h"
  8 | #include "utils/thread_identifier.h"
  9 | 
 10 | #define READ_PATIENCE_LIMIT 130000
 11 |  
 12 | TTSRDXLock * ttsalock_create(void (*writer)(void *, void **)){
 13 |     TTSRDXLock * lock = malloc(sizeof(TTSRDXLock));
 14 |     ttsalock_initialize(lock, writer);
 15 |     return lock;
 16 | }
 17 | 
 18 | void ttsalock_initialize(TTSRDXLock * lock, void (*writer)(void *, void **)){
 19 |     lock->writer = writer;
 20 |     lock->lockWord.value = 0;
 21 |     NZI_INITIALIZE(&lock->nonZeroIndicator);
 22 |     omwqueue_initialize(&lock->writeQueue);
 23 |     __sync_synchronize();
 24 | }
 25 | 
 26 | void ttsalock_free(TTSRDXLock * lock){
 27 |     free(lock);
 28 | }
 29 | 
 30 | void ttsalock_register_this_thread(){
 31 |     assign_id_to_thread();
 32 | }
 33 | 
 34 | static inline
 35 | void waitUntilWriteBarrierOff(TTSRDXLock *lock) {
 36 |     bool writeBarrierOn;
 37 |     load_acq(writeBarrierOn, lock->writeBarrier.value);    
 38 |     while(writeBarrierOn){
 39 |         __sync_synchronize();
 40 |         load_acq(writeBarrierOn, lock->writeBarrier.value);
 41 |     }
 42 | }
 43 | 
 44 | void ttsalock_write(TTSRDXLock *lock, void * writeInfo) {
 45 |     bool currentlylocked;
 46 |     waitUntilWriteBarrierOff(lock);
 47 |     while(!omwqueue_offer(&lock->writeQueue, writeInfo)){
 48 |         load_acq(currentlylocked, lock->lockWord.value);
 49 |         if(!currentlylocked){
 50 |             currentlylocked = __sync_lock_test_and_set(&lock->lockWord.value, true);
 51 |             if(!currentlylocked){
 52 |                 //Was not locked before operation
 53 |                 omwqueue_reset_fully_read(&lock->writeQueue);
 54 |                 NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator);
 55 |                 lock->writer(writeInfo, NULL);
 56 |                 ttsalock_write_read_unlock(lock);
 57 |                 return;
 58 |             }
 59 |         }
 60 |         //A __sync_synchronize(); or a pause instruction
 61 |         //is probably necessary here to make it perform on
 62 |         //sandy
 63 |     }
 64 | }
 65 | 
 66 | void ttsalock_write_read_lock(TTSRDXLock *lock) {
 67 |     bool currentlylocked;
 68 |     waitUntilWriteBarrierOff(lock);
 69 |     while(true){
 70 |         load_acq(currentlylocked, lock->lockWord.value);
 71 |         while(currentlylocked){
 72 |             load_acq(currentlylocked, lock->lockWord.value);
 73 |         }
 74 |         currentlylocked = __sync_lock_test_and_set(&lock->lockWord.value, true);
 75 |         if(!currentlylocked){
 76 |             //Was not locked before operation
 77 |             omwqueue_reset_fully_read(&lock->writeQueue);
 78 |             __sync_synchronize();//Flush
 79 |             NZI_WAIT_UNIL_EMPTY(&lock->nonZeroIndicator);
 80 |             return;
 81 |         }
 82 |     }
 83 | }
 84 | 
 85 | void ttsalock_write_read_unlock(TTSRDXLock * lock) {
 86 |     omwqueue_flush(&lock->writeQueue, lock->writer);
 87 |     __sync_lock_release(&lock->lockWord.value);
 88 | }
 89 | 
 90 | void ttsalock_read_lock(TTSRDXLock *lock) {
 91 |     bool bRaised = false; 
 92 |     int readPatience = 0;
 93 |  start:
 94 |     NZI_ARRIVE(&lock->nonZeroIndicator);
 95 |     if(lock->lockWord.value){
 96 |         NZI_DEPART(&lock->nonZeroIndicator);
 97 |         while(lock->lockWord.value){
 98 |             __sync_synchronize();//Pause (pause instruction might be better)
 99 |             if((readPatience == READ_PATIENCE_LIMIT) && !bRaised){
100 |                 __sync_fetch_and_add(&lock->writeBarrier.value, 1);
101 |                 bRaised = true;
102 |             }
103 |             readPatience = readPatience + 1;
104 |         }
105 |         goto start;
106 |     }
107 |     if(bRaised){
108 |         __sync_fetch_and_sub(&lock->writeBarrier.value, 1);
109 |     }
110 | }
111 | 
112 | void ttsalock_read_unlock(TTSRDXLock *lock) {
113 |     NZI_DEPART(&lock->nonZeroIndicator);
114 | }
115 | 


--------------------------------------------------------------------------------
/src/new_rep/tests/test_qd_queue.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include "misc/bsd_stdatomic.h"//Until c11 stdatomic.h is available
  5 | 
  6 | #include "qd_queues/qd_queue.h"
  7 | #include "tests/test_framework.h"
  8 | #include "misc/random.h"
  9 | 
 10 | int test_initialize(){
 11 | 
 12 |     QDQueue test;
 13 |     qdq_initialize(&test);
 14 |     return 1;
 15 | 
 16 | }
 17 | volatile atomic_ulong counter = ATOMIC_VAR_INIT(0);
 18 | void critical_section(unsigned int messageSize, void * message){
 19 |     if(messageSize == 0 && message != NULL){ /* Prevent warning */
 20 |         atomic_fetch_add(&counter, 1);
 21 |     }else{
 22 |         assert(false);
 23 |     }
 24 | }
 25 | int test_enqueue(int nrOfEnqueues){   
 26 |     QDQueue queue;
 27 |     qdq_initialize(&queue);
 28 |     qdq_open(&queue);
 29 |     for(int i = 0; i < nrOfEnqueues; i++){
 30 |         qdq_enqueue(&queue, critical_section, 0, NULL);
 31 |     }
 32 |     return 1;
 33 | }
 34 | 
 35 | int test_enqueue_and_flush(int nrOfEnqueues){
 36 |     atomic_store(&counter, 0);
 37 |     QDQueue queue;
 38 |     qdq_initialize(&queue);
 39 |     qdq_open(&queue);
 40 |     unsigned long enqueueCounter = 0;
 41 |     for(int i = 0; i < nrOfEnqueues; i++){
 42 |         if(qdq_enqueue(&queue, critical_section, 0, NULL)){
 43 |             enqueueCounter = enqueueCounter + 1;
 44 |         }
 45 |     }
 46 |     qdq_flush(&queue);
 47 |     assert(atomic_load(&counter) == enqueueCounter);
 48 |     return 1;
 49 | }
 50 | 
 51 | void variable_message_size_cs(unsigned int messageSize, void * message){
 52 |     unsigned char * messageBytes = (unsigned char *)message;
 53 |     for(unsigned int i = 0; i < messageSize; i++){
 54 |         assert(((unsigned int)messageBytes[i]) == messageSize);
 55 |     }
 56 |     atomic_fetch_add(&counter, 1);
 57 | }
 58 | int test_variable_message_sizes(int nrOfEnqueues){
 59 |     atomic_store(&counter, 0);
 60 |     QDQueue queue;
 61 |     qdq_initialize(&queue);
 62 |     qdq_open(&queue);
 63 |     unsigned int seed = 0;
 64 |     unsigned long enqueueCounter = 0;
 65 |     for(int i = 0; i < nrOfEnqueues; i++){
 66 |         unsigned int messageSize = (unsigned int)(15.0*random_double(&seed));
 67 |         char messageBuffer[messageSize];
 68 |         for(unsigned int i = 0; i < messageSize; i++){
 69 |             messageBuffer[i] = (unsigned char)messageSize;
 70 |         }
 71 |         if(qdq_enqueue(&queue, variable_message_size_cs, messageSize, messageBuffer)){
 72 |             enqueueCounter = enqueueCounter + 1;
 73 |         }
 74 |     }
 75 |     qdq_flush(&queue);
 76 |     assert(atomic_load(&counter) == enqueueCounter);
 77 |     return 1;
 78 | }
 79 | 
 80 | int main(/*int argc, char **argv*/){
 81 |     
 82 |     printf("\n\n\n\033[32m ### STARTING QD QUEUE TESTS! -- \033[m\n\n\n");
 83 | 
 84 |     T(test_initialize(), "test_initialize()");
 85 | 
 86 |     T(test_enqueue(1), "test_enqueue(nrOfEnqueues = 1)");
 87 |     T(test_enqueue(15), "test_enqueue(nrOfEnqueues = 2)");
 88 |     T(test_enqueue(15), "test_enqueue(nrOfEnqueues = 15)");
 89 |     T(test_enqueue(QD_QUEUE_BUFFER_SIZE*2), "test_enqueue(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)");
 90 | 
 91 |     T(test_enqueue_and_flush(1), "test_enqueue_and_flush(nrOfEnqueues = 1)");
 92 |     T(test_enqueue_and_flush(2), "test_enqueue_and_flush(nrOfEnqueues = 2)");
 93 |     T(test_enqueue_and_flush(15), "test_enqueue_and_flush(nrOfEnqueues = 15)");
 94 |     T(test_enqueue_and_flush(QD_QUEUE_BUFFER_SIZE*2), "test_enqueue_and_flush(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)");
 95 | 
 96 |     T(test_variable_message_sizes(1), "test_variable_message_sizes(nrOfEnqueues = 1)");
 97 |     T(test_variable_message_sizes(2), "test_variable_message_sizes(nrOfEnqueues = 2)");
 98 |     T(test_variable_message_sizes(15), "test_variable_message_sizes(nrOfEnqueues = 15)");
 99 |     T(test_variable_message_sizes(QD_QUEUE_BUFFER_SIZE*2), "test_variable_message_sizes(nrOfEnqueues = QD_QUEUE_BUFFER_SIZE*2)");
100 | 
101 |     printf("\n\n\n\033[32m ### QD QUEUE COMPLETED! -- \033[m\n\n\n");
102 | 
103 |     exit(0);
104 | 
105 | }
106 | 


--------------------------------------------------------------------------------
/src/datastructures/opti_multi_writers_queue.h:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include "utils/smp_utils.h"
  3 | 
  4 | #ifndef MULTI_WRITES_QUEUE_H
  5 | #define MULTI_WRITES_QUEUE_H
  6 | 
  7 | #define MWQ_CAPACITY 4048
  8 | typedef void * entry;
  9 | typedef struct OptiMWQImpl {
 10 |     char padd1[64];
 11 |     CacheLinePaddedBool closed;
 12 |     char padd2[64];
 13 |     CacheLinePaddedULong elementCount;
 14 |     entry elements[MWQ_CAPACITY];
 15 |     char padd3[64 - ((sizeof(entry)*MWQ_CAPACITY) % 64)];
 16 | } OptiMWQueue;
 17 | 
 18 | 
 19 | 
 20 | OptiMWQueue * omwqueue_create();
 21 | OptiMWQueue * omwqueue_initialize(OptiMWQueue * queue);
 22 | void omwqueue_free(OptiMWQueue * queue);
 23 | static bool omwqueue_offer(OptiMWQueue * queue, entry e);
 24 | static void omwqueue_flush(OptiMWQueue * queue, void (*writer)(void *, void **));
 25 | static void omwqueue_reset_fully_read(OptiMWQueue * queue);
 26 | 
 27 | static inline
 28 | unsigned long min(unsigned long i1, unsigned long i2){
 29 |     return i1 < i2 ? i1 : i2;
 30 | }
 31 | static inline
 32 | bool omwqueue_offer(OptiMWQueue * queue, entry e){
 33 |     bool closed;
 34 |     load_acq(closed, queue->closed.value);
 35 |     if(!closed){
 36 |         int index = __sync_fetch_and_add(&queue->elementCount.value, 1);
 37 |         if(index < MWQ_CAPACITY){
 38 |             store_rel(queue->elements[index], e);
 39 |             __sync_synchronize();//Flush
 40 |             return true;
 41 |         }else{
 42 |             store_rel(queue->closed.value, true);
 43 |             __sync_synchronize();//Flush
 44 |             return false;
 45 |         }
 46 |     }else{
 47 |         return false;
 48 |     }
 49 | }
 50 | 
 51 | static inline
 52 | void omwqueue_flush(OptiMWQueue * queue, void (*writer)(void *, void **)){
 53 |     unsigned long numOfElementsToRead;
 54 |     unsigned long newNumOfElementsToRead;
 55 |     unsigned long currentElementIndex = 0;
 56 |     bool closed = false;
 57 |     load_acq(numOfElementsToRead, queue->elementCount.value);
 58 |     if(numOfElementsToRead >= MWQ_CAPACITY){
 59 |         closed = true;
 60 |         numOfElementsToRead = MWQ_CAPACITY;
 61 |     }
 62 | 
 63 |     while(true){
 64 |         if(currentElementIndex < numOfElementsToRead){
 65 |             //There is definitly an element that we should read
 66 |             entry theElement;
 67 |             load_acq(theElement, queue->elements[currentElementIndex]);
 68 |             while(theElement == NULL) {
 69 |                 __sync_synchronize();
 70 |                 load_acq(theElement, queue->elements[currentElementIndex]);
 71 |             }
 72 |             store_rel(queue->elements[currentElementIndex], NULL);
 73 |             currentElementIndex = currentElementIndex + 1;
 74 |             writer(theElement, NULL);
 75 |         }else if (closed){
 76 |             //The queue is closed and there is no more elements that need to be read:
 77 |             return;
 78 |         }else{
 79 |             //Seems like there are no elements that should be read and the queue is
 80 |             //not closed. Check again if there are still no more elements that should
 81 |             //be read before closing the queue
 82 |             load_acq(newNumOfElementsToRead, queue->elementCount.value);
 83 |             if(newNumOfElementsToRead == numOfElementsToRead){
 84 |                 //numOfElementsToRead has not changed. Close the queue.
 85 |                 numOfElementsToRead = 
 86 |                     min(get_and_set_ulong(&queue->elementCount.value, MWQ_CAPACITY + 1), 
 87 |                         MWQ_CAPACITY);
 88 |                 closed = true;
 89 |             }else if(newNumOfElementsToRead < MWQ_CAPACITY){
 90 |                 numOfElementsToRead = newNumOfElementsToRead;
 91 |             }else{
 92 |                 closed = true;
 93 |                 numOfElementsToRead = MWQ_CAPACITY;
 94 |             }
 95 |         }
 96 |     }
 97 | }
 98 | 
 99 | static inline
100 | void omwqueue_reset_fully_read(OptiMWQueue * queue){
101 |     store_rel(queue->elementCount.value, 0);
102 |     store_rel(queue->closed.value, false);
103 | }
104 | #endif
105 | 


--------------------------------------------------------------------------------
/src/lock/agnostic_fdx_lock.h:
--------------------------------------------------------------------------------
  1 | #include <stdbool.h>
  2 | #include "datastructures/dr_multi_writers_queue.h"
  3 | #include "common_lock_constants.h"
  4 | #include "utils/support_many_non_zero_indicator_types.h"
  5 | #include "utils/support_many_lock_types.h"
  6 | 
  7 | #ifndef AGNOSTIC_FDX_LOCK_H
  8 | #define AGNOSTIC_FDX_LOCK_H
  9 | 
 10 | #ifdef LOCK_TYPE_WPRW_MCSLock
 11 | //***********************************
 12 | //MCSLock
 13 | //***********************************
 14 | #include "mcs_lock.h"
 15 | 
 16 | #define LOCK_DATATYPE_NAME_WPRW MCSLock
 17 | 
 18 | #elif defined (LOCK_TYPE_WPRW_CohortLock)
 19 | //***********************************
 20 | //CohortLock
 21 | //***********************************
 22 | #include "cohort_lock.h"
 23 | 
 24 | #define LOCK_DATATYPE_NAME_WPRW CohortLock
 25 | 
 26 | #elif defined (LOCK_TYPE_WPRW_TATASLock)
 27 | //***********************************
 28 | //TATASLock
 29 | //***********************************
 30 | #include "tatas_lock.h"
 31 | 
 32 | #define LOCK_DATATYPE_NAME_WPRW TATASLock
 33 | 
 34 | #else
 35 | 
 36 | #define LOCK_DATATYPE_NAME_WPRW NoLockDatatypeSpecified
 37 | 
 38 | #endif
 39 | 
 40 | struct FlatCombNodeImpl;
 41 | 
 42 | typedef union CacheLinePaddedFlatCombNodePtrImpl {
 43 |     struct FlatCombNodeImpl * value;
 44 |     char padding[64];
 45 | } CacheLinePaddedFlatCombNodePtr;
 46 | 
 47 | typedef struct FlatCombNodeImpl {
 48 |     char pad1[128];
 49 |     struct FlatCombNodeImpl * next;
 50 |     void * data;
 51 |     void ** responseLocation;
 52 |     unsigned long last_used;
 53 |     char pad2[128 - (3 * sizeof(void *) + sizeof(unsigned long)) % 64];
 54 |     void (*request)(void *, void **);
 55 |     char pad3[128 - (sizeof(void *)) % 64];
 56 |     CacheLinePaddedBool active;
 57 |     char pad4[128];
 58 | } FlatCombNode;
 59 | 
 60 | 
 61 | typedef struct AgnosticFDXLockImpl {
 62 |     CacheLinePaddedFlatCombNodePtr combineList;
 63 |     unsigned long combineCount;
 64 |     char pad1[64 - sizeof(unsigned long) % 64];
 65 |     char pad2[64];
 66 |     void (*defaultWriter)(void *, void**);
 67 |     char pad3[64 - sizeof(void * (*)(void*)) % 64];
 68 |     char pad4[128];
 69 |     LOCK_DATATYPE_NAME_WPRW lock;
 70 |     char pad5[64];
 71 | } AgnosticFDXLock;
 72 | 
 73 | 
 74 | 
 75 | AgnosticFDXLock * afdxlock_create(void (*writer)(void *, void **));
 76 | void afdxlock_free(AgnosticFDXLock * lock);
 77 | void afdxlock_initialize(AgnosticFDXLock * lock, void (*writer)(void *, void **));
 78 | void afdxlock_register_this_thread();
 79 | void afdxlock_write(AgnosticFDXLock *lock, void * writeInfo);
 80 | void afdxlock_write_with_response(AgnosticFDXLock *lock, 
 81 |                                   void (*writer)(void *, void **),
 82 |                                   void * data,
 83 |                                   void ** responseLocation);
 84 | void * afdxlock_write_with_response_block(AgnosticFDXLock *lock, 
 85 |                                           void (*delgateFun)(void *, void **), 
 86 |                                           void * data);
 87 | static void afdxlock_delegate(AgnosticFDXLock *lock, 
 88 |                        void (*delgateFun)(void *, void **), 
 89 |                        void * data);
 90 | void afdxlock_write_read_lock(AgnosticFDXLock *lock);
 91 | void afdxlock_write_read_unlock(AgnosticFDXLock * lock);
 92 | void afdxlock_read_lock(AgnosticFDXLock *lock);
 93 | void afdxlock_read_unlock(AgnosticFDXLock *lock);
 94 | 
 95 | static inline
 96 |     void afdxlock_delegate(AgnosticFDXLock *lock, void (*delgateFun)(void *, void **), void * data) {
 97 |     afdxlock_write_with_response(lock, delgateFun, data, NULL);
 98 | }
 99 | static inline
100 | void activateFCNode(AgnosticFDXLock *lock, FlatCombNode * fcNode){
101 |     fcNode->active.value = true;
102 |     FlatCombNode ** pointerToOldValue = &lock->combineList.value;
103 |     FlatCombNode * oldValue = ACCESS_ONCE(*pointerToOldValue);
104 |     while (true) {
105 |         fcNode->next = oldValue;
106 |         if (__sync_bool_compare_and_swap(pointerToOldValue, oldValue, fcNode))
107 |             return;
108 |         oldValue = ACCESS_ONCE(*pointerToOldValue);
109 |     }
110 | }
111 | 
112 | #endif
113 | 


--------------------------------------------------------------------------------
/qd_library/queues/entry_queue.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef qd_entry_queue_hpp
  2 | #define qd_entry_queue_hpp qd_entry_queue_hpp
  3 | 
  4 | #include<algorithm>
  5 | #include<array>
  6 | #include<atomic>
  7 | #include<iostream>
  8 | #include<typeinfo>
  9 | 
 10 | /**
 11 |  * @brief a buffer-based tantrum queue with fixed-size entries
 12 |  * @tparam ENTRIES the number of entries
 13 |  */
 14 | template<long ENTRIES, int BUFFER_SIZE>
 15 | class entry_queue {
 16 | 	/** type for the size field for queue entries, loads must not be optimized away in flush */
 17 | 	typedef std::atomic<long> sizetype;
 18 | 
 19 | 	/** type for function pointers to be stored in this queue */
 20 | 	typedef void(*ftype)(char*);
 21 | 	
 22 | 	struct entry_t {
 23 | 		std::atomic<ftype> fun;
 24 | 		char buf[BUFFER_SIZE];
 25 | 	};
 26 | 	void forwardall(long, long) {};
 27 | 	template<typename P, typename... Ts>
 28 | 	void forwardall(long idx, long offset, P&& p, Ts&&... ts) {
 29 | 		auto ptr = reinterpret_cast<P*>(&entry_array[idx].buf[offset]);
 30 | 		new (ptr) P(std::forward<P>(p));
 31 | 		forwardall(idx, offset+sizeof(p), std::forward<Ts>(ts)...);
 32 | 	}
 33 | 	public:
 34 | 		/** constants for current state of the queue */
 35 | 		enum class status : long { OPEN=0, SUCCESS=0, FULL, CLOSED }; 
 36 | 
 37 | 		entry_queue() : counter(ENTRIES), closed(status::CLOSED) {}
 38 | 		/** opens the queue */
 39 | 		void open() {
 40 | 			counter.store(0, std::memory_order_relaxed);
 41 | 			closed.store(status::OPEN, std::memory_order_relaxed);
 42 | 		}
 43 | 
 44 | 		/**
 45 | 		 * @brief enqueues an entry
 46 | 		 * @tparam P return type of associated function
 47 | 		 * @param op wrapper function for associated function
 48 | 		 * @return SUCCESS on successful storing in queue, FULL if the queue is full and CLOSED if the queue is closed explicitly
 49 | 		 */
 50 | 		template<typename... Ps>
 51 | 		status enqueue(void (*op)(char*), Ps*... ps) {
 52 | 			auto current_status = closed.load(std::memory_order_relaxed);
 53 | 			if(current_status != status::OPEN) {
 54 | 				return current_status;
 55 | 			}
 56 | 			/* entry size = size of size + size of wrapper functor + size of promise + size of all parameters*/
 57 | 			constexpr long size = sumsizes<Ps...>::size;
 58 | 			/* get memory in buffer */
 59 | 			long index = counter.fetch_add(1, std::memory_order_relaxed);
 60 | 			if(index < ENTRIES) {
 61 | 				static_assert(size <= BUFFER_SIZE, "entry_queue buffer per entry too small.");
 62 | 				/* entry available: move op, p and parameters to buffer, then set size of entry */
 63 | 				forwardall(index, 0, std::move(*ps)...);
 64 | 				entry_array[index].fun.store(op, std::memory_order_release);
 65 | 				return status::SUCCESS;
 66 | 			} else {
 67 | 				return status::FULL;
 68 | 			}
 69 | 		}
 70 | 
 71 | 		/** execute all stored operations, leave queue in closed state */
 72 | 		void flush() {
 73 | 			long todo = 0;
 74 | 			bool open = true;
 75 | 			while(open) {
 76 | 				long done = todo;
 77 | 				todo = counter.load(std::memory_order_relaxed);
 78 | 				if(todo == done) { /* close queue */
 79 | 					todo = counter.exchange(ENTRIES, std::memory_order_relaxed);
 80 | 					closed.store(status::CLOSED, std::memory_order_relaxed);
 81 | 					open = false;
 82 | 				}
 83 | 				if(todo >= static_cast<long>(ENTRIES)) { /* queue closed */
 84 | 					todo = ENTRIES;
 85 | 					closed.store(status::CLOSED, std::memory_order_relaxed);
 86 | 					open = false;
 87 | 				}
 88 | 				for(long index = done; index < todo; index++) {
 89 | 					/* synchronization on entry size field: 0 until entry available */
 90 | 					ftype fun = nullptr;
 91 | 					do {
 92 | 						fun = entry_array[index].fun.load(std::memory_order_acquire);
 93 | 					} while(!fun);
 94 | 
 95 | 					/* call functor with pointer to promise (of unknown type) */
 96 | 					fun(&entry_array[index].buf[0]);
 97 | 
 98 | 					/* cleanup: call destructor of (now empty) functor and clear buffer area */
 99 | //					fun->~ftype();
100 | 					entry_array[index].fun.store(nullptr, std::memory_order_relaxed);
101 | 				}
102 | 			}
103 | 		}
104 | 	private:
105 | 		/** counter for how many entries are already in use */
106 | 		std::atomic<long> counter;
107 | 		char pad[128];
108 | 		/** optimization flag: no writes when queue in known-closed state */
109 | 		std::atomic<status> closed;
110 | 		char pad2[128];
111 | 		/** the buffer for entries to this queue */
112 | 		std::array<entry_t, ENTRIES> entry_array;
113 | };
114 | 
115 | #endif /* qd_buffer_queue_hpp */
116 | 


--------------------------------------------------------------------------------