├── LICENSE ├── Makefile ├── README ├── csuperglue ├── Makefile ├── src │ ├── csuperglue.cpp │ └── csuperglue.h └── test │ ├── Makefile │ ├── chol.c │ ├── chol2.c │ ├── helloworld.c │ ├── helloworld2.c │ ├── helloworld3.c │ ├── helloworld4.c │ ├── helloworld5.c │ └── startpaused.c ├── examples ├── Makefile └── src │ ├── accesstypes.cpp │ ├── customhandle.cpp │ ├── dag.cpp │ ├── dependencies.cpp │ ├── handlewithdata.cpp │ ├── helloworld.cpp │ ├── hierarchic.cpp │ ├── logging.cpp │ ├── nbody.cpp │ ├── nested1.cpp │ ├── nested2.cpp │ ├── pinnedtasks.cpp │ ├── subtasks.cpp │ ├── subtasksdeps.cpp │ ├── tempvars.cpp │ ├── vardeps.cpp │ └── workspace.cpp ├── examples_dep └── cholesky │ ├── Makefile │ └── cholesky.cpp ├── include └── sg │ ├── core │ ├── access.hpp │ ├── access_rwa.hpp │ ├── accessutil.hpp │ ├── barrierprotocol.hpp │ ├── contrib.hpp │ ├── defaults.hpp │ ├── handle.hpp │ ├── orderedvec.hpp │ ├── schedulerver.hpp │ ├── spinlock.hpp │ ├── supergluebase.hpp │ ├── task.hpp │ ├── taskexecutor.hpp │ ├── taskqueue.hpp │ ├── taskqueuesafe.hpp │ ├── taskqueueunsafe.hpp │ ├── types.hpp │ └── versionqueue.hpp │ ├── option │ ├── access_readwrite.hpp │ ├── access_rwc.hpp │ ├── instr_debug.hpp │ ├── instr_perfcount.hpp │ ├── instr_procstat.hpp │ ├── instr_trace.hpp │ ├── log.hpp │ ├── log2.hpp │ ├── savedag.hpp │ ├── savedag_common.hpp │ ├── savedag_data.hpp │ ├── savedag_task.hpp │ ├── taskqueue_deque.hpp │ ├── taskqueue_prio.hpp │ ├── taskqueue_priopinned.hpp │ ├── threadingmanager_default.hpp │ └── threadingmanager_omp.hpp │ ├── platform │ ├── affinity.hpp │ ├── atomic.hpp │ ├── gettime.hpp │ ├── openmputil.hpp │ ├── perfcount.hpp │ ├── platform.hpp │ ├── threads.hpp │ ├── threadutil.hpp │ └── tls.hpp │ └── superglue.hpp ├── make.bat ├── scripts ├── drawsched.py └── status.py ├── test ├── fail │ ├── Makefile │ ├── sanity_fail_lockable.cpp │ └── sanity_fail_signed.cpp ├── main.cpp ├── modular │ ├── Makefile │ ├── atomic.cpp │ ├── gettime.cpp │ ├── logging.cpp │ ├── 
minimal.cpp │ ├── sanity_lockable.cpp │ ├── semaphores.cpp │ ├── spinlock.cpp │ ├── taskqueue_prio.cpp │ ├── threads.cpp │ └── versionwrap.cpp ├── modular_dep │ ├── Makefile │ ├── gettime.cpp │ └── perfcount.cpp ├── unit │ ├── test_access.hpp │ ├── test_handle.hpp │ ├── test_listqueue.hpp │ ├── test_locks.hpp │ ├── test_rwc.hpp │ ├── test_schedver.hpp │ ├── test_subtasks.hpp │ ├── test_taskqueue.hpp │ ├── test_taskqueue_impl.hpp │ ├── test_taskqueuedeque.hpp │ ├── test_taskqueueprio.hpp │ └── test_tasks.hpp └── util │ └── log_inc.hpp └── tools ├── Makefile └── viewer.cpp /LICENSE: -------------------------------------------------------------------------------- 1 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. 2 | 3 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -Wconversion -I include/ -pthread 2 | 3 | tests: unittest modular csuperglue 4 | 5 | unittest: 6 | mkdir -p bin 7 | $(CXX) $(FLAGS) test/main.cpp -o bin/$@ 8 | ./bin/$@ 9 | 10 | modular: 11 | ( cd test/modular ; make ) 12 | ( cd test/fail ; make ) 13 | 14 | examples: 15 | ( cd examples ; make ) 16 | 17 | tools: 18 | ( cd tools ; make ) 19 | 20 | csuperglue: 21 | ( cd csuperglue ; make ) 22 | 23 | clean: 24 | rm -f ./bin/* ./examples/bin/* 25 | 26 | .PHONY: tests unittest examples tools csuperglue clean 27 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | WHAT IS THIS? 3 | ------------- 4 | 5 | SuperGlue is a C++ library for task-parallelism, with data-dependent tasks. 6 | 7 | The programmer specifies tasks, and which data each task reads and writes, 8 | and SuperGlue uses this information to deduce dependencies. 9 | 10 | 11 | SHOW ME THE CODE! 12 | ----------------- 13 | 14 | An example where a single, independent task is created and executed: 15 | 16 | #include "sg/superglue.hpp" 17 | #include <iostream> 18 | 19 | // Settings for SuperGlue. Here we use the defaults. 20 | struct Options : public DefaultOptions<Options> {}; 21 | 22 | // Define a task, with no dependencies. 23 | struct MyTask : public Task<Options> { 24 | void run() { 25 | std::cout << "Hello world!" << std::endl; 26 | } 27 | }; 28 | 29 | int main() { 30 | // The SuperGlue object starts the run-time system and starts 31 | // as many worker threads as there are cores. 32 | SuperGlue<Options> sg; 33 | 34 | // Create a task and submit it to SuperGlue 35 | sg.submit(new MyTask()); 36 | 37 | return 0; 38 | } 39 | 40 | Check the "examples/" directory for more examples. 
The above example is found 41 | in "examples/src/helloworld.cpp". For an example with task dependencies, look at 42 | "examples/src/dependencies.cpp". 43 | 44 | 45 | COMPILING AND INSTALLING 46 | ------------------------ 47 | 48 | SuperGlue is a header-only C++ template library. As such, it is not compiled as 49 | its own unit, but included into and compiled together with other code. The only 50 | setup that is needed is to add the "include/" directory to the include paths 51 | of the compiler, and enable pthreads support (compile with the -pthread flag). 52 | 53 | 54 | GETTING STARTED 55 | --------------- 56 | 57 | Running "make" will compile and run a few unit tests. 58 | Running "make examples" will compile all examples in the "examples/" directory. 59 | The "examples_dep/" directory contains examples with external dependencies. 60 | 61 | -------------------------------------------------------------------------------- /csuperglue/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -Wconversion -I ../include -pthread 2 | 3 | all: csuperglue csupergluetest 4 | 5 | bin: 6 | mkdir -p bin 7 | 8 | csuperglue: bin 9 | $(CXX) $(FLAGS) -I src/ src/csuperglue.cpp -c -o bin/csuperglue.o 10 | $(CXX) $(FLAGS) -DSG_LOGGING -I src/ src/csuperglue.cpp -c -o bin/csupergluelog.o 11 | 12 | csupergluetest: 13 | ( cd test ; make ) 14 | 15 | clean: 16 | rm -f ./bin/* 17 | 18 | .PHONY: tests unittest examples tools csuperglue clean 19 | -------------------------------------------------------------------------------- /csuperglue/src/csuperglue.cpp: -------------------------------------------------------------------------------- 1 | extern "C" { 2 | #include "csuperglue.h" 3 | } 4 | 5 | #include "sg/superglue.hpp" 6 | 7 | #ifdef SG_LOGGING 8 | #include "sg/option/instr_trace.hpp" 9 | #endif 10 | #include "sg/platform/gettime.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | struct Options : 
public DefaultOptions { 18 | typedef Enable PauseExecution; 19 | #ifdef SG_LOGGING 20 | typedef Enable TaskName; 21 | typedef Trace Instrumentation; 22 | #endif 23 | }; 24 | 25 | SuperGlue *superglue; 26 | 27 | class CTaskBase : public Task { 28 | protected: 29 | sg_task_function function; 30 | void *args; 31 | 32 | public: 33 | CTaskBase(sg_task_function function_, void *args_) 34 | : function(function_), args(args_) 35 | {} 36 | virtual ~CTaskBase() {} 37 | 38 | void run() { 39 | function(args); 40 | } 41 | }; 42 | 43 | #ifdef SG_LOGGING 44 | 45 | class CTask : public CTaskBase { 46 | protected: 47 | std::string name; 48 | public: 49 | CTask(sg_task_function function, void *args_, size_t argsize, const char *name_) 50 | : CTaskBase(function, new char[argsize]), name(name_ == NULL ? "" : name_) { 51 | memcpy(args, args_, argsize); 52 | } 53 | virtual ~CTask() { delete [] (char *) args; } 54 | std::string get_name() { return name; } 55 | }; 56 | 57 | class CInplaceTask : public CTaskBase { 58 | protected: 59 | std::string name; 60 | public: 61 | CInplaceTask(sg_task_function function, void *args, const char *name_) 62 | : CTaskBase(function, args), name(name_ == NULL ? 
"" : name_) {} 63 | std::string get_name() { return name; } 64 | }; 65 | 66 | #else // SG_LOGGING 67 | 68 | class CTask : public CTaskBase { 69 | public: 70 | CTask(sg_task_function function, void *args_, size_t argsize, const char *) 71 | : CTaskBase(function, new char[argsize]) { 72 | memcpy(args, args_, argsize); 73 | } 74 | virtual ~CTask() { delete [] (char *) args; } 75 | }; 76 | 77 | class CInplaceTask : public CTaskBase { 78 | public: 79 | CInplaceTask(sg_task_function function, void *args, const char *) 80 | : CTaskBase(function, args) {} 81 | }; 82 | 83 | #endif // SG_LOGGING 84 | 85 | extern "C" sg_task_t sg_create_task(sg_task_function function, void *args, size_t argsize, const char *name) { 86 | CTask *task(new CTask(function, args, argsize, name)); 87 | return (sg_task_t) task; 88 | } 89 | 90 | extern "C" sg_task_t sg_create_inplace_task(sg_task_function function, void *args, const char *name) { 91 | CInplaceTask *task(new CInplaceTask(function, args, name)); 92 | return (sg_task_t) task; 93 | } 94 | 95 | extern "C" void sg_register_access(sg_task_t task_, enum sg_access_type type, sg_handle_t handle_) { 96 | CTask *task((CTask *) task_); 97 | Handle *handle((Handle *) handle_); 98 | task->register_access((ReadWriteAdd::Type) (type-1), *handle); 99 | } 100 | 101 | extern "C" void sg_submit_task(sg_task_t task_) { 102 | CTask *task((CTask *) task_); 103 | superglue->submit(task); 104 | } 105 | 106 | extern "C" void sg_submit(sg_task_function function, void *args, size_t argsize, const char *name, ...) 
{ 107 | va_list deps; 108 | CTask *task(new CTask(function, args, argsize, name)); 109 | 110 | va_start(deps, name); 111 | 112 | for (;;) { 113 | int type = va_arg(deps, int); 114 | if (type == 0) 115 | break; 116 | Handle *handle = va_arg(deps, Handle *); 117 | 118 | task->register_access((ReadWriteAdd::Type) (type-1), *handle); 119 | } 120 | va_end(deps); 121 | superglue->submit(task); 122 | } 123 | 124 | extern "C" void sg_submit_inplace(sg_task_function function, void *args, const char *name, ...) { 125 | va_list deps; 126 | CInplaceTask *task(new CInplaceTask(function, args, name)); 127 | 128 | va_start(deps, name); 129 | 130 | for (;;) { 131 | int type = va_arg(deps, int); 132 | if (type == 0) 133 | break; 134 | Handle *handle = va_arg(deps, Handle *); 135 | 136 | task->register_access((ReadWriteAdd::Type) (type-1), *handle); 137 | } 138 | va_end(deps); 139 | superglue->submit(task); 140 | } 141 | 142 | extern "C" void sg_wait(sg_handle_t handle) { 143 | superglue->wait(*(Handle *) handle); 144 | } 145 | 146 | extern "C" void sg_barrier() { 147 | superglue->barrier(); 148 | } 149 | 150 | extern "C" sg_handle_t *sg_create_handles(int num) { 151 | sg_handle_t *mem = new sg_handle_t[num]; 152 | for (int i = 0; i < num; ++i) 153 | mem[i] = (sg_handle_t) new Handle(); 154 | return mem; 155 | } 156 | 157 | extern "C" void sg_destroy_handles(sg_handle_t *handles, int num) { 158 | for (int i = 0; i < num; ++i) 159 | delete (Handle *) handles[i]; 160 | delete [] handles; 161 | } 162 | 163 | extern "C" void sg_init() { 164 | superglue = new SuperGlue(); 165 | superglue->start_executing(); 166 | } 167 | 168 | extern "C" void sg_init_paused() { 169 | superglue = new SuperGlue(); 170 | } 171 | 172 | extern "C" void sg_execute() { 173 | superglue->start_executing(); 174 | } 175 | 176 | extern "C" void sg_destroy() { 177 | delete superglue; 178 | } 179 | 180 | extern "C" void sg_write_log(const char *filename) { 181 | #ifdef SG_LOGGING 182 | 
Options::Instrumentation::dump(filename); 183 | #endif 184 | } 185 | 186 | extern "C" unsigned long long sg_get_time() { 187 | return Time::getTime(); 188 | } 189 | 190 | extern "C" void sg_log(const char *name, unsigned long long start, unsigned long long stop) { 191 | #ifdef SG_LOGGING 192 | Log::log(name, start, stop); 193 | #endif 194 | } 195 | -------------------------------------------------------------------------------- /csuperglue/src/csuperglue.h: -------------------------------------------------------------------------------- 1 | #ifndef SG_CSUPERGLUE_H_INCLUDED 2 | #define SG_CSUPERGLUE_H_INCLUDED 3 | 4 | #include "stddef.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef void *sg_handle_t; 11 | typedef void *sg_task_t; 12 | typedef void (*sg_task_function)(void *); 13 | enum sg_access_type { sg_read = 1, sg_add, sg_write }; 14 | 15 | /* Initialize SuperGlue. Starts worker threads. */ 16 | void sg_init(); 17 | 18 | /* Initialize SuperGlue. Starts worker threads, but do not run any tasks yet. */ 19 | void sg_init_paused(); 20 | /* Start running tasks. (If initialized via sg_init_paused()) */ 21 | void sg_execute(); 22 | 23 | /* Shut down SuperGlue */ 24 | void sg_destroy(); 25 | 26 | /* Create a task. The task must be submitted to SuperGlue using sg_submit_task, or there is a memory leak. */ 27 | sg_task_t sg_create_task(sg_task_function function, void *args, size_t argsize, const char *name); 28 | /* Version that does not make a private copy of the arguments. */ 29 | sg_task_t sg_create_inplace_task(sg_task_function function, void *args, const char *name); 30 | 31 | /* Add an access to a task. */ 32 | void sg_register_access(sg_task_t task, enum sg_access_type type, sg_handle_t handle); 33 | 34 | /* Submit a task to SuperGlue. SuperGlue takes ownership of the task. */ 35 | void sg_submit_task(sg_task_t task); 36 | 37 | /* Create and submit a task to SuperGlue. 
38 | function -- function with signature "void my_function(void *args)" 39 | args -- user-defined arguments to be passed to the function 40 | argsize -- size of user-defined arguments 41 | name -- name of task, for logging (ignored otherwise) 42 | deps -- null-terminated list of access types and handles: 43 | Example: [sg_access_type, sg_handle, sg_access_type, sg_handle, 0] 44 | */ 45 | void sg_submit(sg_task_function function, void *args, size_t argsize, const char *name, ...); 46 | /* Version that does not make a private copy of the arguments. */ 47 | void sg_submit_inplace(sg_task_function function, void *args, const char *name, ...); 48 | 49 | /* Wait for all submitted tasks to finish */ 50 | void sg_barrier(); 51 | 52 | /* Create an array of handles */ 53 | sg_handle_t *sg_create_handles(int num); 54 | 55 | /* Destroy handles */ 56 | void sg_destroy_handles(sg_handle_t *handles, int num); 57 | 58 | /* Return current time (time stamp counter) */ 59 | unsigned long long sg_get_time(); 60 | 61 | /* Write log-file (if logging is enabled) */ 62 | void sg_write_log(const char *filename); 63 | 64 | /* Add a log entry (if logging is enabled) */ 65 | void sg_log(const char *name, unsigned long long start, unsigned long long stop); 66 | 67 | #ifdef __cplusplus 68 | } 69 | #endif 70 | 71 | #endif /* SG_CSUPERGLUE_H_INCLUDED */ 72 | -------------------------------------------------------------------------------- /csuperglue/test/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../src -pthread 2 | 3 | C_FILES=$(wildcard *.c) 4 | TESTS=$(patsubst %.c,%.test,$(C_FILES)) 5 | 6 | CSUPERGLUE_LIB=../bin/csupergluelog.o 7 | 8 | all: $(TESTS) 9 | 10 | %.test: %.c 11 | $(CC) $(FLAGS) $< $(CSUPERGLUE_LIB) -lstdc++ && ./a.out 12 | 13 | -------------------------------------------------------------------------------- /csuperglue/test/chol.c: 
-------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | #include 4 | 5 | struct gemm_data { double *A, *B, *C; }; 6 | void gemm(void *args) { /* fprintf(stderr, "gemm\n"); */ } 7 | 8 | struct syrk_data { double *A, *B; }; 9 | void syrk(void *args) { /* fprintf(stderr, "syrk\n"); */ } 10 | 11 | struct potrf_data { double *A; }; 12 | void potrf(void *args) { /* fprintf(stderr, "potrf\n"); */ } 13 | 14 | struct trsm_data { double *A, *B; }; 15 | void trsm(void *args) { /* fprintf(stderr, "trsm\n"); */ } 16 | 17 | int main() { 18 | 19 | int i, j, k; 20 | const size_t n = 100; 21 | 22 | sg_handle_t *h = sg_create_handles(n*n); 23 | double **A = malloc(n*n*sizeof(double *)); 24 | 25 | sg_init(); 26 | 27 | for (j = 0; j < n; j++) { 28 | struct potrf_data potrf_args; 29 | 30 | for (k = 0; k < j; k++) { 31 | for (i = j+1; i < n; i++) { 32 | /* A[i,j] = A[i,j] - A[i,k] * (A[j,k])^t */ 33 | struct gemm_data args; 34 | args.A = A[i*n+k]; 35 | args.B = A[j*n+k]; 36 | args.C = A[i*n+j]; 37 | sg_submit(gemm, &args, sizeof(args), "gemm", 38 | sg_read, h[i*n+k], 39 | sg_read, h[j*n+k], 40 | sg_add, h[i*n+j], 41 | 0); 42 | } 43 | } 44 | for (i = 0; i < j; i++) { 45 | /* A[j,j] = A[j,j] - A[j,i] * (A[j,i])^t */ 46 | struct syrk_data args; 47 | args.A = A[j*n+i]; 48 | args.B = A[j*n+j]; 49 | sg_submit(syrk, &args, sizeof(args), "syrk", 50 | sg_read, h[j*n+i], 51 | sg_add, h[j*n+j], 52 | 0); 53 | } 54 | 55 | /* Cholesky Factorization of A[j,j] */ 56 | potrf_args.A = A[j*n+j]; 57 | sg_submit(potrf, &potrf_args, sizeof(potrf_args), "potrf", 58 | sg_write, h[j*n+j], 59 | 0); 60 | 61 | for (i = j+1; i < n; i++) { 62 | /* A[i,j] <- A[i,j] = X * (A[j,j])^t */ 63 | struct trsm_data args; 64 | args.A = A[j*n+j]; 65 | args.B = A[i*n+j]; 66 | sg_submit(trsm, &args, sizeof(args), "trsm", 67 | sg_read, h[j*n+j], 68 | sg_write, h[i*n+j], 69 | 0); 70 | } 71 | } 72 | sg_barrier(); 73 | sg_write_log("execution.log"); 74 | 
sg_destroy(); 75 | 76 | free(A); 77 | sg_destroy_handles(h, n*n); 78 | 79 | return 0; 80 | } 81 | -------------------------------------------------------------------------------- /csuperglue/test/chol2.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | #include 4 | 5 | struct gemm_data { double *A, *B, *C; }; 6 | void gemm(void *args) { /* fprintf(stderr, "gemm\n"); */ } 7 | 8 | struct syrk_data { double *A, *B; }; 9 | void syrk(void *args) { /* fprintf(stderr, "syrk\n"); */ } 10 | 11 | struct potrf_data { double *A; }; 12 | void potrf(void *args) { /* fprintf(stderr, "potrf\n"); */ } 13 | 14 | struct trsm_data { double *A, *B; }; 15 | void trsm(void *args) { /* fprintf(stderr, "trsm\n"); */ } 16 | 17 | int main() { 18 | 19 | int i, j, k; 20 | const size_t n = 30; 21 | 22 | sg_handle_t *h = sg_create_handles(n*n); 23 | double **A = malloc(n*n * sizeof(double *)); 24 | 25 | /* fill A with pointers to blocks here... 
*/ 26 | 27 | sg_init(); 28 | 29 | for (j = 0; j < n; j++) { 30 | sg_task_t task; 31 | struct potrf_data potrf_args; 32 | 33 | for (k = 0; k < j; k++) { 34 | for (i = j+1; i < n; i++) { 35 | /* A[i,j] = A[i,j] - A[i,k] * (A[j,k])^t */ 36 | struct gemm_data args; 37 | args.A = A[i*n+k]; 38 | args.B = A[j*n+k]; 39 | args.C = A[i*n+j]; 40 | task = sg_create_task(gemm, &args, sizeof(args), "gemm"); 41 | sg_register_access(task, sg_read, h[i*n+k]); 42 | sg_register_access(task, sg_read, h[j*n+k]); 43 | sg_register_access(task, sg_add, h[i*n+j]); 44 | sg_submit_task(task); 45 | } 46 | } 47 | for (i = 0; i < j; i++) { 48 | /* A[j,j] = A[j,j] - A[j,i] * (A[j,i])^t */ 49 | struct syrk_data args; 50 | args.A = A[j*n+i]; 51 | args.B = A[j*n+j]; 52 | task = sg_create_task(syrk, &args, sizeof(args), "syrk"); 53 | sg_register_access(task, sg_read, h[j*n+i]); 54 | sg_register_access(task, sg_add, h[j*n+j]); 55 | sg_submit_task(task); 56 | } 57 | 58 | /* Cholesky Factorization of A[j,j] */ 59 | potrf_args.A = A[j*n+j]; 60 | task = sg_create_task(potrf, &potrf_args, sizeof(potrf_args), "potrf"); 61 | sg_register_access(task, sg_write, h[j*n+j]); 62 | sg_submit_task(task); 63 | 64 | for (i = j+1; i < n; i++) { 65 | /* A[i,j] <- A[i,j] = X * (A[j,j])^t */ 66 | struct trsm_data args; 67 | args.A = A[j*n+j]; 68 | args.B = A[i*n+j]; 69 | task = sg_create_task(trsm, &args, sizeof(args), "trsm"); 70 | sg_register_access(task, sg_read, h[j*n+j]); 71 | sg_register_access(task, sg_write, h[i*n+j]); 72 | sg_submit_task(task); 73 | } 74 | } 75 | sg_barrier(); 76 | sg_write_log("execution2.log"); 77 | sg_destroy(); 78 | 79 | free(A); 80 | sg_destroy_handles(h, n*n); 81 | 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /csuperglue/test/helloworld.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | 4 | void my_task(void *args) { 5 | printf("Hello world!\n"); 6 | 
} 7 | 8 | int main() { 9 | sg_init(); 10 | sg_submit(my_task, 0, 0, 0, 0); 11 | sg_destroy(); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /csuperglue/test/helloworld2.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | 4 | struct my_args { 5 | int i; 6 | }; 7 | 8 | void my_task(void *args_) { 9 | struct my_args *args = (struct my_args *) args_; 10 | /* args here is a private copy */ 11 | printf("Hello world! Private copy of args = %d\n", args->i); 12 | ++args->i; 13 | } 14 | 15 | int main() { 16 | struct my_args args; 17 | args.i = 32; 18 | 19 | sg_init(); 20 | sg_submit(my_task, &args, sizeof(struct my_args), 0, 0); 21 | sg_barrier(); 22 | sg_submit(my_task, &args, sizeof(struct my_args), 0, 0); 23 | sg_destroy(); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /csuperglue/test/helloworld3.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | 4 | struct my_args { 5 | int i; 6 | }; 7 | 8 | void my_task(void *args_) { 9 | struct my_args *args = (struct my_args *) args_; 10 | /* args here is a private copy */ 11 | printf("Hello world! 
Private copy of args = %d\n", args->i); 12 | ++args->i; 13 | } 14 | 15 | int main() { 16 | struct my_args args; 17 | sg_task_t task; 18 | 19 | args.i = 32; 20 | 21 | sg_init(); 22 | task = sg_create_task(my_task, &args, sizeof(struct my_args), NULL); 23 | sg_submit_task(task); 24 | sg_barrier(); 25 | task = sg_create_task(my_task, &args, sizeof(struct my_args), NULL); 26 | sg_submit_task(task); 27 | sg_destroy(); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /csuperglue/test/helloworld4.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | 4 | void my_task(void *args) { 5 | int *i = (int *) args; 6 | /* here i is no copy, but points to i in the stack of main() */ 7 | printf("Hello world! Shared copy of args = %d\n", *i); 8 | ++(*i); 9 | } 10 | 11 | int main() { 12 | int i = 0; 13 | sg_init(); 14 | sg_submit_inplace(my_task, &i, 0, NULL); 15 | sg_barrier(); 16 | sg_submit_inplace(my_task, &i, 0, NULL); 17 | sg_destroy(); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /csuperglue/test/helloworld5.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | 4 | void my_task(void *args) { 5 | int *i = (int *) args; 6 | /* here i is no copy, but points to i in the stack of main() */ 7 | printf("Hello world! 
Shared copy of args = %d\n", *i); 8 | ++(*i); 9 | } 10 | 11 | int main() { 12 | int i = 0; 13 | sg_task_t task; 14 | 15 | sg_init(); 16 | task = sg_create_inplace_task(my_task, &i, NULL); 17 | sg_submit_task(task); 18 | sg_barrier(); 19 | 20 | task = sg_create_inplace_task(my_task, &i, NULL); 21 | sg_submit_task(task); 22 | sg_destroy(); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /csuperglue/test/startpaused.c: -------------------------------------------------------------------------------- 1 | #include "csuperglue.h" 2 | #include 3 | #include 4 | #include 5 | 6 | pthread_mutex_t mutex; 7 | int counter = 0; 8 | 9 | void my_task(void *args) { 10 | pthread_mutex_lock(&mutex); 11 | ++counter; 12 | pthread_mutex_unlock(&mutex); 13 | } 14 | 15 | int main() { 16 | const int num_tasks = 1000; 17 | int i; 18 | 19 | pthread_mutex_init(&mutex, NULL); 20 | 21 | /* Start SuperGlue paused: No tasks can run yet. */ 22 | sg_init_paused(); 23 | 24 | /* Submit tasks. Will not be run yet. */ 25 | for (i = 0; i < num_tasks; ++i) 26 | sg_submit(my_task, 0, 0, 0, 0); 27 | 28 | /* Make sure no tasks have run. */ 29 | assert(counter == 0); 30 | 31 | /* Allow tasks to run. */ 32 | sg_execute(); 33 | 34 | /* Wait for all tasks to finish. */ 35 | sg_barrier(); 36 | 37 | /* Make sure all tasks finished. */ 38 | assert(counter == num_tasks); 39 | 40 | /* Shut down SuperGlue. 
*/ 41 | sg_destroy(); 42 | 43 | pthread_mutex_destroy(&mutex); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | EXAMPLES:=$(basename $(notdir $(wildcard src/*.cpp))) 2 | 3 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../include -pthread 4 | 5 | all: $(EXAMPLES) 6 | 7 | $(EXAMPLES): bin/ 8 | $(CXX) $(FLAGS) src/$@.cpp -o bin/$@ 9 | 10 | bin: 11 | mkdir -p bin 12 | 13 | clean: 14 | rm -rf bin 15 | 16 | .PHONY: all clean $(EXAMPLES) 17 | -------------------------------------------------------------------------------- /examples/src/accesstypes.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | // This example defines a new access type: Mul, to define a 13 | // commutative multiplicative access that can be reordered 14 | // among other accesses of the same type, as with the Add 15 | // access type. 
16 | 17 | class ReadWriteAddMul { 18 | public: 19 | enum Type { read = 0, write, add, mul, num_accesses }; 20 | template struct AccessType {}; 21 | }; 22 | 23 | template<> struct ReadWriteAddMul::AccessType { 24 | enum { commutative = 1 }; 25 | enum { exclusive = 0 }; 26 | enum { readonly = 1 }; 27 | }; 28 | 29 | template<> struct ReadWriteAddMul::AccessType { 30 | enum { commutative = 0 }; 31 | enum { exclusive = 1 }; 32 | enum { readonly = 0 }; 33 | }; 34 | 35 | template<> struct ReadWriteAddMul::AccessType { 36 | enum { commutative = 1 }; 37 | enum { exclusive = 1 }; 38 | enum { readonly = 0 }; 39 | }; 40 | 41 | template<> struct ReadWriteAddMul::AccessType { 42 | enum { commutative = 1 }; 43 | enum { exclusive = 1 }; 44 | enum { readonly = 0 }; 45 | }; 46 | 47 | //=========================================================================== 48 | // Task Library Options 49 | //=========================================================================== 50 | struct Options : public DefaultOptions { 51 | typedef ReadWriteAddMul AccessInfoType; 52 | typedef Enable Lockable; 53 | typedef Enable TaskName; 54 | }; 55 | 56 | Handle handle; 57 | volatile double data = 0.0; 58 | 59 | //=========================================================================== 60 | // Tasks 61 | //=========================================================================== 62 | class TaskSet : public Task { 63 | private: 64 | double value; 65 | public: 66 | TaskSet(double value_) : value(value_) { 67 | register_access(ReadWriteAddMul::write, handle); 68 | } 69 | 70 | void run() { 71 | data = value; 72 | std::stringstream ss; 73 | ss << "=" << value << "="< { 80 | private: 81 | double value; 82 | public: 83 | TaskAdd(double value_) : value(value_) { 84 | register_access(ReadWriteAddMul::add, handle); 85 | } 86 | 87 | void run() { 88 | data += value; 89 | std::stringstream ss; 90 | ss << "+" << value << "="< { 97 | private: 98 | double value; 99 | public: 100 | TaskMul(double value_) : 
value(value_) { 101 | register_access(ReadWriteAddMul::mul, handle); 102 | } 103 | 104 | void run() { 105 | data *= value; 106 | std::stringstream ss; 107 | ss << "*" << value << "="< { 114 | private: 115 | public: 116 | TaskPrint() { 117 | register_access(ReadWriteAddMul::read, handle); 118 | } 119 | void run() { 120 | std::stringstream ss; 121 | ss << "Result=" << data << std::endl; 122 | std::cerr << ss.str(); 123 | } 124 | std::string get_name() { return "Print"; } 125 | }; 126 | 127 | //=========================================================================== 128 | // main 129 | //=========================================================================== 130 | int main(int argc, char *argv[]) { 131 | 132 | int num_threads = -1; 133 | if (argc == 2) { 134 | num_threads = (size_t) atoi(argv[1]); 135 | } 136 | else if (argc != 1) { 137 | printf("usage: %s [num_cores]\n", argv[0]); 138 | exit(0); 139 | } 140 | 141 | SuperGlue sg(num_threads); 142 | // 1 * 2 * 3 * 4 + 5 + 6 + 7 = 42 143 | sg.submit(new TaskSet(1.0)); 144 | sg.submit(new TaskMul(2.0)); 145 | sg.submit(new TaskMul(3.0)); 146 | sg.submit(new TaskMul(4.0)); 147 | sg.submit(new TaskAdd(5.0)); 148 | sg.submit(new TaskAdd(6.0)); 149 | sg.submit(new TaskAdd(7.0)); 150 | sg.submit(new TaskPrint()); 151 | sg.barrier(); 152 | 153 | return 0; 154 | } 155 | -------------------------------------------------------------------------------- /examples/src/customhandle.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | const size_t numSlices = 5; 5 | const size_t sliceSize = 100; 6 | 7 | // Shared array divided into slices. 8 | double data[numSlices][sliceSize]; 9 | 10 | // Define own handle type to store user-specific data in all handles. 
11 | template 12 | struct MyHandle : public HandleBase { 13 | size_t index; 14 | void setIndex(size_t i) { index = i; } 15 | }; 16 | 17 | // Specify that our handle type should replace the default handle type. 18 | struct Options : public DefaultOptions { 19 | typedef MyHandle HandleType; 20 | }; 21 | 22 | struct ScaleTask : public Task { 23 | double s; 24 | size_t a, b; 25 | ScaleTask(double s_, Handle &hA, Handle &hB) 26 | : s(s_), a(hA.index), b(hB.index) 27 | { 28 | register_access(ReadWriteAdd::read, hA); 29 | register_access(ReadWriteAdd::write, hB); 30 | } 31 | void run() { 32 | for (size_t i = 0; i < sliceSize; ++i) 33 | data[b][i] = s*data[a][i]; 34 | } 35 | }; 36 | 37 | struct SumTask : public Task { 38 | size_t a, b, c; 39 | SumTask(Handle &hA, 40 | Handle &hB, 41 | Handle &hC) 42 | : a(hA.index), b(hB.index), c(hC.index) 43 | { 44 | register_access(ReadWriteAdd::read, hA); 45 | register_access(ReadWriteAdd::read, hB); 46 | register_access(ReadWriteAdd::write, hC); 47 | } 48 | void run() { 49 | for (size_t i = 0; i < sliceSize; ++i) 50 | data[c][i] = data[a][i] + data[b][i]; 51 | } 52 | }; 53 | 54 | int main() { 55 | for (size_t i = 0; i < sliceSize; ++i) 56 | data[0][i] = 1.0; 57 | 58 | // Define handles for the slices 59 | Handle h[numSlices]; 60 | 61 | // Set the user-defined index 62 | for (size_t i = 0; i < numSlices; ++i) 63 | h[i].setIndex(i); 64 | 65 | SuperGlue sg; 66 | sg.submit(new ScaleTask(2.0, h[0], h[1])); // h_1 = 2*h_0 67 | sg.submit(new ScaleTask(3.0, h[0], h[2])); // h_2 = 3*h_0 68 | sg.submit(new SumTask(h[0], h[1], h[3])); // h_3 = h_0+h_1 69 | sg.submit(new SumTask(h[1], h[2], h[4])); // h_4 = h_1+h_2 70 | 71 | // Wait for all tasks to finish 72 | sg.barrier(); 73 | 74 | // The data may be accessed here, after the barrier 75 | std::cout << "result=[" << data[0][0] << " " << data[1][0] << " " 76 | << data[2][0] << " " << data[3][0] << " " << data[4][0] 77 | << "]" << std::endl; 78 | return 0; 79 | } 80 | 
-------------------------------------------------------------------------------- /examples/src/dag.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/savedag_task.hpp" 3 | #include "sg/option/savedag_data.hpp" 4 | #include 5 | 6 | struct Options : public DefaultOptions { 7 | typedef Enable TaskId; 8 | typedef Enable HandleId; 9 | typedef Enable TaskName; 10 | typedef Enable HandleName; 11 | typedef SaveDAG LogDAG; 12 | }; 13 | 14 | struct gemm : public Task { 15 | gemm(Handle &a, Handle &b, Handle &c) { 16 | register_access(ReadWriteAdd::read, a); 17 | register_access(ReadWriteAdd::read, b); 18 | register_access(ReadWriteAdd::add, c); 19 | } 20 | void run() {} 21 | std::string get_name() { return "gemm"; } 22 | }; 23 | struct syrk : public Task { 24 | syrk(Handle &a, Handle &b) { 25 | register_access(ReadWriteAdd::read, a); 26 | register_access(ReadWriteAdd::add, b); 27 | } 28 | void run() {} 29 | std::string get_name() { return "syrk"; } 30 | }; 31 | struct potrf : public Task { 32 | potrf(Handle &a) { 33 | register_access(ReadWriteAdd::write, a); 34 | } 35 | void run() {} 36 | std::string get_name() { return "potrf"; } 37 | }; 38 | struct trsm : public Task { 39 | trsm(Handle &a, Handle &b) { 40 | register_access(ReadWriteAdd::read, a); 41 | register_access(ReadWriteAdd::write, b); 42 | } 43 | void run() {} 44 | std::string get_name() { return "trsm"; } 45 | }; 46 | 47 | int main() { 48 | 49 | const size_t numBlocks = 3; 50 | 51 | Handle **A = new Handle*[numBlocks]; 52 | for (size_t i = 0; i < numBlocks; ++i) { 53 | A[i] = new Handle[numBlocks]; 54 | for (size_t j = 0; j < numBlocks; ++j) { 55 | std::stringstream ss; 56 | ss<<"("< sg(1); 62 | 63 | for (size_t j = 0; j < numBlocks; j++) { 64 | for (size_t k = 0; k < j; k++) { 65 | for (size_t i = j+1; i < numBlocks; i++) { 66 | // A[i,j] = A[i,j] - A[i,k] * (A[j,k])^t 67 | sg.submit(new gemm(A[i][k], A[j][k], A[i][j])); 68 
| } 69 | } 70 | for (size_t i = 0; i < j; i++) { 71 | // A[j,j] = A[j,j] - A[j,i] * (A[j,i])^t 72 | sg.submit(new syrk(A[j][i], A[j][j])); 73 | } 74 | 75 | // Cholesky Factorization of A[j,j] 76 | sg.submit(new potrf(A[j][j])); 77 | 78 | for (size_t i = j+1; i < numBlocks; i++) { 79 | // A[i,j] <- A[i,j] = X * (A[j,j])^t 80 | sg.submit(new trsm(A[j][j], A[i][j])); 81 | } 82 | } 83 | sg.barrier(); 84 | SaveDAG_task::dump("cholesky.dot"); 85 | SaveDAG_data::dump("cholesky_data.dot"); 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /examples/src/dependencies.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | const size_t numSlices = 5; 5 | const size_t sliceSize = 100; 6 | 7 | struct Options : public DefaultOptions {}; 8 | 9 | struct ScaleTask : public Task { 10 | double s, *a, *b; 11 | ScaleTask(double s_, 12 | double *a_, Handle &hA, 13 | double *b_, Handle &hB) 14 | : s(s_), a(a_), b(b_) 15 | { 16 | register_access(ReadWriteAdd::read, hA); 17 | register_access(ReadWriteAdd::write, hB); 18 | } 19 | void run() { 20 | for (size_t i = 0; i < sliceSize; ++i) 21 | b[i] = s*a[i]; 22 | } 23 | }; 24 | 25 | struct SumTask : public Task { 26 | double *a, *b, *c; 27 | SumTask(double *a_, Handle &hA, 28 | double *b_, Handle &hB, 29 | double *c_, Handle &hC) 30 | : a(a_), b(b_), c(c_) 31 | { 32 | register_access(ReadWriteAdd::read, hA); 33 | register_access(ReadWriteAdd::read, hB); 34 | register_access(ReadWriteAdd::write, hC); 35 | } 36 | void run() { 37 | for (size_t i = 0; i < sliceSize; ++i) 38 | c[i] = a[i]+b[i]; 39 | } 40 | }; 41 | 42 | int main() { 43 | // Shared array divided into slices 44 | double data[numSlices][sliceSize]; 45 | 46 | for (size_t i = 0; i < sliceSize; ++i) 47 | data[0][i] = 1.0; 48 | 49 | // Define handles for the slices 50 | Handle h[numSlices]; 51 | 52 | SuperGlue sg; 53 | sg.submit(new ScaleTask(2.0, 
data[0], h[0], data[1], h[1])); // h_1 = 2*h_0 54 | sg.submit(new ScaleTask(3.0, data[0], h[0], data[2], h[2])); // h_2 = 3*h_0 55 | sg.submit(new SumTask(data[0], h[0], data[1], h[1], data[3], h[3])); // h_3 = h_0+h_1 56 | sg.submit(new SumTask(data[1], h[1], data[2], h[2], data[4], h[4])); // h_4 = h_1+h_2 57 | 58 | // Wait for all tasks to finish 59 | sg.barrier(); 60 | 61 | // The data may be accessed here, after the barrier 62 | std::cout << "result=[" << data[0][0] << " " << data[1][0] << " " 63 | << data[2][0] << " " << data[3][0] << " " << data[4][0] 64 | << "]" << std::endl; 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /examples/src/handlewithdata.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | #include 4 | 5 | struct Options : public DefaultOptions {}; 6 | 7 | // User-defined datatype that includes a handle for dependency management 8 | struct MyFloatData { 9 | Handle handle; 10 | double value; 11 | }; 12 | 13 | // Another user-defined datatype that also includes a handle 14 | struct MyTextData { 15 | Handle handle; 16 | char text[10]; 17 | }; 18 | 19 | struct ScaleTask : public Task { 20 | double s, &a, &b; 21 | ScaleTask(double s_, MyFloatData &a_, MyFloatData &b_) 22 | : s(s_), a(a_.value), b(b_.value) 23 | { 24 | register_access(ReadWriteAdd::read, a_.handle); 25 | register_access(ReadWriteAdd::write, b_.handle); 26 | } 27 | void run() { 28 | b = s*a; 29 | } 30 | }; 31 | 32 | struct ToStrTask : public Task { 33 | double &a; 34 | char *b; 35 | ToStrTask(MyFloatData &a_, MyTextData &b_) 36 | : a(a_.value), b(b_.text) 37 | { 38 | register_access(ReadWriteAdd::read, a_.handle); 39 | register_access(ReadWriteAdd::write, b_.handle); 40 | } 41 | void run() { 42 | sprintf(b, "%f", a); 43 | } 44 | }; 45 | 46 | int main() { 47 | // User-defined datatypes including handles 48 | MyFloatData a, b, c; 49 | MyTextData 
bstr, cstr; 50 | a.value = 1.0; 51 | 52 | SuperGlue sg; 53 | sg.submit(new ScaleTask(2.0, a, b)); // b = 2*a 54 | sg.submit(new ScaleTask(3.0, a, c)); // c = 3*a 55 | sg.submit(new ToStrTask(b, bstr)); 56 | sg.submit(new ToStrTask(c, cstr)); 57 | sg.barrier(); 58 | 59 | std::cout << "b=" << bstr.text << " c=" << cstr.text << std::endl; 60 | return 0; 61 | } 62 | 63 | -------------------------------------------------------------------------------- /examples/src/helloworld.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | struct Options : public DefaultOptions {}; 5 | 6 | struct MyTask : public Task { 7 | void run() { 8 | std::cout << "Hello world!" << std::endl; 9 | } 10 | }; 11 | 12 | int main() { 13 | SuperGlue sg; 14 | sg.submit(new MyTask()); 15 | return 0; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /examples/src/hierarchic.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "sg/superglue.hpp" 3 | #include 4 | #include 5 | 6 | struct Options : public DefaultOptions {}; 7 | 8 | SuperGlue g_sg; 9 | 10 | struct MyTask : public Task { 11 | 12 | int begin, end; 13 | 14 | MyTask(int begin_, int end_) 15 | : begin(begin_), end(end_) 16 | {} 17 | 18 | void run() { 19 | if (end-begin >= 2) { 20 | int mid = begin+(end-begin+1)/2; 21 | if (mid - begin > 0) { 22 | g_sg.submit(new MyTask(begin, mid)); 23 | } 24 | if (end - mid > 0) { 25 | g_sg.submit(new MyTask(mid, end)); 26 | } 27 | } 28 | else { 29 | std::stringstream ss; 30 | ss << "[" << begin << "-" << end << "]" << std::endl; 31 | std::cerr << ss.str(); 32 | } 33 | } 34 | }; 35 | 36 | int main() { 37 | g_sg.submit(new MyTask(0, 9)); 38 | g_sg.barrier(); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /examples/src/logging.cpp: 
-------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/instr_trace.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | // 13 | // This example activates logging and executes some dummy tasks. 14 | // 15 | // The application creates a file "execution.log" which contains 16 | // execution trace information such as which thread executes 17 | // which task, at what time and for how long. 18 | // 19 | // There is a Python script in scripts/ that can be used to 20 | // draw the execution trace. It can be used like this: 21 | // 22 | // ../../scripts/drawsched.py execution.log 23 | // 24 | 25 | //=========================================================================== 26 | // Task Library Options 27 | //=========================================================================== 28 | struct Options : public DefaultOptions { 29 | typedef Enable TaskName; 30 | typedef Trace Instrumentation; 31 | }; 32 | 33 | //=========================================================================== 34 | // Tasks 35 | //=========================================================================== 36 | class TaskA : public Task { 37 | public: 38 | TaskA(Handle &h) { 39 | register_access(ReadWriteAdd::write, h); 40 | } 41 | 42 | void run() { 43 | Time::TimeUnit t = Time::getTime(); 44 | while (Time::getTime() < t + 1000000); 45 | } 46 | 47 | std::string get_name() { return "A"; } 48 | }; 49 | 50 | class TaskB : public Task { 51 | private: 52 | size_t delay; 53 | 54 | public: 55 | TaskB(Handle &h, Handle &h1, size_t delay_) 56 | : delay(delay_) { 57 | register_access(ReadWriteAdd::read, h); 58 | register_access(ReadWriteAdd::write, h1); 59 | } 60 | 61 | void run() { 62 | Time::TimeUnit t = Time::getTime(); 63 | while (Time::getTime() < t + delay); 64 | } 65 | std::string get_name() { return "B"; } 66 | }; 67 | 68 | class TaskD : public Task { 69 | 
public: 70 | TaskD(Handle &h) { 71 | register_access(ReadWriteAdd::read, h); 72 | } 73 | 74 | void run() { 75 | Time::TimeUnit t = Time::getTime(); 76 | while (Time::getTime() < t + 1000000); 77 | } 78 | std::string get_name() { return "D"; } 79 | }; 80 | 81 | class TaskE : public Task { 82 | public: 83 | TaskE(Handle &h1, Handle &h2) { 84 | register_access(ReadWriteAdd::read, h1); 85 | register_access(ReadWriteAdd::read, h2); 86 | } 87 | 88 | void run() { 89 | Time::TimeUnit t = Time::getTime(); 90 | while (Time::getTime() < t + 1000000); 91 | } 92 | std::string get_name() { return "E"; } 93 | }; 94 | 95 | //=========================================================================== 96 | // main 97 | //=========================================================================== 98 | int main(int argc, char *argv[]) { 99 | 100 | int num_threads = -1; 101 | if (argc == 2) { 102 | num_threads = (size_t) atoi(argv[1]); 103 | } 104 | else if (argc != 1) { 105 | printf("usage: %s [num_cores]\n", argv[0]); 106 | exit(0); 107 | } 108 | 109 | SuperGlue sg(num_threads); 110 | Handle a, b, c; 111 | sg.submit(new TaskA(a)); 112 | sg.submit(new TaskB(a, b, 1000000)); 113 | sg.submit(new TaskB(a, c, 2000000)); 114 | sg.submit(new TaskD(b)); 115 | sg.submit(new TaskE(b, c)); 116 | sg.barrier(); 117 | 118 | Trace::dump("execution.log"); 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /examples/src/nested1.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/instr_trace.hpp" 3 | 4 | struct Options : public DefaultOptions { 5 | typedef Trace Instrumentation; 6 | typedef Enable PassTaskExecutor; 7 | }; 8 | 9 | double *A; 10 | Handle *handles; 11 | const int DIM = 8; 12 | 13 | Handle &get_handle(int i, int j, int block_size) { 14 | int dim(DIM); 15 | int offset(0); 16 | 17 | while (block_size > 1) { 18 | offset += dim*dim; 19 | 
block_size /= 2; 20 | dim /= 2; 21 | i /= 2; 22 | j /= 2; 23 | } 24 | return handles[offset + i*dim+j]; 25 | } 26 | 27 | struct nested_task : public Task { 28 | int ii, jj, bsz; 29 | nested_task(int i_, int j_, int bsz_) : ii(i_), jj(j_), bsz(bsz_) { 30 | register_access(ReadWriteAdd::write, get_handle(ii, jj, bsz)); 31 | } 32 | 33 | void run(TaskExecutor &te) { 34 | if (bsz == 1) { 35 | A[ii*DIM+jj] += 1.0; 36 | return; 37 | } 38 | 39 | const int num_blocks = 2; 40 | const int nbsz = bsz/num_blocks; 41 | 42 | for (int i = 0; i < num_blocks; ++i) 43 | for (int j = 0; j < num_blocks; ++j) 44 | te.submit(new nested_task(ii+i*nbsz, jj+j*nbsz, nbsz)); 45 | } 46 | }; 47 | 48 | 49 | int main() { 50 | A = new double[DIM*DIM]; 51 | handles = new Handle[8*8 + 4*4 + 2*2 + 1]; 52 | 53 | assert( &get_handle(0,0,1) == handles ); 54 | assert( &get_handle(7,7,1)+1 == &get_handle(0,0,2) ); 55 | assert( &get_handle(6,6,2)+1 == &get_handle(0,0,4) ); 56 | assert( &get_handle(4,4,4)+1 == &get_handle(0,0,8) ); 57 | 58 | 59 | for (int i = 0; i < 8; ++i) 60 | for (int j = 0; j < 8; ++j) 61 | A[i*8+j] = 0; 62 | 63 | SuperGlue sg; 64 | sg.submit(new nested_task(0, 0, 8)); 65 | sg.barrier(); 66 | 67 | double sum = 0.0; 68 | double max = 0.0; 69 | 70 | for (int i = 0; i < 8; ++i) { 71 | for (int j = 0; j < 8; ++j) { 72 | if (A[i*8+j] > max) 73 | max = A[i*8+j]; 74 | sum += A[i*8+j]; 75 | } 76 | } 77 | 78 | fprintf(stderr, "sum = %f , max = %f\n", sum, max); 79 | 80 | Options::Instrumentation::dump("trace.log"); 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /examples/src/nested2.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/instr_trace.hpp" 3 | 4 | struct Options : public DefaultOptions { 5 | typedef Trace Instrumentation; 6 | typedef Enable PassTaskExecutor; 7 | typedef Enable Subtasks; 8 | }; 9 | 10 | double *A; 11 | Handle *handles; 
12 | const int DIM = 8; 13 | 14 | Handle &get_handle(int i, int j, int block_size) { 15 | int dim(DIM); 16 | int offset(0); 17 | 18 | while (block_size > 1) { 19 | offset += dim*dim; 20 | block_size /= 2; 21 | dim /= 2; 22 | i /= 2; 23 | j /= 2; 24 | } 25 | return handles[offset + i*dim+j]; 26 | } 27 | 28 | struct nested_task : public Task { 29 | int ii, jj, bsz; 30 | nested_task(int i_, int j_, int bsz_) : ii(i_), jj(j_), bsz(bsz_) { 31 | register_access(ReadWriteAdd::write, get_handle(ii, jj, bsz)); 32 | } 33 | 34 | void run(TaskExecutor &te) { 35 | if (bsz == 1) { 36 | A[ii*DIM+jj] += 1.0; 37 | return; 38 | } 39 | 40 | const int num_blocks = 2; 41 | const int nbsz = bsz/num_blocks; 42 | 43 | for (int i = 0; i < num_blocks; ++i) 44 | for (int j = 0; j < num_blocks; ++j) 45 | te.subtask(this, new nested_task(ii+i*nbsz, jj+j*nbsz, nbsz)); 46 | } 47 | }; 48 | 49 | 50 | int main() { 51 | A = new double[DIM*DIM]; 52 | handles = new Handle[8*8 + 4*4 + 2*2 + 1]; 53 | 54 | assert( &get_handle(0,0,1) == handles ); 55 | assert( &get_handle(7,7,1)+1 == &get_handle(0,0,2) ); 56 | assert( &get_handle(6,6,2)+1 == &get_handle(0,0,4) ); 57 | assert( &get_handle(4,4,4)+1 == &get_handle(0,0,8) ); 58 | 59 | 60 | for (int i = 0; i < 8; ++i) 61 | for (int j = 0; j < 8; ++j) 62 | A[i*8+j] = 0; 63 | 64 | SuperGlue sg; 65 | sg.submit(new nested_task(0, 0, 8)); 66 | sg.barrier(); 67 | 68 | double sum = 0.0; 69 | double max = 0.0; 70 | 71 | for (int i = 0; i < 8; ++i) { 72 | for (int j = 0; j < 8; ++j) { 73 | if (A[i*8+j] > max) 74 | max = A[i*8+j]; 75 | sum += A[i*8+j]; 76 | } 77 | } 78 | 79 | fprintf(stderr, "sum = %f , max = %f\n", sum, max); 80 | 81 | Options::Instrumentation::dump("trace.log"); 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /examples/src/pinnedtasks.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | ThreadIDType id = 0; 
5 | 6 | struct Options : public DefaultOptions { 7 | typedef Disable Stealing; 8 | typedef Enable PauseExecution; 9 | }; 10 | 11 | struct MyTask : public Task { 12 | void run() { 13 | if (id == 0) 14 | id = ThreadUtil::get_current_thread_id(); 15 | if (id != ThreadUtil::get_current_thread_id()) { 16 | std::cerr << "Task running on wrong thread" << std::endl; 17 | exit(0); 18 | } 19 | } 20 | }; 21 | 22 | int main() { 23 | 24 | SuperGlue sg; 25 | 26 | for (int i = 0; i < 1000; ++i) 27 | sg.submit(new MyTask(), 0); // all tasks added to readyqueue 0 28 | 29 | // No tasks will be executed yet 30 | 31 | sg.start_executing(); 32 | 33 | // Wait for all tasks to finish 34 | sg.barrier(); 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /examples/src/subtasks.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | struct Options : public DefaultOptions {}; 5 | 6 | const int num = 10, n1 = 1000, n2 = 1000; 7 | int data[num] = {0}; 8 | Handle h[num]; 9 | 10 | struct MyTask : public Task { 11 | int i; 12 | MyTask(size_t i_, Handle &h) : i(i_) { 13 | register_access(ReadWriteAdd::write, h); 14 | } 15 | void run() { ++data[i]; } 16 | }; 17 | 18 | struct TaskCreator : public Task { 19 | SuperGlue &sg; 20 | size_t begin, end; 21 | TaskCreator(SuperGlue &sg_, 22 | size_t begin_, size_t end_) 23 | : sg(sg_), begin(begin_), end(end_) {} 24 | 25 | void run() { 26 | for (size_t i = begin; i < end; ++i) { 27 | size_t idx = i % num; 28 | sg.submit(new MyTask(idx, h[idx])); 29 | } 30 | } 31 | }; 32 | 33 | int main() { 34 | SuperGlue sg; 35 | for (int i = 0; i < n1; ++i) 36 | sg.submit(new TaskCreator(sg, i*n2, (i+1)*n2)); 37 | sg.barrier(); 38 | 39 | int res = 0; 40 | for (int i = 0; i < num; ++i) 41 | res += data[i]; 42 | std::cout << "result=" << res << std::endl; 43 | return 0; 44 | } 45 | 
-------------------------------------------------------------------------------- /examples/src/tempvars.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | #include 4 | 5 | template 6 | struct MyHandle : public HandleBase { 7 | int data; 8 | }; 9 | 10 | struct Options : public DefaultOptions { 11 | // This changes the handle to contain data 12 | typedef MyHandle HandleType; 13 | }; 14 | 15 | SuperGlue g_sg; 16 | 17 | struct Producer : public Task { 18 | Handle *out; 19 | 20 | Producer(Handle *out_) : out(out_) { 21 | register_access(ReadWriteAdd::write, *out); 22 | } 23 | 24 | void run() { 25 | out->data = 10; 26 | } 27 | }; 28 | 29 | // task that uses temporary variable and deletes it when finished 30 | struct ConsumeAndDelete : public Task { 31 | Handle *in; 32 | ConsumeAndDelete(Handle *in_) : in(in_) { 33 | register_access(ReadWriteAdd::write, *in); // register as write 34 | } 35 | void run() { 36 | std::stringstream ss; 37 | ss << "consume&delete " << in->data << " " << in << std::endl; 38 | std::cerr << ss.str(); 39 | // must not delete handle here. 
40 | // registered handles may only be deleted in the destructor 41 | } 42 | ~ConsumeAndDelete() { 43 | delete in; 44 | } 45 | }; 46 | 47 | // task that uses temporary variable 48 | struct Consumer : public Task { 49 | Handle *in; 50 | 51 | Consumer(Handle *in_) : in(in_) { 52 | register_access(ReadWriteAdd::read, *in); 53 | } 54 | void run() { 55 | std::stringstream ss; 56 | ss << "consume " << in->data << " " << in << std::endl; 57 | std::cerr << ss.str(); 58 | } 59 | }; 60 | 61 | // task to delete temporary variable when all tasks are finished with it 62 | struct Deleter : public Task { 63 | Handle *in; 64 | Deleter(Handle *in_) : in(in_) { 65 | register_access(ReadWriteAdd::write, *in); 66 | } 67 | void run() { 68 | std::stringstream ss; 69 | ss << "delete " << in->data << " " << in << std::endl; 70 | std::cerr << ss.str(); 71 | } 72 | ~Deleter() { 73 | delete in; 74 | } 75 | }; 76 | 77 | struct OuterTask : public Task { 78 | 79 | void run() { 80 | { 81 | Handle *temp(new Handle()); 82 | g_sg.submit(new Producer(temp)); 83 | g_sg.submit(new ConsumeAndDelete(temp)); 84 | } 85 | 86 | { 87 | Handle *temp(new Handle()); 88 | g_sg.submit(new Producer(temp)); 89 | g_sg.submit(new Consumer(temp)); 90 | g_sg.submit(new Consumer(temp)); 91 | g_sg.submit(new Consumer(temp)); 92 | g_sg.submit(new Deleter(temp)); 93 | } 94 | } 95 | }; 96 | 97 | int main() { 98 | for (size_t i(0); i != 10; ++i) 99 | g_sg.submit(new OuterTask()); 100 | g_sg.barrier(); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /examples/src/vardeps.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/core/defaults.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | // 13 | // This example uses tasks with variable number of dependencies, and mixes 14 | // these together with tasks with a fixed number 
of dependencies. 15 | // 16 | // What the application is doing is first initializing all elements of a 17 | // matrix with ones (one task per element), and then replace the diagonal 18 | // element with the sum of all elements to the left of the diagonal. 19 | // 20 | // This is just an example of how to create tasks with variable number of 21 | // dependencies. The tasks are too small to run efficiently. 22 | // 23 | 24 | const size_t N = 6; 25 | 26 | //=========================================================================== 27 | // Task Library Options 28 | //=========================================================================== 29 | struct Options : public DefaultOptions {}; 30 | 31 | //=========================================================================== 32 | // Tasks 33 | //=========================================================================== 34 | 35 | class InitBlock : public Task { 36 | double *data; 37 | public: 38 | InitBlock(double *data_, Handle *h, size_t i, size_t j) 39 | : data(&data_[j*N+i]) { 40 | register_access(ReadWriteAdd::write, h[j*N+i]); 41 | } 42 | 43 | void run() { 44 | *data = 1.0; 45 | } 46 | }; 47 | 48 | class DiagTask : public Task { 49 | private: 50 | double *data; 51 | size_t j; 52 | 53 | public: 54 | DiagTask(double *data_, Handle *h, size_t j_) 55 | : data(data_), j(j_) { 56 | for (size_t i = 0; i < j; ++i) 57 | register_access(ReadWriteAdd::read, h[j*N+i]); 58 | register_access(ReadWriteAdd::write, h[j*N+j]); 59 | } 60 | 61 | void run() { 62 | double tmp = 0.0; 63 | for (size_t i = 0; i < j; ++i) 64 | tmp += data[j*N+i]; 65 | data[j*N+j] = tmp; 66 | } 67 | }; 68 | 69 | //=========================================================================== 70 | // main 71 | //=========================================================================== 72 | int main(int argc, char *argv[]) { 73 | 74 | int num_threads = -1; 75 | if (argc == 2) { 76 | num_threads = (size_t) atoi(argv[1]); 77 | } 78 | else if (argc != 1) { 79 | 
printf("usage: %s [num_cores]\n", argv[0]); 80 | exit(0); 81 | } 82 | 83 | Handle *h = new Handle[N*N]; 84 | double *data = new double[N*N]; 85 | 86 | SuperGlue sg(num_threads); 87 | 88 | for (size_t i = 0; i < N; ++i) 89 | for (size_t j = 0; j < N; ++j) 90 | sg.submit(new InitBlock(data, h, i, j)); 91 | 92 | for (size_t i = 0; i < N; ++i) 93 | sg.submit(new DiagTask(data, h, i)); 94 | 95 | sg.barrier(); 96 | 97 | for (size_t i = 0; i < N; ++i) { 98 | for (size_t j = 0; j < N; ++j) 99 | std::cout << data[j*N+i] << " "; 100 | std::cout << std::endl; 101 | } 102 | 103 | delete [] data; 104 | delete [] h; 105 | 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /examples/src/workspace.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include 3 | 4 | struct Options : public DefaultOptions { 5 | typedef Enable PassTaskExecutor; 6 | typedef Enable ThreadWorkspace; 7 | enum { ThreadWorkspace_size = 102400 }; 8 | }; 9 | 10 | struct MyTask : public Task { 11 | int *a; 12 | 13 | MyTask(int *a_) : a(a_) {} 14 | 15 | // The run() method now takes a reference to the TaskExecutor as its parameter.
16 | void run(TaskExecutor &te) { 17 | // Allocate some memory 18 | int *mem1 = (int *) te.get_thread_workspace(1024 * sizeof(int)); 19 | // Allocate some more memory 20 | int *mem2 = (int *) te.get_thread_workspace(1024 * sizeof(int)); 21 | 22 | // Fill the allocated memory 23 | for (int i = 0; i < 1024; ++i) 24 | mem2[i] = mem1[i] = i; 25 | 26 | // Sum the memory 27 | *a = 0; 28 | for (int i = 0; i < 1024; ++i) 29 | *a += mem2[i] + mem1[i]; 30 | 31 | // Memory is automatically freed when task is finished 32 | } 33 | }; 34 | 35 | int main() { 36 | // Result variable; the task writes the sum into it through the pointer it is given 37 | int res; 38 | 39 | SuperGlue sg; 40 | sg.submit(new MyTask(&res)); 41 | sg.barrier(); 42 | 43 | // The result may be read here, after the barrier 44 | std::cout << "result=" << res << std::endl; 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /examples_dep/cholesky/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../../include -pthread 2 | 3 | ifneq ("$(wildcard $(MKL_ROOT)/lib/intel64/libmkl_core.a)","") 4 | 5 | MKL_INC=-isystem $(MKL_ROOT)/include 6 | MKL_LIBS= \ 7 | -Wl,--start-group \ 8 | $(MKL_ROOT)/lib/intel64/libmkl_intel_lp64.a \ 9 | $(MKL_ROOT)/lib/intel64/libmkl_sequential.a \ 10 | $(MKL_ROOT)/lib/intel64/libmkl_core.a \ 11 | -ldl \ 12 | -Wl,--end-group 13 | 14 | cholesky_mkl: cholesky.cpp Makefile 15 | $(CXX) $(FLAGS) $(MKL_INC) -DUSE_MKL cholesky.cpp -o $@ $(MKL_LIBS) -lm 16 | 17 | endif 18 | 19 | 20 | ifneq ("$(wildcard $(ACML_ROOT)/gfortran64_fma4/lib/libacml.a)","") 21 | 22 | ACML_INC=-isystem $(ACML_ROOT)/gfortran64_fma4/include 23 | ACML_LIBS=-L$(ACML_ROOT)/gfortran64_fma4/lib -lacml 24 | 25 | cholesky_acml: cholesky.cpp Makefile 26 | $(CXX) $(FLAGS) $(ACML_INC) -DUSE_PRIO -DUSE_ACML cholesky.cpp -o $@ $(ACML_LIBS) -lm 27 | 28 | endif 29 | 30 | cholesky: 31 | @echo "### Warning: could not find MKL or ACML.
Neither MKL_ROOT nor ACML_ROOT set." 32 | @echo "Skipping Cholesky example." 33 | 34 | .PHONY: cholesky 35 | -------------------------------------------------------------------------------- /include/sg/core/access.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_ACCESS_HPP_INCLUDED 2 | #define SG_ACCESS_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | 7 | namespace sg { 8 | 9 | template class Access; 10 | template class Handle; 11 | template class TaskBase; 12 | 13 | namespace detail { 14 | 15 | // ============================================================================ 16 | // Option Contributions 17 | // ============================================================================ 18 | template class Access_Contributions; 19 | 20 | template 21 | class Access_Contributions { 22 | public: 23 | static bool use_contrib() { return false; } 24 | static void set_use_contrib(bool ) {} 25 | }; 26 | 27 | template 28 | class Access_Contributions { 29 | private: 30 | bool use_contrib_flag; 31 | public: 32 | typedef typename Options::ContributionType Contribution; 33 | 34 | Access_Contributions() : use_contrib_flag(false) {} 35 | void set_use_contrib(bool value) { use_contrib_flag = value; } 36 | bool use_contrib() const { return use_contrib_flag; } 37 | void add_contribution(Contribution c) { 38 | this->get_handle()->add_contribution(c); 39 | } 40 | }; 41 | 42 | // ============================================================================ 43 | // Option Lockable 44 | // ============================================================================ 45 | template class Access_Lockable; 46 | 47 | template 48 | class Access_Lockable { 49 | typedef typename Options::version_type version_type; 50 | typedef typename Options::lockcount_type lockcount_type; 51 | typedef typename Options::WaitListType TaskQueue; 52 | typedef typename TaskQueue::unsafe_t TaskQueueUnsafe; 53 | 54 | public: 55 | static bool get_lock() { return true; } 
56 | static bool needs_lock() { return false; } 57 | static void release_lock(TaskQueueUnsafe &) {} 58 | static bool get_lock_or_notify(TaskBase *) { return true; } 59 | static void set_required_quantity(lockcount_type required_) {} 60 | version_type finished(TaskQueueUnsafe &woken) { 61 | const Access *this_(static_cast *>(this)); 62 | return this_->handle->increase_current_version(woken); 63 | } 64 | }; 65 | 66 | template 67 | class Access_Lockable { 68 | typedef typename Options::lockcount_type lockcount_type; 69 | typedef typename Options::version_type version_type; 70 | typedef typename Options::WaitListType TaskQueue; 71 | typedef typename TaskQueue::unsafe_t TaskQueueUnsafe; 72 | private: 73 | lockcount_type required; 74 | public: 75 | Access_Lockable() : required(0) {} 76 | 77 | // Check if lock is available, or add a listener 78 | bool get_lock_or_notify(TaskBase *task) const { 79 | if (required == 0) 80 | return true; 81 | 82 | const Access *this_(static_cast *>(this)); 83 | return this_->handle->get_lock_or_notify(required, task); 84 | } 85 | 86 | // Get lock if its free, or return false. 
87 | // Low level interface to lock several objects simultaneously 88 | bool get_lock() const { 89 | if (required == 0) 90 | return true; 91 | 92 | const Access *this_(static_cast *>(this)); 93 | return this_->handle->get_lock(required); 94 | } 95 | 96 | // Low level interface to unlock when failed to lock several objects 97 | void release_lock(TaskQueueUnsafe &woken) const { 98 | if (required == 0) 99 | return; 100 | 101 | const Access *this_(static_cast *>(this)); 102 | this_->handle->release_lock(required, woken); 103 | } 104 | 105 | public: 106 | void set_required_quantity(lockcount_type required_) { required = required_; } 107 | bool needs_lock() const { return required != 0; } 108 | version_type finished(TaskQueueUnsafe &woken) { 109 | const Access *this_(static_cast *>(this)); 110 | version_type ver; 111 | if (this_->use_contrib()) 112 | ver = this_->handle->increase_current_version_no_unlock(woken); 113 | else 114 | ver = this_->handle->increase_current_version_unlock(required, woken); 115 | return ver; 116 | } 117 | }; 118 | 119 | } // namespace detail 120 | 121 | // ============================================================================ 122 | // Access 123 | // ============================================================================ 124 | // Wraps up the selection of which action to perform depending on access type 125 | template 126 | class Access 127 | : public detail::Access_Lockable, 128 | public detail::Access_Contributions 129 | { 130 | public: 131 | typedef typename Options::AccessInfoType AccessInfo; 132 | typedef typename Options::version_type version_type; 133 | 134 | Handle *handle; 135 | version_type required_version; 136 | 137 | Access() {} 138 | Access(Handle *handle_, typename Options::version_type version_) 139 | : handle(handle_), required_version(version_) {} 140 | 141 | Handle *get_handle() const { 142 | return handle; 143 | } 144 | }; 145 | 146 | } // namespace sg 147 | 148 | #endif // SG_ACCESS_HPP_INCLUDED 149 | 
namespace sg {

// ReadWriteAdd: the access-type description with three access types.
// Each AccessType<n> specialization publishes three compile-time flags:
//   commutative - accesses of this type may be reordered among themselves
//   exclusive   - only one access of this type may run at a time
//   readonly    - the access does not modify the data
class ReadWriteAdd {
public:
    enum Type { read = 0, add, write, num_accesses };
    template<int n> struct AccessType {};
};

template<> struct ReadWriteAdd::AccessType<ReadWriteAdd::read> {
    enum { commutative = 1 };
    enum { exclusive = 0 };
    enum { readonly = 1 };
};

template<> struct ReadWriteAdd::AccessType<ReadWriteAdd::write> {
    enum { commutative = 0 };
    enum { exclusive = 1 };
    enum { readonly = 0 };
};

template<> struct ReadWriteAdd::AccessType<ReadWriteAdd::add> {
    enum { commutative = 1 };
    enum { exclusive = 1 };
    enum { readonly = 0 };
};

// AccessUtil: queries the flags of Options::AccessInfoType, either for an
// access type that is only known at run time (needs_lock(), concurrent(),
// commutative(), readonly()) or at compile time over all types (AnyType).
template<typename Options>
class AccessUtil {
    typedef typename Options::AccessInfoType AccessInfo;

    // Run-time dispatch: walks n-1, n-2, ..., 0 and returns the predicate
    // result for the access type whose index equals `type`.
    template<int n, template<typename T> class Predicate, bool stop = (n==0)>
    struct Aux {
        static bool check(int type) {
            typedef typename AccessInfo::template AccessType<n-1> AccessType;
            if (type == n-1)
                return Predicate<AccessType>::result;
            else
                return Aux<n-1, Predicate>::check(type);
        }
    };

    // End of recursion. Reached only when `type` matches no access type,
    // in which case the answer is false.
    template<int n, template<typename T> class Predicate>
    struct Aux<n, Predicate, true> {
        // should never be called, but required for compilation
        static bool check(int) { return false; }
    };

    // Compile-time "or" of the predicate over access types n-1, ..., 0.
    template<int n, template<typename T> class Predicate, bool stop = (n==0)>
    struct AnyTypeAux {
        typedef typename AccessInfo::template AccessType<n-1> AccessType;
        enum { result = (Predicate<AccessType>::result == 1) || (AnyTypeAux<n-1, Predicate>::result == 1) };
    };
    template<int n, template<typename T> class Predicate>
    struct AnyTypeAux<n, Predicate, true> {
        enum { result = 0 };
    };

public:

    // exclusive and commutative: order is free, but accesses must not overlap
    template<typename T>
    struct NeedsLockPredicate {
        enum { result = ((T::exclusive == 1) && (T::commutative == 1)) ? 1 : 0 };
    };

    // commutative and not exclusive: accesses may overlap
    template<typename T>
    struct ConcurrentPredicate {
        enum { result = ((T::exclusive == 0) && (T::commutative == 1)) ? 1 : 0 };
    };

    template<typename T>
    struct CommutativePredicate {
        enum { result = T::commutative };
    };

    template<typename T>
    struct ReadOnlyPredicate {
        enum { result = T::readonly };
    };

    // result is 1 if Predicate holds for at least one access type, else 0
    template< template<typename T> class Predicate >
    struct AnyType {
        enum { result = AnyTypeAux<AccessInfo::num_accesses, Predicate>::result };
    };

    static bool needs_lock(int type) {
        return Aux<AccessInfo::num_accesses, NeedsLockPredicate>::check(type);
    }

    static bool concurrent(int type) {
        return Aux<AccessInfo::num_accesses, ConcurrentPredicate>::check(type);
    }

    static bool commutative(int type) {
        return Aux<AccessInfo::num_accesses, CommutativePredicate>::check(type);
    }

    static bool readonly(int type) {
        return Aux<AccessInfo::num_accesses, ReadOnlyPredicate>::check(type);
    }
};

} // namespace sg
padding2[Options::CACHE_LINE_SIZE]; 21 | int state; // written by anyone, 3 times per try, read by everybody 22 | int abort; // read/written on every task submit. written by anyone, 1 time per try, read by main thread. 23 | char padding3[Options::CACHE_LINE_SIZE]; 24 | 25 | private: 26 | 27 | public: 28 | BarrierProtocol(ThreadingManager &tm_) 29 | : tm(tm_), barrier_counter(0), state(0), abort(1) 30 | { 31 | } 32 | 33 | // Called from TaskExecutor: return true if we are allowed to run tasks 34 | bool update_barrier_state(TaskExecutor &te) { 35 | Atomic::compiler_fence(); 36 | const int local_state(state); 37 | 38 | // return if not in barrier 39 | if (local_state == 0) { 40 | if (te.my_barrier_state != 0) { 41 | te.my_barrier_state = 0; 42 | te.after_barrier(); 43 | } 44 | return true; 45 | } 46 | 47 | // set abort flag if we have tasks 48 | if (!te.get_task_queue().empty()) { 49 | if (abort != 1) { 50 | abort = 1; 51 | Atomic::memory_fence_producer(); // make sure abort is visible before state changes 52 | } 53 | } 54 | 55 | // return if barrier is in same state as last time 56 | if (te.my_barrier_state == local_state) 57 | return abort == 1; 58 | 59 | // new state 60 | 61 | te.my_barrier_state = local_state; 62 | 63 | // enter barrier 64 | const unsigned int local_barrier_counter(Atomic::decrease_nv(&barrier_counter)); 65 | 66 | // return if not last 67 | if (local_barrier_counter != 0) 68 | return abort == 1; 69 | 70 | // we are last to enter the barrier 71 | 72 | if (local_state == 1) { 73 | const unsigned int num_workers = tm.get_num_cpus() - 1; 74 | // if single worker, the barrier is finished 75 | if (num_workers == 1) { 76 | te.my_barrier_state = 0; 77 | state = 0; 78 | te.after_barrier(); 79 | return true; 80 | } 81 | 82 | // setup state 2, join it, and return 83 | te.my_barrier_state = 2; 84 | barrier_counter = num_workers - 1; 85 | Atomic::memory_fence_producer(); // make sure barrier_counter is visible before state changes 86 | state = 2; 87 | return 
abort == 1; 88 | } 89 | 90 | // last in for stage 2 -- finish barrier 91 | te.my_barrier_state = 0; 92 | state = 0; 93 | te.after_barrier(); 94 | return true; 95 | } 96 | 97 | // cannot be invoked by more than one thread at a time 98 | void barrier(TaskExecutor &te) { 99 | tm.start_executing(); 100 | 101 | { 102 | TaskQueueUnsafe woken; 103 | while (te.execute_tasks(woken)); 104 | } 105 | 106 | const unsigned int num_workers(static_cast(tm.get_num_cpus())-1); 107 | 108 | if (num_workers == 0) 109 | return; 110 | 111 | for (;;) { 112 | { 113 | TaskQueueUnsafe woken; 114 | while (te.execute_tasks(woken)); 115 | } 116 | 117 | barrier_counter = num_workers; 118 | abort = 0; 119 | Atomic::memory_fence_producer(); 120 | state = 1; 121 | 122 | for (;;) { 123 | const int local_state(state); 124 | if (local_state == 0) { 125 | Atomic::memory_fence_consumer(); 126 | const int local_abort(abort); 127 | if (local_abort == 1) 128 | break; 129 | if (!te.get_task_queue().empty_safe()) 130 | break; 131 | te.after_barrier(); 132 | return; 133 | } 134 | 135 | const int local_abort(abort); 136 | if (local_abort == 1 || !te.get_task_queue().empty_safe()) { 137 | while (state != 0) { 138 | 139 | { 140 | TaskQueueUnsafe woken; 141 | while (te.execute_tasks(woken)); 142 | } 143 | 144 | Atomic::compiler_fence(); 145 | } 146 | break; 147 | } 148 | Atomic::rep_nop(); 149 | } 150 | } 151 | } 152 | 153 | void signal_new_work() { 154 | Atomic::compiler_fence(); 155 | const int local_abort(abort); 156 | if (local_abort != 1) { 157 | abort = 1; 158 | Atomic::memory_fence_producer(); 159 | } 160 | } 161 | }; 162 | 163 | } // namespace sg 164 | 165 | #endif // SG_BARRIERPROTOCOL_HPP_INCLUDED 166 | -------------------------------------------------------------------------------- /include/sg/core/contrib.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_CONTRIB_HPP_INCLUDED 2 | #define SG_CONTRIB_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | 7 | 
namespace sg {

// Contribution: header of a single heap block that holds a temporary buffer
// of `size` elements of T directly after the header. The buffer is later
// added element-wise into `dst` (apply_and_free) or into another
// contribution (merge).
//
// Blocks are created with allocate() and released with free(); the
// constructor is private and the destructor is never run.
template<typename T>
class Contribution {
public:
    size_t size;      // number of T elements stored after the header
    size_t versions;  // number of contributions merged into this one
    T *dst;           // destination the buffer is eventually added to

private:
    Contribution(T *dst_, size_t size_) : size(size_), versions(1), dst(dst_) {}

    // Alignment of T without relying on C++11 alignof: the offset of `value`
    // in this struct is the alignment, and it equals the size difference.
    struct AlignProbe { char pad; T value; };
    enum { data_alignment = sizeof(AlignProbe) - sizeof(T) };

public:
    // dst[i] += src[i] for i in [0, size)
    static void merge_impl(T *src, T *dst, size_t size) {
        for (size_t i(0); i < size; ++i)
            dst[i] += src[i];
    }

    size_t get_versions() const { return versions; }

    // Adds the buffer of `temp` into this buffer, accumulates its version
    // count, and releases `temp`. Both buffers must have the same size.
    void merge(Contribution *temp) {
        assert(size == temp->size);
        versions += temp->versions;
        merge_impl(get_data(*temp), get_data(*this), size);
        free(temp);
    }

    // Byte offset from the start of the block to the first T element.
    static size_t data_offset() {
        // Align upwards to the alignment of T, and never to less than the
        // 4 bytes that were always used. Rounding to 4 only would misplace
        // the buffer for element types with stricter alignment.
        const size_t min_align = 4;
        const size_t t_align = static_cast<size_t>(data_alignment);
        const size_t align = t_align > min_align ? t_align : min_align;
        return (sizeof(Contribution) + align - 1) & ~(align - 1);
    }

    // Pointer to the buffer that follows the header of `c`.
    static T *get_data(Contribution &c) {
        return (T*) (((char *) &c) + data_offset());
    }

    // Adds the buffer of `c` into c->dst and releases `c`.
    static void apply_and_free(Contribution *c) {
        merge_impl(get_data(*c), c->dst, c->size);
        free(c);
    }

    // Allocates header + `size` elements in one block. The elements are not
    // constructed or initialized here.
    static Contribution *allocate(size_t size, T *dest) {
        char *c = new char[data_offset() + size*sizeof(T)];
        return new (c) Contribution(dest, size);
    }
    static void free(Contribution *c) {
        // Note that the Contribution destructor is never called
        delete [] (char *) c;
    }
};

} // namespace sg
| Key first_key() { return array[0].key; } 23 | Value pop_front() { 24 | Value value(array[0].value); 25 | array.pop_front(); 26 | return value; 27 | } 28 | 29 | Value &operator[](Key key) { 30 | // Want to check last item, so first we must make sure such item exist. 31 | if (array.empty()) { 32 | array.push_back(element_t(key)); 33 | return array[0].value; 34 | } 35 | 36 | // Check last item first, since that is the expected location 37 | const element_t &last( array[array.size()-1] ); 38 | if (last.key == key) 39 | return array[array.size()-1].value; 40 | if (last.key < key) { 41 | array.push_back(element_t(key)); 42 | return array[array.size()-1].value; 43 | } 44 | 45 | // Binary search to find the version 46 | typename deque_t::iterator iter = lower_bound(array.begin(), array.end(), key); 47 | // iter must be valid, cannot be array.end() as we already checked the last element 48 | 49 | if (iter->key == key) 50 | return iter->value; 51 | 52 | // New version, but not at the end. 53 | // An expensive insert is done here, but it should be a rare case since 54 | // requests of versions are expected (but not required) to come in order. 
// store next version for each type
//
// Keeps one "next required version" counter per access type. schedule()
// hands out the version a new access of the given type must wait for and
// advances the counters of the other types.
//
// NOTE(review): template parameter/argument lists in this block appear to
// have been lost in extraction. Presumably the class is
// `template<typename Options, typename AccessInfo>`, IncreaseAux is
// `template<int n, bool stop = (n==0)>` with a `<n, true>` specialization,
// and the inner uses are `AccessType<n-1>`, `IncreaseAux<n-1>` and
// `IncreaseAux<AccessInfo::num_accesses>` -- confirm against upstream.
template
class SchedulerVersionImpl {
private:
    typedef typename Options::version_type version_type;

    // one counter per access type, indexed by the access type value
    version_type required_version[AccessInfo::num_accesses];

    // increase scheduler version on dynamic type
    template
    struct IncreaseAux {
        static void increase(int type, version_type *required_version, version_type next_ver) {
            typedef typename AccessInfo::template AccessType AccessType;

            // handle the remaining access types first (recursion)
            IncreaseAux::increase(type, required_version, next_ver);

            // set next-required-version to next version, except if the type is commutative,
            // in which case the next-required-version for the type should not be modified.
            if (type != n-1 || AccessType::commutative == 0)
                required_version[n-1] = next_ver;
        }
    };
    // end of recursion: nothing left to update
    template
    struct IncreaseAux {
        static void increase(int /*type*/, version_type * /*required_version*/, version_type /*next_ver*/) {}
    };

public:
    // All counters start at zero.
    SchedulerVersionImpl() {
        std::memset(required_version, 0, sizeof(required_version));
    }

    // One more than the largest counter.
    version_type next_version() {
        return *std::max_element(required_version, required_version + AccessInfo::num_accesses)+1;
    }

    // Returns the version required by a new access of type `type` (the
    // counter value before the update) and advances the counters.
    // `type` is used directly as an array index and is not range-checked.
    version_type schedule(int type) {
        const version_type ver = required_version[type];
        IncreaseAux::increase(type, required_version, next_version());
        return ver;
    }
};

// Specialize SchedulerVersionImpl for default ReadWriteAdd
// to use 2 counters instead of 3
//
// Only ReadWriteAdd::read and ReadWriteAdd::add index the array; every other
// type value falls through the switch and is treated as a write.
//
// NOTE(review): presumably `template<typename Options>` specializing
// `SchedulerVersionImpl<Options, ReadWriteAdd>` -- confirm against upstream.
template
class SchedulerVersionImpl {
private:
    typedef typename Options::version_type version_type;
    version_type required_version[2];

public:
    SchedulerVersionImpl() {
        required_version[0] = required_version[1] = 0;
    }

    // One more than the larger of the two counters.
    version_type next_version() {
        return std::max(required_version[0], required_version[1])+1;
    }

    version_type schedule(int type) {
        const version_type next_ver = next_version();
        switch (type) {
        case ReadWriteAdd::read:
            // a read advances the add counter and keeps its own
            required_version[ReadWriteAdd::add] = next_ver;
            return required_version[ReadWriteAdd::read];
        case ReadWriteAdd::add:
            // an add advances the read counter and keeps its own
            required_version[ReadWriteAdd::read] = next_ver;
            return required_version[ReadWriteAdd::add];
        }
        // any other type: advance both counters and require the version
        // just before the new one
        required_version[0] = required_version[1] = next_ver;
        return next_ver-1;
    }
};
// Busy-waiting lock built on the Atomic:: primitives.
// Copying is disabled (copy constructor and assignment are private and
// not defined).
class SpinLock {
private:
    enum { CACHE_LINE_SIZE = 64 };
    // lock word: 0 when free (see constructor and is_locked())
    unsigned int v_;
    // pads the object up to CACHE_LINE_SIZE bytes
    // (presumably to keep separate locks on separate cache lines)
    char padding[CACHE_LINE_SIZE-sizeof(unsigned int)];

    SpinLock(const SpinLock &);
    const SpinLock &operator=(const SpinLock &);

public:
    SpinLock() : v_(0) {}

    // Single attempt to take the lock; lock() treats a true return value
    // as "lock acquired".
    bool try_lock() {
        return Atomic::lock_test_and_set(&v_);
    }
    // Plain (non-atomic) read of the lock word.
    bool is_locked() {
        return v_ != 0;
    }
    void unlock() {
        Atomic::lock_release(&v_);
    }
    // Spins until the lock is acquired. The lock word is only read while it
    // looks taken, and the atomic try_lock() is attempted once it does not.
    void lock() {
        while (v_ == 1)
            Atomic::rep_nop();
        while (!try_lock())
            while (v_ == 1)
                Atomic::rep_nop();
    }
};

// Scope guard: locks the SpinLock on construction and unlocks it on
// destruction. Not copyable.
class SpinLockScoped {
private:
    SpinLock & sp_;

    SpinLockScoped( SpinLockScoped const & );
    SpinLockScoped & operator=( SpinLockScoped const & );
public:
    explicit SpinLockScoped( SpinLock & sp ): sp_( sp ) {
        sp.lock();
    }

    SG_INLINE ~SpinLockScoped() {
        sp_.unlock();
    }
};

// Scope guard that makes a single try_lock() attempt on construction.
// `success` tells whether the lock was taken; the destructor unlocks only
// in that case. Not copyable.
class SpinLockTryLock {
private:
    SpinLock & sp_;

    SpinLockTryLock(SpinLockTryLock const &);
    SpinLockTryLock &operator=(SpinLockTryLock const &);

public:
    // result of the try_lock() made by the constructor
    const bool success;

    explicit SpinLockTryLock(SpinLock &sp) : sp_(sp), success(sp.try_lock()) {}

    ~SpinLockTryLock() {
        if (success)
            sp_.unlock();
    }
};
// ===========================================================================
// SuperGlue
// ===========================================================================
// Front-end object: holds the threading manager and the task executor of the
// main thread, and offers task submission, barrier() and wait().
//
// NOTE(review): template parameter/argument lists in this block appear to
// have been lost in extraction. Presumably the class is
// `template<typename Options>` and the project types are used as
// `TaskExecutor<Options>`, `TaskBase<Options>`, `HandleBase<Options>`,
// `detail::SANITY_CHECKS<Options>` etc. -- confirm against upstream.
template
class SuperGlue
  : public detail::SuperGlue_PauseExecution,
    public Options::ThreadAffinity,
    private detail::SANITY_CHECKS
{
    template friend class SuperGlue_PauseExecution;
    typedef typename Options::ReadyListType TaskQueue;
    typedef typename Options::ThreadingManagerType ThreadingManager;
    typedef typename TaskQueue::unsafe_t TaskQueueUnsafe;

private:
    // not copyable
    SuperGlue(const SuperGlue &);
    SuperGlue &operator=(const SuperGlue &);

    // true when `tman` was created by this object and must be deleted by it
    bool delete_threadmanager;

public:
    ThreadingManager *tman;
    TaskExecutor *main_task_executor;
    char padding0[Options::CACHE_LINE_SIZE];

    // queue index used by the next submit(task) call
    int next_queue;
    char padding2[Options::CACHE_LINE_SIZE];

public:
    // Use an existing threading manager. It is initialized here (init())
    // and is not deleted by the destructor.
    SuperGlue(ThreadingManager &tman_)
    : delete_threadmanager(false), tman(&tman_), next_queue(0) {
        tman->init();
        main_task_executor = tman->get_worker(Options::ThreadingManagerType::MAIN_THREAD_ID);
    }

    // Create and own a threading manager constructed from `req`.
    // Unlike the constructor above, init() is not called here.
    SuperGlue(int req = -1)
    : delete_threadmanager(true), tman(new ThreadingManager(req)), next_queue(0) {
        main_task_executor = tman->get_worker(Options::ThreadingManagerType::MAIN_THREAD_ID);
    }

    // Stops the threading manager and deletes it if it is owned.
    ~SuperGlue() {
        tman->stop();
        if (delete_threadmanager)
            delete tman;
    }

    int get_num_cpus() { return tman->get_num_cpus(); }

    // USER INTERFACE {

    // Submit to the worker selected by `next_queue`, then advance
    // `next_queue` round-robin over get_num_cpus().
    void submit(TaskBase *task) {
        submit(task, next_queue);
        // data race here when multiple threads submit tasks, but it
        // is not important that the distribution is perfectly even.
        next_queue = (next_queue + 1) % get_num_cpus();
    }

    // Submit to the worker with index `cpuid`.
    void submit(TaskBase *task, int cpuid) {
        tman->get_worker(cpuid)->submit(task);
    }

    // Runs the barrier protocol with the main thread's task executor.
    void barrier() {
        tman->barrier_protocol.barrier(*main_task_executor);
    }

    // Executes tasks on the main thread's executor until the handle's
    // current version equals next_version()-1. Tasks collected in `woken`
    // during the loop are pushed to the front of the main executor's list.
    void wait(HandleBase &handle) {
        TaskQueueUnsafe woken;
        while (handle.next_version()-1 != handle.get_current_version()) {
            main_task_executor->execute_tasks(woken);
            Atomic::compiler_fence(); // to reload the handle versions
        }
        if (!woken.empty())
            main_task_executor->push_front_list(woken);
    }

    // }
};
// Lock policy for TaskQueueSafe backed by a SpinLock. Provides the two scoped
// holder types and the lock()/unlock() calls that TaskQueueSafe uses.
class QueueSpinLocked {
    SpinLock spinlock;

public:
    // holds the lock for the lifetime of the object
    struct ScopedLockHolder : public SpinLockScoped {
        ScopedLockHolder(QueueSpinLocked &qsl) : SpinLockScoped(qsl.spinlock) {}
    };
    // tries once to take the lock; `success` (from SpinLockTryLock) tells
    // whether it is held
    struct ScopedLockHolderTry : public SpinLockTryLock {
        ScopedLockHolderTry(QueueSpinLocked &qsl) : SpinLockTryLock(qsl.spinlock) {}
    };
    void lock() { spinlock.lock(); }
    void unlock() { spinlock.unlock(); }
};

// Wraps an unsynchronized queue and takes `queuelock` around its operations.
// Exceptions to the locking are noted on the individual methods.
//
// NOTE(review): template parameter lists in this block appear to have been
// lost in extraction. Presumably the class is
// `template<typename TaskQueueUnsafe, typename LockType>` and the friend
// declarations are template friends -- confirm against upstream.
template
class TaskQueueSafe {
    template friend class Log_DumpState;
    template friend class TaskQueueExclusive;
    typedef typename LockType::ScopedLockHolder ScopedLockHolder;
    typedef typename LockType::ScopedLockHolderTry ScopedLockHolderTry;

private:
    TaskQueueUnsafe queue;
    LockType queuelock;

    // not copyable
    TaskQueueSafe(const TaskQueueSafe &);
    const TaskQueueSafe &operator=(const TaskQueueSafe &);

    // direct access to the wrapped queue, without locking
    TaskQueueUnsafe &get_unsafe_queue() { return queue; }

protected:
    void lock() { queuelock.lock(); }
    void unlock() { queuelock.unlock(); }

public:
    typedef typename TaskQueueUnsafe::value_type value_type;
    typedef typename TaskQueueUnsafe::ElementData ElementData;
    typedef TaskQueueUnsafe unsafe_t;

    TaskQueueSafe() {}

    void push_back(value_type *elem) {
        ScopedLockHolder hold(queuelock);
        queue.push_back(elem);
    }

    void push_front(value_type *elem) {
        ScopedLockHolder hold(queuelock);
        queue.push_front(elem);
    }

    // takes ownership of input list
    void push_front_list(TaskQueueUnsafe &list) {
        ScopedLockHolder hold(queuelock);
        queue.push_front_list(list);
    }

    bool pop_front(value_type * &elem) {
        ScopedLockHolder hold(queuelock);
        return queue.pop_front(elem);
    }

    // The emptiness check is made before the lock is taken, so an empty
    // queue is rejected without locking.
    bool pop_back(value_type * &elem) {
        if (queue.empty())
            return false;
        ScopedLockHolder hold(queuelock);
        return queue.pop_back(elem);
    }

    // Like pop_back(), but gives up (returns false) instead of waiting when
    // the lock cannot be taken immediately.
    bool try_steal(value_type * &elem) {
        ScopedLockHolderTry hold(queuelock);
        if (!hold.success)
            return false;
        return queue.pop_back(elem);
    }

    void swap(TaskQueueUnsafe &rhs) {
        ScopedLockHolder hold(queuelock);
        queue.swap(rhs);
    }

    // Emptiness check without taking the lock.
    bool empty() {
        return queue.empty();
    }

    // Emptiness check with the lock held.
    bool empty_safe() {
        ScopedLockHolder hold(queuelock);
        return queue.empty();
    }
};



// Holds the lock of a task queue from construction to destruction and
// forwards a few operations straight to the wrapped unsynchronized queue.
//
// NOTE(review): presumably `template<typename TaskQueueType>` -- confirm.
template
class TaskQueueExclusive {
    typedef typename TaskQueueType::value_type value_type;

private:
    TaskQueueType &tq;

    // not copyable
    TaskQueueExclusive(const TaskQueueExclusive &);
    const TaskQueueExclusive &operator=(const TaskQueueExclusive &);

public:
    TaskQueueExclusive(TaskQueueType &tq_) : tq(tq_) {
        tq.lock();
    }
    ~TaskQueueExclusive() {
        tq.unlock();
    }
    void push_back(value_type *elem) { tq.get_unsafe_queue().push_back(elem); }
    void swap(typename TaskQueueType::unsafe_t &rhs) { tq.get_unsafe_queue().swap(rhs); }
    bool empty() { return tq.get_unsafe_queue().empty(); }
};
// Per-version lists of waiting tasks ("version listeners"), ordered by
// version, together with the spinlock that guards them.
//
// NOTE(review): template parameter/argument lists in this block appear to
// have been lost in extraction. Presumably the class is
// `template<typename Options>`, with `elem_t<version_type, TaskQueueUnsafe>`,
// `Types<Options>::template deque_t<vecelem_t>::type`,
// `TaskBase<Options>` and `std::numeric_limits<version_type>` -- confirm
// against upstream.
template
class VersionQueue {
    template friend class VersionQueueExclusive;
private:
    typedef typename Options::version_type version_type;
    typedef typename Options::WaitListType WaitListType;
    typedef typename WaitListType::unsafe_t TaskQueueUnsafe;
    typedef elem_t vecelem_t;
    typedef typename Types::template deque_t::type elemdeque_t;
    typedef ordered_vec_t< elemdeque_t, version_type, TaskQueueUnsafe> versionmap_t;

    // lock that must be held during usage of the listener list, and when unlocking
    SpinLock version_listener_spinlock;
    // version listeners, per version
    versionmap_t version_listeners;

    // true for tasks that are still not ready to run
    struct DependenciesNotSolvedPredicate {
        bool operator()(TaskBase *elem) {
            return !elem->are_dependencies_solved_or_notify();
        }
    };

protected:
    SpinLock &get_lock() { return version_listener_spinlock; }
    // Appends `task` to the list for `version`. Takes no lock itself;
    // VersionQueueExclusive calls it while holding the lock.
    void add_version_listener(TaskBase *task, version_type version) {
        version_listeners[version].push_back(task);
    }

public:
    // Moves tasks listening for `version` (or an earlier version) that are
    // now ready into `woken`. One version's list is handled per iteration.
    void notify_version_listeners(TaskQueueUnsafe &woken, version_type version) {

        for (;;) {
            TaskQueueUnsafe list;
            {
                SpinLockScoped hold(version_listener_spinlock);

                // return if there are no version listeners
                if (version_listeners.empty())
                    return;

                // return if next version listener is for future version

                // The unsigned difference is at least half the value range
                // exactly when first_key is ahead of `version`, which keeps
                // the comparison valid across counter wrap-around.
                if ((version_type)(version - version_listeners.first_key()) >= std::numeric_limits::max() / 2)
                    return;

                list = version_listeners.pop_front();
            }

            // Note that a version is increased while holding a lock, and adding a
            // version listener requires holding the same lock. Hence, it is not
            // possible to add a listener for an old version here.
            // The version number is already increased when we wake tasks.

            // iterate through list and remove elements that are not ready

            list.erase_if(DependenciesNotSolvedPredicate());

            if (!list.empty())
                woken.push_front_list(list);
        }
    }
};

// Holds the version queue's spinlock for its own lifetime and exposes
// add_version_listener() while the lock is held.
//
// NOTE(review): presumably `template<typename Options>` with
// `VersionQueue<Options>` and `TaskBase<Options>` -- confirm.
template
class VersionQueueExclusive {
    typedef typename Options::version_type version_type;
private:
    VersionQueue &queue;
    SpinLockScoped lock;
public:
    VersionQueueExclusive(VersionQueue &queue_) : queue(queue_), lock(queue.get_lock()) {}
    void add_version_listener(TaskBase *task, version_type version) {
        queue.add_version_listener(task, version);
    }
};
namespace sg {

// ReadWriteConcurrent: access-type description with three access types.
// Each AccessType<n> specialization publishes three compile-time flags:
//   commutative - accesses of this type may be reordered among themselves
//   exclusive   - only one access of this type may run at a time
//   readonly    - the access does not modify the data
// `concurrent` is commutative, not exclusive and not read-only.
class ReadWriteConcurrent {
public:
    enum Type { read = 0, write, concurrent, num_accesses };
    template<int n> struct AccessType {};
};

template<> struct ReadWriteConcurrent::AccessType<ReadWriteConcurrent::read> {
    enum { commutative = 1 };
    enum { exclusive = 0 };
    enum { readonly = 1 };
};

template<> struct ReadWriteConcurrent::AccessType<ReadWriteConcurrent::write> {
    enum { commutative = 0 };
    enum { exclusive = 1 };
    enum { readonly = 0 };
};

template<> struct ReadWriteConcurrent::AccessType<ReadWriteConcurrent::concurrent> {
    enum { commutative = 1 };
    enum { exclusive = 0 };
    enum { readonly = 0 };
};

} // namespace sg
void after_barrier() {} 27 | }; 28 | 29 | } // namespace sg 30 | 31 | #endif // SG_INSTR_DEBUG_HPP_INCLUDED 32 | -------------------------------------------------------------------------------- /include/sg/option/instr_perfcount.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_INSTR_PERFCOUNT_HPP_INCLUDED 2 | #define SG_INSTR_PERFCOUNT_HPP_INCLUDED 3 | 4 | // [[TODO]] MISSING TEST 5 | 6 | #include "sg/platform/perfcount.hpp" 7 | 8 | #include "sg/option/log.hpp" 9 | 10 | namespace sg { 11 | 12 | template 13 | struct PerfTiming { 14 | Time::TimeUnit start, stop; 15 | unsigned long long cpu_clock; 16 | unsigned long long task_clock; 17 | unsigned long long context_switches; 18 | PerformanceCounter *perf[3]; 19 | 20 | PerfTiming(int) { 21 | perf[0] = new PerformanceCounter(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); 22 | perf[1] = new PerformanceCounter(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); 23 | perf[2] = new PerformanceCounter(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); 24 | } 25 | ~PerfTiming() { 26 | delete perf[0]; 27 | delete perf[1]; 28 | delete perf[2]; 29 | } 30 | void run_task_before(TaskBase *) { 31 | cpu_clock = perf[0]->read_counter(); 32 | task_clock = perf[1]->read_counter(); 33 | context_switches = perf[2]->read_counter(); 34 | perf[0]->start(); 35 | perf[1]->start(); 36 | perf[2]->start(); 37 | start = Time::getTime(); 38 | } 39 | void run_task_after(TaskBase *task) { 40 | stop = Time::getTime(); 41 | perf[2]->stop(); 42 | perf[1]->stop(); 43 | perf[0]->stop(); 44 | unsigned long long end_cpu_clock = perf[0]->read_counter(); 45 | unsigned long long end_task_clock = perf[1]->read_counter(); 46 | unsigned long long end_context_switches = perf[2]->read_counter(); 47 | char txt[80]; // fixed-size scratch buffer; the bounded snprintf below truncates long task names instead of writing past the end 48 | snprintf(txt, sizeof(txt), "%s cpu=%llu task=%llu cs=%llu", 49 | detail::GetName::get_name(task).c_str(), 50 | end_cpu_clock - cpu_clock, 51 | end_task_clock - task_clock, 52 | end_context_switches -
context_switches); 53 | Log::log(txt, start, stop); 54 | } 55 | static void after_barrier() {} 56 | }; 57 | 58 | } // namespace sg 59 | 60 | #endif // SG_INSTR_PERFCOUNT_HPP_INCLUDED 61 | -------------------------------------------------------------------------------- /include/sg/option/instr_procstat.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_INSTR_PROCSTAT_HPP_INCLUDED 2 | #define SG_INSTR_PROCSTAT_HPP_INCLUDED 3 | 4 | #include "sg/option/log.hpp" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace sg { 13 | 14 | template 15 | struct PerfProcStat { 16 | 17 | Time::TimeUnit start, stop; 18 | 19 | static int gettid() { 20 | return syscall(SYS_gettid); 21 | } 22 | 23 | void run_task_before(TaskBase *) { 24 | start = Time::getTime(); 25 | } 26 | void run_task_after(TaskBase *task) { 27 | stop = Time::getTime(); 28 | 29 | pid_t pid = ::getpid(); 30 | pid_t tid = gettid(); 31 | 32 | char procFilename[256]; 33 | snprintf(procFilename, sizeof(procFilename), "/proc/%d/task/%d/stat",pid,tid) ; 34 | 35 | char buffer[1024]; 36 | 37 | int fd = open(procFilename, O_RDONLY, 0); // fd is negative if the stat file could not be opened 38 | int num_read = (fd < 0) ? -1 : read(fd, buffer, 1023); // skip the read entirely on a failed open 39 | if (fd >= 0) close(fd); 40 | buffer[num_read > 0 ? num_read-1 : 0] = '\0'; // overwrite the last byte read, as before; yields an empty string when nothing was read, instead of indexing before the buffer 41 | 42 | Log::log((detail::GetName::get_name(task) + buffer).c_str(), start, stop); 43 | } 44 | static void after_barrier() {} 45 | }; 46 | 47 | } // namespace sg 48 | 49 | #endif // SG_INSTR_PROCSTAT_HPP_INCLUDED 50 | -------------------------------------------------------------------------------- /include/sg/option/instr_trace.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_INSTR_TRACE_HPP_INCLUDED 2 | #define SG_INSTR_TRACE_HPP_INCLUDED 3 | 4 | #include "sg/option/log.hpp" 5 | 6 | namespace sg { 7 | 8 | template 9 | struct Trace { 10 | Time::TimeUnit start; 11 | Time::TimeUnit start_idle_time; 12 | Trace(int threadid) { 13 | Log::register_thread(threadid); 14 | start = start_idle_time =
Time::getTime(); 15 | } 16 | void run_task_before(TaskBase *) { 17 | const bool first = (start == start_idle_time); 18 | start = Time::getTime(); 19 | if (!first) 20 | Log::add_idle_time(start - start_idle_time); 21 | } 22 | void run_task_after(TaskBase *task) { 23 | Time::TimeUnit stop = Time::getTime(); 24 | Log::log(detail::GetName::get_name(task).c_str(), start, stop); 25 | start_idle_time = Time::getTime(); 26 | } 27 | void after_barrier() { 28 | Time::TimeUnit stop = Time::getTime(); 29 | Log::add_barrier_time(stop - start_idle_time); 30 | start_idle_time = Time::getTime(); 31 | } 32 | static void dump(const char *name) { 33 | Log::dump(name); 34 | } 35 | }; 36 | 37 | } // namespace sg 38 | 39 | #endif // SG_INSTR_TRACE_HPP_INCLUDED 40 | -------------------------------------------------------------------------------- /include/sg/option/log2.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // log which task is executed where, and for what duration 3 | // 4 | 5 | #ifndef SG_LOG2_HPP_INCLUDED 6 | #define SG_LOG2_HPP_INCLUDED 7 | 8 | #include "sg/core/spinlock.hpp" 9 | #include "sg/platform/threads.hpp" 10 | #include "sg/platform/gettime.hpp" 11 | #include "sg/platform/tls.hpp" 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace sg { 18 | 19 | template class Log; 20 | template class TaskBase; 21 | template class HandleBase; 22 | 23 | // =========================================================================== 24 | // Log 25 | // =========================================================================== 26 | template 27 | class Log2 { 28 | public: 29 | struct Event { 30 | Time::TimeUnit time_start; 31 | Time::TimeUnit time_total; 32 | T data; 33 | 34 | Event() {} 35 | Event(T data_, Time::TimeUnit start, Time::TimeUnit stop) 36 | : time_start(start), time_total(stop-start), data(data_) {} 37 | int operator<(const Event &rhs) const { 38 | return time_start < rhs.time_start; 39 | } 40 | }; 41 
| 42 | struct ThreadData { 43 | int id; 44 | std::vector events; 45 | ThreadData(int id_) : id(id_) { 46 | events.reserve(655360); 47 | } 48 | }; 49 | struct LogData { 50 | SpinLock initspinlock; 51 | std::vector threaddata; 52 | }; 53 | 54 | public: 55 | static LogData &getLogData() { 56 | static LogData logdata; 57 | return logdata; 58 | } 59 | 60 | static ThreadData *&getThreadData() { 61 | static SG_TLS ThreadData *threaddata; 62 | return threaddata; 63 | } 64 | 65 | static void log(T data, Time::TimeUnit start, Time::TimeUnit stop) { 66 | ThreadData &threaddata(*getThreadData()); 67 | threaddata.events.push_back(Event(data, start, stop)); 68 | } 69 | 70 | static void dump(const char *filename, int node_id = 0) { 71 | std::ofstream out(filename); 72 | LogData &logdata(getLogData()); 73 | 74 | const size_t num = logdata.threaddata.size(); 75 | std::vector > merged; 76 | 77 | for (size_t i = 0; i < num; ++i) { 78 | for (size_t j = 0; j < logdata.threaddata[i]->events.size(); ++j) 79 | merged.push_back(std::make_pair(logdata.threaddata[i]->events[j], logdata.threaddata[i]->id)); 80 | } 81 | 82 | std::sort(merged.begin(), merged.end()); 83 | 84 | for (size_t i = 0; i < merged.size(); ++i) { 85 | out << node_id << " " 86 | << merged[i].second << ": " 87 | << merged[i].first.time_start << " " 88 | << merged[i].first.time_total << " " 89 | << merged[i].first.data << std::endl; 90 | } 91 | 92 | out.close(); 93 | } 94 | 95 | static void clear() { 96 | LogData &logdata(getLogData()); 97 | const size_t num = logdata.threaddata.size(); 98 | for (size_t i = 0; i < num; ++i) 99 | logdata.threaddata[i]->events.clear(); // threaddata stores ThreadData pointers (dump() dereferences them with ->), so -> is required here too 100 | } 101 | 102 | static void init() { 103 | // This method only needs to be called once, 104 | // but it is allowed to call it several times, 105 | // as long as it's from the same thread.
106 | 107 | // The reason to allow this to be called several 108 | // times is to be able to initialize and use 109 | // the logging machinery before starting up 110 | // SuperGlue, but still call here in the SuperGlue 111 | // startup. 112 | 113 | // However, no initialization is currently needed. 114 | } 115 | 116 | static void register_thread(int id) { 117 | LogData &logdata(getLogData()); 118 | ThreadData *td = new ThreadData(id); 119 | getThreadData() = td; 120 | 121 | SpinLockScoped lock(logdata.initspinlock); 122 | logdata.threaddata.push_back(td); 123 | } 124 | }; 125 | 126 | } // namespace sg 127 | 128 | #endif // SG_LOG2_HPP_INCLUDED 129 | -------------------------------------------------------------------------------- /include/sg/option/savedag.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_SAVEDAG_HPP_INCLUDED 2 | #define SG_SAVEDAG_HPP_INCLUDED 3 | 4 | // 5 | // collect DAG log data 6 | // 7 | 8 | #include "sg/option/log.hpp" 9 | #include "sg/core/accessutil.hpp" 10 | #include "sg/core/spinlock.hpp" 11 | #include "sg/platform/gettime.hpp" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace sg { 20 | 21 | template class Handle; 22 | 23 | namespace detail { 24 | 25 | struct Node { 26 | std::string name; 27 | std::string style; 28 | int type; 29 | Time::TimeUnit time_stamp; 30 | Node(const std::string &name_, 31 | const std::string &style_, 32 | int type_) 33 | : name(name_), style(style_), type(type_) { 34 | time_stamp = Time::getTime(); 35 | } 36 | }; 37 | struct TaskFinish { 38 | size_t taskid; 39 | size_t handleid; 40 | size_t version; 41 | TaskFinish(size_t taskid_, 42 | size_t handleid_, 43 | size_t version_) 44 | : taskid(taskid_), handleid(handleid_), version(version_) 45 | {} 46 | }; 47 | struct TaskDependency { 48 | size_t taskid; 49 | size_t handleid; 50 | size_t version; 51 | size_t type; 52 | TaskDependency(size_t taskid_, 53 | size_t 
handleid_, 54 | size_t version_, 55 | size_t type_) 56 | : taskid(taskid_), handleid(handleid_), version(version_), type(type_) 57 | {} 58 | }; 59 | 60 | struct LogDagData { 61 | SpinLock spinlock; 62 | 63 | std::vector nodes; 64 | std::map tasknodes; 65 | std::map< std::pair, size_t> datanodes; 66 | std::vector ranks; 67 | 68 | std::vector task_finish; 69 | std::vector task_dependency; 70 | 71 | void clear() { 72 | nodes.clear(); 73 | tasknodes.clear(); 74 | datanodes.clear(); 75 | ranks.clear(); 76 | task_finish.clear(); 77 | task_dependency.clear(); 78 | } 79 | }; 80 | 81 | // =========================================================================== 82 | // GetStyle 83 | // =========================================================================== 84 | 85 | template 86 | class TypeHasGetStyle { 87 | private: 88 | typedef char yes; 89 | typedef struct { char dummy[2]; } no; 90 | 91 | template struct TypeCheck {}; 92 | template static yes test(TypeCheck*); 93 | template static no test(...); 94 | 95 | public: 96 | enum { value = (sizeof(test(0)) == sizeof(yes)) }; 97 | }; 98 | 99 | template struct GetStyle { 100 | static std::string get_style(TaskBase *) { 101 | return ""; 102 | } 103 | }; 104 | template 105 | struct GetStyle { 106 | static std::string get_style(TaskBase *task) { 107 | return task->get_style(); 108 | } 109 | }; 110 | 111 | template 112 | std::string get_style(TaskBase *task) { 113 | return GetStyle >::value >::get_style(task); 114 | } 115 | 116 | } // namespace detail 117 | 118 | // =========================================================================== 119 | // Option Logging_DAG 120 | // =========================================================================== 121 | 122 | template 123 | class SaveDAG { 124 | typedef typename Options::version_type version_type; 125 | private: 126 | static size_t add_node(detail::LogDagData &data, std::string name, std::string style, size_t type) { 127 | data.nodes.push_back(detail::Node(name, style, 
type)); 128 | return data.nodes.size()-1; 129 | } 130 | 131 | static void register_task_node(detail::LogDagData &data, TaskBase *task) { 132 | size_t taskId = task->get_global_id(); 133 | if (data.tasknodes.find(taskId) != data.tasknodes.end()) 134 | return; 135 | std::stringstream ss; 136 | ss << task; 137 | size_t new_node = add_node(data, ss.str(), get_style(task), 0); 138 | data.tasknodes[taskId] = new_node; 139 | } 140 | 141 | static void register_data_node(detail::LogDagData &data, Handle *handle, size_t version) { 142 | size_t handleId = handle->get_global_id(); 143 | std::pair id(handleId, version); 144 | if (data.datanodes.find(id) != data.datanodes.end()) 145 | return; 146 | std::stringstream ss; 147 | ss << "$" << handle << "_{" << version << "}$"; 148 | size_t new_node = add_node(data, ss.str(), "[shape=rectangle,style=filled,fillcolor=gray]", 1); 149 | data.datanodes[id] = new_node; 150 | } 151 | 152 | 153 | public: 154 | static detail::LogDagData &get_dag_data() { 155 | static detail::LogDagData data; 156 | return data; 157 | } 158 | 159 | static void task_finish(TaskBase *task, Handle *handle, size_t newVersion) { 160 | detail::LogDagData &data(get_dag_data()); 161 | SpinLockScoped lock(data.spinlock); 162 | // have to register nodes here, as we cannot get name otherwise. 163 | register_data_node(data, handle, newVersion); 164 | data.task_finish.push_back(detail::TaskFinish(task->get_global_id(), handle->get_global_id(), newVersion)); 165 | } 166 | 167 | static void add_dependency(TaskBase *task, Handle *handle, version_type required_version, int type) { 168 | detail::LogDagData &data(get_dag_data()); 169 | SpinLockScoped lock(data.spinlock); 170 | // have to register nodes here, as we cannot get name otherwise. 
171 | register_task_node(data, task); 172 | register_data_node(data, handle, required_version); 173 | data.task_dependency.push_back(detail::TaskDependency(task->get_global_id(), 174 | handle->get_global_id(), 175 | required_version, type)); 176 | } 177 | 178 | static void new_rank() { 179 | detail::LogDagData &data(get_dag_data()); 180 | SpinLockScoped lock(data.spinlock); 181 | if (data.nodes.empty()) 182 | return; 183 | if (!data.ranks.empty()) { 184 | size_t old = data.ranks[data.ranks.size()-1]; 185 | if (data.nodes.size() == old) 186 | return; 187 | } 188 | data.ranks.push_back(data.nodes.size()); 189 | }; 190 | }; 191 | 192 | } // namespace sg 193 | #endif // SG_SAVEDAG_HPP_INCLUDED 194 | -------------------------------------------------------------------------------- /include/sg/option/savedag_common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_SAVEDAG_COMMON_HPP_INCLUDED 2 | #define SG_SAVEDAG_COMMON_HPP_INCLUDED 3 | 4 | // 5 | // contains common utilities for classes that generates graphs from dag log data 6 | // 7 | 8 | #include "sg/option/savedag.hpp" 9 | 10 | namespace sg { 11 | 12 | namespace detail { 13 | 14 | template 15 | class SaveDAG_common { 16 | public: 17 | static size_t get_data_node(const LogDagData &data, 18 | size_t handle_id, size_t version) { 19 | // data node must exist in datanodes 20 | return data.datanodes.find(std::make_pair(handle_id, version))->second; 21 | } 22 | 23 | static size_t get_task_node(const LogDagData &data, size_t task_id) { 24 | // task_id must be in tasknodes. 
25 | return data.tasknodes.find(task_id)->second; 26 | } 27 | 28 | static std::string get_name(bool dupename, std::string name, size_t index) { 29 | std::stringstream ss; 30 | ss << "\"" << name; 31 | if (dupename) 32 | ss << "_" << index; 33 | ss << "\""; 34 | return ss.str(); 35 | } 36 | 37 | static bool find_dupe_names(const std::vector &nodes) { 38 | std::set names; 39 | for (size_t i = 0; i < nodes.size(); ++i) 40 | names.insert(nodes[i].name); 41 | if (names.size() != nodes.size()) 42 | return true; 43 | return false; 44 | } 45 | 46 | static void dump_task_nodes(const std::vector &nodes, 47 | const std::vector &ranks, 48 | bool dupename, 49 | std::ofstream &out) { 50 | if (ranks.empty()) 51 | out << "{ " << std::endl; 52 | size_t j = 0; 53 | for (size_t i = 0; i < nodes.size(); ++i) { 54 | if (j != ranks.size() && ranks[j] == i) { 55 | if (j != 0) 56 | out << " } " << std::endl; 57 | out << " { rank = same; " << std::endl; 58 | ++j; 59 | } 60 | const Node &n(nodes[i]); 61 | if (n.name.empty()) 62 | continue; 63 | if (n.type == 1) 64 | continue; 65 | out << " " << get_name(dupename, n.name, i) 66 | << " [label=\""< &nodes, 75 | std::map &data_merge_map, 76 | std::set &hidden_nodes, 77 | bool dupename, std::ofstream &out) { 78 | for (size_t i = 0; i < nodes.size(); ++i) { 79 | const Node &n(nodes[i]); 80 | if (n.name.empty()) 81 | continue; 82 | if (hidden_nodes.find(i) != hidden_nodes.end()) 83 | continue; 84 | if (n.type != 1) 85 | continue; 86 | if (data_merge_map.find(i) != data_merge_map.end() 87 | && data_merge_map[i] != i) 88 | continue; 89 | 90 | out << " " << get_name(dupename, n.name, i) 91 | << " [label=\""< 14 | class SaveDAG_task : public detail::SaveDAG_common { 15 | private: 16 | struct Edge { 17 | size_t source; 18 | size_t sink; 19 | std::string style; 20 | Time::TimeUnit time_stamp; 21 | size_t type; 22 | Edge(size_t source_, size_t sink_, const char *style_, size_t type_) 23 | : source(source_), sink(sink_), style(style_), type(type_) { 24 | 
time_stamp = Time::getTime(); 25 | } 26 | bool operator<(const Edge &rhs) const { 27 | if (source < rhs.source) 28 | return true; 29 | if (source > rhs.source) 30 | return false; 31 | return sink < rhs.sink; 32 | } 33 | }; 34 | 35 | struct AccessingTasks { 36 | size_t lasttype; 37 | std::vector last[Options::AccessInfoType::num_accesses]; 38 | AccessingTasks() : lasttype(~static_cast(0)) {} 39 | }; 40 | 41 | static void finalize(const detail::LogDagData &data, 42 | const std::vector &last, size_t i, 43 | std::set &edges) { 44 | #ifdef MANY_EDGES // draw dotted edges between all associative tasks in a group 45 | for (size_t j = 0; j < last.size(); ++j) { 46 | for (size_t k = j+1; k < last.size(); ++k) { 47 | edges.insert(Edge(detail::SaveDAG_common::get_task_node(data, last[j]), // get_task_node takes (data, task_id) only 48 | detail::SaveDAG_common::get_task_node(data, last[k]), 49 | "[dir=none,style=dashed,penwidth=2]", i)); 50 | } 51 | } 52 | #else // draw single path to connect all associative tasks in a group 53 | for (size_t k = 1; k < last.size(); ++k) { 54 | edges.insert(Edge(detail::SaveDAG_common::get_task_node(data, last[k-1]), 55 | detail::SaveDAG_common::get_task_node(data, last[k]), 56 | "[dir=none,style=dashed]", i)); 57 | } 58 | #endif 59 | } 60 | 61 | static void get_edges(const detail::LogDagData &data, std::set &edges) { 62 | 63 | std::map accessinfo; 64 | 65 | for (size_t i = 0; i < data.task_dependency.size(); ++i) { 66 | const size_t type(data.task_dependency[i].type); 67 | const size_t handleid(data.task_dependency[i].handleid); 68 | const size_t taskid(data.task_dependency[i].taskid); 69 | 70 | AccessingTasks &at(accessinfo[handleid]); 71 | 72 | if (at.lasttype != type) { 73 | for (size_t j = 0; j < Options::AccessInfoType::num_accesses; ++j) 74 | if (j != at.lasttype) 75 | at.last[j].clear(); 76 | } 77 | 78 | for (size_t j = 0; j < Options::AccessInfoType::num_accesses; ++j) { 79 | 80 | if (at.lasttype != type && AccessUtil::needs_lock(j)) 81 | finalize(data, at.last[j], j, edges);
82 | 83 | if (AccessUtil::needs_lock(type)) 84 | continue; 85 | if (j == type && AccessUtil::concurrent(type)) 86 | continue; 87 | 88 | #ifdef MANY_EDGES // draw dependency edges from all tasks in the previous group of associative tasks 89 | for (size_t k = 0; k < at.last[j].size(); ++k) 90 | edges.insert(Edge(detail::SaveDAG_common::get_task_node(data, at.last[j][k]), 91 | detail::SaveDAG_common::get_task_node(data, taskId), "", j)); 92 | #else // draw a single dependency edge from the "last" task in the previous group of associative tasks 93 | if (!at.last[j].empty()) 94 | edges.insert(Edge(detail::SaveDAG_common::get_task_node(data, at.last[j][at.last[j].size()-1]), 95 | detail::SaveDAG_common::get_task_node(data, taskid), "", j)); 96 | #endif 97 | } 98 | 99 | at.lasttype = type; 100 | at.last[type].push_back(taskid); 101 | } 102 | 103 | for (typename std::map::const_iterator itr = accessinfo.begin(); itr != accessinfo.end(); ++itr) { 104 | const AccessingTasks &at(itr->second); 105 | for (size_t k = 0; k < Options::AccessInfoType::num_accesses; ++k) 106 | if (AccessUtil::needs_lock(k)) 107 | finalize(data, at.last[k], k, edges); 108 | } 109 | } 110 | 111 | public: 112 | static void dump(const char *filename) { 113 | std::ofstream out(filename); 114 | detail::LogDagData &data(SaveDAG::get_dag_data()); 115 | SpinLockScoped lock(data.spinlock); 116 | 117 | std::set edges; 118 | get_edges(data, edges); 119 | 120 | const bool dupename = detail::SaveDAG_common::find_dupe_names( data.nodes ); 121 | 122 | out << "digraph {" << std::endl; 123 | out << " overlap=false;" << std::endl; 124 | 125 | detail::SaveDAG_common::dump_task_nodes(data.nodes, data.ranks, dupename, out); 126 | 127 | for (typename std::set::iterator itr = edges.begin(); itr != edges.end(); ++itr) { 128 | const Edge &e(*itr); 129 | const detail::Node &source(data.nodes[e.source]); 130 | const detail::Node &sink(data.nodes[e.sink]); 131 | out << " " << detail::SaveDAG_common::get_name(dupename, 
source.name, e.source) << " -> " 132 | << detail::SaveDAG_common::get_name(dupename, sink.name, e.sink) << " " << e.style << ";" << std::endl; 133 | } 134 | 135 | out << "}" << std::endl; 136 | out.close(); 137 | } 138 | }; 139 | 140 | } // namespace sg 141 | 142 | #endif // SG_SAVEDAG_TASK_HPP_INCLUDED 143 | -------------------------------------------------------------------------------- /include/sg/option/taskqueue_deque.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TASKQUEUE_DEQUE_HPP_INCLUDED 2 | #define SG_TASKQUEUE_DEQUE_HPP_INCLUDED 3 | 4 | #include "sg/core/taskqueuesafe.hpp" 5 | #include "sg/core/types.hpp" 6 | 7 | namespace sg { 8 | 9 | template class TaskBase; 10 | 11 | namespace detail { 12 | 13 | template 14 | class TaskQueueDequeUnsafe { 15 | typedef TaskBase * taskptr_t; 16 | typedef typename Types::template deque_t::type taskdeque_t; 17 | taskdeque_t q; 18 | 19 | public: 20 | struct ElementData {}; 21 | typedef TaskBase value_type; 22 | 23 | bool pop_front(TaskBase * &elem) { 24 | if (q.empty()) 25 | return false; 26 | 27 | elem = q.front(); 28 | q.pop_front(); 29 | return true; 30 | } 31 | 32 | bool pop_back(TaskBase * &elem) { 33 | if (q.empty()) 34 | return false; 35 | elem = q.back(); 36 | q.pop_back(); 37 | return true; 38 | } 39 | 40 | void push_front(TaskBase *elem) { 41 | q.push_front(elem); 42 | } 43 | 44 | void push_back(TaskBase *elem) { 45 | q.push_back(elem); 46 | } 47 | 48 | void push_front_list(TaskQueueDequeUnsafe &rhs) { 49 | q.insert(q.begin(), rhs.q.begin(), rhs.q.end()); 50 | } 51 | 52 | bool empty() { 53 | return q.empty(); 54 | } 55 | 56 | template 57 | void erase_if(UnaryPredicate pred) { 58 | q.erase(remove_if(q.begin(), q.end(), pred), q.end()); 59 | } 60 | 61 | void swap(TaskQueueDequeUnsafe &rhs) { 62 | std::swap(q, rhs.q); 63 | } 64 | }; 65 | 66 | } // namespace detail 67 | 68 | template 69 | class TaskQueueDeque : public detail::TaskQueueSafe< 
detail::TaskQueueDequeUnsafe< Options >, 70 | detail::QueueSpinLocked > { 71 | public: 72 | typedef typename detail::TaskQueueDequeUnsafe unsafe_t; 73 | }; 74 | 75 | } // namespace sg 76 | 77 | #endif // SG_TASKQUEUE_DEQUE_HPP_INCLUDED 78 | -------------------------------------------------------------------------------- /include/sg/option/taskqueue_prio.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TASKQUEUE_PRIO_HPP_INCLUDED 2 | #define SG_TASKQUEUE_PRIO_HPP_INCLUDED 3 | 4 | #include "sg/core/taskqueueunsafe.hpp" 5 | #include "sg/core/taskqueuesafe.hpp" 6 | 7 | namespace sg { 8 | 9 | template class TaskBase; 10 | 11 | namespace detail { 12 | 13 | template 14 | class TaskQueuePrioUnsafe { 15 | protected: 16 | TaskQueueDefaultUnsafe highprio; 17 | TaskQueueDefaultUnsafe lowprio; 18 | 19 | public: 20 | struct ElementData : public TaskQueueDefaultUnsafe::ElementData { 21 | bool is_prioritized; 22 | ElementData() : is_prioritized(false) {} 23 | }; 24 | 25 | typedef TaskBase value_type; 26 | 27 | TaskQueuePrioUnsafe() {} 28 | 29 | void push_back(TaskBase *elem) { 30 | if (elem->is_prioritized) 31 | highprio.push_back(elem); 32 | else 33 | lowprio.push_back(elem); 34 | } 35 | 36 | void push_front(TaskBase *elem) { 37 | if (elem->is_prioritized) 38 | highprio.push_front(elem); 39 | else 40 | lowprio.push_front(elem); 41 | } 42 | 43 | // takes ownership of input list 44 | void push_front_list(TaskQueuePrioUnsafe &rhs) { 45 | highprio.push_front_list(rhs.highprio); 46 | lowprio.push_front_list(rhs.lowprio); 47 | } 48 | 49 | bool pop_front(TaskBase * &elem) { 50 | if (highprio.pop_front(elem)) 51 | return true; 52 | return lowprio.pop_front(elem); 53 | } 54 | 55 | bool pop_back(TaskBase * &elem) { 56 | if (highprio.pop_back(elem)) 57 | return true; 58 | return lowprio.pop_back(elem); 59 | } 60 | 61 | template 62 | void visit(Visitor &visitor) { 63 | highprio.visit(visitor); 64 | lowprio.visit(visitor); 65 | } 66 | 67 | 
template 68 | void erase_if(UnaryPredicate pred) { 69 | highprio.erase_if(pred); 70 | lowprio.erase_if(pred); 71 | } 72 | 73 | bool empty() { 74 | return lowprio.empty() && highprio.empty(); 75 | } 76 | 77 | void swap(TaskQueuePrioUnsafe &rhs) { 78 | std::swap(highprio, rhs.highprio); 79 | std::swap(lowprio, rhs.lowprio); 80 | } 81 | 82 | size_t size() const { 83 | return highprio.size() + lowprio.size(); 84 | } 85 | }; 86 | 87 | } // namespace detail 88 | 89 | template 90 | class TaskQueuePrio : public detail::TaskQueueSafe< detail::TaskQueuePrioUnsafe< Options >, 91 | detail::QueueSpinLocked > {}; 92 | 93 | } // namespace sg 94 | 95 | #endif // SG_TASKQUEUE_PRIO_HPP_INCLUDED 96 | -------------------------------------------------------------------------------- /include/sg/option/taskqueue_priopinned.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TASKQUEUE_PRIOPINNED_HPP_INCLUDED 2 | #define SG_TASKQUEUE_PRIOPINNED_HPP_INCLUDED 3 | 4 | #include "sg/core/taskqueueunsafe.hpp" 5 | #include "sg/core/taskqueuesafe.hpp" 6 | 7 | namespace sg { 8 | 9 | template class TaskBase; 10 | 11 | namespace detail { 12 | 13 | template 14 | class TaskQueuePrioPinnedUnsafe { 15 | protected: 16 | TaskQueueDefaultUnsafe highprio; 17 | TaskQueueDefaultUnsafe lowprio; 18 | TaskQueueDefaultUnsafe pinned; 19 | 20 | public: 21 | struct ElementData : public TaskQueueDefaultUnsafe::ElementData { 22 | bool is_prioritized; 23 | int pinned_to; 24 | ElementData() : is_prioritized(false), pinned_to(-1) {} 25 | int get_location() const { return pinned_to; } 26 | }; 27 | 28 | typedef TaskBase value_type; 29 | 30 | TaskQueuePrioPinnedUnsafe() {} 31 | 32 | void push_back(TaskBase *elem) { 33 | if (elem->pinned_to != -1) 34 | pinned.push_back(elem); 35 | else if (elem->is_prioritized) 36 | highprio.push_back(elem); 37 | else 38 | lowprio.push_back(elem); 39 | } 40 | 41 | void push_front(TaskBase *elem) { 42 | if (elem->pinned_to != -1) 43 | 
pinned.push_front(elem); 44 | else if (elem->is_prioritized) 45 | highprio.push_front(elem); 46 | else 47 | lowprio.push_front(elem); 48 | } 49 | 50 | // takes ownership of input list 51 | void push_front_list(TaskQueuePrioPinnedUnsafe &rhs) { 52 | pinned.push_front_list(rhs.pinned); 53 | highprio.push_front_list(rhs.highprio); 54 | lowprio.push_front_list(rhs.lowprio); 55 | } 56 | 57 | bool pop_front(TaskBase * &elem) { 58 | if (highprio.pop_front(elem)) 59 | return true; 60 | if (pinned.pop_front(elem)) 61 | return true; 62 | return lowprio.pop_front(elem); 63 | } 64 | 65 | bool pop_back(TaskBase * &elem) { 66 | if (highprio.pop_back(elem)) 67 | return true; 68 | return lowprio.pop_back(elem); 69 | } 70 | 71 | template 72 | void visit(Visitor &visitor) { 73 | highprio.visit(visitor); 74 | pinned.visit(visitor); 75 | lowprio.visit(visitor); 76 | } 77 | 78 | template 79 | void erase_if(UnaryPredicate pred) { 80 | highprio.erase_if(pred); 81 | pinned.erase_if(pred); 82 | lowprio.erase_if(pred); 83 | } 84 | 85 | bool empty() { 86 | return lowprio.empty() && highprio.empty() && pinned.empty(); 87 | } 88 | 89 | void swap(TaskQueuePrioPinnedUnsafe &rhs) { 90 | std::swap(highprio, rhs.highprio); 91 | std::swap(pinned, rhs.pinned); 92 | std::swap(lowprio, rhs.lowprio); 93 | } 94 | 95 | size_t size() const { 96 | return highprio.size() + lowprio.size() + pinned.size(); 97 | } 98 | }; 99 | 100 | } // namespace detail 101 | 102 | template 103 | class TaskQueuePrioPinned : public detail::TaskQueueSafe< detail::TaskQueuePrioPinnedUnsafe, 104 | detail::QueueSpinLocked > {}; 105 | 106 | } // namespace sg 107 | 108 | #endif // SG_TASKQUEUE_PRIOPINNED_HPP_INCLUDED 109 | -------------------------------------------------------------------------------- /include/sg/option/threadingmanager_default.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_THREADINGMANAGER_DEFAULT_HPP_INCLUDED 2 | #define SG_THREADINGMANAGER_DEFAULT_HPP_INCLUDED 
3 | 4 | #include "sg/platform/threads.hpp" 5 | #include "sg/platform/threadutil.hpp" 6 | #include "sg/platform/atomic.hpp" 7 | #include "sg/core/spinlock.hpp" 8 | 9 | #include 10 | #include 11 | 12 | namespace sg { 13 | 14 | template class SuperGlue; 15 | template class BarrierProtocol; 16 | template class TaskExecutor; 17 | 18 | template 19 | class ThreadingManagerDefault { 20 | typedef typename Options::ReadyListType TaskQueue; 21 | typedef typename Options::ThreadingManagerType ThreadingManager; 22 | 23 | private: 24 | // =========================================================================== 25 | // WorkerThread: Thread to run worker 26 | // =========================================================================== 27 | class WorkerThread : public Thread { 28 | private: 29 | const int id; 30 | ThreadingManagerDefault &tman; 31 | 32 | public: 33 | WorkerThread(int id_, ThreadingManagerDefault &tman_) 34 | : id(id_), tman(tman_) {} 35 | 36 | void run() { 37 | Options::ThreadAffinity::pin_workerthread(id); 38 | 39 | // allocate Worker on thread 40 | TaskExecutor *te = new TaskExecutor(id, tman); 41 | 42 | tman.threads[id] = te; 43 | tman.task_queues[id] = &te->get_task_queue(); 44 | 45 | Atomic::increase(&tman.start_counter); 46 | 47 | tman.lock_workers_initialized.lock(); 48 | tman.lock_workers_initialized.unlock(); 49 | 50 | te->work_loop(); 51 | } 52 | }; 53 | 54 | SpinLock lock_workers_initialized; 55 | unsigned int start_counter; 56 | char padding1[Options::CACHE_LINE_SIZE]; 57 | unsigned int num_cpus; 58 | std::vector workerthreads; 59 | 60 | public: 61 | enum { MAIN_THREAD_ID = 0, WORKER_THREAD_ID_BASE = 1 }; 62 | BarrierProtocol barrier_protocol; 63 | TaskExecutor **threads; 64 | TaskQueue **task_queues; 65 | 66 | private: 67 | static bool workers_start_paused(typename Options::Disable) { return false; } 68 | static bool workers_start_paused(typename Options::Enable) { return true; } 69 | static bool workers_start_paused() { return 
workers_start_paused(typename Options::PauseExecution()); } 70 | 71 | int decide_num_cpus(int requested) { 72 | assert(requested == -1 || requested > 0); 73 | std::string var = sg_getenv("OMP_NUM_THREADS"); 74 | if (!var.empty()) { 75 | const int OMP_NUM_THREADS(atoi(var.c_str())); 76 | assert(OMP_NUM_THREADS >= 0); 77 | if (OMP_NUM_THREADS != 0) 78 | return OMP_NUM_THREADS; 79 | } 80 | if (requested == -1 || requested == 0) 81 | return ThreadUtil::get_num_cpus(); 82 | return requested; 83 | } 84 | 85 | public: 86 | ThreadingManagerDefault(int requested_num_cpus = -1) 87 | : start_counter(1), 88 | num_cpus(decide_num_cpus(requested_num_cpus)), 89 | barrier_protocol(*static_cast(this)) 90 | { 91 | Options::ThreadAffinity::init(); 92 | Options::ThreadAffinity::pin_main_thread(); 93 | 94 | // initialize static singleton before starting threads 95 | Options::LogDAG::get_dag_data(); 96 | 97 | lock_workers_initialized.lock(); 98 | threads = new TaskExecutor *[num_cpus]; 99 | task_queues = new TaskQueue*[num_cpus]; 100 | 101 | threads[0] = new TaskExecutor(0, *this); 102 | task_queues[0] = &threads[0]->get_task_queue(); 103 | 104 | const int num_workers(num_cpus-1); 105 | workerthreads.resize(num_workers); 106 | for (int i = 0; i < num_workers; ++i) { 107 | workerthreads[i] = new WorkerThread(i+1, *this); 108 | workerthreads[i]->start(); 109 | } 110 | 111 | while (start_counter != num_cpus) 112 | Atomic::rep_nop(); 113 | 114 | if (!workers_start_paused()) 115 | lock_workers_initialized.unlock(); 116 | } 117 | 118 | void stop() { 119 | start_executing(); // make sure threads have been started, or we will wait forever in barrier 120 | barrier_protocol.barrier(*threads[0]); 121 | 122 | for (int i = 1; i < get_num_cpus(); ++i) 123 | threads[i]->terminate(); 124 | 125 | const unsigned int num_workers(num_cpus-1); 126 | for (unsigned int i = 0; i < num_workers; ++i) 127 | workerthreads[i]->join(); 128 | for (unsigned int i = 0; i < num_workers; ++i) 129 | delete 
workerthreads[i]; 130 | for (unsigned int i = 0; i < num_cpus; ++i) 131 | delete threads[i]; 132 | 133 | delete [] threads; 134 | delete [] task_queues; 135 | } 136 | 137 | void start_executing() { 138 | if (workers_start_paused()) 139 | lock_workers_initialized.unlock(); 140 | } 141 | 142 | TaskQueue **get_task_queues() const { return const_cast(&task_queues[0]); } 143 | TaskExecutor *get_worker(int i) { return threads[i]; } 144 | int get_num_cpus() { return num_cpus; } 145 | }; 146 | 147 | } // namespace sg 148 | 149 | #endif // SG_THREADINGMANAGER_DEFAULT_HPP_INCLUDED 150 | -------------------------------------------------------------------------------- /include/sg/option/threadingmanager_omp.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_THREADINGMANAGER_OMP_HPP_INCLUDED 2 | #define SG_THREADINGMANAGER_OMP_HPP_INCLUDED 3 | 4 | #include "sg/platform/openmputil.hpp" 5 | #include "sg/platform/atomic.hpp" 6 | #include "sg/core/spinlock.hpp" 7 | #include 8 | 9 | namespace sg { 10 | 11 | template class BarrierProtocol; 12 | template class SuperGlue; 13 | template class TaskExecutor; 14 | 15 | template 16 | class ThreadingManagerOMP { 17 | typedef typename Options::ReadyListType TaskQueue; 18 | typedef typename Options::ThreadingManagerType ThreadingManager; 19 | 20 | private: 21 | SpinLock startup_lock; 22 | SpinLock lock_workers_initialized; 23 | 24 | int start_counter; 25 | char padding1[Options::CACHE_LINE_SIZE]; 26 | int num_cpus; 27 | 28 | public: 29 | enum { MAIN_THREAD_ID = 0, WORKER_THREAD_ID_BASE = 1 }; 30 | BarrierProtocol barrier_protocol; 31 | TaskExecutor **threads; 32 | TaskQueue **task_queues; 33 | 34 | 35 | private: 36 | static bool workers_start_paused(typename Options::Disable) { return false; } 37 | static bool workers_start_paused(typename Options::Enable) { return true; } 38 | static bool workers_start_paused() { return workers_start_paused(typename Options::PauseExecution()); } 39 | 40 | 
static int decide_num_cpus() { return omp_get_num_threads(); } 41 | static int get_thread_num() { return omp_get_thread_num(); } 42 | 43 | void init_master() { 44 | Options::ThreadAffinity::pin_main_thread(); 45 | num_cpus = decide_num_cpus(); 46 | threads = new TaskExecutor *[num_cpus]; 47 | task_queues = new TaskQueue*[num_cpus]; 48 | 49 | threads[0] = new TaskExecutor(0, *this); 50 | task_queues[0] = &threads[0]->get_task_queue(); 51 | 52 | startup_lock.unlock(); 53 | 54 | while (start_counter != num_cpus) 55 | Atomic::rep_nop(); 56 | 57 | if (!workers_start_paused()) 58 | lock_workers_initialized.unlock(); 59 | } 60 | 61 | void init_worker(int id) { 62 | Options::ThreadAffinity::pin_workerthread(id); 63 | // allocate Worker on thread 64 | TaskExecutor *te = new TaskExecutor(id, *this); 65 | 66 | // wait until main thread has initialized the threadmanager 67 | startup_lock.lock(); 68 | startup_lock.unlock(); 69 | 70 | threads[id] = te; 71 | task_queues[id] = &te->get_task_queue(); 72 | 73 | Atomic::increase(&start_counter); 74 | 75 | lock_workers_initialized.lock(); 76 | lock_workers_initialized.unlock(); 77 | 78 | te->work_loop(); 79 | } 80 | 81 | public: 82 | ThreadingManagerOMP(int req = -1) 83 | : start_counter(1), 84 | barrier_protocol(*static_cast(this)) 85 | { 86 | assert(req == -1); 87 | 88 | // initialize static singleton before starting threads 89 | Options::LogDAG::get_dag_data(); 90 | 91 | // In the OpenMP backend, the ThreadManager is instantiated 92 | // in a serial section, so in contrast to the threaded backend 93 | // we do not know the number of threads here. 
94 | // Instead we find out when we are called again in thread_main() 95 | startup_lock.lock(); 96 | lock_workers_initialized.lock(); 97 | 98 | Options::ThreadAffinity::init(); 99 | } 100 | 101 | void init() { 102 | const int id(get_thread_num()); 103 | if (id == 0) 104 | init_master(); 105 | else 106 | init_worker(id); 107 | } 108 | 109 | void stop() { 110 | if (get_thread_num() != 0) { 111 | 112 | // workers don't come here until terminate() has been called 113 | 114 | int nv = Atomic::decrease_nv(&start_counter); 115 | 116 | // wait until all workers reached this step 117 | // all threads must agree that we are shutting 118 | // down before we can continue and invoke the 119 | // destructor 120 | startup_lock.lock(); 121 | startup_lock.unlock(); 122 | return; 123 | } 124 | 125 | start_executing(); // make sure threads have been started, or we will wait forever in barrier 126 | barrier_protocol.barrier(*threads[0]); 127 | 128 | startup_lock.lock(); 129 | 130 | for (int i = 1; i < get_num_cpus(); ++i) 131 | threads[i]->terminate(); 132 | 133 | 134 | // wait for all threads to join 135 | while (start_counter != 1) 136 | Atomic::rep_nop(); 137 | 138 | // signal that threads can destruct 139 | startup_lock.unlock(); 140 | 141 | for (int i = 1; i < get_num_cpus(); ++i) 142 | delete threads[i]; 143 | 144 | delete [] threads; 145 | delete [] task_queues; 146 | } 147 | 148 | void start_executing() { 149 | if (workers_start_paused()) { 150 | if (lock_workers_initialized.is_locked()) 151 | lock_workers_initialized.unlock(); 152 | } 153 | } 154 | 155 | TaskQueue **get_task_queues() const { return const_cast(&task_queues[0]); } 156 | TaskExecutor *get_worker(int i) { return threads[i]; } 157 | int get_num_cpus() { return num_cpus; } 158 | }; 159 | 160 | } // namespace sg 161 | 162 | #endif // SG_THREADINGMANAGER_OMP_HPP_INCLUDED 163 | 164 | -------------------------------------------------------------------------------- /include/sg/platform/affinity.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef SG_AFFINITY_HPP_INCLUDED 2 | #define SG_AFFINITY_HPP_INCLUDED 3 | 4 | #include 5 | 6 | #ifdef __linux__ 7 | #include 8 | #include 9 | #endif 10 | #ifdef __sun 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #endif 17 | #ifdef _WIN32 18 | #define NOMINMAX 19 | #include 20 | #endif 21 | 22 | namespace sg { 23 | 24 | // =========================================================================== 25 | // affinity_cpu_set: basically a wrapper around cpu_set_t. 26 | // =========================================================================== 27 | 28 | #ifdef __linux__ 29 | 30 | struct affinity_cpu_set { 31 | cpu_set_t cpu_set; 32 | affinity_cpu_set() { 33 | CPU_ZERO(&cpu_set); 34 | } 35 | void set(int cpu) { 36 | CPU_SET(cpu, &cpu_set); 37 | } 38 | }; 39 | 40 | #elif __sun 41 | 42 | // On Solaris, threads are only pinned to a single thread, 43 | // the last one set in cpu_set. 
44 | struct affinity_cpu_set { 45 | int cpu_id; 46 | void set(int cpu) { 47 | cpu_id = cpu; 48 | } 49 | }; 50 | 51 | #elif __APPLE__ 52 | 53 | struct affinity_cpu_set { 54 | void set(int) {} 55 | }; 56 | 57 | #elif _WIN32 58 | 59 | struct affinity_cpu_set { 60 | DWORD_PTR cpu_set; 61 | affinity_cpu_set() : cpu_set(0) {} 62 | void set(int cpu) { 63 | cpu_set |= static_cast(1) << static_cast(cpu); 64 | } 65 | }; 66 | 67 | #else 68 | #error Not implemented 69 | #endif 70 | 71 | // =========================================================================== 72 | // ThreadAffinity 73 | // =========================================================================== 74 | 75 | struct ThreadAffinity { 76 | #ifdef __sun 77 | static void set_affinity(affinity_cpu_set &cpu_set) { 78 | assert(processor_bind(P_LWPID, P_MYID, cpu_set.cpu_id, NULL) == 0); 79 | } 80 | #elif __linux__ 81 | static void set_affinity(affinity_cpu_set &cpu_set) { 82 | assert(sched_setaffinity(0, sizeof(cpu_set.cpu_set), &cpu_set.cpu_set) == 0); 83 | } 84 | #elif __APPLE__ 85 | static void set_affinity(affinity_cpu_set &) { 86 | // setting cpu affinity not supported on mac 87 | } 88 | #elif _WIN32 89 | static void set_affinity(affinity_cpu_set &cpu_set) { 90 | SetThreadAffinityMask(GetCurrentThread(), cpu_set.cpu_set); 91 | } 92 | #else 93 | #error Not implemented 94 | #endif 95 | }; 96 | 97 | } // namespace sg 98 | 99 | #endif // SG_AFFINITY_HPP_INCLUDED 100 | -------------------------------------------------------------------------------- /include/sg/platform/gettime.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_GETTIME_HPP_INCLUDED 2 | #define SG_GETTIME_HPP_INCLUDED 3 | 4 | namespace Time { 5 | 6 | // 7 | // Routines for getting the current time. 
8 | // 9 | // Defines: 10 | // typedef TimeUnit 11 | // TimeUnit getTime() // returns current time 12 | // TimeUnit getFreq() // returns time units per second 13 | // 14 | 15 | typedef unsigned long long TimeUnit; 16 | 17 | #if defined(_MSC_VER) // MICROSOFT VISUAL C++ 18 | 19 | #define NOMINMAX 20 | #include 21 | static inline TimeUnit getTime() { 22 | return __rdtsc(); 23 | } 24 | static inline TimeUnit getTimeStart() { return getTime(); } 25 | static inline TimeUnit getTimeStop() { return getTime(); } 26 | 27 | #elif defined(__x86_64__) 28 | 29 | static inline TimeUnit getTime() { 30 | unsigned hi, lo; 31 | 32 | __asm__ __volatile__ ("rdtsc\n" 33 | : "=a" (lo), "=d" (hi) 34 | :: "%rbx", "%rcx"); 35 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 36 | } 37 | 38 | static inline TimeUnit getTimeStart() { 39 | unsigned hi, lo; 40 | 41 | __asm__ __volatile__ ("cpuid\n" 42 | "rdtsc\n" 43 | : "=a" (lo), "=d" (hi) 44 | :: "%rbx", "%rcx"); 45 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 46 | } 47 | 48 | static inline TimeUnit getTimeStop() { 49 | unsigned hi, lo; 50 | 51 | __asm__ __volatile__ ("rdtscp\n" 52 | "mov %%edx, %0\n\t" 53 | "mov %%eax, %1\n\t" 54 | "cpuid\n" 55 | : "=r" (hi), "=r" (lo) 56 | :: "%rax", "%rbx", "%rcx", "%rdx"); 57 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 58 | } 59 | 60 | #else 61 | 62 | #include 63 | static inline TimeUnit getTime() { 64 | timeval tv; 65 | gettimeofday(&tv, 0); 66 | return (tv.tv_sec*(unsigned long long )1000000+tv.tv_usec); 67 | } 68 | 69 | static inline TimeUnit getTimeStart() { return getTime(); } 70 | static inline TimeUnit getTimeStop() { return getTime(); } 71 | 72 | #endif 73 | 74 | } // namespace Time 75 | 76 | #endif // SG_GETTIME_HPP_INCLUDED 77 | -------------------------------------------------------------------------------- /include/sg/platform/openmputil.hpp: -------------------------------------------------------------------------------- 1 | 
#ifndef SG_OPENMPUTIL_HPP_INCLUDED 2 | #define SG_OPENMPUTIL_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace sg { 7 | 8 | class OpenMPUtil { 9 | public: 10 | static int get_num_cpus() { return omp_get_num_threads(); } 11 | static int get_current_thread_id() { return omp_get_thread_num(); } 12 | }; 13 | 14 | } // namespace sg 15 | 16 | #endif // SG_OPENMPUTIL_HPP_INCLUDED 17 | -------------------------------------------------------------------------------- /include/sg/platform/platform.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_PLATFORM_HPP_INCLUDED 2 | #define SG_PLATFORM_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | 7 | #ifdef _WIN32 8 | #define SG_INLINE __forceinline 9 | #else 10 | #define SG_INLINE __attribute__((always_inline)) 11 | #endif 12 | 13 | #if defined(_MSC_VER) 14 | #define SG_TLS __declspec( thread ) 15 | #else 16 | #define SG_TLS __thread 17 | #endif 18 | 19 | std::string sg_getenv(const char *env) { 20 | #ifdef _WIN32 21 | char *buf; 22 | size_t len; 23 | errno_t err = _dupenv_s(&buf, &len, env); 24 | if (err || buf == NULL) 25 | return ""; 26 | std::string ret(buf); 27 | free(buf); 28 | return ret; 29 | #else 30 | const char *ret = getenv(env); 31 | return ret ? 
ret : ""; 32 | #endif 33 | } 34 | 35 | 36 | #endif // SG_PLATFORM_HPP_INCLUDED 37 | -------------------------------------------------------------------------------- /include/sg/platform/threads.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_THREADS_HPP_INCLUDED 2 | #define SG_THREADS_HPP_INCLUDED 3 | 4 | // =========================================================================== 5 | // Defines Threads interface, and contains platform specific threading code 6 | // =========================================================================== 7 | 8 | #include 9 | 10 | #ifdef __linux__ 11 | #include 12 | #include 13 | #include 14 | #endif 15 | #ifdef __sun 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #endif 24 | #ifdef _WIN32 25 | #define NOMINMAX 26 | #include 27 | #endif 28 | 29 | #ifndef _WIN32 30 | #define PTHREADS 31 | typedef pthread_t ThreadType; 32 | #else 33 | typedef HANDLE ThreadType; 34 | #endif 35 | 36 | 37 | namespace sg { 38 | 39 | namespace detail { 40 | 41 | #ifdef _WIN32 42 | DWORD WINAPI spawn_thread(LPVOID arg); 43 | #else 44 | extern "C" void *spawn_thread(void *arg); 45 | #endif 46 | 47 | } // namespace detail 48 | 49 | // =========================================================================== 50 | // Thread 51 | // =========================================================================== 52 | class Thread { 53 | private: 54 | Thread(const Thread &); 55 | const Thread &operator=(const Thread &); 56 | 57 | protected: 58 | ThreadType thread; 59 | 60 | public: 61 | Thread() {} 62 | virtual ~Thread() {}; 63 | virtual void run() = 0; 64 | 65 | // void start() 66 | 67 | #ifdef _WIN32 68 | void start() { 69 | DWORD threadID; 70 | Thread *this_(static_cast(this)); 71 | this_->thread = CreateThread(NULL, 0, &detail::spawn_thread, this_, 0, &threadID); 72 | } 73 | #else 74 | void start() { 75 | pthread_attr_t attr; 76 | 
pthread_attr_init(&attr); 77 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 78 | Thread *this_(static_cast(this)); 79 | assert(pthread_create(&this_->thread, &attr, &detail::spawn_thread, this_) == 0); 80 | pthread_attr_destroy(&attr); 81 | } 82 | #endif 83 | 84 | // void join() 85 | 86 | #ifdef _WIN32 87 | void join() { 88 | Thread *this_(static_cast(this)); 89 | WaitForSingleObject(this_->thread, INFINITE); 90 | } 91 | #else 92 | void join() { 93 | Thread *this_(static_cast(this)); 94 | void *status; 95 | pthread_join(this_->thread, &status); 96 | } 97 | #endif 98 | 99 | }; 100 | 101 | namespace detail { 102 | 103 | #ifdef _WIN32 104 | DWORD WINAPI spawn_thread(LPVOID arg) { 105 | Thread *t = static_cast(arg); 106 | t->run(); 107 | return 0; 108 | } 109 | #else 110 | extern "C" void *spawn_thread(void *arg) { 111 | Thread *t = static_cast(arg); 112 | t->run(); 113 | return 0; 114 | } 115 | #endif 116 | 117 | } // namespace detail 118 | 119 | } // namespace sg 120 | 121 | #endif // SG_THREADS_HPP_INCLUDED 122 | -------------------------------------------------------------------------------- /include/sg/platform/threadutil.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_THREADUTIL_HPP_INCLUDED 2 | #define SG_THREADUTIL_HPP_INCLUDED 3 | 4 | #ifdef __linux__ 5 | #include 6 | #include 7 | #include 8 | #endif 9 | #ifdef __sun 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #endif 18 | #ifdef _WIN32 19 | #define NOMINMAX 20 | #include 21 | #endif 22 | 23 | namespace sg { 24 | 25 | #ifndef _WIN32 26 | #define PTHREADS 27 | typedef pthread_t ThreadIDType; 28 | #else 29 | typedef DWORD ThreadIDType; 30 | #endif 31 | 32 | // =========================================================================== 33 | // ThreadUtil: Utility functions 34 | // =========================================================================== 35 | class ThreadUtil { 36 | 
public: 37 | 38 | // static int get_num_cpus() 39 | 40 | #if defined(__sun) 41 | static int get_num_cpus() { 42 | int numCPUs = (processorid_t) sysconf(_SC_NPROCESSORS_ONLN); 43 | int online = 0; 44 | for (int i = 0; i < numCPUs; ++i) 45 | if (p_online(i, P_STATUS) == P_ONLINE) 46 | ++online; 47 | return online; 48 | } 49 | #elif defined(__linux__) || defined(__APPLE__) 50 | static int get_num_cpus() { return (int) sysconf(_SC_NPROCESSORS_ONLN); } 51 | #elif defined(_WIN32) 52 | static int get_num_cpus() { 53 | SYSTEM_INFO m_si = {0}; 54 | GetSystemInfo(&m_si); 55 | return (int) m_si.dwNumberOfProcessors; 56 | } 57 | #else 58 | #error Not implemented for this platform 59 | #endif 60 | 61 | // static ThreadIDType get_current_thread_id() 62 | 63 | #ifdef PTHREADS 64 | static ThreadIDType get_current_thread_id() { return pthread_self(); } 65 | #elif _WIN32 66 | static ThreadIDType get_current_thread_id() { return GetCurrentThreadId(); } 67 | #else 68 | #error Not implemented for this platform 69 | #endif 70 | }; 71 | 72 | } // namespace sg 73 | 74 | #endif // SG_THREADUTIL_HPP_INCLUDED 75 | -------------------------------------------------------------------------------- /include/sg/platform/tls.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TLS_HPP_INCLUDED 2 | #define SG_TLS_HPP_INCLUDED 3 | 4 | #if defined(_POSIX_THREADS) 5 | #include 6 | #elif defined(_WIN32) 7 | #include 8 | #endif 9 | 10 | namespace sg { 11 | 12 | #if defined(_POSIX_THREADS) 13 | 14 | template 15 | class tls_data { 16 | private: 17 | pthread_key_t key; 18 | 19 | tls_data(const tls_data &); 20 | tls_data &operator=(const tls_data &); 21 | 22 | public: 23 | tls_data() { 24 | assert( pthread_key_create(&key, NULL) == 0); 25 | } 26 | ~tls_data() { 27 | pthread_key_delete(key); 28 | } 29 | T *get() { 30 | return (T *) pthread_getspecific(key); 31 | } 32 | void set(const T *value) { 33 | pthread_setspecific(key, (void *) value); 34 | } 35 | }; 36 | 
37 | #elif defined(_WIN32) 38 | 39 | template 40 | class tls_data { 41 | private: 42 | DWORD key; 43 | 44 | tls_data(const tls_data &); 45 | tls_data &operator=(const tls_data &); 46 | 47 | public: 48 | tls_data() { 49 | key = TlsAlloc(); 50 | } 51 | ~tls_data() { 52 | TlsFree(key); 53 | } 54 | T *get() { 55 | return (T *) TlsGetValue(key);; 56 | } 57 | void set(const T *value) { 58 | TlsSetValue(key, (void *) value); 59 | } 60 | }; 61 | 62 | #else 63 | 64 | #error Not Tested 65 | 66 | template 67 | class tls_data { 68 | private: 69 | __thread T *value; 70 | 71 | tls_data(const tls_data &); 72 | tls_data &operator=(const tls_data &); 73 | 74 | public: 75 | T *get() { return value; } 76 | void set(const T *value_) { value = value_; } 77 | }; 78 | 79 | #endif 80 | 81 | } // namespace sg 82 | 83 | #endif // SG_TLS_HPP_INCLUDED 84 | -------------------------------------------------------------------------------- /include/sg/superglue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_SUPERGLUE_HPP_INCLUDED 2 | #define SG_SUPERGLUE_HPP_INCLUDED 3 | 4 | #include "sg/core/access.hpp" 5 | #include "sg/core/accessutil.hpp" 6 | #include "sg/core/handle.hpp" 7 | #include "sg/core/schedulerver.hpp" 8 | #include "sg/core/task.hpp" 9 | #include "sg/core/taskexecutor.hpp" 10 | #include "sg/core/taskqueue.hpp" 11 | #include "sg/core/versionqueue.hpp" 12 | #include "sg/core/supergluebase.hpp" 13 | #include "sg/core/defaults.hpp" 14 | 15 | using namespace sg; 16 | 17 | #endif // SG_SUPERGLUE_HPP_INCLUDED 18 | -------------------------------------------------------------------------------- /make.bat: -------------------------------------------------------------------------------- 1 | mkdir build 2 | pushd build 3 | 4 | cl /nologo /EHsc /I ..\include ..\test\main.cpp 5 | .\main.exe 6 | 7 | for %%F in (..\test\modular\*.cpp) do ( 8 | cl /nologo /EHsc /I ..\include %%F 9 | ) 10 | 11 | for %%F in (..\examples\src\*.cpp) do ( 12 | cl 
/nologo /EHsc /I ..\include %%F 13 | ) 14 | 15 | for %%F in (*.exe) do ( 16 | %%F 17 | ) 18 | 19 | popd 20 | -------------------------------------------------------------------------------- /scripts/status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | def load_file(filename): 6 | fh = open(filename, "r") 7 | out = list() 8 | while True: 9 | line = fh.readline() 10 | if line.startswith("#"): 11 | continue; 12 | if not line: 13 | break 14 | words = line.split() 15 | 16 | if words[4].isdigit(): 17 | words[4] = words[5] 18 | 19 | name = words[4].strip() 20 | out.append({'name': name, 21 | 'procid': int(words[0]), 22 | 'threadid': int(words[1][:-1]), 23 | 'start': int(words[2]), 24 | 'length': int(words[3]), 25 | 'end': int(words[2]) + int(words[3])}) 26 | fh.close() 27 | return out 28 | 29 | filename = "schedule.dat" 30 | if len(sys.argv) > 1: 31 | filename = sys.argv[1] 32 | 33 | tasks = load_file(filename) 34 | 35 | procids = [task['procid'] for task in tasks] 36 | procids = set(procids) 37 | 38 | min_time = dict() 39 | for procid in procids: 40 | min_time[procid] = min([task['start'] for task in tasks if task['procid'] == procid]) 41 | 42 | for task in tasks: 43 | task['start'] -= min_time[task['procid']] 44 | task['end'] -= min_time[task['procid']] 45 | 46 | tasum = dict() 47 | for task in tasks: 48 | if task['name'] in tasum: 49 | (num, length) = tasum[task['name']] 50 | tasum[task['name']] = (num + 1, length + task['length']/1000000.0) 51 | else: 52 | tasum[task['name']] = (1, task['length']/1000000.0) 53 | 54 | threadids = set() 55 | for task in tasks: 56 | threadids.add(task['threadid']) 57 | num_threads = len(threadids) 58 | 59 | total = sum([task['length'] for task in tasks]) / 1000000.0 60 | end_time = max([task['end'] for task in tasks]) / 1000000.0 61 | 62 | full_time = end_time * num_threads * len(procids) 63 | idle_time = full_time - total 64 | 65 | s = 
sorted(tasum, key=lambda x: tasum[x]) 66 | 67 | def pf(name, num, time): 68 | print("{:16s}".format(name), "{0:10d}".format(num), "{0:10.2f}".format(time), "{0:6.2f} %".format(time/total*100), "{0:6.2f} %".format(time/full_time*100)) 69 | 70 | def pf2(name, time): 71 | print("{:16s}".format(name), "----------", "{0:10.2f}".format(time), "--------", "{0:6.2f} %".format(time/full_time*100)) 72 | 73 | print('end_time ', "{0:10.2f}".format(end_time)) 74 | print('parallelism', "{0:10.2f}".format(total/end_time)) 75 | print('efficiency ', "{0:10.2f} %".format(total/full_time*100)) 76 | print('num threads:', num_threads) 77 | print('total_time:', total) 78 | print() 79 | for i in s: 80 | (num, length) = tasum[i] 81 | pf(i, num, length) 82 | pf2('idle', idle_time) 83 | -------------------------------------------------------------------------------- /test/fail/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../../superglue -pthread 2 | 3 | CPP_FILES=$(wildcard *.cpp) 4 | TESTS=$(patsubst %.cpp,%.test,$(CPP_FILES)) 5 | 6 | all: $(TESTS) 7 | 8 | %.test: %.cpp 9 | $(CXX) $(FLAGS) $< 2> /dev/null ; [ $$? 
-ne 0 ] 10 | -------------------------------------------------------------------------------- /test/fail/sanity_fail_lockable.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include 4 | 5 | struct Options : public DefaultOptions { 6 | typedef Disable Lockable; 7 | }; 8 | 9 | int main() { 10 | SuperGlue sg; 11 | return 0; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /test/fail/sanity_fail_signed.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include 4 | 5 | struct Options : public DefaultOptions { 6 | typedef int version_t; 7 | }; 8 | 9 | int main() { 10 | SuperGlue sg; 11 | return 0; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | class TestCase { 4 | public: 5 | typedef bool (*testfunction)(std::string &name); 6 | 7 | virtual testfunction *get(size_t &num) = 0; 8 | virtual std::string get_name() = 0; 9 | virtual ~TestCase() {} 10 | }; 11 | 12 | #include "unit/test_handle.hpp" 13 | #include "unit/test_access.hpp" 14 | #include "unit/test_schedver.hpp" 15 | #include "unit/test_taskqueue.hpp" 16 | #include "unit/test_taskqueuedeque.hpp" 17 | #include "unit/test_taskqueueprio.hpp" 18 | #include "unit/test_tasks.hpp" 19 | #include "unit/test_locks.hpp" 20 | #include "unit/test_listqueue.hpp" 21 | #include "unit/test_rwc.hpp" 22 | #include "unit/test_subtasks.hpp" 23 | 24 | int main(int argc, char *argv[]) { 25 | size_t num_tests = 0; 26 | size_t num_success = 0; 27 | 28 | TestCase *modules[] = { 29 | new TestHandle(), 30 | new TestAccess(), 31 | new TestSchedulerVer(), 32 | new TestTaskQueue(), 33 | new TestTaskQueueDeque(), 34 | new TestTaskQueuePrio(), 35 | new TestTasks(), 
36 | new TestLocks(), 37 | new TestListQueue(), 38 | new TestRWC(), 39 | new TestSubtasks() 40 | }; 41 | 42 | for (size_t i = 0; i < sizeof(modules)/sizeof(TestCase*); ++i) { 43 | 44 | size_t num_cases = 0; 45 | TestCase::testfunction *tests = modules[i]->get(num_cases); 46 | num_tests += num_cases; 47 | 48 | std::vector failed; 49 | std::string testname; 50 | 51 | std::cout << modules[i]->get_name() << std::flush; 52 | for (size_t j = 0; j < num_cases; ++j) { 53 | if (tests[j](testname)) 54 | ++num_success; 55 | else 56 | failed.push_back(testname); 57 | } 58 | 59 | if (!failed.empty()) { 60 | std::cout << " FAILED: " << modules[i]->get_name() << " [" << failed[0]; 61 | for (size_t j = 1; j < failed.size(); ++j) 62 | std::cout << ", " << failed[j]; 63 | std::cout << "]" << std::endl; 64 | } 65 | else 66 | std::cout << " OK" << std::endl; 67 | delete modules[i]; 68 | } 69 | 70 | std::cout << num_success << "/" << num_tests << std::endl; 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /test/modular/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../../include -pthread 2 | 3 | CPP_FILES=$(wildcard *.cpp) 4 | TESTS=$(patsubst %.cpp,%.test,$(CPP_FILES)) 5 | 6 | all: $(TESTS) 7 | 8 | %.test: %.cpp 9 | $(CXX) $(FLAGS) $< && ./a.out 10 | -------------------------------------------------------------------------------- /test/modular/atomic.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/platform/atomic.hpp" 2 | #include 3 | 4 | using namespace sg; 5 | 6 | int main() { 7 | 8 | // atomic increase and decrease 9 | 10 | int a = 0; 11 | Atomic::increase(&a); 12 | assert(a == 1); 13 | Atomic::decrease(&a); 14 | assert(a == 0); 15 | assert(Atomic::add_nv(&a, 2) == 2); 16 | assert(Atomic::increase_nv(&a) == 3); 17 | assert(Atomic::decrease_nv(&a) == 2); 18 | 19 | // compare and 
swap 20 | 21 | assert(Atomic::cas(&a, 0, 4) == 2); 22 | assert(Atomic::cas(&a, 2, 3) == 2); 23 | assert(a == 3); 24 | assert(Atomic::swap(&a, 5) == 3); 25 | assert(a == 5); 26 | 27 | int *ptr = (int *)2; 28 | assert(Atomic::cas(&ptr, (int *)0, (int *)4) == (int *)2); 29 | assert(Atomic::cas(&ptr, (int *)2, (int *)3) == (int *)2); 30 | assert(ptr == (int *)3); 31 | assert(Atomic::swap(&ptr, (int *)5) == (int *)3); 32 | assert(ptr == (int *)5); 33 | 34 | // lock primitives 35 | 36 | unsigned int lock = 1; 37 | assert(!Atomic::lock_test_and_set(&lock)); 38 | Atomic::lock_release(&lock); 39 | assert(lock == 0); 40 | assert(Atomic::lock_test_and_set(&lock)); 41 | assert(lock == 1); 42 | 43 | // barriers etc 44 | 45 | Atomic::memory_fence_producer(); 46 | Atomic::memory_fence_consumer(); 47 | Atomic::yield(); 48 | Atomic::rep_nop(); 49 | Atomic::compiler_fence(); 50 | return 0; 51 | } 52 | 53 | -------------------------------------------------------------------------------- /test/modular/gettime.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/platform/gettime.hpp" 2 | #include 3 | 4 | int main() { 5 | Time::TimeUnit t0 = Time::getTime(); 6 | Time::TimeUnit t1 = Time::getTime(); 7 | assert(t0 <= t1); 8 | return 0; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /test/modular/logging.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/log.hpp" 3 | 4 | struct Options : public DefaultOptions {}; 5 | 6 | int main() { 7 | Log::register_thread(0); 8 | Log::log("Test", 0, 1); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /test/modular/minimal.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | struct Options : public DefaultOptions {}; 4 | 5 | int main() { 
6 | SuperGlue sg; 7 | return 0; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /test/modular/sanity_lockable.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include "sg/option/access_readwrite.hpp" 4 | 5 | struct Options : public DefaultOptions { 6 | typedef Disable Lockable; 7 | typedef ReadWrite AccessInfoType; 8 | }; 9 | 10 | struct MyTask : Task { 11 | Handle &h; 12 | MyTask(Handle &h_) : h(h_) { 13 | register_access(ReadWrite::read, h); 14 | } 15 | void run() {} 16 | }; 17 | 18 | int main() { 19 | SuperGlue sg; 20 | Handle h; 21 | MyTask t(h); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /test/modular/semaphores.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | #include "sg/option/instr_trace.hpp" 3 | 4 | struct Options : public DefaultOptions { 5 | typedef Trace Instrumentation; 6 | typedef Enable TaskName; 7 | }; 8 | 9 | void wait() { 10 | Time::TimeUnit stop = Time::getTime() + 100000; 11 | while (Time::getTime() < stop) 12 | Atomic::rep_nop(); 13 | } 14 | 15 | struct Req1 : public Task { 16 | Req1(Resource &r) { require(r); } 17 | void run() { wait(); } 18 | std::string get_name() { return "req1"; } 19 | }; 20 | 21 | struct Req3 : public Task { 22 | Req3(Resource &r) { require(r, 3); } 23 | void run() { wait(); } 24 | std::string get_name() { return "req3"; } 25 | }; 26 | 27 | int main() { 28 | SuperGlue sg; 29 | Handle h; 30 | Resource res(7); 31 | 32 | for (int i = 0; i < 1000; ++i) 33 | sg.submit(new Req3(res)); 34 | 35 | for (int i = 0; i < 1000; ++i) 36 | sg.submit(new Req1(res)); 37 | 38 | sg.barrier(); 39 | 40 | Log::dump("trace.log"); 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /test/modular/spinlock.cpp: 
-------------------------------------------------------------------------------- 1 | #include "sg/core/spinlock.hpp" 2 | #include 3 | 4 | using namespace sg; 5 | 6 | int main() { 7 | SpinLock sl; 8 | assert(!sl.is_locked()); 9 | sl.lock(); 10 | assert(sl.is_locked()); 11 | assert(!sl.try_lock()); 12 | sl.unlock(); 13 | assert(!sl.is_locked()); 14 | assert(sl.try_lock()); 15 | assert(sl.is_locked()); 16 | sl.unlock(); 17 | assert(!sl.is_locked()); 18 | 19 | { 20 | SpinLockScoped lock(sl); 21 | assert(sl.is_locked()); 22 | } 23 | assert(!sl.is_locked()); 24 | 25 | 26 | { 27 | SpinLockTryLock lock(sl); 28 | assert(sl.is_locked()); 29 | { 30 | SpinLockTryLock lock2(sl); 31 | assert(sl.is_locked()); 32 | } 33 | assert(sl.is_locked()); 34 | } 35 | assert(!sl.is_locked()); 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /test/modular/taskqueue_prio.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include "sg/option/taskqueue_prio.hpp" 4 | 5 | struct Options : public DefaultOptions { 6 | typedef TaskQueuePrio ReadyListType; 7 | typedef TaskQueuePrio WaitListType; 8 | }; 9 | 10 | int main() { 11 | SuperGlue sg; 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/modular/threads.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/platform/threads.hpp" 2 | #include "sg/platform/threadutil.hpp" 3 | #include "sg/platform/affinity.hpp" 4 | #include 5 | 6 | using namespace sg; 7 | 8 | bool ok = false; 9 | bool ok2 = false; 10 | 11 | struct thread : public Thread { 12 | virtual void run() { 13 | ok = true; 14 | } 15 | }; 16 | 17 | int main() { 18 | assert(ThreadUtil::get_num_cpus() > 0); 19 | ThreadUtil::get_current_thread_id(); 20 | 21 | thread thr; 22 | thr.start(); 23 | thr.join(); 24 | assert(ok); 25 | 26 | affinity_cpu_set 
cpu_set; 27 | cpu_set.set(0); 28 | ThreadAffinity::set_affinity(cpu_set); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /test/modular/versionwrap.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/superglue.hpp" 2 | 3 | #include "sg/platform/gettime.hpp" 4 | #include 5 | #include 6 | 7 | struct Options : public DefaultOptions { 8 | typedef unsigned char version_t; 9 | }; 10 | 11 | volatile size_t busy = 0; 12 | 13 | struct MyTask : public Task { 14 | std::string name; 15 | MyTask(Handle &h) { 16 | register_access(ReadWriteAdd::write, h); 17 | } 18 | void run() { 19 | if (busy != 0) { 20 | fprintf(stderr, "FAIL: %d %d\n", get_access(0).get_handle()->version, get_access(0).required_version); 21 | assert(busy == 0); 22 | } 23 | busy = 1; 24 | // delay to make sure several tasks have the chance to run concurrently 25 | Time::TimeUnit stop = Time::getTime() + 100000; 26 | while (Time::getTime() < stop); 27 | busy = 0; 28 | } 29 | }; 30 | 31 | int main() { 32 | const size_t num_tasks = std::numeric_limits::max()/2; 33 | 34 | SuperGlue sg; 35 | 36 | Handle h; 37 | for (size_t i = 0; i < num_tasks/2; ++i) 38 | sg.submit(new MyTask(h)); 39 | sg.barrier(); 40 | 41 | for (size_t i = 0; i < num_tasks; ++i) 42 | sg.submit(new MyTask(h)); 43 | sg.barrier(); 44 | return 0; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /test/modular_dep/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I ../../superglue -pthread 2 | 3 | CPP_FILES=$(wildcard *.cpp) 4 | TESTS=$(patsubst %.cpp,%.test,$(CPP_FILES)) 5 | 6 | all: $(TESTS) 7 | 8 | %.test: %.cpp 9 | $(CXX) $(FLAGS) $< && ./a.out 10 | -------------------------------------------------------------------------------- /test/modular_dep/gettime.cpp: 
-------------------------------------------------------------------------------- 1 | #include "sg/platform/gettime.hpp" 2 | #include 3 | 4 | int main() { 5 | Time::TimeUnit t0 = Time::getTimeStart(); 6 | Time::TimeUnit t1 = Time::getTime(); 7 | Time::TimeUnit t2 = Time::getTimeStop(); 8 | assert(t0 <= t1); 9 | assert(t1 <= t2); 10 | return 0; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /test/modular_dep/perfcount.cpp: -------------------------------------------------------------------------------- 1 | #include "sg/platform/perfcount.hpp" 2 | 3 | #include 4 | 5 | int main() { 6 | 7 | PerformanceCounter perf_cycles(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); 8 | PerformanceCounter perf_instr(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); 9 | 10 | double d = 1.0; 11 | const size_t num_iterations = 100; 12 | 13 | perf_cycles.start(); 14 | perf_instr.start(); 15 | unsigned long long start_cycles = perf_cycles.readCounter(); 16 | unsigned long long start_instr = perf_instr.readCounter(); 17 | for (size_t i = 0; i < num_iterations; ++i) 18 | d += 1e-5; 19 | perf_cycles.stop(); 20 | perf_instr.stop(); 21 | 22 | unsigned long long stop_cycles = perf_cycles.readCounter(); 23 | unsigned long long stop_instr = perf_instr.readCounter(); 24 | 25 | assert(stop_cycles > start_cycles); 26 | assert(stop_instr > start_instr); 27 | assert((stop_instr - start_instr) < (stop_cycles - start_cycles)); 28 | assert((stop_instr - start_instr) > num_iterations); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /test/unit/test_access.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_ACCESS_HPP_INCLUDED 2 | #define SG_TEST_ACCESS_HPP_INCLUDED 3 | 4 | #include 5 | 6 | class TestAccess : public TestCase { 7 | struct OpDefault : public DefaultOptions {}; 8 | 9 | struct MyContrib { 10 | }; 11 | 12 | struct OpContrib : 
public DefaultOptions { 13 | typedef Enable Contributions; 14 | typedef MyContrib ContributionType; 15 | }; 16 | 17 | struct OpLockable : public DefaultOptions { 18 | typedef Enable Lockable; 19 | }; 20 | 21 | struct OpAll : public DefaultOptions { 22 | typedef Enable Contributions; 23 | typedef Enable Lockable; 24 | typedef MyContrib ContributionType; 25 | }; 26 | 27 | static bool testContrib(std::string &name) { name = "testContrib"; 28 | Access h1, h2; 29 | 30 | if (h1.use_contrib()) return false; 31 | h1.set_use_contrib(true); 32 | if (!h1.use_contrib()) return false; 33 | if (h2.use_contrib()) return false; 34 | 35 | return true; 36 | } 37 | 38 | static bool testLockable(std::string &name) { name = "testLockable"; 39 | Handle h1, h2; 40 | Access a11(&h1, 0), a12(&h1, 0); 41 | Access a21(&h2, 0), a22(&h2, 0); 42 | 43 | if (a11.needs_lock()) return false; 44 | 45 | // if not needslock -- always succeeds 46 | if (!a11.get_lock()) return false; 47 | if (!a11.get_lock()) return false; 48 | 49 | a11.set_required_quantity(1); 50 | if (!a11.needs_lock()) return false; 51 | if (a12.needs_lock()) return false; 52 | 53 | // now needs lock -- first grab should succeed, second fail 54 | if (!a11.get_lock()) return false; 55 | if (a11.get_lock()) return false; 56 | 57 | 58 | // different access pointing to same handle should also fail 59 | // but only if it needs locks 60 | if (!a12.get_lock()) return false; 61 | if (!a12.get_lock()) return false; 62 | 63 | // now should fail as a11 already has the lock 64 | a12.set_required_quantity(1); 65 | if (a12.get_lock()) return false; 66 | 67 | return true; 68 | } 69 | 70 | static bool testCombos(std::string &name) { name = "testCombos"; 71 | Access h1; 72 | return true; 73 | } 74 | 75 | public: 76 | 77 | std::string get_name() { return "TestAccess"; } 78 | 79 | testfunction *get(size_t &numTests) { 80 | static testfunction tests[] = { 81 | testContrib, testLockable, testCombos 82 | }; 83 | numTests = 
sizeof(tests)/sizeof(testfunction); 84 | return tests; 85 | } 86 | }; 87 | 88 | #endif // SG_TEST_ACCESS_HPP_INCLUDED 89 | -------------------------------------------------------------------------------- /test/unit/test_handle.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_HANDLE_HPP_INCLUDED 2 | #define SG_TEST_HANDLE_HPP_INCLUDED 3 | 4 | #include 5 | 6 | class TestHandle : public TestCase { 7 | struct OpDefault : public DefaultOptions {}; 8 | 9 | struct OpName : public DefaultOptions { 10 | typedef Enable HandleName; 11 | }; 12 | struct OpID: public DefaultOptions { 13 | typedef Enable HandleId; 14 | }; 15 | struct OpLockable : public DefaultOptions { 16 | typedef ReadWriteAdd AccessInfoType; 17 | typedef Enable Lockable; 18 | }; 19 | struct OpSubTasks : public DefaultOptions { 20 | typedef Enable SubTasks; 21 | }; 22 | struct OpAll : public DefaultOptions { 23 | typedef ReadWriteAdd AccessInfoType; 24 | typedef Enable SubTasks; 25 | typedef Enable HandleName; 26 | typedef Enable HandleId; 27 | typedef Enable Lockable; 28 | }; 29 | 30 | static bool testName(std::string &name) { name = "testName"; 31 | Handle h1, h2; 32 | h1.set_name("A"); 33 | h2.set_name("B"); 34 | return h1.get_name() == std::string("A") 35 | && h2.get_name() == std::string("B"); 36 | } 37 | 38 | static bool testId(std::string &name) { name = "testId"; 39 | Handle h1, h2; 40 | return (h1.get_global_id() != h2.get_global_id()); 41 | } 42 | 43 | static bool testLockable(std::string &name) { name = "testLockable"; 44 | Handle h0; 45 | Handle h1; 46 | 47 | // check that handle cannot be locked twice 48 | if (!h1.get_lock(1)) 49 | return false; 50 | if (h1.get_lock(1)) 51 | return false; 52 | 53 | // check that public lock api is available 54 | (void) &Handle::get_lock_or_notify; 55 | (void) &Handle::release_lock; 56 | (void) &Handle::increase_current_version_no_unlock; 57 | 58 | return true; 59 | } 60 | 61 | static bool 
testSubTasks(std::string &name) { name = "testSubTasks"; 62 | // just test compilation 63 | Handle h1; 64 | return true; 65 | } 66 | 67 | static bool testCombos(std::string &name) { name = "testCombos"; 68 | Handle h1; 69 | return true; 70 | } 71 | 72 | public: 73 | std::string get_name() { return "TestHandle"; } 74 | 75 | testfunction *get(size_t &numTests) { 76 | static testfunction tests[] = { 77 | testName, testId, testLockable, testSubTasks, testCombos 78 | }; 79 | numTests = sizeof(tests)/sizeof(testfunction); 80 | return tests; 81 | } 82 | }; 83 | 84 | #endif // SG_TEST_HANDLE_HPP_INCLUDED 85 | 86 | -------------------------------------------------------------------------------- /test/unit/test_listqueue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_LISTQUEUE_HPP_INCLUDED 2 | #define SG_TEST_LISTQUEUE_HPP_INCLUDED 3 | 4 | #include 5 | 6 | class TestListQueue : public TestCase { 7 | struct OpPaused : public DefaultOptions { 8 | typedef Enable PauseExecution; 9 | typedef ReadWrite AccessInfoType; 10 | typedef Disable Locking; 11 | }; 12 | 13 | class DepTask : public Task { 14 | private: 15 | size_t *value; 16 | bool *success; 17 | size_t idx; 18 | public: 19 | DepTask(Handle &h, size_t *value_, bool *success_, size_t idx_) 20 | : value(value_), success(success_), idx(idx_) { 21 | register_access(ReadWrite::write, h); 22 | } 23 | void run() { 24 | if (*value != idx) 25 | *success = false; 26 | ++(*value); 27 | } 28 | }; 29 | 30 | static bool testDependent(std::string &name) { name = "testDependent"; 31 | { 32 | SuperGlue sg; 33 | Handle h; 34 | size_t value = 0; 35 | bool success = true; 36 | for (size_t i = 0; i < 1000; ++i) 37 | sg.submit(new DepTask(h, &value, &success, i)); 38 | if (value != 0) 39 | return false; 40 | sg.start_executing(); 41 | sg.barrier(); 42 | 43 | if (value != 1000) 44 | return false; 45 | 46 | if (!success) 47 | return false; 48 | } 49 | 50 | { 51 | SuperGlue sg; 52 | Handle 
*h2 = new Handle[1000]; 53 | size_t value = 0; 54 | bool success = true; 55 | for (size_t i = 0; i < 1000; ++i) 56 | sg.submit(new DepTask(h2[i], &value, &success, i)); 57 | if (value != 0) { /* a task ran before start_executing(): report failure, but let the queued tasks finish first so the handle array can be released instead of leaked */ 58 | sg.start_executing(); sg.barrier(); delete[] h2; 59 | return false; 60 | } 61 | 62 | sg.start_executing(); 63 | sg.barrier(); 64 | delete[] h2; 65 | if (value == 0) 66 | return false; 67 | /* NOTE(review): this passes only when some task saw *value != idx, i.e. tasks on independent handles did not run in submission order -- confirm that is the intended expectation */ return !success; 68 | } 69 | } 70 | 71 | public: 72 | 73 | std::string get_name() { return "TestListQueue"; } 74 | /* Returns the static table of test functions and stores its length in numTests. */ 75 | testfunction *get(size_t &numTests) { 76 | static testfunction tests[] = { 77 | testDependent 78 | }; 79 | numTests = sizeof(tests)/sizeof(testfunction); 80 | return tests; 81 | } 82 | }; 83 | 84 | #endif // SG_TEST_LISTQUEUE_HPP_INCLUDED 85 | -------------------------------------------------------------------------------- /test/unit/test_locks.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_LOCKS_HPP_INCLUDED 2 | #define SG_TEST_LOCKS_HPP_INCLUDED 3 | 4 | #include 5 | /* Test case: many tasks with an `add` access to one handle update a shared counter. */ 6 | class TestLocks : public TestCase { 7 | struct OpLockable : public DefaultOptions {}; 8 | 9 | static const char *get_name(OpLockable) { return "testLockable"; } 10 | /* Task that registers an `add` access on the handle and increments *value with a plain (non-atomic) += 1. */ 11 | template 12 | class MyTask : public Task { 13 | private: 14 | size_t *value; 15 | 16 | public: 17 | MyTask(Handle &h, size_t *value_) : value(value_) { 18 | this->register_access(ReadWriteAdd::add, h); 19 | } 20 | void run() { *value += 1; } 21 | }; 22 | /* Submits 1000 MyTask instances on one handle and expects the counter to reach exactly 1000 after the barrier; presumably relies on the runtime serializing `add` accesses -- confirm. */ 23 | template 24 | static bool testLockable(std::string &name) { name = get_name(Op()); 25 | 26 | SuperGlue sg; 27 | Handle h; 28 | 29 | size_t value = 0; 30 | 31 | for (size_t i = 0; i < 1000; ++i) 32 | sg.submit(new MyTask(h, &value)); 33 | sg.barrier(); 34 | 35 | return value == 1000; 36 | } 37 | 38 | public: 39 | 40 | std::string get_name() { return "TestLocks"; } 41 | 42 | testfunction *get(size_t &numTests) { 43 | static testfunction tests[] = { 44 | testLockable 45 | }; 46 | numTests = sizeof(tests)/sizeof(testfunction); 47 | return tests; 48 | 
} 49 | }; 50 | 51 | #endif // SG_TEST_LOCKS_HPP_INCLUDED 52 | 53 | -------------------------------------------------------------------------------- /test/unit/test_rwc.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_RWC_HPP_INCLUDED 2 | #define SG_TEST_RWC_HPP_INCLUDED 3 | 4 | class TestRWC : public TestCase { 5 | struct Options : public DefaultOptions { 6 | typedef ReadWriteConcurrent AccessInfoType; 7 | }; 8 | 9 | class MyTask : public Task { 10 | private: 11 | size_t *flag; 12 | size_t *value; 13 | size_t index; 14 | 15 | public: 16 | MyTask(size_t *flag_, size_t *value_, Handle &h, size_t i) 17 | : flag(flag_), value(value_), index(i) { 18 | register_access(ReadWriteConcurrent::concurrent, h); 19 | } 20 | void run() { 21 | Atomic::decrease(flag); 22 | while (*flag != 0) 23 | Atomic::compiler_fence(); 24 | for (size_t i = 0; i < 100000; ++i) 25 | ++value[index]; 26 | } 27 | }; 28 | 29 | static bool testConcurrent(std::string &name) { name = "testConcurrent"; 30 | SuperGlue sg; 31 | size_t num = (size_t) sg.get_num_cpus(); 32 | size_t flag = num; 33 | std::vector values(num); 34 | Handle h; 35 | for (size_t i = 0; i < num; ++i) 36 | sg.submit(new MyTask(&flag, &values[0], h, i)); 37 | sg.barrier(); 38 | for (size_t i = 0; i < num; ++i) 39 | if (values[i] != 100000) 40 | return false; 41 | return true; 42 | } 43 | 44 | public: 45 | 46 | std::string get_name() { return "TestRWC"; } 47 | 48 | testfunction *get(size_t &numTests) { 49 | static testfunction tests[] = { 50 | testConcurrent 51 | }; 52 | numTests = sizeof(tests)/sizeof(testfunction); 53 | return tests; 54 | } 55 | }; 56 | 57 | #endif // SG_TEST_RWC_HPP_INCLUDED 58 | -------------------------------------------------------------------------------- /test/unit/test_schedver.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_SCHEDVER_HPP_INCLUDED 2 | #define SG_TEST_SCHEDVER_HPP_INCLUDED 3 | 4 | 
#include "sg/option/access_rwc.hpp" 5 | #include 6 | 7 | using namespace sg; 8 | 9 | class TestSchedulerVer : public TestCase { 10 | class ReadWriteAddMul { 11 | public: 12 | enum Type { read = 0, write, add, mul, num_accesses }; 13 | template struct AccessType {}; 14 | }; 15 | 16 | struct OpDefault : public DefaultOptions { 17 | typedef ReadWriteAdd AccessInfoType; 18 | }; 19 | 20 | struct OpMul : public DefaultOptions { 21 | typedef ReadWriteAddMul AccessInfoType; 22 | }; 23 | 24 | struct OpRWC : public DefaultOptions { 25 | typedef ReadWriteConcurrent AccessInfoType; 26 | }; 27 | 28 | static bool testSchedVer(std::string &name) { name = "testSchedVer"; 29 | SchedulerVersion s; 30 | 31 | if (s.next_version() != 1) return false; 32 | if (s.next_version() != 1) return false; 33 | 34 | if (s.schedule(ReadWriteAdd::read) != 0) return false; 35 | if (s.schedule(ReadWriteAdd::read) != 0) return false; 36 | 37 | if (s.schedule(ReadWriteAdd::add) != 2) return false; 38 | if (s.schedule(ReadWriteAdd::add) != 2) return false; 39 | 40 | if (s.schedule(ReadWriteAdd::write) != 4) return false; 41 | if (s.schedule(ReadWriteAdd::write) != 5) return false; 42 | 43 | if (s.next_version() != 7) return false; 44 | 45 | SchedulerVersion s2; 46 | 47 | if (s2.next_version() != 1) return false; 48 | if (s2.next_version() != 1) return false; 49 | 50 | if (s2.schedule(ReadWriteAddMul::mul) != 0) return false; 51 | if (s2.schedule(ReadWriteAddMul::mul) != 0) return false; 52 | 53 | if (s2.schedule(ReadWriteAddMul::add) != 2) return false; 54 | if (s2.schedule(ReadWriteAddMul::add) != 2) return false; 55 | 56 | if (s2.schedule(ReadWriteAddMul::read) != 4) return false; 57 | if (s2.schedule(ReadWriteAddMul::read) != 4) return false; 58 | 59 | if (s2.schedule(ReadWriteAddMul::write) != 6) return false; 60 | if (s2.schedule(ReadWriteAddMul::write) != 7) return false; 61 | 62 | if (s2.next_version() != 9) return false; 63 | 64 | return true; 65 | } 66 | 67 | static bool 
testAccessUtil(std::string &name) { name = "testAccessUtil"; 68 | if (AccessUtil::needs_lock(ReadWriteAdd::read)) return false; 69 | if (!AccessUtil::needs_lock(ReadWriteAdd::add)) return false; 70 | if (AccessUtil::needs_lock(ReadWriteAdd::write)) return false; 71 | 72 | if (AccessUtil::needs_lock(ReadWriteAddMul::read)) return false; 73 | if (!AccessUtil::needs_lock(ReadWriteAddMul::add)) return false; 74 | if (!AccessUtil::needs_lock(ReadWriteAddMul::mul)) return false; 75 | if (AccessUtil::needs_lock(ReadWriteAddMul::write)) return false; 76 | 77 | if (AccessUtil::needs_lock(ReadWriteConcurrent::read)) return false; 78 | if (AccessUtil::needs_lock(ReadWriteConcurrent::write)) return false; 79 | if (AccessUtil::needs_lock(ReadWriteConcurrent::concurrent)) return false; 80 | return true; 81 | } 82 | 83 | public: 84 | 85 | std::string get_name() { return "TestSchedulerVer"; } 86 | 87 | testfunction *get(size_t &numTests) { 88 | static testfunction tests[] = { 89 | testSchedVer, testAccessUtil 90 | }; 91 | numTests = sizeof(tests)/sizeof(testfunction); 92 | return tests; 93 | } 94 | }; 95 | 96 | template<> struct TestSchedulerVer::ReadWriteAddMul::AccessType { 97 | enum { commutative = 1 }; 98 | enum { exclusive = 0 }; 99 | }; 100 | 101 | template<> struct TestSchedulerVer::ReadWriteAddMul::AccessType { 102 | enum { commutative = 0 }; 103 | enum { exclusive = 1 }; 104 | }; 105 | 106 | template<> struct TestSchedulerVer::ReadWriteAddMul::AccessType { 107 | enum { commutative = 1 }; 108 | enum { exclusive = 1 }; 109 | }; 110 | 111 | template<> struct TestSchedulerVer::ReadWriteAddMul::AccessType { 112 | enum { commutative = 1 }; 113 | enum { exclusive = 1 }; 114 | }; 115 | 116 | #endif // SG_TEST_SCHEDVER_HPP_INCLUDED 117 | -------------------------------------------------------------------------------- /test/unit/test_subtasks.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_SUBTASKS_HPP_INCLUDED 2 | #define 
SG_TEST_SUBTASKS_HPP_INCLUDED 3 | 4 | #include 5 | 6 | class TestSubtasks : public TestCase { 7 | 8 | struct Options : public DefaultOptions { 9 | typedef Enable PassTaskExecutor; 10 | typedef Enable Subtasks; 11 | }; 12 | 13 | struct RunOrder { 14 | size_t order[10]; 15 | size_t index; 16 | void store(size_t value) { 17 | size_t pos = Atomic::increase_nv(&index); 18 | order[pos-1] = value; 19 | } 20 | }; 21 | 22 | static bool testSubtasks(std::string &name) { name = "testSubtasks"; 23 | 24 | static volatile bool subtask_may_finish = false; 25 | static RunOrder r; 26 | 27 | struct Subtask : public Task { 28 | size_t value; 29 | Subtask(size_t value_) : value(value_) {} 30 | void run(TaskExecutor &te) { 31 | while (!subtask_may_finish); 32 | r.store(value); 33 | } 34 | }; 35 | 36 | struct BigTask : public Task { 37 | BigTask(Handle &h) { 38 | register_access(ReadWriteAdd::write, h); 39 | } 40 | void run(TaskExecutor &te) { 41 | r.store(0); 42 | te.submit(new Subtask(1)); 43 | te.submit(new Subtask(2)); 44 | te.submit(new Subtask(3)); 45 | te.submit(new Subtask(4)); 46 | } 47 | }; 48 | 49 | struct NextTask : public Task { 50 | NextTask(Handle &h) { 51 | register_access(ReadWriteAdd::write, h); 52 | } 53 | void run(TaskExecutor &te) { 54 | r.store(5); 55 | subtask_may_finish = true; 56 | } 57 | }; 58 | 59 | 60 | SuperGlue sg; 61 | Handle h; 62 | 63 | sg.submit(new BigTask(h)); 64 | sg.submit(new NextTask(h)); // may start before bigtasks subtasks are finished 65 | sg.barrier(); 66 | 67 | assert(r.order[0] == 0); 68 | assert(r.order[1] == 5); 69 | assert(1 <= r.order[2] && r.order[2] <= 4); 70 | assert(1 <= r.order[3] && r.order[3] <= 4); 71 | assert(1 <= r.order[4] && r.order[4] <= 4); 72 | assert(1 <= r.order[5] && r.order[5] <= 4); 73 | 74 | return true; 75 | } 76 | 77 | static bool testSubtasksDep(std::string &name) { name = "testSubtasksDep"; 78 | static volatile unsigned int subtask_finished = 0; 79 | static RunOrder r; 80 | 81 | struct Subtask : public Task { 82 
| size_t value; 83 | Subtask(size_t value_) : value(value_) {} 84 | void run(TaskExecutor &te) { 85 | r.store(value); 86 | Atomic::increase(&subtask_finished); 87 | } 88 | }; 89 | 90 | struct BigTask : public Task { 91 | BigTask(Handle &h) { 92 | register_access(ReadWriteAdd::write, h); 93 | } 94 | void run(TaskExecutor &te) { 95 | r.store(0); 96 | te.submit(new Subtask(1)); 97 | te.submit(new Subtask(2)); 98 | te.submit(new Subtask(3)); 99 | te.submit(new Subtask(4)); 100 | // subtasks may start directly 101 | while (subtask_finished != 4); 102 | } 103 | }; 104 | 105 | struct NextTask : public Task { 106 | NextTask(Handle &h) { 107 | register_access(ReadWriteAdd::write, h); 108 | } 109 | void run(TaskExecutor &te) { 110 | r.store(5); 111 | } 112 | }; 113 | 114 | 115 | SuperGlue sg; 116 | Handle h; 117 | 118 | sg.submit(new BigTask(h)); 119 | sg.submit(new NextTask(h)); // must wait on BigTask 120 | sg.barrier(); 121 | 122 | assert(r.order[0] == 0); 123 | assert(r.order[5] == 5); 124 | assert(1 <= r.order[1] && r.order[1] <= 4); 125 | assert(1 <= r.order[2] && r.order[2] <= 4); 126 | assert(1 <= r.order[3] && r.order[3] <= 4); 127 | assert(1 <= r.order[4] && r.order[4] <= 4); 128 | 129 | return true; 130 | } 131 | 132 | public: 133 | 134 | std::string get_name() { return "TestSubtasks"; } 135 | 136 | testfunction *get(size_t &numTests) { 137 | static testfunction tests[] = { 138 | testSubtasks, testSubtasksDep 139 | }; 140 | numTests = sizeof(tests)/sizeof(testfunction); 141 | return tests; 142 | } 143 | }; 144 | 145 | #endif // SG_TEST_SUBTASKS_HPP_INCLUDED 146 | -------------------------------------------------------------------------------- /test/unit/test_taskqueue.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_TASKQUEUE_HPP_INCLUDED 2 | #define SG_TEST_TASKQUEUE_HPP_INCLUDED 3 | 4 | #include "test_taskqueue_impl.hpp" 5 | 6 | #include 7 | 8 | class TestTaskQueue : public TestCase { 9 | struct OpDefault 
: public DefaultOptions {}; 10 | /* Runs the shared queue ordering checks (TaskQueueTest::testTaskQueueImpl) with the OpDefault options. */ 11 | static bool testTaskQueue(std::string &name) { 12 | return TaskQueueTest::testTaskQueueImpl(name); 13 | } 14 | /* NOTE(review): despite its name this forwards to testTaskQueueImpl, not testEraseIfImpl, so the erase_if checks never run and the reported test name is "testTaskQueue". Possibly a copy-paste slip or a deliberate workaround -- confirm before switching: testEraseIfImpl uses Options::TaskQueueUnsafeType whereas testTaskQueueImpl uses Options::ReadyListType::unsafe_t. */ 15 | static bool testEraseIf(std::string &name) { 16 | return TaskQueueTest::testTaskQueueImpl(name); 17 | } 18 | 19 | public: 20 | 21 | std::string get_name() { return "TestTaskQueue"; } 22 | /* Returns the static table of test functions and stores its length in numTests. */ 23 | testfunction *get(size_t &numTests) { 24 | static testfunction tests[] = { 25 | testTaskQueue, testEraseIf 26 | }; 27 | numTests = sizeof(tests)/sizeof(testfunction); 28 | return tests; 29 | } 30 | }; 31 | 32 | #endif // SG_TEST_TASKQUEUE_HPP_INCLUDED 33 | -------------------------------------------------------------------------------- /test/unit/test_taskqueue_impl.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_TASKQUEUE_IMPL_HPP_INCLUDED 2 | #define SG_TEST_TASKQUEUE_IMPL_HPP_INCLUDED 3 | /* Queue test bodies shared by the per-queue-type test cases; the queue type under test is taken from the Options parameter. */ 4 | template 5 | struct TaskQueueTest { 6 | /* Minimal task that only carries a name, so the tests can tell which element was popped. */ 7 | struct MyTask : public TaskBase { 8 | std::string name; 9 | MyTask(std::string name_) : name(name_) {} 10 | void run() {} 11 | }; 12 | /* Predicate that is true for tasks named "B"; assumes every task in the queue is a MyTask (unchecked cast). */ 13 | struct IsBPred { 14 | bool operator()(TaskBase *task) { 15 | return ((MyTask *) task)->name == "B"; 16 | } 17 | }; 18 | /* Returns the name of a task; same unchecked MyTask cast as IsBPred. */ 19 | static const std::string &name(TaskBase *task) { 20 | return ((MyTask *) task)->name; 21 | } 22 | /* Checks push_front/push_back against pop_front/pop_back ordering and empty() on Options::ReadyListType::unsafe_t. Popped tasks are not deleted here. */ 23 | static bool testTaskQueueImpl(std::string &testname) { testname = "testTaskQueue"; 24 | typename Options::ReadyListType::unsafe_t q; 25 | 26 | if (!q.empty()) return false; 27 | q.push_back(new MyTask("A")); 28 | if (q.empty()) return false; 29 | q.push_front(new MyTask("B")); 30 | q.push_back(new MyTask("C")); 31 | q.push_back(new MyTask("D")); 32 | 33 | TaskBase *task; 34 | 35 | if (!q.pop_front(task) || name(task) != "B") return false; 36 | if (!q.pop_front(task) || name(task) != "A") return false; 37 | if (!q.pop_front(task) || name(task) != "C") return false; 38 | if (!q.pop_front(task) || name(task) != "D") return false; 39 | if (!q.empty() || q.pop_front(task)) 
return false; 40 | 41 | q.push_front(new MyTask("A")); 42 | q.push_front(new MyTask("B")); 43 | q.push_front(new MyTask("C")); 44 | q.push_front(new MyTask("D")); 45 | 46 | if (!q.pop_back(task) || name(task) != "A") return false; 47 | if (!q.pop_back(task) || name(task) != "B") return false; 48 | if (!q.pop_back(task) || name(task) != "C") return false; 49 | if (!q.pop_back(task) || name(task) != "D") return false; 50 | if (!q.empty() || q.pop_back(task)) return false; 51 | 52 | if (!q.empty()) return false; 53 | q.push_back(new MyTask("A")); 54 | if (q.empty()) return false; 55 | q.push_front(new MyTask("B")); 56 | 57 | if (!q.pop_front(task)) return false; 58 | if (name(task) != "B") return false; 59 | if (!q.pop_front(task)) return false; 60 | if (name(task) != "A") return false; 61 | if (q.pop_front(task)) return false; 62 | 63 | q.push_front(new MyTask("A")); 64 | q.push_front(new MyTask("B")); 65 | 66 | if (!q.pop_back(task)) return false; 67 | if (name(task) != "A") return false; 68 | if (!q.pop_back(task)) return false; 69 | if (name(task) != "B") return false; 70 | if (q.pop_back(task)) return false; 71 | 72 | return true; 73 | } 74 | 75 | static bool testEraseIfImpl(std::string &testname) { testname = "testEraseIf"; 76 | TaskBase *task; 77 | { 78 | typename Options::TaskQueueUnsafeType q; 79 | q.push_back(new MyTask("A")); 80 | q.push_back(new MyTask("E")); 81 | q.push_back(new MyTask("C")); 82 | q.push_back(new MyTask("D")); 83 | q.erase_if(IsBPred()); 84 | if (!q.pop_front(task) || name(task) != "A") return false; 85 | if (!q.pop_front(task) || name(task) != "E") return false; 86 | if (!q.pop_front(task) || name(task) != "C") return false; 87 | if (!q.pop_front(task) || name(task) != "D") return false; 88 | if (!q.empty() || q.pop_front(task)) return false; 89 | } 90 | { 91 | typename Options::TaskQueueUnsafeType q; 92 | q.push_back(new MyTask("A")); 93 | q.push_back(new MyTask("B")); 94 | q.push_back(new MyTask("C")); 95 | q.erase_if(IsBPred()); 96 | 
if (!q.pop_front(task) || name(task) != "A") return false; 97 | if (!q.pop_front(task) || name(task) != "C") return false; 98 | if (!q.empty() || q.pop_front(task)) return false; 99 | } 100 | { 101 | typename Options::TaskQueueUnsafeType q; 102 | q.push_back(new MyTask("B")); 103 | q.push_back(new MyTask("B")); 104 | q.push_back(new MyTask("B")); 105 | q.push_back(new MyTask("B")); 106 | q.push_back(new MyTask("B")); 107 | q.erase_if(IsBPred()); 108 | if (!q.empty() || q.pop_front(task)) return false; 109 | } 110 | { 111 | typename Options::TaskQueueUnsafeType q; 112 | q.push_back(new MyTask("B")); 113 | q.push_back(new MyTask("A")); 114 | q.push_back(new MyTask("B")); 115 | q.push_back(new MyTask("C")); 116 | q.push_back(new MyTask("B")); 117 | q.erase_if(IsBPred()); 118 | if (!q.pop_front(task) || name(task) != "A") return false; 119 | if (!q.pop_front(task) || name(task) != "C") return false; 120 | if (!q.empty() || q.pop_front(task)) return false; 121 | } 122 | { 123 | typename Options::TaskQueueUnsafeType q; 124 | q.push_back(new MyTask("A")); 125 | q.push_back(new MyTask("B")); 126 | q.push_back(new MyTask("B")); 127 | q.push_back(new MyTask("C")); 128 | q.erase_if(IsBPred()); 129 | if (!q.pop_front(task) || name(task) != "A") return false; 130 | if (!q.pop_front(task) || name(task) != "C") return false; 131 | if (!q.empty() || q.pop_front(task)) return false; 132 | } 133 | 134 | return true; 135 | } 136 | }; 137 | 138 | #endif // SG_TEST_TASKQUEUE_IMPL_HPP_INCLUDED 139 | -------------------------------------------------------------------------------- /test/unit/test_taskqueuedeque.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_DEQUETASKQUEUE_HPP_INCLUDED 2 | #define SG_TEST_DEQUETASKQUEUE_HPP_INCLUDED 3 | 4 | #include "sg/option/taskqueue_deque.hpp" 5 | #include "test_taskqueue_impl.hpp" 6 | 7 | #include 8 | 9 | class TestTaskQueueDeque : public TestCase { 10 | struct OpDefault : public 
DefaultOptions { 11 | /* NOTE(review): the shared test body instantiates Options::ReadyListType::unsafe_t, but this option struct only sets TaskQueueType; unless DefaultOptions derives ReadyListType from TaskQueueType, the deque queue is not what gets tested -- verify. */ typedef TaskQueueDeque TaskQueueType; 12 | }; 13 | /* Runs the shared queue ordering checks with the deque options. */ 14 | static bool testTaskQueue(std::string &name) { 15 | return TaskQueueTest::testTaskQueueImpl(name); 16 | } 17 | /* NOTE(review): forwards to testTaskQueueImpl, not testEraseIfImpl, so erase_if is never exercised here -- confirm whether that is intentional. */ 18 | static bool testEraseIf(std::string &name) { 19 | return TaskQueueTest::testTaskQueueImpl(name); 20 | } 21 | 22 | public: 23 | 24 | std::string get_name() { return "TestTaskQueueDeque"; } 25 | /* Returns the static table of test functions and stores its length in numTests. */ 26 | testfunction *get(size_t &numTests) { 27 | static testfunction tests[] = { 28 | testTaskQueue, testEraseIf 29 | }; 30 | numTests = sizeof(tests)/sizeof(testfunction); 31 | return tests; 32 | } 33 | }; 34 | 35 | #endif // SG_TEST_DEQUETASKQUEUE_HPP_INCLUDED 36 | -------------------------------------------------------------------------------- /test/unit/test_taskqueueprio.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_TASKQUEUEPRIO_HPP_INCLUDED 2 | #define SG_TEST_TASKQUEUEPRIO_HPP_INCLUDED 3 | 4 | #include "test_taskqueue_impl.hpp" 5 | #include "sg/option/taskqueue_prio.hpp" 6 | 7 | #include 8 | 9 | using namespace sg; 10 | /* Test case for the priority task queue: runs the shared queue checks and a priority-ordering check. */ 11 | class TestTaskQueuePrio : public TestCase { 12 | struct OpDefault : public DefaultOptions { 13 | typedef TaskQueuePrio ReadyListType; 14 | typedef TaskQueuePrio WaitListType; 15 | }; 16 | /* Task base that carries a name so popped tasks can be identified. */ 17 | struct NamedTask : public Task { 18 | std::string name; 19 | NamedTask(const std::string &name_) : name(name_) {} 20 | }; 21 | /* Named task that leaves is_prioritized untouched. */ 22 | struct LowPrioTask : public NamedTask { 23 | LowPrioTask(const std::string &name_) : NamedTask(name_) {} 24 | void run() {} 25 | }; 26 | /* Named task that sets is_prioritized = true in its constructor. */ 27 | struct HighPrioTask : public NamedTask { 28 | HighPrioTask(const std::string &name_) : NamedTask(name_) { 29 | is_prioritized = true; 30 | } 31 | void run() {} 32 | }; 33 | /* Runs the shared queue ordering checks with the priority-queue options. */ 34 | static bool testTaskQueue(std::string &name) { 35 | return TaskQueueTest::testTaskQueueImpl(name); 36 | } 37 | /* NOTE(review): forwards to testTaskQueueImpl, not testEraseIfImpl, so erase_if is never exercised here -- confirm whether that is intentional. */ 38 | static bool testEraseIf(std::string &name) { 39 | return TaskQueueTest::testTaskQueueImpl(name); 40 | } 41 | 42 | static const std::string 
&name(TaskBase *task) { 43 | return static_cast(task)->name; 44 | } 45 | 46 | static bool testPrio(std::string &testname) { testname = "testTaskQueuePrio"; 47 | typedef OpDefault::ReadyListType::unsafe_t TaskQueueUnsafe; 48 | 49 | TaskQueueUnsafe q; 50 | TaskBase *task; 51 | 52 | if (!q.empty()) return false; 53 | q.push_back(new LowPrioTask("B")); 54 | q.push_back(new HighPrioTask("A")); 55 | q.push_back(new LowPrioTask("B")); 56 | 57 | if (!q.pop_front(task) || name(task) != "A") return false; 58 | if (!q.pop_front(task) || name(task) != "B") return false; 59 | if (!q.pop_front(task) || name(task) != "B") return false; 60 | if (!q.empty() || q.pop_front(task)) return false; 61 | 62 | q.push_back(new LowPrioTask("B")); 63 | q.push_back(new HighPrioTask("A")); 64 | q.push_back(new LowPrioTask("B")); 65 | 66 | if (!q.pop_back(task) || name(task) != "A") return false; 67 | if (!q.pop_back(task) || name(task) != "B") return false; 68 | if (!q.pop_back(task) || name(task) != "B") return false; 69 | if (!q.empty() || q.pop_front(task)) return false; 70 | 71 | q.push_front(new LowPrioTask("B")); 72 | q.push_front(new HighPrioTask("A")); 73 | q.push_front(new LowPrioTask("B")); 74 | 75 | if (!q.pop_front(task) || name(task) != "A") return false; 76 | if (!q.pop_front(task) || name(task) != "B") return false; 77 | if (!q.pop_front(task) || name(task) != "B") return false; 78 | if (!q.empty() || q.pop_front(task)) return false; 79 | 80 | q.push_front(new LowPrioTask("B")); 81 | q.push_front(new HighPrioTask("A")); 82 | q.push_front(new LowPrioTask("B")); 83 | 84 | if (!q.pop_back(task) || name(task) != "A") return false; 85 | if (!q.pop_back(task) || name(task) != "B") return false; 86 | if (!q.pop_back(task) || name(task) != "B") return false; 87 | if (!q.empty() || q.pop_front(task)) return false; 88 | 89 | return true; 90 | } 91 | 92 | public: 93 | 94 | std::string get_name() { return "TestTaskQueuePrio"; } 95 | 96 | testfunction *get(size_t &numTests) { 97 | static 
testfunction tests[] = { 98 | testTaskQueue, testEraseIf, testPrio 99 | }; 100 | numTests = sizeof(tests)/sizeof(testfunction); 101 | return tests; 102 | } 103 | }; 104 | 105 | #endif // SG_TEST_TASKQUEUEPRIO_HPP_INCLUDED 106 | -------------------------------------------------------------------------------- /test/unit/test_tasks.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SG_TEST_TASKS_HPP_INCLUDED 2 | #define SG_TEST_TASKS_HPP_INCLUDED 3 | 4 | #include "sg/option/access_readwrite.hpp" 5 | 6 | #include "sg/platform/threads.hpp" 7 | #include "sg/platform/threadutil.hpp" 8 | 9 | #include 10 | 11 | class TestTasks : public TestCase { 12 | struct OpDefault : public DefaultOptions {}; 13 | struct OpNoSteal : public DefaultOptions { 14 | typedef Disable Stealing; 15 | }; 16 | struct OpPaused : public DefaultOptions { 17 | typedef Disable Lockable; 18 | typedef ReadWrite AccessInfoType; 19 | typedef Enable PauseExecution; 20 | }; 21 | 22 | class MyTask : public Task { 23 | private: 24 | size_t *value; 25 | 26 | public: 27 | MyTask(size_t *value_) : value(value_) {} 28 | void run() { 29 | Atomic::increase(value); 30 | } 31 | }; 32 | 33 | class ParallelTask : public Task { 34 | private: 35 | size_t *value; 36 | size_t myValue, otherValue; 37 | 38 | public: 39 | ParallelTask(size_t *value_, size_t myValue_, size_t otherValue_) 40 | : value(value_), myValue(myValue_), otherValue(otherValue_) {} 41 | void run() { 42 | for (size_t i = 0; i < 1000; ++i) 43 | Atomic::cas((void **)value, (void *)myValue, (void *)otherValue); 44 | } 45 | }; 46 | 47 | static bool testBarrier(std::string &name) { name = "testBarrier"; 48 | 49 | SuperGlue sg; 50 | 51 | size_t value = 0; 52 | 53 | sg.barrier(); 54 | for (size_t i = 0; i < 1000; ++i) 55 | sg.submit(new MyTask(&value)); 56 | sg.barrier(); 57 | sg.barrier(); 58 | 59 | return value == 1000; 60 | } 61 | 62 | static bool testStealing(std::string &name) { name = "testStealing"; 63 | 
SuperGlue sg; 64 | size_t value = 0; 65 | for (size_t i = 0; i < 100; ++i) { 66 | sg.submit(new ParallelTask(&value, 0, 1), 0); 67 | sg.submit(new ParallelTask(&value, 1, 0), 0); 68 | } 69 | sg.barrier(); 70 | return true; 71 | } 72 | 73 | class NoStealingTask : public Task { 74 | private: 75 | ThreadIDType *threadid; 76 | bool *success; 77 | 78 | public: 79 | NoStealingTask(ThreadIDType *threadid_, bool *success_) 80 | : threadid(threadid_), success(success_) {} 81 | void run() { 82 | if (*threadid == 0) 83 | *threadid = ThreadUtil::get_current_thread_id(); 84 | else if (*threadid != ThreadUtil::get_current_thread_id()) 85 | *success = false; 86 | } 87 | }; 88 | 89 | static bool testNoStealing(std::string &name) { name = "testNoStealing"; 90 | SuperGlue sg; 91 | ThreadIDType threadid = 0; 92 | bool success = true; 93 | for (size_t i = 0; i < 100; ++i) { 94 | sg.submit(new NoStealingTask(&threadid, &success), 0); 95 | sg.submit(new NoStealingTask(&threadid, &success), 0); 96 | } 97 | sg.barrier(); 98 | return success; 99 | } 100 | 101 | class DepTask : public Task { 102 | private: 103 | size_t *value; 104 | bool *success; 105 | size_t idx; 106 | public: 107 | DepTask(Handle &h, size_t *value_, bool *success_, size_t idx_) 108 | : value(value_), success(success_), idx(idx_) { 109 | register_access(ReadWrite::write, h); 110 | } 111 | void run() { 112 | if (*value != idx) 113 | *success = false; 114 | ++(*value); 115 | } 116 | }; 117 | 118 | static bool testDependent(std::string &name) { name = "testDependent"; 119 | { 120 | Handle h; 121 | SuperGlue sg; 122 | size_t value = 0; 123 | bool success = true; 124 | for (size_t i = 0; i < 1000; ++i) 125 | sg.submit(new DepTask(h, &value, &success, i)); 126 | assert(value == 0); 127 | sg.start_executing(); 128 | sg.barrier(); 129 | 130 | if (value != 1000) 131 | return false; 132 | 133 | if (!success) 134 | return false; 135 | } 136 | 137 | { 138 | Handle *h2 = new Handle[1000]; 139 | SuperGlue sg; 140 | size_t value = 0; 
141 | bool success = true; 142 | for (size_t i = 0; i < 1000; ++i) 143 | sg.submit(new DepTask(h2[i], &value, &success, i)); 144 | assert(value == 0); 145 | 146 | sg.start_executing(); 147 | sg.barrier(); 148 | delete[] h2; 149 | if (value == 0) 150 | return false; 151 | return !success; 152 | } 153 | } 154 | 155 | public: 156 | 157 | std::string get_name() { return "TestTasks"; } 158 | 159 | testfunction *get(size_t &numTests) { 160 | static testfunction tests[] = { 161 | testBarrier, testStealing, testNoStealing, testDependent 162 | }; 163 | numTests = sizeof(tests)/sizeof(testfunction); 164 | return tests; 165 | } 166 | }; 167 | 168 | #endif // SG_TEST_TASKS_HPP_INCLUDED 169 | -------------------------------------------------------------------------------- /test/util/log_inc.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ///////////////////// TIMING 5 | 6 | typedef unsigned long long LOG_TimeUnit; 7 | 8 | static inline LOG_TimeUnit LOG_getTimeStart() { 9 | unsigned hi, lo; 10 | 11 | __asm__ __volatile__ ("cpuid\n" 12 | "rdtsc\n" 13 | : "=a" (lo), "=d" (hi) 14 | :: "%rbx", "%rcx"); 15 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 16 | } 17 | 18 | static inline LOG_TimeUnit LOG_getTimeStop() { 19 | unsigned hi, lo; 20 | 21 | __asm__ __volatile__ ("rdtscp\n" 22 | "mov %%edx, %0\n\t" 23 | "mov %%eax, %1\n\t" 24 | "cpuid\n" 25 | : "=r" (hi), "=r" (lo) 26 | :: "%rax", "%rbx", "%rcx", "%rdx"); 27 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 28 | } 29 | 30 | #ifdef LOGGING 31 | ///////////////////// LOGGING 32 | 33 | #define LOG_MAX_ENTRIES 1000000 34 | 35 | static inline unsigned long LOG_getThread() { 36 | return pthread_self(); 37 | } 38 | 39 | typedef struct { 40 | LOG_TimeUnit start, length; 41 | char text[64 - 2*sizeof(LOG_TimeUnit)- sizeof(unsigned long)]; 42 | unsigned long thread; 43 | } LOG_entry; 44 | 45 | LOG_entry 
LOG_data[LOG_MAX_ENTRIES]; 46 | size_t LOG_ptr; 47 | 48 | void LOG_init() { 49 | } 50 | 51 | static inline void LOG_add(const char *text, LOG_TimeUnit start, LOG_TimeUnit stop) { 52 | size_t i = __sync_fetch_and_add(&LOG_ptr, 1); 53 | strcpy(LOG_data[i].text, text); 54 | LOG_data[i].start = start; 55 | LOG_data[i].length = stop-start; 56 | LOG_data[i].thread = LOG_getThread(); 57 | } 58 | 59 | static void LOG_dump(const char *filename) { 60 | FILE *out = fopen(filename, "w"); 61 | fprintf(out, "LOG 2\n"); 62 | LOG_TimeUnit minimum = LOG_data[0].start; 63 | for (size_t i = 0; i < LOG_ptr; ++i) { 64 | if (LOG_data[i].start < minimum) 65 | minimum = LOG_data[i].start; 66 | } 67 | for (size_t i = 0; i < LOG_ptr; ++i) { 68 | fprintf(out, "%lu: %llu %llu %s\n", 69 | LOG_data[i].thread, 70 | LOG_data[i].start-minimum, 71 | LOG_data[i].length, 72 | LOG_data[i].text); 73 | } 74 | fclose(out); 75 | } 76 | 77 | #ifdef LOGGING_PERF 78 | ///////////////////// PERFORMANCE_COUNTERS 79 | 80 | int LOG_fd; 81 | 82 | unsigned long long LOG_readCounter() { 83 | unsigned long long count; 84 | size_t res = read(LOG_fd, &count, sizeof(unsigned long long)); 85 | if (res != sizeof(unsigned long long)) { 86 | fprintf(stderr, "read() failed %d", res); 87 | exit(1); 88 | } 89 | return count; 90 | } 91 | 92 | void LOG_init_perf(unsigned int type, unsigned long long config) { 93 | struct perf_event_attr attr = {0}; 94 | attr.type = type; 95 | attr.config = config; 96 | attr.inherit = 1; 97 | attr.disabled = 1; 98 | attr.size = sizeof(struct perf_event_attr); 99 | LOG_fd = syscall(__NR_perf_event_open, attr, 0, -1, -1, 0); 100 | 101 | if (LOG_fd < 0) { 102 | fprintf(stderr, "sys_perf_event_open failed %d", fd); 103 | exit(1); 104 | } 105 | } 106 | 107 | unsigned long long LOG_getCacheStart() { 108 | unsigned long long count; 109 | count = LOG_readCounter(); 110 | ioctl(LOG_fd, PERF_EVENT_IOC_ENABLE); 111 | return count; 112 | } 113 | unsigned long long LOG_getCacheStop() { 114 | unsigned long 
long count; 115 | ioctl(LOG_fd, PERF_EVENT_IOC_DISABLE); 116 | count = LOG_readCounter(); 117 | return count; 118 | } 119 | 120 | #else // LOGGING_PERF 121 | #define LOG_init_perf(a,b) 122 | #define LOG_getCacheStart() 0 123 | #define LOG_getCacheStop() 0 124 | #endif 125 | 126 | #else // LOGGING 127 | #define LOG_add(a,b,c) 128 | #define LOG_dump(a) 129 | #define LOG_init_perf(a,b) 130 | #define LOG_getCacheStart() 0 131 | #define LOG_getCacheStop() 0 132 | #endif 133 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | FLAGS=-O3 -pedantic -Wall -Wno-long-long -I superglue/ 2 | 3 | viewer: viewer.cpp 4 | $(CXX) $(FLAGS) viewer.cpp -lGL -lglut -o $@ 5 | 6 | clean: 7 | rm -f viewer 8 | --------------------------------------------------------------------------------