├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── example.cpp ├── high_resolution_timer.h └── hp-threadpool.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | 4 | language: cpp 5 | 6 | os: linux 7 | 8 | compiler: g++ 9 | 10 | script: 11 | - mkdir build 12 | - cd build 13 | - cmake -DCMAKE_BUILD_TYPE=Release .. 14 | - make 15 | - cd .. 
16 | 17 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.0 FATAL_ERROR) 2 | 3 | project(example-hp-threadpool) 4 | 5 | # set(CMAKE_BUILD_TYPE "Release") 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra") 7 | 8 | if(UNIX) 9 | set(CMAKE_USE_PTHREADS_INIT "pthread") 10 | endif(UNIX) 11 | 12 | add_executable(example example.cpp) 13 | 14 | target_link_libraries (example ${CMAKE_USE_PTHREADS_INIT}) 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Aimin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | high-performance-thread-pool (hp-threadpool) 2 | ================= 3 | [![Build Status](https://travis-ci.org/7starsea/hp-threadpool.svg?branch=master)](https://travis-ci.org/7starsea/hp-threadpool) 4 | [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) 5 | 6 | HPThreadPool was designed with a specific application in mind: a high-frequency trading system. 7 | Common Features: 8 | 9 | * It is highly scalable and fast. 10 | * It is header-only and lock-free. 11 | * No external dependencies; only the C++11 standard library is needed. 12 | 13 | # How to use 14 | You only need to include the file **hp-threadpool.hpp** in your project. Please take a look at **example.cpp** to see how to use the APIs of **hp-threadpool.hpp**. 15 | 16 | # Extra Contribution 17 | We also provide a cross-platform **high_resolution_timer.h** for benchmarking. 
// ============================================================================
// Single-file listing of three sources from the hp-threadpool repository:
//     /high_resolution_timer.h, /hp-threadpool.hpp, /example.cpp
//
// The two headers are placed before example.cpp so that the listing is one
// valid translation unit; each section keeps its own include guard so it can
// be split back into separate files.
//
// NOTE(review): the text this listing was rebuilt from had lost every
// `#include <...>` target and every template parameter/argument list, plus the
// end of example.cpp and the first 25 lines of high_resolution_timer.h.
// Includes and template arguments below were restored from how the code uses
// them; places that could not be recovered are marked NOTE(review).
// ============================================================================


// ----------------------------------------------------------------------------
// /high_resolution_timer.h
// ----------------------------------------------------------------------------
// NOTE(review): the include-guard name and the opening Windows test are a
// reconstruction (the original lines are missing) - confirm against upstream.
#ifndef HIGH_RESOLUTION_TIMER_H
#define HIGH_RESOLUTION_TIMER_H

#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>

/// @brief Stopwatch built on QueryPerformanceCounter.
///
/// The constructor and start() record a start tick; each *_elapsed() call
/// samples the counter again and returns the difference scaled by the
/// frequency reported by QueryPerformanceFrequency.
class HighResolutionTimer
{
public:
    const static char version = 'W';  ///< Windows
public:
    HighResolutionTimer() {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        high_res_frequency_ = static_cast<double>(frequency.QuadPart);
        QueryPerformanceCounter(&start_time_);
    }

    /// @brief Restart the measurement from "now".
    inline void start() {
        QueryPerformanceCounter(&start_time_);
    }

    /// @return time since the last start() (or construction), in seconds.
    inline double seconds_elapsed() {
        QueryPerformanceCounter(&end_time_);
        return (end_time_.QuadPart - start_time_.QuadPart) / high_res_frequency_;
    }

    /// @return time since the last start() (or construction), in milliseconds.
    inline double milliseconds_elapsed() {
        QueryPerformanceCounter(&end_time_);
        return 1000 * (end_time_.QuadPart - start_time_.QuadPart) / high_res_frequency_;
    }

    /// @return time since the last start() (or construction), in microseconds.
    inline double microseconds_elapsed() {
        QueryPerformanceCounter(&end_time_);
        return 1000000 * (end_time_.QuadPart - start_time_.QuadPart) / high_res_frequency_;
    }

private:
    LARGE_INTEGER start_time_;
    LARGE_INTEGER end_time_;
    double high_res_frequency_;
};


#elif defined(linux) || defined(__linux) || defined(__linux__)
#include <time.h>

/// @brief Stopwatch built on clock_gettime().
///
/// Clock ids mentioned by the original author:
///   CLOCK_REALTIME,           a system-wide realtime clock.
///   CLOCK_PROCESS_CPUTIME_ID, high-resolution timer provided by the CPU for each process.
///   CLOCK_THREAD_CPUTIME_ID,  high-resolution timer provided by the CPU for each of the threads.
///
/// NOTE(review): the default stays CLOCK_REALTIME to keep existing callers
/// unchanged; callers that need an interval immune to wall-clock adjustments
/// can pass CLOCK_MONOTONIC explicitly.
class HighResolutionTimer
{
public:
    const static char version = 'L';  ///< Linux
public:
    /// @param clk_id clock passed to every clock_gettime() call of this timer.
    HighResolutionTimer(const clockid_t clk_id = CLOCK_REALTIME)
        : clk_id_(clk_id) {
        clock_gettime(clk_id_, &start_time_);
    }

    /// @brief Restart the measurement from "now".
    inline void start() {
        clock_gettime(clk_id_, &start_time_);
    }

    /// @return time since the last start() (or construction), in seconds.
    inline double seconds_elapsed() {
        clock_gettime(clk_id_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec)
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000000000.0;
    }

    /// @return time since the last start() (or construction), in milliseconds.
    inline double milliseconds_elapsed() {
        clock_gettime(clk_id_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000.0
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000000.0;
    }

    /// @return time since the last start() (or construction), in microseconds.
    inline double microseconds_elapsed() {
        clock_gettime(clk_id_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000000.0
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000.0;
    }

private:
    const clockid_t clk_id_;
    timespec start_time_;
    timespec end_time_;
};


#elif defined(macintosh) || defined(Macintosh) || (defined(__APPLE__) && defined(__MACH__))  // Mac OS
// NOTE(review): the names of the three headers included here were lost; these
// are the ones the code below needs - confirm against upstream.
#include <mach/clock.h>
#include <mach/mach.h>
#include <time.h>

// Fallback declarations for SDKs without POSIX clock ids.
// NOTE(review): the extra CLOCK_REALTIME test is an addition; presumably an
// SDK that defines CLOCK_REALTIME also declares clockid_t, in which case the
// typedef must not be repeated - verify on the targeted SDK versions.
#if !defined(_CLOCKID_T) && !defined(CLOCK_REALTIME)
#define _CLOCKID_T
typedef int clockid_t;  /* clock identifier type */
#endif

#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC ((clockid_t)0)
/* system-wide monotonic clock (aka system time) */
#endif

#ifndef CLOCK_REALTIME
#define CLOCK_REALTIME ((clockid_t)-1)
/* system-wide real time clock */
#endif

#ifndef CLOCK_PROCESS_CPUTIME_ID
#define CLOCK_PROCESS_CPUTIME_ID ((clockid_t)-2)
/* clock measuring the used CPU time of the current process */
#endif

#ifndef CLOCK_THREAD_CPUTIME_ID
#define CLOCK_THREAD_CPUTIME_ID ((clockid_t)-3)
/* clock measuring the used CPU time of the current thread */
#endif

/// @brief Stopwatch built on the Mach SYSTEM_CLOCK service.
///
/// The constructor acquires a clock service port and the destructor releases
/// it with mach_port_deallocate().
class HighResolutionTimer
{
public:
    const static char version = 'M';  ///< MAC
public:
    /// @param clk_id accepted for signature parity with the Linux variant;
    ///        the body always uses SYSTEM_CLOCK and never reads it.
    HighResolutionTimer(const clockid_t clk_id = CLOCK_REALTIME)
        : clock_service_(clock_serv_t())
    {
        (void)clk_id;
        host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &clock_service_);
        clock_get_time(clock_service_, &start_time_);
    }

    ~HighResolutionTimer() {
        mach_port_deallocate(mach_task_self(), clock_service_);
    }

    /// @brief Restart the measurement from "now".
    inline void start() {
        clock_get_time(clock_service_, &start_time_);
    }

    /// @return time since the last start() (or construction), in seconds.
    inline double seconds_elapsed() {
        clock_get_time(clock_service_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec)
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000000000.0;
    }

    /// @return time since the last start() (or construction), in milliseconds.
    inline double milliseconds_elapsed() {
        clock_get_time(clock_service_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000.0
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000000.0;
    }

    /// @return time since the last start() (or construction), in microseconds.
    inline double microseconds_elapsed() {
        clock_get_time(clock_service_, &end_time_);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000000.0
             + (end_time_.tv_nsec - start_time_.tv_nsec) / 1000.0;
    }

private:
    clock_serv_t clock_service_;

    mach_timespec_t start_time_;
    mach_timespec_t end_time_;
};


#else  // all other systems
#include <sys/time.h>

/// @brief Stopwatch built on gettimeofday().
class HighResolutionTimer
{
public:
    const static char version = 'O';  ///< Other
public:
    HighResolutionTimer() {
        gettimeofday(&start_time_, nullptr);
    }

    /// @brief Restart the measurement from "now".
    inline void start() {
        gettimeofday(&start_time_, nullptr);
    }

    /// @return time since the last start() (or construction), in seconds.
    inline double seconds_elapsed() {
        gettimeofday(&end_time_, nullptr);
        return (end_time_.tv_sec - start_time_.tv_sec)
             + (end_time_.tv_usec - start_time_.tv_usec) / 1000000.0;
    }

    /// @return time since the last start() (or construction), in milliseconds.
    inline double milliseconds_elapsed() {
        gettimeofday(&end_time_, nullptr);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000.0
             + (end_time_.tv_usec - start_time_.tv_usec) / 1000.0;
    }

    /// @return time since the last start() (or construction), in microseconds.
    inline double microseconds_elapsed() {
        gettimeofday(&end_time_, nullptr);
        return (end_time_.tv_sec - start_time_.tv_sec) * 1000000.0
             + (end_time_.tv_usec - start_time_.tv_usec);
    }

private:
    timeval start_time_;
    timeval end_time_;
};
#endif  // platform selection

#endif  // HIGH_RESOLUTION_TIMER_H


// ----------------------------------------------------------------------------
// /hp-threadpool.hpp
// ----------------------------------------------------------------------------
#ifndef HIGH_PERFORMANCE_THREAD_POOL_HPP_20181005
#define HIGH_PERFORMANCE_THREAD_POOL_HPP_20181005

#include <atomic>
#include <cstddef>
#include <functional>
#include <stdexcept>
#include <thread>
#include <type_traits>
#include <vector>


/// @brief high-performance-thread-pool (HPThreadPool)
/// Designed with a specific application in mind: a high-frequency trading system.

namespace HPThreadPool{

    /// @brief class SpinLockMutex; see https://en.wikipedia.org/wiki/Spinlock and https://en.cppreference.com/w/cpp/atomic/atomic_flag
    /// (pro) a spin lock avoids the wake-up latency of a thread blocked on std::mutex
    /// (con) a spin lock keeps the cpu busy while waiting and needs more energy
    class SpinLockMutex {
    public:
        SpinLockMutex(){}

        /// @return true if the lock was free and is now held by the caller.
        inline bool try_lock(){
            return !locked_.test_and_set(std::memory_order_acquire);
        }
        /// @brief Busy-wait until the lock is acquired.
        inline void lock() {
            while (locked_.test_and_set(std::memory_order_acquire)) { }
        }
        inline void unlock() {
            locked_.clear(std::memory_order_release);
        }
    protected:
        /// Explicitly cleared: a default-initialised std::atomic_flag has an
        /// unspecified state before C++20, so the mutex could start out locked.
        std::atomic_flag locked_ = ATOMIC_FLAG_INIT;
    private:
        SpinLockMutex(SpinLockMutex&&) = delete;
        SpinLockMutex(SpinLockMutex const&) = delete;
        SpinLockMutex& operator=(SpinLockMutex&&) = delete;
        SpinLockMutex& operator=(SpinLockMutex const&) = delete;
    };

    /// @brief Task-type independent part of a worker: the stop flag, the
    /// "task done" flag and the thread handle.
    class WorkerBase{
    public:
        /// A new worker is stopped (no thread yet) and idle (task_done_ == true).
        WorkerBase():
            stop_(true), task_done_(true), t_() { }

        /// @brief Join the worker thread if there is one to join.
        inline void join(){ if(t_.joinable()) t_.join(); }
        /// @brief Ask the run loop to exit; does not wait for it.
        inline void stop(){ stop_.store(true, std::memory_order_relaxed); }
        inline bool is_stopped(){ return stop_.load(std::memory_order_relaxed); }
        /// @return true when no task is pending or running.
        /// Acquire load: it pairs with the store the worker thread performs
        /// after finishing a task, so a caller that sees `true` may safely
        /// read whatever the task wrote.
        inline bool is_task_done(){ return task_done_.load(std::memory_order_acquire); }
    protected:
        std::atomic<bool> stop_;
        std::atomic<bool> task_done_;
        std::thread t_;
    private:
        WorkerBase(WorkerBase&&) = delete;
        WorkerBase(WorkerBase const&) = delete;
        WorkerBase& operator=(WorkerBase&&) = delete;
        WorkerBase& operator=(WorkerBase const&) = delete;
    };

    /// @brief One thread with a one-slot mailbox.
    /// @tparam Task either a callable (invoked as `task()`) or a pointer to a
    ///         callable (invoked as `(*task)()`).
    template<typename Task>
    class Worker : public WorkerBase {
    public:
        Worker(): WorkerBase(), task_() {}
        Worker(const Task & task): WorkerBase(), task_(task) {}

        /// Stops and joins the thread. Destroying a joinable std::thread calls
        /// std::terminate, and the thread must be gone before task_ is destroyed.
        ~Worker(){
            stop();
            join();
        }

        /// @brief Spawn the worker thread if the worker is currently stopped;
        /// does nothing when it is already running.
        void start(){
            if(is_stopped()){
                // A previous thread may still be winding down after stop();
                // assigning over a joinable std::thread would call std::terminate.
                join();
                stop_.store(false);
                t_ = std::thread([this]{ _run(); });
            }
        }

        /// @brief Run the currently stored task once more.
        /// Please make sure task_ is a valid task and the worker is idle.
        void restart_task(){
            // Release store: publishes what the caller wrote before restarting
            // to the worker thread's acquire load in _run().
            task_done_.store(false, std::memory_order_release);
        }

        /// @brief Hand a task to this worker.
        /// @return true if the worker was idle and the task was stored,
        ///         false if a task is still pending or running.
        /// The idle check and the write to task_ are two separate steps, so
        /// this must not be called concurrently for the same worker.
        bool post(const Task & task){
            if(!task_done_.load(std::memory_order_acquire)){
                return false;
            }
            task_ = task;
            task_done_.store(false);
            return true;
        }
    protected:
        /// @implementation in mind: we always keep the thread in busy state, when a new task is posted, the thread can execute the task immediately
        /// our main application is for high-frequency trading system and time is always in top priority consideration
        ///
        /// A task that is already pending when stop() is requested is still
        /// executed before the loop exits.
        void _run(){
            while(true){
                if(!task_done_.load(std::memory_order_acquire)){
                    _do_task(typename std::is_pointer<Task>::type());
                    task_done_.store(true);
                }
                if(stop_.load(std::memory_order_relaxed)) break;
            }
        }

    private:
        Worker(Worker&&) = delete;
        Worker(Worker const&) = delete;
        Worker& operator=(Worker&&) = delete;
        Worker& operator=(Worker const&) = delete;
    protected:
        Task task_;
    private:
        /// Task is a pointer: call through it.
        inline void _do_task(const std::true_type &){
            (*task_)();
        }
        /// Task is a callable object: call it directly.
        inline void _do_task(const std::false_type &){
            task_();
        }
    };

    /// @brief Fixed-size set of busy-spinning workers.
    /// @tparam Task see Worker.
    ///
    /// The pool owns its workers. The destructor stops and joins all of them,
    /// so an explicit stop() before destruction is optional.
    template<typename Task>
    class ThreadPool{
    public:
        /// @brief Create `thread_size` idle workers without tasks.
        /// A non-positive `thread_size` yields an empty pool.
        ThreadPool(int thread_size)
            : thread_size_(thread_size), workers_() {
            const std::size_t count = _worker_count(thread_size);
            workers_.reserve(count);
            for(std::size_t i = 0; i < count; ++i){
                workers_.push_back(new Worker<Task>());
            }
        }

        /// @brief Create `thread_size` workers, worker i preloaded with tasks[i].
        /// The tasks are only stored; use restart_tasks() to run them.
        /// @throws std::invalid_argument if `tasks` has fewer than
        ///         `thread_size` entries (checked before anything is allocated).
        ThreadPool(int thread_size, const std::vector<Task> & tasks)
            : thread_size_(thread_size), workers_() {
            const std::size_t count = _worker_count(thread_size);
            if(tasks.size() < count){
                throw std::invalid_argument("ThreadPool: fewer tasks than threads");
            }
            workers_.reserve(count);
            for(std::size_t i = 0; i < count; ++i){
                workers_.push_back(new Worker<Task>(tasks[i]));
            }
        }

        ~ThreadPool(){
            // Signal every worker first so they all wind down in parallel,
            // then join and free them.
            stop_only();
            joinall();
            for(Worker<Task>* worker : workers_){
                delete worker;
            }
        }
    public:
        /// @brief standard threadpool method post
        /// @return true if some idle worker accepted the task,
        ///         false if every worker is busy (the task is NOT queued).
        bool post(const Task & task){
            for(Worker<Task>* worker : workers_){
                if(worker->post(task)){
                    return true;
                }
            }
            return false;
        }

        /// @note Developers should be careful when using the following methods, since each makes an assumption about the pool state; see details below.

        /// @implementation in mind: when developer call post_task,
        ///     developer should make sure the corresponding worker is in idle
        /// @throws std::out_of_range if `ind` is not a valid worker index.
        void post_task(int ind, const Task & task){
            workers_.at(static_cast<std::size_t>(ind))->post(task);
        }

        /// @implementation in mind: when developer call post_tasks,
        ///     developer should make sure the all workers are in idle and number of tasks equals thread_size
        /// If fewer tasks than workers are passed, only the first
        /// tasks.size() workers receive one.
        void post_tasks(const std::vector<Task> & tasks){
            const std::size_t count =
                tasks.size() < workers_.size() ? tasks.size() : workers_.size();
            for(std::size_t i = 0; i < count; ++i){
                workers_[i]->post(tasks[i]);
            }
        }

        /// @implementation in mind: when developer call restart_tasks,
        ///     developer should make sure the all workers are in idle and already assigned task (either by post or constructor)
        void restart_tasks(){
            for(Worker<Task>* worker : workers_){
                worker->restart_task();
            }
        }
    public:
        /// @return the worker at index `ind`; the pool keeps ownership.
        /// @throws std::out_of_range if `ind` is not a valid worker index.
        Worker<Task>* get_worker(int ind){
            return workers_.at(static_cast<std::size_t>(ind));
        }
        /// @brief Spawn the thread of every worker that is currently stopped.
        void start(){
            for(Worker<Task>* worker : workers_){
                worker->start();
            }
        }
        /// @brief Request all workers to exit and wait until they have.
        void stop(){
            stop_only(); joinall();
        }
        void restart(){
            stop(); start();
        }
        /// @brief Request all workers to exit without waiting for them.
        void stop_only(){
            for(Worker<Task>* worker : workers_){
                worker->stop();
            }
        }
        void joinall(){
            for(Worker<Task>* worker : workers_){
                worker->join();
            }
        }
        /// @return true when every worker is idle (also for an empty pool).
        bool is_task_done(){
            for(Worker<Task>* worker : workers_){
                if(!worker->is_task_done()){
                    return false;
                }
            }
            return true;
        }
    protected:
        const int thread_size_;
        std::vector< Worker<Task>* > workers_;
    private:
        // The pool owns raw worker pointers; a copy would delete them twice.
        ThreadPool(ThreadPool const&) = delete;
        ThreadPool& operator=(ThreadPool const&) = delete;

        /// Number of workers to create for a requested size (negative -> 0).
        static std::size_t _worker_count(int thread_size){
            return thread_size > 0 ? static_cast<std::size_t>(thread_size) : 0;
        }
    };
}

#endif  // HIGH_PERFORMANCE_THREAD_POOL_HPP_20181005


// ----------------------------------------------------------------------------
// /example.cpp
// ----------------------------------------------------------------------------
// NOTE(review): the four standard includes of this file lost their targets;
// the list below is what the code needs.
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>

// #include "high_resolution_timer.h"   // provided by the section above
// #include "hp-threadpool.hpp"         // provided by the section above


/// @brief Synthetic CPU workload used by the demos.
/// With nn fixed at 1000 the `i > 10000` branch is never taken, so `n` does
/// not influence the result.
double time_comsuming_work(const int n){
    int nn = 1000;
    double c = 1;
    for (int i = 0; i < nn; ++i){
        if(i > 10000){
            c += i * n;
        }else{
            c -= i;
        }
        if (c > 100){
            c = 1;
        }else if(c < -100){
            c = -1;
        }
    }
    return c;
}

/// @brief Callable that accumulates `a_[n] * time_comsuming_work(n)` over the
/// index range [beg_, end_) into `sum`.
class RunningTask{
public:
    RunningTask(): beg_(0), end_(0), a_(nullptr), sum(0) {}

    RunningTask(int beg, int end, int * a):
        beg_(beg), end_(end), a_(a), sum(0) {}

    /// @brief Add the workload result for a single `n` to `sum`.
    void compute(int n){
        sum += time_comsuming_work(n);
    }

    void operator()(){
        for(int n = beg_; n < end_; ++n){
            sum += a_[n] * time_comsuming_work(n);
        }
    }
public:
    int beg_;
    int end_;
    int * a_;
    double sum;
};


/// @brief Compare three preloaded pool workers (restart_tasks) against the
/// same work done serially, 30 rounds, printing both timings per round.
void benchmark_test(){
    const int size = 300;
    int a[size];

    for (int i = 0; i < size; ++i) {
        a[i] = i % 2 + i % 3;
    }

    HighResolutionTimer timer;

    timer.start();
    RunningTask task1(        0,     size/3, a),
                task2(   size/3, (2*size)/3, a),
                task3((2*size)/3,      size, a);
    std::vector<RunningTask*> tasks;
    tasks.push_back(&task1);
    tasks.push_back(&task2);
    tasks.push_back(&task3);
    HPThreadPool::ThreadPool<RunningTask*> tp(3, tasks);
    tp.start();

    double t0 = timer.microseconds_elapsed();

    double sum = 0, sum2=0;

    for(int k = 0; k < 30; ++k){
        timer.start();
        tp.restart_tasks();

        // tp.post_task(0, &task1);
        // tp.post_task(1, &task2);
        // tp.post_task(2, &task3);
        while( ! tp.is_task_done() ) { }

        sum = task1.sum + task2.sum + task3.sum;
        double t1 = timer.microseconds_elapsed();

        timer.start();
        for (int n = 0; n < size; ++n){
            sum2 += a[n] * time_comsuming_work(n);
        }
        double t2 = timer.microseconds_elapsed();

        std::cout << "\tt1=" << t1 << " t2=" << t2 << std::endl;
        // NOTE(review): the duration's template arguments were lost; 100 ms is
        // an assumption - TODO confirm against upstream.
        std::this_thread::sleep_for( std::chrono::milliseconds(100) );
    }


    std::cout << "\tConstruction Time Of ThreadPool=" << t0
        << "\n\tComputation sum1=" << sum <<" sum2="<< sum2 << std::endl;
    tp.stop();
}



/// @brief Demo of the generic post() API with std::function tasks.
void std_testing(){

    HPThreadPool::ThreadPool< std::function<void()> > tp(3);
    tp.start();
    RunningTask task;

    // NOTE(review): every posted job updates the same `task.sum`, and up to
    // three workers may run at once, so these updates can race. Left as in the
    // original demo; give each worker its own accumulator for a reliable sum.
    for(int i = 0; i < 20; ++i){
        if( ! tp.post( std::bind(&RunningTask::compute, &task, i + 10 )) ){
            std::cout<<"\tall worker are busy, sleeping..."<<std::endl;
            // NOTE(review): duration type lost, see benchmark_test().
            std::this_thread::sleep_for( std::chrono::milliseconds(100) );
        }
    }

    // NOTE(review): the listing breaks off inside the next output statement.
    // The wait and the final stop() are a minimal reconstruction so that the
    // sum is read only after the workers have finished.
    while( ! tp.is_task_done() ) { }
    std::cout<<"\tComputation sum="<<task.sum<<std::endl;
    tp.stop();
}

// NOTE(review): the remainder of example.cpp (presumably a main() that calls
// the two functions above) is missing from the listing and is not recreated here.