├── include └── pulsar │ ├── queue.inl │ ├── stopwatch.hpp │ ├── stopwatch.inl │ ├── queue.hpp │ ├── wait.hpp │ ├── wait.inl │ ├── subscriber.hpp │ ├── publisher.hpp │ ├── publisher.inl │ └── subscriber.inl ├── CMakeLists.txt ├── src ├── one_to_one_performance_test.cpp ├── CMakeLists.txt ├── test.hpp ├── one_to_three_performance_test.cpp ├── one_to_three_sequenced_performance_test.cpp ├── one_to_one_std_queue_performance_test.cpp └── functional_test.cpp └── README.md /include/pulsar/queue.inl: -------------------------------------------------------------------------------- 1 | template< class T > 2 | inline queue< T >::queue( size_t n ) : 3 | data_( new T[n] ), 4 | size_( n ), 5 | mask_( n-1 ) 6 | { 7 | } 8 | 9 | template< class T > 10 | inline size_t queue< T >::size() const 11 | { 12 | return size_; 13 | } 14 | 15 | template< class T > 16 | inline T& queue< T >::at( position p ) 17 | { 18 | return data_[ p & mask_ ]; 19 | } 20 | -------------------------------------------------------------------------------- /include/pulsar/stopwatch.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace pulsar { 6 | 7 | class stopwatch 8 | { 9 | public: 10 | stopwatch( bool s=true ); 11 | 12 | void start(); 13 | 14 | size_t elapsed_ms(); 15 | 16 | private: 17 | std::chrono::time_point< std::chrono::high_resolution_clock > start_; 18 | }; 19 | 20 | #include "pulsar/stopwatch.inl" 21 | 22 | } 23 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required( VERSION 2.8 ) 2 | project( pulsar_cpp ) 3 | set( VERSION_NUMBER "0.0.1" ) 4 | 5 | if( "${CMAKE_BUILD_TYPE}" STREQUAL "Release" ) 6 | set( VERSION "${VERSION_NUMBER}-release" ) 7 | else() 8 | set( VERSION "${VERSION_NUMBER}-debug" ) 9 | endif() 10 | 11 | set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-std=c++11 -Wl,--no-as-needed -O3" ) 12 | include_directories( include ) 13 | add_subdirectory( src ) 14 | -------------------------------------------------------------------------------- /include/pulsar/stopwatch.inl: -------------------------------------------------------------------------------- 1 | inline stopwatch::stopwatch( bool s ) 2 | { 3 | if( s ) { 4 | start(); 5 | } 6 | } 7 | 8 | inline void stopwatch::start() 9 | { 10 | start_ = std::chrono::high_resolution_clock::now(); 11 | } 12 | 13 | inline size_t stopwatch::elapsed_ms() 14 | { 15 | return std::chrono::duration_cast< std::chrono::milliseconds >( 16 | std::chrono::high_resolution_clock::now() - start_ ).count(); 17 | } 18 | -------------------------------------------------------------------------------- /include/pulsar/queue.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace pulsar { 8 | 9 | typedef uint64_t position; 10 | 11 | template< class T > 12 | class queue 13 | { 14 | public: 15 | queue( size_t n ); 16 | 17 | size_t size() const; 18 | 19 | T& at( position ); 20 | 21 | private: 22 | std::unique_ptr< T[] > data_; 23 | size_t size_; 24 | size_t mask_; 25 | }; 26 | 27 | #include "pulsar/queue.inl" 28 | 29 | } 30 | -------------------------------------------------------------------------------- /include/pulsar/wait.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace pulsar { 8 | 9 | class yield_wait_strategy 10 | { 11 | public: 12 | void wait(); 13 | 14 | void notify(); 15 | }; 16 | 17 | class block_wait_strategy 18 | { 19 | public: 20 | void wait(); 21 | 22 | void notify(); 23 | 24 | private: 25 | std::atomic< bool > ready_ = { false }; 26 | std::condition_variable cond_; 27 | std::mutex mut_; 28 | }; 29 | 30 | #include "pulsar/wait.inl" 31 | 32 | } 33 | 
-------------------------------------------------------------------------------- /include/pulsar/wait.inl: -------------------------------------------------------------------------------- 1 | inline void yield_wait_strategy::wait() 2 | { 3 | std::this_thread::yield(); 4 | } 5 | 6 | inline void yield_wait_strategy::notify() 7 | { 8 | ; 9 | } 10 | 11 | inline void block_wait_strategy::wait() 12 | { 13 | std::unique_lock< std::mutex > lock( mut_ ); 14 | cond_.wait( lock, [&]{ 15 | return ready_ == true; 16 | } ); 17 | ready_ = false; 18 | } 19 | 20 | inline void block_wait_strategy::notify() 21 | { 22 | std::unique_lock< std::mutex > lock( mut_ ); 23 | ready_ = true; 24 | cond_.notify_all(); 25 | } 26 | -------------------------------------------------------------------------------- /src/one_to_one_performance_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include 3 | #include 4 | 5 | using namespace pulsar; 6 | 7 | int main() 8 | { 9 | std::cout.imbue( std::locale( "" ) ); 10 | 11 | for( long j=0; j p0( Q ); 14 | subscriber< long >& s0 = p0.subscribe(); 15 | std::thread t0( do_subscribe, &s0 ); 16 | 17 | auto start = std::chrono::high_resolution_clock::now(); 18 | do_publish( &p0 ); 19 | t0.join(); 20 | 21 | auto millis = std::chrono::duration_cast< std::chrono::milliseconds >( 22 | std::chrono::high_resolution_clock::now() - start ).count() + 1; 23 | 24 | std::cout << ( N * 1000 ) / millis << std::endl; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable( one_to_one_std_queue_performance_test one_to_one_std_queue_performance_test.cpp ) 2 | target_link_libraries( one_to_one_std_queue_performance_test pthread ) 3 | 4 | add_executable( one_to_one_performance_test one_to_one_performance_test.cpp ) 5 | target_link_libraries( 
one_to_one_performance_test pthread ) 6 | 7 | add_executable( one_to_three_performance_test one_to_three_performance_test.cpp ) 8 | target_link_libraries( one_to_three_performance_test pthread ) 9 | 10 | add_executable( one_to_three_sequenced_performance_test one_to_three_sequenced_performance_test.cpp ) 11 | target_link_libraries( one_to_three_sequenced_performance_test pthread ) 12 | 13 | #add_executable( three_to_one_performance_test three_to_one_performance_test.cpp ) 14 | #target_link_libraries( three_to_one_performance_test pthread ) 15 | -------------------------------------------------------------------------------- /include/pulsar/subscriber.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pulsar/publisher.hpp" 4 | #include 5 | #include 6 | 7 | namespace pulsar { 8 | 9 | template< typename E, typename W=yield_wait_strategy > 10 | class subscriber 11 | { 12 | friend class publisher< E, W >; 13 | public: 14 | subscriber( publisher< E, W >& p, position& h ); 15 | 16 | template< typename F > 17 | void subscribe( F func ); 18 | 19 | template< typename F > 20 | size_t dispatch( F func ); 21 | 22 | subscriber< E, W >& subscribe(); 23 | 24 | void cancel(); 25 | 26 | private: 27 | size_t available(); 28 | 29 | const E& at( size_t i ); 30 | 31 | void commit( size_t n ); 32 | 33 | publisher< E, W >& publisher_; 34 | position& head_; 35 | position tail_; 36 | std::atomic< bool > alive_; 37 | }; 38 | 39 | #include "pulsar/subscriber.inl" 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/test.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pulsar/subscriber.hpp" 4 | #include 5 | 6 | using namespace pulsar; 7 | 8 | const long N = 1000L * 1000L * 100L; 9 | const size_t Q = 16 * 1024; 10 | const size_t B = 1; 11 | const size_t I = 100; 12 | 13 | void do_publish( publisher< long 
>* p ) 14 | { 15 | for( size_t i=0; ipublish( B, [&]( long& e ){ 17 | e = i++; 18 | } ); 19 | } 20 | } 21 | 22 | void do_subscribe( subscriber< long >* s ) 23 | { 24 | int i=0; 25 | s->subscribe( [&]( const long& e, size_t a ) { 26 | assert( e == i++ ); 27 | return e < N-1; 28 | } ); 29 | } 30 | 31 | void do_subscribe_debug( subscriber< long >* s ) 32 | { 33 | int i=0; 34 | s->subscribe( [&]( const long& e, size_t a ) { 35 | std::cout << "data: " << e << ", rem: " << a << std::endl; 36 | return e < N-1; 37 | } ); 38 | } 39 | -------------------------------------------------------------------------------- /src/one_to_three_performance_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include 3 | #include 4 | 5 | using namespace pulsar; 6 | 7 | int main() 8 | { 9 | std::cout.imbue( std::locale( "" ) ); 10 | 11 | for( long j=0; j p0( Q ); 14 | subscriber< long >& s0 = p0.subscribe(); 15 | subscriber< long >& s1 = p0.subscribe(); 16 | subscriber< long >& s2 = p0.subscribe(); 17 | std::thread t0( do_subscribe, &s0 ); 18 | std::thread t1( do_subscribe, &s1 ); 19 | std::thread t2( do_subscribe, &s2 ); 20 | 21 | auto start = std::chrono::high_resolution_clock::now(); 22 | do_publish( &p0 ); 23 | t0.join(); 24 | t1.join(); 25 | t2.join(); 26 | 27 | auto millis = std::chrono::duration_cast< std::chrono::milliseconds >( 28 | std::chrono::high_resolution_clock::now() - start ).count() + 1; 29 | 30 | std::cout << ( N * 1000 ) / millis << std::endl; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/one_to_three_sequenced_performance_test.cpp: -------------------------------------------------------------------------------- 1 | #include "test.hpp" 2 | #include 3 | #include 4 | 5 | using namespace pulsar; 6 | 7 | int main() 8 | { 9 | std::cout.imbue( std::locale( "" ) ); 10 | 11 | for( long j=0; j p0( Q ); 14 | subscriber< long >& s0 = p0.subscribe(); 15 | 
subscriber< long >& s1 = s0.subscribe(); 16 | subscriber< long >& s2 = s1.subscribe(); 17 | std::thread t0( do_subscribe, &s0 ); 18 | std::thread t1( do_subscribe, &s1 ); 19 | std::thread t2( do_subscribe, &s2 ); 20 | 21 | auto start = std::chrono::high_resolution_clock::now(); 22 | do_publish( &p0 ); 23 | t0.join(); 24 | t1.join(); 25 | t2.join(); 26 | 27 | auto millis = std::chrono::duration_cast< std::chrono::milliseconds >( 28 | std::chrono::high_resolution_clock::now() - start ).count() + 1; 29 | 30 | std::cout << ( N * 1000 ) / millis << std::endl; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /include/pulsar/publisher.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pulsar/queue.hpp" 4 | #include "pulsar/wait.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace pulsar { 10 | 11 | template< typename E, typename W > 12 | class subscriber; 13 | 14 | template< typename E, typename W=yield_wait_strategy > 15 | class publisher 16 | { 17 | friend class subscriber< E, W >; 18 | public: 19 | publisher( size_t n ); 20 | 21 | template< typename F > 22 | void publish( size_t, F func ); 23 | 24 | subscriber< E, W >& subscribe(); 25 | 26 | private: 27 | size_t available(); 28 | 29 | E& at( size_t i ); 30 | 31 | void commit( size_t n ); 32 | 33 | subscriber< E, W >& subscribe( position& ); 34 | 35 | typedef std::vector< std::unique_ptr< 36 | subscriber< E, W > > > subscriber_list; 37 | 38 | queue< E > queue_; 39 | position head_; 40 | subscriber_list tail_; 41 | size_t avail_; 42 | 43 | W wait_; 44 | }; 45 | 46 | #include "pulsar/publisher.inl" 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/one_to_one_std_queue_performance_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 
template< typename T > 8 | class concurrent_queue 9 | { 10 | public: 11 | void push( T const& t ) 12 | { 13 | std::lock_guard< std::mutex > lock( mut_ ); 14 | data_.push( t ); 15 | cond_.notify_one(); 16 | } 17 | 18 | T pop() 19 | { 20 | std::unique_lock< std::mutex > lock( mut_ ); 21 | cond_.wait( lock, [&]{ 22 | return !data_.empty(); 23 | } ); 24 | T t = data_.front(); 25 | data_.pop(); 26 | return t; 27 | } 28 | 29 | private: 30 | std::mutex mut_; 31 | std::condition_variable cond_; 32 | std::queue< T > data_; 33 | }; 34 | 35 | concurrent_queue< long > queue; 36 | 37 | void publish( size_t n ) 38 | { 39 | for( size_t i=0; i( 65 | std::chrono::high_resolution_clock::now() - start ).count() + 1; 66 | 67 | std::cout << ( N * 1000 ) / millis << std::endl; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/functional_test.cpp: -------------------------------------------------------------------------------- 1 | #include "pulsar/source.hpp" 2 | #include "pulsar/subscription.hpp" 3 | #include 4 | #include 5 | 6 | using namespace pulsar; 7 | 8 | int main() 9 | { 10 | source< int > p0( 16 ); 11 | subscription< int >& s0 = p0.subscribe(); 12 | subscription< int >& s1 = s0.subscribe(); 13 | 14 | std::cout << "p0 available: " << p0.available() << std::endl; 15 | std::cout << "s0 available: " << s0.available() << std::endl; 16 | std::cout << "s1 available: " << s1.available() << std::endl; 17 | std::cout << std::endl; 18 | 19 | size_t available = p0.available(); 20 | for( int i=0; i 2 | inline publisher< E, W >::publisher( size_t n ) : 3 | queue_( n ), 4 | head_( 0 ), 5 | avail_( 0 ) 6 | { 7 | } 8 | 9 | template< typename E, typename W > 10 | template< typename F > 11 | inline void publisher< E, W >::publish( size_t n, F func ) 12 | { 13 | // wait for n slots to become available 14 | // yield wait is appropriate here as we shouldnt have to wait long 15 | if( avail_ < n ) { 16 | while( ( avail_ = available() ) < n ) { 17 | 
std::this_thread::yield(); 18 | } 19 | } 20 | 21 | // populate n slots by calling func n times 22 | for( size_t i=0; i 34 | inline size_t publisher< E, W >::available() 35 | { 36 | // apply memory barrier to ensure all positions are correct (NOTE(review): the subscriber tail_ values read below are plain position fields, not atomics - confirm the fence alone is sufficient) 37 | std::atomic_thread_fence( std::memory_order::memory_order_acquire ); 38 | 39 | // find the slowest subscriber 40 | // only consider subscribers that are still alive 41 | // remove dead subscribers 42 | position tail_min = std::numeric_limits< position >::max(); 43 | for( auto it = tail_.begin(); it != tail_.end(); ) 44 | { 45 | if( (*it)->alive_ ) { 46 | tail_min = std::min( (*it)->tail_, tail_min ); it++; 47 | } else { 48 | it = tail_.erase( it ); 49 | } 50 | } 51 | 52 | // number of slots available is the queue size minus ( head - min( tail ) ) 53 | // this ensures we can never write past the slowest subscriber 54 | // if there are no subscribers we cannot publish 55 | if( tail_.size() ) { 56 | return queue_.size() - ( head_ - tail_min ); 57 | } else { 58 | return 0; 59 | } 60 | } 61 | 62 | template< typename E, typename W > 63 | inline void publisher< E, W >::commit( size_t n ) 64 | { 65 | // issue a memory barrier to ensure the queue is consistent 66 | // across threads then increment head 67 | std::atomic_thread_fence( std::memory_order::memory_order_release ); 68 | head_ += n; 69 | wait_.notify(); 70 | } 71 | 72 | template< typename E, typename W > 73 | inline E& publisher< E, W >::at( size_t i ) 74 | { 75 | return queue_.at( head_ + i ); 76 | } 77 | 78 | template< typename E, typename W > 79 | inline subscriber< E, W >& publisher< E, W >::subscribe() 80 | { 81 | tail_.push_back( std::unique_ptr< subscriber< E, W > >( new subscriber< E, W >( *this, head_ ) ) ); 82 | return *tail_.back(); 83 | } 84 | 85 | template< typename E, typename W > 86 | inline subscriber< E, W >& publisher< E, W >::subscribe( position& h ) 87 | { 88 | tail_.push_back( std::unique_ptr< subscriber< E, W > >( new subscriber< E, W >( *this, h ) ) ); 89 | return
*tail_.back(); 90 | } 91 | -------------------------------------------------------------------------------- /include/pulsar/subscriber.inl: -------------------------------------------------------------------------------- 1 | template< typename E, typename W > 2 | inline subscriber< E, W >::subscriber( publisher< E, W >& p, position& h ) : 3 | publisher_( p ), 4 | head_( h ), 5 | tail_( h ), 6 | alive_( true ) 7 | { 8 | } 9 | 10 | template< typename E, typename W > 11 | template< typename F > 12 | inline void subscriber< E, W >::subscribe( F func ) 13 | { 14 | while( alive_ ) 15 | { 16 | // wait for publisher to publish 17 | size_t avail = 0; 18 | while( ( avail = available() ) < 1 ) { 19 | publisher_.wait_.wait(); 20 | } 21 | 22 | // dispatch available slots to func 23 | int i; 24 | for( i=0; i 36 | template< typename F > 37 | inline size_t subscriber< E, W >::dispatch( F func ) 38 | { 39 | if( alive_ ) 40 | { 41 | // check for published slots (no blocking wait is visible at this point, unlike subscribe( F )) 42 | size_t avail = available(); 43 | if( avail ) 44 | { 45 | // dispatch available slots to func 46 | int i; 47 | for( i=0; i 64 | inline size_t subscriber< E, W >::available() 65 | { 66 | // apply memory barrier to ensure all positions are correct 67 | std::atomic_thread_fence( std::memory_order::memory_order_acquire ); 68 | 69 | // number of slots available is the difference between the head and tail 70 | // this ensures the subscriber can never read past the head 71 | return head_ - tail_; 72 | } 73 | 74 | template< typename E, typename W > 75 | inline const E& subscriber< E, W >::at( size_t i ) 76 | { 77 | return publisher_.queue_.at( tail_ + i ); 78 | } 79 | 80 | template< typename E, typename W > 81 | inline void subscriber< E, W >::commit( size_t n ) 82 | { 83 | // issue a memory barrier to ensure the queue is consistent 84 | // across threads then increment tail 85 | std::atomic_thread_fence( std::memory_order::memory_order_release ); 86 | tail_ += n; 87 | } 88 | 89 | template< typename E, typename W > 90 | inline
subscriber< E, W >& subscriber< E, W >::subscribe() 91 | { 92 | return publisher_.subscribe( tail_ ); 93 | } 94 | 95 | template< typename E, typename W > 96 | inline void subscriber< E, W >::cancel() 97 | { 98 | alive_ = false; 99 | } 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pulsar_cpp 2 | ========== 3 | **Introduction** 4 | 5 | In the finance industry there is always a need for software with low-latency, high-performance characteristics. The LMAX disruptor is a Java technology that has gained a lot of interest from developers in this field. 6 | 7 | Pulsar is not a port of disruptor; rather, it is an attempt to provide a native C++11 library that implements a lot of the concepts used by disruptor. 8 | 9 | The goals for pulsar are as follows: 10 | 11 | * Better performance than disruptor 12 | * A simpler API than disruptor 13 | 14 | **Usage** 15 | 16 | Pulsar applications are implemented in terms of publishers and subscribers. A publisher can be subscribed to by 1 or more subscribers. For example: 17 | 18 | ``` 19 | pulsar::publisher< long > p; 20 | pulsar::subscriber< long >& s0 = p.subscribe(); 21 | pulsar::subscriber< long >& s1 = p.subscribe(); 22 | pulsar::subscriber< long >& s2 = p.subscribe(); 23 | ``` 24 | 25 | In the example above there are 3 subscribers - each one will receive the data published by p. All subscribers operate in parallel - there are no guarantees about the order in which each subscriber will see the data. If p publishes item 1, s1 might see that data before s0 or s2. 26 | 27 | It is also possible to chain subscribers together to create a pipeline processing arrangement.
For example: 28 | 29 | ``` 30 | pulsar::publisher< long > p; 31 | pulsar::subscriber< long >& s0 = p.subscribe(); 32 | pulsar::subscriber< long >& s1 = s0.subscribe(); 33 | pulsar::subscriber< long >& s2 = s1.subscribe(); 34 | ``` 35 | 36 | This example differs from the first as data is guaranteed to be processed in sequence. If p publishes item 1 it is guaranteed to be received by subscribers s0, s1 and s2 in that order. 37 | 38 | To start receiving data on the subscriber side you supply a callable object. The subscriber will invoke the supplied callable every time an item of data arrives. When you are done subscribing, the callable should return false. Here is a simple example using a lambda. In this case subscribe() returns after the first invocation of the lambda: 39 | 40 | ``` 41 | s.subscribe( []( const long& e ) { 42 | return false; 43 | } ); 44 | ``` 45 | 46 | The following code demonstrates how we might publish 100 data items. 47 | 48 | ``` 49 | for( size_t i=0; i<100; ) { 50 | p.publish( 1, [&]( long& e ){ 51 | e = i++; 52 | } ); 53 | } 54 | ``` 55 | 56 | The publish() method expects 2 arguments. Argument 1 is the number of data items we want to publish. Argument 2 is a callable object used to populate each data item. It will be called as many times as specified in argument 1.
57 | 58 | **Performance** 59 | 60 | The following table compares the performance of pulsar against: 61 | 62 | * LMAX disruptor 3.3.2 (jdk 8u31) https://github.com/LMAX-Exchange/disruptor 63 | * disruptor-- (most popular C++ disruptor implementation on GitHub) https://github.com/fsaintjacques/disruptor-- 64 | * A thread-safe std queue implementation using condition variables for synchronization 65 | 66 | The test case for each is to pass 100 million items of data (longs) between 2 threads and use the time taken to calculate the rate at which data is passed (operations per second). 67 | 68 | All tests were executed on an Intel i5 4590 3.30GHz CPU. 69 | 70 | The source code for the pulsar test can be found here - https://github.com/mmcilroy/pulsar_cpp/blob/master/src/one_to_one_performance_test.cpp 71 | 72 | tech | ops/s 73 | ---------------|------------ 74 | pulsar | 291,923,762 75 | disruptor java | 186,572,620 76 | disruptor-- | 7,328,783 77 | std queue | 8,244,023 78 | 79 | **Building** 80 | 81 | --------------------------------------------------------------------------------