├── .gitignore ├── test ├── TastyDiscover.hs ├── Main.hs └── DisruptorTest.hs ├── CHANGELOG.md ├── src ├── Disruptor │ ├── MP.hs │ ├── SP.hs │ ├── MP │ │ ├── Producer.hs │ │ ├── Consumer.hs │ │ └── RingBuffer.hs │ ├── SP │ │ ├── Unboxed │ │ │ ├── Producer.hs │ │ │ ├── Consumer.hs │ │ │ └── RingBuffer.hs │ │ ├── Producer.hs │ │ ├── Consumer.hs │ │ └── RingBuffer.hs │ ├── SequenceNumber.hs │ └── AtomicCounterPadded.hs └── Disruptor.hs ├── cabal.project ├── bench ├── MP │ ├── Chan.hs │ ├── LockFreeQueue.hs │ ├── TBQueue.hs │ ├── UnagiChan.hs │ ├── FastLogger.hs │ ├── FastLogger10.hs │ ├── Disruptor.hs │ ├── Logging10.hs │ └── Logging.hs ├── SP │ ├── Chan.hs │ ├── ChaseLevDeque.hs │ ├── LockFreeQueue.hs │ ├── TBQueue.hs │ ├── UnagiChan.hs │ ├── RingBuffer.hs │ ├── Disruptor.hs │ └── DisruptorUnboxed.hs ├── SingleOps.hs └── Common.hs ├── .stylish-haskell.yaml ├── LICENSE ├── app ├── ReadmeDisruptorExample.hs └── ReadmePipelineExample.hs ├── benchmark.sh ├── pipelined-state-machines.cabal └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Haskell 2 | dist-newstyle/ 3 | cabal.project.local* 4 | bench-* 5 | bench.* 6 | # Emacs 7 | .\#* 8 | -------------------------------------------------------------------------------- /test/TastyDiscover.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF tasty-discover -optF --generated-module=TastyDiscover #-} 2 | 3 | module TastyDiscover where 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Revision history for pipelined-state-machines 2 | 3 | ## 0.0.0 -- 2021-11-02 4 | 5 | * First version. Released on an unsuspecting world. 6 | -------------------------------------------------------------------------------- /src/Disruptor/MP.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.MP (module X) where 2 | 3 | import Disruptor.MP.Consumer as X 4 | import Disruptor.MP.Producer as X 5 | import Disruptor.MP.RingBuffer as X 6 | import Disruptor.SequenceNumber as X 7 | -------------------------------------------------------------------------------- /src/Disruptor/SP.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.SP (module X) where 2 | 3 | import Disruptor.SP.Consumer as X 4 | import Disruptor.SP.Producer as X 5 | import Disruptor.SP.RingBuffer as X 6 | import Disruptor.SequenceNumber as X 7 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: . 2 | 3 | with-compiler: ghc-9.0.2 4 | 5 | reject-unconstrained-dependencies: all 6 | 7 | constraints: QuickCheck +old-random 8 | 9 | package pipelined-state-machines 10 | 11 | allow-older: * 12 | allow-newer: * 13 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified TastyDiscover 4 | import Test.Tasty 5 | 6 | ------------------------------------------------------------------------ 7 | 8 | main :: IO () 9 | main = defaultMain =<< TastyDiscover.tests 10 | -------------------------------------------------------------------------------- /bench/MP/Chan.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent.Chan 4 | import Control.Concurrent.MVar (putMVar) 5 | import Control.Monad (replicateM_) 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = mpsc newChan producer consumer 13 | where 14 | producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE) 15 | 16 | consumer o consumerFinished = do 17 | replicateM_ iTERATIONS (readChan o) 18 | putMVar consumerFinished () 19 | -------------------------------------------------------------------------------- /bench/SP/Chan.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent.Chan 4 | import Control.Concurrent.MVar (putMVar) 5 | import Control.Monad (replicateM_) 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = spsc newChan producer consumer 13 | where 14 | producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE) 15 | 16 | consumer o consumerFinished = do 17 | replicateM_ iTERATIONS (readChan o) 18 | putMVar consumerFinished () 19 | -------------------------------------------------------------------------------- /src/Disruptor.hs: -------------------------------------------------------------------------------- 1 | module Disruptor where 2 | 3 | -- * Single-producer 4 | 5 | import Disruptor.SP.RingBuffer 6 | import Disruptor.SP.Producer 7 | import Disruptor.SP.Consumer 8 | 9 | -- * Single-producer unboxed 10 | 11 | import Disruptor.SP.Unboxed.RingBuffer 12 | import Disruptor.SP.Unboxed.Producer 13 | import Disruptor.SP.Unboxed.Consumer 14 | 15 | -- * Multiple-producers 16 | 17 | import Disruptor.MP.RingBuffer 18 | import Disruptor.MP.Producer 19 | import Disruptor.MP.Consumer 20 | 21 | -- * Common 22 | 23 | import Disruptor.SequenceNumber 24 | -------------------------------------------------------------------------------- /bench/MP/LockFreeQueue.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Monad 4 | import Data.Concurrent.Queue.MichaelScott 5 | import Control.Concurrent.MVar 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = mpsc setup producer consumer 13 | where 14 | setup = newQ 15 | 16 | producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE) 17 | 18 | consumer q consumerFinished = do 19 | replicateM_ iTERATIONS $ do 20 | _i <- tryPopR q 21 | return () 22 | putMVar consumerFinished () 23 | -------------------------------------------------------------------------------- /bench/SP/ChaseLevDeque.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Monad 4 | import Data.Concurrent.Deque.ChaseLev 5 | import Control.Concurrent.MVar 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = spsc setup producer consumer 13 | where 14 | setup = newQ 15 | 16 | producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE) 17 | 18 | consumer q consumerFinished = do 19 | replicateM_ iTERATIONS $ do 20 | _i <- tryPopR q 21 | return () 22 | putMVar consumerFinished () 23 | -------------------------------------------------------------------------------- /bench/SP/LockFreeQueue.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Monad 4 | import Data.Concurrent.Queue.MichaelScott 5 | import Control.Concurrent.MVar 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = spsc setup producer consumer 13 | where 14 | setup = newQ 15 | 16 | producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE) 17 | 18 | consumer q consumerFinished = do 19 | replicateM_ iTERATIONS $ do 20 | _i <- tryPopR q 21 | return () 22 | putMVar consumerFinished () 23 | -------------------------------------------------------------------------------- /bench/MP/TBQueue.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Monad 4 | import Control.Concurrent.STM 5 | import Control.Concurrent.MVar 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = mpsc setup producer consumer 13 | where 14 | setup = newTBQueueIO (fromIntegral bUFFER_CAPACITY) 15 | 16 | producer q = replicateM_ iTERATIONS 17 | (atomically (writeTBQueue q vALUE_TO_WRITE)) 18 | 19 | consumer q consumerFinished = do 20 | replicateM_ iTERATIONS (atomically (readTBQueue q)) 21 | putMVar consumerFinished () 22 | -------------------------------------------------------------------------------- /bench/SP/TBQueue.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Monad 4 | import Control.Concurrent.STM 5 | import Control.Concurrent.MVar 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = spsc setup producer consumer 13 | where 14 | setup = newTBQueueIO (fromIntegral bUFFER_CAPACITY) 15 | 16 | producer q = replicateM_ iTERATIONS 17 | (atomically (writeTBQueue q vALUE_TO_WRITE)) 18 | 19 | consumer q consumerFinished = do 20 | replicateM_ iTERATIONS (atomically (readTBQueue q)) 21 | putMVar consumerFinished () 22 | -------------------------------------------------------------------------------- /bench/SP/UnagiChan.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent (threadDelay) 4 | import Control.Concurrent.Chan.Unagi.NoBlocking.Unboxed 5 | import Control.Concurrent.MVar (putMVar) 6 | import Control.Monad (replicateM_) 7 | 8 | import Common 9 | 10 | ------------------------------------------------------------------------ 11 | 12 | main :: IO () 13 | main = spsc newChan (producer . fst) (\(_i, o) -> consumer o) 14 | where 15 | producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE) 16 | 17 | consumer o consumerFinished = do 18 | replicateM_ iTERATIONS (readChan (threadDelay sLEEP_TIME) o) 19 | putMVar consumerFinished () 20 | -------------------------------------------------------------------------------- /bench/MP/UnagiChan.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent (putMVar, threadDelay) 4 | import Control.Concurrent.Chan.Unagi.NoBlocking.Unboxed 5 | import Control.Monad (replicateM_) 6 | 7 | import Common 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = mpsc newChan (producer . fst) (\(_i, o) -> consumer o) 13 | where 14 | producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE) 15 | 16 | consumer o consumerFinished = do 17 | replicateM_ (iTERATIONS * nUMBER_OF_PRODUCERS) 18 | (readChan (threadDelay sLEEP_TIME) o) 19 | putMVar consumerFinished () 20 | -------------------------------------------------------------------------------- /.stylish-haskell.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - imports: 3 | align: none 4 | list_align: with_module_name 5 | pad_module_names: false 6 | long_list_align: new_line_multiline 7 | empty_list_align: inherit 8 | list_padding: 7 # length "import " 9 | separate_lists: false 10 | space_surround: false 11 | - language_pragmas: 12 | style: vertical 13 | align: false 14 | remove_redundant: true 15 | - simple_align: 16 | cases: false 17 | top_level_patterns: false 18 | records: false 19 | - trailing_whitespace: {} 20 | 21 | # You need to put any language extensions that's enabled for the entire project 22 | # here. 23 | language_extensions: [] 24 | 25 | columns: 72 26 | -------------------------------------------------------------------------------- /bench/SP/RingBuffer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | 3 | module Main where 4 | 5 | import Control.Concurrent.MVar 6 | import Control.Monad 7 | import Data.RingBuffer 8 | import Data.Vector (Vector) 9 | 10 | import Common 11 | 12 | ------------------------------------------------------------------------ 13 | 14 | main :: IO () 15 | main = spsc setup producer consumer 16 | where 17 | setup :: IO (RingBuffer Vector Int) -- NOTE: `IOVector` doesn't work?! 18 | setup = new bUFFER_CAPACITY 19 | 20 | producer rb = replicateM_ iTERATIONS (append vALUE_TO_WRITE rb) 21 | 22 | consumer rb consumerFinished = do 23 | replicateM_ iTERATIONS $ do 24 | _i <- latest rb 1 25 | return () 26 | putMVar consumerFinished () 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Symbiont Inc, Stevan Andjelkovic 2022-2023. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /bench/MP/FastLogger.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent 4 | import Control.Monad (when, replicateM_) 5 | import System.Log.FastLogger 6 | import Data.IORef 7 | 8 | import Common 9 | 10 | ------------------------------------------------------------------------ 11 | 12 | lOG_MESSAGE :: LogStr 13 | lOG_MESSAGE = toLogStr "some random string to log" 14 | 15 | lOG_FILE :: FilePath 16 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-fast-logger.log" 17 | 18 | main :: IO () 19 | main = do 20 | producersFinished <- newIORef 0 21 | mpsc setup (producer producersFinished) (consumer producersFinished) 22 | where 23 | setup = do 24 | cleanup lOG_FILE 25 | newFileLoggerSet defaultBufSize lOG_FILE 26 | 27 | producer producersFinished lgrset = do 28 | replicateM_ iTERATIONS (pushLogStrLn lgrset lOG_MESSAGE) 29 | flushLogStr lgrset 30 | atomicModifyIORef' producersFinished (\n -> (n + 1, ())) 31 | 32 | consumer producersFinished _lgrset consumerFinished = go 33 | where 34 | go = do 35 | p <- readIORef producersFinished 36 | if p == nUMBER_OF_PRODUCERS 37 | then putMVar consumerFinished () 38 | else do 39 | threadDelay 10000 40 | go 41 | -------------------------------------------------------------------------------- /src/Disruptor/MP/Producer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.MP.Producer where 2 | 3 | import Control.Concurrent.Async 4 | 5 | import Disruptor.MP.RingBuffer 6 | import Disruptor.SequenceNumber 7 | 8 | ------------------------------------------------------------------------ 9 | 10 | data EventProducer s = EventProducer 11 | { epWorker :: s -> IO s 12 | , epInitialState :: s 13 | } 14 | 15 | newEventProducer :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s 16 | -> IO (EventProducer s) 17 | newEventProducer rb p backPressure s0 = do 18 | let go s = {-# SCC go #-} do 19 | mSnr <- tryNext rb 20 | case mSnr of 21 | None -> do 22 | {-# SCC backPresure #-} backPressure s 23 | go s 24 | Some snr -> do 25 | (e, s') <- {-# SCC p #-} p s 26 | set rb snr e 27 | publish rb snr 28 | go s' 29 | 30 | return (EventProducer go s0) 31 | 32 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a 33 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do 34 | link a 35 | k a 36 | 37 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a 38 | withEventProducerOn capability ep k = 39 | withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do 40 | link a 41 | k a 42 | -------------------------------------------------------------------------------- /src/Disruptor/SP/Unboxed/Producer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.SP.Unboxed.Producer where 2 | 3 | import Control.Concurrent.Async 4 | import Data.Vector.Unboxed.Mutable (Unbox) 5 | 6 | import Disruptor.SP.Unboxed.RingBuffer 7 | import Disruptor.SequenceNumber 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | data EventProducer s = EventProducer 12 | { epWorker :: s -> IO s 13 | , epInitialState :: s 14 | } 15 | 16 | newEventProducer :: Unbox e => RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s 17 | -> IO (EventProducer s) 18 | newEventProducer rb p backPressure s0 = do 19 | let go s = {-# SCC go #-} do 20 | mSnr <- tryNext rb 21 | case mSnr of 22 | None -> do 23 | {-# SCC backPresure #-} backPressure s 24 | go s 25 | Some snr -> do 26 | (e, s') <- {-# SCC p #-} p s 27 | set rb snr e 28 | publish rb snr 29 | go s' 30 | 31 | return (EventProducer go s0) 32 | 33 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a 34 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do 35 | link a 36 | k a 37 | 38 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a 39 | withEventProducerOn capability ep k = 40 | withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do 41 | link a 42 | k a 43 | -------------------------------------------------------------------------------- /bench/MP/FastLogger10.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE NumericUnderscores #-} 2 | 3 | module Main where 4 | 5 | import Control.Concurrent 6 | import Control.Monad (when, replicateM_) 7 | import System.Log.FastLogger 8 | import Data.IORef 9 | 10 | import Common hiding (nUMBER_OF_PRODUCERS, iTERATIONS) 11 | 12 | ------------------------------------------------------------------------ 13 | 14 | iTERATIONS = 1_000_000 15 | 16 | nUMBER_OF_PRODUCERS :: Int 17 | nUMBER_OF_PRODUCERS = 10 18 | 19 | lOG_MESSAGE :: LogStr 20 | lOG_MESSAGE = toLogStr "some random string to log" 21 | 22 | lOG_FILE :: FilePath 23 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-fast-logger10.log" 24 | 25 | main :: IO () 26 | main = do 27 | producersFinished <- newIORef 0 28 | m10psc setup (producer producersFinished) (consumer producersFinished) 29 | where 30 | setup = do 31 | cleanup lOG_FILE 32 | newFileLoggerSet defaultBufSize lOG_FILE 33 | 34 | producer producersFinished lgrset = do 35 | replicateM_ iTERATIONS (pushLogStrLn lgrset lOG_MESSAGE) 36 | flushLogStr lgrset 37 | atomicModifyIORef' producersFinished (\n -> (n + 1, ())) 38 | 39 | consumer producersFinished _lgrset consumerFinished = go 40 | where 41 | go = do 42 | p <- readIORef producersFinished 43 | if p == nUMBER_OF_PRODUCERS 44 | then putMVar consumerFinished () 45 | else do 46 | threadDelay 10000 47 | go 48 | -------------------------------------------------------------------------------- /bench/MP/Disruptor.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent 4 | import Control.Monad 5 | 6 | import Common 7 | import Disruptor.MP.Consumer 8 | import Disruptor.MP.Producer 9 | import Disruptor.MP.RingBuffer 10 | import Disruptor.SequenceNumber 11 | 12 | ------------------------------------------------------------------------ 13 | 14 | main :: IO () 15 | main = mpsc setup producer consumer 16 | where 17 | setup = newRingBuffer bUFFER_CAPACITY 18 | 19 | producer :: RingBuffer Int -> IO () 20 | producer rb = go iTERATIONS 21 | where 22 | go :: Int -> IO () 23 | go 0 = return () 24 | go n = do 25 | mSnr <- tryNext rb 26 | case mSnr of 27 | Some snr -> do 28 | set rb snr vALUE_TO_WRITE 29 | publish rb snr 30 | go (n - 1) 31 | None -> 32 | -- NOTE: No sleep needed here. 33 | go n 34 | 35 | consumer :: RingBuffer Int -> MVar () -> IO () 36 | consumer rb consumerFinished = do 37 | let handler _s _n snr endOfBatch = do 38 | when (endOfBatch && 39 | getSequenceNumber snr == 40 | fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $ 41 | putMVar consumerFinished () 42 | return () 43 | ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME) 44 | setGatingSequences rb [ecSequenceNumber ec] 45 | ecWorker ec () 46 | -------------------------------------------------------------------------------- /bench/SP/Disruptor.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent.MVar (MVar, putMVar) 4 | import Control.Monad (when) 5 | 6 | import Common 7 | import Disruptor.SP 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | main :: IO () 12 | main = spsc setup producer consumer 13 | where 14 | setup :: IO (RingBuffer Int) 15 | setup = newRingBuffer bUFFER_CAPACITY 16 | 17 | producer :: RingBuffer Int -> IO () 18 | producer rb = go iTERATIONS 19 | where 20 | go :: Int -> IO () 21 | go 0 = return () 22 | go n = do 23 | mSnr <- tryNext rb 24 | case mSnr of 25 | Some snr -> do 26 | -- NOTE: Measuring transactions, which is useful for calculating 27 | -- latency, seriously slows down the benchmark. 28 | 29 | -- {-# SCC "transactions+1" #-} incrCounter_ 1 transactions 30 | set rb snr vALUE_TO_WRITE 31 | publish rb snr 32 | go (n - 1) 33 | None -> do 34 | -- yield 35 | go n 36 | 37 | consumer :: RingBuffer Int -> MVar () -> IO () 38 | consumer rb consumerFinished = do 39 | let handler _s _n snr endOfBatch = do 40 | -- t' <- {-# SCC "transactions-1" #-} decrCounter 1 transactions 41 | -- measureInt_ t' histo 42 | when (endOfBatch && getSequenceNumber snr == fromIntegral (iTERATIONS - 1)) $ 43 | putMVar consumerFinished () 44 | return () 45 | ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME) 46 | setGatingSequences rb [ecSequenceNumber ec] 47 | ecWorker ec () 48 | -------------------------------------------------------------------------------- /src/Disruptor/SP/Producer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.SP.Producer where 2 | 3 | import Control.Concurrent.Async 4 | 5 | import Disruptor.SP.RingBuffer 6 | import Disruptor.SequenceNumber 7 | 8 | ------------------------------------------------------------------------ 9 | 10 | data EventProducer s = EventProducer 11 | { epWorker :: s -> IO s 12 | , epInitialState :: s 13 | } 14 | 15 | newEventProducer :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s 16 | -> IO (EventProducer s) 17 | newEventProducer rb p backPressure s0 = do 18 | let go s = {-# SCC go #-} do 19 | mSnr <- tryNext rb 20 | case mSnr of 21 | None -> do 22 | {-# SCC backPresure #-} backPressure s 23 | go s 24 | Some snr -> do 25 | (e, s') <- {-# SCC p #-} p s 26 | set rb snr e 27 | publish rb snr 28 | go s' 29 | 30 | return (EventProducer go s0) 31 | 32 | -- XXX: 2x slower than above... 33 | newEventProducer' :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s 34 | -> IO (EventProducer s) 35 | newEventProducer' rb p backPressure s0 = do 36 | let go s = {-# SCC go #-} do 37 | snr <- next rb 38 | (e, s') <- {-# SCC p #-} p s 39 | set rb snr e 40 | publish rb snr 41 | go s' 42 | 43 | return (EventProducer go s0) 44 | 45 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a 46 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do 47 | link a 48 | k a 49 | 50 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a 51 | withEventProducerOn capability ep k = 52 | withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do 53 | link a 54 | k a 55 | -------------------------------------------------------------------------------- /bench/SP/DisruptorUnboxed.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent.MVar 4 | import Control.Monad 5 | 6 | import Disruptor.SP.Unboxed.Consumer 7 | import Disruptor.SP.Unboxed.Producer 8 | import Disruptor.SP.Unboxed.RingBuffer 9 | import Disruptor.SequenceNumber 10 | import Common 11 | 12 | ------------------------------------------------------------------------ 13 | 14 | main :: IO () 15 | main = spsc setup producer consumer 16 | where 17 | setup :: IO (RingBuffer Int) 18 | setup = newRingBuffer bUFFER_CAPACITY 19 | 20 | producer :: RingBuffer Int -> IO () 21 | producer rb = go iTERATIONS 22 | where 23 | go :: Int -> IO () 24 | go 0 = return () 25 | go n = do 26 | mSnr <- tryNext rb 27 | case mSnr of 28 | Some snr -> do 29 | -- NOTE: Measuring transactions, which is useful for calculating 30 | -- latency, seriously slows down the benchmark. 31 | 32 | -- {-# SCC "transactions+1" #-} incrCounter_ 1 transactions 33 | set rb snr vALUE_TO_WRITE 34 | publish rb snr 35 | go (n - 1) 36 | None -> do 37 | -- threadDelay sLEEP_TIME 38 | go n 39 | 40 | consumer :: RingBuffer Int -> MVar () -> IO () 41 | consumer rb consumerFinished = do 42 | let handler _s _n snr endOfBatch = do 43 | -- t' <- {-# SCC "transactions-1" #-} decrCounter 1 transactions 44 | -- measureInt_ t' histo 45 | when (endOfBatch && getSequenceNumber snr == fromIntegral (iTERATIONS - 1)) $ 46 | putMVar consumerFinished () 47 | return () 48 | ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME) 49 | setGatingSequences rb [ecSequenceNumber ec] 50 | ecWorker ec () 51 | -------------------------------------------------------------------------------- /src/Disruptor/SequenceNumber.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingStrategies #-} 2 | {-# LANGUAGE DataKinds #-} 3 | {-# LANGUAGE UnboxedTuples #-} 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 5 | 6 | module Disruptor.SequenceNumber where 7 | 8 | import Data.Bits (countLeadingZeros, finiteBitSize, unsafeShiftR, (.&.)) 9 | import Data.Int (Int64) 10 | 11 | ------------------------------------------------------------------------ 12 | 13 | newtype SequenceNumber = SequenceNumber { getSequenceNumber :: Int64 } 14 | deriving newtype (Num, Eq, Ord, Real, Enum, Integral, Show, Bounded) 15 | -- ^ NOTE: `(maxBound :: Int64) == 9223372036854775807` so if we write 10M events 16 | -- per second (`10_000_000*60*60*24*365 == 315360000000000) then it would take 17 | -- us `9223372036854775807 / 315360000000000 == 29247.1208677536` years before 18 | -- we overflow. 19 | 20 | -- > quickCheck $ \(Positive i) j -> let capacity = 2^i in 21 | -- j `mod` capacity == j Data.Bits..&. (capacity - 1) 22 | index :: Int64 -> SequenceNumber -> Int 23 | index capacity (SequenceNumber i) = fromIntegral (i .&. indexMask) 24 | where 25 | indexMask = capacity - 1 26 | {-# INLINE index #-} 27 | 28 | availabilityFlag :: Int64 -> SequenceNumber -> Int 29 | availabilityFlag capacity (SequenceNumber i) = 30 | fromIntegral (i `unsafeShiftR` indexShift) 31 | where 32 | indexShift = logBase2 capacity 33 | {-# INLINE availabilityFlag #-} 34 | 35 | -- Taken from: 36 | -- https://hackage.haskell.org/package/base-4.15.0.0/docs/Data-Bits.html#v:countLeadingZeros 37 | logBase2 :: Int64 -> Int 38 | logBase2 i = finiteBitSize i - 1 - countLeadingZeros i 39 | {-# INLINE logBase2 #-} 40 | 41 | data MaybeSequenceNumber = None | Some {-# UNPACK #-} !SequenceNumber 42 | deriving (Eq, Show) 43 | -- ^ TODO: compare with: https://hackage.haskell.org/package/unpacked-maybe-text-0.1.0.0/docs/src/Data.Maybe.Unpacked.Text.Short.html#MaybeShortText 44 | -------------------------------------------------------------------------------- /app/ReadmeDisruptorExample.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Control.Concurrent 4 | import Control.Concurrent.Async 5 | import Disruptor.SP 6 | 7 | main :: IO () 8 | main = do 9 | 10 | -- Create the shared ring buffer. 11 | let bufferCapacity = 128 12 | rb <- newRingBuffer bufferCapacity 13 | 14 | -- The producer keeps a counter and produces events that are merely the pretty 15 | -- printed value as a string of that counter. 16 | let produce :: Int -> IO (String, Int) 17 | produce n = return (show n, n + 1) 18 | 19 | -- The counter starts at zero. 20 | initialProducerState = 0 21 | 22 | -- No back-pressure is applied in this example. 23 | backPressure :: Int -> IO () 24 | backPressure _ = return () 25 | 26 | producer <- newEventProducer rb produce backPressure initialProducerState 27 | 28 | -- The consumer merely prints the string event to the terminal. 29 | let consume :: () -> String -> SequenceNumber -> EndOfBatch -> IO () 30 | consume () event snr endOfBatch = 31 | putStrLn (event ++ if endOfBatch then " (end of batch)" else "") 32 | 33 | -- The consumer doesn't need any state in this example. 34 | initialConsumerState = () 35 | 36 | -- Which other consumers do we need to wait for before consuming an event? 37 | dependencies = [] 38 | 39 | -- What to do in case there are no events to consume? 40 | waitStrategy = Sleep 1 41 | 42 | consumer <- newEventConsumer rb consume initialConsumerState dependencies waitStrategy 43 | 44 | -- Tell the ring buffer which the last consumer is, to avoid overwriting 45 | -- events that haven't been consumed yet. 46 | setGatingSequences rb [ecSequenceNumber consumer] 47 | 48 | withEventProducer producer $ \ap -> 49 | withEventConsumer consumer $ \ac -> do 50 | threadDelay (3 * 1000 * 1000) -- 3 sec 51 | cancel ap 52 | cancel ac 53 | -------------------------------------------------------------------------------- /src/Disruptor/SP/Unboxed/Consumer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX 2 | 3 | module Disruptor.SP.Unboxed.Consumer where 4 | 5 | import Control.Concurrent.Async 6 | import Control.Concurrent 7 | import Control.Concurrent.STM -- XXX 8 | import Data.IORef 9 | import Data.Vector.Unboxed (Unbox) 10 | 11 | import Disruptor.SequenceNumber 12 | import Disruptor.SP.Unboxed.RingBuffer 13 | 14 | ------------------------------------------------------------------------ 15 | 16 | data EventConsumer s = EventConsumer 17 | { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber) 18 | , ecWorker :: s -> IO s 19 | , ecInitialState :: s 20 | } 21 | 22 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles 23 | -- even and another handles odd numbers. 24 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s 25 | type EndOfBatch = Bool 26 | 27 | data SequenceBarrier e 28 | = RingBufferBarrier (RingBuffer e) 29 | | forall s. EventConsumerBarrier (EventConsumer s) 30 | 31 | data WaitStrategy = Sleep Int 32 | 33 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a 34 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do 35 | link a 36 | k a 37 | 38 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a 39 | withEventConsumerOn capability ec k = 40 | withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do 41 | link a 42 | k a 43 | 44 | newEventConsumer :: Unbox e => RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e] 45 | -> WaitStrategy -> IO (EventConsumer s) 46 | newEventConsumer rb handler s0 _barriers (Sleep n) = do 47 | snrRef <- newIORef (-1) 48 | 49 | let go s = {-# SCC go #-} do 50 | mySnr <- readIORef snrRef 51 | bSnr <- waitFor mySnr rb -- XXX: barriers 52 | -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271 53 | s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s 54 | writeIORef snrRef bSnr 55 | go s' 56 | where 57 | go' lo hi s | lo > hi = return s 58 | | lo <= hi = do 59 | e <- unsafeGet rb lo 60 | s' <- {-# SCC handler #-} handler s e lo (lo == hi) 61 | go' (lo + 1) hi s' 62 | 63 | return (EventConsumer snrRef go s0) 64 | 65 | waitFor :: SequenceNumber -> RingBuffer e -> IO SequenceNumber 66 | waitFor consumed rb = go 67 | where 68 | go = do 69 | produced <- readIORef (rbCursor rb) 70 | if consumed < produced 71 | then return produced 72 | else do 73 | -- NOTE: Removing the sleep seems to cause non-termination... XXX: Why 74 | -- though? the consumer should be running on its own thread? 75 | threadDelay 1 76 | go -- SPIN 77 | -- ^ XXX: waitStrategy should be passed in and acted on here. 78 | -- 79 | -- XXX: Other wait strategies could be implemented here, e.g. we could 80 | -- try to recurse immediately here, and if there's no work after a 81 | -- couple of tries go into a takeMTVar sleep waiting for a producer to 82 | -- wake us up. 83 | {-# INLINE waitFor #-} 84 | -------------------------------------------------------------------------------- /bench/SingleOps.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE NumericUnderscores #-} 2 | 3 | module Main where 4 | 5 | import Control.Monad 6 | import Control.Concurrent 7 | import Control.Concurrent.Async 8 | import Data.Time 9 | import Data.Word 10 | import Data.Atomics.Counter 11 | import Data.IORef 12 | import System.CPUTime 13 | 14 | import StuntDouble.Histogram 15 | import qualified Disruptor.AtomicCounterPadded as Padded 16 | 17 | -- Can't load FastMutInt: 18 | -- Could not load module `FastMutInt' 19 | -- It is a member of the hidden package `ghc-8.10.4'. 20 | -- Perhaps you need to add `ghc' to the build-depends in your .cabal file. 21 | -- 22 | -- But when I add ghc to build-depends it says it can't find a build plan, and 23 | -- also https://hackage.haskell.org/package/ghc doesn't seem to have 8.10.4... 24 | -- 25 | -- import FastMutInt 26 | 27 | ------------------------------------------------------------------------ 28 | 29 | main :: IO () 30 | main = do 31 | many "getCurrentTime" (return ()) (const getCurrentTime) 32 | 33 | many "getCPUTime" (return ()) (const getCPUTime) 34 | 35 | many "threadDelay 0" (return ()) (const (threadDelay 0)) 36 | many "threadDelay 1" (return ()) (const (threadDelay 1)) 37 | many "yield" (return ()) (const yield) 38 | manyConcurrent "threadDelay 0 (concurrent)" 3 (return ()) (const (threadDelay 0)) 39 | manyConcurrent "threadDelay 1 (concurrent)" 3 (return ()) (const (threadDelay 1)) 40 | manyConcurrent "yield (concurrent)" 3 (return ()) (const yield) 41 | 42 | many "incrCounter1" (newCounter 0) (incrCounter 1) 43 | many "incrCounter1Padded" (Padded.newCounter 0) (Padded.incrCounter 1) 44 | -- many "incrCounter1FastMutInt" (newFastMutInt 0) 45 | -- (\r -> readFastMutInt r >>= \v -> writeFastMutInt r (v +1)) 46 | 47 | manyConcurrent "incrCounter1 (concurrent)" 48 | 3 (newCounter 0) (incrCounter 1) 49 | manyConcurrent "incrCounter1Padded (concurrent)" 50 | 3 (Padded.newCounter 0) (Padded.incrCounter 1) 51 | 52 | many "modifyIORef'" (newIORef (0 :: Int)) (\r -> modifyIORef' r succ) 53 | 54 | many "atomicModifyIORef'" 55 | (newIORef (0 :: Int)) (\r -> atomicModifyIORef' r (\n -> ((n + 1), ()))) 56 | 57 | manyConcurrent "atomicModifyIORef' (concurrent)" 3 58 | (newIORef (0 :: Int)) (\r -> atomicModifyIORef' r (\n -> ((n + 1), ()))) 59 | 60 | many :: String -> IO a -> (a -> IO b) -> IO () 61 | many name create use = do 62 | h <- newHistogram 63 | r <- create 64 | replicateM 500000 (once h (use r)) 65 | putStrLn "" 66 | putStrLn "" 67 | prettyPrintHistogram name h 68 | 69 | manyConcurrent :: String -> Int -> IO a -> (a -> IO b) -> IO () 70 | manyConcurrent name n create use = do 71 | h <- newHistogram 72 | r <- create 73 | as <- replicateM n (async (replicateM 500000 (once h (use r)))) 74 | mapM_ wait as 75 | putStrLn "" 76 | putStrLn "" 77 | prettyPrintHistogram name h 78 | 79 | once :: Histogram -> IO a -> IO () 80 | once h io = do 81 | start <- fromInteger <$> getCPUTime 82 | _ <- io 83 | end <- fromInteger <$> getCPUTime 84 | 85 | let diffPico :: Word64 86 | diffPico = end - start 87 | 88 | diffNano :: Double 89 | diffNano = realToFrac (fromIntegral diffPico) * 1e-3 90 | 91 | void (measure diffNano h) 92 | -------------------------------------------------------------------------------- /src/Disruptor/SP/Consumer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX 2 | 3 | module Disruptor.SP.Consumer where 4 | 5 | import Control.Concurrent.Async 6 | import Control.Concurrent 7 | import Control.Concurrent.STM -- XXX 8 | import Data.IORef 9 | 10 | import Disruptor.SequenceNumber 11 | import Disruptor.SP.RingBuffer 12 | 13 | ------------------------------------------------------------------------ 14 | 15 | data EventConsumer s = EventConsumer 16 | { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber) 17 | , ecWorker :: s -> IO s 18 | , ecInitialState :: s 19 | } 20 | 21 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles 22 | -- even and another handles odd numbers. 23 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s 24 | type EndOfBatch = Bool 25 | 26 | data SequenceBarrier e 27 | = RingBufferBarrier (RingBuffer e) 28 | | forall s. EventConsumerBarrier (EventConsumer s) 29 | 30 | data WaitStrategy = Sleep Int 31 | 32 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a 33 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do 34 | link a 35 | k a 36 | 37 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a 38 | withEventConsumerOn capability ec k = 39 | withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do 40 | link a 41 | k a 42 | 43 | newEventConsumer :: RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e] 44 | -> WaitStrategy -> IO (EventConsumer s) 45 | newEventConsumer rb handler s0 _barriers ws = do 46 | snrRef <- newIORef (-1) 47 | 48 | let go s = {-# SCC go #-} do 49 | mySnr <- readIORef snrRef 50 | bSnr <- waitFor mySnr rb ws -- XXX: barriers 51 | -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271 52 | s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s 53 | writeIORef snrRef bSnr 54 | go s' 55 | where 56 | go' lo hi s | lo > hi = return s 57 | | lo <= hi = do 58 | e <- unsafeGet rb lo 59 | s' <- {-# SCC handler #-} handler s e lo (lo == hi) 60 | go' (lo + 1) hi s' 61 | 62 | return (EventConsumer snrRef go s0) 63 | 64 | waitFor :: SequenceNumber -> RingBuffer e -> WaitStrategy -> IO SequenceNumber 65 | waitFor consumed rb (Sleep n) = go 66 | where 67 | go = do 68 | produced <- readIORef (rbCursor rb) 69 | if consumed < produced 70 | then return produced 71 | else do 72 | threadDelay n -- NOTE: removing the sleep seems to cause 73 | -- non-termination... XXX: Why though? the consumer should be 74 | -- running on its own thread? 75 | go -- SPIN 76 | 77 | -- XXX: Other wait strategies could be implemented here, e.g. we could 78 | -- try to recurse immediately here, and if there's no work after a 79 | -- couple of tries go into a takeMTVar sleep waiting for a producer to 80 | -- wake us up. 81 | 82 | _getSequenceNumberRef :: SequenceBarrier e -> IORef SequenceNumber 83 | _getSequenceNumberRef (RingBufferBarrier rb) = rbCursor rb 84 | _getSequenceNumberRef (EventConsumerBarrier ec) = ecSequenceNumber ec 85 | {-# INLINE waitFor #-} 86 | -------------------------------------------------------------------------------- /bench/MP/Logging10.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE NumericUnderscores #-} 2 | 3 | module Main where 4 | 5 | import Control.Concurrent 6 | import Control.Monad (when) 7 | import Data.ByteString.Char8 (ByteString) 8 | import Data.ByteString.Builder 9 | import qualified Data.ByteString.Char8 as BS 10 | import System.IO 11 | 12 | 13 | import Disruptor.MP 14 | import Common hiding (nUMBER_OF_PRODUCERS, iTERATIONS) 15 | 16 | ------------------------------------------------------------------------ 17 | 18 | iTERATIONS = 1_000_000 19 | 20 | nUMBER_OF_PRODUCERS :: Int 21 | nUMBER_OF_PRODUCERS = 10 22 | 23 | lOG_MESSAGE :: ByteString 24 | lOG_MESSAGE = BS.pack "some random string to log\n" 25 | 26 | lOG_FILE :: FilePath 27 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-logger10.log" 28 | 29 | data Buffer = Buffer 30 | { bufferCapacity :: !Int 31 | , bufferSize :: !Int 32 | , buffer :: !Builder 33 | } 34 | 35 | newBuffer :: Int -> Buffer 36 | newBuffer capacity = Buffer capacity 0 (byteString (BS.pack "")) 37 | 38 | hasCapacityBuffer :: ByteString -> Buffer -> Bool 39 | hasCapacityBuffer bs buf = bufferSize buf + BS.length bs <= bufferCapacity buf 40 | 41 | appendBuffer :: ByteString -> Buffer -> Buffer 42 | appendBuffer bs buf = 43 | buf { bufferSize = bufferSize buf + BS.length bs 44 | , buffer = buffer buf <> byteString bs 45 | } 46 | 47 | flushBuffer :: Buffer -> Handle -> IO Buffer 48 | flushBuffer buf h = do 49 | hPutBuilder h (buffer buf) 50 | return (newBuffer (bufferCapacity buf)) 51 | 52 | main :: IO () 53 | main = m10psc setup producer consumer 54 | where 55 | setup = do 56 | cleanup lOG_FILE 57 | newRingBuffer bUFFER_CAPACITY 58 | 59 | producer :: RingBuffer ByteString -> IO () 60 | producer rb = go iTERATIONS 61 | where 62 | go :: Int -> IO () 63 | go 0 = return () 64 | go n = do 65 | mSnr <- tryNext rb 66 | case mSnr of 67 | Some snr -> do 68 | set rb snr lOG_MESSAGE 69 | publish rb snr 70 | go (n - 1) 71 | None -> 72 | -- NOTE: No sleep needed here. 73 | go n 74 | 75 | consumer :: RingBuffer ByteString -> MVar () -> IO Buffer 76 | consumer rb consumerFinished = do 77 | h <- openFile lOG_FILE WriteMode 78 | hSetBinaryMode h True 79 | hSetBuffering h (BlockBuffering (Just 4096)) 80 | let handler buf bs snr endOfBatch = do 81 | buf' <- if hasCapacityBuffer bs buf -- XXX: we can probably do 82 | -- better here by only flushing 83 | -- at `endOfBatch`? 84 | then return (appendBuffer bs buf) 85 | else do 86 | buf' <- flushBuffer buf h 87 | return (appendBuffer bs buf') 88 | when (endOfBatch && 89 | getSequenceNumber snr == 90 | fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $ do 91 | flushBuffer buf' h 92 | hFlush h 93 | putMVar consumerFinished () 94 | return buf' 95 | ec <- newEventConsumer rb handler (newBuffer bUFFER_CAPACITY) [] (Sleep sLEEP_TIME) 96 | setGatingSequences rb [ecSequenceNumber ec] 97 | ecWorker ec (ecInitialState ec) 98 | -------------------------------------------------------------------------------- /src/Disruptor/MP/Consumer.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX 2 | 3 | module Disruptor.MP.Consumer where 4 | 5 | import Control.Concurrent.Async 6 | import Control.Concurrent 7 | import Control.Concurrent.STM -- XXX 8 | import Data.IORef 9 | 10 | import Disruptor.SequenceNumber 11 | import Disruptor.MP.RingBuffer 12 | 13 | ------------------------------------------------------------------------ 14 | 15 | data EventConsumer s = EventConsumer 16 | { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber) 17 | , ecWorker :: s -> IO s 18 | , ecInitialState :: s 19 | } 20 | 21 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles 22 | -- even and another handles odd numbers. 23 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s 24 | type EndOfBatch = Bool 25 | 26 | data SequenceBarrier e 27 | = RingBufferBarrier (RingBuffer e) 28 | | forall s. EventConsumerBarrier (EventConsumer s) 29 | 30 | data WaitStrategy = Sleep Int 31 | 32 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a 33 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do 34 | link a 35 | k a 36 | 37 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a 38 | withEventConsumerOn capability ec k = 39 | withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do 40 | link a 41 | k a 42 | 43 | newEventConsumer :: RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e] 44 | -> WaitStrategy -> IO (EventConsumer s) 45 | newEventConsumer rb handler s0 _barriers ws = do 46 | snrRef <- newIORef (-1) 47 | 48 | let go s = {-# SCC go #-} do 49 | mySnr <- readIORef snrRef 50 | bSnr <- waitFor mySnr rb ws -- XXX: barriers 51 | -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271 52 | s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s 53 | writeIORef snrRef bSnr 54 | go s' 55 | where 56 | go' lo hi s | lo > hi = return s 57 | | lo <= hi = do 58 | e <- get rb lo 59 | s' <- {-# SCC handler #-} handler s e lo (lo == hi) 60 | go' (lo + 1) hi s' 61 | 62 | return (EventConsumer snrRef go s0) 63 | 64 | waitFor :: SequenceNumber -> RingBuffer e -> WaitStrategy -> IO SequenceNumber 65 | waitFor consumed rb (Sleep n) = go 66 | where 67 | go = do 68 | claimedSequence <- getCursor rb 69 | if consumed < claimedSequence 70 | -- `claimedSequence` may be much higher than the the capacity of the ring 71 | -- buffer, so we need to guarantee that every consumer makes batches that 72 | -- are at most `rbCapacity rb` big. 73 | then return (min claimedSequence (consumed + fromIntegral (rbCapacity rb))) 74 | else do 75 | -- NOTE: Removing the sleep seems to cause non-termination... XXX: Why 76 | -- though? the consumer should be running on its own thread? 77 | -- yield 78 | threadDelay n 79 | go -- SPIN 80 | -- 81 | -- XXX: Other wait strategies could be implemented here, e.g. we could 82 | -- try to recurse immediately here, and if there's no work after a 83 | -- couple of tries go into a takeMTVar sleep waiting for a producer to 84 | -- wake us up. 85 | {-# INLINE waitFor #-} 86 | -------------------------------------------------------------------------------- /bench/MP/Logging.hs: -------------------------------------------------------------------------------- 1 | -- This example is inspired by log4j's use of the disruptor pattern: 2 | -- 3 | -- https://logging.apache.org/log4j/2.x/manual/async.html 4 | -- 5 | -- The idea is that each producer corresponds to a concurrent thread that may 6 | -- log messages and a single consumer then syncs the log messages to disk. 7 | 8 | module Main where 9 | 10 | import Control.Concurrent 11 | import Control.Monad (when) 12 | import Data.ByteString.Char8 (ByteString) 13 | import Data.ByteString.Builder 14 | import qualified Data.ByteString.Char8 as BS 15 | import System.IO 16 | 17 | import Common 18 | import Disruptor.MP 19 | 20 | ------------------------------------------------------------------------ 21 | 22 | lOG_MESSAGE :: ByteString 23 | lOG_MESSAGE = BS.pack "some random string to log\n" 24 | 25 | lOG_FILE :: FilePath 26 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-logger.log" 27 | 28 | data Buffer = Buffer 29 | { bufferCapacity :: !Int 30 | , bufferSize :: !Int 31 | , buffer :: !Builder 32 | } 33 | 34 | newBuffer :: Int -> Buffer 35 | newBuffer capacity = Buffer capacity 0 (byteString (BS.pack "")) 36 | 37 | hasCapacityBuffer :: ByteString -> Buffer -> Bool 38 | hasCapacityBuffer bs buf = bufferSize buf + BS.length bs <= bufferCapacity buf 39 | 40 | appendBuffer :: ByteString -> Buffer -> Buffer 41 | appendBuffer bs buf = 42 | buf { bufferSize = bufferSize buf + BS.length bs 43 | , buffer = buffer buf <> byteString bs 44 | } 45 | 46 | flushBuffer :: Buffer -> Handle -> IO Buffer 47 | flushBuffer buf h = do 48 | hPutBuilder h (buffer buf) 49 | return (newBuffer (bufferCapacity buf)) 50 | 51 | main :: IO () 52 | main = mpsc setup producer consumer 53 | where 54 | setup = do 55 | cleanup lOG_FILE 56 | newRingBuffer bUFFER_CAPACITY 57 | 58 | producer :: RingBuffer ByteString -> IO () 59 | producer rb = go iTERATIONS 60 | where 61 | go :: Int -> IO () 62 | go 0 = return () 63 | go n = do 64 | mSnr <- tryNext rb 65 | case mSnr of 66 | Some snr -> do 67 | set rb snr lOG_MESSAGE 68 | publish rb snr 69 | go (n - 1) 70 | None -> 71 | -- NOTE: No sleep needed here. 72 | go n 73 | 74 | consumer :: RingBuffer ByteString -> MVar () -> IO Buffer 75 | consumer rb consumerFinished = do 76 | h <- openFile lOG_FILE WriteMode 77 | hSetBinaryMode h True 78 | hSetBuffering h (BlockBuffering (Just 4096)) 79 | let handler buf bs snr endOfBatch = do 80 | buf' <- if hasCapacityBuffer bs buf -- XXX: we can probably do 81 | -- better here by only flushing 82 | -- at `endOfBatch`? 83 | then return (appendBuffer bs buf) 84 | else do 85 | buf' <- flushBuffer buf h 86 | return (appendBuffer bs buf') 87 | when (endOfBatch && 88 | getSequenceNumber snr == 89 | fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $ do 90 | flushBuffer buf' h 91 | hFlush h 92 | putMVar consumerFinished () 93 | return buf' 94 | ec <- newEventConsumer rb handler (newBuffer bUFFER_CAPACITY) [] (Sleep sLEEP_TIME) 95 | setGatingSequences rb [ecSequenceNumber ec] 96 | ecWorker ec (ecInitialState ec) 97 | -------------------------------------------------------------------------------- /src/Disruptor/AtomicCounterPadded.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE MagicHash #-} 3 | {-# LANGUAGE UnboxedTuples #-} 4 | 5 | -- Inspired by: 6 | -- * https://github.com/jberryman/unagi-chan/blob/master/src/Data/Atomics/Counter/Fat.hs 7 | -- * https://hackage.haskell.org/package/atomic-primops-0.8.4/docs/src/Data.Atomics.Counter.html 8 | -- * https://hackage.haskell.org/package/unboxed-ref-0.4.0.0/docs/src/Data-STRef-Unboxed-Internal.html#STRefU 9 | -- * https://hackage.haskell.org/package/ghc-8.10.2/docs/src/FastMutInt.html#FastMutInt 10 | 11 | module Disruptor.AtomicCounterPadded 12 | ( AtomicCounter() 13 | , newCounter 14 | , incrCounter 15 | , incrCounter_ 16 | , getAndIncrCounter 17 | , decrCounter 18 | , decrCounter_ 19 | , readCounter 20 | , casCounter 21 | ) where 22 | 23 | import Data.Bits (finiteBitSize) 24 | import GHC.Exts 25 | import GHC.Types 26 | 27 | ------------------------------------------------------------------------ 28 | 29 | data AtomicCounter = AtomicCounter !(MutableByteArray# RealWorld) 30 | 31 | sIZEOF_CACHELINE :: Int 32 | sIZEOF_CACHELINE = 64 33 | {-# INLINE sIZEOF_CACHELINE #-} 34 | -- ^ TODO: See 35 | -- https://github.com/NickStrupat/CacheLineSize/blob/93a57c094f71a2796714f7a28d74dd8776149193/CacheLineSize.c 36 | -- for how to get the cache line size on Windows, MacOS and Linux. 37 | 38 | -- | Create a new atomic counter padded with 64-bytes (an x86 cache line) to try 39 | -- to avoid false sharing. 40 | newCounter :: Int -> IO AtomicCounter 41 | newCounter (I# n) = IO $ \s -> 42 | case newAlignedPinnedByteArray# size alignment s of 43 | (# s', arr #) -> case writeIntArray# arr 0# n s' of 44 | s'' -> (# s'', AtomicCounter arr #) 45 | where 46 | !(I# size) = finiteBitSize (0 :: Int) 47 | !(I# alignment) = sIZEOF_CACHELINE 48 | {-# INLINE newCounter #-} 49 | 50 | incrCounter :: Int -> AtomicCounter -> IO Int 51 | incrCounter (I# incr) (AtomicCounter arr) = IO $ \s -> 52 | case fetchAddIntArray# arr 0# incr s of 53 | (# s', i #) -> (# s', I# (i +# incr) #) 54 | {-# INLINE incrCounter #-} 55 | 56 | getAndIncrCounter :: Int -> AtomicCounter -> IO Int 57 | getAndIncrCounter (I# incr) (AtomicCounter arr) = IO $ \s -> 58 | case fetchAddIntArray# arr 0# incr s of 59 | (# s', i #) -> (# s', I# i #) 60 | {-# INLINE getAndIncrCounter #-} 61 | 62 | incrCounter_ :: Int -> AtomicCounter -> IO () 63 | incrCounter_ (I# incr) (AtomicCounter arr) = IO $ \s -> 64 | case fetchAddIntArray# arr 0# incr s of 65 | (# s', _i #) -> (# s', () #) 66 | {-# INLINE incrCounter_ #-} 67 | 68 | decrCounter :: Int -> AtomicCounter -> IO Int 69 | decrCounter (I# decr) (AtomicCounter arr) = IO $ \s -> 70 | case fetchSubIntArray# arr 0# decr s of 71 | (# s', i #) -> (# s', I# (i -# decr) #) 72 | {-# INLINE decrCounter #-} 73 | 74 | decrCounter_ :: Int -> AtomicCounter -> IO () 75 | decrCounter_ (I# decr) (AtomicCounter arr) = IO $ \s -> 76 | case fetchSubIntArray# arr 0# decr s of 77 | (# s', _i #) -> (# s', () #) 78 | {-# INLINE decrCounter_ #-} 79 | 80 | readCounter :: AtomicCounter -> IO Int 81 | readCounter (AtomicCounter arr) = IO $ \s -> 82 | case readIntArray# arr 0# s of 83 | (# s', i #) -> (# s', I# i #) 84 | {-# INLINE readCounter #-} 85 | 86 | casCounter :: AtomicCounter -> Int -> Int -> IO Bool 87 | casCounter (AtomicCounter arr) (I# old) (I# new) = IO $ \s -> 88 | case casIntArray# arr 0# old new s of 89 | (# s', before #) -> case before ==# old of 90 | 1# -> (# s', True #) 91 | 0# -> (# s', False #) 92 | {-# INLINE casCounter #-} 93 | -------------------------------------------------------------------------------- /bench/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE NumericUnderscores #-} 2 | 3 | module Common where 4 | 5 | import Control.Monad (when) 6 | import Control.Concurrent 7 | import Control.Concurrent.Async 8 | import Control.Concurrent.MVar 9 | import Data.Time 10 | import System.Directory (doesFileExist, removeFile) 11 | import System.Mem (performGC) 12 | import Text.Printf 13 | 14 | ------------------------------------------------------------------------ 15 | 16 | iTERATIONS :: Int 17 | iTERATIONS = 100_000_000 18 | 19 | vALUE_TO_WRITE :: Int 20 | vALUE_TO_WRITE = 1 21 | 22 | nUMBER_OF_PRODUCERS :: Int 23 | nUMBER_OF_PRODUCERS = 3 24 | 25 | sLEEP_TIME :: Int 26 | sLEEP_TIME = 0 27 | 28 | bUFFER_CAPACITY :: Int 29 | bUFFER_CAPACITY = 1024 * 64 30 | 31 | -- Single-producer single-consumer helper. 32 | spsc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO ()) -> IO () 33 | spsc setup producer consumer = do 34 | (r, consumerFinished, start) <- header setup 35 | withAsync (producer r) $ \ap -> 36 | withAsync (consumer r consumerFinished) $ \ac -> do 37 | () <- takeMVar consumerFinished 38 | end <- getCurrentTime 39 | cancel ap 40 | cancel ac 41 | footer start end 42 | 43 | -- Multiple-producers single-consumer helper. 44 | mpsc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO c) -> IO () 45 | mpsc setup producer consumer = do 46 | (r, consumerFinished, start) <- header setup 47 | withAsync (producer r) $ \ap1 -> 48 | withAsync (producer r) $ \ap2 -> 49 | withAsync (producer r) $ \ap3 -> 50 | withAsync (consumer r consumerFinished) $ \ac -> do 51 | () <- takeMVar consumerFinished 52 | end <- getCurrentTime 53 | mapM_ cancel [ap1, ap2, ap3] 54 | cancel ac 55 | footer start end 56 | 57 | m10psc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO c) -> IO () 58 | m10psc setup producer consumer = do 59 | (r, consumerFinished, start) <- header setup 60 | withAsync (producer r) $ \ap1 -> 61 | withAsync (producer r) $ \ap2 -> 62 | withAsync (producer r) $ \ap3 -> 63 | withAsync (producer r) $ \ap4 -> 64 | withAsync (producer r) $ \ap5 -> 65 | withAsync (producer r) $ \ap6 -> 66 | withAsync (producer r) $ \ap7 -> 67 | withAsync (producer r) $ \ap8 -> 68 | withAsync (producer r) $ \ap9 -> 69 | withAsync (producer r) $ \ap10 -> 70 | withAsync (consumer r consumerFinished) $ \ac -> do 71 | () <- takeMVar consumerFinished 72 | end <- getCurrentTime 73 | mapM_ cancel [ap1, ap2, ap3, ap4, ap5, ap6, ap7, ap8, ap9, ap10] 74 | cancel ac 75 | footer start end 76 | 77 | header :: IO a -> IO (a, MVar (), UTCTime) 78 | header setup = do 79 | n <- getNumCapabilities 80 | printf "%-25.25s%10d\n" "CPU capabilities" n 81 | printf "%-25.25s%10d\n" "Total number of events" iTERATIONS 82 | r <- setup 83 | consumerFinished <- newEmptyMVar 84 | performGC 85 | start <- getCurrentTime 86 | return (r, consumerFinished, start) 87 | 88 | footer :: UTCTime -> UTCTime -> IO () 89 | footer start end = do 90 | let duration :: Double 91 | duration = realToFrac (diffUTCTime end start) 92 | 93 | throughput :: Double 94 | throughput = realToFrac iTERATIONS / duration 95 | printf "%-25.25s%10.2f events/s\n" "Throughput" throughput 96 | printf "%-25.25s%10.2f s\n" "Duration" duration 97 | 98 | -- XXX: prettyPrintHistogram histo 99 | -- meanTransactions <- hmean histo 100 | -- printf "%-25.25s%10.2f\n" "Mean concurrent txs" meanTransactions 101 | -- Just maxTransactions <- percentile 100.0 histo 102 | -- printf "%-25.25s%10.2f\n" "Max concurrent txs" maxTransactions 103 | -- printf "%-25.25s%10.2f ns\n" "Latency" ((meanTransactions / throughput) * 1000000) 104 | 105 | cleanup :: FilePath -> IO () 106 | cleanup fp = do 107 | b <- doesFileExist fp 108 | when b (removeFile fp) 109 | -------------------------------------------------------------------------------- /app/ReadmePipelineExample.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE BangPatterns #-} 3 | 4 | module Main where 5 | 6 | import Control.Monad 7 | import Control.Concurrent (threadDelay) 8 | import Control.Concurrent.Async 9 | import Control.Concurrent.STM 10 | import Control.Concurrent.STM.TQueue 11 | import Data.Time 12 | import Data.List 13 | 14 | ------------------------------------------------------------------------ 15 | 16 | data SM s a b where 17 | Id :: SM s a a 18 | Compose :: SM s b c -> SM s a b -> SM s a c 19 | Fst :: SM s (a, b) a 20 | Snd :: SM s (a, b) b 21 | (:&&&) :: SM s a c -> SM s a d -> SM s a (c, d) 22 | (:***) :: SM s a c -> SM s b d -> SM s (a, b) (c, d) 23 | SlowIO :: SM s a a 24 | 25 | swap :: SM () (a, b) (b, a) 26 | swap = Snd :*** Fst `Compose` copy `Compose` SlowIO 27 | where 28 | copy = Id :&&& Id 29 | 30 | interpret :: SM s a b -> (a -> s -> IO (s, b)) 31 | interpret Id x s = return (s, x) 32 | interpret (Compose g f) x s = do 33 | (s', y) <- interpret f x s 34 | interpret g y s' 35 | interpret Fst x s = return (s, fst x) 36 | interpret Snd x s = return (s, snd x) 37 | interpret (f :&&& g) x s = do 38 | (s', y) <- interpret f x s 39 | (s'', z) <- interpret g x s' 40 | return (s'', (y, z)) 41 | interpret (f :*** g) x s = do 42 | (s', y) <- interpret f (fst x) s 43 | (s'', z) <- interpret g (snd x) s' 44 | return (s'', (y, z)) 45 | interpret SlowIO x s = do 46 | threadDelay 200000 -- 0.2s 47 | return (s, x) 48 | 49 | data P a b where 50 | SM :: String -> SM s a b -> s -> P a b 51 | (:>>>) :: P a b -> P b c -> P a c 52 | Shard :: P a b -> P a b 53 | 54 | swapsSequential :: P (a, b) (b, a) 55 | swapsSequential = SM "three swaps" (swap `Compose` swap `Compose` swap) () 56 | 57 | swapsPipelined :: P (a, b) (b, a) 58 | swapsPipelined = 59 | SM "first swap" swap () :>>> 60 | SM "second swap" swap () :>>> 61 | SM "third swap" swap () 62 | 63 | data Deployment a = Deployment 64 | { queue :: TQueue a 65 | , pids :: [(String, Async ())] 66 | } 67 | 68 | names :: Deployment a -> String 69 | names = bracket . intercalate "," . reverse . map fst . pids 70 | where 71 | bracket s = "[" ++ s ++ "]" 72 | 73 | stop :: Deployment a -> IO () 74 | stop = mapM_ (cancel . snd) . pids 75 | 76 | deploy' :: P a b -> Deployment a -> IO (Deployment b) 77 | deploy' (SM name sm s0) d = do 78 | q' <- newTQueueIO 79 | pid <- async (go s0 q') 80 | return Deployment { queue = q', pids = (name, pid) : pids d } 81 | where 82 | f = interpret sm 83 | 84 | go s q' = do 85 | x <- atomically $ readTQueue (queue d) 86 | (s', o) <- f x s 87 | atomically $ writeTQueue q' o 88 | go s' q' 89 | deploy' (sm :>>> sm') d = do 90 | d' <- deploy' sm d 91 | deploy' sm' d' 92 | deploy' (Shard p) d = do 93 | let qIn = queue d 94 | qEven <- newTQueueIO 95 | qOdd <- newTQueueIO 96 | pidIn <- async $ shardQIn qIn qEven qOdd 97 | dEven <- deploy' p (Deployment qEven []) 98 | dOdd <- deploy' p (Deployment qOdd []) 99 | qOut <- newTQueueIO 100 | pidOut <- async $ shardQOut (queue dEven) (queue dOdd) qOut 101 | return (Deployment qOut (("shardIn: " ++ names dEven ++ " & " ++ names dOdd, pidIn) : 102 | ("shardOut: " ++ names dEven ++ " & " ++ names dOdd, pidOut) : 103 | pids dEven ++ pids dOdd ++ pids d)) 104 | where 105 | shardQIn :: TQueue a -> TQueue a -> TQueue a -> IO () 106 | shardQIn qIn qEven qOdd = do 107 | atomically (readTQueue qIn >>= writeTQueue qEven) 108 | shardQIn qIn qOdd qEven 109 | 110 | shardQOut :: TQueue a -> TQueue a -> TQueue a -> IO () 111 | shardQOut qEven qOdd qOut = do 112 | atomically (readTQueue qEven >>= writeTQueue qOut) 113 | shardQOut qOdd qEven qOut 114 | 115 | deploy :: P a b -> IO (TQueue a, Deployment b) 116 | deploy p = do 117 | q <- newTQueueIO 118 | d <- deploy' p (Deployment q []) 119 | return (q, d) 120 | 121 | swapsSharded :: P (a, b) (b, a) 122 | swapsSharded = 123 | Shard (SM "first swap" swap ()) :>>> 124 | Shard (SM "second swap" swap ()) :>>> 125 | Shard (SM "third swap" swap ()) 126 | 127 | data PipelineKind = Sequential | Pipelined | Sharded 128 | deriving Show 129 | 130 | main :: IO () 131 | main = do 132 | mapM_ libMain [Sequential, Pipelined, Sharded] 133 | 134 | libMain :: PipelineKind -> IO () 135 | libMain k = do 136 | (q, d) <- deploy $ case k of 137 | Sequential -> swapsSequential 138 | Pipelined -> swapsPipelined 139 | Sharded -> swapsSharded 140 | print k 141 | putStrLn $ "Pids: " ++ names d 142 | !start <- getCurrentTime 143 | forM_ [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)] $ \x -> 144 | atomically $ writeTQueue q x 145 | !resps <- replicateM 6 $ atomically $ readTQueue (queue d) 146 | !end <- getCurrentTime 147 | putStrLn $ "Responses: " ++ show resps 148 | putStrLn $ "Time: " ++ show (diffUTCTime end start) 149 | putStrLn "" 150 | stop d 151 | -------------------------------------------------------------------------------- /benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | # Inspired by: https://sled.rs/perf.html#experimental-design 6 | 7 | BENCHMARK_WORKLOAD1="bench-mp-logging10" 8 | BENCHMARK_WORKLOAD2="bench-mp-fast-logger10" 9 | BENCHMARK_NUMBER_OF_RUNS=5 10 | # TODO: +RTS --nonmoving-gc ? 11 | BENCHMARK_GHC_OPTS=("-threaded" "-rtsopts" "-with-rtsopts=-N") 12 | BENCHMARK_CABAL_BUILD_OPTS=("--enable-benchmarks" 13 | "--disable-profiling" 14 | "-O2" 15 | "--ghc-options=${BENCHMARK_GHC_OPTS[*]}") 16 | BENCHMARK_CABAL_RUN_OPTS=("-O2" 17 | "--ghc-options=${BENCHMARK_GHC_OPTS[*]}") 18 | BENCHMARK_PERF_EVENTS="L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,dTLB-loads,dTLB-load-misses" 19 | 20 | # Save info about current hardware and OS setup. 21 | uname --kernel-name --kernel-release --kernel-version --machine --operating-system 22 | echo "" 23 | lscpu 24 | echo "" 25 | 26 | # Warn about non-essential programs... 27 | FIREFOX_PID="$(pgrep GeckoMain)" 28 | if [ -n "${FIREFOX_PID}" ]; then 29 | read -r -p "Firefox is running, wanna kill it? [y/N] " yn 30 | case $yn in 31 | [Yy]*) kill -1 "${FIREFOX_PID}" ;; 32 | *) ;; 33 | esac 34 | fi 35 | 36 | if [ -f "/tmp/${BENCHMARK_WORKLOAD1}.txt" ] || [ -f "/tmp/${BENCHMARK_WORKLOAD2}.txt" ]; then 37 | read -r -p "Old benchmark results exist, wanna remove them? [y/N] " yn 38 | case $yn in 39 | [Yy]*) rm -f "/tmp/${BENCHMARK_WORKLOAD1}.txt"; 40 | rm -f "/tmp/${BENCHMARK_WORKLOAD2}.txt" ;; 41 | *) ;; 42 | esac 43 | fi 44 | 45 | # Use the performance governor instead of powersave (for laptops). 46 | for policy in /sys/devices/system/cpu/cpufreq/policy*; do 47 | echo "${policy}" 48 | echo "performance" | sudo tee "${policy}/scaling_governor" 49 | done 50 | 51 | # Compile workloads. 52 | 53 | # XXX: enable benchmarking against old versions of same test 54 | # BENCHMARK_GITHASH1="XXX: NOT USED YET" 55 | # BENCHMARK_GITHASH2="XXX: NOT USED YET" 56 | # BENCHMARK_BIN1="/tmp/${BENCHMARK_GITHASH1}-${BENCHMARK_WORKLOAD1}" 57 | # BENCHMARK_BIN2="/tmp/${BENCHMARK_GITHASH2}-${BENCHMARK_WORKLOAD2}" 58 | # if [ -n "${BENCHMARK_GITHASH1}" ] && [ ! -f "${BENCHMARK_BIN1}" ] ; then 59 | # git checkout "${BENCHMARK_GITHASH1}" 60 | # cabal build -O2 "${BENCHMARK_WORKLOAD1}" 61 | # cp $(cabal list-bin "${BENCHMARK_WORKLOAD1}") "${BENCHMARK_BIN1}" 62 | # fi 63 | 64 | cabal build "${BENCHMARK_CABAL_BUILD_OPTS[@]}" "${BENCHMARK_WORKLOAD2}" 65 | 66 | # Disable turbo boost. 67 | echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo 68 | 69 | # The following run is just a (CPU) warm up, the results are discarded. 70 | cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD2}" 71 | 72 | # Run the benchmarks. By running workloads interleaved with each other, we 73 | # reduce the risk of having particular transient system-wide effects impact only 74 | # a single measurement. 75 | for i in $(seq ${BENCHMARK_NUMBER_OF_RUNS}); do 76 | echo "Running benchmark run ${i}" 77 | perf stat --event="${BENCHMARK_PERF_EVENTS}" \ 78 | cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD1}" \ 79 | 2>&1 | tee --append "/tmp/${BENCHMARK_WORKLOAD1}.txt" 80 | perf stat --event="${BENCHMARK_PERF_EVENTS}" \ 81 | cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD2}" \ 82 | 2>&1 | tee --append "/tmp/${BENCHMARK_WORKLOAD2}.txt" 83 | 84 | # XXX: Can't get the below to work, ${BENCHMARK_WORKLOAD} env var doesn't 85 | # get interpolated correctly into the string? 86 | # Use `nice` to bump the priority of the benchmark process to the highest possible. 87 | ##sudo nice -n -20 su -c \ 88 | ## "perf stat -e cache-misses,branch-misses,dTLB-load-misses,iTLB-load-misses cabal run -O2 ${BENCHMARK_WORKLOAD1} >> /tmp/${BENCHMARK_WORKLOAD1}.txt" \ 89 | ## "${USER}" 90 | ##sudo nice -n -20 su -c \ 91 | ## "perf stat -e cache-misses,branch-misses,dTLB-load-misses,iTLB-load-misses cabal run -O2 ${BENCHMARK_WORKLOAD2} >> /tmp/${BENCHMARK_WORKLOAD2}.txt" \ 92 | ## "${USER}" 93 | 94 | done 95 | 96 | # Re-enable turbo boost. 97 | echo 0 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo 98 | 99 | # Go back to powersave governor. 100 | for policy in /sys/devices/system/cpu/cpufreq/policy*; do 101 | echo "${policy}" 102 | echo "powersave" | sudo tee "${policy}/scaling_governor" 103 | done 104 | 105 | # Output throughput data for R analysis. 106 | R_FILE="/tmp/${BENCHMARK_WORKLOAD1}-${BENCHMARK_WORKLOAD2}.r" 107 | 108 | echo 'Input=("' > "${R_FILE}" 109 | { echo "Workload Throughput"; 110 | awk -v wl1="${BENCHMARK_WORKLOAD1}" \ 111 | '/Throughput/ { print wl1, $2 }' "/tmp/${BENCHMARK_WORKLOAD1}.txt"; 112 | awk -v wl2="${BENCHMARK_WORKLOAD2}" \ 113 | '/Throughput/ { print wl2, $2 }' "/tmp/${BENCHMARK_WORKLOAD2}.txt"; 114 | echo '")' 115 | } >> "${R_FILE}" 116 | 117 | cat << EOF >> "${R_FILE}" 118 | Data = read.table(textConnection(Input),header=TRUE) 119 | bartlett.test(Throughput ~ Workload, data=Data) 120 | 121 | # If p-value >= 0.05, use var.equal=TRUE below 122 | 123 | t.test(Throughput ~ Workload, data=Data, 124 | var.equal=TRUE, 125 | conf.level=0.95) 126 | EOF 127 | 128 | Rscript "${R_FILE}" 129 | 130 | # Profiling 131 | 132 | # On Linux install `perf` See 133 | # https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html for how 134 | # to setup the permissions for using `perf`. Also note that on some systems, 135 | # e.g. Ubuntu, `/usr/bin/perf` is not the actual binary but rather a bash script 136 | # that calls the binary. Note that the steps in the admin guide needs to be 137 | # performed on the binary and not the shell script. Since Linux 5.8 there's a 138 | # special capability for capturing perf data called `cap_perfmon`, if you are on 139 | # a older version you need `cap_sys_admin` instead. 140 | # 141 | # For more see: https://brendangregg.com/perf.html 142 | -------------------------------------------------------------------------------- /pipelined-state-machines.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.4 2 | name: pipelined-state-machines 3 | version: 0.0.0 4 | synopsis: High performance inter-thread messaging library 5 | description: 6 | See README at 7 | 8 | bug-reports: https://github.com/stevana/pipelined-state-machines/issues 9 | license: MIT 10 | license-file: LICENSE 11 | author: Stevan Andjelkovic, Daniel Gustafsson 12 | maintainer: stevana@users.noreply.github.com 13 | copyright: Copyright (c) 2021 Symbiont Inc, 2022-2023 Stevan Andjelkovic 14 | category: Concurrency, Data Structures 15 | extra-source-files: 16 | CHANGELOG.md 17 | LICENSE 18 | README.md 19 | benchmark.sh 20 | 21 | tested-with: GHC ==9.0.2 22 | 23 | common common-build-depends 24 | build-depends: 25 | , async 26 | , atomic-primops 27 | , base ^>=4.15.0.0 28 | , ghc-prim 29 | , stm 30 | , vector 31 | 32 | library 33 | import: common-build-depends 34 | hs-source-dirs: src/ 35 | exposed-modules: 36 | Disruptor 37 | Disruptor.AtomicCounterPadded 38 | Disruptor.MP 39 | Disruptor.MP.Consumer 40 | Disruptor.MP.Producer 41 | Disruptor.MP.RingBuffer 42 | Disruptor.SequenceNumber 43 | Disruptor.SP 44 | Disruptor.SP.Consumer 45 | Disruptor.SP.Producer 46 | Disruptor.SP.RingBuffer 47 | Disruptor.SP.Unboxed.Consumer 48 | Disruptor.SP.Unboxed.Producer 49 | Disruptor.SP.Unboxed.RingBuffer 50 | 51 | ghc-options: -O2 52 | default-language: Haskell2010 53 | 54 | test-suite test 55 | import: common-build-depends 56 | type: exitcode-stdio-1.0 57 | hs-source-dirs: test/ 58 | main-is: Main.hs 59 | other-modules: 60 | DisruptorTest 61 | TastyDiscover 62 | 63 | build-depends: 64 | , containers 65 | , pipelined-state-machines 66 | , HUnit 67 | , QuickCheck 68 | , tasty 69 | , tasty-hunit 70 | , tasty-quickcheck 71 | 72 | ghc-options: -threaded -rtsopts -with-rtsopts=-N -fno-ignore-asserts 73 | default-language: Haskell2010 74 | 75 | executable readme-pipeline-example 76 | import: common-build-depends 77 | main-is: ReadmePipelineExample.hs 78 | build-depends: pipelined-state-machines, time 79 | hs-source-dirs: app 80 | default-language: Haskell2010 81 | ghc-options: -threaded -rtsopts -with-rtsopts=-N 82 | 83 | executable readme-disruptor-example 84 | import: common-build-depends 85 | main-is: ReadmeDisruptorExample.hs 86 | build-depends: pipelined-state-machines 87 | hs-source-dirs: app 88 | default-language: Haskell2010 89 | ghc-options: -threaded -rtsopts -with-rtsopts=-N 90 | 91 | common bench-common 92 | hs-source-dirs: bench 93 | build-depends: 94 | , async 95 | , base 96 | , time 97 | , directory 98 | other-modules: Common 99 | 100 | -- Some of these options are taking from: 101 | -- https://wiki.haskell.org/Performance/GHC#Use_optimisation 102 | -- XXX: try with -fllvm 103 | 104 | -- To inspect core: 105 | -- -fforce-recomp 106 | -- -ddump-simpl 107 | -- -dsuppress-all 108 | -- -ddump-to-file 109 | -- 110 | -- To produce event log: 111 | -- -threaded -eventlog -rtsopts -with-rtsopts=-N 112 | ghc-options: 113 | -O2 -fproc-alignment=64 -fexcess-precision -fasm -optc-O3 114 | -optc-ffast-math -threaded -rtsopts -with-rtsopts=-N 115 | 116 | default-language: Haskell2010 117 | 118 | common bench-common-sp 119 | import: bench-common 120 | hs-source-dirs: bench/SP 121 | 122 | common bench-common-mp 123 | import: bench-common 124 | hs-source-dirs: bench/MP 125 | 126 | benchmark bench-singleops 127 | import: bench-common 128 | main-is: SingleOps.hs 129 | build-depends: 130 | , atomic-primops 131 | , pipelined-state-machines 132 | 133 | type: exitcode-stdio-1.0 134 | 135 | benchmark bench-sp 136 | import: bench-common-sp 137 | main-is: Disruptor.hs 138 | build-depends: pipelined-state-machines 139 | type: exitcode-stdio-1.0 140 | 141 | benchmark bench-sp-unboxed 142 | import: bench-common-sp 143 | main-is: DisruptorUnboxed.hs 144 | build-depends: pipelined-state-machines 145 | type: exitcode-stdio-1.0 146 | 147 | benchmark bench-sp-chan 148 | import: bench-common-sp 149 | main-is: Chan.hs 150 | type: exitcode-stdio-1.0 151 | 152 | benchmark bench-sp-tbqueue 153 | import: bench-common-sp 154 | main-is: TBQueue.hs 155 | build-depends: stm 156 | type: exitcode-stdio-1.0 157 | 158 | benchmark bench-sp-chaselev-deque 159 | import: bench-common-sp 160 | main-is: ChaseLevDeque.hs 161 | build-depends: chaselev-deque 162 | type: exitcode-stdio-1.0 163 | 164 | benchmark bench-sp-lockfree-queue 165 | import: bench-common-sp 166 | main-is: LockFreeQueue.hs 167 | build-depends: lockfree-queue 168 | type: exitcode-stdio-1.0 169 | 170 | benchmark bench-sp-unagi-chan 171 | import: bench-common-sp 172 | main-is: UnagiChan.hs 173 | build-depends: unagi-chan 174 | type: exitcode-stdio-1.0 175 | 176 | benchmark bench-mp 177 | import: bench-common-mp 178 | main-is: Disruptor.hs 179 | build-depends: pipelined-state-machines 180 | type: exitcode-stdio-1.0 181 | 182 | benchmark bench-mp-chan 183 | import: bench-common-mp 184 | main-is: Chan.hs 185 | type: exitcode-stdio-1.0 186 | 187 | benchmark bench-mp-tbqueue 188 | import: bench-common-mp 189 | main-is: TBQueue.hs 190 | build-depends: stm 191 | type: exitcode-stdio-1.0 192 | 193 | benchmark bench-mp-unagi-chan 194 | import: bench-common-mp 195 | main-is: UnagiChan.hs 196 | build-depends: unagi-chan 197 | type: exitcode-stdio-1.0 198 | 199 | benchmark bench-mp-lockfree-queue 200 | import: bench-common-mp 201 | main-is: LockFreeQueue.hs 202 | build-depends: lockfree-queue 203 | type: exitcode-stdio-1.0 204 | 205 | benchmark bench-mp-logging 206 | import: bench-common-mp 207 | main-is: Logging.hs 208 | build-depends: pipelined-state-machines, bytestring 209 | type: exitcode-stdio-1.0 210 | 211 | benchmark bench-mp-fast-logger 212 | import: bench-common-mp 213 | main-is: FastLogger.hs 214 | build-depends: fast-logger 215 | type: exitcode-stdio-1.0 216 | 217 | benchmark bench-mp-logging10 218 | import: bench-common-mp 219 | main-is: Logging10.hs 220 | build-depends: pipelined-state-machines, bytestring 221 | type: exitcode-stdio-1.0 222 | 223 | benchmark bench-mp-fast-logger10 224 | import: bench-common-mp 225 | main-is: FastLogger10.hs 226 | build-depends: fast-logger 227 | type: exitcode-stdio-1.0 228 | -------------------------------------------------------------------------------- /src/Disruptor/SP/Unboxed/RingBuffer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.SP.Unboxed.RingBuffer where 2 | 3 | import Control.Exception (assert) 4 | import Control.Monad (when) 5 | import Data.Bits (popCount) 6 | import Data.IORef (IORef, newIORef, readIORef, writeIORef) 7 | import Data.Int (Int64) 8 | import qualified Data.Vector as ImmutableVector 9 | import qualified Data.Vector.Mutable as Boxed 10 | import Data.Vector.Unboxed.Mutable (IOVector, Unbox) 11 | import qualified Data.Vector.Unboxed.Mutable as Vector 12 | 13 | import Disruptor.SequenceNumber 14 | 15 | ------------------------------------------------------------------------ 16 | 17 | data RingBuffer e = RingBuffer 18 | { 19 | -- | The capacity, or maximum amount of values, of the ring buffer. 20 | rbCapacity :: {-# UNPACK #-} !Int64 21 | -- | The cursor pointing to the head of the ring buffer. 22 | , rbCursor :: {-# UNPACK #-} !(IORef SequenceNumber) 23 | -- | The values of the ring buffer. 24 | , rbEvents :: !(IOVector e) 25 | -- | References to the last consumers' sequence numbers, used in order to 26 | -- avoid wrapping the buffer and overwriting events that have not been 27 | -- consumed yet. 28 | , rbGatingSequences :: {-# UNPACK #-} !(IORef (Boxed.IOVector (IORef SequenceNumber))) 29 | -- | Cached value of computing the last consumers' sequence numbers using the 30 | -- above references. 31 | , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber) 32 | } 33 | 34 | newRingBuffer :: Unbox e => Int -> IO (RingBuffer e) 35 | newRingBuffer capacity 36 | | capacity <= 0 = 37 | error "newRingBuffer: capacity must be greater than 0" 38 | | popCount capacity /= 1 = 39 | -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this. 40 | error "newRingBuffer: capacity must be a power of 2" 41 | | otherwise = do 42 | snr <- newIORef (-1) 43 | v <- Vector.new capacity 44 | gs <- newIORef =<< Boxed.new 0 45 | cgs <- newIORef (-1) 46 | return (RingBuffer (fromIntegral capacity) snr v gs cgs) 47 | 48 | -- | The capacity, or maximum amount of values, of the ring buffer. 49 | capacity :: RingBuffer e -> Int64 50 | capacity = rbCapacity 51 | {-# INLINE capacity #-} 52 | 53 | getCursor :: RingBuffer e -> IO SequenceNumber 54 | getCursor rb = readIORef (rbCursor rb) 55 | {-# INLINE getCursor #-} 56 | 57 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO () 58 | setGatingSequences rb gs = do 59 | v <- ImmutableVector.thaw (ImmutableVector.fromList gs) 60 | writeIORef (rbGatingSequences rb) v 61 | {-# INLINE setGatingSequences #-} 62 | 63 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber 64 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb) 65 | {-# INLINE getCachedGatingSequence #-} 66 | 67 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO () 68 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb) 69 | {-# INLINE setCachedGatingSequence #-} 70 | 71 | minimumSequence :: RingBuffer e -> IO SequenceNumber 72 | minimumSequence rb = do 73 | cursorValue <- getCursor rb 74 | minimumSequence' (rbGatingSequences rb) cursorValue 75 | {-# INLINE minimumSequence #-} 76 | 77 | minimumSequence' :: IORef (Boxed.IOVector (IORef SequenceNumber)) -> SequenceNumber 78 | -> IO SequenceNumber 79 | minimumSequence' gatingSequences cursorValue = do 80 | gs <- readIORef gatingSequences 81 | go gs 82 | where 83 | go :: Boxed.IOVector (IORef SequenceNumber) -> IO SequenceNumber 84 | go gs = go' 0 cursorValue 85 | where 86 | len :: Int 87 | len = Boxed.length gs - 1 88 | 89 | go' :: Int -> SequenceNumber -> IO SequenceNumber 90 | go' ix minSequence | ix > len = return minSequence 91 | | ix <= len = do 92 | g <- readIORef =<< Boxed.read gs ix 93 | if g < minSequence 94 | then go' (ix + 1) g 95 | else go' (ix + 1) minSequence 96 | {-# INLINE minimumSequence' #-} 97 | 98 | -- | Currently available slots to write to. 99 | size :: RingBuffer e -> IO Int64 100 | size rb = do 101 | consumed <- minimumSequence rb 102 | produced <- getCursor rb 103 | return (capacity rb - fromIntegral (produced - consumed)) 104 | {-# INLINE size #-} 105 | 106 | -- | Claim the next event in sequence for publishing. 107 | next :: RingBuffer e -> IO SequenceNumber 108 | next rb = nextBatch rb 1 109 | {-# INLINE next #-} 110 | 111 | -- | Claim the next `n` events in sequence for publishing. This is for batch 112 | -- event producing. Returns the highest claimed sequence number, so using it 113 | -- requires a bit of extra work, e.g.: 114 | -- 115 | -- @ 116 | -- let n = 10 117 | -- hi <- nextBatch rb n 118 | -- let lo = hi - (n - 1) 119 | -- mapM_ f [lo..hi] 120 | -- publishBatch rb lo hi 121 | -- @ 122 | -- 123 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber 124 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do 125 | current <- getCursor rb 126 | let nextSequence :: SequenceNumber 127 | nextSequence = current + fromIntegral n 128 | 129 | wrapPoint :: SequenceNumber 130 | wrapPoint = nextSequence - fromIntegral (capacity rb) 131 | 132 | writeIORef (rbCursor rb) nextSequence 133 | cachedGatingSequence <- getCachedGatingSequence rb 134 | 135 | when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $ 136 | waitForConsumers wrapPoint 137 | 138 | return nextSequence 139 | where 140 | waitForConsumers :: SequenceNumber -> IO () 141 | waitForConsumers wrapPoint = go 142 | where 143 | go :: IO () 144 | go = do 145 | gatingSequence <- minimumSequence rb 146 | if wrapPoint > gatingSequence 147 | then go 148 | else setCachedGatingSequence rb gatingSequence 149 | {-# INLINE nextBatch #-} 150 | 151 | -- Try to return the next sequence number to write to. If `Nothing` is returned, 152 | -- then the last consumer has not yet processed the event we are about to 153 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext` 154 | -- should apply back-pressure upstream if this happens. 155 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber 156 | tryNext rb = tryNextBatch rb 1 157 | {-# INLINE tryNext #-} 158 | 159 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber 160 | tryNextBatch rb n = assert (n > 0) $ do 161 | current <- getCursor rb 162 | let next = current + fromIntegral n 163 | wrapPoint = next - fromIntegral (capacity rb) 164 | cachedGatingSequence <- getCachedGatingSequence rb 165 | if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) 166 | then do 167 | minSequence <- minimumSequence' (rbGatingSequences rb) current 168 | setCachedGatingSequence rb minSequence 169 | if (wrapPoint > minSequence) 170 | then return None 171 | else return (Some next) 172 | else return (Some next) 173 | {-# INLINE tryNextBatch #-} 174 | 175 | set :: Unbox e => RingBuffer e -> SequenceNumber -> e -> IO () 176 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (rbCapacity rb) snr) e 177 | {-# INLINE set #-} 178 | 179 | publish :: RingBuffer e -> SequenceNumber -> IO () 180 | publish rb = writeIORef (rbCursor rb) 181 | {-# INLINE publish #-} 182 | 183 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO () 184 | publishBatch rb _lo hi = writeIORef (rbCursor rb) hi 185 | {-# INLINE publishBatch #-} 186 | 187 | unsafeGet :: Unbox e => RingBuffer e -> SequenceNumber -> IO e 188 | unsafeGet rb current = Vector.unsafeRead (rbEvents rb) (index (capacity rb) current) 189 | {-# INLINE unsafeGet #-} 190 | -------------------------------------------------------------------------------- /src/Disruptor/SP/RingBuffer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.SP.RingBuffer where 2 | 3 | import Control.Concurrent (threadDelay) 4 | import Control.Exception (assert) 5 | import Control.Monad (when) 6 | import Data.Bits (popCount) 7 | import Data.IORef (IORef, newIORef, readIORef, writeIORef) 8 | import Data.Int (Int64) 9 | import qualified Data.Vector as ImmutableVector 10 | import Data.Vector.Mutable (IOVector) 11 | import qualified Data.Vector.Mutable as Vector 12 | 13 | import Disruptor.SequenceNumber 14 | 15 | ------------------------------------------------------------------------ 16 | 17 | data RingBuffer e = RingBuffer 18 | { 19 | -- | The capacity, or maximum amount of values, of the ring buffer. 20 | rbCapacity :: {-# UNPACK #-} !Int64 21 | -- | The cursor pointing to the head of the ring buffer. 22 | , rbCursor :: {-# UNPACK #-} !(IORef SequenceNumber) 23 | -- | The values of the ring buffer. 24 | , rbEvents :: {-# UNPACK #-} !(IOVector e) 25 | -- | References to the last consumers' sequence numbers, used in order to 26 | -- avoid wrapping the buffer and overwriting events that have not been 27 | -- consumed yet. 28 | , rbGatingSequences :: {-# UNPACK #-} !(IORef (IOVector (IORef SequenceNumber))) 29 | -- | Cached value of computing the last consumers' sequence numbers using the 30 | -- above references. 31 | , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber) 32 | } 33 | 34 | newRingBuffer :: Int -> IO (RingBuffer e) 35 | newRingBuffer capacity 36 | | capacity <= 0 = 37 | error "newRingBuffer: capacity must be greater than 0" 38 | | popCount capacity /= 1 = 39 | -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this. 40 | error "newRingBuffer: capacity must be a power of 2" 41 | | otherwise = do 42 | snr <- newIORef (-1) 43 | v <- Vector.new capacity 44 | -- ^ XXX: Should this really be the same as capacity? See https://github.com/LMAX-Exchange/disruptor/blob/master/src/main/java/com/lmax/disruptor/RingBuffer.java#L60 45 | -- Also see https://youtube.com/watch?v=fDGWWpHlzvw&t=1380 46 | gs <- newIORef =<< Vector.new 0 47 | cgs <- newIORef (-1) 48 | return (RingBuffer (fromIntegral capacity) snr v gs cgs) 49 | 50 | -- | The capacity, or maximum amount of values, of the ring buffer. 51 | capacity :: RingBuffer e -> Int64 52 | capacity = rbCapacity 53 | {-# INLINE capacity #-} 54 | 55 | getCursor :: RingBuffer e -> IO SequenceNumber 56 | getCursor rb = readIORef (rbCursor rb) 57 | {-# INLINE getCursor #-} 58 | 59 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO () 60 | setGatingSequences rb gs = do 61 | v <- ImmutableVector.thaw (ImmutableVector.fromList gs) 62 | writeIORef (rbGatingSequences rb) v 63 | {-# INLINE setGatingSequences #-} 64 | 65 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber 66 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb) 67 | {-# INLINE getCachedGatingSequence #-} 68 | 69 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO () 70 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb) 71 | {-# INLINE setCachedGatingSequence #-} 72 | 73 | minimumSequence :: RingBuffer e -> IO SequenceNumber 74 | minimumSequence rb = do 75 | cursorValue <- getCursor rb 76 | minimumSequence' (rbGatingSequences rb) cursorValue 77 | {-# INLINE minimumSequence #-} 78 | 79 | minimumSequence' :: IORef (IOVector (IORef SequenceNumber)) -> SequenceNumber 80 | -> IO SequenceNumber 81 | minimumSequence' gatingSequences cursorValue = do 82 | gs <- readIORef gatingSequences 83 | go gs 84 | where 85 | go :: IOVector (IORef SequenceNumber) -> IO SequenceNumber 86 | go gs = go' 0 cursorValue 87 | where 88 | len :: Int 89 | len = Vector.length gs - 1 90 | 91 | go' :: Int -> SequenceNumber -> IO SequenceNumber 92 | go' ix minSequence | ix > len = return minSequence 93 | | ix <= len = do 94 | g <- readIORef =<< Vector.unsafeRead gs ix 95 | if g < minSequence 96 | then go' (ix + 1) g 97 | else go' (ix + 1) minSequence 98 | {-# INLINE minimumSequence' #-} 99 | 100 | -- | Currently available slots to write to. 101 | size :: RingBuffer e -> IO Int64 102 | size rb = do 103 | consumed <- minimumSequence rb 104 | produced <- getCursor rb 105 | return (capacity rb - fromIntegral (produced - consumed)) 106 | {-# INLINE size #-} 107 | 108 | -- | Claim the next event in sequence for publishing. 109 | next :: RingBuffer e -> IO SequenceNumber 110 | next rb = nextBatch rb 1 111 | {-# INLINE next #-} 112 | 113 | -- | Claim the next `n` events in sequence for publishing. This is for batch 114 | -- event producing. Returns the highest claimed sequence number, so using it 115 | -- requires a bit of extra work, e.g.: 116 | -- 117 | -- @ 118 | -- let n = 10 119 | -- hi <- nextBatch rb n 120 | -- let lo = hi - (n - 1) 121 | -- mapM_ f [lo..hi] 122 | -- publishBatch rb lo hi 123 | -- @ 124 | -- 125 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber 126 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do 127 | current <- getCursor rb 128 | let nextSequence :: SequenceNumber 129 | nextSequence = current + fromIntegral n 130 | 131 | wrapPoint :: SequenceNumber 132 | wrapPoint = nextSequence - fromIntegral (capacity rb) 133 | 134 | writeIORef (rbCursor rb) nextSequence 135 | cachedGatingSequence <- getCachedGatingSequence rb 136 | 137 | when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $ 138 | waitForConsumers wrapPoint 139 | 140 | return nextSequence 141 | where 142 | waitForConsumers :: SequenceNumber -> IO () 143 | waitForConsumers wrapPoint = go 144 | where 145 | go :: IO () 146 | go = do 147 | gatingSequence <- minimumSequence rb 148 | if wrapPoint > gatingSequence 149 | then do 150 | threadDelay 1 151 | go -- SPIN 152 | else setCachedGatingSequence rb gatingSequence 153 | {-# INLINABLE nextBatch #-} 154 | 155 | -- Try to return the next sequence number to write to. If `Nothing` is returned, 156 | -- then the last consumer has not yet processed the event we are about to 157 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext` 158 | -- should apply back-pressure upstream if this happens. 159 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber 160 | tryNext rb = tryNextBatch rb 1 161 | {-# INLINE tryNext #-} 162 | 163 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber 164 | tryNextBatch rb n = assert (n > 0) $ do 165 | current <- getCursor rb 166 | let next = current + fromIntegral n 167 | wrapPoint = next - fromIntegral (capacity rb) 168 | cachedGatingSequence <- getCachedGatingSequence rb 169 | if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) 170 | then do 171 | minSequence <- minimumSequence' (rbGatingSequences rb) current 172 | setCachedGatingSequence rb minSequence 173 | if (wrapPoint > minSequence) 174 | then return None 175 | else return (Some next) 176 | else return (Some next) 177 | {-# INLINE tryNextBatch #-} 178 | 179 | set :: RingBuffer e -> SequenceNumber -> e -> IO () 180 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (rbCapacity rb) snr) e 181 | {-# INLINE set #-} 182 | 183 | publish :: RingBuffer e -> SequenceNumber -> IO () 184 | publish rb = writeIORef (rbCursor rb) 185 | {-# INLINE publish #-} 186 | 187 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO () 188 | publishBatch rb _lo hi = writeIORef (rbCursor rb) hi 189 | {-# INLINE publishBatch #-} 190 | 191 | unsafeGet :: RingBuffer e -> SequenceNumber -> IO e 192 | unsafeGet rb current = Vector.unsafeRead (rbEvents rb) (index (capacity rb) current) 193 | {-# INLINE unsafeGet #-} 194 | 195 | get :: RingBuffer e -> SequenceNumber -> IO (Maybe e) 196 | get rb want = do 197 | produced <- getCursor rb 198 | if want <= produced 199 | then Just <$> unsafeGet rb want 200 | else return Nothing 201 | {-# INLINE get #-} 202 | -------------------------------------------------------------------------------- /test/DisruptorTest.hs: -------------------------------------------------------------------------------- 1 | module DisruptorTest where 2 | 3 | import Control.Concurrent 4 | import Control.Concurrent.Async 5 | import Control.Concurrent.MVar 6 | import Control.Monad 7 | import Data.IORef 8 | import Data.Int 9 | import Data.Set (Set) 10 | import qualified Data.Set as Set 11 | import System.IO 12 | import System.IO.Error 13 | import Test.HUnit 14 | import Test.QuickCheck 15 | 16 | import qualified Disruptor.MP.Consumer as MP 17 | import qualified Disruptor.MP.Producer as MP 18 | import qualified Disruptor.MP.RingBuffer as MP 19 | import qualified Disruptor.SP.Consumer as SP 20 | import qualified Disruptor.SP.Producer as SP 21 | import qualified Disruptor.SP.RingBuffer as SP 22 | import Disruptor.SequenceNumber 23 | import Disruptor.AtomicCounterPadded 24 | 25 | ------------------------------------------------------------------------ 26 | 27 | (@?->) :: (Eq a, Show a) => IO a -> a -> Assertion 28 | mx @?-> y = do 29 | x <- mx 30 | x @?= y 31 | 32 | unit_ringBufferSingleNonBlocking :: Assertion 33 | unit_ringBufferSingleNonBlocking = do 34 | rb <- SP.newRingBuffer 8 35 | Some i <- SP.tryNext rb 36 | SP.set rb i 'a' 37 | SP.publish rb i 38 | SP.get rb i @?-> Just 'a' 39 | Some j <- SP.tryNext rb 40 | SP.set rb j 'b' 41 | SP.publish rb j 42 | SP.get rb j @?-> Just 'b' 43 | 44 | unit_ringBufferSingleBlocking :: Assertion 45 | unit_ringBufferSingleBlocking = do 46 | rb <- SP.newRingBuffer 8 47 | i <- SP.next rb 48 | SP.set rb i 'a' 49 | SP.publish rb i 50 | SP.get rb i @?-> Just 'a' 51 | j <- SP.next rb 52 | SP.set rb j 'b' 53 | SP.publish rb j 54 | SP.get rb j @?-> Just 'b' 55 | 56 | unit_ringBufferRemainingCapacity :: Assertion 57 | unit_ringBufferRemainingCapacity = do 58 | rb <- SP.newRingBuffer 1 59 | consumerSnrRef <- newIORef (SequenceNumber (-1)) 60 | SP.setGatingSequences rb [consumerSnrRef] 61 | SP.size rb @?-> 1 62 | SP.publish rb (SequenceNumber 0) 63 | SP.size rb @?-> 0 64 | SP.tryNext rb @?-> None 65 | modifyIORef consumerSnrRef succ 66 | SP.size rb @?-> 1 67 | 68 | unit_ringBufferMulti :: Assertion 69 | unit_ringBufferMulti = do 70 | rb <- MP.newRingBuffer 8 71 | Some i <- MP.tryNext rb 72 | MP.set rb i 'a' 73 | MP.publish rb i 74 | MP.tryGet rb i @?-> Just 'a' 75 | Some j <- MP.tryNext rb 76 | MP.set rb j 'b' 77 | MP.publish rb j 78 | MP.tryGet rb j @?-> Just 'b' 79 | 80 | unit_ringBufferSP1P1C :: Assertion 81 | unit_ringBufferSP1P1C = do 82 | rb <- SP.newRingBuffer 128 83 | consumerFinished <- newEmptyMVar 84 | 85 | let iTERATIONS :: Int64 86 | iTERATIONS = 1024 87 | 88 | ep = SP.EventProducer (const (go 0)) () 89 | where 90 | go :: Int64 -> IO () 91 | go n | n == iTERATIONS = return () 92 | | otherwise = do 93 | mSnr <- SP.tryNext rb 94 | case mSnr of 95 | Some snr -> do 96 | SP.set rb snr n 97 | SP.publish rb snr 98 | go (n + 1) 99 | None -> do 100 | threadDelay 1 101 | go n 102 | 103 | let handler seen n snr endOfBatch 104 | | n /= seen = error (show n ++ " appears twice") 105 | | otherwise = do 106 | -- putStrLn ("consumer got: " ++ show n ++ 107 | -- if endOfBatch then ". End of batch!" else "") 108 | when (endOfBatch && getSequenceNumber snr == iTERATIONS - 1) $ 109 | putMVar consumerFinished () 110 | return (seen + 1) 111 | 112 | ec <- SP.newEventConsumer rb handler 0 [] (SP.Sleep 1) 113 | 114 | SP.setGatingSequences rb [SP.ecSequenceNumber ec] 115 | 116 | SP.withEventProducer ep $ \aep -> 117 | SP.withEventConsumer ec $ \aec -> do 118 | wait aep 119 | () <- takeMVar consumerFinished 120 | cancel aec 121 | 122 | unit_ringBufferMP1P1CBlocking :: Assertion 123 | unit_ringBufferMP1P1CBlocking = ringBufferMP1P1C True 124 | 125 | unit_ringBufferMP1P1CNonBlocking :: Assertion 126 | unit_ringBufferMP1P1CNonBlocking = ringBufferMP1P1C False 127 | 128 | ringBufferMP1P1C :: Bool -> Assertion 129 | ringBufferMP1P1C blocking = do 130 | numCap <- getNumCapabilities 131 | assertBool "getNumCapabilities < 2" (numCap >= 2) 132 | rb <- MP.newRingBuffer 8 133 | counter <- newIORef (-1) 134 | consumerFinished <- newEmptyMVar 135 | 136 | let ep = MP.EventProducer (const go) () 137 | where 138 | go :: IO () 139 | go = do 140 | n <- atomicModifyIORef' counter (\n -> let n' = n + 1 in (n', n')) 141 | if n > atLeastThisManyEvents 142 | then return () 143 | else 144 | if blocking 145 | then goBlocking n 146 | else goNonBlocking n 147 | 148 | goBlocking n = do 149 | snr <- MP.next rb 150 | MP.set rb snr n 151 | MP.publish rb snr 152 | go 153 | 154 | goNonBlocking n = do 155 | mSnr <- MP.tryNext rb 156 | case mSnr of 157 | Some snr -> do 158 | MP.set rb snr n 159 | MP.publish rb snr 160 | go 161 | None -> do 162 | threadDelay 1 163 | goNonBlocking n 164 | 165 | let handler seen n snr endOfBatch 166 | | n `Set.member` seen = error (show n ++ " appears twice") 167 | | otherwise = do 168 | -- putStrLn ("consumer got, n = " ++ show n ++ ", snr = " ++ show snr ++ 169 | -- if endOfBatch then ". End of batch!" else "") 170 | let seen' = Set.insert n seen 171 | when (endOfBatch && 172 | getSequenceNumber snr >= fromIntegral atLeastThisManyEvents) $ do 173 | putMVar consumerFinished seen' 174 | return seen' 175 | ec <- MP.newEventConsumer rb handler Set.empty [] (MP.Sleep 1) 176 | 177 | MP.setGatingSequences rb [MP.ecSequenceNumber ec] 178 | 179 | MP.withEventProducer ep $ \aep -> 180 | MP.withEventConsumer ec $ \aec -> do 181 | seen <- takeMVar consumerFinished 182 | cancel aec 183 | cancel aep 184 | assertEqual "increasingByOneFrom" 185 | (Right ()) 186 | (increasingByOneFrom 0 (Set.toList seen)) 187 | 188 | atLeastThisManyEvents = 10 189 | 190 | increasingByOneFrom :: Int -> [Int] -> Either String () 191 | increasingByOneFrom n [] 192 | | n > atLeastThisManyEvents = Right () 193 | | n <= atLeastThisManyEvents = 194 | Left ("n (= " ++ show n ++ ") < atLeastThisManyEvents (= " ++ 195 | show atLeastThisManyEvents ++ ")") 196 | increasingByOneFrom n (i : is) | n == i = increasingByOneFrom (n + 1) is 197 | | otherwise = 198 | Left ("Expected: " ++ show n ++ ", but got: " ++ show i) 199 | 200 | unit_ringBuffer5P1C :: Assertion 201 | unit_ringBuffer5P1C = do 202 | rb <- MP.newRingBuffer 32 203 | counter <- newCounter (-1) 204 | consumerFinished <- newEmptyMVar 205 | 206 | let ep = MP.EventProducer (const go) () 207 | where 208 | go :: IO () 209 | go = do 210 | n <- incrCounter 1 counter 211 | putStrLn ("producer, n = " ++ show n) 212 | if n > atLeastThisManyEvents 213 | then putStrLn "producer: done" >> return () 214 | else do 215 | putStrLn "producer: not done yet" 216 | mSnr <- MP.tryNext rb 217 | putStrLn ("producer: mSrn = " ++ show mSnr) 218 | case mSnr of 219 | Some snr -> do 220 | putStrLn ("producer: setting " ++ show n) 221 | MP.set rb snr n 222 | MP.publish rb snr 223 | putStrLn ("producer: published n = " ++ show n ++ ", snr = " ++ show snr) 224 | go 225 | None -> do 226 | putStrLn "producer: none" 227 | threadDelay 1 228 | go 229 | 230 | let handler seen n snr endOfBatch 231 | | n `Set.member` seen = error (show n ++ " appears twice") 232 | | otherwise = do 233 | putStrLn ("consumer got: n = " ++ show n ++ ", snr = " ++ show snr ++ 234 | if endOfBatch then ". End of batch!" else "") 235 | let seen' = Set.insert n seen 236 | when (endOfBatch && 237 | getSequenceNumber snr >= fromIntegral atLeastThisManyEvents) $ 238 | putMVar consumerFinished seen' 239 | return seen' 240 | ec <- MP.newEventConsumer rb handler Set.empty [] (MP.Sleep 1) 241 | 242 | MP.setGatingSequences rb [MP.ecSequenceNumber ec] 243 | 244 | MP.withEventProducer ep $ \aep1 -> 245 | MP.withEventProducer ep $ \aep2 -> 246 | MP.withEventProducer ep $ \aep3 -> 247 | MP.withEventProducer ep $ \aep4 -> 248 | MP.withEventProducer ep $ \aep5 -> 249 | MP.withEventConsumer ec $ \aec -> do 250 | seen <- takeMVar consumerFinished 251 | cancel aec 252 | mapM_ cancel [aep1, aep2, aep3, aep4, aep5] 253 | assertEqual "increasingByOneFrom" 254 | (Right ()) 255 | (increasingByOneFrom 0 (Set.toList seen)) 256 | -------------------------------------------------------------------------------- /src/Disruptor/MP/RingBuffer.hs: -------------------------------------------------------------------------------- 1 | module Disruptor.MP.RingBuffer where 2 | 3 | import Control.Concurrent (threadDelay, yield) 4 | import Control.Exception (assert) 5 | import Control.Monad (when) 6 | import Data.Atomics (casIORef, peekTicket, readForCAS) 7 | import Data.Bits (popCount) 8 | import Data.IORef 9 | (IORef, atomicModifyIORef', newIORef, readIORef, writeIORef) 10 | import Data.Int (Int64) 11 | import Data.Vector.Mutable (IOVector) 12 | import qualified Data.Vector.Mutable as Vector 13 | import qualified Data.Vector.Unboxed.Mutable as Unboxed 14 | 15 | import Disruptor.SequenceNumber 16 | import Disruptor.AtomicCounterPadded 17 | 18 | ------------------------------------------------------------------------ 19 | 20 | -- | The lock-free multi-producer implementation is presented in the following 21 | -- talk: 22 | -- 23 | -- Locks? We Don't Need No Stinkin' Locks! by Mike Barker (JAX London 2012) 24 | -- https://youtu.be/VBnLW9mKMh4?t=1813 25 | -- 26 | -- and also discussed in the following thread: 27 | -- 28 | -- https://groups.google.com/g/lmax-disruptor/c/UhmRuz_CL6E/m/-hVt86bHvf8J 29 | -- 30 | -- Note that one can also achieve a similar result by using multiple 31 | -- single-producers and combine them into one as outlined in this thread: 32 | -- 33 | -- https://groups.google.com/g/lmax-disruptor/c/hvJVE-h2Xu0/m/mBW0j_3SrmIJ 34 | -- 35 | data RingBuffer e = RingBuffer 36 | { 37 | -- | The capacity, or maximum amount of values, of the ring buffer. 38 | rbCapacity :: {-# UNPACK #-} !Int64 39 | -- | The cursor pointing to the head of the ring buffer. 40 | , rbCursor :: {-# UNPACK #-} !AtomicCounter 41 | -- | The values of the ring buffer. 42 | , rbEvents :: {-# UNPACK #-} !(IOVector e) 43 | -- | References to the last consumers' sequence numbers, used in order to 44 | -- avoid wrapping the buffer and overwriting events that have not been 45 | -- consumed yet. 46 | -- TODO: use vector instead of list. 47 | , rbGatingSequences :: {-# UNPACK #-} !(IORef [IORef SequenceNumber]) 48 | -- | Cached value of computing the last consumers' sequence numbers using the 49 | -- above references. 50 | , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber) 51 | -- | Used to keep track of what has been published in the multi-producer case. 52 | , rbAvailableBuffer :: {-# UNPACK #-} !(Unboxed.IOVector Int) 53 | } 54 | 55 | newRingBuffer :: Int -> IO (RingBuffer e) 56 | newRingBuffer capacity 57 | | capacity <= 0 = 58 | error "newRingBuffer: capacity must be greater than 0" 59 | | popCount capacity /= 1 = 60 | -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this. 61 | error "newRingBuffer: capacity must be a power of 2" 62 | | otherwise = do 63 | snr <- newCounter (-1) 64 | v <- Vector.new capacity 65 | gs <- newIORef [] 66 | cgs <- newIORef (-1) 67 | ab <- Unboxed.new capacity 68 | Unboxed.set ab (-1) 69 | return (RingBuffer (fromIntegral capacity) snr v gs cgs ab) 70 | {-# INLINABLE newRingBuffer #-} 71 | 72 | -- | The capacity, or maximum amount of values, of the ring buffer. 73 | capacity :: RingBuffer e -> Int64 74 | capacity = rbCapacity 75 | {-# INLINE capacity #-} 76 | 77 | getCursor :: RingBuffer e -> IO SequenceNumber 78 | getCursor rb = fromIntegral <$> readCounter (rbCursor rb) 79 | {-# INLINE getCursor #-} 80 | 81 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO () 82 | setGatingSequences rb = writeIORef (rbGatingSequences rb) 83 | {-# INLINE setGatingSequences #-} 84 | 85 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber 86 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb) 87 | {-# INLINE getCachedGatingSequence #-} 88 | 89 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO () 90 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb) 91 | {-# INLINE setCachedGatingSequence #-} 92 | 93 | setAvailable :: RingBuffer e -> SequenceNumber -> IO () 94 | setAvailable rb snr = Unboxed.unsafeWrite 95 | (rbAvailableBuffer rb) 96 | (index cap snr) 97 | (availabilityFlag cap snr) 98 | where 99 | cap = capacity rb 100 | {-# INLINE setAvailable #-} 101 | 102 | getAvailable :: RingBuffer e -> Int -> IO Int 103 | getAvailable rb ix = Unboxed.unsafeRead (rbAvailableBuffer rb) ix 104 | {-# INLINE getAvailable #-} 105 | 106 | minimumSequence :: RingBuffer e -> IO SequenceNumber 107 | minimumSequence rb = do 108 | cursorValue <- getCursor rb 109 | minimumSequence' (rbGatingSequences rb) cursorValue 110 | {-# INLINE minimumSequence #-} 111 | 112 | minimumSequence' :: IORef [IORef SequenceNumber] -> SequenceNumber -> IO SequenceNumber 113 | minimumSequence' gatingSequences cursorValue = do 114 | snrs <- mapM readIORef =<< readIORef gatingSequences 115 | return (minimum (cursorValue : snrs)) 116 | {-# INLINE minimumSequence' #-} 117 | 118 | -- | Currently available slots to write to. 119 | size :: RingBuffer e -> IO Int64 120 | size rb = do 121 | consumed <- minimumSequence rb 122 | produced <- getCursor rb 123 | return (capacity rb - fromIntegral (produced - consumed)) 124 | {-# INLINABLE size #-} 125 | 126 | -- | Claim the next event in sequence for publishing. 127 | next :: RingBuffer e -> IO SequenceNumber 128 | next rb = nextBatch rb 1 129 | {-# INLINE next #-} 130 | 131 | -- | Claim the next `n` events in sequence for publishing. This is for batch 132 | -- event producing. Returns the highest claimed sequence number, so using it 133 | -- requires a bit of extra work, e.g.: 134 | -- 135 | -- @ 136 | -- let n = 10 137 | -- hi <- nextBatch rb n 138 | -- let lo = hi - (n - 1) 139 | -- mapM_ f [lo..hi] 140 | -- publishBatch rb lo hi 141 | -- @ 142 | -- 143 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber 144 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do 145 | -- (current, nextSequence) <- -- {-# SCC "atomicModifyIORef'" #-} 146 | -- -- XXX: The atomic takes 60% of the time of 147 | -- -- `nextBatch`... Try using `AtomicCounter` instead 148 | -- -- of `IORef SequneceNumber`. 149 | -- atomicModifyIORef' (rbCursor rb) $ \current -> 150 | -- let 151 | -- nextSequence = current + fromIntegral n 152 | -- in 153 | -- (nextSequence, (current, nextSequence)) 154 | current <- fromIntegral <$> {-# SCC incrCounter #-} getAndIncrCounter n (rbCursor rb) 155 | 156 | let nextSequence :: SequenceNumber 157 | nextSequence = current + fromIntegral n 158 | 159 | wrapPoint :: SequenceNumber 160 | wrapPoint = nextSequence - fromIntegral (capacity rb) 161 | 162 | cachedGatingSequence <- getCachedGatingSequence rb 163 | 164 | when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $ 165 | waitForConsumers current wrapPoint (rbGatingSequences rb) 166 | 167 | return nextSequence 168 | where 169 | waitForConsumers :: SequenceNumber -> SequenceNumber -> IORef [IORef SequenceNumber] 170 | -> IO () 171 | waitForConsumers current wrapPoint gatingSequences = go 172 | where 173 | go :: IO () 174 | go = do 175 | gatingSequence <- minimumSequence' gatingSequences current 176 | if wrapPoint > gatingSequence 177 | then do 178 | threadDelay 1 179 | go -- SPIN 180 | else setCachedGatingSequence rb gatingSequence 181 | {-# INLINABLE nextBatch #-} 182 | 183 | -- Try to return the next sequence number to write to. If `Nothing` is returned, 184 | -- then the last consumer has not yet processed the event we are about to 185 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext` 186 | -- should apply back-pressure upstream if this happens. 187 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber 188 | tryNext rb = tryNextBatch rb 1 189 | {-# INLINE tryNext #-} 190 | 191 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber 192 | tryNextBatch rb n = assert (n > 0) go 193 | where 194 | go = do 195 | current_ <- {-# SCC "readCounter" #-} readCounter (rbCursor rb) 196 | let current = fromIntegral current_ 197 | next_ = current_ + n 198 | next = fromIntegral next_ 199 | b <- {-# SCC "hasCapacity" #-} hasCapacity rb n current 200 | if not b 201 | then return None 202 | else do 203 | success <- {-# SCC casCounter #-} casCounter (rbCursor rb) current_ next_ 204 | if success 205 | then return (Some next) 206 | else do 207 | {-# SCC "threadDelay" #-} threadDelay 1 208 | -- yield 209 | go -- SPIN 210 | {-- 211 | current <- {-# SCC "readForCas" #-} readForCAS (rbCursor rb) 212 | let current_ = peekTicket current 213 | next = current_ + fromIntegral n 214 | b <- {-# SCC "hasCapacity" #-} hasCapacity rb n current_ 215 | if not b 216 | then return None 217 | else do 218 | (success, _current') <- {-# SCC "casIORef"#-} casIORef (rbCursor rb) current next 219 | if success 220 | then return (Some next) 221 | else do 222 | {-# SCC "threadDelay" #-}threadDelay 1 223 | -- yield 224 | go -- SPIN 225 | -} 226 | {-# INLINABLE tryNextBatch #-} 227 | 228 | hasCapacity :: RingBuffer e -> Int -> SequenceNumber -> IO Bool 229 | hasCapacity rb requiredCapacity cursorValue = do 230 | let wrapPoint = (cursorValue + fromIntegral requiredCapacity) - 231 | fromIntegral (capacity rb) 232 | cachedGatingSequence <- getCachedGatingSequence rb 233 | if (wrapPoint > cachedGatingSequence || cachedGatingSequence > cursorValue) 234 | then do 235 | minSequence <- minimumSequence' (rbGatingSequences rb) cursorValue 236 | setCachedGatingSequence rb minSequence 237 | if (wrapPoint > minSequence) 238 | then return False 239 | else return True 240 | else return True 241 | {-# INLINE hasCapacity #-} 242 | 243 | set :: RingBuffer e -> SequenceNumber -> e -> IO () 244 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (capacity rb) snr) e 245 | {-# INLINE set #-} 246 | 247 | publish :: RingBuffer e -> SequenceNumber -> IO () 248 | publish rb = setAvailable rb 249 | {-# INLINE publish #-} 250 | -- XXX: Wake up consumers that are using a sleep wait strategy. 251 | 252 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO () 253 | publishBatch rb lo hi = mapM_ (setAvailable rb) [lo..hi] 254 | {-# INLINE publishBatch #-} 255 | 256 | get :: RingBuffer e -> SequenceNumber -> IO e 257 | get rb current = go 258 | where 259 | cap :: Int64 260 | cap = capacity rb 261 | 262 | availableValue :: Int 263 | availableValue = availabilityFlag cap current 264 | 265 | ix :: Int 266 | ix = index cap current 267 | 268 | go = do 269 | v <- getAvailable rb ix 270 | if v /= availableValue 271 | then do 272 | threadDelay 1 273 | go -- SPIN 274 | else Vector.unsafeRead (rbEvents rb) ix 275 | 276 | tryGet :: RingBuffer e -> SequenceNumber -> IO (Maybe e) 277 | tryGet rb want = do 278 | produced <- getCursor rb 279 | if want <= produced 280 | then Just <$> get rb want 281 | else return Nothing 282 | {-# INLINE tryGet #-} 283 | 284 | isAvailable :: RingBuffer e -> SequenceNumber -> IO Bool 285 | isAvailable rb snr = 286 | (==) <$> Unboxed.unsafeRead (rbAvailableBuffer rb) (index capacity snr) 287 | <*> pure (availabilityFlag capacity snr) 288 | where 289 | capacity = rbCapacity rb 290 | {-# INLINE isAvailable #-} 291 | 292 | highestPublished :: RingBuffer e -> SequenceNumber -> SequenceNumber 293 | -> IO SequenceNumber 294 | highestPublished rb lowerBound availableSequence = go lowerBound 295 | where 296 | go sequence 297 | | sequence > availableSequence = return availableSequence 298 | | otherwise = do 299 | available <- isAvailable rb sequence 300 | if not (available) 301 | then return (sequence - 1) 302 | else go (sequence + 1) 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pipelined-state-machines 2 | 3 | An experiment in declaratively programming parallel pipelines of state machines. 4 | 5 | ## Motivation 6 | 7 | Imagine a flat complex in Sweden. Being the socialist utopia Sweden is there's a 8 | shared laundry room which the people in the flat complex can book. In the 9 | laundry room there's everything one needs to wash, dry and iron your clothes. 10 | You don't even need to bring your own laundry detergent! 11 | 12 | Lets call three people living there Ann, Bo and Cecilia, and lets assume they 13 | all want to use the laundry room. Depending on how the booking system is 14 | implemented the total time it would take for all three people to do their 15 | laundry varies. 16 | 17 | For example if the booking system allocates a big time slot per person in which 18 | that person can do the whole cycle of *W*ashing, *D*rying and *I*roning then, 19 | assuming each step takes one time unit, we get a situation like this: 20 | 21 | Person 22 | ^ 23 | Ann | W D I W = Washing 24 | Bo | W D I D = Drying 25 | Cecilia | W D I I = Ironing 26 | +-------------------> Time 27 | 0 1 2 3 4 5 6 7 8 9 28 | 29 | Bo cannot start washing until Ann is done ironing, because Ann has booked the 30 | room for the whole cycle, and so on. 31 | 32 | If the booking system is more granular and allows booking a time slot per step 33 | then we can get a situation that looks like this: 34 | 35 | Person 36 | ^ 37 | Ann | W D I 38 | Bo | W D I 39 | Cecilia | W D I 40 | +-------------------> Time 41 | 0 1 2 3 4 5 6 7 8 9 42 | 43 | It should be clear that the total time is shorter in this case, because the 44 | machines are utilised better (Bo can start using the washing machine right after 45 | Ann is done with it). Also note that if each person would start a new washing 46 | after they finish ironing the first one and so on then the time savings would be 47 | even greater. 48 | 49 | This optimisation is called pipelining. It's used a lot in manufacturing, for 50 | example Airbus [builds](https://youtu.be/oxjT7veKi9c?t=2682) two airplanes per 51 | day. If you were to order a plane today you'd get it delivered in two months 52 | time. How is that they deliver two per day if it takes two months to build them? 53 | Pipelining! It's also used inside CPUs to [pipeline 54 | instructions](https://en.wikipedia.org/wiki/Instruction_pipelining). 55 | 56 | The rest of this document is an experiment in how we can construct such 57 | pipelining in software in a declarative way. 58 | 59 | ## Usage 60 | 61 | The workers or stages in our pipeline will be state machines of the following 62 | type. 63 | 64 | ```haskell 65 | data SM s a b where 66 | Id :: SM s a a 67 | Compose :: SM s b c -> SM s a b -> SM s a c 68 | Fst :: SM s (a, b) a 69 | Snd :: SM s (a, b) b 70 | (:&&&) :: SM s a c -> SM s a d -> SM s a (c, d) 71 | (:***) :: SM s a c -> SM s b d -> SM s (a, b) (c, d) 72 | SlowIO :: SM s a a -- Simulate a slow I/O computation. 73 | ``` 74 | 75 | Here's an example of a stage which takes an ordered pair as input and swaps the 76 | elements of the pair. Note the use of `SlowIO` to simulate that some slow I/O 77 | computation happens. 78 | 79 | ```haskell 80 | swap :: SM () (a, b) (b, a) 81 | swap = Snd :*** Fst `Compose` copy `Compose` SlowIO 82 | where 83 | copy = Id :&&& Id 84 | ``` 85 | 86 | We can `interpret` such state machines into plain functions as follows. 87 | 88 | ```haskell 89 | interpret :: SM s a b -> (a -> s -> IO (s, b)) 90 | interpret Id x s = return (s, x) 91 | interpret (Compose g f) x s = do 92 | (s', y) <- interpret f x s 93 | interpret g y s' 94 | interpret Fst x s = return (s, fst x) 95 | interpret Snd x s = return (s, snd x) 96 | interpret (f :&&& g) x s = do 97 | (s', y) <- interpret f x s 98 | (s'', z) <- interpret g x s' 99 | return (s'', (y, z)) 100 | interpret (f :*** g) x s = do 101 | (s', y) <- interpret f (fst x) s 102 | (s'', z) <- interpret g (snd x) s' 103 | return (s'', (y, z)) 104 | interpret SlowIO x s = do 105 | threadDelay 200000 -- 0.2s 106 | return (s, x) 107 | ``` 108 | 109 | Next lets have a look at how we can construct pipelines of such state machines. 110 | 111 | ```haskell 112 | data P a b where 113 | SM :: String -> SM s a b -> s -> P a b 114 | (:>>>) :: P a b -> P b c -> P a c 115 | ``` 116 | 117 | The following is an example pipeline where there's only one stage in which we do 118 | our pair swapping three times. 119 | 120 | ```haskell 121 | swapsSequential :: P (a, b) (b, a) 122 | swapsSequential = SM "three swaps" (swap `Compose` swap `Compose` swap) () 123 | ``` 124 | 125 | The above corresponds to our coarse grained booking system where the laundry was 126 | booked for the whole cycle. Whereas the following corresponds to the more fine 127 | grained approach where we get pipelining. 128 | 129 | ```haskell 130 | swapsPipelined :: P (a, b) (b, a) 131 | swapsPipelined = 132 | SM "first swap" swap () :>>> 133 | SM "second swap" swap () :>>> 134 | SM "third swap" swap () 135 | ``` 136 | 137 | A pipeline can be deployed, we'll use the following type to keep track of the 138 | queue associated with the pipeline as well as the name and pids of the state 139 | machines involved in the pipeline. 140 | 141 | ```haskell 142 | data Deployment a = Deployment 143 | { queue :: TQueue a 144 | , pids :: [(String, Async ())] 145 | } 146 | 147 | names :: Deployment a -> String 148 | names = bracket . intercalate "," . reverse . map fst . pids 149 | where 150 | bracket s = "[" ++ s ++ "]" 151 | ``` 152 | 153 | Here's the actual `deploy`ment function which takes a pipeline and gives back an 154 | input-queue and a `Deployment` which holds the output-queue and the names and 155 | pids of the state machines. 156 | 157 | ```haskell 158 | deploy :: P a b -> IO (TQueue a, Deployment b) 159 | deploy p = do 160 | q <- newTQueueIO 161 | d <- deploy' p (Deployment q []) 162 | return (q, d) 163 | 164 | deploy' :: P a b -> Deployment a -> IO (Deployment b) 165 | deploy' (SM name sm s0) d = do 166 | q' <- newTQueueIO 167 | pid <- async (go s0 q') 168 | return Deployment { queue = q', pids = (name, pid) : pids d } 169 | where 170 | f = interpret sm 171 | 172 | go s q' = do 173 | x <- atomically $ readTQueue (queue d) 174 | (s', o) <- f x s 175 | atomically $ writeTQueue q' o 176 | go s' q' 177 | deploy' (sm :>>> sm') d = do 178 | d' <- deploy' sm d 179 | deploy' sm' d' 180 | ``` 181 | 182 | We now have everything we need to run a simple benchmark comparing the 183 | sequential version of three swaps versus the pipelined version. 184 | 185 | ```haskell 186 | data PipelineKind = Sequential | Pipelined 187 | deriving Show 188 | 189 | main :: IO () 190 | main = do 191 | mapM_ libMain [Sequential, Pipelined] 192 | 193 | libMain :: PipelineKind -> IO () 194 | libMain k = do 195 | (q, d) <- deploy $ case k of 196 | Sequential -> swapsSequential 197 | Pipelined -> swapsPipelined 198 | print k 199 | putStrLn $ "Pids: " ++ names d 200 | start <- getCurrentTime 201 | forM_ [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)] $ \x -> 202 | atomically $ writeTQueue q x 203 | resps <- replicateM 6 $ atomically $ readTQueue (queue d) 204 | end <- getCurrentTime 205 | putStrLn $ "Responses: " ++ show resps 206 | putStrLn $ "Time: " ++ show (diffUTCTime end start) 207 | putStrLn "" 208 | ``` 209 | 210 | We can run the above with `cabal run readme-pipeline-example`, which results in 211 | something like the following being printed to the screen. 212 | 213 | ``` 214 | Sequential 215 | Pids: [three swaps] 216 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)] 217 | Time: 3.611045787s 218 | 219 | Pipelined 220 | Pids: [first swap,second swap,third swap] 221 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)] 222 | Time: 1.604990775s 223 | ``` 224 | 225 | Cool, we managed to reduce the total running time by more than half! We can do 226 | even better though! In addition to pipelining we can also shard the queues by 227 | letting two state machines work on the same queue, the first processing the 228 | elements in the even positions of the queue and the second processing the 229 | elements in the odd positions. 230 | 231 | ```diff 232 | data P a b where 233 | SM :: String -> SM s a b -> s -> P a b 234 | (:>>>) :: P a b -> P b c -> P a c 235 | + Shard :: P a b -> P a b 236 | ``` 237 | 238 | Here's an example of a sharded pipeline, where each shard will spawn two state 239 | machines (one working on the even indexes of the queue and the other on the 240 | odd). 241 | 242 | ```haskell 243 | swapsSharded :: P (a, b) (b, a) 244 | swapsSharded = 245 | Shard (SM "first swap" swap ()) :>>> 246 | Shard (SM "second swap" swap ()) :>>> 247 | Shard (SM "third swap" swap ()) 248 | ``` 249 | 250 | In the deployment of shards, we achieve the even-odd split by reading from the 251 | input queue, `qIn`, and first writing to the even queue, `qEven`, and then 252 | switching over to the odd queue, `qOdd`, when making the recursive call in 253 | `shardQIn`. Whereas `shardQOut` does the inverse and merges the two queues back 254 | into the output queue: 255 | 256 | ```diff 257 | + deploy' (Shard p) d = do 258 | + let qIn = queue d 259 | + qEven <- newTQueueIO 260 | + qOdd <- newTQueueIO 261 | + pidIn <- async $ shardQIn qIn qEven qOdd 262 | + dEven <- deploy' p (Deployment qEven []) 263 | + dOdd <- deploy' p (Deployment qOdd []) 264 | + qOut <- newTQueueIO 265 | + pidOut <- async $ shardQOut (queue dEven) (queue dOdd) qOut 266 | + return (Deployment qOut (("shardIn: " ++ names dEven ++ " & " ++ names dOdd, pidIn) : 267 | + ("shardOut: " ++ names dEven ++ " & " ++ names dOdd, pidOut) : 268 | + pids dEven ++ pids dOdd ++ pids d)) 269 | + where 270 | + shardQIn :: TQueue a -> TQueue a -> TQueue a -> IO () 271 | + shardQIn qIn qEven qOdd = do 272 | + atomically (readTQueue qIn >>= writeTQueue qEven) 273 | + shardQIn qIn qOdd qEven 274 | + 275 | + shardQOut :: TQueue a -> TQueue a -> TQueue a -> IO () 276 | + shardQOut qEven qOdd qOut = do 277 | + atomically (readTQueue qEven >>= writeTQueue qOut) 278 | + shardQOut qOdd qEven qOut 279 | ``` 280 | 281 | Running this version we see more than 3.5x speed-up compared to the sequential 282 | pipeline. 283 | 284 | ``` 285 | Sharded 286 | Pids: [first swap,first swap,shardOut: [first swap] & [first swap],shardIn: [first swap] & [first swap],second swap,second swap,shardOut: [second swap] & [second swap],shardIn: [second swap] & [second swap],third swap,third swap,shardOut: [third swap] & [third swap],shardIn: [third swap] & [third swap]] 287 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)] 288 | Time: 1.00241912s 289 | ``` 290 | 291 | There are still many more improvements to be made here: 292 | 293 | * Avoid spawning threads for merely shuffling elements between queues, e.g. 294 | `shardQ{In, Out}` above; 295 | * Avoid copying elements between queues; 296 | * Back-pressure; 297 | * Batching. 298 | 299 | I believe all these problems can be solved by choosing a better concurrent queue 300 | data structure than `TQueue`, so that's what we'll have a look at next. 301 | 302 | ## Disruptor 303 | 304 | The `Disruptor*` modules are a Haskell port of the [LMAX 305 | Disruptor](https://github.com/LMAX-Exchange/disruptor), which is a high 306 | performance inter-thread messaging library. The developers at LMAX, which 307 | operates a financial exchange, 308 | [reported](https://www.infoq.com/presentations/LMAX/) in 2010 that they could 309 | process more than 100,000 transactions per second at less than 1 millisecond 310 | latency. 311 | 312 | At its core it's just a lock-free concurrent queue, but it also provides 313 | building blocks for achieving several useful concurrent programming tasks that 314 | typical queues don't (or at least don't make obvious how to do). The extra 315 | features include: 316 | 317 | * Multi-cast (many consumers can in parallel process the same event); 318 | * Batching (both on producer and consumer side); 319 | * Back-pressure; 320 | * Sharding for scalability; 321 | * Dependencies between consumers. 322 | 323 | It's also performs better than most queues, as we shall see further down. 324 | 325 | ### Example 326 | 327 | ```haskell 328 | import Control.Concurrent 329 | import Control.Concurrent.Async 330 | import Disruptor.SP 331 | 332 | main :: IO () 333 | main = do 334 | 335 | -- Create the shared ring buffer. 336 | let bufferCapacity = 128 337 | rb <- newRingBuffer bufferCapacity 338 | 339 | -- The producer keeps a counter and produces events that are merely the pretty 340 | -- printed value as a string of that counter. 341 | let produce :: Int -> IO (String, Int) 342 | produce n = return (show n, n + 1) 343 | 344 | -- The counter starts at zero. 345 | initialProducerState = 0 346 | 347 | -- No back-pressure is applied in this example. 348 | backPressure :: Int -> IO () 349 | backPressure _ = return () 350 | 351 | producer <- newEventProducer rb produce backPressure initialProducerState 352 | 353 | -- The consumer merely prints the string event to the terminal. 354 | let consume :: () -> String -> SequenceNumber -> EndOfBatch -> IO () 355 | consume () event snr endOfBatch = 356 | putStrLn (event ++ if endOfBatch then " (end of batch)" else "") 357 | 358 | -- The consumer doesn't need any state in this example. 359 | initialConsumerState = () 360 | 361 | -- Which other consumers do we need to wait for before consuming an event? 362 | dependencies = [] 363 | 364 | -- What to do in case there are no events to consume? 365 | waitStrategy = Sleep 1 366 | 367 | consumer <- newEventConsumer rb consume initialConsumerState dependencies waitStrategy 368 | 369 | -- Tell the ring buffer which the last consumer is, to avoid overwriting 370 | -- events that haven't been consumed yet. 371 | setGatingSequences rb [ecSequenceNumber consumer] 372 | 373 | withEventProducer producer $ \ap -> 374 | withEventConsumer consumer $ \ac -> do 375 | threadDelay (3 * 1000 * 1000) -- 3 sec 376 | cancel ap 377 | cancel ac 378 | ``` 379 | 380 | You can run the above example with `cabal run readme-disruptor-example`. 381 | 382 | A couple of things we could change to highlight the features we mentioned in the 383 | above section: 384 | 385 | 1. Add a second consumer that saves the event to disk, this consumer would be 386 | slower than the current one which logs to the terminal, but we could use 387 | buffer up events in memory and only actually write when the end of batch 388 | flag is set to speed things up; 389 | 390 | 2. We could also shard depending on the sequence number, e.g. have two slower 391 | consumers that write to disk and have one of them handle even sequence numbers 392 | while the other handles odd ones; 393 | 394 | 3. The above producer writes one event at the time to the ring buffer, but 395 | since we know at which sequence number the last consumer is at we can 396 | easily make writes in batches as well; 397 | 398 | 4. Currently the producer doesn't apply any back-pressure when the ring buffer 399 | is full, in a more realistic example where the producer would, for example, 400 | create events from requests made to a http server we could use 401 | back-pressure to tell the http server to return status code 429 (too many 402 | requests); 403 | 404 | 5. If we have one consumer that writes to the terminal and another one that 405 | concurrently writes to disk, we could add a third consumer that does 406 | something with the event only if it has both been logged and stored to disk 407 | (i.e. the third consumer depends on both the first and the second). 408 | 409 | ### How it works 410 | 411 | The ring buffer is implemented using a bounded array, it keeps track of a 412 | monotonically increasing sequence number and it knows its the capacity of the 413 | array, so to find out where to write the next value by simply taking the modulus 414 | of the sequence number and the capacity. This has several advantages over 415 | traditional queues: 416 | 417 | 1. We never remove elements when dequeing, merely overwrite them once we gone 418 | all way around the ring. This removes write 419 | [contention](https://en.wikipedia.org/wiki/Resource_contention) between the 420 | producer and the consumer, one could also imagine avoiding garbage 421 | collection by only allocating memory the first time around the ring (but we 422 | don't do this in Haskell); 423 | 424 | 2. Using an array rather than linked list increasing 425 | [striding](https://en.wikipedia.org/wiki/Stride_of_an_array) due to 426 | [spatial 427 | locality](https://en.wikipedia.org/wiki/Locality_of_reference#Spatial_and_temporal_locality_usage). 428 | 429 | The ring buffer also keeps track of up to which sequence number its last 430 | consumer has consumed, in order to not overwrite events that haven't been handled 431 | yet. 432 | 433 | This also means that producers can ask how much capacity left a ring buffer has, 434 | and do batched writes. If there's no capacity left the producer can apply 435 | back-pressure upstream as appropriate. 436 | 437 | Consumers need keep track of which sequence number they have processed, in order 438 | to avoid having the ring buffer overwrite unprocessed events as already 439 | mentioned, but this also allows consumers to depend on each other. 440 | 441 | When a consumer is done processing an event, it asks the ring buffer for the 442 | event at its next sequence number, the ring buffer then replies that either 443 | there are no new events, in which case the consumer applies it wait strategy, or 444 | the ring buffer can reply that there are new events, the consumer the handles 445 | each one in turn and the last one will be have the end of batch flag set, so 446 | that the consumer can effectively batch the processing. 447 | 448 | ### Performance 449 | 450 | Our Disruptor implementation, which hasn't been optimised much yet, is about 2x 451 | slower than LMAX's Java version on their single-producer single-consumer 452 | [benchmark](https://github.com/LMAX-Exchange/disruptor/blob/master/src/perftest/java/com/lmax/disruptor/sequenced/OneToOneSequencedThroughputTest.java) 453 | (1P1C) (basically the above example) on a couple of years old Linux laptop. 454 | 455 | The same benchmark compared to other Haskell libraries: 456 | 457 | * 10.3x faster than 458 | [`Control.Concurrent.Chan`](https://hackage.haskell.org/package/base-4.15.0.0/docs/Control-Concurrent-Chan.html); 459 | 460 | * 8.3x faster than 461 | [`Control.Concurrent.STM.TBQueue`](https://hackage.haskell.org/package/stm/docs/Control-Concurrent-STM-TBQueue.html); 462 | 463 | * 1.7x faster than 464 | [`unagi-chan`](https://hackage.haskell.org/package/unagi-chan); 465 | 466 | * 25.5x faster than 467 | [`chaselev-deque`](https://hackage.haskell.org/package/chaselev-deque); 468 | 469 | * 700x faster than [`ring-buffer`](https://hackage.haskell.org/package/ring-buffer); 470 | 471 | * 1.3x slower than 472 | [`lockfree-queue`](https://hackage.haskell.org/package/lockfree-queue); 473 | 474 | * TODO: Compare with 475 | [`data-ringbuffer`](https://github.com/kim/data-ringbuffer/tree/master/src/Data/RingBuffer). 476 | 477 | In the triple-producer single-consumer (3P1C) 478 | [benchmark](https://github.com/LMAX-Exchange/disruptor/blob/master/src/perftest/java/com/lmax/disruptor/sequenced/ThreeToOneSequencedThroughputTest.java), 479 | the Java version is 5x slower than the Java 1P1C case. And our 3P1C is 4.6x 480 | slower than our 1P1C version and our 3P1C version is 2.7x slower than the Java 481 | version. 482 | 483 | The same benchmark compared to other Haskell libraries: 484 | 485 | * 73x faster than 486 | [`Control.Concurrent.Chan`](https://hackage.haskell.org/package/base-4.15.0.0/docs/Control-Concurrent-Chan.html); 487 | 488 | * 3.5x faster than 489 | [`Control.Concurrent.STM.TBQueue`](https://hackage.haskell.org/package/stm/docs/Control-Concurrent-STM-TBQueue.html); 490 | 491 | * 1.3x faster than 492 | [`unagi-chan`](https://hackage.haskell.org/package/unagi-chan); 493 | 494 | * 1.9x faster than 495 | [`lockfree-queue`](https://hackage.haskell.org/package/lockfree-queue). 496 | 497 | For a slightly more "real world" example, we modified the 3P1C test to have 498 | three producers that log messages while the consumer writes them to a log file 499 | and compared it to 500 | [`fast-logger`](https://hackage.haskell.org/package/fast-logger). The 501 | `pipelined-state-machines` benchmark has a throughput of 3:4 that of 502 | `fast-logger`. When we bump it to ten concurrently logging threads the 503 | `pipelined-state-machines` benchmark has a throughput of 10:7 that of 504 | `fast-logger`. 505 | 506 | See the file [`benchmark.sh`](benchmark.sh) for full details about how the 507 | benchmarks are run. 508 | 509 | As always take benchmarks with a grain of salt, we've tried to make them as fair 510 | with respect to each other and as true to the original Java versions as 511 | possible. If you see anything that seems unfair, or if you get very different 512 | results when trying to reproduce the numbers, then please file an issue. 513 | 514 | ## Contributing 515 | 516 | There's a lot of possible paths to explore from here, including: 517 | 518 | - [ ] Can we swap out our use of `TQueue` for `Disruptor` in our `deploy` of 519 | `P`ipelines? 520 | - [ ] Can we add something like a `FanOut :: P a b -> P a c -> P a (b, c)` and a 521 | `Par :: P a c -> P b d -> P (a, b) (c, d)` combinator to allow two 522 | parallel queues? 523 | - [ ] What about sum-types and error handling? 524 | - [ ] Our current, and the above just mentioned, pipeline combinators are all 525 | binary to can we generalise this to N-ary? 526 | - [ ] Can we visualise pipelines using `dot` or similar? 527 | - [ ] Can we build a performance/cost simulator of pipelines? 528 | - [ ] Arrow syntax or monadic DSL for pipelines? 529 | - [ ] We've seen 530 | [previously](https://github.com/stevana/hot-swapping-state-machines) how 531 | we can hot-code upgrade state machines, what about hot-code upgrading 532 | pipelines? 533 | - [ ] Can we implement the Erlang `gen_event` behaviour using Disruptor? 534 | - [ ] Would it make sense to use the spiritual successor of the Disruptor 535 | instead, i.e. the different array queues from `aeron` and `agrona`: 536 | + [Single-producer 537 | single-consumer](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/OneToOneConcurrentArrayQueue.java); 538 | + [Multiple-producers 539 | single-consumer](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/ManyToOneConcurrentArrayQueue.java); 540 | + [Multiple-producers 541 | multiple-consumers](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/ManyToManyConcurrentArrayQueue.java). 542 | - [ ] How exactly do these pipelines relate to the libraries 543 | [`pipes`](https://hackage.haskell.org/package/pipes), 544 | [`conduit`](https://hackage.haskell.org/package/conduit) and 545 | [`streamly`](https://hackage.haskell.org/package/streamly)? 546 | - [ ] How does it relate to synchronous programming languages such as 547 | [Esterel](https://en.wikipedia.org/wiki/Esterel), 548 | [Lustre](https://en.wikipedia.org/wiki/Lustre_(programming_language)), 549 | [ReactiveML](https://rml.lri.fr), etc? It seems to me that their main 550 | motivation is to be concurrent or parallel while still determinstic, which 551 | is what we'd like as well. Looking at ReactiveML's documentation for 552 | [compositions](https://rml.lri.fr/documentation.html#compositions) we see 553 | the same constructs as we've discussed: their `;` is our `Compose` (with 554 | its arguments flipped), their `||` is our `FanOut`, their `|>` is our 555 | `:>>>` and their `let-and` construct could be achived by adding projection 556 | functions to our `P`ipelines similar to `Fst` and `Snd` for `SM`. 557 | Interestingly they don't have any sum-types-like construct here, i.e. 558 | something like `(:|||) :: P a c -> P b c -> P (Either a b) c`; 559 | - [ ] I like to think of how one constructs a pipeline, i.e. the choice of which 560 | tasks should happen in parallel or should be sharded etc, as a choice of 561 | how to best make use of the CPUs/cores of a single computer. If seen this 562 | way then that begs the question: what about a network of multiple 563 | computers? Perhaps there should be something like a `Topology` data type 564 | which describes how multiple pipelines interact and a topology is deployed 565 | by deploying multiple pipelines over multiple machines? 566 | 567 | ## See also 568 | 569 | ### Presentations 570 | 571 | * [LMAX - How to Do 100K TPS at Less than 1ms 572 | Latency](https://www.infoq.com/presentations/LMAX/) by Martin Thompson (QCon 573 | 2010); 574 | 575 | * [LMAX Disruptor and the Concepts of Mechanical 576 | Sympathy](https://youtube.com/watch?v=Qho1QNbXBso) by Jamie Allen (2011); 577 | 578 | * [Concurrent Programming with the 579 | Disruptor](https://www.infoq.com/presentations/Concurrent-Programming-Using-The-Disruptor/) 580 | by Trisha Gee (2012); 581 | 582 | * [Disruptor 3.0: Details and Advanced 583 | Patterns](https://youtube.com/watch?v=2Be_Lqa35Y0) by Mike Barker (YOW! 584 | 2013); 585 | 586 | * [Designing for Performance](https://youtube.com/watch?v=fDGWWpHlzvw) by 587 | Martin Thompson (GOTO 2015); 588 | 589 | * [A quest for predictable latency with Java 590 | concurrency](https://vimeo.com/181814364) Martin Thompson 591 | (JavaZone 2016); 592 | 593 | * [Evolution of Financial Exchange 594 | Architectures](https://www.youtube.com/watch?v=qDhTjE0XmkE) by Martin 595 | Thompson (QCon 2020) 596 | + 1,000,000 tx/s and less than 100 microseconds latency, he is no longer 597 | at LMAX though so we don't know if these exchanges are using the 598 | disruptor pattern. 599 | 600 | * [*Aeron: Open-source high-performance 601 | messaging*](https://youtube.com/watch?v=tM4YskS94b0) talk by Martin Thompson 602 | (Strange Loop, 2014); 603 | 604 | * *Aeron: What, Why and What Next?* 605 | [talk](https://youtube.com/watch?v=p1bsloPeBzE) by Todd Montgomery (GOTO, 606 | 2015); 607 | 608 | * *Cluster Consensus: when Aeron met Raft* 609 | [talk](https://youtube.com/watch?v=GFfLCGW_5-w) by Martin Thompson (GOTO, 610 | 2018); 611 | 612 | * *Fault Tolerant 24/7 Operations with Aeron Cluster* 613 | [talk](https://youtube.com/watch?v=H9yqzfNiEb4) by Todd Montgomery (2022). 614 | 615 | ### Writings 616 | 617 | * Martin Thompson's [blog](https://mechanical-sympathy.blogspot.com/); 618 | * The Disruptor [mailing list](https://groups.google.com/g/lmax-disruptor); 619 | * The Mechanical Sympathy [mailing list](https://groups.google.com/g/mechanical-sympathy); 620 | * [The LMAX Architecture](https://martinfowler.com/articles/lmax.html) by 621 | Martin Fowler (2011); 622 | * [Staged event-driven 623 | architecture](https://en.wikipedia.org/wiki/Staged_event-driven_architecture); 624 | * [The Reactive Manifesto](https://www.reactivemanifesto.org/); 625 | * [Flow-based programming](https://en.wikipedia.org/wiki/Flow-based_programming). 626 | --------------------------------------------------------------------------------