├── .gitignore
├── test
    ├── TastyDiscover.hs
    ├── Main.hs
    └── DisruptorTest.hs
├── CHANGELOG.md
├── src
    ├── Disruptor
    │   ├── MP.hs
    │   ├── SP.hs
    │   ├── MP
    │   │   ├── Producer.hs
    │   │   ├── Consumer.hs
    │   │   └── RingBuffer.hs
    │   ├── SP
    │   │   ├── Unboxed
    │   │   │   ├── Producer.hs
    │   │   │   ├── Consumer.hs
    │   │   │   └── RingBuffer.hs
    │   │   ├── Producer.hs
    │   │   ├── Consumer.hs
    │   │   └── RingBuffer.hs
    │   ├── SequenceNumber.hs
    │   └── AtomicCounterPadded.hs
    └── Disruptor.hs
├── cabal.project
├── bench
    ├── MP
    │   ├── Chan.hs
    │   ├── LockFreeQueue.hs
    │   ├── TBQueue.hs
    │   ├── UnagiChan.hs
    │   ├── FastLogger.hs
    │   ├── FastLogger10.hs
    │   ├── Disruptor.hs
    │   ├── Logging10.hs
    │   └── Logging.hs
    ├── SP
    │   ├── Chan.hs
    │   ├── ChaseLevDeque.hs
    │   ├── LockFreeQueue.hs
    │   ├── TBQueue.hs
    │   ├── UnagiChan.hs
    │   ├── RingBuffer.hs
    │   ├── Disruptor.hs
    │   └── DisruptorUnboxed.hs
    ├── SingleOps.hs
    └── Common.hs
├── .stylish-haskell.yaml
├── LICENSE
├── app
    ├── ReadmeDisruptorExample.hs
    └── ReadmePipelineExample.hs
├── benchmark.sh
├── pipelined-state-machines.cabal
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | # Haskell
2 | dist-newstyle/
3 | cabal.project.local*
4 | bench-*
5 | bench.*
6 | # Emacs
7 | .\#*
8 | 


--------------------------------------------------------------------------------
/test/TastyDiscover.hs:
--------------------------------------------------------------------------------
1 | {-# OPTIONS_GHC -F -pgmF tasty-discover -optF --generated-module=TastyDiscover #-}
2 | 
3 | module TastyDiscover where
4 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Revision history for pipelined-state-machines
2 | 
3 | ## 0.0.0 -- 2021-11-02
4 | 
5 | * First version. Released on an unsuspecting world.
6 | 


--------------------------------------------------------------------------------
/src/Disruptor/MP.hs:
--------------------------------------------------------------------------------
1 | module Disruptor.MP (module X) where
2 | 
3 | import Disruptor.MP.Consumer as X
4 | import Disruptor.MP.Producer as X
5 | import Disruptor.MP.RingBuffer as X
6 | import Disruptor.SequenceNumber as X
7 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP.hs:
--------------------------------------------------------------------------------
1 | module Disruptor.SP (module X) where
2 | 
3 | import Disruptor.SP.Consumer as X
4 | import Disruptor.SP.Producer as X
5 | import Disruptor.SP.RingBuffer as X
6 | import Disruptor.SequenceNumber as X
7 | 


--------------------------------------------------------------------------------
/cabal.project:
--------------------------------------------------------------------------------
 1 | packages: .
 2 | 
 3 | with-compiler: ghc-9.0.2
 4 | 
 5 | reject-unconstrained-dependencies: all
 6 | 
 7 | constraints: QuickCheck +old-random
 8 | 
 9 | package pipelined-state-machines
10 | 
11 | allow-older: *
12 | allow-newer: *
13 | 


--------------------------------------------------------------------------------
/test/Main.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import qualified TastyDiscover
 4 | import Test.Tasty
 5 | 
 6 | ------------------------------------------------------------------------
 7 | 
 8 | main :: IO ()
 9 | main = defaultMain =<< TastyDiscover.tests
10 | 


--------------------------------------------------------------------------------
/bench/MP/Chan.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent.Chan
 4 | import Control.Concurrent.MVar (putMVar)
 5 | import Control.Monad (replicateM_)
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = mpsc newChan producer consumer
13 |   where
14 |     producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE)
15 | 
16 |     consumer o consumerFinished = do
17 |       replicateM_ iTERATIONS (readChan o)
18 |       putMVar consumerFinished ()
19 | 


--------------------------------------------------------------------------------
/bench/SP/Chan.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent.Chan
 4 | import Control.Concurrent.MVar (putMVar)
 5 | import Control.Monad (replicateM_)
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = spsc newChan producer consumer
13 |   where
14 |     producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE)
15 | 
16 |     consumer o consumerFinished = do
17 |       replicateM_ iTERATIONS (readChan o)
18 |       putMVar consumerFinished ()
19 | 


--------------------------------------------------------------------------------
/src/Disruptor.hs:
--------------------------------------------------------------------------------
 1 | module Disruptor where
 2 | 
 3 | -- * Single-producer
 4 | 
 5 | import Disruptor.SP.RingBuffer
 6 | import Disruptor.SP.Producer
 7 | import Disruptor.SP.Consumer
 8 | 
 9 | -- * Single-producer unboxed
10 | 
11 | import Disruptor.SP.Unboxed.RingBuffer
12 | import Disruptor.SP.Unboxed.Producer
13 | import Disruptor.SP.Unboxed.Consumer
14 | 
15 | -- * Multiple-producers
16 | 
17 | import Disruptor.MP.RingBuffer
18 | import Disruptor.MP.Producer
19 | import Disruptor.MP.Consumer
20 | 
21 | -- * Common
22 | 
23 | import Disruptor.SequenceNumber
24 | 


--------------------------------------------------------------------------------
/bench/MP/LockFreeQueue.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Monad
 4 | import Data.Concurrent.Queue.MichaelScott
 5 | import Control.Concurrent.MVar
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = mpsc setup producer consumer
13 |   where
14 |     setup = newQ
15 | 
16 |     producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE)
17 | 
18 |     consumer q consumerFinished = do
19 |       replicateM_ iTERATIONS $ do
20 |         _i <- tryPopR q
21 |         return ()
22 |       putMVar consumerFinished ()
23 | 


--------------------------------------------------------------------------------
/bench/SP/ChaseLevDeque.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Monad
 4 | import Data.Concurrent.Deque.ChaseLev
 5 | import Control.Concurrent.MVar
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = spsc setup producer consumer
13 |   where
14 |     setup = newQ
15 | 
16 |     producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE)
17 | 
18 |     consumer q consumerFinished = do
19 |       replicateM_ iTERATIONS $ do
20 |         _i <- tryPopR q
21 |         return ()
22 |       putMVar consumerFinished ()
23 | 


--------------------------------------------------------------------------------
/bench/SP/LockFreeQueue.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Monad
 4 | import Data.Concurrent.Queue.MichaelScott
 5 | import Control.Concurrent.MVar
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = spsc setup producer consumer
13 |   where
14 |     setup = newQ
15 | 
16 |     producer q = replicateM_ iTERATIONS (pushL q vALUE_TO_WRITE)
17 | 
18 |     consumer q consumerFinished = do
19 |       replicateM_ iTERATIONS $ do
20 |         _i <- tryPopR q
21 |         return ()
22 |       putMVar consumerFinished ()
23 | 


--------------------------------------------------------------------------------
/bench/MP/TBQueue.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Monad
 4 | import Control.Concurrent.STM
 5 | import Control.Concurrent.MVar
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = mpsc setup producer consumer
13 |   where
14 |     setup = newTBQueueIO (fromIntegral bUFFER_CAPACITY)
15 | 
16 |     producer q = replicateM_ iTERATIONS
17 |                              (atomically (writeTBQueue q vALUE_TO_WRITE))
18 | 
19 |     consumer q consumerFinished = do
20 |       replicateM_ iTERATIONS (atomically (readTBQueue q))
21 |       putMVar consumerFinished ()
22 | 


--------------------------------------------------------------------------------
/bench/SP/TBQueue.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Monad
 4 | import Control.Concurrent.STM
 5 | import Control.Concurrent.MVar
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = spsc setup producer consumer
13 |   where
14 |     setup = newTBQueueIO (fromIntegral bUFFER_CAPACITY)
15 | 
16 |     producer q = replicateM_ iTERATIONS
17 |                              (atomically (writeTBQueue q vALUE_TO_WRITE))
18 | 
19 |     consumer q consumerFinished = do
20 |       replicateM_ iTERATIONS (atomically (readTBQueue q))
21 |       putMVar consumerFinished ()
22 | 


--------------------------------------------------------------------------------
/bench/SP/UnagiChan.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent (threadDelay)
 4 | import Control.Concurrent.Chan.Unagi.NoBlocking.Unboxed
 5 | import Control.Concurrent.MVar (putMVar)
 6 | import Control.Monad (replicateM_)
 7 | 
 8 | import Common
 9 | 
10 | ------------------------------------------------------------------------
11 | 
12 | main :: IO ()
13 | main = spsc newChan (producer . fst) (\(_i, o) -> consumer o)
14 |   where
15 |     producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE)
16 | 
17 |     consumer o consumerFinished = do
18 |       replicateM_ iTERATIONS (readChan (threadDelay sLEEP_TIME) o)
19 |       putMVar consumerFinished ()
20 | 


--------------------------------------------------------------------------------
/bench/MP/UnagiChan.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent (putMVar, threadDelay)
 4 | import Control.Concurrent.Chan.Unagi.NoBlocking.Unboxed
 5 | import Control.Monad (replicateM_)
 6 | 
 7 | import Common
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = mpsc newChan (producer . fst) (\(_i, o) -> consumer o)
13 |   where
14 |     producer i = replicateM_ iTERATIONS (writeChan i vALUE_TO_WRITE)
15 | 
16 |     consumer o consumerFinished = do
17 |       replicateM_ (iTERATIONS * nUMBER_OF_PRODUCERS)
18 |                   (readChan (threadDelay sLEEP_TIME) o)
19 |       putMVar consumerFinished ()
20 | 


--------------------------------------------------------------------------------
/.stylish-haskell.yaml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - imports:
 3 |       align: none
 4 |       list_align: with_module_name
 5 |       pad_module_names: false
 6 |       long_list_align: new_line_multiline
 7 |       empty_list_align: inherit
 8 |       list_padding: 7 # length "import "
 9 |       separate_lists: false
10 |       space_surround: false
11 |   - language_pragmas:
12 |       style: vertical
13 |       align: false
14 |       remove_redundant: true
15 |   - simple_align:
16 |       cases: false
17 |       top_level_patterns: false
18 |       records: false
19 |   - trailing_whitespace: {}
20 | 
21 | # You need to put any language extensions that's enabled for the entire project
22 | # here.
23 | language_extensions: []
24 | 
25 | columns: 72
26 | 


--------------------------------------------------------------------------------
/bench/SP/RingBuffer.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE FlexibleContexts #-}
 2 | 
 3 | module Main where
 4 | 
 5 | import Control.Concurrent.MVar
 6 | import Control.Monad
 7 | import Data.RingBuffer
 8 | import Data.Vector (Vector)
 9 | 
10 | import Common
11 | 
12 | ------------------------------------------------------------------------
13 | 
14 | main :: IO ()
15 | main = spsc setup producer consumer
16 |   where
17 |     setup :: IO (RingBuffer Vector Int) -- NOTE: `IOVector` doesn't work?!
18 |     setup = new bUFFER_CAPACITY
19 | 
20 |     producer rb = replicateM_ iTERATIONS (append vALUE_TO_WRITE rb)
21 | 
22 |     consumer rb consumerFinished = do
23 |       replicateM_ iTERATIONS $ do
24 |         _i <- latest rb 1
25 |         return ()
26 |       putMVar consumerFinished ()
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Symbiont Inc, Stevan Andjelkovic 2022-2023.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a
 6 | copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/bench/MP/FastLogger.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent
 4 | import Control.Monad (when, replicateM_)
 5 | import System.Log.FastLogger
 6 | import Data.IORef
 7 | 
 8 | import Common
 9 | 
10 | ------------------------------------------------------------------------
11 | 
12 | lOG_MESSAGE :: LogStr
13 | lOG_MESSAGE = toLogStr "some random string to log"
14 | 
15 | lOG_FILE :: FilePath
16 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-fast-logger.log"
17 | 
18 | main :: IO ()
19 | main = do
20 |   producersFinished <- newIORef 0
21 |   mpsc setup (producer producersFinished) (consumer producersFinished)
22 |   where
23 |     setup = do
24 |       cleanup lOG_FILE
25 |       newFileLoggerSet defaultBufSize lOG_FILE
26 | 
27 |     producer producersFinished lgrset = do
28 |       replicateM_ iTERATIONS (pushLogStrLn lgrset lOG_MESSAGE)
29 |       flushLogStr lgrset
30 |       atomicModifyIORef' producersFinished (\n -> (n + 1, ()))
31 | 
32 |     consumer producersFinished _lgrset consumerFinished = go
33 |       where
34 |         go = do
35 |           p <- readIORef producersFinished
36 |           if p == nUMBER_OF_PRODUCERS
37 |           then putMVar consumerFinished ()
38 |           else do
39 |             threadDelay 10000
40 |             go
41 | 


--------------------------------------------------------------------------------
/src/Disruptor/MP/Producer.hs:
--------------------------------------------------------------------------------
 1 | module Disruptor.MP.Producer where
 2 | 
 3 | import Control.Concurrent.Async
 4 | 
 5 | import Disruptor.MP.RingBuffer
 6 | import Disruptor.SequenceNumber
 7 | 
 8 | ------------------------------------------------------------------------
 9 | 
10 | data EventProducer s = EventProducer
11 |   { epWorker       :: s -> IO s
12 |   , epInitialState :: s
13 |   }
14 | 
15 | newEventProducer :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s
16 |                  -> IO (EventProducer s)
17 | newEventProducer rb p backPressure s0 = do
18 |   let go s = {-# SCC go #-} do
19 |         mSnr <- tryNext rb
20 |         case mSnr of
21 |           None -> do
22 |             {-# SCC backPresure #-} backPressure s
23 |             go s
24 |           Some snr -> do
25 |             (e, s') <- {-# SCC p #-} p s
26 |             set rb snr e
27 |             publish rb snr
28 |             go s'
29 | 
30 |   return (EventProducer go s0)
31 | 
32 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a
33 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do
34 |   link a
35 |   k a
36 | 
37 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a
38 | withEventProducerOn capability ep k =
39 |   withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do
40 |     link a
41 |     k a
42 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/Unboxed/Producer.hs:
--------------------------------------------------------------------------------
 1 | module Disruptor.SP.Unboxed.Producer where
 2 | 
 3 | import Control.Concurrent.Async
 4 | import Data.Vector.Unboxed.Mutable (Unbox)
 5 | 
 6 | import Disruptor.SP.Unboxed.RingBuffer
 7 | import Disruptor.SequenceNumber
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | data EventProducer s = EventProducer
12 |   { epWorker       :: s -> IO s
13 |   , epInitialState :: s
14 |   }
15 | 
16 | newEventProducer :: Unbox e => RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s
17 |                  -> IO (EventProducer s)
18 | newEventProducer rb p backPressure s0 = do
19 |   let go s = {-# SCC go #-} do
20 |         mSnr <- tryNext rb
21 |         case mSnr of
22 |           None -> do
23 |             {-# SCC backPresure #-} backPressure s
24 |             go s
25 |           Some snr -> do
26 |             (e, s') <- {-# SCC p #-} p s
27 |             set rb snr e
28 |             publish rb snr
29 |             go s'
30 | 
31 |   return (EventProducer go s0)
32 | 
33 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a
34 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do
35 |   link a
36 |   k a
37 | 
38 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a
39 | withEventProducerOn capability ep k =
40 |   withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do
41 |     link a
42 |     k a
43 | 


--------------------------------------------------------------------------------
/bench/MP/FastLogger10.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NumericUnderscores #-}
 2 | 
 3 | module Main where
 4 | 
 5 | import Control.Concurrent
 6 | import Control.Monad (when, replicateM_)
 7 | import System.Log.FastLogger
 8 | import Data.IORef
 9 | 
10 | import Common hiding (nUMBER_OF_PRODUCERS, iTERATIONS)
11 | 
12 | ------------------------------------------------------------------------
13 | 
14 | iTERATIONS = 1_000_000
15 | 
16 | nUMBER_OF_PRODUCERS :: Int
17 | nUMBER_OF_PRODUCERS = 10
18 | 
19 | lOG_MESSAGE :: LogStr
20 | lOG_MESSAGE = toLogStr "some random string to log"
21 | 
22 | lOG_FILE :: FilePath
23 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-fast-logger10.log"
24 | 
25 | main :: IO ()
26 | main = do
27 |   producersFinished <- newIORef 0
28 |   m10psc setup (producer producersFinished) (consumer producersFinished)
29 |   where
30 |     setup = do
31 |       cleanup lOG_FILE
32 |       newFileLoggerSet defaultBufSize lOG_FILE
33 | 
34 |     producer producersFinished lgrset = do
35 |       replicateM_ iTERATIONS (pushLogStrLn lgrset lOG_MESSAGE)
36 |       flushLogStr lgrset
37 |       atomicModifyIORef' producersFinished (\n -> (n + 1, ()))
38 | 
39 |     consumer producersFinished _lgrset consumerFinished = go
40 |       where
41 |         go = do
42 |           p <- readIORef producersFinished
43 |           if p == nUMBER_OF_PRODUCERS
44 |           then putMVar consumerFinished ()
45 |           else do
46 |             threadDelay 10000
47 |             go
48 | 


--------------------------------------------------------------------------------
/bench/MP/Disruptor.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent
 4 | import Control.Monad
 5 | 
 6 | import Common
 7 | import Disruptor.MP.Consumer
 8 | import Disruptor.MP.Producer
 9 | import Disruptor.MP.RingBuffer
10 | import Disruptor.SequenceNumber
11 | 
12 | ------------------------------------------------------------------------
13 | 
14 | main :: IO ()
15 | main = mpsc setup producer consumer
16 |   where
17 |     setup = newRingBuffer bUFFER_CAPACITY
18 | 
19 |     producer :: RingBuffer Int -> IO ()
20 |     producer rb = go iTERATIONS
21 |       where
22 |         go :: Int -> IO ()
23 |         go 0 = return ()
24 |         go n = do
25 |           mSnr <- tryNext rb
26 |           case mSnr of
27 |             Some snr -> do
28 |               set rb snr vALUE_TO_WRITE
29 |               publish rb snr
30 |               go (n - 1)
31 |             None ->
32 |               -- NOTE: No sleep needed here.
33 |               go n
34 | 
35 |     consumer :: RingBuffer Int -> MVar () -> IO ()
36 |     consumer rb consumerFinished = do
37 |       let handler _s _n snr endOfBatch = do
38 |             when (endOfBatch &&
39 |                   getSequenceNumber snr ==
40 |                   fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $
41 |               putMVar consumerFinished ()
42 |             return ()
43 |       ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME)
44 |       setGatingSequences rb [ecSequenceNumber ec]
45 |       ecWorker ec ()
46 | 


--------------------------------------------------------------------------------
/bench/SP/Disruptor.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent.MVar (MVar, putMVar)
 4 | import Control.Monad (when)
 5 | 
 6 | import Common
 7 | import Disruptor.SP
 8 | 
 9 | ------------------------------------------------------------------------
10 | 
11 | main :: IO ()
12 | main = spsc setup producer consumer
13 |   where
14 |     setup :: IO (RingBuffer Int)
15 |     setup = newRingBuffer bUFFER_CAPACITY
16 | 
17 |     producer :: RingBuffer Int -> IO ()
18 |     producer rb = go iTERATIONS
19 |       where
20 |         go :: Int -> IO ()
21 |         go 0 = return ()
22 |         go n = do
23 |           mSnr <- tryNext rb
24 |           case mSnr of
25 |             Some snr -> do
26 |               -- NOTE: Measuring transactions, which is useful for calculating
27 |               -- latency, seriously slows down the benchmark.
28 | 
29 |               -- {-# SCC "transactions+1" #-} incrCounter_ 1 transactions
30 |               set rb snr vALUE_TO_WRITE
31 |               publish rb snr
32 |               go (n - 1)
33 |             None -> do
34 |               -- yield
35 |               go n
36 | 
37 |     consumer :: RingBuffer Int -> MVar () -> IO ()
38 |     consumer rb consumerFinished = do
39 |       let handler _s _n snr endOfBatch = do
40 |             -- t' <- {-# SCC "transactions-1" #-} decrCounter 1 transactions
41 |             -- measureInt_ t' histo
42 |             when (endOfBatch && getSequenceNumber snr == fromIntegral (iTERATIONS - 1)) $
43 |               putMVar consumerFinished ()
44 |             return ()
45 |       ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME)
46 |       setGatingSequences rb [ecSequenceNumber ec]
47 |       ecWorker ec ()
48 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/Producer.hs:
--------------------------------------------------------------------------------
 1 | module Disruptor.SP.Producer where
 2 | 
 3 | import Control.Concurrent.Async
 4 | 
 5 | import Disruptor.SP.RingBuffer
 6 | import Disruptor.SequenceNumber
 7 | 
 8 | ------------------------------------------------------------------------
 9 | 
10 | data EventProducer s = EventProducer
11 |   { epWorker       :: s -> IO s
12 |   , epInitialState :: s
13 |   }
14 | 
15 | newEventProducer :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s
16 |                  -> IO (EventProducer s)
17 | newEventProducer rb p backPressure s0 = do
18 |   let go s = {-# SCC go #-} do
19 |         mSnr <- tryNext rb
20 |         case mSnr of
21 |           None -> do
22 |             {-# SCC backPresure #-} backPressure s
23 |             go s
24 |           Some snr -> do
25 |             (e, s') <- {-# SCC p #-} p s
26 |             set rb snr e
27 |             publish rb snr
28 |             go s'
29 | 
30 |   return (EventProducer go s0)
31 | 
32 | -- XXX: 2x slower than above...
33 | newEventProducer' :: RingBuffer e -> (s -> IO (e, s)) -> (s -> IO ()) -> s
34 |                   -> IO (EventProducer s)
35 | newEventProducer' rb p backPressure s0 = do
36 |   let go s = {-# SCC go #-} do
37 |         snr <- next rb
38 |         (e, s') <- {-# SCC p #-} p s
39 |         set rb snr e
40 |         publish rb snr
41 |         go s'
42 | 
43 |   return (EventProducer go s0)
44 | 
45 | withEventProducer :: EventProducer s -> (Async s -> IO a) -> IO a
46 | withEventProducer ep k = withAsync (epWorker ep (epInitialState ep)) $ \a -> do
47 |   link a
48 |   k a
49 | 
50 | withEventProducerOn :: Int -> EventProducer s -> (Async s -> IO a) -> IO a
51 | withEventProducerOn capability ep k =
52 |   withAsyncOn capability (epWorker ep (epInitialState ep)) $ \a -> do
53 |     link a
54 |     k a
55 | 


--------------------------------------------------------------------------------
/bench/SP/DisruptorUnboxed.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent.MVar
 4 | import Control.Monad
 5 | 
 6 | import Disruptor.SP.Unboxed.Consumer
 7 | import Disruptor.SP.Unboxed.Producer
 8 | import Disruptor.SP.Unboxed.RingBuffer
 9 | import Disruptor.SequenceNumber
10 | import Common
11 | 
12 | ------------------------------------------------------------------------
13 | 
14 | main :: IO ()
15 | main = spsc setup producer consumer
16 |   where
17 |     setup :: IO (RingBuffer Int)
18 |     setup = newRingBuffer bUFFER_CAPACITY
19 | 
20 |     producer :: RingBuffer Int -> IO ()
21 |     producer rb = go iTERATIONS
22 |       where
23 |         go :: Int -> IO ()
24 |         go 0 = return ()
25 |         go n = do
26 |           mSnr <- tryNext rb
27 |           case mSnr of
28 |             Some snr -> do
29 |               -- NOTE: Measuring transactions, which is useful for calculating
30 |               -- latency, seriously slows down the benchmark.
31 | 
32 |               -- {-# SCC "transactions+1" #-} incrCounter_ 1 transactions
33 |               set rb snr vALUE_TO_WRITE
34 |               publish rb snr
35 |               go (n - 1)
36 |             None -> do
37 |               -- threadDelay sLEEP_TIME
38 |               go n
39 | 
40 |     consumer :: RingBuffer Int -> MVar () -> IO ()
41 |     consumer rb consumerFinished = do
42 |       let handler _s _n snr endOfBatch = do
43 |             -- t' <- {-# SCC "transactions-1" #-} decrCounter 1 transactions
44 |             -- measureInt_ t' histo
45 |             when (endOfBatch && getSequenceNumber snr == fromIntegral (iTERATIONS - 1)) $
46 |               putMVar consumerFinished ()
47 |             return ()
48 |       ec <- newEventConsumer rb handler () [] (Sleep sLEEP_TIME)
49 |       setGatingSequences rb [ecSequenceNumber ec]
50 |       ecWorker ec ()
51 | 


--------------------------------------------------------------------------------
/src/Disruptor/SequenceNumber.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DerivingStrategies #-}
 2 | {-# LANGUAGE DataKinds #-}
 3 | {-# LANGUAGE UnboxedTuples #-}
 4 | {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 5 | 
 6 | module Disruptor.SequenceNumber where
 7 | 
 8 | import Data.Bits (countLeadingZeros, finiteBitSize, unsafeShiftR, (.&.))
 9 | import Data.Int (Int64)
10 | 
11 | ------------------------------------------------------------------------
12 | 
13 | newtype SequenceNumber = SequenceNumber { getSequenceNumber :: Int64 }
14 |   deriving newtype (Num, Eq, Ord, Real, Enum, Integral, Show, Bounded)
15 | -- ^ NOTE: `(maxBound :: Int64) == 9223372036854775807` so if we write 10M events
16 | -- per second (`10_000_000*60*60*24*365 == 315360000000000) then it would take
17 | -- us `9223372036854775807 / 315360000000000 == 29247.1208677536` years before
18 | -- we overflow.
19 | 
20 | -- > quickCheck $ \(Positive i) j -> let capacity = 2^i in
21 | --     j `mod` capacity == j Data.Bits..&. (capacity - 1)
22 | index :: Int64 -> SequenceNumber -> Int
23 | index capacity (SequenceNumber i) = fromIntegral (i .&. indexMask)
24 |   where
25 |     indexMask = capacity - 1
26 | {-# INLINE index #-}
27 | 
28 | availabilityFlag :: Int64 -> SequenceNumber -> Int
29 | availabilityFlag capacity (SequenceNumber i) =
30 |   fromIntegral (i `unsafeShiftR` indexShift)
31 |   where
32 |     indexShift = logBase2 capacity
33 | {-# INLINE availabilityFlag #-}
34 | 
35 | -- Taken from:
36 | -- https://hackage.haskell.org/package/base-4.15.0.0/docs/Data-Bits.html#v:countLeadingZeros
37 | logBase2 :: Int64 -> Int
38 | logBase2 i = finiteBitSize i - 1 - countLeadingZeros i
39 | {-# INLINE logBase2 #-}
40 | 
41 | data MaybeSequenceNumber = None | Some {-# UNPACK #-} !SequenceNumber
42 |   deriving (Eq, Show)
43 | -- ^ TODO: compare with: https://hackage.haskell.org/package/unpacked-maybe-text-0.1.0.0/docs/src/Data.Maybe.Unpacked.Text.Short.html#MaybeShortText
44 | 


--------------------------------------------------------------------------------
/app/ReadmeDisruptorExample.hs:
--------------------------------------------------------------------------------
 1 | module Main where
 2 | 
 3 | import Control.Concurrent
 4 | import Control.Concurrent.Async
 5 | import Disruptor.SP
 6 | 
 7 | main :: IO ()
 8 | main = do
 9 | 
10 |   -- Create the shared ring buffer.
11 |   let bufferCapacity = 128
12 |   rb <- newRingBuffer bufferCapacity
13 | 
14 |   -- The producer keeps a counter and produces events that are merely the pretty
15 |   -- printed value as a string of that counter.
16 |   let produce :: Int -> IO (String, Int)
17 |       produce n = return (show n, n + 1)
18 | 
19 |       -- The counter starts at zero.
20 |       initialProducerState = 0
21 | 
22 |       -- No back-pressure is applied in this example.
23 |       backPressure :: Int -> IO ()
24 |       backPressure _ = return ()
25 | 
26 |   producer <- newEventProducer rb produce backPressure initialProducerState
27 | 
28 |   -- The consumer merely prints the string event to the terminal.
29 |   let consume :: () -> String -> SequenceNumber -> EndOfBatch -> IO ()
30 |       consume () event snr endOfBatch =
31 |         putStrLn (event ++ if endOfBatch then " (end of batch)" else "")
32 | 
33 |       -- The consumer doesn't need any state in this example.
34 |       initialConsumerState = ()
35 | 
36 |       -- Which other consumers do we need to wait for before consuming an event?
37 |       dependencies = []
38 | 
39 |       -- What to do in case there are no events to consume?
40 |       waitStrategy = Sleep 1
41 | 
42 |   consumer <- newEventConsumer rb consume initialConsumerState dependencies waitStrategy
43 | 
44 |   -- Tell the ring buffer which the last consumer is, to avoid overwriting
45 |   -- events that haven't been consumed yet.
46 |   setGatingSequences rb [ecSequenceNumber consumer]
47 | 
48 |   withEventProducer producer $ \ap ->
49 |     withEventConsumer consumer $ \ac -> do
50 |       threadDelay (3 * 1000 * 1000) -- 3 sec
51 |       cancel ap
52 |       cancel ac
53 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/Unboxed/Consumer.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX
 2 | 
 3 | module Disruptor.SP.Unboxed.Consumer where
 4 | 
 5 | import Control.Concurrent.Async
 6 | import Control.Concurrent
 7 | import Control.Concurrent.STM -- XXX
 8 | import Data.IORef
 9 | import Data.Vector.Unboxed (Unbox)
10 | 
11 | import Disruptor.SequenceNumber
12 | import Disruptor.SP.Unboxed.RingBuffer
13 | 
14 | ------------------------------------------------------------------------
15 | 
16 | data EventConsumer s = EventConsumer
17 |   { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber)
18 |   , ecWorker         :: s -> IO s
19 |   , ecInitialState   :: s
20 |   }
21 | 
22 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles
23 | -- even and another handles odd numbers.
24 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s
25 | type EndOfBatch = Bool
26 | 
27 | data SequenceBarrier e
28 |   = RingBufferBarrier (RingBuffer e)
29 |   | forall s. EventConsumerBarrier (EventConsumer s)
30 | 
31 | data WaitStrategy = Sleep Int
32 | 
33 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a
34 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do
35 |   link a
36 |   k a
37 | 
38 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a
39 | withEventConsumerOn capability ec k =
40 |   withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do
41 |     link a
42 |     k a
43 | 
44 | newEventConsumer :: Unbox e => RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e]
45 |                  -> WaitStrategy -> IO (EventConsumer s)
46 | newEventConsumer rb handler s0 _barriers (Sleep n) = do
47 |   snrRef <- newIORef (-1)
48 | 
49 |   let go s = {-# SCC go #-} do
50 |         mySnr <- readIORef snrRef
51 |         bSnr <- waitFor mySnr rb -- XXX: barriers
52 |         -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271
53 |         s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s
54 |         writeIORef snrRef bSnr
55 |         go s'
56 |         where
57 |           go' lo hi s | lo >  hi = return s
58 |                       | lo <= hi = do
59 |             e <- unsafeGet rb lo
60 |             s' <- {-# SCC handler #-} handler s e lo (lo == hi)
61 |             go' (lo + 1) hi s'
62 | 
63 |   return (EventConsumer snrRef go s0)
64 | 
65 | waitFor :: SequenceNumber -> RingBuffer e -> IO SequenceNumber
66 | waitFor consumed rb = go
67 |   where
68 |     go = do
69 |       produced <- readIORef (rbCursor rb)
70 |       if consumed < produced
71 |       then return produced
72 |       else do
73 |         -- NOTE: Removing the sleep seems to cause non-termination... XXX: Why
74 |         -- though? the consumer should be running on its own thread?
75 |         threadDelay 1
76 |         go -- SPIN
77 |         -- ^ XXX: waitStrategy should be passed in and acted on here.
78 |         --
79 |         -- XXX: Other wait strategies could be implemented here, e.g. we could
80 |         -- try to recurse immediately here, and if there's no work after a
81 |         -- couple of tries go into a takeMTVar sleep waiting for a producer to
82 |         -- wake us up.
83 | {-# INLINE waitFor #-}
84 | 


--------------------------------------------------------------------------------
/bench/SingleOps.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NumericUnderscores #-}
 2 | 
 3 | module Main where
 4 | 
 5 | import Control.Monad
 6 | import Control.Concurrent
 7 | import Control.Concurrent.Async
 8 | import Data.Time
 9 | import Data.Word
10 | import Data.Atomics.Counter
11 | import Data.IORef
12 | import System.CPUTime
13 | 
14 | import StuntDouble.Histogram
15 | import qualified Disruptor.AtomicCounterPadded as Padded
16 | 
17 | -- Can't load FastMutInt:
18 | --    Could not load module `FastMutInt'
19 | --    It is a member of the hidden package `ghc-8.10.4'.
20 | --    Perhaps you need to add `ghc' to the build-depends in your .cabal file.
21 | --
22 | -- But when I add ghc to build-depends it says it can't find a build plan, and
23 | -- also https://hackage.haskell.org/package/ghc doesn't seem to have 8.10.4...
24 | --
25 | -- import FastMutInt
26 | 
27 | ------------------------------------------------------------------------
28 | 
29 | main :: IO ()
30 | main = do
31 |   many "getCurrentTime" (return ()) (const getCurrentTime)
32 | 
33 |   many "getCPUTime" (return ()) (const getCPUTime)
34 | 
35 |   many "threadDelay 0" (return ()) (const (threadDelay 0))
36 |   many "threadDelay 1" (return ()) (const (threadDelay 1))
37 |   many "yield" (return ()) (const yield)
38 |   manyConcurrent "threadDelay 0 (concurrent)" 3 (return ()) (const (threadDelay 0))
39 |   manyConcurrent "threadDelay 1 (concurrent)" 3 (return ()) (const (threadDelay 1))
40 |   manyConcurrent "yield (concurrent)" 3 (return ()) (const yield)
41 | 
42 |   many "incrCounter1" (newCounter 0) (incrCounter 1)
43 |   many "incrCounter1Padded" (Padded.newCounter 0) (Padded.incrCounter 1)
44 |   -- many "incrCounter1FastMutInt" (newFastMutInt 0)
45 |   --      (\r -> readFastMutInt r >>= \v -> writeFastMutInt r (v +1))
46 | 
47 |   manyConcurrent "incrCounter1 (concurrent)"
48 |     3 (newCounter 0) (incrCounter 1)
49 |   manyConcurrent "incrCounter1Padded (concurrent)"
50 |     3 (Padded.newCounter 0) (Padded.incrCounter 1)
51 | 
52 |   many "modifyIORef'" (newIORef (0 :: Int)) (\r -> modifyIORef' r succ)
53 | 
54 |   many "atomicModifyIORef'"
55 |     (newIORef (0 :: Int)) (\r -> atomicModifyIORef' r (\n -> ((n + 1), ())))
56 | 
57 |   manyConcurrent "atomicModifyIORef' (concurrent)" 3
58 |     (newIORef (0 :: Int)) (\r -> atomicModifyIORef' r (\n -> ((n + 1), ())))
59 | 
60 | many :: String -> IO a -> (a -> IO b) -> IO ()
61 | many name create use = do
62 |   h <- newHistogram
63 |   r <- create
64 |   replicateM 500000 (once h (use r))
65 |   putStrLn ""
66 |   putStrLn ""
67 |   prettyPrintHistogram name h
68 | 
69 | manyConcurrent :: String -> Int -> IO a -> (a -> IO b) -> IO ()
70 | manyConcurrent name n create use = do
71 |   h <- newHistogram
72 |   r <- create
73 |   as <- replicateM n (async (replicateM 500000 (once h (use r))))
74 |   mapM_ wait as
75 |   putStrLn ""
76 |   putStrLn ""
77 |   prettyPrintHistogram name h
78 | 
79 | once :: Histogram -> IO a -> IO ()
80 | once h io = do
81 |   start <- fromInteger <$> getCPUTime
82 |   _     <- io
83 |   end   <- fromInteger <$> getCPUTime
84 | 
85 |   let diffPico :: Word64
86 |       diffPico = end - start
87 | 
88 |       diffNano :: Double
89 |       diffNano = realToFrac (fromIntegral diffPico) * 1e-3
90 | 
91 |   void (measure diffNano h)
92 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/Consumer.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX
 2 | 
 3 | module Disruptor.SP.Consumer where
 4 | 
 5 | import Control.Concurrent.Async
 6 | import Control.Concurrent
 7 | import Control.Concurrent.STM -- XXX
 8 | import Data.IORef
 9 | 
10 | import Disruptor.SequenceNumber
11 | import Disruptor.SP.RingBuffer
12 | 
13 | ------------------------------------------------------------------------
14 | 
15 | data EventConsumer s = EventConsumer
16 |   { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber)
17 |   , ecWorker         :: s -> IO s
18 |   , ecInitialState   :: s
19 |   }
20 | 
21 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles
22 | -- even and another handles odd numbers.
23 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s
24 | type EndOfBatch = Bool
25 | 
26 | data SequenceBarrier e
27 |   = RingBufferBarrier (RingBuffer e)
28 |   | forall s. EventConsumerBarrier (EventConsumer s)
29 | 
30 | data WaitStrategy = Sleep Int
31 | 
32 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a
33 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do
34 |   link a
35 |   k a
36 | 
37 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a
38 | withEventConsumerOn capability ec k =
39 |   withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do
40 |     link a
41 |     k a
42 | 
43 | newEventConsumer :: RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e]
44 |                  -> WaitStrategy -> IO (EventConsumer s)
45 | newEventConsumer rb handler s0 _barriers ws = do
46 |   snrRef <- newIORef (-1)
47 | 
48 |   let go s = {-# SCC go #-} do
49 |         mySnr <- readIORef snrRef
50 |         bSnr <- waitFor mySnr rb ws -- XXX: barriers
51 |         -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271
52 |         s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s
53 |         writeIORef snrRef bSnr
54 |         go s'
55 |         where
56 |           go' lo hi s | lo >  hi = return s
57 |                       | lo <= hi = do
58 |             e <- unsafeGet rb lo
59 |             s' <- {-# SCC handler #-} handler s e lo (lo == hi)
60 |             go' (lo + 1) hi s'
61 | 
62 |   return (EventConsumer snrRef go s0)
63 | 
64 | waitFor :: SequenceNumber -> RingBuffer e -> WaitStrategy -> IO SequenceNumber
65 | waitFor consumed rb (Sleep n) = go
66 |   where
67 |     go = do
68 |       produced <- readIORef (rbCursor rb)
69 |       if consumed < produced
70 |       then return produced
71 |       else do
72 |         threadDelay n -- NOTE: removing the sleep seems to cause
73 |                       -- non-termination... XXX: Why though? the consumer should be
74 |                       -- running on its own thread?
75 |         go -- SPIN
76 | 
77 |         -- XXX: Other wait strategies could be implemented here, e.g. we could
78 |         -- try to recurse immediately here, and if there's no work after a
79 |         -- couple of tries go into a takeMTVar sleep waiting for a producer to
80 |         -- wake us up.
81 | 
82 |     _getSequenceNumberRef :: SequenceBarrier e -> IORef SequenceNumber
83 |     _getSequenceNumberRef (RingBufferBarrier    rb) = rbCursor rb
84 |     _getSequenceNumberRef (EventConsumerBarrier ec) = ecSequenceNumber ec
85 | {-# INLINE waitFor #-}
86 | 


--------------------------------------------------------------------------------
/bench/MP/Logging10.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE NumericUnderscores #-}
 2 | 
 3 | module Main where
 4 | 
 5 | import Control.Concurrent
 6 | import Control.Monad (when)
 7 | import Data.ByteString.Char8 (ByteString)
 8 | import Data.ByteString.Builder
 9 | import qualified Data.ByteString.Char8 as BS
10 | import System.IO
11 | 
12 | 
13 | import Disruptor.MP
14 | import Common hiding (nUMBER_OF_PRODUCERS, iTERATIONS)
15 | 
16 | ------------------------------------------------------------------------
17 | 
18 | iTERATIONS = 1_000_000
19 | 
20 | nUMBER_OF_PRODUCERS :: Int
21 | nUMBER_OF_PRODUCERS = 10
22 | 
23 | lOG_MESSAGE :: ByteString
24 | lOG_MESSAGE = BS.pack "some random string to log\n"
25 | 
26 | lOG_FILE :: FilePath
27 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-logger10.log"
28 | 
29 | data Buffer = Buffer
30 |   { bufferCapacity :: !Int
31 |   , bufferSize     :: !Int
32 |   , buffer         :: !Builder
33 |   }
34 | 
35 | newBuffer :: Int -> Buffer
36 | newBuffer capacity = Buffer capacity 0 (byteString (BS.pack ""))
37 | 
38 | hasCapacityBuffer :: ByteString -> Buffer -> Bool
39 | hasCapacityBuffer bs buf = bufferSize buf + BS.length bs <= bufferCapacity buf
40 | 
41 | appendBuffer :: ByteString -> Buffer -> Buffer
42 | appendBuffer bs buf =
43 |   buf { bufferSize = bufferSize buf + BS.length bs
44 |       , buffer     = buffer buf <> byteString bs
45 |       }
46 | 
47 | flushBuffer :: Buffer -> Handle -> IO Buffer
48 | flushBuffer buf h = do
49 |   hPutBuilder h (buffer buf)
50 |   return (newBuffer (bufferCapacity buf))
51 | 
52 | main :: IO ()
53 | main = m10psc setup producer consumer
54 |   where
55 |     setup = do
56 |       cleanup lOG_FILE
57 |       newRingBuffer bUFFER_CAPACITY
58 | 
59 |     producer :: RingBuffer ByteString -> IO ()
60 |     producer rb = go iTERATIONS
61 |       where
62 |         go :: Int -> IO ()
63 |         go 0 = return ()
64 |         go n = do
65 |           mSnr <- tryNext rb
66 |           case mSnr of
67 |             Some snr -> do
68 |               set rb snr lOG_MESSAGE
69 |               publish rb snr
70 |               go (n - 1)
71 |             None ->
72 |               -- NOTE: No sleep needed here.
73 |               go n
74 | 
75 |     consumer :: RingBuffer ByteString -> MVar () -> IO Buffer
76 |     consumer rb consumerFinished = do
77 |       h <- openFile lOG_FILE WriteMode
78 |       hSetBinaryMode h True
79 |       hSetBuffering h (BlockBuffering (Just 4096))
80 |       let handler buf bs snr endOfBatch = do
81 |             buf' <- if hasCapacityBuffer bs buf -- XXX: we can probably do
82 |                                                 -- better here by only flushing
83 |                                                 -- at `endOfBatch`?
84 |             then return (appendBuffer bs buf)
85 |             else do
86 |               buf' <- flushBuffer buf h
87 |               return (appendBuffer bs buf')
88 |             when (endOfBatch &&
89 |                   getSequenceNumber snr ==
90 |                   fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $ do
91 |               flushBuffer buf' h
92 |               hFlush h
93 |               putMVar consumerFinished ()
94 |             return buf'
95 |       ec <- newEventConsumer rb handler (newBuffer bUFFER_CAPACITY) [] (Sleep sLEEP_TIME)
96 |       setGatingSequences rb [ecSequenceNumber ec]
97 |       ecWorker ec (ecInitialState ec)
98 | 


--------------------------------------------------------------------------------
/src/Disruptor/MP/Consumer.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE ExistentialQuantification #-} -- XXX
 2 | 
 3 | module Disruptor.MP.Consumer where
 4 | 
 5 | import Control.Concurrent.Async
 6 | import Control.Concurrent
 7 | import Control.Concurrent.STM -- XXX
 8 | import Data.IORef
 9 | 
10 | import Disruptor.SequenceNumber
11 | import Disruptor.MP.RingBuffer
12 | 
13 | ------------------------------------------------------------------------
14 | 
15 | data EventConsumer s = EventConsumer
16 |   { ecSequenceNumber :: {-# UNPACK #-} !(IORef SequenceNumber)
17 |   , ecWorker         :: s -> IO s
18 |   , ecInitialState   :: s
19 |   }
20 | 
21 | -- NOTE: The `SequenceNumber` can be used for sharding, e.g. one handler handles
22 | -- even and another handles odd numbers.
23 | type EventHandler s e = s -> e -> SequenceNumber -> EndOfBatch -> IO s
24 | type EndOfBatch = Bool
25 | 
26 | data SequenceBarrier e
27 |   = RingBufferBarrier (RingBuffer e)
28 |   | forall s. EventConsumerBarrier (EventConsumer s)
29 | 
30 | data WaitStrategy = Sleep Int
31 | 
32 | withEventConsumer :: EventConsumer s -> (Async s -> IO a) -> IO a
33 | withEventConsumer ec k = withAsync (ecWorker ec (ecInitialState ec)) $ \a -> do
34 |   link a
35 |   k a
36 | 
37 | withEventConsumerOn :: Int -> EventConsumer s -> (Async s -> IO a) -> IO a
38 | withEventConsumerOn capability ec k =
39 |   withAsyncOn capability (ecWorker ec (ecInitialState ec)) $ \a -> do
40 |     link a
41 |     k a
42 | 
43 | newEventConsumer :: RingBuffer e -> EventHandler s e -> s -> [SequenceBarrier e]
44 |                  -> WaitStrategy -> IO (EventConsumer s)
45 | newEventConsumer rb handler s0 _barriers ws = do
46 |   snrRef <- newIORef (-1)
47 | 
48 |   let go s = {-# SCC go #-} do
49 |         mySnr <- readIORef snrRef
50 |         bSnr <- waitFor mySnr rb ws -- XXX: barriers
51 |         -- XXX: what if handler throws exception? https://youtu.be/eTeWxZvlCZ8?t=2271
52 |         s' <- {-# SCC go' #-} go' (mySnr + 1) bSnr s
53 |         writeIORef snrRef bSnr
54 |         go s'
55 |         where
56 |           go' lo hi s | lo >  hi = return s
57 |                       | lo <= hi = do
58 |             e <- get rb lo
59 |             s' <- {-# SCC handler #-} handler s e lo (lo == hi)
60 |             go' (lo + 1) hi s'
61 | 
62 |   return (EventConsumer snrRef go s0)
63 | 
64 | waitFor :: SequenceNumber -> RingBuffer e -> WaitStrategy -> IO SequenceNumber
65 | waitFor consumed rb (Sleep n) = go
66 |   where
67 |     go = do
68 |       claimedSequence <- getCursor rb
69 |       if consumed < claimedSequence
70 |       -- `claimedSequence` may be much higher than the the capacity of the ring
71 |       -- buffer, so we need to guarantee that every consumer makes batches that
72 |       -- are at most `rbCapacity rb` big.
73 |       then return (min claimedSequence (consumed + fromIntegral (rbCapacity rb)))
74 |       else do
75 |         -- NOTE: Removing the sleep seems to cause non-termination... XXX: Why
76 |         -- though? the consumer should be running on its own thread?
77 |         -- yield
78 |         threadDelay n
79 |         go -- SPIN
80 |         --
81 |         -- XXX: Other wait strategies could be implemented here, e.g. we could
82 |         -- try to recurse immediately here, and if there's no work after a
83 |         -- couple of tries go into a takeMTVar sleep waiting for a producer to
84 |         -- wake us up.
85 | {-# INLINE waitFor #-}
86 | 


--------------------------------------------------------------------------------
/bench/MP/Logging.hs:
--------------------------------------------------------------------------------
 1 | -- This example is inspired by log4j's use of the disruptor pattern:
 2 | --
 3 | --   https://logging.apache.org/log4j/2.x/manual/async.html
 4 | --
 5 | -- The idea is that each producer corresponds to a concurrent thread that may
 6 | -- log messages and a single consumer then syncs the log messages to disk.
 7 | 
 8 | module Main where
 9 | 
10 | import Control.Concurrent
11 | import Control.Monad (when)
12 | import Data.ByteString.Char8 (ByteString)
13 | import Data.ByteString.Builder
14 | import qualified Data.ByteString.Char8 as BS
15 | import System.IO
16 | 
17 | import Common
18 | import Disruptor.MP
19 | 
20 | ------------------------------------------------------------------------
21 | 
22 | lOG_MESSAGE :: ByteString
23 | lOG_MESSAGE = BS.pack "some random string to log\n"
24 | 
25 | lOG_FILE :: FilePath
26 | lOG_FILE = "/tmp/pipelined-state-machines-bench-mp-logger.log"
27 | 
28 | data Buffer = Buffer
29 |   { bufferCapacity :: !Int
30 |   , bufferSize     :: !Int
31 |   , buffer         :: !Builder
32 |   }
33 | 
34 | newBuffer :: Int -> Buffer
35 | newBuffer capacity = Buffer capacity 0 (byteString (BS.pack ""))
36 | 
37 | hasCapacityBuffer :: ByteString -> Buffer -> Bool
38 | hasCapacityBuffer bs buf = bufferSize buf + BS.length bs <= bufferCapacity buf
39 | 
40 | appendBuffer :: ByteString -> Buffer -> Buffer
41 | appendBuffer bs buf =
42 |   buf { bufferSize = bufferSize buf + BS.length bs
43 |       , buffer     = buffer buf <> byteString bs
44 |       }
45 | 
46 | flushBuffer :: Buffer -> Handle -> IO Buffer
47 | flushBuffer buf h = do
48 |   hPutBuilder h (buffer buf)
49 |   return (newBuffer (bufferCapacity buf))
50 | 
51 | main :: IO ()
52 | main = mpsc setup producer consumer
53 |   where
54 |     setup = do
55 |       cleanup lOG_FILE
56 |       newRingBuffer bUFFER_CAPACITY
57 | 
58 |     producer :: RingBuffer ByteString -> IO ()
59 |     producer rb = go iTERATIONS
60 |       where
61 |         go :: Int -> IO ()
62 |         go 0 = return ()
63 |         go n = do
64 |           mSnr <- tryNext rb
65 |           case mSnr of
66 |             Some snr -> do
67 |               set rb snr lOG_MESSAGE
68 |               publish rb snr
69 |               go (n - 1)
70 |             None ->
71 |               -- NOTE: No sleep needed here.
72 |               go n
73 | 
74 |     consumer :: RingBuffer ByteString -> MVar () -> IO Buffer
75 |     consumer rb consumerFinished = do
76 |       h <- openFile lOG_FILE WriteMode
77 |       hSetBinaryMode h True
78 |       hSetBuffering h (BlockBuffering (Just 4096))
79 |       let handler buf bs snr endOfBatch = do
80 |             buf' <- if hasCapacityBuffer bs buf -- XXX: we can probably do
81 |                                                 -- better here by only flushing
82 |                                                 -- at `endOfBatch`?
83 |             then return (appendBuffer bs buf)
84 |             else do
85 |               buf' <- flushBuffer buf h
86 |               return (appendBuffer bs buf')
87 |             when (endOfBatch &&
88 |                   getSequenceNumber snr ==
89 |                   fromIntegral (iTERATIONS * nUMBER_OF_PRODUCERS - 1)) $ do
90 |               flushBuffer buf' h
91 |               hFlush h
92 |               putMVar consumerFinished ()
93 |             return buf'
94 |       ec <- newEventConsumer rb handler (newBuffer bUFFER_CAPACITY) [] (Sleep sLEEP_TIME)
95 |       setGatingSequences rb [ecSequenceNumber ec]
96 |       ecWorker ec (ecInitialState ec)
97 | 


--------------------------------------------------------------------------------
/src/Disruptor/AtomicCounterPadded.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE MagicHash #-}
 3 | {-# LANGUAGE UnboxedTuples #-}
 4 | 
 5 | -- Inspired by:
 6 | --   * https://github.com/jberryman/unagi-chan/blob/master/src/Data/Atomics/Counter/Fat.hs
 7 | --   * https://hackage.haskell.org/package/atomic-primops-0.8.4/docs/src/Data.Atomics.Counter.html
 8 | --   * https://hackage.haskell.org/package/unboxed-ref-0.4.0.0/docs/src/Data-STRef-Unboxed-Internal.html#STRefU
 9 | --   * https://hackage.haskell.org/package/ghc-8.10.2/docs/src/FastMutInt.html#FastMutInt
10 | 
11 | module Disruptor.AtomicCounterPadded
12 |     ( AtomicCounter()
13 |     , newCounter
14 |     , incrCounter
15 |     , incrCounter_
16 |     , getAndIncrCounter
17 |     , decrCounter
18 |     , decrCounter_
19 |     , readCounter
20 |     , casCounter
21 |     ) where
22 | 
23 | import Data.Bits (finiteBitSize)
24 | import GHC.Exts
25 | import GHC.Types
26 | 
27 | ------------------------------------------------------------------------
28 | 
29 | data AtomicCounter = AtomicCounter !(MutableByteArray# RealWorld)
30 | 
31 | sIZEOF_CACHELINE :: Int
32 | sIZEOF_CACHELINE = 64
33 | {-# INLINE sIZEOF_CACHELINE #-}
34 | -- ^ TODO: See
35 | -- https://github.com/NickStrupat/CacheLineSize/blob/93a57c094f71a2796714f7a28d74dd8776149193/CacheLineSize.c
36 | -- for how to get the cache line size on Windows, MacOS and Linux.
37 | 
38 | -- | Create a new atomic counter padded with 64-bytes (an x86 cache line) to try
39 | -- to avoid false sharing.
40 | newCounter :: Int -> IO AtomicCounter
41 | newCounter (I# n) = IO $ \s ->
42 |   case newAlignedPinnedByteArray# size alignment s of
43 |     (# s', arr #) -> case writeIntArray# arr 0# n s' of
44 |       s'' -> (# s'', AtomicCounter arr #)
45 |   where
46 |     !(I# size)      = finiteBitSize (0 :: Int)
47 |     !(I# alignment) = sIZEOF_CACHELINE
48 | {-# INLINE newCounter #-}
49 | 
50 | incrCounter :: Int -> AtomicCounter -> IO Int
51 | incrCounter (I# incr) (AtomicCounter arr) = IO $ \s ->
52 |   case fetchAddIntArray# arr 0# incr s of
53 |     (# s', i #) -> (# s', I# (i +# incr) #)
54 | {-# INLINE incrCounter #-}
55 | 
56 | getAndIncrCounter :: Int -> AtomicCounter -> IO Int
57 | getAndIncrCounter (I# incr) (AtomicCounter arr) = IO $ \s ->
58 |   case fetchAddIntArray# arr 0# incr s of
59 |     (# s', i #) -> (# s', I# i #)
60 | {-# INLINE getAndIncrCounter #-}
61 | 
62 | incrCounter_ :: Int -> AtomicCounter -> IO ()
63 | incrCounter_ (I# incr) (AtomicCounter arr) = IO $ \s ->
64 |   case fetchAddIntArray# arr 0# incr s of
65 |     (# s', _i #) -> (# s', () #)
66 | {-# INLINE incrCounter_ #-}
67 | 
68 | decrCounter :: Int -> AtomicCounter -> IO Int
69 | decrCounter (I# decr) (AtomicCounter arr) = IO $ \s ->
70 |   case fetchSubIntArray# arr 0# decr s of
71 |     (# s', i #) -> (# s', I# (i -# decr) #)
72 | {-# INLINE decrCounter #-}
73 | 
74 | decrCounter_ :: Int -> AtomicCounter -> IO ()
75 | decrCounter_ (I# decr) (AtomicCounter arr) = IO $ \s ->
76 |   case fetchSubIntArray# arr 0# decr s of
77 |     (# s', _i #) -> (# s', () #)
78 | {-# INLINE decrCounter_ #-}
79 | 
80 | readCounter :: AtomicCounter -> IO Int
81 | readCounter (AtomicCounter arr) = IO $ \s ->
82 |   case readIntArray# arr 0# s of
83 |     (# s', i #) -> (# s', I# i #)
84 | {-# INLINE readCounter #-}
85 | 
86 | casCounter :: AtomicCounter -> Int -> Int -> IO Bool
87 | casCounter (AtomicCounter arr) (I# old) (I# new) = IO $ \s ->
88 |   case casIntArray# arr 0# old new s of
89 |     (# s', before #) -> case before ==# old of
90 |       1# -> (# s', True #)
91 |       0# -> (# s', False #)
92 | {-# INLINE casCounter #-}
93 | 


--------------------------------------------------------------------------------
/bench/Common.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE NumericUnderscores #-}
  2 | 
  3 | module Common where
  4 | 
  5 | import Control.Monad (when)
  6 | import Control.Concurrent
  7 | import Control.Concurrent.Async
  8 | import Control.Concurrent.MVar
  9 | import Data.Time
 10 | import System.Directory (doesFileExist, removeFile)
 11 | import System.Mem (performGC)
 12 | import Text.Printf
 13 | 
 14 | ------------------------------------------------------------------------
 15 | 
 16 | iTERATIONS :: Int
 17 | iTERATIONS = 100_000_000
 18 | 
 19 | vALUE_TO_WRITE :: Int
 20 | vALUE_TO_WRITE = 1
 21 | 
 22 | nUMBER_OF_PRODUCERS :: Int
 23 | nUMBER_OF_PRODUCERS = 3
 24 | 
 25 | sLEEP_TIME :: Int
 26 | sLEEP_TIME = 0
 27 | 
 28 | bUFFER_CAPACITY :: Int
 29 | bUFFER_CAPACITY = 1024 * 64
 30 | 
 31 | -- Single-producer single-consumer helper.
 32 | spsc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO ()) -> IO ()
 33 | spsc setup producer consumer = do
 34 |   (r, consumerFinished, start) <- header setup
 35 |   withAsync (producer r) $ \ap ->
 36 |     withAsync (consumer r consumerFinished) $ \ac -> do
 37 |       () <- takeMVar consumerFinished
 38 |       end <- getCurrentTime
 39 |       cancel ap
 40 |       cancel ac
 41 |       footer start end
 42 | 
 43 | -- Multiple-producers single-consumer helper.
 44 | mpsc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO c) -> IO ()
 45 | mpsc setup producer consumer = do
 46 |   (r, consumerFinished, start) <- header setup
 47 |   withAsync (producer r) $ \ap1 ->
 48 |     withAsync (producer r) $ \ap2 ->
 49 |       withAsync (producer r) $ \ap3 ->
 50 |         withAsync (consumer r consumerFinished) $ \ac -> do
 51 |           () <- takeMVar consumerFinished
 52 |           end <- getCurrentTime
 53 |           mapM_ cancel [ap1, ap2, ap3]
 54 |           cancel ac
 55 |           footer start end
 56 | 
 57 | m10psc :: IO a -> (a -> IO ()) -> (a -> MVar () -> IO c) -> IO ()
 58 | m10psc setup producer consumer = do
 59 |   (r, consumerFinished, start) <- header setup
 60 |   withAsync (producer r) $ \ap1 ->
 61 |     withAsync (producer r) $ \ap2 ->
 62 |       withAsync (producer r) $ \ap3 ->
 63 |         withAsync (producer r) $ \ap4 ->
 64 |           withAsync (producer r) $ \ap5 ->
 65 |             withAsync (producer r) $ \ap6 ->
 66 |               withAsync (producer r) $ \ap7 ->
 67 |                 withAsync (producer r) $ \ap8 ->
 68 |                   withAsync (producer r) $ \ap9 ->
 69 |                     withAsync (producer r) $ \ap10 ->
 70 |                       withAsync (consumer r consumerFinished) $ \ac -> do
 71 |                         () <- takeMVar consumerFinished
 72 |                         end <- getCurrentTime
 73 |                         mapM_ cancel [ap1, ap2, ap3, ap4, ap5, ap6, ap7, ap8, ap9, ap10]
 74 |                         cancel ac
 75 |                         footer start end
 76 | 
 77 | header :: IO a -> IO (a, MVar (), UTCTime)
 78 | header setup = do
 79 |   n <- getNumCapabilities
 80 |   printf "%-25.25s%10d\n" "CPU capabilities" n
 81 |   printf "%-25.25s%10d\n" "Total number of events" iTERATIONS
 82 |   r <- setup
 83 |   consumerFinished <- newEmptyMVar
 84 |   performGC
 85 |   start <- getCurrentTime
 86 |   return (r, consumerFinished, start)
 87 | 
 88 | footer :: UTCTime -> UTCTime -> IO ()
 89 | footer start end = do
 90 |   let duration :: Double
 91 |       duration = realToFrac (diffUTCTime end start)
 92 | 
 93 |       throughput :: Double
 94 |       throughput = realToFrac iTERATIONS / duration
 95 |   printf "%-25.25s%10.2f events/s\n" "Throughput" throughput
 96 |   printf "%-25.25s%10.2f s\n" "Duration" duration
 97 | 
 98 |   -- XXX: prettyPrintHistogram histo
 99 |   -- meanTransactions <- hmean histo
100 |   -- printf "%-25.25s%10.2f\n" "Mean concurrent txs" meanTransactions
101 |   -- Just maxTransactions <- percentile 100.0 histo
102 |   -- printf "%-25.25s%10.2f\n" "Max concurrent txs" maxTransactions
103 |   -- printf "%-25.25s%10.2f ns\n" "Latency" ((meanTransactions / throughput) * 1000000)
104 | 
105 | cleanup :: FilePath -> IO ()
106 | cleanup fp = do
107 |   b <- doesFileExist fp
108 |   when b (removeFile fp)
109 | 


--------------------------------------------------------------------------------
/app/ReadmePipelineExample.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs #-}
  2 | {-# LANGUAGE BangPatterns #-}
  3 | 
  4 | module Main where
  5 | 
  6 | import Control.Monad
  7 | import Control.Concurrent (threadDelay)
  8 | import Control.Concurrent.Async
  9 | import Control.Concurrent.STM
 10 | import Control.Concurrent.STM.TQueue
 11 | import Data.Time
 12 | import Data.List
 13 | 
 14 | ------------------------------------------------------------------------
 15 | 
 16 | data SM s a b where
 17 |   Id      :: SM s a a
 18 |   Compose :: SM s b c -> SM s a b -> SM s a c
 19 |   Fst     :: SM s (a, b) a
 20 |   Snd     :: SM s (a, b) b
 21 |   (:&&&)  :: SM s a c -> SM s a d -> SM s a (c, d)
 22 |   (:***)  :: SM s a c -> SM s b d -> SM s (a, b) (c, d)
 23 |   SlowIO  :: SM s a a
 24 | 
 25 | swap :: SM () (a, b) (b, a)
 26 | swap = Snd :*** Fst `Compose` copy `Compose` SlowIO
 27 |   where
 28 |     copy = Id :&&& Id
 29 | 
 30 | interpret :: SM s a b -> (a -> s -> IO (s, b))
 31 | interpret Id            x s = return (s, x)
 32 | interpret (Compose g f) x s = do
 33 |   (s', y) <- interpret f x s
 34 |   interpret g y s'
 35 | interpret Fst           x s = return (s, fst x)
 36 | interpret Snd           x s = return (s, snd x)
 37 | interpret (f :&&& g)    x s = do
 38 |   (s', y)  <- interpret f x s
 39 |   (s'', z) <- interpret g x s'
 40 |   return (s'', (y, z))
 41 | interpret (f :*** g)    x s = do
 42 |   (s', y)  <- interpret f (fst x) s
 43 |   (s'', z) <- interpret g (snd x) s'
 44 |   return (s'', (y, z))
 45 | interpret SlowIO x s = do
 46 |   threadDelay 200000 -- 0.2s
 47 |   return (s, x)
 48 | 
 49 | data P a b where
 50 |   SM     :: String -> SM s a b -> s -> P a b
 51 |   (:>>>) :: P a b -> P b c -> P a c
 52 |   Shard  :: P a b -> P a b
 53 | 
 54 | swapsSequential :: P (a, b) (b, a)
 55 | swapsSequential = SM "three swaps" (swap `Compose` swap `Compose` swap) ()
 56 | 
 57 | swapsPipelined :: P (a, b) (b, a)
 58 | swapsPipelined =
 59 |   SM "first swap"  swap () :>>>
 60 |   SM "second swap" swap () :>>>
 61 |   SM "third swap"  swap ()
 62 | 
 63 | data Deployment a = Deployment
 64 |   { queue :: TQueue a
 65 |   , pids  :: [(String, Async ())]
 66 |   }
 67 | 
 68 | names :: Deployment a -> String
 69 | names = bracket . intercalate "," . reverse . map fst . pids
 70 |   where
 71 |     bracket s = "[" ++ s ++ "]"
 72 | 
 73 | stop :: Deployment a -> IO ()
 74 | stop = mapM_ (cancel . snd) . pids
 75 | 
 76 | deploy' :: P a b -> Deployment a -> IO (Deployment b)
 77 | deploy' (SM name sm s0) d = do
 78 |   q' <- newTQueueIO
 79 |   pid <- async (go s0 q')
 80 |   return Deployment { queue = q', pids = (name, pid) : pids d }
 81 |   where
 82 |     f = interpret sm
 83 | 
 84 |     go s q' = do
 85 |       x <- atomically $ readTQueue (queue d)
 86 |       (s', o) <- f x s
 87 |       atomically $ writeTQueue q' o
 88 |       go s' q'
 89 | deploy' (sm :>>> sm') d = do
 90 |   d' <- deploy' sm d
 91 |   deploy' sm' d'
 92 | deploy' (Shard p) d = do
 93 |   let qIn = queue d
 94 |   qEven  <- newTQueueIO
 95 |   qOdd   <- newTQueueIO
 96 |   pidIn  <- async $ shardQIn qIn qEven qOdd
 97 |   dEven  <- deploy' p (Deployment qEven [])
 98 |   dOdd   <- deploy' p (Deployment qOdd [])
 99 |   qOut   <- newTQueueIO
100 |   pidOut <- async $ shardQOut (queue dEven) (queue dOdd) qOut
101 |   return (Deployment qOut (("shardIn:  " ++ names dEven ++ " & " ++ names dOdd, pidIn) :
102 |                            ("shardOut: " ++ names dEven ++ " & " ++ names dOdd, pidOut) :
103 |                            pids dEven ++ pids dOdd ++ pids d))
104 |   where
105 |     shardQIn :: TQueue a -> TQueue a -> TQueue a -> IO ()
106 |     shardQIn  qIn qEven qOdd = do
107 |       atomically (readTQueue qIn >>= writeTQueue qEven)
108 |       shardQIn qIn qOdd qEven
109 | 
110 |     shardQOut :: TQueue a -> TQueue a -> TQueue a -> IO ()
111 |     shardQOut qEven qOdd qOut = do
112 |       atomically (readTQueue qEven >>= writeTQueue qOut)
113 |       shardQOut qOdd qEven qOut
114 | 
115 | deploy :: P a b -> IO (TQueue a, Deployment b)
116 | deploy p = do
117 |   q <- newTQueueIO
118 |   d <- deploy' p (Deployment q [])
119 |   return (q, d)
120 | 
121 | swapsSharded :: P (a, b) (b, a)
122 | swapsSharded =
123 |   Shard (SM "first swap"  swap ()) :>>>
124 |   Shard (SM "second swap" swap ()) :>>>
125 |   Shard (SM "third swap"  swap ())
126 | 
127 | data PipelineKind = Sequential | Pipelined | Sharded
128 |   deriving Show
129 | 
130 | main :: IO ()
131 | main = do
132 |   mapM_ libMain [Sequential, Pipelined, Sharded]
133 | 
134 | libMain :: PipelineKind -> IO ()
135 | libMain k = do
136 |   (q, d) <- deploy $ case k of
137 |                        Sequential -> swapsSequential
138 |                        Pipelined  -> swapsPipelined
139 |                        Sharded    -> swapsSharded
140 |   print k
141 |   putStrLn $ "Pids: " ++ names d
142 |   !start <- getCurrentTime
143 |   forM_ [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)] $ \x ->
144 |     atomically $ writeTQueue q x
145 |   !resps <- replicateM 6 $ atomically $ readTQueue (queue d)
146 |   !end <- getCurrentTime
147 |   putStrLn $ "Responses: " ++ show resps
148 |   putStrLn $ "Time: " ++ show (diffUTCTime end start)
149 |   putStrLn ""
150 |   stop d
151 | 


--------------------------------------------------------------------------------
/benchmark.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | set -euo pipefail
  4 | 
  5 | # Inspired by: https://sled.rs/perf.html#experimental-design
  6 | 
  7 | BENCHMARK_WORKLOAD1="bench-mp-logging10"
  8 | BENCHMARK_WORKLOAD2="bench-mp-fast-logger10"
  9 | BENCHMARK_NUMBER_OF_RUNS=5
 10 | # TODO: +RTS --nonmoving-gc ?
 11 | BENCHMARK_GHC_OPTS=("-threaded" "-rtsopts" "-with-rtsopts=-N")
 12 | BENCHMARK_CABAL_BUILD_OPTS=("--enable-benchmarks"
 13 |                             "--disable-profiling"
 14 |                             "-O2"
 15 |                             "--ghc-options=${BENCHMARK_GHC_OPTS[*]}")
 16 | BENCHMARK_CABAL_RUN_OPTS=("-O2"
 17 |                           "--ghc-options=${BENCHMARK_GHC_OPTS[*]}")
 18 | BENCHMARK_PERF_EVENTS="L1-dcache-loads,L1-dcache-load-misses,LLC-loads,LLC-load-misses,dTLB-loads,dTLB-load-misses"
 19 | 
 20 | # Save info about current hardware and OS setup.
 21 | uname --kernel-name --kernel-release --kernel-version --machine --operating-system
 22 | echo ""
 23 | lscpu
 24 | echo ""
 25 | 
 26 | # Warn about non-essential programs...
 27 | FIREFOX_PID="$(pgrep GeckoMain)"
 28 | if [ -n "${FIREFOX_PID}" ]; then
 29 |     read -r -p "Firefox is running, wanna kill it? [y/N] " yn
 30 |     case $yn in
 31 |         [Yy]*) kill -1 "${FIREFOX_PID}" ;;
 32 |         *) ;;
 33 |     esac
 34 | fi
 35 | 
 36 | if [ -f "/tmp/${BENCHMARK_WORKLOAD1}.txt" ] || [ -f "/tmp/${BENCHMARK_WORKLOAD2}.txt" ]; then
 37 |     read -r -p "Old benchmark results exist, wanna remove them? [y/N] " yn
 38 |     case $yn in
 39 |         [Yy]*) rm -f "/tmp/${BENCHMARK_WORKLOAD1}.txt";
 40 |                rm -f "/tmp/${BENCHMARK_WORKLOAD2}.txt" ;;
 41 |         *) ;;
 42 |     esac
 43 | fi
 44 | 
 45 | # Use the performance governor instead of powersave (for laptops).
 46 | for policy in /sys/devices/system/cpu/cpufreq/policy*; do
 47 |     echo "${policy}"
 48 |     echo "performance" | sudo tee "${policy}/scaling_governor"
 49 | done
 50 | 
 51 | # Compile workloads.
 52 | 
 53 | # XXX: enable benchmarking against old versions of same test
 54 | # BENCHMARK_GITHASH1="XXX: NOT USED YET"
 55 | # BENCHMARK_GITHASH2="XXX: NOT USED YET"
 56 | # BENCHMARK_BIN1="/tmp/${BENCHMARK_GITHASH1}-${BENCHMARK_WORKLOAD1}"
 57 | # BENCHMARK_BIN2="/tmp/${BENCHMARK_GITHASH2}-${BENCHMARK_WORKLOAD2}"
 58 | # if [ -n "${BENCHMARK_GITHASH1}" ] && [ ! -f "${BENCHMARK_BIN1}" ] ; then
 59 | #     git checkout "${BENCHMARK_GITHASH1}"
 60 | #     cabal build -O2 "${BENCHMARK_WORKLOAD1}"
 61 | #     cp $(cabal list-bin "${BENCHMARK_WORKLOAD1}") "${BENCHMARK_BIN1}"
 62 | # fi
 63 | 
 64 | cabal build "${BENCHMARK_CABAL_BUILD_OPTS[@]}" "${BENCHMARK_WORKLOAD2}"
 65 | 
 66 | # Disable turbo boost.
 67 | echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
 68 | 
 69 | # The following run is just a (CPU) warm up, the results are discarded.
 70 | cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD2}"
 71 | 
 72 | # Run the benchmarks. By running workloads interleaved with each other, we
 73 | # reduce the risk of having particular transient system-wide effects impact only
 74 | # a single measurement.
 75 | for i in $(seq ${BENCHMARK_NUMBER_OF_RUNS}); do
 76 |     echo "Running benchmark run ${i}"
 77 |     perf stat --event="${BENCHMARK_PERF_EVENTS}" \
 78 |          cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD1}" \
 79 |          2>&1 | tee --append "/tmp/${BENCHMARK_WORKLOAD1}.txt"
 80 |     perf stat --event="${BENCHMARK_PERF_EVENTS}" \
 81 |          cabal run "${BENCHMARK_CABAL_RUN_OPTS[@]}" "${BENCHMARK_WORKLOAD2}" \
 82 |          2>&1 | tee --append "/tmp/${BENCHMARK_WORKLOAD2}.txt"
 83 | 
 84 |     # XXX: Can't get the below to work, ${BENCHMARK_WORKLOAD} env var doesn't
 85 |     # get interpolated correctly into the string?
 86 |     # Use `nice` to bump the priority of the benchmark process to the highest possible.
 87 |     ##sudo nice -n -20 su -c \
 88 |     ##     "perf stat -e cache-misses,branch-misses,dTLB-load-misses,iTLB-load-misses cabal run -O2 ${BENCHMARK_WORKLOAD1} >> /tmp/${BENCHMARK_WORKLOAD1}.txt" \
 89 |     ##     "${USER}"
 90 |     ##sudo nice -n -20 su -c \
 91 |     ##     "perf stat -e cache-misses,branch-misses,dTLB-load-misses,iTLB-load-misses cabal run -O2 ${BENCHMARK_WORKLOAD2} >> /tmp/${BENCHMARK_WORKLOAD2}.txt" \
 92 |     ##     "${USER}"
 93 | 
 94 | done
 95 | 
 96 | # Re-enable turbo boost.
 97 | echo 0 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo
 98 | 
 99 | # Go back to powersave governor.
100 | for policy in /sys/devices/system/cpu/cpufreq/policy*; do
101 |     echo "${policy}"
102 |     echo "powersave" | sudo tee "${policy}/scaling_governor"
103 | done
104 | 
105 | # Output throughput data for R analysis.
106 | R_FILE="/tmp/${BENCHMARK_WORKLOAD1}-${BENCHMARK_WORKLOAD2}.r"
107 | 
108 | echo 'Input=("' > "${R_FILE}"
109 | { echo "Workload Throughput";
110 |   awk -v wl1="${BENCHMARK_WORKLOAD1}" \
111 |       '/Throughput/ { print wl1, $2 }' "/tmp/${BENCHMARK_WORKLOAD1}.txt";
112 |   awk -v wl2="${BENCHMARK_WORKLOAD2}" \
113 |       '/Throughput/ { print wl2, $2 }' "/tmp/${BENCHMARK_WORKLOAD2}.txt";
114 |   echo '")'
115 | } >> "${R_FILE}"
116 | 
117 | cat << EOF >> "${R_FILE}"
118 | Data = read.table(textConnection(Input),header=TRUE)
119 | bartlett.test(Throughput ~ Workload, data=Data)
120 | 
121 | # If p-value >= 0.05, use var.equal=TRUE below
122 | 
123 | t.test(Throughput ~ Workload, data=Data,
124 |        var.equal=TRUE,
125 |        conf.level=0.95)
126 | EOF
127 | 
128 | Rscript "${R_FILE}"
129 | 
130 | # Profiling
131 | 
132 | # On Linux install `perf` See
133 | # https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html for how
134 | # to setup the permissions for using `perf`. Also note that on some systems,
135 | # e.g. Ubuntu, `/usr/bin/perf` is not the actual binary but rather a bash script
136 | # that calls the binary. Note that the steps in the admin guide needs to be
137 | # performed on the binary and not the shell script. Since Linux 5.8 there's a
138 | # special capability for capturing perf data called `cap_perfmon`, if you are on
139 | # a older version you need `cap_sys_admin` instead.
140 | #
141 | # For more see: https://brendangregg.com/perf.html
142 | 


--------------------------------------------------------------------------------
/pipelined-state-machines.cabal:
--------------------------------------------------------------------------------
  1 | cabal-version:      2.4
  2 | name:               pipelined-state-machines
  3 | version:            0.0.0
  4 | synopsis:           High performance inter-thread messaging library
  5 | description:
  6 |   See README at <https://github.com/stevana/pipelined-state-machines#readme>
  7 | 
  8 | bug-reports:        https://github.com/stevana/pipelined-state-machines/issues
  9 | license:            MIT
 10 | license-file:       LICENSE
 11 | author:             Stevan Andjelkovic, Daniel Gustafsson
 12 | maintainer:         stevana@users.noreply.github.com
 13 | copyright:          Copyright (c) 2021 Symbiont Inc, 2022-2023 Stevan Andjelkovic
 14 | category:           Concurrency, Data Structures
 15 | extra-source-files:
 16 |   CHANGELOG.md
 17 |   LICENSE
 18 |   README.md
 19 |   benchmark.sh
 20 | 
 21 | tested-with:        GHC ==9.0.2
 22 | 
 23 | common common-build-depends
 24 |   build-depends:
 25 |     , async
 26 |     , atomic-primops
 27 |     , base            ^>=4.15.0.0
 28 |     , ghc-prim
 29 |     , stm
 30 |     , vector
 31 | 
 32 | library
 33 |   import:           common-build-depends
 34 |   hs-source-dirs:   src/
 35 |   exposed-modules:
 36 |     Disruptor
 37 |     Disruptor.AtomicCounterPadded
 38 |     Disruptor.MP
 39 |     Disruptor.MP.Consumer
 40 |     Disruptor.MP.Producer
 41 |     Disruptor.MP.RingBuffer
 42 |     Disruptor.SequenceNumber
 43 |     Disruptor.SP
 44 |     Disruptor.SP.Consumer
 45 |     Disruptor.SP.Producer
 46 |     Disruptor.SP.RingBuffer
 47 |     Disruptor.SP.Unboxed.Consumer
 48 |     Disruptor.SP.Unboxed.Producer
 49 |     Disruptor.SP.Unboxed.RingBuffer
 50 | 
 51 |   ghc-options:      -O2
 52 |   default-language: Haskell2010
 53 | 
 54 | test-suite test
 55 |   import:           common-build-depends
 56 |   type:             exitcode-stdio-1.0
 57 |   hs-source-dirs:   test/
 58 |   main-is:          Main.hs
 59 |   other-modules:
 60 |     DisruptorTest
 61 |     TastyDiscover
 62 | 
 63 |   build-depends:
 64 |     , containers
 65 |     , pipelined-state-machines
 66 |     , HUnit
 67 |     , QuickCheck
 68 |     , tasty
 69 |     , tasty-hunit
 70 |     , tasty-quickcheck
 71 | 
 72 |   ghc-options:      -threaded -rtsopts -with-rtsopts=-N -fno-ignore-asserts
 73 |   default-language: Haskell2010
 74 | 
 75 | executable readme-pipeline-example
 76 |   import:           common-build-depends
 77 |   main-is:          ReadmePipelineExample.hs
 78 |   build-depends:    pipelined-state-machines, time
 79 |   hs-source-dirs:   app
 80 |   default-language: Haskell2010
 81 |   ghc-options:      -threaded -rtsopts -with-rtsopts=-N
 82 | 
 83 | executable readme-disruptor-example
 84 |   import:           common-build-depends
 85 |   main-is:          ReadmeDisruptorExample.hs
 86 |   build-depends:    pipelined-state-machines
 87 |   hs-source-dirs:   app
 88 |   default-language: Haskell2010
 89 |   ghc-options:      -threaded -rtsopts -with-rtsopts=-N
 90 | 
 91 | common bench-common
 92 |   hs-source-dirs:   bench
 93 |   build-depends:
 94 |     , async
 95 |     , base
 96 |     , time
 97 |     , directory
 98 |   other-modules:    Common
 99 | 
100 |   -- Some of these options are taking from:
101 |   --   https://wiki.haskell.org/Performance/GHC#Use_optimisation
102 |   -- XXX: try with -fllvm
103 | 
104 |   -- To inspect core:
105 |   -- -fforce-recomp
106 |   -- -ddump-simpl
107 |   -- -dsuppress-all
108 |   -- -ddump-to-file
109 |   --
110 |   -- To produce event log:
111 |   -- -threaded -eventlog -rtsopts -with-rtsopts=-N
112 |   ghc-options:
113 |     -O2 -fproc-alignment=64 -fexcess-precision -fasm -optc-O3
114 |     -optc-ffast-math -threaded -rtsopts -with-rtsopts=-N
115 | 
116 |   default-language: Haskell2010
117 | 
118 | common bench-common-sp
119 |   import:         bench-common
120 |   hs-source-dirs: bench/SP
121 | 
122 | common bench-common-mp
123 |   import:         bench-common
124 |   hs-source-dirs: bench/MP
125 | 
126 | benchmark bench-singleops
127 |   import:        bench-common
128 |   main-is:       SingleOps.hs
129 |   build-depends:
130 |     , atomic-primops
131 |     , pipelined-state-machines
132 | 
133 |   type:          exitcode-stdio-1.0
134 | 
135 | benchmark bench-sp
136 |   import:        bench-common-sp
137 |   main-is:       Disruptor.hs
138 |   build-depends: pipelined-state-machines
139 |   type:          exitcode-stdio-1.0
140 | 
141 | benchmark bench-sp-unboxed
142 |   import:        bench-common-sp
143 |   main-is:       DisruptorUnboxed.hs
144 |   build-depends: pipelined-state-machines
145 |   type:          exitcode-stdio-1.0
146 | 
147 | benchmark bench-sp-chan
148 |   import:  bench-common-sp
149 |   main-is: Chan.hs
150 |   type:    exitcode-stdio-1.0
151 | 
152 | benchmark bench-sp-tbqueue
153 |   import:        bench-common-sp
154 |   main-is:       TBQueue.hs
155 |   build-depends: stm
156 |   type:          exitcode-stdio-1.0
157 | 
158 | benchmark bench-sp-chaselev-deque
159 |   import:        bench-common-sp
160 |   main-is:       ChaseLevDeque.hs
161 |   build-depends: chaselev-deque
162 |   type:          exitcode-stdio-1.0
163 | 
164 | benchmark bench-sp-lockfree-queue
165 |   import:        bench-common-sp
166 |   main-is:       LockFreeQueue.hs
167 |   build-depends: lockfree-queue
168 |   type:          exitcode-stdio-1.0
169 | 
170 | benchmark bench-sp-unagi-chan
171 |   import:        bench-common-sp
172 |   main-is:       UnagiChan.hs
173 |   build-depends: unagi-chan
174 |   type:          exitcode-stdio-1.0
175 | 
176 | benchmark bench-mp
177 |   import:        bench-common-mp
178 |   main-is:       Disruptor.hs
179 |   build-depends: pipelined-state-machines
180 |   type:          exitcode-stdio-1.0
181 | 
182 | benchmark bench-mp-chan
183 |   import:  bench-common-mp
184 |   main-is: Chan.hs
185 |   type:    exitcode-stdio-1.0
186 | 
187 | benchmark bench-mp-tbqueue
188 |   import:        bench-common-mp
189 |   main-is:       TBQueue.hs
190 |   build-depends: stm
191 |   type:          exitcode-stdio-1.0
192 | 
193 | benchmark bench-mp-unagi-chan
194 |   import:        bench-common-mp
195 |   main-is:       UnagiChan.hs
196 |   build-depends: unagi-chan
197 |   type:          exitcode-stdio-1.0
198 | 
199 | benchmark bench-mp-lockfree-queue
200 |   import:        bench-common-mp
201 |   main-is:       LockFreeQueue.hs
202 |   build-depends: lockfree-queue
203 |   type:          exitcode-stdio-1.0
204 | 
205 | benchmark bench-mp-logging
206 |   import:        bench-common-mp
207 |   main-is:       Logging.hs
208 |   build-depends: pipelined-state-machines, bytestring
209 |   type:          exitcode-stdio-1.0
210 | 
211 | benchmark bench-mp-fast-logger
212 |   import:        bench-common-mp
213 |   main-is:       FastLogger.hs
214 |   build-depends: fast-logger
215 |   type:          exitcode-stdio-1.0
216 | 
217 | benchmark bench-mp-logging10
218 |   import:        bench-common-mp
219 |   main-is:       Logging10.hs
220 |   build-depends: pipelined-state-machines, bytestring
221 |   type:          exitcode-stdio-1.0
222 | 
223 | benchmark bench-mp-fast-logger10
224 |   import:        bench-common-mp
225 |   main-is:       FastLogger10.hs
226 |   build-depends: fast-logger
227 |   type:          exitcode-stdio-1.0
228 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/Unboxed/RingBuffer.hs:
--------------------------------------------------------------------------------
  1 | module Disruptor.SP.Unboxed.RingBuffer where
  2 | 
  3 | import Control.Exception (assert)
  4 | import Control.Monad (when)
  5 | import Data.Bits (popCount)
  6 | import Data.IORef (IORef, newIORef, readIORef, writeIORef)
  7 | import Data.Int (Int64)
  8 | import qualified Data.Vector as ImmutableVector
  9 | import qualified Data.Vector.Mutable as Boxed
 10 | import Data.Vector.Unboxed.Mutable (IOVector, Unbox)
 11 | import qualified Data.Vector.Unboxed.Mutable as Vector
 12 | 
 13 | import Disruptor.SequenceNumber
 14 | 
 15 | ------------------------------------------------------------------------
 16 | 
 17 | data RingBuffer e = RingBuffer
 18 |   {
 19 |   -- | The capacity, or maximum amount of values, of the ring buffer.
 20 |     rbCapacity :: {-# UNPACK #-} !Int64
 21 |   -- | The cursor pointing to the head of the ring buffer.
 22 |   , rbCursor :: {-# UNPACK #-} !(IORef SequenceNumber)
 23 |   -- | The values of the ring buffer.
 24 |   , rbEvents :: !(IOVector e)
 25 |   -- | References to the last consumers' sequence numbers, used in order to
 26 |   -- avoid wrapping the buffer and overwriting events that have not been
 27 |   -- consumed yet.
 28 |   , rbGatingSequences :: {-# UNPACK #-} !(IORef (Boxed.IOVector (IORef SequenceNumber)))
 29 |   -- | Cached value of computing the last consumers' sequence numbers using the
 30 |   -- above references.
 31 |   , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber)
 32 |   }
 33 | 
 34 | newRingBuffer :: Unbox e => Int -> IO (RingBuffer e)
 35 | newRingBuffer capacity
 36 |   | capacity <= 0          =
 37 |       error "newRingBuffer: capacity must be greater than 0"
 38 |   | popCount capacity /= 1 =
 39 |       -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this.
 40 |       error "newRingBuffer: capacity must be a power of 2"
 41 |   | otherwise              = do
 42 |       snr <- newIORef (-1)
 43 |       v   <- Vector.new capacity
 44 |       gs  <- newIORef =<< Boxed.new 0
 45 |       cgs <- newIORef (-1)
 46 |       return (RingBuffer (fromIntegral capacity) snr v gs cgs)
 47 | 
 48 | -- | The capacity, or maximum amount of values, of the ring buffer.
 49 | capacity :: RingBuffer e -> Int64
 50 | capacity = rbCapacity
 51 | {-# INLINE capacity #-}
 52 | 
 53 | getCursor :: RingBuffer e -> IO SequenceNumber
 54 | getCursor rb = readIORef (rbCursor rb)
 55 | {-# INLINE getCursor #-}
 56 | 
 57 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO ()
 58 | setGatingSequences rb gs = do
 59 |   v <- ImmutableVector.thaw (ImmutableVector.fromList gs)
 60 |   writeIORef (rbGatingSequences rb) v
 61 | {-# INLINE setGatingSequences #-}
 62 | 
 63 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber
 64 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb)
 65 | {-# INLINE getCachedGatingSequence #-}
 66 | 
 67 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO ()
 68 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb)
 69 | {-# INLINE setCachedGatingSequence #-}
 70 | 
 71 | minimumSequence :: RingBuffer e -> IO SequenceNumber
 72 | minimumSequence rb = do
 73 |   cursorValue <- getCursor rb
 74 |   minimumSequence' (rbGatingSequences rb) cursorValue
 75 | {-# INLINE minimumSequence #-}
 76 | 
 77 | minimumSequence' :: IORef (Boxed.IOVector (IORef SequenceNumber)) -> SequenceNumber
 78 |                  -> IO SequenceNumber
 79 | minimumSequence' gatingSequences cursorValue = do
 80 |   gs <- readIORef gatingSequences
 81 |   go gs
 82 |   where
 83 |     go :: Boxed.IOVector (IORef SequenceNumber) -> IO SequenceNumber
 84 |     go gs = go' 0 cursorValue
 85 |       where
 86 |         len :: Int
 87 |         len = Boxed.length gs - 1
 88 | 
 89 |         go' :: Int -> SequenceNumber -> IO SequenceNumber
 90 |         go' ix minSequence | ix >  len = return minSequence
 91 |                            | ix <= len = do
 92 |           g <- readIORef =<< Boxed.read gs ix
 93 |           if g < minSequence
 94 |           then go' (ix + 1) g
 95 |           else go' (ix + 1) minSequence
 96 | {-# INLINE minimumSequence' #-}
 97 | 
 98 | -- | Currently available slots to write to.
 99 | size :: RingBuffer e -> IO Int64
100 | size rb = do
101 |   consumed <- minimumSequence rb
102 |   produced <- getCursor rb
103 |   return (capacity rb - fromIntegral (produced - consumed))
104 | {-# INLINE size #-}
105 | 
106 | -- | Claim the next event in sequence for publishing.
107 | next :: RingBuffer e -> IO SequenceNumber
108 | next rb = nextBatch rb 1
109 | {-# INLINE next #-}
110 | 
111 | -- | Claim the next `n` events in sequence for publishing. This is for batch
112 | -- event producing. Returns the highest claimed sequence number, so using it
113 | -- requires a bit of extra work, e.g.:
114 | --
115 | -- @
116 | --     let n = 10
117 | --     hi <- nextBatch rb n
118 | --     let lo = hi - (n - 1)
119 | --     mapM_ f [lo..hi]
120 | --     publishBatch rb lo hi
121 | -- @
122 | --
123 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber
124 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do
125 |   current <- getCursor rb
126 |   let nextSequence :: SequenceNumber
127 |       nextSequence = current + fromIntegral n
128 | 
129 |       wrapPoint :: SequenceNumber
130 |       wrapPoint = nextSequence - fromIntegral (capacity rb)
131 | 
132 |   writeIORef (rbCursor rb) nextSequence
133 |   cachedGatingSequence <- getCachedGatingSequence rb
134 | 
135 |   when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $
136 |     waitForConsumers wrapPoint
137 | 
138 |   return nextSequence
139 |   where
140 |     waitForConsumers :: SequenceNumber -> IO ()
141 |     waitForConsumers wrapPoint = go
142 |       where
143 |         go :: IO ()
144 |         go = do
145 |           gatingSequence <- minimumSequence rb
146 |           if wrapPoint > gatingSequence
147 |           then go
148 |           else setCachedGatingSequence rb gatingSequence
149 | {-# INLINE nextBatch #-}
150 | 
151 | -- Try to return the next sequence number to write to. If `Nothing` is returned,
152 | -- then the last consumer has not yet processed the event we are about to
153 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext`
154 | -- should apply back-pressure upstream if this happens.
155 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber
156 | tryNext rb = tryNextBatch rb 1
157 | {-# INLINE tryNext #-}
158 | 
159 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber
160 | tryNextBatch rb n = assert (n > 0) $ do
161 |   current <- getCursor rb
162 |   let next = current + fromIntegral n
163 |       wrapPoint = next - fromIntegral (capacity rb)
164 |   cachedGatingSequence <- getCachedGatingSequence rb
165 |   if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current)
166 |   then do
167 |     minSequence <- minimumSequence' (rbGatingSequences rb) current
168 |     setCachedGatingSequence rb minSequence
169 |     if (wrapPoint > minSequence)
170 |     then return None
171 |     else return (Some next)
172 |   else return (Some next)
173 | {-# INLINE tryNextBatch #-}
174 | 
175 | set :: Unbox e => RingBuffer e -> SequenceNumber -> e -> IO ()
176 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (rbCapacity rb) snr) e
177 | {-# INLINE set #-}
178 | 
179 | publish :: RingBuffer e -> SequenceNumber -> IO ()
180 | publish rb = writeIORef (rbCursor rb)
181 | {-# INLINE publish #-}
182 | 
183 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO ()
184 | publishBatch rb _lo hi = writeIORef (rbCursor rb) hi
185 | {-# INLINE publishBatch #-}
186 | 
187 | unsafeGet :: Unbox e => RingBuffer e -> SequenceNumber -> IO e
188 | unsafeGet rb current = Vector.unsafeRead (rbEvents rb) (index (capacity rb) current)
189 | {-# INLINE unsafeGet #-}
190 | 


--------------------------------------------------------------------------------
/src/Disruptor/SP/RingBuffer.hs:
--------------------------------------------------------------------------------
  1 | module Disruptor.SP.RingBuffer where
  2 | 
  3 | import Control.Concurrent (threadDelay)
  4 | import Control.Exception (assert)
  5 | import Control.Monad (when)
  6 | import Data.Bits (popCount)
  7 | import Data.IORef (IORef, newIORef, readIORef, writeIORef)
  8 | import Data.Int (Int64)
  9 | import qualified Data.Vector as ImmutableVector
 10 | import Data.Vector.Mutable (IOVector)
 11 | import qualified Data.Vector.Mutable as Vector
 12 | 
 13 | import Disruptor.SequenceNumber
 14 | 
 15 | ------------------------------------------------------------------------
 16 | 
 17 | data RingBuffer e = RingBuffer
 18 |   {
 19 |   -- | The capacity, or maximum amount of values, of the ring buffer.
 20 |     rbCapacity :: {-# UNPACK #-} !Int64
 21 |   -- | The cursor pointing to the head of the ring buffer.
 22 |   , rbCursor :: {-# UNPACK #-} !(IORef SequenceNumber)
 23 |   -- | The values of the ring buffer.
 24 |   , rbEvents :: {-# UNPACK #-} !(IOVector e)
 25 |   -- | References to the last consumers' sequence numbers, used in order to
 26 |   -- avoid wrapping the buffer and overwriting events that have not been
 27 |   -- consumed yet.
 28 |   , rbGatingSequences :: {-# UNPACK #-} !(IORef (IOVector (IORef SequenceNumber)))
 29 |   -- | Cached value of computing the last consumers' sequence numbers using the
 30 |   -- above references.
 31 |   , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber)
 32 |   }
 33 | 
 34 | newRingBuffer :: Int -> IO (RingBuffer e)
 35 | newRingBuffer capacity
 36 |   | capacity <= 0          =
 37 |       error "newRingBuffer: capacity must be greater than 0"
 38 |   | popCount capacity /= 1 =
 39 |       -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this.
 40 |       error "newRingBuffer: capacity must be a power of 2"
 41 |   | otherwise              = do
 42 |       snr <- newIORef (-1)
 43 |       v   <- Vector.new capacity
 44 |       -- ^ XXX: Should this really be the same as capacity? See https://github.com/LMAX-Exchange/disruptor/blob/master/src/main/java/com/lmax/disruptor/RingBuffer.java#L60
 45 |       -- Also see https://youtube.com/watch?v=fDGWWpHlzvw&t=1380
 46 |       gs  <- newIORef =<< Vector.new 0
 47 |       cgs <- newIORef (-1)
 48 |       return (RingBuffer (fromIntegral capacity) snr v gs cgs)
 49 | 
 50 | -- | The capacity, or maximum amount of values, of the ring buffer.
 51 | capacity :: RingBuffer e -> Int64
 52 | capacity = rbCapacity
 53 | {-# INLINE capacity #-}
 54 | 
 55 | getCursor :: RingBuffer e -> IO SequenceNumber
 56 | getCursor rb = readIORef (rbCursor rb)
 57 | {-# INLINE getCursor #-}
 58 | 
 59 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO ()
 60 | setGatingSequences rb gs = do
 61 |   v <- ImmutableVector.thaw (ImmutableVector.fromList gs)
 62 |   writeIORef (rbGatingSequences rb) v
 63 | {-# INLINE setGatingSequences #-}
 64 | 
 65 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber
 66 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb)
 67 | {-# INLINE getCachedGatingSequence #-}
 68 | 
 69 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO ()
 70 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb)
 71 | {-# INLINE setCachedGatingSequence #-}
 72 | 
 73 | minimumSequence :: RingBuffer e -> IO SequenceNumber
 74 | minimumSequence rb = do
 75 |   cursorValue <- getCursor rb
 76 |   minimumSequence' (rbGatingSequences rb) cursorValue
 77 | {-# INLINE minimumSequence #-}
 78 | 
 79 | minimumSequence' :: IORef (IOVector (IORef SequenceNumber)) -> SequenceNumber
 80 |                  -> IO SequenceNumber
 81 | minimumSequence' gatingSequences cursorValue = do
 82 |   gs <- readIORef gatingSequences
 83 |   go gs
 84 |   where
 85 |     go :: IOVector (IORef SequenceNumber) -> IO SequenceNumber
 86 |     go gs = go' 0 cursorValue
 87 |       where
 88 |         len :: Int
 89 |         len = Vector.length gs - 1
 90 | 
 91 |         go' :: Int -> SequenceNumber -> IO SequenceNumber
 92 |         go' ix minSequence | ix >  len = return minSequence
 93 |                            | ix <= len = do
 94 |           g <- readIORef =<< Vector.unsafeRead gs ix
 95 |           if g < minSequence
 96 |           then go' (ix + 1) g
 97 |           else go' (ix + 1) minSequence
 98 | {-# INLINE minimumSequence' #-}
 99 | 
100 | -- | Currently available slots to write to.
101 | size :: RingBuffer e -> IO Int64
102 | size rb = do
103 |   consumed <- minimumSequence rb
104 |   produced <- getCursor rb
105 |   return (capacity rb - fromIntegral (produced - consumed))
106 | {-# INLINE size #-}
107 | 
108 | -- | Claim the next event in sequence for publishing.
109 | next :: RingBuffer e -> IO SequenceNumber
110 | next rb = nextBatch rb 1
111 | {-# INLINE next #-}
112 | 
113 | -- | Claim the next `n` events in sequence for publishing. This is for batch
114 | -- event producing. Returns the highest claimed sequence number, so using it
115 | -- requires a bit of extra work, e.g.:
116 | --
117 | -- @
118 | --     let n = 10
119 | --     hi <- nextBatch rb n
120 | --     let lo = hi - (n - 1)
121 | --     mapM_ f [lo..hi]
122 | --     publishBatch rb lo hi
123 | -- @
124 | --
125 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber
126 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do
127 |   current <- getCursor rb
128 |   let nextSequence :: SequenceNumber
129 |       nextSequence = current + fromIntegral n
130 | 
131 |       wrapPoint :: SequenceNumber
132 |       wrapPoint = nextSequence - fromIntegral (capacity rb)
133 | 
134 |   writeIORef (rbCursor rb) nextSequence
135 |   cachedGatingSequence <- getCachedGatingSequence rb
136 | 
137 |   when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $
138 |     waitForConsumers wrapPoint
139 | 
140 |   return nextSequence
141 |   where
142 |     waitForConsumers :: SequenceNumber -> IO ()
143 |     waitForConsumers wrapPoint = go
144 |       where
145 |         go :: IO ()
146 |         go = do
147 |           gatingSequence <- minimumSequence rb
148 |           if wrapPoint > gatingSequence
149 |           then do
150 |             threadDelay 1
151 |             go -- SPIN
152 |           else setCachedGatingSequence rb gatingSequence
153 | {-# INLINABLE nextBatch #-}
154 | 
155 | -- Try to return the next sequence number to write to. If `Nothing` is returned,
156 | -- then the last consumer has not yet processed the event we are about to
157 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext`
158 | -- should apply back-pressure upstream if this happens.
159 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber
160 | tryNext rb = tryNextBatch rb 1
161 | {-# INLINE tryNext #-}
162 | 
163 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber
164 | tryNextBatch rb n = assert (n > 0) $ do
165 |   current <- getCursor rb
166 |   let next = current + fromIntegral n
167 |       wrapPoint = next - fromIntegral (capacity rb)
168 |   cachedGatingSequence <- getCachedGatingSequence rb
169 |   if (wrapPoint > cachedGatingSequence || cachedGatingSequence > current)
170 |   then do
171 |     minSequence <- minimumSequence' (rbGatingSequences rb) current
172 |     setCachedGatingSequence rb minSequence
173 |     if (wrapPoint > minSequence)
174 |     then return None
175 |     else return (Some next)
176 |   else return (Some next)
177 | {-# INLINE tryNextBatch #-}
178 | 
179 | set :: RingBuffer e -> SequenceNumber -> e -> IO ()
180 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (rbCapacity rb) snr) e
181 | {-# INLINE set #-}
182 | 
183 | publish :: RingBuffer e -> SequenceNumber -> IO ()
184 | publish rb = writeIORef (rbCursor rb)
185 | {-# INLINE publish #-}
186 | 
187 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO ()
188 | publishBatch rb _lo hi = writeIORef (rbCursor rb) hi
189 | {-# INLINE publishBatch #-}
190 | 
191 | unsafeGet :: RingBuffer e -> SequenceNumber -> IO e
192 | unsafeGet rb current = Vector.unsafeRead (rbEvents rb) (index (capacity rb) current)
193 | {-# INLINE unsafeGet #-}
194 | 
195 | get :: RingBuffer e -> SequenceNumber -> IO (Maybe e)
196 | get rb want = do
197 |   produced <- getCursor rb
198 |   if want <= produced
199 |   then Just <$> unsafeGet rb want
200 |   else return Nothing
201 | {-# INLINE get #-}
202 | 


--------------------------------------------------------------------------------
/test/DisruptorTest.hs:
--------------------------------------------------------------------------------
  1 | module DisruptorTest where
  2 | 
  3 | import Control.Concurrent
  4 | import Control.Concurrent.Async
  5 | import Control.Concurrent.MVar
  6 | import Control.Monad
  7 | import Data.IORef
  8 | import Data.Int
  9 | import Data.Set (Set)
 10 | import qualified Data.Set as Set
 11 | import System.IO
 12 | import System.IO.Error
 13 | import Test.HUnit
 14 | import Test.QuickCheck
 15 | 
 16 | import qualified Disruptor.MP.Consumer as MP
 17 | import qualified Disruptor.MP.Producer as MP
 18 | import qualified Disruptor.MP.RingBuffer as MP
 19 | import qualified Disruptor.SP.Consumer as SP
 20 | import qualified Disruptor.SP.Producer as SP
 21 | import qualified Disruptor.SP.RingBuffer as SP
 22 | import Disruptor.SequenceNumber
 23 | import Disruptor.AtomicCounterPadded
 24 | 
 25 | ------------------------------------------------------------------------
 26 | 
 27 | (@?->) :: (Eq a, Show a) => IO a -> a -> Assertion
 28 | mx @?-> y = do
 29 |   x <- mx
 30 |   x @?= y
 31 | 
 32 | unit_ringBufferSingleNonBlocking :: Assertion
 33 | unit_ringBufferSingleNonBlocking = do
 34 |   rb <- SP.newRingBuffer 8
 35 |   Some i <- SP.tryNext rb
 36 |   SP.set rb i 'a'
 37 |   SP.publish rb i
 38 |   SP.get rb i @?-> Just 'a'
 39 |   Some j <- SP.tryNext rb
 40 |   SP.set rb j 'b'
 41 |   SP.publish rb j
 42 |   SP.get rb j @?-> Just 'b'
 43 | 
 44 | unit_ringBufferSingleBlocking :: Assertion
 45 | unit_ringBufferSingleBlocking = do
 46 |   rb <- SP.newRingBuffer 8
 47 |   i <- SP.next rb
 48 |   SP.set rb i 'a'
 49 |   SP.publish rb i
 50 |   SP.get rb i @?-> Just 'a'
 51 |   j <- SP.next rb
 52 |   SP.set rb j 'b'
 53 |   SP.publish rb j
 54 |   SP.get rb j @?-> Just 'b'
 55 | 
 56 | unit_ringBufferRemainingCapacity :: Assertion
 57 | unit_ringBufferRemainingCapacity = do
 58 |   rb <- SP.newRingBuffer 1
 59 |   consumerSnrRef <- newIORef (SequenceNumber (-1))
 60 |   SP.setGatingSequences rb [consumerSnrRef]
 61 |   SP.size rb @?-> 1
 62 |   SP.publish rb (SequenceNumber 0)
 63 |   SP.size rb @?-> 0
 64 |   SP.tryNext rb @?-> None
 65 |   modifyIORef consumerSnrRef succ
 66 |   SP.size rb @?-> 1
 67 | 
 68 | unit_ringBufferMulti :: Assertion
 69 | unit_ringBufferMulti = do
 70 |   rb <- MP.newRingBuffer 8
 71 |   Some i <- MP.tryNext rb
 72 |   MP.set rb i 'a'
 73 |   MP.publish rb i
 74 |   MP.tryGet rb i @?-> Just 'a'
 75 |   Some j <- MP.tryNext rb
 76 |   MP.set rb j 'b'
 77 |   MP.publish rb j
 78 |   MP.tryGet rb j @?-> Just 'b'
 79 | 
 80 | unit_ringBufferSP1P1C :: Assertion
 81 | unit_ringBufferSP1P1C = do
 82 |   rb <- SP.newRingBuffer 128
 83 |   consumerFinished <- newEmptyMVar
 84 | 
 85 |   let iTERATIONS :: Int64
 86 |       iTERATIONS = 1024
 87 | 
 88 |       ep = SP.EventProducer (const (go 0)) ()
 89 |         where
 90 |           go :: Int64 -> IO ()
 91 |           go n | n == iTERATIONS = return ()
 92 |                | otherwise = do
 93 |                    mSnr <- SP.tryNext rb
 94 |                    case mSnr of
 95 |                      Some snr -> do
 96 |                        SP.set rb snr n
 97 |                        SP.publish rb snr
 98 |                        go (n + 1)
 99 |                      None -> do
100 |                        threadDelay 1
101 |                        go n
102 | 
103 |   let handler seen n snr endOfBatch
104 |         | n /= seen = error (show n ++ " appears twice")
105 |         | otherwise = do
106 |             -- putStrLn ("consumer got: " ++ show n ++
107 |             --           if endOfBatch then ". End of batch!" else "")
108 |             when (endOfBatch && getSequenceNumber snr == iTERATIONS - 1) $
109 |               putMVar consumerFinished ()
110 |             return (seen + 1)
111 | 
112 |   ec <- SP.newEventConsumer rb handler 0 [] (SP.Sleep 1)
113 | 
114 |   SP.setGatingSequences rb [SP.ecSequenceNumber ec]
115 | 
116 |   SP.withEventProducer ep $ \aep ->
117 |     SP.withEventConsumer ec $ \aec -> do
118 |       wait aep
119 |       () <- takeMVar consumerFinished
120 |       cancel aec
121 | 
122 | unit_ringBufferMP1P1CBlocking :: Assertion
123 | unit_ringBufferMP1P1CBlocking = ringBufferMP1P1C True
124 | 
125 | unit_ringBufferMP1P1CNonBlocking :: Assertion
126 | unit_ringBufferMP1P1CNonBlocking = ringBufferMP1P1C False
127 | 
128 | ringBufferMP1P1C :: Bool -> Assertion
129 | ringBufferMP1P1C blocking = do
130 |   numCap <- getNumCapabilities
131 |   assertBool "getNumCapabilities < 2" (numCap >= 2)
132 |   rb <- MP.newRingBuffer 8
133 |   counter <- newIORef (-1)
134 |   consumerFinished <- newEmptyMVar
135 | 
136 |   let ep = MP.EventProducer (const go) ()
137 |         where
138 |           go :: IO ()
139 |           go = do
140 |             n <- atomicModifyIORef' counter (\n -> let n' = n + 1 in (n', n'))
141 |             if n > atLeastThisManyEvents
142 |             then return ()
143 |             else
144 |               if blocking
145 |               then goBlocking n
146 |               else goNonBlocking n
147 | 
148 |           goBlocking n = do
149 |             snr <- MP.next rb
150 |             MP.set rb snr n
151 |             MP.publish rb snr
152 |             go
153 | 
154 |           goNonBlocking n = do
155 |             mSnr <- MP.tryNext rb
156 |             case mSnr of
157 |               Some snr -> do
158 |                 MP.set rb snr n
159 |                 MP.publish rb snr
160 |                 go
161 |               None -> do
162 |                 threadDelay 1
163 |                 goNonBlocking n
164 | 
165 |   let handler seen n snr endOfBatch
166 |         | n `Set.member` seen = error (show n ++ " appears twice")
167 |         | otherwise           = do
168 |             -- putStrLn ("consumer got, n = " ++ show n ++ ", snr = " ++ show snr ++
169 |             --           if endOfBatch then ". End of batch!" else "")
170 |             let seen' = Set.insert n seen
171 |             when (endOfBatch &&
172 |                   getSequenceNumber snr >= fromIntegral atLeastThisManyEvents) $ do
173 |               putMVar consumerFinished seen'
174 |             return seen'
175 |   ec <- MP.newEventConsumer rb handler Set.empty [] (MP.Sleep 1)
176 | 
177 |   MP.setGatingSequences rb [MP.ecSequenceNumber ec]
178 | 
179 |   MP.withEventProducer ep $ \aep ->
180 |     MP.withEventConsumer ec $ \aec -> do
181 |       seen <- takeMVar consumerFinished
182 |       cancel aec
183 |       cancel aep
184 |       assertEqual "increasingByOneFrom"
185 |         (Right ())
186 |         (increasingByOneFrom 0 (Set.toList seen))
187 | 
188 | atLeastThisManyEvents = 10
189 | 
190 | increasingByOneFrom :: Int -> [Int] -> Either String ()
191 | increasingByOneFrom n []
192 |   | n >  atLeastThisManyEvents = Right ()
193 |   | n <= atLeastThisManyEvents =
194 |       Left ("n (= " ++ show n ++ ") < atLeastThisManyEvents (= " ++
195 |             show atLeastThisManyEvents ++ ")")
196 | increasingByOneFrom n (i : is) | n == i    = increasingByOneFrom (n + 1) is
197 |                                | otherwise =
198 |   Left ("Expected: " ++ show n ++ ", but got: " ++ show i)
199 | 
200 | unit_ringBuffer5P1C :: Assertion
201 | unit_ringBuffer5P1C = do
202 |   rb <- MP.newRingBuffer 32
203 |   counter <- newCounter (-1)
204 |   consumerFinished <- newEmptyMVar
205 | 
206 |   let ep = MP.EventProducer (const go) ()
207 |         where
208 |           go :: IO ()
209 |           go = do
210 |             n <- incrCounter 1 counter
211 |             putStrLn ("producer, n = " ++ show n)
212 |             if n > atLeastThisManyEvents
213 |             then putStrLn "producer: done" >> return ()
214 |             else do
215 |               putStrLn "producer: not done yet"
216 |               mSnr <- MP.tryNext rb
217 |               putStrLn ("producer: mSrn = " ++ show mSnr)
218 |               case mSnr of
219 |                 Some snr -> do
220 |                   putStrLn ("producer: setting " ++ show n)
221 |                   MP.set rb snr n
222 |                   MP.publish rb snr
223 |                   putStrLn ("producer: published n = " ++ show n ++ ", snr = " ++ show snr)
224 |                   go
225 |                 None -> do
226 |                   putStrLn "producer: none"
227 |                   threadDelay 1
228 |                   go
229 | 
230 |   let handler seen n snr endOfBatch
231 |         | n `Set.member` seen = error (show n ++ " appears twice")
232 |         | otherwise           = do
233 |             putStrLn ("consumer got: n = " ++ show n ++ ", snr = " ++ show snr ++
234 |                       if endOfBatch then ". End of batch!" else "")
235 |             let seen' = Set.insert n seen
236 |             when (endOfBatch &&
237 |                   getSequenceNumber snr >= fromIntegral atLeastThisManyEvents) $
238 |               putMVar consumerFinished seen'
239 |             return seen'
240 |   ec <- MP.newEventConsumer rb handler Set.empty [] (MP.Sleep 1)
241 | 
242 |   MP.setGatingSequences rb [MP.ecSequenceNumber ec]
243 | 
244 |   MP.withEventProducer ep $ \aep1 ->
245 |     MP.withEventProducer ep $ \aep2 ->
246 |       MP.withEventProducer ep $ \aep3 ->
247 |         MP.withEventProducer ep $ \aep4 ->
248 |           MP.withEventProducer ep $ \aep5 ->
249 |             MP.withEventConsumer ec $ \aec -> do
250 |               seen <- takeMVar consumerFinished
251 |               cancel aec
252 |               mapM_ cancel [aep1, aep2, aep3, aep4, aep5]
253 |               assertEqual "increasingByOneFrom"
254 |                 (Right ())
255 |                 (increasingByOneFrom 0 (Set.toList seen))
256 | 


--------------------------------------------------------------------------------
/src/Disruptor/MP/RingBuffer.hs:
--------------------------------------------------------------------------------
  1 | module Disruptor.MP.RingBuffer where
  2 | 
  3 | import Control.Concurrent (threadDelay, yield)
  4 | import Control.Exception (assert)
  5 | import Control.Monad (when)
  6 | import Data.Atomics (casIORef, peekTicket, readForCAS)
  7 | import Data.Bits (popCount)
  8 | import Data.IORef
  9 |        (IORef, atomicModifyIORef', newIORef, readIORef, writeIORef)
 10 | import Data.Int (Int64)
 11 | import Data.Vector.Mutable (IOVector)
 12 | import qualified Data.Vector.Mutable as Vector
 13 | import qualified Data.Vector.Unboxed.Mutable as Unboxed
 14 | 
 15 | import Disruptor.SequenceNumber
 16 | import Disruptor.AtomicCounterPadded
 17 | 
 18 | ------------------------------------------------------------------------
 19 | 
 20 | -- | The lock-free multi-producer implementation is presented in the following
 21 | -- talk:
 22 | --
 23 | --   Locks? We Don't Need No Stinkin' Locks! by Mike Barker (JAX London 2012)
 24 | --   https://youtu.be/VBnLW9mKMh4?t=1813
 25 | --
 26 | -- and also discussed in the following thread:
 27 | --
 28 | --   https://groups.google.com/g/lmax-disruptor/c/UhmRuz_CL6E/m/-hVt86bHvf8J
 29 | --
 30 | -- Note that one can also achieve a similar result by using multiple
 31 | -- single-producers and combine them into one as outlined in this thread:
 32 | --
 33 | -- https://groups.google.com/g/lmax-disruptor/c/hvJVE-h2Xu0/m/mBW0j_3SrmIJ
 34 | --
 35 | data RingBuffer e = RingBuffer
 36 |   {
 37 |   -- | The capacity, or maximum amount of values, of the ring buffer.
 38 |     rbCapacity :: {-# UNPACK #-} !Int64
 39 |   -- | The cursor pointing to the head of the ring buffer.
 40 |   , rbCursor :: {-# UNPACK #-} !AtomicCounter
 41 |   -- | The values of the ring buffer.
 42 |   , rbEvents :: {-# UNPACK #-} !(IOVector e)
 43 |   -- | References to the last consumers' sequence numbers, used in order to
 44 |   -- avoid wrapping the buffer and overwriting events that have not been
 45 |   -- consumed yet.
 46 |   -- TODO: use vector instead of list.
 47 |   , rbGatingSequences :: {-# UNPACK #-} !(IORef [IORef SequenceNumber])
 48 |   -- | Cached value of computing the last consumers' sequence numbers using the
 49 |   -- above references.
 50 |   , rbCachedGatingSequence :: {-# UNPACK #-} !(IORef SequenceNumber)
 51 |   -- | Used to keep track of what has been published in the multi-producer case.
 52 |   , rbAvailableBuffer :: {-# UNPACK #-} !(Unboxed.IOVector Int)
 53 |   }
 54 | 
 55 | newRingBuffer :: Int -> IO (RingBuffer e)
 56 | newRingBuffer capacity
 57 |   | capacity <= 0          =
 58 |       error "newRingBuffer: capacity must be greater than 0"
 59 |   | popCount capacity /= 1 =
 60 |       -- NOTE: The use of bitwise and (`.&.`) in `index` relies on this.
 61 |       error "newRingBuffer: capacity must be a power of 2"
 62 |   | otherwise              = do
 63 |       snr <- newCounter (-1)
 64 |       v   <- Vector.new capacity
 65 |       gs  <- newIORef []
 66 |       cgs <- newIORef (-1)
 67 |       ab  <- Unboxed.new capacity
 68 |       Unboxed.set ab (-1)
 69 |       return (RingBuffer (fromIntegral capacity) snr v gs cgs ab)
 70 | {-# INLINABLE newRingBuffer #-}
 71 | 
 72 | -- | The capacity, or maximum amount of values, of the ring buffer.
 73 | capacity :: RingBuffer e -> Int64
 74 | capacity = rbCapacity
 75 | {-# INLINE capacity #-}
 76 | 
 77 | getCursor :: RingBuffer e -> IO SequenceNumber
 78 | getCursor rb = fromIntegral <$> readCounter (rbCursor rb)
 79 | {-# INLINE getCursor #-}
 80 | 
 81 | setGatingSequences :: RingBuffer e -> [IORef SequenceNumber] -> IO ()
 82 | setGatingSequences rb = writeIORef (rbGatingSequences rb)
 83 | {-# INLINE setGatingSequences #-}
 84 | 
 85 | getCachedGatingSequence :: RingBuffer e -> IO SequenceNumber
 86 | getCachedGatingSequence rb = readIORef (rbCachedGatingSequence rb)
 87 | {-# INLINE getCachedGatingSequence #-}
 88 | 
 89 | setCachedGatingSequence :: RingBuffer e -> SequenceNumber -> IO ()
 90 | setCachedGatingSequence rb = writeIORef (rbCachedGatingSequence rb)
 91 | {-# INLINE setCachedGatingSequence #-}
 92 | 
 93 | setAvailable :: RingBuffer e -> SequenceNumber -> IO ()
 94 | setAvailable rb snr = Unboxed.unsafeWrite
 95 |   (rbAvailableBuffer rb)
 96 |   (index cap snr)
 97 |   (availabilityFlag cap snr)
 98 |   where
 99 |     cap = capacity rb
100 | {-# INLINE setAvailable #-}
101 | 
102 | getAvailable :: RingBuffer e -> Int -> IO Int
103 | getAvailable rb ix = Unboxed.unsafeRead (rbAvailableBuffer rb) ix
104 | {-# INLINE getAvailable #-}
105 | 
106 | minimumSequence :: RingBuffer e -> IO SequenceNumber
107 | minimumSequence rb = do
108 |   cursorValue <- getCursor rb
109 |   minimumSequence' (rbGatingSequences rb) cursorValue
110 | {-# INLINE minimumSequence #-}
111 | 
112 | minimumSequence' :: IORef [IORef SequenceNumber] -> SequenceNumber -> IO SequenceNumber
113 | minimumSequence' gatingSequences cursorValue = do
114 |   snrs <- mapM readIORef =<< readIORef gatingSequences
115 |   return (minimum (cursorValue : snrs))
116 | {-# INLINE minimumSequence' #-}
117 | 
118 | -- | Currently available slots to write to.
119 | size :: RingBuffer e -> IO Int64
120 | size rb = do
121 |   consumed <- minimumSequence rb
122 |   produced <- getCursor rb
123 |   return (capacity rb - fromIntegral (produced - consumed))
124 | {-# INLINABLE size #-}
125 | 
126 | -- | Claim the next event in sequence for publishing.
127 | next :: RingBuffer e -> IO SequenceNumber
128 | next rb = nextBatch rb 1
129 | {-# INLINE next #-}
130 | 
131 | -- | Claim the next `n` events in sequence for publishing. This is for batch
132 | -- event producing. Returns the highest claimed sequence number, so using it
133 | -- requires a bit of extra work, e.g.:
134 | --
135 | -- @
136 | --     let n = 10
137 | --     hi <- nextBatch rb n
138 | --     let lo = hi - (n - 1)
139 | --     mapM_ f [lo..hi]
140 | --     publishBatch rb lo hi
141 | -- @
142 | --
143 | nextBatch :: RingBuffer e -> Int -> IO SequenceNumber
144 | nextBatch rb n = assert (n > 0 && fromIntegral n <= capacity rb) $ do
145 |   -- (current, nextSequence) <- -- {-# SCC "atomicModifyIORef'" #-}
146 |   --                            -- XXX: The atomic takes 60% of the time of
147 |   --                            -- `nextBatch`... Try using `AtomicCounter` instead
148 |   --                            -- of `IORef SequneceNumber`.
149 |   --                            atomicModifyIORef' (rbCursor rb) $ \current ->
150 |   --                              let
151 |   --                                nextSequence = current + fromIntegral n
152 |   --                              in
153 |   --                                (nextSequence, (current, nextSequence))
154 |   current <- fromIntegral <$> {-# SCC incrCounter #-} getAndIncrCounter n (rbCursor rb)
155 | 
156 |   let nextSequence :: SequenceNumber
157 |       nextSequence = current + fromIntegral n
158 | 
159 |       wrapPoint :: SequenceNumber
160 |       wrapPoint = nextSequence - fromIntegral (capacity rb)
161 | 
162 |   cachedGatingSequence <- getCachedGatingSequence rb
163 | 
164 |   when (wrapPoint > cachedGatingSequence || cachedGatingSequence > current) $
165 |     waitForConsumers current wrapPoint (rbGatingSequences rb)
166 | 
167 |   return nextSequence
168 |   where
169 |     waitForConsumers :: SequenceNumber -> SequenceNumber -> IORef [IORef SequenceNumber]
170 |                      -> IO ()
171 |     waitForConsumers current wrapPoint gatingSequences = go
172 |       where
173 |         go :: IO ()
174 |         go = do
175 |           gatingSequence <- minimumSequence' gatingSequences current
176 |           if wrapPoint > gatingSequence
177 |           then do
178 |             threadDelay 1
179 |             go -- SPIN
180 |           else setCachedGatingSequence rb gatingSequence
181 | {-# INLINABLE nextBatch #-}
182 | 
183 | -- Try to return the next sequence number to write to. If `Nothing` is returned,
184 | -- then the last consumer has not yet processed the event we are about to
185 | -- overwrite (due to the ring buffer wrapping around) -- the callee of `tryNext`
186 | -- should apply back-pressure upstream if this happens.
187 | tryNext :: RingBuffer e -> IO MaybeSequenceNumber
188 | tryNext rb = tryNextBatch rb 1
189 | {-# INLINE tryNext #-}
190 | 
191 | tryNextBatch :: RingBuffer e -> Int -> IO MaybeSequenceNumber
192 | tryNextBatch rb n = assert (n > 0) go
193 |   where
194 |     go = do
195 |       current_ <- {-# SCC "readCounter" #-} readCounter (rbCursor rb)
196 |       let current = fromIntegral current_
197 |           next_   = current_ + n
198 |           next    = fromIntegral next_
199 |       b <- {-# SCC "hasCapacity" #-} hasCapacity rb n current
200 |       if not b
201 |       then return None
202 |       else do
203 |         success <- {-# SCC casCounter #-} casCounter (rbCursor rb) current_ next_
204 |         if success
205 |         then return (Some next)
206 |         else do
207 |           {-# SCC "threadDelay" #-} threadDelay 1
208 |           -- yield
209 |           go -- SPIN
210 |       {--
211 |       current <- {-# SCC "readForCas" #-} readForCAS (rbCursor rb)
212 |       let current_ = peekTicket current
213 |           next     = current_ + fromIntegral n
214 |       b <- {-# SCC "hasCapacity" #-} hasCapacity rb n current_
215 |       if not b
216 |       then return None
217 |       else do
218 |         (success, _current') <- {-# SCC "casIORef"#-} casIORef (rbCursor rb) current next
219 |         if success
220 |         then return (Some next)
221 |         else do
222 |           {-# SCC "threadDelay" #-}threadDelay 1
223 |           -- yield
224 |           go -- SPIN
225 | -}
226 | {-# INLINABLE tryNextBatch #-}
227 | 
228 | hasCapacity :: RingBuffer e -> Int -> SequenceNumber -> IO Bool
229 | hasCapacity rb requiredCapacity cursorValue = do
230 |   let wrapPoint = (cursorValue + fromIntegral requiredCapacity) -
231 |                   fromIntegral (capacity rb)
232 |   cachedGatingSequence <- getCachedGatingSequence rb
233 |   if (wrapPoint > cachedGatingSequence || cachedGatingSequence > cursorValue)
234 |   then do
235 |     minSequence <- minimumSequence' (rbGatingSequences rb) cursorValue
236 |     setCachedGatingSequence rb minSequence
237 |     if (wrapPoint > minSequence)
238 |     then return False
239 |     else return True
240 |   else return True
241 | {-# INLINE hasCapacity #-}
242 | 
243 | set :: RingBuffer e -> SequenceNumber -> e -> IO ()
244 | set rb snr e = Vector.unsafeWrite (rbEvents rb) (index (capacity rb) snr) e
245 | {-# INLINE set #-}
246 | 
247 | publish :: RingBuffer e -> SequenceNumber -> IO ()
248 | publish rb = setAvailable rb
249 | {-# INLINE publish #-}
250 |     -- XXX: Wake up consumers that are using a sleep wait strategy.
251 | 
252 | publishBatch :: RingBuffer e -> SequenceNumber -> SequenceNumber -> IO ()
253 | publishBatch rb lo hi = mapM_ (setAvailable rb) [lo..hi]
254 | {-# INLINE publishBatch #-}
255 | 
256 | get :: RingBuffer e -> SequenceNumber -> IO e
257 | get rb current = go
258 |   where
259 |     cap :: Int64
260 |     cap = capacity rb
261 | 
262 |     availableValue :: Int
263 |     availableValue = availabilityFlag cap current
264 | 
265 |     ix :: Int
266 |     ix = index cap current
267 | 
268 |     go = do
269 |       v <- getAvailable rb ix
270 |       if v /= availableValue
271 |       then do
272 |         threadDelay 1
273 |         go -- SPIN
274 |       else Vector.unsafeRead (rbEvents rb) ix
275 | 
276 | tryGet :: RingBuffer e -> SequenceNumber -> IO (Maybe e)
277 | tryGet rb want = do
278 |   produced <- getCursor rb
279 |   if want <= produced
280 |   then Just <$> get rb want
281 |   else return Nothing
282 | {-# INLINE tryGet #-}
283 | 
284 | isAvailable :: RingBuffer e -> SequenceNumber -> IO Bool
285 | isAvailable rb snr =
286 |   (==) <$> Unboxed.unsafeRead (rbAvailableBuffer rb) (index capacity snr)
287 |        <*> pure (availabilityFlag capacity snr)
288 |   where
289 |     capacity = rbCapacity rb
290 | {-# INLINE isAvailable #-}
291 | 
292 | highestPublished :: RingBuffer e -> SequenceNumber -> SequenceNumber
293 |                  -> IO SequenceNumber
294 | highestPublished rb lowerBound availableSequence = go lowerBound
295 |   where
296 |     go sequence
297 |       | sequence > availableSequence = return availableSequence
298 |       | otherwise                    = do
299 |           available <- isAvailable rb sequence
300 |           if not (available)
301 |           then return (sequence - 1)
302 |           else go (sequence + 1)
303 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # pipelined-state-machines
  2 | 
  3 | An experiment in declaratively programming parallel pipelines of state machines.
  4 | 
  5 | ## Motivation
  6 | 
  7 | Imagine a flat complex in Sweden. Being the socialist utopia Sweden is there's a
  8 | shared laundry room which the people in the flat complex can book. In the
  9 | laundry room there's everything one needs to wash, dry and iron your clothes.
 10 | You don't even need to bring your own laundry detergent!
 11 | 
 12 | Lets call three people living there Ann, Bo and Cecilia, and lets assume they
 13 | all want to use the laundry room. Depending on how the booking system is
 14 | implemented the total time it would take for all three people to do their
 15 | laundry varies.
 16 | 
 17 | For example if the booking system allocates a big time slot per person in which
 18 | that person can do the whole cycle of *W*ashing, *D*rying and *I*roning then,
 19 | assuming each step takes one time unit, we get a situation like this:
 20 | 
 21 |           Person
 22 |             ^
 23 |         Ann | W D I                             W = Washing
 24 |          Bo |       W D I                       D = Drying
 25 |     Cecilia |             W D I                 I = Ironing
 26 |             +-------------------> Time
 27 |             0 1 2 3 4 5 6 7 8 9
 28 | 
 29 | Bo cannot start washing until Ann is done ironing, because Ann has booked the
 30 | room for the whole cycle, and so on.
 31 | 
 32 | If the booking system is more granular and allows booking a time slot per step
 33 | then we can get a situation that looks like this:
 34 | 
 35 |           Person
 36 |             ^
 37 |         Ann | W D I
 38 |          Bo |   W D I
 39 |     Cecilia |     W D I
 40 |             +-------------------> Time
 41 |             0 1 2 3 4 5 6 7 8 9
 42 | 
 43 | It should be clear that the total time is shorter in this case, because the
 44 | machines are utilised better (Bo can start using the washing machine right after
 45 | Ann is done with it). Also note that if each person would start a new washing
 46 | after they finish ironing the first one and so on then the time savings would be
 47 | even greater.
 48 | 
 49 | This optimisation is called pipelining. It's used a lot in manufacturing, for
 50 | example Airbus [builds](https://youtu.be/oxjT7veKi9c?t=2682) two airplanes per
 51 | day. If you were to order a plane today you'd get it delivered in two months
 52 | time. How is that they deliver two per day if it takes two months to build them?
 53 | Pipelining! It's also used inside CPUs to [pipeline
 54 | instructions](https://en.wikipedia.org/wiki/Instruction_pipelining).
 55 | 
 56 | The rest of this document is an experiment in how we can construct such
 57 | pipelining in software in a declarative way.
 58 | 
 59 | ## Usage
 60 | 
 61 | The workers or stages in our pipeline will be state machines of the following
 62 | type.
 63 | 
 64 | ```haskell
 65 | data SM s a b where
 66 |   Id      :: SM s a a
 67 |   Compose :: SM s b c -> SM s a b -> SM s a c
 68 |   Fst     :: SM s (a, b) a
 69 |   Snd     :: SM s (a, b) b
 70 |   (:&&&)  :: SM s a c -> SM s a d -> SM s a (c, d)
 71 |   (:***)  :: SM s a c -> SM s b d -> SM s (a, b) (c, d)
 72 |   SlowIO  :: SM s a a -- Simulate a slow I/O computation.
 73 | ```
 74 | 
 75 | Here's an example of a stage which takes an ordered pair as input and swaps the
 76 | elements of the pair. Note the use of `SlowIO` to simulate that some slow I/O
 77 | computation happens.
 78 | 
 79 | ```haskell
 80 | swap :: SM () (a, b) (b, a)
 81 | swap = Snd :*** Fst `Compose` copy `Compose` SlowIO
 82 |   where
 83 |     copy = Id :&&& Id
 84 | ```
 85 | 
 86 | We can `interpret` such state machines into plain functions as follows.
 87 | 
 88 | ```haskell
 89 | interpret :: SM s a b -> (a -> s -> IO (s, b))
 90 | interpret Id            x s = return (s, x)
 91 | interpret (Compose g f) x s = do
 92 |   (s', y) <- interpret f x s
 93 |   interpret g y s'
 94 | interpret Fst           x s = return (s, fst x)
 95 | interpret Snd           x s = return (s, snd x)
 96 | interpret (f :&&& g)    x s = do
 97 |   (s', y)  <- interpret f x s
 98 |   (s'', z) <- interpret g x s'
 99 |   return (s'', (y, z))
100 | interpret (f :*** g)    x s = do
101 |   (s', y)  <- interpret f (fst x) s
102 |   (s'', z) <- interpret g (snd x) s'
103 |   return (s'', (y, z))
104 | interpret SlowIO x s = do
105 |   threadDelay 200000 -- 0.2s
106 |   return (s, x)
107 | ```
108 | 
109 | Next lets have a look at how we can construct pipelines of such state machines.
110 | 
111 | ```haskell
112 | data P a b where
113 |   SM     :: String -> SM s a b -> s -> P a b
114 |   (:>>>) :: P a b -> P b c -> P a c
115 | ```
116 | 
117 | The following is an example pipeline where there's only one stage in which we do
118 | our pair swapping three times.
119 | 
120 | ```haskell
121 | swapsSequential :: P (a, b) (b, a)
122 | swapsSequential = SM "three swaps" (swap `Compose` swap `Compose` swap) ()
123 | ```
124 | 
125 | The above corresponds to our coarse grained booking system where the laundry was
126 | booked for the whole cycle. Whereas the following corresponds to the more fine
127 | grained approach where we get pipelining.
128 | 
129 | ```haskell
130 | swapsPipelined :: P (a, b) (b, a)
131 | swapsPipelined =
132 |   SM "first swap"  swap () :>>>
133 |   SM "second swap" swap () :>>>
134 |   SM "third swap"  swap ()
135 | ```
136 | 
137 | A pipeline can be deployed, we'll use the following type to keep track of the
138 | queue associated with the pipeline as well as the name and pids of the state
139 | machines involved in the pipeline.
140 | 
141 | ```haskell
142 | data Deployment a = Deployment
143 |   { queue :: TQueue a
144 |   , pids  :: [(String, Async ())]
145 |   }
146 | 
147 | names :: Deployment a -> String
148 | names = bracket . intercalate "," . reverse . map fst . pids
149 |   where
150 |     bracket s = "[" ++ s ++ "]"
151 | ```
152 | 
153 | Here's the actual `deploy`ment function which takes a pipeline and gives back an
154 | input-queue and a `Deployment` which holds the output-queue and the names and
155 | pids of the state machines.
156 | 
157 | ```haskell
158 | deploy :: P a b -> IO (TQueue a, Deployment b)
159 | deploy p = do
160 |   q <- newTQueueIO
161 |   d <- deploy' p (Deployment q [])
162 |   return (q, d)
163 | 
164 | deploy' :: P a b -> Deployment a -> IO (Deployment b)
165 | deploy' (SM name sm s0) d = do
166 |   q' <- newTQueueIO
167 |   pid <- async (go s0 q')
168 |   return Deployment { queue = q', pids = (name, pid) : pids d }
169 |   where
170 |     f = interpret sm
171 | 
172 |     go s q' = do
173 |       x <- atomically $ readTQueue (queue d)
174 |       (s', o) <- f x s
175 |       atomically $ writeTQueue q' o
176 |       go s' q'
177 | deploy' (sm :>>> sm') d = do
178 |   d' <- deploy' sm d
179 |   deploy' sm' d'
180 | ```
181 | 
182 | We now have everything we need to run a simple benchmark comparing the
183 | sequential version of three swaps versus the pipelined version.
184 | 
185 | ```haskell
186 | data PipelineKind = Sequential | Pipelined
187 |   deriving Show
188 | 
189 | main :: IO ()
190 | main = do
191 |   mapM_ libMain [Sequential, Pipelined]
192 | 
193 | libMain :: PipelineKind -> IO ()
194 | libMain k = do
195 |   (q, d) <- deploy $ case k of
196 |                        Sequential -> swapsSequential
197 |                        Pipelined  -> swapsPipelined
198 |   print k
199 |   putStrLn $ "Pids: " ++ names d
200 |   start <- getCurrentTime
201 |   forM_ [(1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)] $ \x ->
202 |     atomically $ writeTQueue q x
203 |   resps <- replicateM 6 $ atomically $ readTQueue (queue d)
204 |   end <- getCurrentTime
205 |   putStrLn $ "Responses: " ++ show resps
206 |   putStrLn $ "Time: " ++ show (diffUTCTime end start)
207 |   putStrLn ""
208 | ```
209 | 
210 | We can run the above with `cabal run readme-pipeline-example`, which results in
211 | something like the following being printed to the screen.
212 | 
213 | ```
214 | Sequential
215 | Pids: [three swaps]
216 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)]
217 | Time: 3.611045787s
218 | 
219 | Pipelined
220 | Pids: [first swap,second swap,third swap]
221 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)]
222 | Time: 1.604990775s
223 | ```
224 | 
225 | Cool, we managed to reduce the total running time by more than half! We can do
226 | even better though! In addition to pipelining we can also shard the queues by
227 | letting two state machines work on the same queue, the first processing the
228 | elements in the even positions of the queue and the second processing the
229 | elements in the odd positions.
230 | 
231 | ```diff
232 | data P a b where
233 |   SM     :: String -> SM s a b -> s -> P a b
234 |   (:>>>) :: P a b -> P b c -> P a c
235 | + Shard  :: P a b -> P a b
236 | ```
237 | 
238 | Here's an example of a sharded pipeline, where each shard will spawn two state
239 | machines (one working on the even indexes of the queue and the other on the
240 | odd).
241 | 
242 | ```haskell
243 | swapsSharded :: P (a, b) (b, a)
244 | swapsSharded =
245 |   Shard (SM "first swap"  swap ()) :>>>
246 |   Shard (SM "second swap" swap ()) :>>>
247 |   Shard (SM "third swap"  swap ())
248 | ```
249 | 
250 | In the deployment of shards, we achieve the even-odd split by reading from the
251 | input queue, `qIn`, and first writing to the even queue, `qEven`, and then
252 | switching over to the odd queue, `qOdd`, when making the recursive call in
253 | `shardQIn`. Whereas `shardQOut` does the inverse and merges the two queues back
254 | into the output queue:
255 | 
256 | ```diff
257 | + deploy' (Shard p) d = do
258 | +   let qIn = queue d
259 | +   qEven  <- newTQueueIO
260 | +   qOdd   <- newTQueueIO
261 | +   pidIn  <- async $ shardQIn qIn qEven qOdd
262 | +   dEven  <- deploy' p (Deployment qEven [])
263 | +   dOdd   <- deploy' p (Deployment qOdd [])
264 | +   qOut   <- newTQueueIO
265 | +   pidOut <- async $ shardQOut (queue dEven) (queue dOdd) qOut
266 | +   return (Deployment qOut (("shardIn:  " ++ names dEven ++ " & " ++ names dOdd, pidIn) :
267 | +                            ("shardOut: " ++ names dEven ++ " & " ++ names dOdd, pidOut) :
268 | +                            pids dEven ++ pids dOdd ++ pids d))
269 | +   where
270 | +     shardQIn :: TQueue a -> TQueue a -> TQueue a -> IO ()
271 | +     shardQIn  qIn qEven qOdd = do
272 | +       atomically (readTQueue qIn >>= writeTQueue qEven)
273 | +       shardQIn qIn qOdd qEven
274 | +
275 | +     shardQOut :: TQueue a -> TQueue a -> TQueue a -> IO ()
276 | +     shardQOut qEven qOdd qOut = do
277 | +       atomically (readTQueue qEven >>= writeTQueue qOut)
278 | +       shardQOut qOdd qEven qOut
279 | ```
280 | 
281 | Running this version we see more than 3.5x speed-up compared to the sequential
282 | pipeline.
283 | 
284 | ```
285 | Sharded
286 | Pids: [first swap,first swap,shardOut: [first swap] & [first swap],shardIn:  [first swap] & [first swap],second swap,second swap,shardOut: [second swap] & [second swap],shardIn:  [second swap] & [second swap],third swap,third swap,shardOut: [third swap] & [third swap],shardIn:  [third swap] & [third swap]]
287 | Responses: [(2,1),(3,2),(4,3),(5,4),(6,5),(7,6)]
288 | Time: 1.00241912s
289 | ```
290 | 
291 | There are still many more improvements to be made here:
292 | 
293 |   * Avoid spawning threads for merely shuffling elements between queues, e.g.
294 |     `shardQ{In, Out}` above;
295 |   * Avoid copying elements between queues;
296 |   * Back-pressure;
297 |   * Batching.
298 | 
299 | I believe all these problems can be solved by choosing a better concurrent queue
300 | data structure than `TQueue`, so that's what we'll have a look at next.
301 | 
302 | ## Disruptor
303 | 
304 | The `Disruptor*` modules are a Haskell port of the [LMAX
305 | Disruptor](https://github.com/LMAX-Exchange/disruptor), which is a high
306 | performance inter-thread messaging library. The developers at LMAX, which
307 | operates a financial exchange,
308 | [reported](https://www.infoq.com/presentations/LMAX/) in 2010 that they could
309 | process more than 100,000 transactions per second at less than 1 millisecond
310 | latency.
311 | 
312 | At its core it's just a lock-free concurrent queue, but it also provides
313 | building blocks for achieving several useful concurrent programming tasks that
314 | typical queues don't (or at least don't make obvious how to do). The extra
315 | features include:
316 | 
317 |   * Multi-cast (many consumers can in parallel process the same event);
318 |   * Batching (both on producer and consumer side);
319 |   * Back-pressure;
320 |   * Sharding for scalability;
321 |   * Dependencies between consumers.
322 | 
323 | It's also performs better than most queues, as we shall see further down.
324 | 
325 | ### Example
326 | 
327 | ```haskell
328 | import Control.Concurrent
329 | import Control.Concurrent.Async
330 | import Disruptor.SP
331 | 
332 | main :: IO ()
333 | main = do
334 | 
335 |   -- Create the shared ring buffer.
336 |   let bufferCapacity = 128
337 |   rb <- newRingBuffer bufferCapacity
338 | 
339 |   -- The producer keeps a counter and produces events that are merely the pretty
340 |   -- printed value as a string of that counter.
341 |   let produce :: Int -> IO (String, Int)
342 |       produce n = return (show n, n + 1)
343 | 
344 |       -- The counter starts at zero.
345 |       initialProducerState = 0
346 | 
347 |       -- No back-pressure is applied in this example.
348 |       backPressure :: Int -> IO ()
349 |       backPressure _ = return ()
350 | 
351 |   producer <- newEventProducer rb produce backPressure initialProducerState
352 | 
353 |   -- The consumer merely prints the string event to the terminal.
354 |   let consume :: () -> String -> SequenceNumber -> EndOfBatch -> IO ()
355 |       consume () event snr endOfBatch =
356 |         putStrLn (event ++ if endOfBatch then " (end of batch)" else "")
357 | 
358 |       -- The consumer doesn't need any state in this example.
359 |       initialConsumerState = ()
360 | 
361 |       -- Which other consumers do we need to wait for before consuming an event?
362 |       dependencies = []
363 | 
364 |       -- What to do in case there are no events to consume?
365 |       waitStrategy = Sleep 1
366 | 
367 |   consumer <- newEventConsumer rb consume initialConsumerState dependencies waitStrategy
368 | 
369 |   -- Tell the ring buffer which the last consumer is, to avoid overwriting
370 |   -- events that haven't been consumed yet.
371 |   setGatingSequences rb [ecSequenceNumber consumer]
372 | 
373 |   withEventProducer producer $ \ap ->
374 |     withEventConsumer consumer $ \ac -> do
375 |       threadDelay (3 * 1000 * 1000) -- 3 sec
376 |       cancel ap
377 |       cancel ac
378 | ```
379 | 
380 | You can run the above example with `cabal run readme-disruptor-example`.
381 | 
382 | A couple of things we could change to highlight the features we mentioned in the
383 | above section:
384 | 
385 |   1. Add a second consumer that saves the event to disk, this consumer would be
386 |      slower than the current one which logs to the terminal, but we could use
387 |      buffer up events in memory and only actually write when the end of batch
388 |      flag is set to speed things up;
389 | 
390 |   2. We could also shard depending on the sequence number, e.g. have two slower
391 |      consumers that write to disk and have one of them handle even sequence numbers
392 |      while the other handles odd ones;
393 | 
394 |   3. The above producer writes one event at the time to the ring buffer, but
395 |      since we know at which sequence number the last consumer is at we can
396 |      easily make writes in batches as well;
397 | 
398 |   4. Currently the producer doesn't apply any back-pressure when the ring buffer
399 |      is full, in a more realistic example where the producer would, for example,
400 |      create events from requests made to a http server we could use
401 |      back-pressure to tell the http server to return status code 429 (too many
402 |      requests);
403 | 
404 |   5. If we have one consumer that writes to the terminal and another one that
405 |      concurrently writes to disk, we could add a third consumer that does
406 |      something with the event only if it has both been logged and stored to disk
407 |      (i.e. the third consumer depends on both the first and the second).
408 | 
409 | ### How it works
410 | 
411 | The ring buffer is implemented using a bounded array, it keeps track of a
412 | monotonically increasing sequence number and it knows its the capacity of the
413 | array, so to find out where to write the next value by simply taking the modulus
414 | of the sequence number and the capacity. This has several advantages over
415 | traditional queues:
416 | 
417 |   1. We never remove elements when dequeing, merely overwrite them once we gone
418 |      all way around the ring. This removes write
419 |      [contention](https://en.wikipedia.org/wiki/Resource_contention) between the
420 |      producer and the consumer, one could also imagine avoiding garbage
421 |      collection by only allocating memory the first time around the ring (but we
422 |      don't do this in Haskell);
423 | 
424 |   2. Using an array rather than linked list increasing
425 |      [striding](https://en.wikipedia.org/wiki/Stride_of_an_array) due to
426 |      [spatial
427 |      locality](https://en.wikipedia.org/wiki/Locality_of_reference#Spatial_and_temporal_locality_usage).
428 | 
429 | The ring buffer also keeps track of up to which sequence number its last
430 | consumer has consumed, in order to not overwrite events that haven't been handled
431 | yet.
432 | 
433 | This also means that producers can ask how much capacity left a ring buffer has,
434 | and do batched writes. If there's no capacity left the producer can apply
435 | back-pressure upstream as appropriate.
436 | 
437 | Consumers need keep track of which sequence number they have processed, in order
438 | to avoid having the ring buffer overwrite unprocessed events as already
439 | mentioned, but this also allows consumers to depend on each other.
440 | 
441 | When a consumer is done processing an event, it asks the ring buffer for the
442 | event at its next sequence number, the ring buffer then replies that either
443 | there are no new events, in which case the consumer applies it wait strategy, or
444 | the ring buffer can reply that there are new events, the consumer the handles
445 | each one in turn and the last one will be have the end of batch flag set, so
446 | that the consumer can effectively batch the processing.
447 | 
448 | ### Performance
449 | 
450 | Our Disruptor implementation, which hasn't been optimised much yet, is about 2x
451 | slower than LMAX's Java version on their single-producer single-consumer
452 | [benchmark](https://github.com/LMAX-Exchange/disruptor/blob/master/src/perftest/java/com/lmax/disruptor/sequenced/OneToOneSequencedThroughputTest.java)
453 | (1P1C) (basically the above example) on a couple of years old Linux laptop.
454 | 
455 | The same benchmark compared to other Haskell libraries:
456 | 
457 |   * 10.3x faster than
458 |     [`Control.Concurrent.Chan`](https://hackage.haskell.org/package/base-4.15.0.0/docs/Control-Concurrent-Chan.html);
459 | 
460 |   * 8.3x faster than
461 |     [`Control.Concurrent.STM.TBQueue`](https://hackage.haskell.org/package/stm/docs/Control-Concurrent-STM-TBQueue.html);
462 | 
463 |   * 1.7x faster than
464 |     [`unagi-chan`](https://hackage.haskell.org/package/unagi-chan);
465 | 
466 |   * 25.5x faster than
467 |     [`chaselev-deque`](https://hackage.haskell.org/package/chaselev-deque);
468 | 
469 |   * 700x faster than [`ring-buffer`](https://hackage.haskell.org/package/ring-buffer);
470 | 
471 |   * 1.3x slower than
472 |     [`lockfree-queue`](https://hackage.haskell.org/package/lockfree-queue);
473 | 
474 |   * TODO: Compare with
475 |     [`data-ringbuffer`](https://github.com/kim/data-ringbuffer/tree/master/src/Data/RingBuffer).
476 | 
477 | In the triple-producer single-consumer (3P1C)
478 | [benchmark](https://github.com/LMAX-Exchange/disruptor/blob/master/src/perftest/java/com/lmax/disruptor/sequenced/ThreeToOneSequencedThroughputTest.java),
479 | the Java version is 5x slower than the Java 1P1C case. And our 3P1C is 4.6x
480 | slower than our 1P1C version and our 3P1C version is 2.7x slower than the Java
481 | version.
482 | 
483 | The same benchmark compared to other Haskell libraries:
484 | 
485 |   * 73x faster than
486 |     [`Control.Concurrent.Chan`](https://hackage.haskell.org/package/base-4.15.0.0/docs/Control-Concurrent-Chan.html);
487 | 
488 |   * 3.5x faster than
489 |     [`Control.Concurrent.STM.TBQueue`](https://hackage.haskell.org/package/stm/docs/Control-Concurrent-STM-TBQueue.html);
490 | 
491 |   * 1.3x faster than
492 |     [`unagi-chan`](https://hackage.haskell.org/package/unagi-chan);
493 | 
494 |   * 1.9x faster than
495 |     [`lockfree-queue`](https://hackage.haskell.org/package/lockfree-queue).
496 | 
497 | For a slightly more "real world" example, we modified the 3P1C test to have
498 | three producers that log messages while the consumer writes them to a log file
499 | and compared it to
500 | [`fast-logger`](https://hackage.haskell.org/package/fast-logger). The
501 | `pipelined-state-machines` benchmark has a throughput of 3:4 that of
502 | `fast-logger`. When we bump it to ten concurrently logging threads the
503 | `pipelined-state-machines` benchmark has a throughput of 10:7 that of
504 | `fast-logger`.
505 | 
506 | See the file [`benchmark.sh`](benchmark.sh) for full details about how the
507 | benchmarks are run.
508 | 
509 | As always take benchmarks with a grain of salt, we've tried to make them as fair
510 | with respect to each other and as true to the original Java versions as
511 | possible. If you see anything that seems unfair, or if you get very different
512 | results when trying to reproduce the numbers, then please file an issue.
513 | 
514 | ## Contributing
515 | 
516 | There's a lot of possible paths to explore from here, including:
517 | 
518 | - [ ] Can we swap out our use of `TQueue` for `Disruptor` in our `deploy` of
519 |       `P`ipelines?
520 | - [ ] Can we add something like a `FanOut :: P a b -> P a c -> P a (b, c)` and a
521 |       `Par :: P a c -> P b d -> P (a, b) (c, d)` combinator to allow two
522 |       parallel queues?
523 | - [ ] What about sum-types and error handling?
524 | - [ ] Our current, and the above just mentioned, pipeline combinators are all
525 |       binary to can we generalise this to N-ary?
526 | - [ ] Can we visualise pipelines using `dot` or similar?
527 | - [ ] Can we build a performance/cost simulator of pipelines?
528 | - [ ] Arrow syntax or monadic DSL for pipelines?
529 | - [ ] We've seen
530 |       [previously](https://github.com/stevana/hot-swapping-state-machines) how
531 |       we can hot-code upgrade state machines, what about hot-code upgrading
532 |       pipelines?
533 | - [ ] Can we implement the Erlang `gen_event` behaviour using Disruptor?
534 | - [ ] Would it make sense to use the spiritual successor of the Disruptor
535 |       instead, i.e. the different array queues from `aeron` and `agrona`:
536 |   + [Single-producer
537 |     single-consumer](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/OneToOneConcurrentArrayQueue.java);
538 |   + [Multiple-producers
539 |     single-consumer](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/ManyToOneConcurrentArrayQueue.java);
540 |   + [Multiple-producers
541 |     multiple-consumers](https://github.com/real-logic/agrona/blob/master/agrona/src/main/java/org/agrona/concurrent/ManyToManyConcurrentArrayQueue.java).
542 | - [ ] How exactly do these pipelines relate to the libraries
543 |       [`pipes`](https://hackage.haskell.org/package/pipes),
544 |       [`conduit`](https://hackage.haskell.org/package/conduit) and
545 |       [`streamly`](https://hackage.haskell.org/package/streamly)?
546 | - [ ] How does it relate to synchronous programming languages such as
547 |       [Esterel](https://en.wikipedia.org/wiki/Esterel),
548 |       [Lustre](https://en.wikipedia.org/wiki/Lustre_(programming_language)),
549 |       [ReactiveML](https://rml.lri.fr), etc? It seems to me that their main
550 |       motivation is to be concurrent or parallel while still determinstic, which
551 |       is what we'd like as well. Looking at ReactiveML's documentation for
552 |       [compositions](https://rml.lri.fr/documentation.html#compositions) we see
553 |       the same constructs as we've discussed: their `;` is our `Compose` (with
554 |       its arguments flipped), their `||` is our `FanOut`, their `|>` is our
555 |       `:>>>` and their `let-and` construct could be achived by adding projection
556 |       functions to our `P`ipelines similar to `Fst` and `Snd` for `SM`.
557 |       Interestingly they don't have any sum-types-like construct here, i.e.
558 |       something like `(:|||) :: P a c -> P b c -> P (Either a b) c`;
559 | - [ ] I like to think of how one constructs a pipeline, i.e. the choice of which
560 |       tasks should happen in parallel or should be sharded etc, as a choice of
561 |       how to best make use of the CPUs/cores of a single computer. If seen this
562 |       way then that begs the question: what about a network of multiple
563 |       computers? Perhaps there should be something like a `Topology` data type
564 |       which describes how multiple pipelines interact and a topology is deployed
565 |       by deploying multiple pipelines over multiple machines?
566 | 
567 | ## See also
568 | 
569 | ### Presentations
570 | 
571 |   * [LMAX - How to Do 100K TPS at Less than 1ms
572 |     Latency](https://www.infoq.com/presentations/LMAX/) by Martin Thompson (QCon
573 |     2010);
574 | 
575 |   * [LMAX Disruptor and the Concepts of Mechanical
576 |     Sympathy](https://youtube.com/watch?v=Qho1QNbXBso) by Jamie Allen (2011);
577 | 
578 |   * [Concurrent Programming with the
579 |     Disruptor](https://www.infoq.com/presentations/Concurrent-Programming-Using-The-Disruptor/)
580 |     by Trisha Gee (2012);
581 | 
582 |   * [Disruptor 3.0: Details and Advanced
583 |     Patterns](https://youtube.com/watch?v=2Be_Lqa35Y0) by Mike Barker (YOW!
584 |     2013);
585 | 
586 |   * [Designing for Performance](https://youtube.com/watch?v=fDGWWpHlzvw) by
587 |     Martin Thompson (GOTO 2015);
588 | 
589 |   * [A quest for predictable latency with Java
590 |     concurrency](https://vimeo.com/181814364) Martin Thompson
591 |     (JavaZone 2016);
592 | 
593 |   * [Evolution of Financial Exchange
594 |      Architectures](https://www.youtube.com/watch?v=qDhTjE0XmkE) by Martin
595 |      Thompson (QCon 2020)
596 |       + 1,000,000 tx/s and less than 100 microseconds latency, he is no longer
597 |         at LMAX though so we don't know if these exchanges are using the
598 |         disruptor pattern.
599 | 
600 |   * [*Aeron: Open-source high-performance
601 |     messaging*](https://youtube.com/watch?v=tM4YskS94b0) talk by Martin Thompson
602 |     (Strange Loop, 2014);
603 | 
604 |   * *Aeron: What, Why and What Next?*
605 |     [talk](https://youtube.com/watch?v=p1bsloPeBzE) by Todd Montgomery (GOTO,
606 |     2015);
607 | 
608 |   * *Cluster Consensus: when Aeron met Raft*
609 |     [talk](https://youtube.com/watch?v=GFfLCGW_5-w) by Martin Thompson (GOTO,
610 |     2018);
611 | 
612 |   * *Fault Tolerant 24/7 Operations with Aeron Cluster*
613 |     [talk](https://youtube.com/watch?v=H9yqzfNiEb4) by Todd Montgomery (2022).
614 | 
615 | ### Writings
616 | 
617 |   * Martin Thompson's [blog](https://mechanical-sympathy.blogspot.com/);
618 |   * The Disruptor [mailing list](https://groups.google.com/g/lmax-disruptor);
619 |   * The Mechanical Sympathy [mailing list](https://groups.google.com/g/mechanical-sympathy);
620 |   * [The LMAX Architecture](https://martinfowler.com/articles/lmax.html) by
621 |     Martin Fowler (2011);
622 |   * [Staged event-driven
623 |     architecture](https://en.wikipedia.org/wiki/Staged_event-driven_architecture);
624 |   * [The Reactive Manifesto](https://www.reactivemanifesto.org/);
625 |   * [Flow-based programming](https://en.wikipedia.org/wiki/Flow-based_programming).
626 | 


--------------------------------------------------------------------------------