├── .ghci ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── ChangeLog ├── LICENSE ├── README.md ├── Setup.lhs ├── distributed-process-supervisor.cabal ├── src └── Control │ └── Distributed │ └── Process │ ├── Supervisor.hs │ └── Supervisor │ ├── Management.hs │ └── Types.hs ├── stack.yaml ├── test-report.hs └── tests ├── TestSupervisor.hs └── TestUtils.hs /.ghci: -------------------------------------------------------------------------------- 1 | :set -isrc -itests 2 | 3 | :def hoogle \x -> return $ ":!hoogle " ++ x 4 | 5 | :def doc \x -> return $ ":!hoogle --info \"" ++ x ++ "\"" 6 | 7 | :set -w -fwarn-unused-binds -fwarn-unused-imports 8 | 9 | :load tests/Main.hs -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[ponm] 2 | dist/ 3 | .cabal-sandbox 4 | cabal.sandbox.config 5 | .stack* 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | matrix: 6 | include: 7 | - env: ARGS="--resolver nightly" COVER="" GHCVER=latest 8 | addons: {apt: {packages: [libgmp-dev]}} 9 | 10 | cache: 11 | directories: 12 | - $HOME/.stack 13 | - $HOME/.local 14 | 15 | before_install: 16 | - export PATH=$HOME/.local/bin:$HOME/.cabal/bin:$PATH 17 | - mkdir -p ~/.local/bin 18 | - travis_retry curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack' 19 | - stack --version 20 | 21 | install: 22 | - stack ${ARGS} setup --no-terminal 23 | 24 | script: 25 | - case "$COVER" in 26 | true) 27 | stack ${ARGS} test --coverage --no-terminal; 28 | ./coverage.sh; 29 | ;; 30 | *) 31 | stack test --test-arguments='--plain' 32 | ;; 33 | esac 34 | 35 | notifications: 36 | slack: 37 | secure: g0NP1tkOe3+kI6O0Q1mgT/jPaLjxQ31J26MWouicu2F1Y3p73qTvv/QsOkafRMZDn07HlzgviCP25r7Ytg32pUAFvOh4U4MT2MpO0jUVVGPi4ZiwB+W5AH+HlDtJSickeSZ0AjXZSaGv8nQNegWkeaLQgLBIzrTHU8s0Y9K+whQ= 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | See https://github.com/haskell-distributed/cloud-haskell/blob/master/CONTRIBUTING.md. 2 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2018-06-14 Facundo Domínguez 0.2.1 2 | 3 | * Update dependency bounds. 4 | 5 | 2016-02-16 Facundo Domínguez 0.1.3.2 6 | 7 | * Update dependency bounds. 8 | 9 | 2015-06-15 Facundo Domínguez 0.1.3 10 | 11 | * Add compatibility with ghc-7.10. 12 | * Fix dependency bounds. 13 | 14 | # HEAD 15 | 16 | * Added initial GenServer module 17 | * Added Timer Module 18 | * Moved time functions into Time.hs 19 | * Added Async API 20 | * Added GenProcess API (subsumes lower level GenServer API) 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Tim Watson, 2012-2013. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of the author nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # distributed-process-supervisor (archive) 2 | 3 | ## :warning: This package is now developed here: https://github.com/haskell-distributed/distributed-process :warning: 4 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | > import Distribution.Simple 3 | > main = defaultMain 4 | -------------------------------------------------------------------------------- /distributed-process-supervisor.cabal: -------------------------------------------------------------------------------- 1 | name: distributed-process-supervisor 2 | version: 0.2.1 3 | cabal-version: >=1.8 4 | build-type: Simple 5 | license: BSD3 6 | license-file: LICENSE 7 | Copyright: Tim Watson 2012 - 2013 8 | Author: Tim Watson 9 | Maintainer: Tim Watson 10 | Stability: experimental 11 | Homepage: http://github.com/haskell-distributed/distributed-process-supervisor 12 | Bug-Reports: http://github.com/haskell-distributed/distributed-process-supervisor/issues 13 | synopsis: Supervisors for The Cloud Haskell Application Platform 14 | description: A part of the Cloud Haskell framework 15 | 16 | This package implements a process which supervises a set of other processes, referred to as its children. 17 | These child processes can be either workers (i.e., processes that do something useful in your application) 18 | or other supervisors. In this way, supervisors may be used to build a hierarchical process structure 19 | called a supervision tree, which provides a convenient structure for building fault tolerant software. 20 | 21 | For detailed information see "Control.Distributed.Process.Supervisor" 22 | category: Control 23 | tested-with: GHC == 8.2.2 24 | data-dir: "" 25 | extra-source-files: ChangeLog 26 | 27 | source-repository head 28 | type: git 29 | location: https://github.com/haskell-distributed/distributed-process-supervisor 30 | 31 | library 32 | build-depends: 33 | base >= 4.8.2.0 && < 5, 34 | bytestring >= 0.9, 35 | data-accessor >= 0.2.2.3, 36 | distributed-static >= 0.3.4.0 && < 0.4, 37 | distributed-process >= 0.7.3 && < 0.8, 38 | distributed-process-extras >= 0.3.1 && < 0.4, 39 | distributed-process-client-server >= 0.2.0 && < 0.4, 40 | binary >= 0.6.3.0 && < 0.9, 41 | deepseq >= 1.3.0.1 && < 1.5, 42 | mtl, 43 | containers >= 0.4 && < 0.6, 44 | hashable >= 1.2.0.5 && < 1.3, 45 | unordered-containers >= 0.2.3.0 && < 0.3, 46 | fingertree < 0.2, 47 | stm >= 2.4 && < 2.5, 48 | time > 1.4 && < 1.9, 49 | transformers, 50 | exceptions >= 0.8.3 && < 0.11 51 | extensions: CPP 52 | hs-source-dirs: src 53 | ghc-options: -Wall 54 | exposed-modules: 55 | Control.Distributed.Process.Supervisor 56 | Control.Distributed.Process.Supervisor.Management 57 | other-modules: 58 | Control.Distributed.Process.Supervisor.Types 59 | 60 | test-suite SupervisorTests 61 | type: exitcode-stdio-1.0 62 | build-depends: 63 | base >= 4.8.2.0 && < 5, 64 | ansi-terminal >= 0.5 && < 0.9, 65 | containers, 66 | unordered-containers, 67 | hashable, 68 | distributed-static >= 0.3.5.0 && < 0.4, 69 | distributed-process >= 0.7.3 && < 0.8, 70 | distributed-process-supervisor, 71 | distributed-process-extras >= 0.3 && < 0.4, 72 | distributed-process-client-server, 73 | distributed-static, 74 | bytestring, 75 | random, 76 | data-accessor, 77 | fingertree < 0.2, 78 | network-transport >= 0.4 && < 0.5.3, 79 | mtl, 80 | network-transport-tcp >= 0.4 && < 0.7, 81 | binary >= 0.6.3.0 && < 0.9, 82 | deepseq >= 1.3.0.1 && < 1.5, 83 | network >= 2.3 && < 2.7, 84 | HUnit >= 1.2 && < 2, 85 | stm >= 2.3 && < 2.5, 86 | time > 1.4 && < 1.9, 87 | test-framework >= 0.6 && < 0.9, 88 | test-framework-hunit, 89 | transformers, 90 | rematch >= 0.2.0.0, 91 | ghc-prim, 92 | exceptions >= 0.8.3 && < 0.11 93 | hs-source-dirs: 94 | tests 95 | ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -fno-warn-name-shadowing -fno-warn-unused-do-bind -eventlog 96 | extensions: CPP 97 | main-is: TestSupervisor.hs 98 | other-modules: TestUtils 99 | 100 | test-suite NonThreadedSupervisorTests 101 | type: exitcode-stdio-1.0 102 | build-depends: 103 | base >= 4.8.2.0 && < 5, 104 | ansi-terminal >= 0.5 && < 0.9, 105 | containers, 106 | unordered-containers, 107 | hashable, 108 | distributed-static >= 0.3.5.0 && < 0.4, 109 | distributed-process >= 0.7.3 && < 0.8, 110 | distributed-process-supervisor, 111 | distributed-process-extras, 112 | distributed-process-client-server, 113 | distributed-static, 114 | bytestring, 115 | random, 116 | data-accessor, 117 | fingertree < 0.2, 118 | network-transport >= 0.4 && < 0.5.3, 119 | mtl, 120 | network-transport-tcp >= 0.4 && < 0.7, 121 | binary >= 0.6.3.0 && < 0.9, 122 | deepseq >= 1.3.0.1 && < 1.5, 123 | network >= 2.3 && < 2.7, 124 | HUnit >= 1.2 && < 2, 125 | stm >= 2.3 && < 2.5, 126 | time > 1.4 && < 1.9, 127 | test-framework >= 0.6 && < 0.9, 128 | test-framework-hunit, 129 | transformers, 130 | rematch >= 0.2.0.0, 131 | ghc-prim, 132 | exceptions >= 0.8.3 && < 0.11 133 | hs-source-dirs: 134 | tests 135 | ghc-options: -Wall -rtsopts -fno-warn-unused-do-bind -fno-warn-name-shadowing 136 | extensions: CPP 137 | main-is: TestSupervisor.hs 138 | other-modules: TestUtils 139 | -------------------------------------------------------------------------------- /src/Control/Distributed/Process/Supervisor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE ExistentialQuantification #-} 5 | {-# LANGUAGE FlexibleInstances #-} 6 | {-# LANGUAGE PatternGuards #-} 7 | {-# LANGUAGE RecordWildCards #-} 8 | {-# LANGUAGE ScopedTypeVariables #-} 9 | 10 | ----------------------------------------------------------------------------- 11 | -- | 12 | -- Module : Control.Distributed.Process.Supervisor 13 | -- Copyright : (c) Tim Watson 2012 - 2013 14 | -- License : BSD3 (see the file LICENSE) 15 | -- 16 | -- Maintainer : Tim Watson 17 | -- Stability : experimental 18 | -- Portability : non-portable (requires concurrency) 19 | -- 20 | -- This module implements a process which supervises a set of other 21 | -- processes, referred to as its children. These /child processes/ can be 22 | -- either workers (i.e., processes that do something useful in your application) 23 | -- or other supervisors. In this way, supervisors may be used to build a 24 | -- hierarchical process structure called a supervision tree, which provides 25 | -- a convenient structure for building fault tolerant software. 26 | -- 27 | -- Unless otherwise stated, all client functions in this module will cause the 28 | -- calling process to exit unless the specified supervisor process can be resolved. 29 | -- 30 | -- [Supervision Principles] 31 | -- 32 | -- A supervisor is responsible for starting, stopping and monitoring its child 33 | -- processes so as to keep them alive by restarting them when necessary. 34 | -- 35 | -- The supervisor's children are defined as a list of child specifications 36 | -- (see "ChildSpec"). When a supervisor is started, its children are started 37 | -- in left-to-right (insertion order) according to this list. When a supervisor 38 | -- stops (or exits for any reason), it will stop all its children before exiting. 39 | -- Child specs can be added to the supervisor after it has started, either on 40 | -- the left or right of the existing list of child specs. 41 | -- 42 | -- [Restart Strategies] 43 | -- 44 | -- Supervisors are initialised with a 'RestartStrategy', which describes how 45 | -- the supervisor should respond to a child that exits and should be restarted 46 | -- (see below for the rules governing child restart eligibility). Each restart 47 | -- strategy comprises a 'RestartMode' and 'RestartLimit', which govern how 48 | -- the restart should be handled, and the point at which the supervisor 49 | -- should give up and stop itself respectively. 50 | -- 51 | -- With the exception of the @RestartOne@ strategy, which indicates that the 52 | -- supervisor will restart /only/ the one individual failing child, each 53 | -- strategy describes a way to select the set of children that should be 54 | -- restarted if /any/ child fails. The @RestartAll@ strategy, as its name 55 | -- suggests, selects /all/ children, whilst the @RestartLeft@ and @RestartRight@ 56 | -- strategies select /all/ children to the left or right of the failed child, 57 | -- in insertion (i.e., startup) order. 58 | -- 59 | -- Note that a /branch/ restart will only occur if the child that exited is 60 | -- meant to be restarted. Since @Temporary@ children are never restarted and 61 | -- @Transient@ children are /not/ restarted if they exit normally, in both these 62 | -- circumstances we leave the remaining supervised children alone. Otherwise, 63 | -- the failing child is /always/ included in the /branch/ to be restarted. 64 | -- 65 | -- For a hypothetical set of children @a@ through @d@, the following pseudocode 66 | -- demonstrates how the restart strategies work. 67 | -- 68 | -- > let children = [a..d] 69 | -- > let failure = c 70 | -- > restartsFor RestartOne children failure = [c] 71 | -- > restartsFor RestartAll children failure = [a,b,c,d] 72 | -- > restartsFor RestartLeft children failure = [a,b,c] 73 | -- > restartsFor RestartRight children failure = [c,d] 74 | -- 75 | -- [Branch Restarts] 76 | -- 77 | -- We refer to a restart (strategy) that involves a set of children as a 78 | -- /branch restart/ from now on. The behaviour of branch restarts can be further 79 | -- refined by the 'RestartMode' with which a 'RestartStrategy' is parameterised. 80 | -- The @RestartEach@ mode treats each child sequentially, first stopping the 81 | -- respective child process and then restarting it. Each child is stopped and 82 | -- started fully before moving on to the next, as the following imaginary 83 | -- example demonstrates for children @[a,b,c]@: 84 | -- 85 | -- > stop a 86 | -- > start a 87 | -- > stop b 88 | -- > start b 89 | -- > stop c 90 | -- > start c 91 | -- 92 | -- By contrast, @RestartInOrder@ will first run through the selected list of 93 | -- children, stopping them. Then, once all the children have been stopped, it 94 | -- will make a second pass, to handle (re)starting them. No child is started 95 | -- until all children have been stopped, as the following imaginary example 96 | -- demonstrates: 97 | -- 98 | -- > stop a 99 | -- > stop b 100 | -- > stop c 101 | -- > start a 102 | -- > start b 103 | -- > start c 104 | -- 105 | -- Both the previous examples have shown children being stopped and started 106 | -- from left to right, but that is up to the user. The 'RestartMode' data 107 | -- type's constructors take a 'RestartOrder', which determines whether the 108 | -- selected children will be processed from @LeftToRight@ or @RightToLeft@. 109 | -- 110 | -- Sometimes it is desireable to stop children in one order and start them 111 | -- in the opposite. This is typically the case when children are in some 112 | -- way dependent on one another, such that restarting them in the wrong order 113 | -- might cause the system to misbehave. For this scenarios, there is another 114 | -- 'RestartMode' that will shut children down in the given order, but then 115 | -- restarts them in the reverse. Using @RestartRevOrder@ mode, if we have 116 | -- children @[a,b,c]@ such that @b@ depends on @a@ and @c@ on @b@, we can stop 117 | -- them in the reverse of their startup order, but restart them the other way 118 | -- around like so: 119 | -- 120 | -- > RestartRevOrder RightToLeft 121 | -- 122 | -- The effect will be thus: 123 | -- 124 | -- > stop c 125 | -- > stop b 126 | -- > stop a 127 | -- > start a 128 | -- > start b 129 | -- > start c 130 | -- 131 | -- [Restart Intensity Limits] 132 | -- 133 | -- If a child process repeatedly crashes during (or shortly after) starting, 134 | -- it is possible for the supervisor to get stuck in an endless loop of 135 | -- restarts. In order prevent this, each restart strategy is parameterised 136 | -- with a 'RestartLimit' that caps the number of restarts allowed within a 137 | -- specific time period. If the supervisor exceeds this limit, it will stop, 138 | -- stopping all its children (in left-to-right order) and exit with the 139 | -- reason @ExitOther "ReachedMaxRestartIntensity"@. 140 | -- 141 | -- The 'MaxRestarts' type is a positive integer, and together with a specified 142 | -- @TimeInterval@ forms the 'RestartLimit' to which the supervisor will adhere. 143 | -- Since a great many children can be restarted in close succession when 144 | -- a /branch restart/ occurs (as a result of @RestartAll@, @RestartLeft@ or 145 | -- @RestartRight@ being triggered), the supervisor will track the operation 146 | -- as a single restart attempt, since otherwise it would likely exceed its 147 | -- maximum restart intensity too quickly. 148 | -- 149 | -- [Child Restart and Stop Policies] 150 | -- 151 | -- When the supervisor detects that a child has died, the 'RestartPolicy' 152 | -- configured in the child specification is used to determin what to do. If 153 | -- the this is set to @Permanent@, then the child is always restarted. 154 | -- If it is @Temporary@, then the child is never restarted and the child 155 | -- specification is removed from the supervisor. A @Transient@ child will 156 | -- be restarted only if it exits /abnormally/, otherwise it is left 157 | -- inactive (but its specification is left in place). Finally, an @Intrinsic@ 158 | -- child is treated like a @Transient@ one, except that if /this/ kind of child 159 | -- exits /normally/, then the supervisor will also exit normally. 160 | -- 161 | -- When the supervisor does stop a child process, the "ChildStopPolicy" 162 | -- provided with the 'ChildSpec' determines how the supervisor should go 163 | -- about doing so. If this is "StopImmediately", then the child will 164 | -- be killed without further notice, which means the child will /not/ have 165 | -- an opportunity to clean up any internal state and/or release any held 166 | -- resources. If the policy is @StopTimeout delay@ however, the child 167 | -- will be sent an /exit signal/ instead, i.e., the supervisor will cause 168 | -- the child to exit via @exit childPid ExitShutdown@, and then will wait 169 | -- until the given @delay@ for the child to exit normally. If this does not 170 | -- happen within the given delay, the supervisor will revert to the more 171 | -- aggressive "StopImmediately" policy and try again. Any errors that 172 | -- occur during a timed-out shutdown will be logged, however exit reasons 173 | -- resulting from "StopImmediately" are ignored. 174 | -- 175 | -- [Creating Child Specs] 176 | -- 177 | -- The 'ToChildStart' typeclass simplifies the process of defining a 'ChildStart' 178 | -- providing two default instances from which a 'ChildStart' datum can be 179 | -- generated. The first, takes a @Closure (Process ())@, where the enclosed 180 | -- action (in the @Process@ monad) is the actual (long running) code that we 181 | -- wish to supervise. In the case of a /managed process/, this is usually the 182 | -- server loop, constructed by evaluating some variant of @ManagedProcess.serve@. 183 | -- 184 | -- The second instance supports returning a /handle/ which can contain extra 185 | -- data about the child process - usually this is a newtype wrapper used by 186 | -- clients to communicate with the process. 187 | -- 188 | -- When the supervisor spawns its child processes, they should be linked to their 189 | -- parent (i.e., the supervisor), such that even if the supervisor is killed 190 | -- abruptly by an asynchronous exception, the children will still be taken down 191 | -- with it, though somewhat less ceremoniously in that case. This behaviour is 192 | -- injected by the supervisor for any "ChildStart" built on @Closure (Process ())@ 193 | -- automatically, but the /handle/ based approach requires that the @Closure@ 194 | -- responsible for spawning does the linking itself. 195 | -- 196 | -- Finally, we provide a simple shortcut to @staticClosure@, for consumers 197 | -- who've manually registered with the /remote table/ and don't with to use 198 | -- tempate haskell (e.g. users of the Explicit closures API). 199 | -- 200 | -- [Supervision Trees & Supervisor Shutdown] 201 | -- 202 | -- To create a supervision tree, one simply adds supervisors below one another 203 | -- as children, setting the @childType@ field of their 'ChildSpec' to 204 | -- @Supervisor@ instead of @Worker@. Supervision tree can be arbitrarilly 205 | -- deep, and it is for this reason that we recommend giving a @Supervisor@ child 206 | -- an arbitrary length of time to stop, by setting the delay to @Infinity@ 207 | -- or a very large @TimeInterval@. 208 | -- 209 | ----------------------------------------------------------------------------- 210 | 211 | module Control.Distributed.Process.Supervisor 212 | ( -- * Defining and Running a Supervisor 213 | ChildSpec(..) 214 | , ChildKey 215 | , ChildType(..) 216 | , ChildStopPolicy(..) 217 | , ChildStart(..) 218 | , RegisteredName(LocalName, CustomRegister) 219 | , RestartPolicy(..) 220 | -- , ChildRestart(..) 221 | , ChildRef(..) 222 | , isRunning 223 | , isRestarting 224 | , Child 225 | , StaticLabel 226 | , SupervisorPid 227 | , ChildPid 228 | , ToChildStart(..) 229 | , start 230 | , run 231 | -- * Limits and Defaults 232 | , MaxRestarts 233 | , maxRestarts 234 | , RestartLimit(..) 235 | , limit 236 | , defaultLimits 237 | , RestartMode(..) 238 | , RestartOrder(..) 239 | , RestartStrategy(..) 240 | , ShutdownMode(..) 241 | , restartOne 242 | , restartAll 243 | , restartLeft 244 | , restartRight 245 | -- * Adding and Removing Children 246 | , addChild 247 | , AddChildResult(..) 248 | , StartChildResult(..) 249 | , startChild 250 | , startNewChild 251 | , stopChild 252 | , StopChildResult(..) 253 | , deleteChild 254 | , DeleteChildResult(..) 255 | , restartChild 256 | , RestartChildResult(..) 257 | -- * Normative Shutdown 258 | , shutdown 259 | , shutdownAndWait 260 | -- * Queries and Statistics 261 | , lookupChild 262 | , listChildren 263 | , SupervisorStats(..) 264 | , statistics 265 | , getRestartIntensity 266 | , definedChildren 267 | , definedWorkers 268 | , definedSupervisors 269 | , runningChildren 270 | , runningWorkers 271 | , runningSupervisors 272 | -- * Additional (Misc) Types 273 | , StartFailure(..) 274 | , ChildInitFailure(..) 275 | ) where 276 | 277 | import Control.DeepSeq (NFData) 278 | 279 | import Control.Distributed.Process.Supervisor.Types 280 | import Control.Distributed.Process 281 | ( Process 282 | , ProcessId 283 | , MonitorRef 284 | , DiedReason(..) 285 | , Match 286 | , Handler(..) 287 | , Message 288 | , ProcessMonitorNotification(..) 289 | , Closure 290 | , Static 291 | , exit 292 | , kill 293 | , match 294 | , matchIf 295 | , monitor 296 | , getSelfPid 297 | , liftIO 298 | , catchExit 299 | , catchesExit 300 | , catches 301 | , die 302 | , link 303 | , send 304 | , register 305 | , spawnLocal 306 | , unsafeWrapMessage 307 | , unmonitor 308 | , withMonitor_ 309 | , expect 310 | , unClosure 311 | , receiveWait 312 | , receiveTimeout 313 | , handleMessageIf 314 | ) 315 | import Control.Distributed.Process.Management (mxNotify, MxEvent(MxUser)) 316 | import Control.Distributed.Process.Extras.Internal.Primitives hiding (monitor) 317 | import Control.Distributed.Process.Extras.Internal.Types 318 | ( ExitReason(..) 319 | ) 320 | import Control.Distributed.Process.ManagedProcess 321 | ( handleCall 322 | , handleInfo 323 | , reply 324 | , continue 325 | , stop 326 | , stopWith 327 | , input 328 | , defaultProcess 329 | , prioritised 330 | , InitHandler 331 | , InitResult(..) 332 | , ProcessAction 333 | , ProcessReply 334 | , ProcessDefinition(..) 335 | , PrioritisedProcessDefinition(..) 336 | , Priority() 337 | , DispatchPriority 338 | , UnhandledMessagePolicy(Drop) 339 | , ExitState 340 | , exitState 341 | ) 342 | import qualified Control.Distributed.Process.ManagedProcess.UnsafeClient as Unsafe 343 | ( call 344 | , cast 345 | ) 346 | import qualified Control.Distributed.Process.ManagedProcess as MP 347 | ( pserve 348 | ) 349 | import Control.Distributed.Process.ManagedProcess.Server.Priority 350 | ( prioritiseCast_ 351 | , prioritiseCall_ 352 | , prioritiseInfo_ 353 | , setPriority 354 | , evalAfter 355 | ) 356 | import Control.Distributed.Process.ManagedProcess.Server.Restricted 357 | ( RestrictedProcess 358 | , Result 359 | , RestrictedAction 360 | , getState 361 | , putState 362 | ) 363 | import qualified Control.Distributed.Process.ManagedProcess.Server.Restricted as Restricted 364 | ( handleCallIf 365 | , handleCall 366 | , handleCast 367 | , reply 368 | , continue 369 | ) 370 | import Control.Distributed.Process.Extras.SystemLog 371 | ( LogClient 372 | , LogChan 373 | , LogText 374 | , Logger(..) 375 | ) 376 | import qualified Control.Distributed.Process.Extras.SystemLog as Log 377 | import Control.Distributed.Process.Extras.Time 378 | import Control.Distributed.Static 379 | ( staticClosure 380 | ) 381 | import Control.Exception (SomeException, throwIO) 382 | import Control.Monad.Catch (catch, finally, mask) 383 | import Control.Monad (void, forM) 384 | 385 | import Data.Accessor 386 | ( Accessor 387 | , accessor 388 | , (^:) 389 | , (.>) 390 | , (^=) 391 | , (^.) 392 | ) 393 | import Data.Binary (Binary) 394 | import Data.Foldable (find, foldlM, toList) 395 | import Data.List (foldl') 396 | import qualified Data.List as List (lookup) 397 | import Data.Map.Strict (Map) 398 | import qualified Data.Map.Strict as Map 399 | import Data.Sequence 400 | ( Seq 401 | , ViewL(EmptyL, (:<)) 402 | , ViewR(EmptyR, (:>)) 403 | , (<|) 404 | , (|>) 405 | , (><) 406 | , filter) 407 | import qualified Data.Sequence as Seq 408 | import Data.Time.Clock 409 | ( NominalDiffTime 410 | , UTCTime 411 | , getCurrentTime 412 | , diffUTCTime 413 | ) 414 | import Data.Typeable (Typeable) 415 | 416 | #if ! MIN_VERSION_base(4,6,0) 417 | import Prelude hiding (catch, filter, init, rem) 418 | #else 419 | import Prelude hiding (filter, init, rem) 420 | #endif 421 | 422 | import GHC.Generics 423 | 424 | -------------------------------------------------------------------------------- 425 | -- Types -- 426 | -------------------------------------------------------------------------------- 427 | 428 | -- TODO: ToChildStart belongs with rest of types in 429 | -- Control.Distributed.Process.Supervisor.Types 430 | 431 | -- | A type that can be converted to a 'ChildStart'. 432 | class ToChildStart a where 433 | toChildStart :: a -> Process ChildStart 434 | 435 | instance ToChildStart (Closure (Process ())) where 436 | toChildStart = return . RunClosure 437 | 438 | instance ToChildStart (Closure (SupervisorPid -> Process (ChildPid, Message))) where 439 | toChildStart = return . CreateHandle 440 | 441 | instance ToChildStart (Static (Process ())) where 442 | toChildStart = toChildStart . staticClosure 443 | 444 | -- internal APIs. The corresponding XxxResult types are in 445 | -- Control.Distributed.Process.Supervisor.Types 446 | 447 | data DeleteChild = DeleteChild !ChildKey 448 | deriving (Typeable, Generic) 449 | instance Binary DeleteChild where 450 | instance NFData DeleteChild where 451 | 452 | data FindReq = FindReq ChildKey 453 | deriving (Typeable, Generic) 454 | instance Binary FindReq where 455 | instance NFData FindReq where 456 | 457 | data StatsReq = StatsReq 458 | deriving (Typeable, Generic) 459 | instance Binary StatsReq where 460 | instance NFData StatsReq where 461 | 462 | data ListReq = ListReq 463 | deriving (Typeable, Generic) 464 | instance Binary ListReq where 465 | instance NFData ListReq where 466 | 467 | type ImmediateStart = Bool 468 | 469 | data AddChildReq = AddChild !ImmediateStart !ChildSpec 470 | deriving (Typeable, Generic, Show) 471 | instance Binary AddChildReq where 472 | instance NFData AddChildReq where 473 | 474 | data AddChildRes = Exists ChildRef | Added State 475 | 476 | data StartChildReq = StartChild !ChildKey 477 | deriving (Typeable, Generic) 478 | instance Binary StartChildReq where 479 | instance NFData StartChildReq where 480 | 481 | data RestartChildReq = RestartChildReq !ChildKey 482 | deriving (Typeable, Generic, Show, Eq) 483 | instance Binary RestartChildReq where 484 | instance NFData RestartChildReq where 485 | 486 | data DelayedRestart = DelayedRestart !ChildKey !DiedReason 487 | deriving (Typeable, Generic, Show, Eq) 488 | instance Binary DelayedRestart where 489 | instance NFData DelayedRestart 490 | 491 | data StopChildReq = StopChildReq !ChildKey 492 | deriving (Typeable, Generic, Show, Eq) 493 | instance Binary StopChildReq where 494 | instance NFData StopChildReq where 495 | 496 | data IgnoreChildReq = IgnoreChildReq !ChildPid 497 | deriving (Typeable, Generic) 498 | instance Binary IgnoreChildReq where 499 | instance NFData IgnoreChildReq where 500 | 501 | type ChildSpecs = Seq Child 502 | type Prefix = ChildSpecs 503 | type Suffix = ChildSpecs 504 | 505 | data StatsType = Active | Specified 506 | 507 | data LogSink = LogProcess !LogClient | LogChan 508 | 509 | instance Logger LogSink where 510 | logMessage LogChan = logMessage Log.logChannel 511 | logMessage (LogProcess client') = logMessage client' 512 | 513 | data State = State { 514 | _specs :: ChildSpecs 515 | , _active :: Map ChildPid ChildKey 516 | , _strategy :: RestartStrategy 517 | , _restartPeriod :: NominalDiffTime 518 | , _restarts :: [UTCTime] 519 | , _stats :: SupervisorStats 520 | , _logger :: LogSink 521 | , shutdownStrategy :: ShutdownMode 522 | } 523 | 524 | supErrId :: String -> String 525 | supErrId s = "Control.Distributed.Process" ++ s 526 | 527 | -------------------------------------------------------------------------------- 528 | -- Starting/Running Supervisor -- 529 | -------------------------------------------------------------------------------- 530 | 531 | -- | Start a supervisor (process), running the supplied children and restart 532 | -- strategy. 533 | -- 534 | -- > start = spawnLocal . run 535 | -- 536 | start :: RestartStrategy -> ShutdownMode -> [ChildSpec] -> Process SupervisorPid 537 | start rs ss cs = spawnLocal $ run rs ss cs 538 | 539 | -- | Run the supplied children using the provided restart strategy. 540 | -- 541 | run :: RestartStrategy -> ShutdownMode -> [ChildSpec] -> Process () 542 | run rs ss specs' = MP.pserve (rs, ss, specs') supInit serverDefinition 543 | 544 | -------------------------------------------------------------------------------- 545 | -- Client Facing API -- 546 | -------------------------------------------------------------------------------- 547 | 548 | -- | Obtain statistics about a running supervisor. 549 | -- 550 | statistics :: Addressable a => a -> Process (SupervisorStats) 551 | statistics = (flip Unsafe.call) StatsReq 552 | 553 | -- | Lookup a possibly supervised child, given its 'ChildKey'. 554 | -- 555 | lookupChild :: Addressable a => a -> ChildKey -> Process (Maybe (ChildRef, ChildSpec)) 556 | lookupChild addr key = Unsafe.call addr $ FindReq key 557 | 558 | -- | List all know (i.e., configured) children. 559 | -- 560 | listChildren :: Addressable a => a -> Process [Child] 561 | listChildren addr = Unsafe.call addr ListReq 562 | 563 | -- | Add a new child. 564 | -- 565 | addChild :: Addressable a => a -> ChildSpec -> Process AddChildResult 566 | addChild addr spec = Unsafe.call addr $ AddChild False spec 567 | 568 | -- | Start an existing (configured) child. The 'ChildSpec' must already be 569 | -- present (see 'addChild'), otherwise the operation will fail. 570 | -- 571 | startChild :: Addressable a => a -> ChildKey -> Process StartChildResult 572 | startChild addr key = Unsafe.call addr $ StartChild key 573 | 574 | -- | Atomically add and start a new child spec. Will fail if a child with 575 | -- the given key is already present. 576 | -- 577 | startNewChild :: Addressable a 578 | => a 579 | -> ChildSpec 580 | -> Process AddChildResult 581 | startNewChild addr spec = Unsafe.call addr $ AddChild True spec 582 | 583 | -- | Delete a supervised child. The child must already be stopped (see 584 | -- 'stopChild'). 585 | -- 586 | deleteChild :: Addressable a => a -> ChildKey -> Process DeleteChildResult 587 | deleteChild sid childKey = Unsafe.call sid $ DeleteChild childKey 588 | 589 | -- | Stop a running child. 590 | -- 591 | stopChild :: Addressable a 592 | => a 593 | -> ChildKey 594 | -> Process StopChildResult 595 | stopChild sid = Unsafe.call sid . StopChildReq 596 | 597 | -- | Forcibly restart a running child. 598 | -- 599 | restartChild :: Addressable a 600 | => a 601 | -> ChildKey 602 | -> Process RestartChildResult 603 | restartChild sid = Unsafe.call sid . RestartChildReq 604 | 605 | -- | Gracefully stop/shutdown a running supervisor. Returns immediately if the 606 | -- /address/ cannot be resolved. 607 | -- 608 | shutdown :: Resolvable a => a -> Process () 609 | shutdown sid = do 610 | mPid <- resolve sid 611 | case mPid of 612 | Nothing -> return () 613 | Just p -> exit p ExitShutdown 614 | 615 | -- | As 'shutdown', but waits until the supervisor process has exited, at which 616 | -- point the caller can be sure that all children have also stopped. Returns 617 | -- immediately if the /address/ cannot be resolved. 618 | -- 619 | shutdownAndWait :: Resolvable a => a -> Process () 620 | shutdownAndWait sid = do 621 | mPid <- resolve sid 622 | case mPid of 623 | Nothing -> return () 624 | Just p -> withMonitor_ p $ do 625 | shutdown p 626 | receiveWait [ matchIf (\(ProcessMonitorNotification _ p' _) -> p' == p) 627 | (\_ -> return ()) 628 | ] 629 | 630 | -------------------------------------------------------------------------------- 631 | -- Server Initialisation/Startup -- 632 | -------------------------------------------------------------------------------- 633 | 634 | supInit :: InitHandler (RestartStrategy, ShutdownMode, [ChildSpec]) State 635 | supInit (strategy', shutdown', specs') = do 636 | logClient <- Log.client 637 | let client' = case logClient of 638 | Nothing -> LogChan 639 | Just c -> LogProcess c 640 | let initState = ( ( -- as a NominalDiffTime (in seconds) 641 | restartPeriod ^= configuredRestartPeriod 642 | ) 643 | . (strategy ^= strategy') 644 | . (logger ^= client') 645 | $ emptyState shutdown' 646 | ) 647 | -- TODO: should we return Ignore, as per OTP's supervisor, if no child starts? 648 | catch (foldlM initChild initState specs' >>= return . (flip InitOk) Infinity) 649 | (\(e :: SomeException) -> do 650 | sup <- getSelfPid 651 | logEntry Log.error $ 652 | mkReport "Could not init supervisor " sup "noproc" (show e) 653 | return $ InitStop (show e)) 654 | where 655 | initChild :: State -> ChildSpec -> Process State 656 | initChild st ch = 657 | case (findChild (childKey ch) st) of 658 | Just (ref, _) -> die $ StartFailureDuplicateChild ref 659 | Nothing -> tryStartChild ch >>= initialised st ch 660 | 661 | configuredRestartPeriod = 662 | let maxT' = maxT (intensity strategy') 663 | tI = asTimeout maxT' 664 | tMs = (fromIntegral tI * (0.000001 :: Float)) 665 | in fromRational (toRational tMs) :: NominalDiffTime 666 | 667 | initialised :: State 668 | -> ChildSpec 669 | -> Either StartFailure ChildRef 670 | -> Process State 671 | initialised _ _ (Left err) = liftIO $ throwIO $ ChildInitFailure (show err) 672 | initialised state spec (Right ref) = do 673 | mPid <- resolve ref 674 | case mPid of 675 | Nothing -> die $ (supErrId ".initChild:child=") ++ (childKey spec) ++ ":InvalidChildRef" 676 | Just childPid -> do 677 | return $ ( (active ^: Map.insert childPid chId) 678 | . (specs ^: (|> (ref, spec))) 679 | $ bumpStats Active chType (+1) state 680 | ) 681 | where chId = childKey spec 682 | chType = childType spec 683 | 684 | -------------------------------------------------------------------------------- 685 | -- Server Definition/State -- 686 | -------------------------------------------------------------------------------- 687 | 688 | emptyState :: ShutdownMode -> State 689 | emptyState strat = State { 690 | _specs = Seq.empty 691 | , _active = Map.empty 692 | , _strategy = restartAll 693 | , _restartPeriod = (fromIntegral (0 :: Integer)) :: NominalDiffTime 694 | , _restarts = [] 695 | , _stats = emptyStats 696 | , _logger = LogChan 697 | , shutdownStrategy = strat 698 | } 699 | 700 | emptyStats :: SupervisorStats 701 | emptyStats = SupervisorStats { 702 | _children = 0 703 | , _workers = 0 704 | , _supervisors = 0 705 | , _running = 0 706 | , _activeSupervisors = 0 707 | , _activeWorkers = 0 708 | , totalRestarts = 0 709 | -- , avgRestartFrequency = 0 710 | } 711 | 712 | serverDefinition :: PrioritisedProcessDefinition State 713 | serverDefinition = prioritised processDefinition supPriorities 714 | where 715 | supPriorities :: [DispatchPriority State] 716 | supPriorities = [ 717 | prioritiseCast_ (\(IgnoreChildReq _) -> setPriority 100) 718 | , prioritiseInfo_ (\(ProcessMonitorNotification _ _ _) -> setPriority 99 ) 719 | , prioritiseInfo_ (\(DelayedRestart _ _) -> setPriority 80 ) 720 | , prioritiseCall_ (\(_ :: FindReq) -> 721 | (setPriority 10) :: Priority (Maybe (ChildRef, ChildSpec))) 722 | ] 723 | 724 | processDefinition :: ProcessDefinition State 725 | processDefinition = 726 | defaultProcess { 727 | apiHandlers = [ 728 | Restricted.handleCast handleIgnore 729 | -- adding, removing and (optionally) starting new child specs 730 | , handleCall handleStopChild 731 | , Restricted.handleCall handleDeleteChild 732 | , Restricted.handleCallIf (input (\(AddChild immediate _) -> not immediate)) 733 | handleAddChild 734 | , handleCall handleStartNewChild 735 | , handleCall handleStartChild 736 | , handleCall handleRestartChild 737 | -- stats/info 738 | , Restricted.handleCall handleLookupChild 739 | , Restricted.handleCall handleListChildren 740 | , Restricted.handleCall handleGetStats 741 | ] 742 | , infoHandlers = [ handleInfo handleMonitorSignal 743 | , handleInfo handleDelayedRestart 744 | ] 745 | , shutdownHandler = handleShutdown 746 | , unhandledMessagePolicy = Drop 747 | } :: ProcessDefinition State 748 | 749 | -------------------------------------------------------------------------------- 750 | -- API Handlers -- 751 | -------------------------------------------------------------------------------- 752 | 753 | handleLookupChild :: FindReq 754 | -> RestrictedProcess State (Result (Maybe (ChildRef, ChildSpec))) 755 | handleLookupChild (FindReq key) = getState >>= Restricted.reply . findChild key 756 | 757 | handleListChildren :: ListReq 758 | -> RestrictedProcess State (Result [Child]) 759 | handleListChildren _ = getState >>= Restricted.reply . toList . (^. specs) 760 | 761 | handleAddChild :: AddChildReq 762 | -> RestrictedProcess State (Result AddChildResult) 763 | handleAddChild req = getState >>= return . doAddChild req True >>= doReply 764 | where doReply :: AddChildRes -> RestrictedProcess State (Result AddChildResult) 765 | doReply (Added s) = putState s >> Restricted.reply (ChildAdded ChildStopped) 766 | doReply (Exists e) = Restricted.reply (ChildFailedToStart $ StartFailureDuplicateChild e) 767 | 768 | handleIgnore :: IgnoreChildReq 769 | -> RestrictedProcess State RestrictedAction 770 | handleIgnore (IgnoreChildReq childPid) = do 771 | {- not only must we take this child out of the `active' field, 772 | we also delete the child spec if it's restart type is Temporary, 773 | since restarting Temporary children is dis-allowed -} 774 | state <- getState 775 | let (cId, active') = 776 | Map.updateLookupWithKey (\_ _ -> Nothing) childPid $ state ^. active 777 | case cId of 778 | Nothing -> Restricted.continue 779 | Just c -> do 780 | putState $ ( (active ^= active') 781 | . (resetChildIgnored c) 782 | $ state 783 | ) 784 | Restricted.continue 785 | where 786 | resetChildIgnored :: ChildKey -> State -> State 787 | resetChildIgnored key state = 788 | maybe state id $ updateChild key (setChildStopped True) state 789 | 790 | handleDeleteChild :: DeleteChild 791 | -> RestrictedProcess State (Result DeleteChildResult) 792 | handleDeleteChild (DeleteChild k) = getState >>= handleDelete k 793 | where 794 | handleDelete :: ChildKey 795 | -> State 796 | -> RestrictedProcess State (Result DeleteChildResult) 797 | handleDelete key state = 798 | let (prefix, suffix) = Seq.breakl ((== key) . childKey . snd) $ state ^. specs 799 | in case (Seq.viewl suffix) of 800 | EmptyL -> Restricted.reply ChildNotFound 801 | child :< remaining -> tryDeleteChild child prefix remaining state 802 | 803 | tryDeleteChild (ref, spec) pfx sfx st 804 | | ref == ChildStopped = do 805 | putState $ ( (specs ^= pfx >< sfx) 806 | $ bumpStats Specified (childType spec) decrement st 807 | ) 808 | Restricted.reply ChildDeleted 809 | | otherwise = Restricted.reply $ ChildNotStopped ref 810 | 811 | handleStartChild :: State 812 | -> StartChildReq 813 | -> Process (ProcessReply StartChildResult State) 814 | handleStartChild state (StartChild key) = 815 | let child = findChild key state in 816 | case child of 817 | Nothing -> 818 | reply ChildStartUnknownId state 819 | Just (ref@(ChildRunning _), _) -> 820 | reply (ChildStartFailed (StartFailureAlreadyRunning ref)) state 821 | Just (ref@(ChildRunningExtra _ _), _) -> 822 | reply (ChildStartFailed (StartFailureAlreadyRunning ref)) state 823 | Just (ref@(ChildRestarting _), _) -> 824 | reply (ChildStartFailed (StartFailureAlreadyRunning ref)) state 825 | Just (_, spec) -> do 826 | started <- doStartChild spec state 827 | case started of 828 | Left err -> reply (ChildStartFailed err) state 829 | Right (ref, st') -> reply (ChildStartOk ref) st' 830 | 831 | handleStartNewChild :: State 832 | -> AddChildReq 833 | -> Process (ProcessReply AddChildResult State) 834 | handleStartNewChild state req@(AddChild _ spec) = 835 | let added = doAddChild req False state in 836 | case added of 837 | Exists e -> reply (ChildFailedToStart $ StartFailureDuplicateChild e) state 838 | Added _ -> attemptStart state spec 839 | where 840 | attemptStart st ch = do 841 | started <- tryStartChild ch 842 | case started of 843 | Left err -> reply (ChildFailedToStart err) $ removeChild spec st -- TODO: document this! 844 | Right ref -> do 845 | let st' = ( (specs ^: (|> (ref, spec))) 846 | $ bumpStats Specified (childType spec) (+1) st 847 | ) 848 | in reply (ChildAdded ref) $ markActive st' ref ch 849 | 850 | handleRestartChild :: State 851 | -> RestartChildReq 852 | -> Process (ProcessReply RestartChildResult State) 853 | handleRestartChild state (RestartChildReq key) = 854 | let child = findChild key state in 855 | case child of 856 | Nothing -> 857 | reply ChildRestartUnknownId state 858 | Just (ref@(ChildRunning _), _) -> 859 | reply (ChildRestartFailed (StartFailureAlreadyRunning ref)) state 860 | Just (ref@(ChildRunningExtra _ _), _) -> 861 | reply (ChildRestartFailed (StartFailureAlreadyRunning ref)) state 862 | Just (ref@(ChildRestarting _), _) -> 863 | reply (ChildRestartFailed (StartFailureAlreadyRunning ref)) state 864 | Just (_, spec) -> do 865 | started <- doStartChild spec state 866 | case started of 867 | Left err -> reply (ChildRestartFailed err) state 868 | Right (ref, st') -> reply (ChildRestartOk ref) st' 869 | 870 | handleDelayedRestart :: State 871 | -> DelayedRestart 872 | -> Process (ProcessAction State) 873 | handleDelayedRestart state (DelayedRestart key reason) = 874 | let child = findChild key state in do 875 | case child of 876 | Nothing -> 877 | continue state -- a child could've been stopped and removed by now 878 | Just ((ChildRestarting childPid), spec) -> do 879 | -- TODO: we ignore the unnecessary .active re-assignments in 880 | -- tryRestartChild, in order to keep the code simple - it would be good to 881 | -- clean this up so we don't have to though... 882 | tryRestartChild childPid state (state ^. active) spec reason 883 | Just other -> do 884 | die $ ExitOther $ (supErrId ".handleDelayedRestart:InvalidState: ") ++ (show other) 885 | 886 | handleStopChild :: State 887 | -> StopChildReq 888 | -> Process (ProcessReply StopChildResult State) 889 | handleStopChild state (StopChildReq key) = 890 | let child = findChild key state in 891 | case child of 892 | Nothing -> 893 | reply StopChildUnknownId state 894 | Just (ChildStopped, _) -> 895 | reply StopChildOk state 896 | Just (ref, spec) -> 897 | reply StopChildOk =<< doStopChild ref spec state 898 | 899 | handleGetStats :: StatsReq 900 | -> RestrictedProcess State (Result SupervisorStats) 901 | handleGetStats _ = Restricted.reply . (^. stats) =<< getState 902 | 903 | -------------------------------------------------------------------------------- 904 | -- Child Monitoring -- 905 | -------------------------------------------------------------------------------- 906 | 907 | handleMonitorSignal :: State 908 | -> ProcessMonitorNotification 909 | -> Process (ProcessAction State) 910 | handleMonitorSignal state (ProcessMonitorNotification _ childPid reason) = do 911 | let (cId, active') = 912 | Map.updateLookupWithKey (\_ _ -> Nothing) childPid $ state ^. active 913 | let mSpec = 914 | case cId of 915 | Nothing -> Nothing 916 | Just c -> fmap snd $ findChild c state 917 | case mSpec of 918 | Nothing -> continue $ (active ^= active') state 919 | Just spec -> tryRestart childPid state active' spec reason 920 | 921 | -------------------------------------------------------------------------------- 922 | -- Child Monitoring -- 923 | -------------------------------------------------------------------------------- 924 | 925 | handleShutdown :: ExitState State -> ExitReason -> Process () 926 | handleShutdown state r@(ExitOther reason) = stopChildren (exitState state) r >> die reason 927 | handleShutdown state r = stopChildren (exitState state) r 928 | 929 | -------------------------------------------------------------------------------- 930 | -- Child Start/Restart Handling -- 931 | -------------------------------------------------------------------------------- 932 | 933 | tryRestart :: ChildPid 934 | -> State 935 | -> Map ChildPid ChildKey 936 | -> ChildSpec 937 | -> DiedReason 938 | -> Process (ProcessAction State) 939 | tryRestart childPid state active' spec reason = do 940 | sup <- getSelfPid 941 | logEntry Log.debug $ do 942 | mkReport "signalled restart" sup (childKey spec) (show reason) 943 | case state ^. strategy of 944 | RestartOne _ -> tryRestartChild childPid state active' spec reason 945 | strat -> do 946 | case (childRestart spec, isNormal reason) of 947 | (Intrinsic, True) -> stopWith newState ExitNormal 948 | (Transient, True) -> continue newState 949 | (Temporary, _) -> continue removeTemp 950 | _ -> tryRestartBranch strat spec reason $ newState 951 | where 952 | newState = (active ^= active') state 953 | 954 | removeTemp = removeChild spec $ newState 955 | 956 | isNormal (DiedException _) = False 957 | isNormal _ = True 958 | 959 | tryRestartBranch :: RestartStrategy 960 | -> ChildSpec 961 | -> DiedReason 962 | -> State 963 | -> Process (ProcessAction State) 964 | tryRestartBranch rs sp dr st = -- TODO: use DiedReason for logging... 965 | let mode' = mode rs 966 | tree' = case rs of 967 | RestartAll _ _ -> childSpecs 968 | RestartLeft _ _ -> subTreeL 969 | RestartRight _ _ -> subTreeR 970 | _ -> error "IllegalState" 971 | proc = case mode' of 972 | RestartEach _ -> stopStart (order mode') 973 | _ -> restartBranch mode' 974 | in do us <- getSelfPid 975 | a <- proc tree' 976 | report $ SupervisorBranchRestarted us (childKey sp) dr rs 977 | return a 978 | where 979 | stopStart :: RestartOrder -> ChildSpecs -> Process (ProcessAction State) 980 | stopStart order' tree = do 981 | let tree' = case order' of 982 | LeftToRight -> tree 983 | RightToLeft -> Seq.reverse tree 984 | state <- addRestart activeState 985 | case state of 986 | Nothing -> do us <- getSelfPid 987 | let reason = errorMaxIntensityReached 988 | report $ SupervisorShutdown us (shutdownStrategy st) reason 989 | die reason 990 | Just st' -> apply (foldlM stopStartIt st' tree') 991 | 992 | restartBranch :: RestartMode -> ChildSpecs -> Process (ProcessAction State) 993 | restartBranch mode' tree = do 994 | state <- addRestart activeState 995 | case state of 996 | Nothing -> die errorMaxIntensityReached 997 | Just st' -> do 998 | let (stopTree, startTree) = mkTrees mode' tree 999 | foldlM stopIt st' stopTree >>= \s -> apply $ foldlM startIt s startTree 1000 | 1001 | mkTrees :: RestartMode -> ChildSpecs -> (ChildSpecs, ChildSpecs) 1002 | mkTrees (RestartInOrder LeftToRight) t = (t, t) 1003 | mkTrees (RestartInOrder RightToLeft) t = let rev = Seq.reverse t in (rev, rev) 1004 | mkTrees (RestartRevOrder LeftToRight) t = (t, Seq.reverse t) 1005 | mkTrees (RestartRevOrder RightToLeft) t = (Seq.reverse t, t) 1006 | mkTrees _ _ = error "mkTrees.INVALID_STATE" 1007 | 1008 | stopStartIt :: State -> Child -> Process State 1009 | stopStartIt s ch@(cr, cs) = do 1010 | us <- getSelfPid 1011 | cPid <- resolve cr 1012 | report $ SupervisedChildRestarting us cPid (childKey cs) (ExitOther "RestartedBySupervisor") 1013 | doStopChild cr cs s >>= (flip startIt) ch 1014 | 1015 | stopIt :: State -> Child -> Process State 1016 | stopIt s (cr, cs) = do 1017 | us <- getSelfPid 1018 | cPid <- resolve cr 1019 | report $ SupervisedChildRestarting us cPid (childKey cs) (ExitOther "RestartedBySupervisor") 1020 | doStopChild cr cs s 1021 | 1022 | startIt :: State -> Child -> Process State 1023 | startIt s (_, cs) 1024 | | isTemporary (childRestart cs) = return $ removeChild cs s 1025 | | otherwise = ensureActive cs =<< doStartChild cs s 1026 | 1027 | -- Note that ensureActive will kill this (supervisor) process if 1028 | -- doStartChild fails, simply because the /only/ failure that can 1029 | -- come out of that function (as `Left err') is *bad closure* and 1030 | -- that should have either been picked up during init (i.e., caused 1031 | -- the super to refuse to start) or been removed during `startChild' 1032 | -- or later on. Any other kind of failure will crop up (once we've 1033 | -- finished the restart sequence) as a monitor signal. 1034 | ensureActive :: ChildSpec 1035 | -> Either StartFailure (ChildRef, State) 1036 | -> Process State 1037 | ensureActive cs it 1038 | | (Right (ref, st')) <- it = return $ markActive st' ref cs 1039 | | (Left err) <- it = die $ ExitOther $ branchErrId ++ (childKey cs) ++ ": " ++ (show err) 1040 | | otherwise = error "IllegalState" 1041 | 1042 | branchErrId :: String 1043 | branchErrId = supErrId ".tryRestartBranch:child=" 1044 | 1045 | apply :: (Process State) -> Process (ProcessAction State) 1046 | apply proc = do 1047 | catchExit (proc >>= continue) (\(_ :: ProcessId) -> stop) 1048 | 1049 | activeState = maybe st id $ updateChild (childKey sp) 1050 | (setChildStopped False) st 1051 | 1052 | subTreeL :: ChildSpecs 1053 | subTreeL = 1054 | let (prefix, suffix) = splitTree Seq.breakl 1055 | in case (Seq.viewl suffix) of 1056 | child :< _ -> prefix |> child 1057 | EmptyL -> prefix 1058 | 1059 | subTreeR :: ChildSpecs 1060 | subTreeR = 1061 | let (prefix, suffix) = splitTree Seq.breakr 1062 | in case (Seq.viewr suffix) of 1063 | _ :> child -> child <| prefix 1064 | EmptyR -> prefix 1065 | 1066 | splitTree splitWith = splitWith ((== childKey sp) . childKey . snd) childSpecs 1067 | 1068 | childSpecs :: ChildSpecs 1069 | childSpecs = 1070 | let cs = activeState ^. specs 1071 | ck = childKey sp 1072 | rs' = childRestart sp 1073 | in case (isTransient rs', isTemporary rs', dr) of 1074 | (True, _, DiedNormal) -> filter ((/= ck) . childKey . snd) cs 1075 | (_, True, _) -> filter ((/= ck) . childKey . snd) cs 1076 | _ -> cs 1077 | 1078 | {- restartParallel :: ChildSpecs 1079 | -> RestartOrder 1080 | -> Process (ProcessAction State) 1081 | restartParallel tree order = do 1082 | liftIO $ putStrLn "handling parallel restart" 1083 | let tree' = case order of 1084 | LeftToRight -> tree 1085 | RightToLeft -> Seq.reverse tree 1086 | 1087 | -- TODO: THIS IS INCORRECT... currently (below), we stop 1088 | -- the branch in parallel, but wait on all the exits and then 1089 | -- restart sequentially (based on 'order'). That's not what the 1090 | -- 'RestartParallel' mode advertised, but more importantly, it's 1091 | -- not clear what the semantics for error handling (viz restart errors) 1092 | -- should actually be. 1093 | 1094 | asyncs <- forM (toList tree') $ \ch -> async $ asyncStop ch 1095 | (_errs, st') <- foldlM collectExits ([], activeState) asyncs 1096 | -- TODO: report errs 1097 | apply $ foldlM startIt st' tree' 1098 | where 1099 | asyncStop :: Child -> Process (Maybe (ChildKey, ChildPid)) 1100 | asyncStop (cr, cs) = do 1101 | mPid <- resolve cr 1102 | case mPid of 1103 | Nothing -> return Nothing 1104 | Just childPid -> do 1105 | void $ doStopChild cr cs activeState 1106 | return $ Just (childKey cs, childPid) 1107 | 1108 | collectExits :: ([ExitReason], State) 1109 | -> Async (Maybe (ChildKey, ChildPid)) 1110 | -> Process ([ExitReason], State) 1111 | collectExits (errs, state) hAsync = do 1112 | -- we perform a blocking wait on each handle, since we'll 1113 | -- always wait until the last shutdown has occurred anyway 1114 | asyncResult <- wait hAsync 1115 | let res = mergeState asyncResult state 1116 | case res of 1117 | Left err -> return ((err:errs), state) 1118 | Right st -> return (errs, st) 1119 | 1120 | mergeState :: AsyncResult (Maybe (ChildKey, ChildPid)) 1121 | -> State 1122 | -> Either ExitReason State 1123 | mergeState (AsyncDone Nothing) state = Right state 1124 | mergeState (AsyncDone (Just (key, childPid))) state = Right $ mergeIt key childPid state 1125 | mergeState (AsyncFailed r) _ = Left $ ExitOther (show r) 1126 | mergeState (AsyncLinkFailed r) _ = Left $ ExitOther (show r) 1127 | mergeState _ _ = Left $ ExitOther "IllegalState" 1128 | 1129 | mergeIt :: ChildKey -> ChildPid -> State -> State 1130 | mergeIt key childPid state = 1131 | -- TODO: lookup the old ref -> childPid and delete from the active map 1132 | ( (active ^: Map.delete childPid) 1133 | $ maybe state id (updateChild key (setChildStopped False) state) 1134 | ) 1135 | -} 1136 | 1137 | tryRestartChild :: ChildPid 1138 | -> State 1139 | -> Map ChildPid ChildKey 1140 | -> ChildSpec 1141 | -> DiedReason 1142 | -> Process (ProcessAction State) 1143 | tryRestartChild childPid st active' spec reason 1144 | | DiedNormal <- reason 1145 | , True <- isTransient (childRestart spec) = continue childDown 1146 | | True <- isTemporary (childRestart spec) = continue childRemoved 1147 | | DiedNormal <- reason 1148 | , True <- isIntrinsic (childRestart spec) = stopWith updateStopped ExitNormal 1149 | | otherwise = doRestartChild childPid spec reason st 1150 | where 1151 | childDown = (active ^= active') $ updateStopped 1152 | childRemoved = (active ^= active') $ removeChild spec st 1153 | updateStopped = maybe st id $ updateChild chKey (setChildStopped False) st 1154 | chKey = childKey spec 1155 | 1156 | doRestartChild :: ChildPid -> ChildSpec -> DiedReason -> State -> Process (ProcessAction State) 1157 | doRestartChild pid spec reason state = do -- TODO: use ChildPid and DiedReason to log 1158 | state' <- addRestart state 1159 | case state' of 1160 | Nothing -> -- die errorMaxIntensityReached 1161 | case (childRestartDelay spec) of 1162 | Nothing -> die errorMaxIntensityReached 1163 | Just del -> doRestartDelay pid del spec reason state 1164 | Just st -> do 1165 | sup <- getSelfPid 1166 | report $ SupervisedChildRestarting sup (Just pid) (childKey spec) (ExitOther $ show reason) 1167 | start' <- doStartChild spec st 1168 | case start' of 1169 | Right (ref, st') -> continue $ markActive st' ref spec 1170 | Left err -> do 1171 | -- All child failures are handled via monitor signals, apart from 1172 | -- BadClosure and UnresolvableAddress from the StarterProcess 1173 | -- variants of ChildStart, which both come back from 1174 | -- doStartChild as (Left err). 1175 | if isTemporary (childRestart spec) 1176 | then do 1177 | logEntry Log.warning $ 1178 | mkReport "Error in temporary child" sup (childKey spec) (show err) 1179 | continue $ ( (active ^: Map.filter (/= chKey)) 1180 | . (bumpStats Active chType decrement) 1181 | . (bumpStats Specified chType decrement) 1182 | $ removeChild spec st) 1183 | else do 1184 | logEntry Log.error $ 1185 | mkReport "Unrecoverable error in child. Stopping supervisor" 1186 | sup (childKey spec) (show err) 1187 | stopWith st $ ExitOther $ "Unrecoverable error in child " ++ (childKey spec) 1188 | where 1189 | chKey = childKey spec 1190 | chType = childType spec 1191 | 1192 | 1193 | doRestartDelay :: ChildPid 1194 | -> TimeInterval 1195 | -> ChildSpec 1196 | -> DiedReason 1197 | -> State 1198 | -> Process (ProcessAction State) 1199 | doRestartDelay oldPid rDelay spec reason state = do 1200 | evalAfter rDelay 1201 | (DelayedRestart (childKey spec) reason) 1202 | $ ( (active ^: Map.filter (/= chKey)) 1203 | . (bumpStats Active chType decrement) 1204 | -- . (restarts ^= []) 1205 | $ maybe state id (updateChild chKey (setChildRestarting oldPid) state) 1206 | ) 1207 | where 1208 | chKey = childKey spec 1209 | chType = childType spec 1210 | 1211 | addRestart :: State -> Process (Maybe State) 1212 | addRestart state = do 1213 | now <- liftIO $ getCurrentTime 1214 | let acc = foldl' (accRestarts now) [] (now:restarted) 1215 | case length acc of 1216 | n | n > maxAttempts -> return Nothing 1217 | _ -> return $ Just $ (restarts ^= acc) $ state 1218 | where 1219 | maxAttempts = maxNumberOfRestarts $ maxR $ maxIntensity 1220 | slot = state ^. restartPeriod 1221 | restarted = state ^. restarts 1222 | maxIntensity = state ^. strategy .> restartIntensity 1223 | 1224 | accRestarts :: UTCTime -> [UTCTime] -> UTCTime -> [UTCTime] 1225 | accRestarts now' acc r = 1226 | let diff = diffUTCTime now' r in 1227 | if diff > slot then acc else (r:acc) 1228 | 1229 | doStartChild :: ChildSpec 1230 | -> State 1231 | -> Process (Either StartFailure (ChildRef, State)) 1232 | doStartChild spec st = do 1233 | restart <- tryStartChild spec 1234 | case restart of 1235 | Left f -> return $ Left f 1236 | Right p -> do 1237 | let mState = updateChild chKey (chRunning p) st 1238 | case mState of 1239 | -- TODO: better error message if the child is unrecognised 1240 | Nothing -> die $ (supErrId ".doStartChild.InternalError:") ++ show spec 1241 | Just s' -> return $ Right $ (p, markActive s' p spec) 1242 | where 1243 | chKey = childKey spec 1244 | 1245 | chRunning :: ChildRef -> Child -> Prefix -> Suffix -> State -> Maybe State 1246 | chRunning newRef (_, chSpec) prefix suffix st' = 1247 | Just $ ( (specs ^= prefix >< ((newRef, chSpec) <| suffix)) 1248 | $ bumpStats Active (childType spec) (+1) st' 1249 | ) 1250 | 1251 | tryStartChild :: ChildSpec 1252 | -> Process (Either StartFailure ChildRef) 1253 | tryStartChild ChildSpec{..} = 1254 | case childStart of 1255 | RunClosure proc -> do 1256 | -- TODO: cache your closures!!! 1257 | mProc <- catch (unClosure proc >>= return . Right) 1258 | (\(e :: SomeException) -> return $ Left (show e)) 1259 | case mProc of 1260 | Left err -> logStartFailure $ StartFailureBadClosure err 1261 | Right p -> wrapClosure childKey childRegName p >>= return . Right 1262 | CreateHandle fn -> do 1263 | mFn <- catch (unClosure fn >>= return . Right) 1264 | (\(e :: SomeException) -> return $ Left (show e)) 1265 | case mFn of 1266 | Left err -> logStartFailure $ StartFailureBadClosure err 1267 | Right fn' -> do 1268 | wrapHandle childKey childRegName fn' >>= return . Right 1269 | where 1270 | logStartFailure sf = do 1271 | sup <- getSelfPid 1272 | -- logEntry Log.error $ mkReport "Child Start Error" sup childKey (show sf) 1273 | report $ SupervisedChildStartFailure sup sf childKey 1274 | return $ Left sf 1275 | 1276 | wrapClosure :: ChildKey 1277 | -> Maybe RegisteredName 1278 | -> Process () 1279 | -> Process ChildRef 1280 | wrapClosure key regName proc = do 1281 | supervisor <- getSelfPid 1282 | childPid <- spawnLocal $ do 1283 | self <- getSelfPid 1284 | link supervisor -- die if our parent dies 1285 | maybeRegister regName self 1286 | () <- expect -- wait for a start signal (pid is still private) 1287 | -- we translate `ExitShutdown' into a /normal/ exit 1288 | (proc 1289 | `catchesExit` [ 1290 | (\_ m -> handleMessageIf m (\r -> r == ExitShutdown) 1291 | (\_ -> return ())) 1292 | , (\_ m -> handleMessageIf m (\(ExitOther _) -> True) 1293 | (\r -> logExit supervisor self r)) 1294 | ]) 1295 | `catches` [ Handler $ filterInitFailures supervisor self 1296 | , Handler $ logFailure supervisor self ] 1297 | void $ monitor childPid 1298 | send childPid () 1299 | let cRef = ChildRunning childPid 1300 | report $ SupervisedChildStarted supervisor cRef key 1301 | return cRef 1302 | 1303 | wrapHandle :: ChildKey 1304 | -> Maybe RegisteredName 1305 | -> (SupervisorPid -> Process (ChildPid, Message)) 1306 | -> Process ChildRef 1307 | wrapHandle key regName proc = do 1308 | super <- getSelfPid 1309 | (childPid, msg) <- proc super 1310 | void $ monitor childPid 1311 | maybeRegister regName childPid 1312 | let cRef = ChildRunningExtra childPid msg 1313 | report $ SupervisedChildStarted super cRef key 1314 | return cRef 1315 | 1316 | maybeRegister :: Maybe RegisteredName -> ChildPid -> Process () 1317 | maybeRegister Nothing _ = return () 1318 | maybeRegister (Just (LocalName n)) pid = register n pid 1319 | maybeRegister (Just (CustomRegister clj)) pid = do 1320 | -- TODO: cache your closures!!! 1321 | mProc <- catch (unClosure clj >>= return . Right) 1322 | (\(e :: SomeException) -> return $ Left (show e)) 1323 | case mProc of 1324 | Left err -> die $ ExitOther (show err) 1325 | Right p -> p pid 1326 | 1327 | filterInitFailures :: SupervisorPid 1328 | -> ChildPid 1329 | -> ChildInitFailure 1330 | -> Process () 1331 | filterInitFailures sup childPid ex = do 1332 | case ex of 1333 | ChildInitFailure _ -> do 1334 | -- This is used as a `catches` handler in multiple places 1335 | -- and matches first before the other handlers that 1336 | -- would call logFailure. 1337 | -- We log here to avoid silent failure in those cases. 1338 | -- logEntry Log.error $ mkReport "ChildInitFailure" sup (show childPid) (show ex) 1339 | report $ SupervisedChildInitFailed sup childPid ex 1340 | liftIO $ throwIO ex 1341 | ChildInitIgnore -> Unsafe.cast sup $ IgnoreChildReq childPid 1342 | 1343 | -------------------------------------------------------------------------------- 1344 | -- Child Stop/Shutdown -- 1345 | -------------------------------------------------------------------------------- 1346 | 1347 | stopChildren :: State -> ExitReason -> Process () 1348 | stopChildren state er = do 1349 | us <- getSelfPid 1350 | let strat = shutdownStrategy state 1351 | report $ SupervisorShutdown us strat er 1352 | case strat of 1353 | ParallelShutdown -> do 1354 | let allChildren = toList $ state ^. specs 1355 | terminatorPids <- forM allChildren $ \ch -> do 1356 | pid <- spawnLocal $ void $ syncStop ch $ (active ^= Map.empty) state 1357 | mRef <- monitor pid 1358 | return (mRef, pid) 1359 | terminationErrors <- collectExits [] $ zip terminatorPids (map snd allChildren) 1360 | -- it seems these would also be logged individually in doStopChild 1361 | case terminationErrors of 1362 | [] -> return () 1363 | _ -> do 1364 | sup <- getSelfPid 1365 | void $ logEntry Log.error $ 1366 | mkReport "Errors in stopChildren / ParallelShutdown" 1367 | sup "n/a" (show terminationErrors) 1368 | SequentialShutdown ord -> do 1369 | let specs' = state ^. specs 1370 | let allChildren = case ord of 1371 | RightToLeft -> Seq.reverse specs' 1372 | LeftToRight -> specs' 1373 | void $ foldlM (flip syncStop) state (toList allChildren) 1374 | where 1375 | syncStop :: Child -> State -> Process State 1376 | syncStop (cr, cs) state' = doStopChild cr cs state' 1377 | 1378 | collectExits :: [(ProcessId, DiedReason)] 1379 | -> [((MonitorRef, ProcessId), ChildSpec)] 1380 | -> Process [(ProcessId, DiedReason)] 1381 | collectExits errors [] = return errors 1382 | collectExits errors pids = do 1383 | (ref, pid, reason) <- receiveWait [ 1384 | match (\(ProcessMonitorNotification ref' pid' reason') -> do 1385 | return (ref', pid', reason')) 1386 | ] 1387 | let remaining = [p | p <- pids, (snd $ fst p) /= pid] 1388 | let spec = List.lookup (ref, pid) pids 1389 | case (reason, spec) of 1390 | (DiedUnknownId, _) -> collectExits errors remaining 1391 | (DiedNormal, _) -> collectExits errors remaining 1392 | (_, Nothing) -> collectExits errors remaining 1393 | (DiedException _, Just sp') -> do 1394 | if (childStop sp') == StopImmediately 1395 | then collectExits errors remaining 1396 | else collectExits ((pid, reason):errors) remaining 1397 | _ -> collectExits ((pid, reason):errors) remaining 1398 | 1399 | doStopChild :: ChildRef -> ChildSpec -> State -> Process State 1400 | doStopChild ref spec state = do 1401 | us <- getSelfPid 1402 | mPid <- resolve ref 1403 | case mPid of 1404 | Nothing -> return state -- an already dead child is not an error 1405 | Just pid -> do 1406 | stopped <- childShutdown (childStop spec) pid state 1407 | report $ SupervisedChildStopped us ref stopped 1408 | -- state' <- shutdownComplete state pid stopped 1409 | return $ ( (active ^: Map.delete pid) 1410 | $ updateStopped 1411 | ) 1412 | where 1413 | {-shutdownComplete :: State -> ChildPid -> DiedReason -> Process State-} 1414 | {-shutdownComplete _ _ DiedNormal = return $ updateStopped-} 1415 | {-shutdownComplete state' pid (r :: DiedReason) = do-} 1416 | {-logShutdown (state' ^. logger) chKey pid r >> return state'-} 1417 | 1418 | chKey = childKey spec 1419 | updateStopped = maybe state id $ updateChild chKey (setChildStopped False) state 1420 | 1421 | childShutdown :: ChildStopPolicy 1422 | -> ChildPid 1423 | -> State 1424 | -> Process DiedReason 1425 | childShutdown policy childPid st = mask $ \restore -> do 1426 | case policy of 1427 | (StopTimeout t) -> exit childPid ExitShutdown >> await restore childPid t st 1428 | -- we ignore DiedReason for brutal kills 1429 | StopImmediately -> do 1430 | kill childPid "StoppedBySupervisor" 1431 | void $ await restore childPid Infinity st 1432 | return DiedNormal 1433 | where 1434 | await restore' childPid' delay state = do 1435 | -- We require and additional monitor here when child shutdown occurs 1436 | -- during a restart which was triggered by the /old/ monitor signal. 1437 | -- Just to be safe, we monitor the child immediately to be sure it goes. 1438 | mRef <- monitor childPid' 1439 | let recv = case delay of 1440 | Infinity -> receiveWait (matches mRef) >>= return . Just 1441 | NoDelay -> receiveTimeout 0 (matches mRef) 1442 | Delay t -> receiveTimeout (asTimeout t) (matches mRef) 1443 | -- let recv' = if monitored then recv else withMonitor childPid' recv 1444 | res <- recv `finally` (unmonitor mRef) 1445 | restore' $ maybe (childShutdown StopImmediately childPid' state) return res 1446 | 1447 | matches :: MonitorRef -> [Match DiedReason] 1448 | matches m = [ 1449 | matchIf (\(ProcessMonitorNotification m' _ _) -> m == m') 1450 | (\(ProcessMonitorNotification _ _ r) -> return r) 1451 | ] 1452 | 1453 | -------------------------------------------------------------------------------- 1454 | -- Loging/Reporting -- 1455 | -------------------------------------------------------------------------------- 1456 | 1457 | errorMaxIntensityReached :: ExitReason 1458 | errorMaxIntensityReached = ExitOther "ReachedMaxRestartIntensity" 1459 | 1460 | report :: MxSupervisor -> Process () 1461 | report = mxNotify . MxUser . unsafeWrapMessage 1462 | 1463 | {-logShutdown :: LogSink -> ChildKey -> ChildPid -> DiedReason -> Process ()-} 1464 | {-logShutdown log' child childPid reason = do-} 1465 | {-sup <- getSelfPid-} 1466 | {-Log.info log' $ mkReport banner sup (show childPid) shutdownReason-} 1467 | {-where-} 1468 | {-banner = "Child Shutdown Complete"-} 1469 | {-shutdownReason = (show reason) ++ ", child-key: " ++ child-} 1470 | 1471 | logExit :: SupervisorPid -> ChildPid -> ExitReason -> Process () 1472 | logExit sup pid er = do 1473 | report $ SupervisedChildDied sup pid er 1474 | 1475 | logFailure :: SupervisorPid -> ChildPid -> SomeException -> Process () 1476 | logFailure sup childPid ex = do 1477 | logEntry Log.notice $ mkReport "Detected Child Exit" sup (show childPid) (show ex) 1478 | liftIO $ throwIO ex 1479 | 1480 | logEntry :: (LogChan -> LogText -> Process ()) -> String -> Process () 1481 | logEntry lg = Log.report lg Log.logChannel 1482 | 1483 | mkReport :: String -> SupervisorPid -> String -> String -> String 1484 | mkReport b s c r = foldl' (\x xs -> xs ++ " " ++ x) "" (reverse items) 1485 | where 1486 | items :: [String] 1487 | items = [ "[" ++ s' ++ "]" | s' <- [ b 1488 | , "supervisor: " ++ show s 1489 | , "child: " ++ c 1490 | , "reason: " ++ r] ] 1491 | 1492 | -------------------------------------------------------------------------------- 1493 | -- Accessors and State/Stats Utilities -- 1494 | -------------------------------------------------------------------------------- 1495 | 1496 | type Ignored = Bool 1497 | 1498 | -- TODO: test that setChildStopped does not re-order the 'specs sequence 1499 | 1500 | setChildStopped :: Ignored -> Child -> Prefix -> Suffix -> State -> Maybe State 1501 | setChildStopped ignored child prefix remaining st = 1502 | let spec = snd child 1503 | rType = childRestart spec 1504 | newRef = if ignored then ChildStartIgnored else ChildStopped 1505 | in case isTemporary rType of 1506 | True -> Just $ (specs ^= prefix >< remaining) $ st 1507 | False -> Just $ (specs ^= prefix >< ((newRef, spec) <| remaining)) st 1508 | 1509 | setChildRestarting :: ChildPid -> Child -> Prefix -> Suffix -> State -> Maybe State 1510 | setChildRestarting oldPid child prefix remaining st = 1511 | let spec = snd child 1512 | newRef = ChildRestarting oldPid 1513 | in Just $ (specs ^= prefix >< ((newRef, spec) <| remaining)) st 1514 | 1515 | -- setChildStarted :: ChildPid -> 1516 | 1517 | doAddChild :: AddChildReq -> Bool -> State -> AddChildRes 1518 | doAddChild (AddChild _ spec) update st = 1519 | let chType = childType spec 1520 | in case (findChild (childKey spec) st) of 1521 | Just (ref, _) -> Exists ref 1522 | Nothing -> 1523 | case update of 1524 | True -> Added $ ( (specs ^: (|> (ChildStopped, spec))) 1525 | $ bumpStats Specified chType (+1) st 1526 | ) 1527 | False -> Added st 1528 | 1529 | updateChild :: ChildKey 1530 | -> (Child -> Prefix -> Suffix -> State -> Maybe State) 1531 | -> State 1532 | -> Maybe State 1533 | updateChild key updateFn state = 1534 | let (prefix, suffix) = Seq.breakl ((== key) . childKey . snd) $ state ^. specs 1535 | in case (Seq.viewl suffix) of 1536 | EmptyL -> Nothing 1537 | child :< remaining -> updateFn child prefix remaining state 1538 | 1539 | removeChild :: ChildSpec -> State -> State 1540 | removeChild spec state = 1541 | let k = childKey spec 1542 | in specs ^: filter ((/= k) . childKey . snd) $ state 1543 | 1544 | -- DO NOT call this function unless you've verified the ChildRef first. 1545 | markActive :: State -> ChildRef -> ChildSpec -> State 1546 | markActive state ref spec = 1547 | case ref of 1548 | ChildRunning (pid :: ChildPid) -> inserted pid 1549 | ChildRunningExtra pid _ -> inserted pid 1550 | _ -> error $ "InternalError" 1551 | where 1552 | inserted pid' = active ^: Map.insert pid' (childKey spec) $ state 1553 | 1554 | decrement :: Int -> Int 1555 | decrement n = n - 1 1556 | 1557 | -- this is O(n) in the worst case, which is a bit naff, but we 1558 | -- can optimise it later with a different data structure, if required 1559 | findChild :: ChildKey -> State -> Maybe (ChildRef, ChildSpec) 1560 | findChild key st = find ((== key) . childKey . snd) $ st ^. specs 1561 | 1562 | bumpStats :: StatsType -> ChildType -> (Int -> Int) -> State -> State 1563 | bumpStats Specified Supervisor fn st = (bump fn) . (stats .> supervisors ^: fn) $ st 1564 | bumpStats Specified Worker fn st = (bump fn) . (stats .> workers ^: fn) $ st 1565 | bumpStats Active Worker fn st = (stats .> running ^: fn) . (stats .> activeWorkers ^: fn) $ st 1566 | bumpStats Active Supervisor fn st = (stats .> running ^: fn) . (stats .> activeSupervisors ^: fn) $ st 1567 | 1568 | bump :: (Int -> Int) -> State -> State 1569 | bump with' = stats .> children ^: with' 1570 | 1571 | isTemporary :: RestartPolicy -> Bool 1572 | isTemporary = (== Temporary) 1573 | 1574 | isTransient :: RestartPolicy -> Bool 1575 | isTransient = (== Transient) 1576 | 1577 | isIntrinsic :: RestartPolicy -> Bool 1578 | isIntrinsic = (== Intrinsic) 1579 | 1580 | active :: Accessor State (Map ChildPid ChildKey) 1581 | active = accessor _active (\act' st -> st { _active = act' }) 1582 | 1583 | strategy :: Accessor State RestartStrategy 1584 | strategy = accessor _strategy (\s st -> st { _strategy = s }) 1585 | 1586 | restartIntensity :: Accessor RestartStrategy RestartLimit 1587 | restartIntensity = accessor intensity (\i l -> l { intensity = i }) 1588 | 1589 | -- | The "RestartLimit" for a given "RestartStrategy" 1590 | getRestartIntensity :: RestartStrategy -> RestartLimit 1591 | getRestartIntensity = (^. restartIntensity) 1592 | 1593 | restartPeriod :: Accessor State NominalDiffTime 1594 | restartPeriod = accessor _restartPeriod (\p st -> st { _restartPeriod = p }) 1595 | 1596 | restarts :: Accessor State [UTCTime] 1597 | restarts = accessor _restarts (\r st -> st { _restarts = r }) 1598 | 1599 | specs :: Accessor State ChildSpecs 1600 | specs = accessor _specs (\sp' st -> st { _specs = sp' }) 1601 | 1602 | stats :: Accessor State SupervisorStats 1603 | stats = accessor _stats (\st' st -> st { _stats = st' }) 1604 | 1605 | logger :: Accessor State LogSink 1606 | logger = accessor _logger (\l st -> st { _logger = l }) 1607 | 1608 | children :: Accessor SupervisorStats Int 1609 | children = accessor _children (\c st -> st { _children = c }) 1610 | 1611 | -- | How many child specs are defined for this supervisor 1612 | definedChildren :: SupervisorStats -> Int 1613 | definedChildren = (^. children) 1614 | 1615 | workers :: Accessor SupervisorStats Int 1616 | workers = accessor _workers (\c st -> st { _workers = c }) 1617 | 1618 | -- | How many child specs define a worker (non-supervisor) 1619 | definedWorkers :: SupervisorStats -> Int 1620 | definedWorkers = (^. workers) 1621 | 1622 | supervisors :: Accessor SupervisorStats Int 1623 | supervisors = accessor _supervisors (\c st -> st { _supervisors = c }) 1624 | 1625 | -- | How many child specs define a supervisor? 1626 | definedSupervisors :: SupervisorStats -> Int 1627 | definedSupervisors = (^. supervisors) 1628 | 1629 | running :: Accessor SupervisorStats Int 1630 | running = accessor _running (\r st -> st { _running = r }) 1631 | 1632 | -- | How many running child processes. 1633 | runningChildren :: SupervisorStats -> Int 1634 | runningChildren = (^. running) 1635 | 1636 | activeWorkers :: Accessor SupervisorStats Int 1637 | activeWorkers = accessor _activeWorkers (\c st -> st { _activeWorkers = c }) 1638 | 1639 | -- | How many worker (non-supervisor) child processes are running. 1640 | runningWorkers :: SupervisorStats -> Int 1641 | runningWorkers = (^. activeWorkers) 1642 | 1643 | activeSupervisors :: Accessor SupervisorStats Int 1644 | activeSupervisors = accessor _activeSupervisors (\c st -> st { _activeSupervisors = c }) 1645 | 1646 | -- | How many supervisor child processes are running 1647 | runningSupervisors :: SupervisorStats -> Int 1648 | runningSupervisors = (^. activeSupervisors) 1649 | -------------------------------------------------------------------------------- /src/Control/Distributed/Process/Supervisor/Management.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE FlexibleInstances #-} 5 | 6 | ----------------------------------------------------------------------------- 7 | -- | 8 | -- Module : Control.Distributed.Process.Supervisor.Management 9 | -- Copyright : (c) Tim Watson 2017 10 | -- License : BSD3 (see the file LICENSE) 11 | -- 12 | -- Maintainer : Tim Watson 13 | -- Stability : experimental 14 | -- Portability : non-portable (requires concurrency) 15 | -- 16 | ----------------------------------------------------------------------------- 17 | 18 | module Control.Distributed.Process.Supervisor.Management 19 | ( supervisionAgentId 20 | , supervisionMonitor 21 | , monitorSupervisor 22 | , unmonitorSupervisor 23 | -- * Mx Event Type 24 | , MxSupervisor(..) 25 | ) 26 | where 27 | import Control.DeepSeq (NFData) 28 | import Control.Distributed.Process 29 | ( ProcessId 30 | , Process() 31 | , ReceivePort() 32 | , newChan 33 | , sendChan 34 | , getSelfPid 35 | , unwrapMessage 36 | ) 37 | import Control.Distributed.Process.Internal.Types (SendPort(..)) 38 | import Control.Distributed.Process.Management 39 | ( MxAgentId(..) 40 | , MxAgent() 41 | , MxEvent(MxProcessDied, MxUser) 42 | , mxAgent 43 | , mxSink 44 | , mxReady 45 | , liftMX 46 | , mxGetLocal 47 | , mxSetLocal 48 | , mxNotify 49 | ) 50 | import Control.Distributed.Process.Supervisor.Types 51 | ( MxSupervisor(..) 52 | , SupervisorPid 53 | ) 54 | import Data.Binary 55 | import Data.Foldable (mapM_) 56 | import Data.Hashable (Hashable(..)) 57 | import Control.Distributed.Process.Extras.Internal.Containers.MultiMap (MultiMap) 58 | import qualified Control.Distributed.Process.Extras.Internal.Containers.MultiMap as Map 59 | 60 | import Data.Typeable (Typeable) 61 | import GHC.Generics 62 | 63 | data Register = Register !SupervisorPid !ProcessId !(SendPort MxSupervisor) 64 | deriving (Typeable, Generic) 65 | instance Binary Register where 66 | instance NFData Register where 67 | 68 | data UnRegister = UnRegister !SupervisorPid !ProcessId 69 | deriving (Typeable, Generic) 70 | instance Binary UnRegister where 71 | instance NFData UnRegister where 72 | 73 | newtype SupMxChan = SupMxChan { smxc :: SendPort MxSupervisor } 74 | deriving (Typeable, Generic, Show) 75 | instance Binary SupMxChan 76 | instance NFData SupMxChan 77 | instance Hashable SupMxChan where 78 | hashWithSalt salt sp = hashWithSalt salt $ sendPortId (smxc sp) 79 | instance Eq SupMxChan where 80 | (==) a b = (sendPortId $ smxc a) == (sendPortId $ smxc b) 81 | 82 | type State = MultiMap SupervisorPid (ProcessId, SupMxChan) 83 | 84 | -- | The @MxAgentId@ for the node monitoring agent. 85 | supervisionAgentId :: MxAgentId 86 | supervisionAgentId = MxAgentId "service.monitoring.supervision" 87 | 88 | -- | Monitor the supervisor for the given pid. Binds a typed channel to the 89 | -- calling process, to which the resulting @ReceivePort@ belongs. 90 | -- 91 | -- Multiple monitors can be created for any @calling process <-> sup@ pair. 92 | -- Each monitor maintains its own typed channel, which will only contain 93 | -- "MxSupervisor" entries obtained /after/ the channel was established. 94 | -- 95 | monitorSupervisor :: SupervisorPid -> Process (ReceivePort MxSupervisor) 96 | monitorSupervisor sup = do 97 | us <- getSelfPid 98 | (sp, rp) <- newChan 99 | mxNotify $ Register sup us sp 100 | return rp 101 | 102 | -- | Removes all monitors for @sup@, associated with the calling process. 103 | -- It is not possible to delete individual monitors (i.e. typed channels). 104 | -- 105 | unmonitorSupervisor :: SupervisorPid -> Process () 106 | unmonitorSupervisor sup = getSelfPid >>= mxNotify . UnRegister sup 107 | 108 | -- | Starts the supervision monitoring agent. 109 | supervisionMonitor :: Process ProcessId 110 | supervisionMonitor = do 111 | mxAgent supervisionAgentId initState [ 112 | (mxSink $ \(Register sup pid sp) -> do 113 | mxSetLocal . Map.insert sup (pid, SupMxChan sp) =<< mxGetLocal 114 | mxReady) 115 | , (mxSink $ \(UnRegister sup pid) -> do 116 | st <- mxGetLocal 117 | mxSetLocal $ Map.filterWithKey (\k v -> if k == sup then (fst v) /= pid else True) st 118 | mxReady) 119 | , (mxSink $ \(ev :: MxEvent) -> do 120 | case ev of 121 | MxUser msg -> goNotify msg >> mxReady 122 | MxProcessDied pid _ -> do st <- mxGetLocal 123 | mxSetLocal $ Map.filter ((/= pid) . fst) st 124 | mxReady 125 | _ -> mxReady) 126 | ] 127 | where 128 | initState :: State 129 | initState = Map.empty 130 | 131 | goNotify msg = do 132 | ev <- liftMX $ unwrapMessage msg :: MxAgent State (Maybe MxSupervisor) 133 | case ev of 134 | Just ev' -> do st <- mxGetLocal 135 | mapM_ (liftMX . (flip sendChan) ev' . smxc . snd) 136 | (maybe [] id $ Map.lookup (supervisorPid ev') st) 137 | Nothing -> return () 138 | -------------------------------------------------------------------------------- /src/Control/Distributed/Process/Supervisor/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveGeneric #-} 2 | {-# LANGUAGE DeriveDataTypeable #-} 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Control.Distributed.Process.Supervisor.Types 6 | -- Copyright : (c) Tim Watson 2012 7 | -- License : BSD3 (see the file LICENSE) 8 | -- 9 | -- Maintainer : Tim Watson 10 | -- Stability : experimental 11 | -- Portability : non-portable (requires concurrency) 12 | -- 13 | ----------------------------------------------------------------------------- 14 | module Control.Distributed.Process.Supervisor.Types 15 | ( -- * Defining and Running a Supervisor 16 | ChildSpec(..) 17 | , ChildKey 18 | , ChildType(..) 19 | , ChildStopPolicy(..) 20 | , ChildStart(..) 21 | , RegisteredName(LocalName, CustomRegister) 22 | , RestartPolicy(..) 23 | , ChildRef(..) 24 | , isRunning 25 | , isRestarting 26 | , Child 27 | , StaticLabel 28 | , SupervisorPid 29 | , ChildPid 30 | -- * Limits and Defaults 31 | , MaxRestarts(..) 32 | , maxRestarts 33 | , RestartLimit(..) 34 | , limit 35 | , defaultLimits 36 | , RestartMode(..) 37 | , RestartOrder(..) 38 | , RestartStrategy(..) 39 | , ShutdownMode(..) 40 | , restartOne 41 | , restartAll 42 | , restartLeft 43 | , restartRight 44 | -- * Adding and Removing Children 45 | , AddChildResult(..) 46 | , StartChildResult(..) 47 | , StopChildResult(..) 48 | , DeleteChildResult(..) 49 | , RestartChildResult(..) 50 | -- * Additional (Misc) Types 51 | , SupervisorStats(..) 52 | , StartFailure(..) 53 | , ChildInitFailure(..) 54 | , MxSupervisor(..) 55 | ) where 56 | 57 | import GHC.Generics 58 | import Data.Typeable (Typeable) 59 | import Data.Binary 60 | 61 | import Control.DeepSeq (NFData) 62 | import Control.Distributed.Process hiding (call) 63 | import Control.Distributed.Process.Serializable() 64 | import Control.Distributed.Process.Extras.Internal.Types 65 | ( ExitReason(..) 66 | ) 67 | import Control.Distributed.Process.Extras.Time 68 | import Control.Distributed.Process.Extras.Internal.Primitives hiding (monitor) 69 | import Control.Exception (Exception) 70 | import Data.Hashable (Hashable) 71 | 72 | -- aliases for api documentation purposes 73 | 74 | -- | The "ProcessId" of a supervisor. 75 | type SupervisorPid = ProcessId 76 | 77 | -- | The "ProcessId" of a supervised /child/. 78 | type ChildPid = ProcessId 79 | 80 | -- | The maximum number of restarts a supervisor will tollerate, created by 81 | -- evaluating "maxRestarts". 82 | newtype MaxRestarts = MaxR { maxNumberOfRestarts :: Int } 83 | deriving (Typeable, Generic, Show) 84 | instance Binary MaxRestarts where 85 | instance Hashable MaxRestarts where 86 | instance NFData MaxRestarts where 87 | 88 | -- | Smart constructor for @MaxRestarts@. The maximum restart count must be a 89 | -- positive integer, otherwise you will see @error "MaxR must be >= 0"@. 90 | maxRestarts :: Int -> MaxRestarts 91 | maxRestarts r | r >= 0 = MaxR r 92 | | otherwise = error "MaxR must be >= 0" 93 | 94 | -- | A compulsary limit on the number of restarts that a supervisor will 95 | -- tolerate before it stops all child processes and then itself. 96 | -- If > @MaxRestarts@ occur within the specified @TimeInterval@, the child 97 | -- will be stopped. This prevents the supervisor from entering an infinite loop 98 | -- of child process stops and restarts. 99 | -- 100 | data RestartLimit = 101 | RestartLimit 102 | { maxR :: !MaxRestarts 103 | , maxT :: !TimeInterval 104 | } 105 | deriving (Typeable, Generic, Show) 106 | instance Binary RestartLimit where 107 | instance NFData RestartLimit where 108 | 109 | -- | Smart constructor for "RestartLimit". 110 | limit :: MaxRestarts -> TimeInterval -> RestartLimit 111 | limit mr = RestartLimit mr 112 | 113 | -- | Default "RestartLimit" of @MaxR 1@ within @Seconds 1@. 114 | defaultLimits :: RestartLimit 115 | defaultLimits = limit (MaxR 1) (seconds 1) 116 | 117 | -- | Specifies the order in which a supervisor should apply restarts. 118 | data RestartOrder = LeftToRight | RightToLeft 119 | deriving (Typeable, Generic, Eq, Show) 120 | instance Binary RestartOrder where 121 | instance Hashable RestartOrder where 122 | instance NFData RestartOrder where 123 | 124 | -- | Instructs a supervisor on how to restart its children. 125 | data RestartMode = 126 | RestartEach { order :: !RestartOrder } 127 | {- ^ stop then start each child sequentially, i.e., @foldlM stopThenStart children@ -} 128 | | RestartInOrder { order :: !RestartOrder } 129 | {- ^ stop all children first, then restart them sequentially -} 130 | | RestartRevOrder { order :: !RestartOrder } 131 | {- ^ stop all children in the given order, but start them in reverse -} 132 | deriving (Typeable, Generic, Show, Eq) 133 | instance Binary RestartMode where 134 | instance Hashable RestartMode where 135 | instance NFData RestartMode where 136 | 137 | -- | Instructs a supervisor on how to instruct its children to stop running 138 | -- when the supervisor itself is shutting down. 139 | data ShutdownMode = SequentialShutdown !RestartOrder 140 | | ParallelShutdown 141 | deriving (Typeable, Generic, Show, Eq) 142 | instance Binary ShutdownMode where 143 | instance Hashable ShutdownMode where 144 | instance NFData ShutdownMode where 145 | 146 | -- | Strategy used by a supervisor to handle child restarts, whether due to 147 | -- unexpected child failure or explicit restart requests from a client. 148 | -- 149 | -- Some terminology: We refer to child processes managed by the same supervisor 150 | -- as /siblings/. When restarting a child process, the 'RestartNone' policy 151 | -- indicates that sibling processes should be left alone, whilst the 'RestartAll' 152 | -- policy will cause /all/ children to be restarted (in the same order they were 153 | -- started). 154 | -- 155 | -- The other two restart strategies refer to /prior/ and /subsequent/ 156 | -- siblings, which describe's those children's configured position in insertion 157 | -- order in the child specs. These latter modes allow one to control the order 158 | -- in which siblings are restarted, and to exclude some siblings from restarting, 159 | -- without having to resort to grouping them using a child supervisor. 160 | -- 161 | data RestartStrategy = 162 | RestartOne 163 | { intensity :: !RestartLimit 164 | } -- ^ restart only the failed child process 165 | | RestartAll 166 | { intensity :: !RestartLimit 167 | , mode :: !RestartMode 168 | } -- ^ also restart all siblings 169 | | RestartLeft 170 | { intensity :: !RestartLimit 171 | , mode :: !RestartMode 172 | } -- ^ restart prior siblings (i.e., prior /start order/) 173 | | RestartRight 174 | { intensity :: !RestartLimit 175 | , mode :: !RestartMode 176 | } -- ^ restart subsequent siblings (i.e., subsequent /start order/) 177 | deriving (Typeable, Generic, Show) 178 | instance Binary RestartStrategy where 179 | instance NFData RestartStrategy where 180 | 181 | -- | Provides a default 'RestartStrategy' for @RestartOne@. 182 | -- > restartOne = RestartOne defaultLimits 183 | -- 184 | restartOne :: RestartStrategy 185 | restartOne = RestartOne defaultLimits 186 | 187 | -- | Provides a default 'RestartStrategy' for @RestartAll@. 188 | -- > restartOne = RestartAll defaultLimits (RestartEach LeftToRight) 189 | -- 190 | restartAll :: RestartStrategy 191 | restartAll = RestartAll defaultLimits (RestartEach LeftToRight) 192 | 193 | -- | Provides a default 'RestartStrategy' for @RestartLeft@. 194 | -- > restartOne = RestartLeft defaultLimits (RestartEach LeftToRight) 195 | -- 196 | restartLeft :: RestartStrategy 197 | restartLeft = RestartLeft defaultLimits (RestartEach LeftToRight) 198 | 199 | -- | Provides a default 'RestartStrategy' for @RestartRight@. 200 | -- > restartOne = RestartRight defaultLimits (RestartEach LeftToRight) 201 | -- 202 | restartRight :: RestartStrategy 203 | restartRight = RestartRight defaultLimits (RestartEach LeftToRight) 204 | 205 | -- | Identifies a child process by name. 206 | type ChildKey = String 207 | 208 | -- | A reference to a (possibly running) child. 209 | data ChildRef = 210 | ChildRunning !ChildPid -- ^ a reference to the (currently running) child 211 | | ChildRunningExtra !ChildPid !Message -- ^ also a currently running child, with /extra/ child info 212 | | ChildRestarting !ChildPid -- ^ a reference to the /old/ (previous) child (now restarting) 213 | | ChildStopped -- ^ indicates the child is not currently running 214 | | ChildStartIgnored -- ^ a non-temporary child exited with 'ChildInitIgnore' 215 | deriving (Typeable, Generic, Show) 216 | instance Binary ChildRef where 217 | instance NFData ChildRef where 218 | 219 | instance Eq ChildRef where 220 | ChildRunning p1 == ChildRunning p2 = p1 == p2 221 | ChildRunningExtra p1 _ == ChildRunningExtra p2 _ = p1 == p2 222 | ChildRestarting p1 == ChildRestarting p2 = p1 == p2 223 | ChildStopped == ChildStopped = True 224 | ChildStartIgnored == ChildStartIgnored = True 225 | _ == _ = False 226 | 227 | -- | @True@ if "ChildRef" is running. 228 | isRunning :: ChildRef -> Bool 229 | isRunning (ChildRunning _) = True 230 | isRunning (ChildRunningExtra _ _) = True 231 | isRunning _ = False 232 | 233 | -- | @True@ if "ChildRef" is restarting 234 | isRestarting :: ChildRef -> Bool 235 | isRestarting (ChildRestarting _) = True 236 | isRestarting _ = False 237 | 238 | instance Resolvable ChildRef where 239 | resolve (ChildRunning pid) = return $ Just pid 240 | resolve (ChildRunningExtra pid _) = return $ Just pid 241 | resolve _ = return Nothing 242 | 243 | -- these look a bit odd, but we basically want to avoid resolving 244 | -- or sending to (ChildRestarting oldPid) 245 | instance Routable ChildRef where 246 | sendTo (ChildRunning addr) = sendTo addr 247 | sendTo _ = error "invalid address for child process" 248 | 249 | unsafeSendTo (ChildRunning ch) = unsafeSendTo ch 250 | unsafeSendTo _ = error "invalid address for child process" 251 | 252 | -- | Specifies whether the child is another supervisor, or a worker. 253 | data ChildType = Worker | Supervisor 254 | deriving (Typeable, Generic, Show, Eq) 255 | instance Binary ChildType where 256 | instance NFData ChildType where 257 | 258 | -- | Describes when a stopped child process should be restarted. 259 | data RestartPolicy = 260 | Permanent -- ^ a permanent child will always be restarted 261 | | Temporary -- ^ a temporary child will /never/ be restarted 262 | | Transient -- ^ A transient child will be restarted only if it stops abnormally 263 | | Intrinsic -- ^ as 'Transient', but if the child exits normally, the supervisor also exits normally 264 | deriving (Typeable, Generic, Eq, Show) 265 | instance Binary RestartPolicy where 266 | instance NFData RestartPolicy where 267 | 268 | -- | Governs how the supervisor will instruct child processes to stop. 269 | data ChildStopPolicy = 270 | StopTimeout !Delay 271 | | StopImmediately 272 | deriving (Typeable, Generic, Eq, Show) 273 | instance Binary ChildStopPolicy where 274 | instance NFData ChildStopPolicy where 275 | 276 | -- | Represents a registered name, for registration /locally/ using the 277 | -- @register@ primitive, or via a @Closure (ChildPid -> Process ())@ such that 278 | -- registration can be performed using alternative process registries. 279 | data RegisteredName = 280 | LocalName !String 281 | | CustomRegister !(Closure (ChildPid -> Process ())) 282 | deriving (Typeable, Generic) 283 | instance Binary RegisteredName where 284 | instance NFData RegisteredName where 285 | 286 | instance Show RegisteredName where 287 | show (CustomRegister _) = "Custom Register" 288 | show (LocalName n) = n 289 | 290 | -- | Defines the way in which a child process is to be started. 291 | data ChildStart = 292 | RunClosure !(Closure (Process ())) 293 | | CreateHandle !(Closure (SupervisorPid -> Process (ChildPid, Message))) 294 | deriving (Typeable, Generic, Show) 295 | instance Binary ChildStart where 296 | instance NFData ChildStart where 297 | 298 | -- | Specification for a child process. The child must be uniquely identified 299 | -- by it's @childKey@ within the supervisor. The supervisor will start the child 300 | -- itself, therefore @childRun@ should contain the child process' implementation 301 | -- e.g., if the child is a long running server, this would be the server /loop/, 302 | -- as with e.g., @ManagedProces.start@. 303 | data ChildSpec = ChildSpec { 304 | childKey :: !ChildKey 305 | , childType :: !ChildType 306 | , childRestart :: !RestartPolicy 307 | , childRestartDelay :: !(Maybe TimeInterval) 308 | , childStop :: !ChildStopPolicy 309 | , childStart :: !ChildStart 310 | , childRegName :: !(Maybe RegisteredName) 311 | } deriving (Typeable, Generic, Show) 312 | instance Binary ChildSpec where 313 | instance NFData ChildSpec where 314 | 315 | -- | A child process failure during init will be reported using this datum 316 | data ChildInitFailure = 317 | ChildInitFailure !String -- ^ The init failed with the corresponding message 318 | | ChildInitIgnore -- ^ The child told the supervisor to ignore its startup procedure 319 | deriving (Typeable, Generic, Show) 320 | instance Binary ChildInitFailure where 321 | instance NFData ChildInitFailure where 322 | instance Exception ChildInitFailure where 323 | 324 | -- | Statistics about a running supervisor 325 | data SupervisorStats = SupervisorStats { 326 | _children :: Int 327 | , _supervisors :: Int 328 | , _workers :: Int 329 | , _running :: Int 330 | , _activeSupervisors :: Int 331 | , _activeWorkers :: Int 332 | -- TODO: usage/restart/freq stats 333 | , totalRestarts :: Int 334 | } deriving (Typeable, Generic, Show) 335 | instance Binary SupervisorStats where 336 | instance NFData SupervisorStats where 337 | 338 | -- | Supervisor event data published to the management API 339 | data MxSupervisor = 340 | SupervisorBranchRestarted 341 | { 342 | supervisorPid :: SupervisorPid 343 | , childSpecKey :: ChildKey 344 | , diedReason :: DiedReason 345 | , branchStrategy :: RestartStrategy 346 | } -- ^ A branch restart took place 347 | | SupervisedChildRestarting 348 | { supervisorPid :: SupervisorPid 349 | , childInScope :: Maybe ChildPid 350 | , childSpecKey :: ChildKey 351 | , exitReason :: ExitReason 352 | } -- ^ A child is being restarted 353 | | SupervisedChildStarted 354 | { supervisorPid :: SupervisorPid 355 | , childRef :: ChildRef 356 | , childSpecKey :: ChildKey 357 | } -- ^ A child has been started 358 | | SupervisedChildStartFailure 359 | { supervisorPid :: SupervisorPid 360 | , startFailure :: StartFailure 361 | , childSpecKey :: ChildKey 362 | } -- ^ A child failed to start 363 | | SupervisedChildDied 364 | { supervisorPid :: SupervisorPid 365 | , childPid :: ChildPid 366 | , exitReason :: ExitReason 367 | } -- ^ A child process death was detected 368 | | SupervisedChildInitFailed 369 | { supervisorPid :: SupervisorPid 370 | , childPid :: ChildPid 371 | , initFailure :: ChildInitFailure 372 | } -- ^ A child failed during init 373 | | SupervisedChildStopped 374 | { supervisorPid :: SupervisorPid 375 | , childRef :: ChildRef 376 | , diedReason :: DiedReason 377 | } -- ^ A child has been stopped 378 | | SupervisorShutdown 379 | { supervisorPid :: SupervisorPid 380 | , shutdownMode :: ShutdownMode 381 | , exitRason :: ExitReason 382 | } -- ^ A supervisor is shutting down 383 | deriving (Typeable, Generic, Show) 384 | instance Binary MxSupervisor where 385 | instance NFData MxSupervisor where 386 | 387 | -- | Static labels (in the remote table) are strings. 388 | type StaticLabel = String 389 | 390 | -- | Provides failure information when (re-)start failure is indicated. 391 | data StartFailure = 392 | StartFailureDuplicateChild !ChildRef -- ^ a child with this 'ChildKey' already exists 393 | | StartFailureAlreadyRunning !ChildRef -- ^ the child is already up and running 394 | | StartFailureBadClosure !StaticLabel -- ^ a closure cannot be resolved 395 | | StartFailureDied !DiedReason -- ^ a child died (almost) immediately on starting 396 | deriving (Typeable, Generic, Show, Eq) 397 | instance Binary StartFailure where 398 | instance NFData StartFailure where 399 | 400 | -- | The result of a call to 'removeChild'. 401 | data DeleteChildResult = 402 | ChildDeleted -- ^ the child specification was successfully removed 403 | | ChildNotFound -- ^ the child specification was not found 404 | | ChildNotStopped !ChildRef -- ^ the child was not removed, as it was not stopped. 405 | deriving (Typeable, Generic, Show, Eq) 406 | instance Binary DeleteChildResult where 407 | instance NFData DeleteChildResult where 408 | 409 | -- | A child represented as a @(ChildRef, ChildSpec)@ pair. 410 | type Child = (ChildRef, ChildSpec) 411 | 412 | -- exported result types of internal APIs 413 | 414 | -- | The result of an @addChild@ request. 415 | data AddChildResult = 416 | ChildAdded !ChildRef -- ^ The child was added correctly 417 | | ChildFailedToStart !StartFailure -- ^ The child failed to start 418 | deriving (Typeable, Generic, Show, Eq) 419 | instance Binary AddChildResult where 420 | instance NFData AddChildResult where 421 | 422 | -- | The result of a @startChild@ request. 423 | data StartChildResult = 424 | ChildStartOk !ChildRef -- ^ The child started successfully 425 | | ChildStartFailed !StartFailure -- ^ The child failed to start 426 | | ChildStartUnknownId -- ^ The child key was not recognised by the supervisor 427 | deriving (Typeable, Generic, Show, Eq) 428 | instance Binary StartChildResult where 429 | instance NFData StartChildResult where 430 | 431 | -- | The result of a @restartChild@ request. 432 | data RestartChildResult = 433 | ChildRestartOk !ChildRef -- ^ The child restarted successfully 434 | | ChildRestartFailed !StartFailure -- ^ The child failed to restart 435 | | ChildRestartUnknownId -- ^ The child key was not recognised by the supervisor 436 | deriving (Typeable, Generic, Show, Eq) 437 | 438 | instance Binary RestartChildResult where 439 | instance NFData RestartChildResult where 440 | 441 | -- | The result of a @stopChild@ request. 442 | data StopChildResult = 443 | StopChildOk -- ^ The child was stopped successfully 444 | | StopChildUnknownId -- ^ The child key was not recognised by the supervisor 445 | deriving (Typeable, Generic, Show, Eq) 446 | instance Binary StopChildResult where 447 | instance NFData StopChildResult where 448 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-11.13 2 | 3 | packages: 4 | - '.' 5 | 6 | extra-deps: 7 | - distributed-process-async-0.2.6 8 | - distributed-process-extras-0.3.5 9 | - distributed-process-client-server-0.2.5.1 10 | - rematch-0.2.0.0 11 | -------------------------------------------------------------------------------- /test-report.hs: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | HPC_DIR=dist/hpc 4 | 5 | cabal-dev clean 6 | cabal-dev configure --enable-tests --enable-library-coverage 7 | cabal-dev build 8 | cabal-dev test 9 | 10 | open ${HPC_DIR}/html/*/hpc-index.html 11 | -------------------------------------------------------------------------------- /tests/TestSupervisor.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# LANGUAGE RecordWildCards #-} 4 | {-# LANGUAGE PatternGuards #-} 5 | {-# LANGUAGE Rank2Types #-} 6 | 7 | -- NOTICE: Some of these tests are /unsafe/, and will fail intermittently, since 8 | -- they rely on ordering constraints which the Cloud Haskell runtime does not 9 | -- guarantee. 10 | 11 | module Main where 12 | 13 | import Control.Concurrent.MVar 14 | ( MVar 15 | , newMVar 16 | , putMVar 17 | , takeMVar 18 | ) 19 | import qualified Control.Exception as Ex 20 | import Control.Exception (throwIO) 21 | import Control.Distributed.Process hiding (call, monitor, finally) 22 | import Control.Distributed.Process.Closure 23 | import Control.Distributed.Process.Node 24 | import Control.Distributed.Process.Extras.Internal.Types 25 | import Control.Distributed.Process.Extras.Internal.Primitives 26 | import Control.Distributed.Process.Extras.SystemLog 27 | ( LogLevel(Debug) 28 | , systemLogFile 29 | , addFormatter 30 | , debug 31 | , logChannel 32 | ) 33 | import Control.Distributed.Process.Extras.Time 34 | import Control.Distributed.Process.Extras.Timer 35 | import Control.Distributed.Process.Supervisor hiding (start, shutdown) 36 | import qualified Control.Distributed.Process.Supervisor as Supervisor 37 | import Control.Distributed.Process.Supervisor.Management 38 | ( MxSupervisor(..) 39 | , monitorSupervisor 40 | , unmonitorSupervisor 41 | , supervisionMonitor 42 | ) 43 | import Control.Distributed.Process.ManagedProcess.Client (shutdown) 44 | import Control.Distributed.Process.Serializable() 45 | 46 | import Control.Distributed.Static (staticLabel) 47 | import Control.Monad (void, unless, forM_, forM) 48 | import Control.Monad.Catch (finally) 49 | import Control.Rematch 50 | ( equalTo 51 | , is 52 | , isNot 53 | , isNothing 54 | , isJust 55 | ) 56 | 57 | import Data.ByteString.Lazy (empty) 58 | import Data.Maybe (catMaybes) 59 | 60 | #if !MIN_VERSION_base(4,6,0) 61 | import Prelude hiding (catch) 62 | #endif 63 | 64 | import Test.HUnit (Assertion, assertFailure) 65 | import Test.Framework (Test, testGroup) 66 | import Test.Framework.Providers.HUnit (testCase) 67 | import TestUtils hiding (waitForExit) 68 | import qualified Network.Transport as NT 69 | 70 | import System.Random (mkStdGen, randomR) 71 | -- test utilities 72 | 73 | expectedExitReason :: ProcessId -> String 74 | expectedExitReason sup = "killed-by=" ++ (show sup) ++ 75 | ",reason=StoppedBySupervisor" 76 | 77 | defaultWorker :: ChildStart -> ChildSpec 78 | defaultWorker clj = 79 | ChildSpec 80 | { 81 | childKey = "" 82 | , childType = Worker 83 | , childRestart = Temporary 84 | , childRestartDelay = Nothing 85 | , childStop = StopImmediately 86 | , childStart = clj 87 | , childRegName = Nothing 88 | } 89 | 90 | tempWorker :: ChildStart -> ChildSpec 91 | tempWorker clj = 92 | (defaultWorker clj) 93 | { 94 | childKey = "temp-worker" 95 | , childRestart = Temporary 96 | } 97 | 98 | transientWorker :: ChildStart -> ChildSpec 99 | transientWorker clj = 100 | (defaultWorker clj) 101 | { 102 | childKey = "transient-worker" 103 | , childRestart = Transient 104 | } 105 | 106 | intrinsicWorker :: ChildStart -> ChildSpec 107 | intrinsicWorker clj = 108 | (defaultWorker clj) 109 | { 110 | childKey = "intrinsic-worker" 111 | , childRestart = Intrinsic 112 | } 113 | 114 | permChild :: ChildStart -> ChildSpec 115 | permChild clj = 116 | (defaultWorker clj) 117 | { 118 | childKey = "perm-child" 119 | , childRestart = Permanent 120 | } 121 | 122 | ensureProcessIsAlive :: ProcessId -> Process () 123 | ensureProcessIsAlive pid = do 124 | result <- isProcessAlive pid 125 | expectThat result $ is True 126 | 127 | runInTestContext :: LocalNode 128 | -> MVar () 129 | -> ShutdownMode 130 | -> RestartStrategy 131 | -> [ChildSpec] 132 | -> (ProcessId -> Process ()) 133 | -> Assertion 134 | runInTestContext node lock sm rs cs proc = do 135 | Ex.bracket (takeMVar lock) (putMVar lock) $ \() -> runProcess node $ do 136 | sup <- Supervisor.start rs sm cs 137 | (proc sup) `finally` (exit sup ExitShutdown) 138 | 139 | data Context = Context { sup :: SupervisorPid 140 | , sniffer :: Sniffer 141 | , waitTimeout :: TimeInterval 142 | , listSize :: Int 143 | , split :: forall a . ([a] -> ([a], [a])) 144 | } 145 | type Sniffer = ReceivePort MxSupervisor 146 | 147 | mkRandom :: Int -> Int -> (Int, Int) 148 | mkRandom minListSz maxListSz 149 | | minListSz > maxListSz = error "nope" 150 | | minListSz < 20 = mkRandom 20 maxListSz 151 | | otherwise = 152 | let gen = mkStdGen 273846 153 | (lSz :: Int, gen') = randomR (minListSz, maxListSz) gen 154 | (sPt :: Int, _) = randomR (max 3 (round((fromIntegral lSz) / 3.15 :: Double) :: Int), lSz - 3) gen' 155 | in (lSz, sPt) 156 | 157 | randomIshSizes :: (Int, Int) 158 | randomIshSizes = mkRandom 20 1200 159 | 160 | runInTestContext' :: LocalNode 161 | -> ShutdownMode 162 | -> RestartStrategy 163 | -> [ChildSpec] 164 | -> (Context -> Process ()) 165 | -> Assertion 166 | runInTestContext' node sm rs cs proc = do 167 | liftIO $ do 168 | -- we don't care about real randomness, just about selecting a vaguely 169 | -- different sizes for each run... 170 | let (lSz, sPt) = randomIshSizes 171 | runProcess node $ do 172 | sup <- Supervisor.start rs sm cs 173 | sf <- monitorSupervisor sup 174 | finally (proc $ Context sup sf (seconds 30) lSz (splitAt sPt)) 175 | (exit sup ExitShutdown >> unmonitorSupervisor sup) 176 | 177 | verifyChildWasRestarted :: ChildKey -> ProcessId -> ProcessId -> Process () 178 | verifyChildWasRestarted key pid sup = do 179 | void $ waitForExit pid 180 | cSpec <- lookupChild sup key 181 | -- TODO: handle (ChildRestarting _) too! 182 | case cSpec of 183 | Just (ref, _) -> do Just pid' <- resolve ref 184 | expectThat pid' $ isNot $ equalTo pid 185 | _ -> do 186 | liftIO $ assertFailure $ "unexpected child ref: " ++ (show (key, cSpec)) 187 | 188 | verifyChildWasNotRestarted :: ChildKey -> ProcessId -> ProcessId -> Process () 189 | verifyChildWasNotRestarted key pid sup = do 190 | void $ waitForExit pid 191 | cSpec <- lookupChild sup key 192 | case cSpec of 193 | Just (ChildStopped, _) -> return () 194 | _ -> liftIO $ assertFailure $ "unexpected child ref: " ++ (show (key, cSpec)) 195 | 196 | verifyTempChildWasRemoved :: ProcessId -> ProcessId -> Process () 197 | verifyTempChildWasRemoved pid sup = do 198 | void $ waitForExit pid 199 | sleepFor 500 Millis 200 | cSpec <- lookupChild sup "temp-worker" 201 | expectThat cSpec isNothing 202 | 203 | waitForExit :: ProcessId -> Process DiedReason 204 | waitForExit pid = do 205 | monitor pid >>= waitForDown 206 | 207 | waitForDown :: Maybe MonitorRef -> Process DiedReason 208 | waitForDown Nothing = error "invalid mref" 209 | waitForDown (Just ref) = 210 | receiveWait [ matchIf (\(ProcessMonitorNotification ref' _ _) -> ref == ref') 211 | (\(ProcessMonitorNotification _ _ dr) -> return dr) ] 212 | 213 | waitForBranchRestartComplete :: Sniffer 214 | -> ChildKey 215 | -> Process () 216 | waitForBranchRestartComplete sniff key = do 217 | debug logChannel $ "waiting for branch restart..." 218 | aux 10000 sniff Nothing -- `finally` unmonitorSupervisor sup 219 | where 220 | aux :: Int -> Sniffer -> Maybe MxSupervisor -> Process () 221 | aux n s m 222 | | n < 1 = liftIO $ assertFailure $ "Never Saw Branch Restarted for " ++ (show key) 223 | | Just mx <- m 224 | , SupervisorBranchRestarted{..} <- mx 225 | , childSpecKey == key = return () 226 | | Nothing <- m = receiveTimeout 100 [ matchChan s return ] >>= aux (n-1) s 227 | | otherwise = aux (n-1) s Nothing 228 | 229 | verifySingleRestart :: Context 230 | -> ChildKey 231 | -> Process () 232 | verifySingleRestart Context{..} key = do 233 | sleep $ seconds 1 234 | let t = asTimeout waitTimeout 235 | mx <- receiveChanTimeout t sniffer 236 | case mx of 237 | Just rs@SupervisedChildRestarting{} -> do 238 | (childSpecKey rs) `shouldBe` equalTo key 239 | mx' <- receiveChanTimeout t sniffer 240 | case mx' of 241 | Just cs@SupervisedChildStarted{} -> do 242 | (childSpecKey cs) `shouldBe` equalTo key 243 | debug logChannel $ "restart ok for " ++ (show cs) 244 | _ -> liftIO $ assertFailure $ " Unexpected Waiting Child Started " ++ (show mx') 245 | _ -> liftIO $ assertFailure $ "Unexpected Waiting Child Restarted " ++ (show mx) 246 | 247 | verifySeqStartOrder :: Context 248 | -> [(ChildRef, Child)] 249 | -> ChildKey 250 | -> Process () 251 | verifySeqStartOrder Context{..} xs toStop = do 252 | -- xs == [(oldRef, (ref, spec))] in specified/insertion order 253 | -- if shutdown is LeftToRight then that's correct, otherwise we 254 | -- should expect the shutdowns in reverse order 255 | sleep $ seconds 1 256 | let t = asTimeout waitTimeout 257 | forM_ xs $ \(oCr, c@(cr, cs)) -> do 258 | debug logChannel $ "checking restart " ++ (show c) 259 | mx <- receiveTimeout t [ matchChan sniffer return ] 260 | case mx of 261 | Just SupervisedChildRestarting{..} -> do 262 | debug logChannel $ "for restart " ++ (show childSpecKey) ++ " we're expecting " ++ (childKey cs) 263 | childSpecKey `shouldBe` equalTo (childKey cs) 264 | unless (childSpecKey == toStop) $ do 265 | Just SupervisedChildStopped{..} <- receiveChanTimeout t sniffer 266 | debug logChannel $ "for " ++ (show childRef) ++ " we're expecting " ++ (show oCr) 267 | childRef `shouldBe` equalTo oCr 268 | mx' <- receiveChanTimeout t sniffer 269 | case mx' of 270 | Just SupervisedChildStarted{..} -> childRef `shouldBe` equalTo cr 271 | _ -> do 272 | liftIO $ assertFailure $ "After Stopping " ++ (show cs) ++ 273 | " received unexpected " ++ (show mx) 274 | _ -> liftIO $ assertFailure $ "Bad Restart: " ++ (show mx) 275 | 276 | verifyStopStartOrder :: Context 277 | -> [(ChildRef, Child)] 278 | -> [Child] 279 | -> ChildKey 280 | -> Process () 281 | verifyStopStartOrder Context{..} xs restarted toStop = do 282 | -- xs == [(oldRef, (ref, spec))] in specified/insertion order 283 | -- if shutdown is LeftToRight then that's correct, otherwise we 284 | -- should expect the shutdowns in reverse order 285 | sleep $ seconds 1 286 | let t = asTimeout waitTimeout 287 | forM_ xs $ \(oCr, c@(_, cs)) -> do 288 | debug logChannel $ "checking restart " ++ (show c) 289 | mx <- receiveTimeout t [ matchChan sniffer return ] 290 | case mx of 291 | Just SupervisedChildRestarting{..} -> do 292 | debug logChannel $ "for restart " ++ (show childSpecKey) ++ " we're expecting " ++ (childKey cs) 293 | childSpecKey `shouldBe` equalTo (childKey cs) 294 | if childSpecKey /= toStop 295 | then do Just SupervisedChildStopped{..} <- receiveChanTimeout t sniffer 296 | debug logChannel $ "for " ++ (show childRef) ++ " we're expecting " ++ (show oCr) 297 | -- childRef `shouldBe` equalTo oCr 298 | if childRef /= oCr 299 | then debug logChannel $ "childRef " ++ (show childRef) ++ " /= " ++ (show oCr) 300 | else return () 301 | else return () 302 | _ -> liftIO $ assertFailure $ "Bad Restart: " ++ (show mx) 303 | 304 | debug logChannel "checking start order..." 305 | sleep $ seconds 1 306 | forM_ restarted $ \(cr, _) -> do 307 | debug logChannel $ "checking (reverse) start order for " ++ (show cr) 308 | mx <- receiveTimeout t [ matchChan sniffer return ] 309 | case mx of 310 | Just SupervisedChildStarted{..} -> childRef `shouldBe` equalTo cr 311 | _ -> liftIO $ assertFailure $ "Bad Child Start: " ++ (show mx) 312 | 313 | checkStartupOrder :: Context -> [Child] -> Process () 314 | checkStartupOrder Context{..} children = do 315 | -- assert that we saw the startup sequence working... 316 | forM_ children $ \(cr, _) -> do 317 | debug logChannel $ "checking " ++ (show cr) 318 | mx <- receiveTimeout (asTimeout waitTimeout) [ matchChan sniffer return ] 319 | case mx of 320 | Just SupervisedChildStarted{..} -> childRef `shouldBe` equalTo cr 321 | _ -> liftIO $ assertFailure $ "Bad Child Start: " ++ (show mx) 322 | 323 | exitIgnore :: Process () 324 | exitIgnore = liftIO $ throwIO ChildInitIgnore 325 | 326 | noOp :: Process () 327 | noOp = return () 328 | 329 | blockIndefinitely :: Process () 330 | blockIndefinitely = runTestProcess noOp 331 | 332 | notifyMe :: ProcessId -> Process () 333 | notifyMe me = getSelfPid >>= send me >> obedient 334 | 335 | sleepy :: Process () 336 | sleepy = (sleepFor 5 Minutes) 337 | `catchExit` (\_ (_ :: ExitReason) -> return ()) >> sleepy 338 | 339 | obedient :: Process () 340 | obedient = (sleepFor 5 Minutes) 341 | {- supervisor inserts handlers that act like we wrote: 342 | `catchExit` (\_ (r :: ExitReason) -> do 343 | case r of 344 | ExitShutdown -> return () 345 | _ -> die r) 346 | -} 347 | 348 | runCore :: SendPort () -> Process () 349 | runCore sp = (expect >>= say) `catchExit` (\_ ExitShutdown -> sendChan sp ()) 350 | 351 | runApp :: SendPort () -> Process () 352 | runApp sg = do 353 | Just pid <- whereis "core" 354 | link pid -- if the real "core" exits first, we go too 355 | sendChan sg () 356 | expect >>= say 357 | 358 | formatMxSupervisor :: Message -> Process (Maybe String) 359 | formatMxSupervisor msg = do 360 | m <- unwrapMessage msg :: Process (Maybe MxSupervisor) 361 | case m of 362 | Nothing -> return Nothing 363 | Just m' -> return $ Just (show m') 364 | 365 | $(remotable [ 'exitIgnore 366 | , 'noOp 367 | , 'blockIndefinitely 368 | , 'sleepy 369 | , 'obedient 370 | , 'notifyMe 371 | , 'runCore 372 | , 'runApp 373 | , 'formatMxSupervisor ]) 374 | 375 | -- test cases start here... 376 | 377 | normalStartStop :: ProcessId -> Process () 378 | normalStartStop sup = do 379 | ensureProcessIsAlive sup 380 | void $ monitor sup 381 | shutdown sup 382 | sup `shouldExitWith` DiedNormal 383 | 384 | sequentialShutdown :: TestResult (Maybe ()) -> Process () 385 | sequentialShutdown result = do 386 | (sp, rp) <- newChan 387 | (sg, rg) <- newChan 388 | 389 | core' <- toChildStart $ $(mkClosure 'runCore) sp 390 | app' <- toChildStart $ $(mkClosure 'runApp) sg 391 | let core = (permChild core') { childRegName = Just (LocalName "core") 392 | , childStop = StopTimeout (Delay $ within 2 Seconds) 393 | , childKey = "child-1" 394 | } 395 | let app = (permChild app') { childRegName = Just (LocalName "app") 396 | , childStop = StopTimeout (Delay $ within 2 Seconds) 397 | , childKey = "child-2" 398 | } 399 | 400 | sup <- Supervisor.start restartRight 401 | (SequentialShutdown RightToLeft) 402 | [core, app] 403 | 404 | () <- receiveChan rg 405 | exit sup ExitShutdown 406 | res <- receiveChanTimeout (asTimeout $ seconds 5) rp 407 | stash result res 408 | 409 | configuredTemporaryChildExitsWithIgnore :: 410 | ChildStart 411 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 412 | -> Assertion 413 | configuredTemporaryChildExitsWithIgnore cs withSupervisor = 414 | let spec = tempWorker cs in do 415 | withSupervisor restartOne [spec] verifyExit 416 | where 417 | verifyExit :: ProcessId -> Process () 418 | verifyExit sup = do 419 | child <- lookupChild sup "temp-worker" 420 | case child of 421 | Nothing -> return () -- the child exited and was removed ok 422 | Just (ref, _) -> do 423 | Just pid <- resolve ref 424 | verifyTempChildWasRemoved pid sup 425 | 426 | configuredNonTemporaryChildExitsWithIgnore :: 427 | ChildStart 428 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 429 | -> Assertion 430 | configuredNonTemporaryChildExitsWithIgnore cs withSupervisor = 431 | let spec = transientWorker cs in do 432 | withSupervisor restartOne [spec] $ verifyExit spec 433 | where 434 | verifyExit :: ChildSpec -> ProcessId -> Process () 435 | verifyExit spec sup = do 436 | sleep $ milliSeconds 100 -- make sure our super has seen the EXIT signal 437 | child <- lookupChild sup (childKey spec) 438 | case child of 439 | Nothing -> liftIO $ assertFailure $ "lost non-temp spec!" 440 | Just (ref, spec') -> do 441 | rRef <- resolve ref 442 | maybe (return DiedNormal) waitForExit rRef 443 | cSpec <- lookupChild sup (childKey spec') 444 | case cSpec of 445 | Just (ChildStartIgnored, _) -> return () 446 | _ -> do 447 | liftIO $ assertFailure $ "unexpected lookup: " ++ (show cSpec) 448 | 449 | startTemporaryChildExitsWithIgnore :: ChildStart -> ProcessId -> Process () 450 | startTemporaryChildExitsWithIgnore cs sup = 451 | -- if a temporary child exits with "ignore" then we must 452 | -- have deleted its specification from the supervisor 453 | let spec = tempWorker cs in do 454 | ChildAdded ref <- startNewChild sup spec 455 | Just pid <- resolve ref 456 | verifyTempChildWasRemoved pid sup 457 | 458 | startNonTemporaryChildExitsWithIgnore :: ChildStart -> ProcessId -> Process () 459 | startNonTemporaryChildExitsWithIgnore cs sup = 460 | let spec = transientWorker cs in do 461 | ChildAdded ref <- startNewChild sup spec 462 | Just pid <- resolve ref 463 | void $ waitForExit pid 464 | sleep $ milliSeconds 250 465 | cSpec <- lookupChild sup (childKey spec) 466 | case cSpec of 467 | Just (ChildStartIgnored, _) -> return () 468 | _ -> do 469 | liftIO $ assertFailure $ "unexpected lookup: " ++ (show cSpec) 470 | 471 | addChildWithoutRestart :: ChildStart -> ProcessId -> Process () 472 | addChildWithoutRestart cs sup = 473 | let spec = transientWorker cs in do 474 | response <- addChild sup spec 475 | response `shouldBe` equalTo (ChildAdded ChildStopped) 476 | 477 | addChildThenStart :: ChildStart -> ProcessId -> Process () 478 | addChildThenStart cs sup = 479 | let spec = transientWorker cs in do 480 | (ChildAdded _) <- addChild sup spec 481 | response <- startChild sup (childKey spec) 482 | case response of 483 | ChildStartOk (ChildRunning pid) -> do 484 | alive <- isProcessAlive pid 485 | alive `shouldBe` equalTo True 486 | _ -> do 487 | liftIO $ putStrLn (show response) 488 | die "Ooops" 489 | 490 | startUnknownChild :: ChildStart -> ProcessId -> Process () 491 | startUnknownChild cs sup = do 492 | response <- startChild sup (childKey (transientWorker cs)) 493 | response `shouldBe` equalTo ChildStartUnknownId 494 | 495 | setupChild :: ChildStart -> ProcessId -> Process (ChildRef, ChildSpec) 496 | setupChild cs sup = do 497 | let spec = transientWorker cs 498 | response <- addChild sup spec 499 | response `shouldBe` equalTo (ChildAdded ChildStopped) 500 | Just child <- lookupChild sup "transient-worker" 501 | return child 502 | 503 | addDuplicateChild :: ChildStart -> ProcessId -> Process () 504 | addDuplicateChild cs sup = do 505 | (ref, spec) <- setupChild cs sup 506 | dup <- addChild sup spec 507 | dup `shouldBe` equalTo (ChildFailedToStart $ StartFailureDuplicateChild ref) 508 | 509 | startDuplicateChild :: ChildStart -> ProcessId -> Process () 510 | startDuplicateChild cs sup = do 511 | (ref, spec) <- setupChild cs sup 512 | dup <- startNewChild sup spec 513 | dup `shouldBe` equalTo (ChildFailedToStart $ StartFailureDuplicateChild ref) 514 | 515 | startBadClosure :: ChildStart -> ProcessId -> Process () 516 | startBadClosure cs sup = do 517 | let spec = tempWorker cs 518 | child <- startNewChild sup spec 519 | child `shouldBe` equalTo 520 | (ChildFailedToStart $ StartFailureBadClosure 521 | "user error (Could not resolve closure: Invalid static label 'non-existing')") 522 | 523 | -- configuredBadClosure withSupervisor = do 524 | -- let spec = permChild (closure (staticLabel "non-existing") empty) 525 | -- -- we make sure we don't hit the supervisor's limits 526 | -- let strategy = RestartOne $ limit (maxRestarts 500000000) (milliSeconds 1) 527 | -- withSupervisor strategy [spec] $ \sup -> do 528 | -- -- ref <- monitor sup 529 | -- children <- (listChildren sup) 530 | -- let specs = map fst children 531 | -- expectThat specs $ equalTo [] 532 | 533 | deleteExistingChild :: ChildStart -> ProcessId -> Process () 534 | deleteExistingChild cs sup = do 535 | let spec = transientWorker cs 536 | (ChildAdded ref) <- startNewChild sup spec 537 | result <- deleteChild sup "transient-worker" 538 | result `shouldBe` equalTo (ChildNotStopped ref) 539 | 540 | deleteStoppedTempChild :: ChildStart -> ProcessId -> Process () 541 | deleteStoppedTempChild cs sup = do 542 | let spec = tempWorker cs 543 | ChildAdded ref <- startNewChild sup spec 544 | Just pid <- resolve ref 545 | testProcessStop pid 546 | -- child needs to be stopped 547 | waitForExit pid 548 | result <- deleteChild sup (childKey spec) 549 | result `shouldBe` equalTo ChildNotFound 550 | 551 | deleteStoppedChild :: ChildStart -> ProcessId -> Process () 552 | deleteStoppedChild cs sup = do 553 | let spec = transientWorker cs 554 | ChildAdded ref <- startNewChild sup spec 555 | Just pid <- resolve ref 556 | testProcessStop pid 557 | -- child needs to be stopped 558 | waitForExit pid 559 | result <- deleteChild sup (childKey spec) 560 | result `shouldBe` equalTo ChildDeleted 561 | 562 | permanentChildrenAlwaysRestart :: ChildStart -> ProcessId -> Process () 563 | permanentChildrenAlwaysRestart cs sup = do 564 | let spec = permChild cs 565 | (ChildAdded ref) <- startNewChild sup spec 566 | Just pid <- resolve ref 567 | testProcessStop pid -- a normal stop should *still* trigger a restart 568 | verifyChildWasRestarted (childKey spec) pid sup 569 | 570 | temporaryChildrenNeverRestart :: ChildStart -> ProcessId -> Process () 571 | temporaryChildrenNeverRestart cs sup = do 572 | let spec = tempWorker cs 573 | (ChildAdded ref) <- startNewChild sup spec 574 | Just pid <- resolve ref 575 | kill pid "bye bye" 576 | verifyTempChildWasRemoved pid sup 577 | 578 | transientChildrenNormalExit :: ChildStart -> ProcessId -> Process () 579 | transientChildrenNormalExit cs sup = do 580 | let spec = transientWorker cs 581 | (ChildAdded ref) <- startNewChild sup spec 582 | Just pid <- resolve ref 583 | testProcessStop pid 584 | verifyChildWasNotRestarted (childKey spec) pid sup 585 | 586 | transientChildrenAbnormalExit :: ChildStart -> ProcessId -> Process () 587 | transientChildrenAbnormalExit cs sup = do 588 | let spec = transientWorker cs 589 | (ChildAdded ref) <- startNewChild sup spec 590 | Just pid <- resolve ref 591 | kill pid "bye bye" 592 | verifyChildWasRestarted (childKey spec) pid sup 593 | 594 | transientChildrenExitShutdown :: ChildStart -> Context -> Process () 595 | transientChildrenExitShutdown cs Context{..} = do 596 | let spec = transientWorker cs 597 | (ChildAdded ref) <- startNewChild sup spec 598 | 599 | Just _ <- receiveChanTimeout (asTimeout waitTimeout) sniffer :: Process (Maybe MxSupervisor) 600 | 601 | Just pid <- resolve ref 602 | mRef <- monitor pid 603 | exit pid ExitShutdown 604 | waitForDown mRef 605 | 606 | mx <- receiveChanTimeout 1000 sniffer :: Process (Maybe MxSupervisor) 607 | expectThat mx isNothing 608 | verifyChildWasNotRestarted (childKey spec) pid sup 609 | 610 | intrinsicChildrenAbnormalExit :: ChildStart -> ProcessId -> Process () 611 | intrinsicChildrenAbnormalExit cs sup = do 612 | let spec = intrinsicWorker cs 613 | ChildAdded ref <- startNewChild sup spec 614 | Just pid <- resolve ref 615 | kill pid "bye bye" 616 | verifyChildWasRestarted (childKey spec) pid sup 617 | 618 | intrinsicChildrenNormalExit :: ChildStart -> ProcessId -> Process () 619 | intrinsicChildrenNormalExit cs sup = do 620 | let spec = intrinsicWorker cs 621 | ChildAdded ref <- startNewChild sup spec 622 | Just pid <- resolve ref 623 | testProcessStop pid 624 | reason <- waitForExit sup 625 | expectThat reason $ equalTo DiedNormal 626 | 627 | explicitRestartRunningChild :: ChildStart -> ProcessId -> Process () 628 | explicitRestartRunningChild cs sup = do 629 | let spec = tempWorker cs 630 | ChildAdded ref <- startNewChild sup spec 631 | result <- restartChild sup (childKey spec) 632 | expectThat result $ equalTo $ ChildRestartFailed (StartFailureAlreadyRunning ref) 633 | 634 | explicitRestartUnknownChild :: ProcessId -> Process () 635 | explicitRestartUnknownChild sup = do 636 | result <- restartChild sup "unknown-id" 637 | expectThat result $ equalTo ChildRestartUnknownId 638 | 639 | explicitRestartRestartingChild :: ChildStart -> ProcessId -> Process () 640 | explicitRestartRestartingChild cs sup = do 641 | let spec = permChild cs 642 | ChildAdded _ <- startNewChild sup spec 643 | -- TODO: we've seen a few explosions here (presumably of the supervisor?) 644 | -- expecially when running with +RTS -N1 - it's possible that there's a bug 645 | -- tucked away that we haven't cracked just yet 646 | restarted <- (restartChild sup (childKey spec)) 647 | `catchExit` (\_ (r :: ExitReason) -> (liftIO $ putStrLn (show r)) >> 648 | die r) 649 | -- this is highly timing dependent, so we have to allow for both 650 | -- possible outcomes - on a dual core machine, the first clause 651 | -- will match approx. 1 / 200 times when running with +RTS -N 652 | case restarted of 653 | ChildRestartFailed (StartFailureAlreadyRunning (ChildRestarting _)) -> return () 654 | ChildRestartFailed (StartFailureAlreadyRunning (ChildRunning _)) -> return () 655 | other -> liftIO $ assertFailure $ "unexpected result: " ++ (show other) 656 | 657 | explicitRestartStoppedChild :: ChildStart -> ProcessId -> Process () 658 | explicitRestartStoppedChild cs sup = do 659 | let spec = transientWorker cs 660 | let key = childKey spec 661 | ChildAdded ref <- startNewChild sup spec 662 | void $ stopChild sup key 663 | restarted <- restartChild sup key 664 | sleepFor 500 Millis 665 | Just (ref', _) <- lookupChild sup key 666 | expectThat ref $ isNot $ equalTo ref' 667 | case restarted of 668 | ChildRestartOk (ChildRunning _) -> return () 669 | _ -> liftIO $ assertFailure $ "unexpected exit: " ++ (show restarted) 670 | 671 | stopChildImmediately :: ChildStart -> ProcessId -> Process () 672 | stopChildImmediately cs sup = do 673 | let spec = tempWorker cs 674 | ChildAdded ref <- startNewChild sup spec 675 | -- Just pid <- resolve ref 676 | mRef <- monitor ref 677 | void $ stopChild sup (childKey spec) 678 | reason <- waitForDown mRef 679 | expectThat reason $ equalTo $ DiedException (expectedExitReason sup) 680 | 681 | stoppingChildExceedsDelay :: ProcessId -> Process () 682 | stoppingChildExceedsDelay sup = do 683 | let spec = (tempWorker (RunClosure $(mkStaticClosure 'sleepy))) 684 | { childStop = StopTimeout (Delay $ within 500 Millis) } 685 | ChildAdded ref <- startNewChild sup spec 686 | -- Just pid <- resolve ref 687 | mRef <- monitor ref 688 | void $ stopChild sup (childKey spec) 689 | reason <- waitForDown mRef 690 | expectThat reason $ equalTo $ DiedException (expectedExitReason sup) 691 | 692 | stoppingChildObeysDelay :: ProcessId -> Process () 693 | stoppingChildObeysDelay sup = do 694 | let spec = (tempWorker (RunClosure $(mkStaticClosure 'obedient))) 695 | { childStop = StopTimeout (Delay $ within 1 Seconds) } 696 | ChildAdded child <- startNewChild sup spec 697 | Just pid <- resolve child 698 | void $ monitor pid 699 | void $ stopChild sup (childKey spec) 700 | child `shouldExitWith` DiedNormal 701 | 702 | restartAfterThreeAttempts :: 703 | ChildStart 704 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 705 | -> Assertion 706 | restartAfterThreeAttempts cs withSupervisor = do 707 | let spec = permChild cs 708 | let strategy = RestartOne $ limit (maxRestarts 500) (seconds 2) 709 | withSupervisor strategy [spec] $ \sup -> do 710 | mapM_ (\_ -> do 711 | [(childRef, _)] <- listChildren sup 712 | Just pid <- resolve childRef 713 | ref <- monitor pid 714 | testProcessStop pid 715 | void $ waitForDown ref) [1..3 :: Int] 716 | [(_, _)] <- listChildren sup 717 | return () 718 | 719 | delayedRestartAfterThreeAttempts :: 720 | (RestartStrategy -> [ChildSpec] -> (Context -> Process ()) -> Assertion) 721 | -> Assertion 722 | delayedRestartAfterThreeAttempts withSupervisor = do 723 | let spec = (permChild $ RunClosure $ $(mkStaticClosure 'blockIndefinitely)) 724 | { childRestartDelay = Just (seconds 3) } 725 | let strategy = RestartOne $ limit (maxRestarts 2) (seconds 2) 726 | withSupervisor strategy [spec] $ \ctx@Context{..} -> do 727 | mapM_ (\_ -> do 728 | [(childRef, _)] <- listChildren sup 729 | Just pid <- resolve childRef 730 | ref <- monitor pid 731 | testProcessStop pid 732 | void $ waitForDown ref) [1..3 :: Int] 733 | 734 | Just (ref, _) <- lookupChild sup $ childKey spec 735 | case ref of 736 | ChildRestarting _ -> do 737 | SupervisedChildStarted{..} <- receiveChan sniffer 738 | childSpecKey `shouldBe` equalTo (childKey spec) 739 | _ -> liftIO $ assertFailure $ "Unexpected ChildRef: " ++ (show ref) 740 | 741 | mapM_ (const $ verifySingleRestart ctx (childKey spec)) [1..3 :: Int] 742 | 743 | [(ref', _)] <- listChildren sup 744 | Just pid <- resolve ref' 745 | mRef <- monitor pid 746 | testProcessStop pid 747 | void $ waitForDown mRef 748 | 749 | permanentChildExceedsRestartsIntensity :: 750 | ChildStart 751 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 752 | -> Assertion 753 | permanentChildExceedsRestartsIntensity cs withSupervisor = do 754 | let spec = permChild cs -- child that exits immediately 755 | let strategy = RestartOne $ limit (maxRestarts 50) (seconds 2) 756 | withSupervisor strategy [spec] $ \sup -> do 757 | ref <- monitor sup 758 | -- if the supervisor dies whilst the call is in-flight, 759 | -- *this* process will exit, therefore we handle that exit reason 760 | void $ ((startNewChild sup spec >> return ()) 761 | `catchExit` (\_ (_ :: ExitReason) -> return ())) 762 | reason <- waitForDown ref 763 | expectThat reason $ equalTo $ 764 | DiedException $ "exit-from=" ++ (show sup) ++ 765 | ",reason=ReachedMaxRestartIntensity" 766 | 767 | stopChildIgnoresSiblings :: 768 | ChildStart 769 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 770 | -> Assertion 771 | stopChildIgnoresSiblings cs withSupervisor = do 772 | let templ = permChild cs 773 | let specs = [templ { childKey = (show i) } | i <- [1..3 :: Int]] 774 | withSupervisor restartAll specs $ \sup -> do 775 | let toStop = childKey $ head specs 776 | Just (ref, _) <- lookupChild sup toStop 777 | mRef <- monitor ref 778 | stopChild sup toStop 779 | waitForDown mRef 780 | children <- listChildren sup 781 | forM_ (tail $ map fst children) $ \cRef -> do 782 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve cRef 783 | 784 | restartAllWithLeftToRightSeqRestarts :: 785 | ChildStart 786 | -> (RestartStrategy -> [ChildSpec] -> (Context -> Process ()) -> Assertion) 787 | -> Assertion 788 | restartAllWithLeftToRightSeqRestarts cs withSupervisor = do 789 | let (sz, _) = randomIshSizes 790 | let templ = permChild cs 791 | let specs = [templ { childKey = (show i) } | i <- [1..sz :: Int]] 792 | withSupervisor restartAll specs $ \Context{..} -> do 793 | let toStop = childKey $ head specs 794 | Just (ref, _) <- lookupChild sup toStop 795 | children <- listChildren sup 796 | Just pid <- resolve ref 797 | kill pid "goodbye" 798 | forM_ (map fst children) $ \cRef -> monitor cRef >>= waitForDown 799 | forM_ (map snd children) $ \cSpec -> do 800 | Just (ref', _) <- lookupChild sup (childKey cSpec) 801 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref' 802 | 803 | restartLeftWithLeftToRightSeqRestarts :: 804 | ChildStart 805 | -> (RestartStrategy -> [ChildSpec] -> (Context -> Process ()) -> Assertion) 806 | -> Assertion 807 | restartLeftWithLeftToRightSeqRestarts cs withSupervisor = do 808 | let (lSz, sptSz) = randomIshSizes 809 | let templ = permChild cs 810 | let specs = [templ { childKey = (show i) } | i <- [1..lSz :: Int]] 811 | withSupervisor restartLeft specs $ \ctx@Context{..} -> do 812 | 813 | children <- listChildren sup 814 | checkStartupOrder ctx children 815 | 816 | sniff <- monitorSupervisor sup 817 | 818 | let (toRestart, _notToRestart) = splitAt sptSz specs 819 | let (restarts, survivors) = splitAt sptSz children 820 | let toStop = childKey $ last toRestart 821 | Just (ref, _) <- lookupChild sup toStop 822 | Just pid <- resolve ref 823 | kill pid "goodbye" 824 | 825 | forM_ (map fst restarts) $ \cRef -> monitor cRef >>= waitForDown 826 | 827 | -- NB: this uses a separate channel to consume the Mx events... 828 | waitForBranchRestartComplete sniff toStop 829 | 830 | children' <- listChildren sup 831 | let (restarted', _) = splitAt sptSz children' 832 | let xs = zip [fst o | o <- restarts] restarted' 833 | verifySeqStartOrder ctx xs toStop 834 | 835 | forM_ (map snd children') $ \cSpec -> do 836 | Just (ref', _) <- lookupChild sup (childKey cSpec) 837 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref' 838 | 839 | resolved <- forM (map fst survivors) resolve 840 | let possibleBadRestarts = catMaybes resolved 841 | r <- receiveTimeout (after 5 Seconds) [ 842 | match (\(ProcessMonitorNotification _ pid' _) -> do 843 | case (elem pid' possibleBadRestarts) of 844 | True -> liftIO $ assertFailure $ "unexpected exit from " ++ show pid' 845 | False -> return ()) 846 | ] 847 | expectThat r isNothing 848 | 849 | restartRightWithLeftToRightSeqRestarts :: 850 | ChildStart 851 | -> (RestartStrategy -> [ChildSpec] -> (ProcessId -> Process ()) -> Assertion) 852 | -> Assertion 853 | restartRightWithLeftToRightSeqRestarts cs withSupervisor = do 854 | let (lSz, sptSz) = mkRandom 150 688 855 | let templ = permChild cs 856 | let specs = [templ { childKey = (show i) } | i <- [1..lSz :: Int]] 857 | withSupervisor restartRight specs $ \sup -> do 858 | let (_notToRestart, toRestart) = splitAt sptSz specs 859 | let toStop = childKey $ head toRestart 860 | Just (ref, _) <- lookupChild sup toStop 861 | Just pid <- resolve ref 862 | children <- listChildren sup 863 | let (survivors, children') = splitAt sptSz children 864 | kill pid "goodbye" 865 | forM_ (map fst children') $ \cRef -> do 866 | mRef <- monitor cRef 867 | waitForDown mRef 868 | forM_ (map snd children') $ \cSpec -> do 869 | Just (ref', _) <- lookupChild sup (childKey cSpec) 870 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref' 871 | resolved <- forM (map fst survivors) resolve 872 | let possibleBadRestarts = catMaybes resolved 873 | r <- receiveTimeout (after 1 Seconds) [ 874 | match (\(ProcessMonitorNotification _ pid' _) -> do 875 | case (elem pid' possibleBadRestarts) of 876 | True -> liftIO $ assertFailure $ "unexpected exit from " ++ show pid' 877 | False -> return ()) 878 | ] 879 | expectThat r isNothing 880 | 881 | restartAllWithLeftToRightRestarts :: ProcessId -> Process () 882 | restartAllWithLeftToRightRestarts sup = do 883 | let (lSz, _) = randomIshSizes 884 | self <- getSelfPid 885 | let templ = permChild $ RunClosure ($(mkClosure 'notifyMe) self) 886 | let specs = [templ { childKey = (show i) } | i <- [1..lSz :: Int]] 887 | -- add the specs one by one 888 | forM_ specs $ \s -> void $ startNewChild sup s 889 | -- assert that we saw the startup sequence working... 890 | children <- listChildren sup 891 | drainAllChildren children 892 | let toStop = childKey $ head specs 893 | Just (ref, _) <- lookupChild sup toStop 894 | Just pid <- resolve ref 895 | kill pid "goodbye" 896 | -- wait for all the exit signals, so we know the children are restarting 897 | forM_ (map fst children) $ \cRef -> do 898 | Just mRef <- monitor cRef 899 | receiveWait [ 900 | matchIf (\(ProcessMonitorNotification ref' _ _) -> ref' == mRef) 901 | (\_ -> return ()) 902 | -- we should NOT see *any* process signalling that it has started 903 | -- whilst waiting for all the children to be terminated 904 | , match (\(pid' :: ProcessId) -> do 905 | liftIO $ assertFailure $ "unexpected signal from " ++ (show pid')) 906 | ] 907 | -- Now assert that all the children were restarted in the same order. 908 | -- THIS is the bit that is technically unsafe, though it's also unlikely 909 | -- to change, since the architecture of the node controller is pivotal to CH 910 | children' <- listChildren sup 911 | drainAllChildren children' 912 | let [c1, c2] = [map fst cs | cs <- [children, children']] 913 | forM_ (zip c1 c2) $ \(p1, p2) -> expectThat p1 $ isNot $ equalTo p2 914 | where 915 | drainAllChildren children = do 916 | -- Receive all pids then verify they arrived in the correct order. 917 | -- Any out-of-order messages (such as ProcessMonitorNotification) will 918 | -- violate the invariant asserted below, and fail the test case 919 | pids <- forM children $ \_ -> expect :: Process ProcessId 920 | forM_ pids ensureProcessIsAlive 921 | 922 | restartAllWithRightToLeftSeqRestarts :: Context -> Process () 923 | restartAllWithRightToLeftSeqRestarts ctx@Context{..} = do 924 | self <- getSelfPid 925 | let templ = permChild $ RunClosure $(mkStaticClosure 'obedient) 926 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 927 | 928 | -- add the specs one by one 929 | forM_ specs $ \s -> do 930 | ChildAdded ref <- startNewChild sup s 931 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref 932 | 933 | -- assert that we saw the startup sequence working... 934 | children <- listChildren sup 935 | checkStartupOrder ctx children 936 | 937 | -- we need this before the restarts occur 938 | sniff <- monitorSupervisor sup 939 | 940 | let toStop = childKey $ head specs 941 | Just (ref, _) <- lookupChild sup toStop 942 | Just pid <- resolve ref 943 | kill pid "fooboo" 944 | 945 | -- wait for all the exit signals, so we know the children are restarting 946 | forM_ (map fst children) $ \cRef -> monitor cRef >>= waitForDown 947 | 948 | -- NB: this uses a separate channel to consume the Mx events... 949 | waitForBranchRestartComplete sniff toStop 950 | 951 | children' <- listChildren sup 952 | let xs = zip [fst o | o <- children] children' 953 | verifySeqStartOrder ctx (reverse xs) toStop 954 | 955 | forM_ (map snd children') $ \cSpec -> do 956 | Just (ref', _) <- lookupChild sup (childKey cSpec) 957 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref' 958 | 959 | expectLeftToRightRestarts :: Context -> Process () 960 | expectLeftToRightRestarts ctx@Context{..} = do 961 | let templ = permChild $ RunClosure $(mkStaticClosure 'obedient) 962 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 963 | -- add the specs one by one 964 | forM_ specs $ \s -> void $ startNewChild sup s 965 | 966 | -- assert that we saw the startup sequence working... 967 | children <- listChildren sup 968 | checkStartupOrder ctx children 969 | 970 | let toStop = childKey $ head specs 971 | Just (ref, _) <- lookupChild sup toStop 972 | Just pid <- resolve ref 973 | 974 | -- wait for all the exit signals and ensure they arrive in RightToLeft order 975 | refs <- forM children $ \(ch, _) -> monitor ch >>= \r -> return (ch, r) 976 | kill pid "fooboo" 977 | 978 | initRes <- receiveTimeout 979 | (asTimeout $ seconds 1) 980 | [ matchIf 981 | (\(ProcessMonitorNotification r _ _) -> (Just r) == (snd $ head refs)) 982 | (\sig@(ProcessMonitorNotification _ _ _) -> return sig) ] 983 | expectThat initRes $ isJust 984 | 985 | forM_ (reverse (filter ((/= ref) .fst ) refs)) $ \(_, Just mRef) -> do 986 | (ProcessMonitorNotification ref' _ _) <- expect 987 | if ref' == mRef then (return ()) else (die "unexpected monitor signal") 988 | 989 | expectRightToLeftRestarts :: Bool -> Context -> Process () 990 | expectRightToLeftRestarts rev ctx@Context{..} = do 991 | self <- getSelfPid 992 | let templ = permChild $ RunClosure ($(mkClosure 'notifyMe) self) 993 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 994 | -- add the specs one by one 995 | forM_ specs $ \s -> do 996 | ChildAdded ref <- startNewChild sup s 997 | maybe (error "invalid ref") ensureProcessIsAlive =<< resolve ref 998 | 999 | children <- listChildren sup 1000 | checkStartupOrder ctx children 1001 | 1002 | -- assert that we saw the startup sequence working... 1003 | let toStop = childKey $ head specs 1004 | Just (ref, _) <- lookupChild sup toStop 1005 | Just pid <- resolve ref 1006 | kill pid "fooboobarbazbub" 1007 | 1008 | -- wait for all the exit signals, so we know the children are restarting 1009 | forM_ (map fst children) $ \cRef -> monitor cRef >>= waitForDown 1010 | 1011 | restarted' <- listChildren sup 1012 | let xs = zip [fst o | o <- children] restarted' 1013 | let xs' = if rev then xs else reverse xs 1014 | -- say $ "xs = " ++ (show [(o, (cr, childKey cs)) | (o, (cr, cs)) <- xs]) 1015 | verifyStopStartOrder ctx xs' (reverse restarted') toStop 1016 | 1017 | restartLeftWhenLeftmostChildDies :: ChildStart -> ProcessId -> Process () 1018 | restartLeftWhenLeftmostChildDies cs sup = do 1019 | let spec = permChild cs 1020 | (ChildAdded ref) <- startNewChild sup spec 1021 | (ChildAdded ref2) <- startNewChild sup $ spec { childKey = "child2" } 1022 | Just pid <- resolve ref 1023 | Just pid2 <- resolve ref2 1024 | testProcessStop pid -- a normal stop should *still* trigger a restart 1025 | verifyChildWasRestarted (childKey spec) pid sup 1026 | Just (ref3, _) <- lookupChild sup "child2" 1027 | Just pid2' <- resolve ref3 1028 | pid2 `shouldBe` equalTo pid2' 1029 | 1030 | restartWithoutTempChildren :: ChildStart -> ProcessId -> Process () 1031 | restartWithoutTempChildren cs sup = do 1032 | (ChildAdded refTrans) <- startNewChild sup $ transientWorker cs 1033 | (ChildAdded _) <- startNewChild sup $ tempWorker cs 1034 | (ChildAdded refPerm) <- startNewChild sup $ permChild cs 1035 | Just pid2 <- resolve refTrans 1036 | Just pid3 <- resolve refPerm 1037 | 1038 | kill pid2 "foobar" 1039 | void $ waitForExit pid2 -- this wait reduces the likelihood of a race in the test 1040 | Nothing <- lookupChild sup "temp-worker" 1041 | verifyChildWasRestarted "transient-worker" pid2 sup 1042 | verifyChildWasRestarted "perm-child" pid3 sup 1043 | 1044 | restartRightWhenRightmostChildDies :: ChildStart -> ProcessId -> Process () 1045 | restartRightWhenRightmostChildDies cs sup = do 1046 | let spec = permChild cs 1047 | (ChildAdded ref2) <- startNewChild sup $ spec { childKey = "child2" } 1048 | (ChildAdded ref) <- startNewChild sup $ spec { childKey = "child1" } 1049 | [ch1, ch2] <- listChildren sup 1050 | (fst ch1) `shouldBe` equalTo ref2 1051 | (fst ch2) `shouldBe` equalTo ref 1052 | Just pid <- resolve ref 1053 | Just pid2 <- resolve ref2 1054 | -- ref (and therefore pid) is 'rightmost' now 1055 | testProcessStop pid -- a normal stop should *still* trigger a restart 1056 | verifyChildWasRestarted "child1" pid sup 1057 | Just (ref3, _) <- lookupChild sup "child2" 1058 | Just pid2' <- resolve ref3 1059 | pid2 `shouldBe` equalTo pid2' 1060 | 1061 | restartLeftWithLeftToRightRestarts :: Bool -> Context -> Process () 1062 | restartLeftWithLeftToRightRestarts rev ctx@Context{..} = do 1063 | self <- getSelfPid 1064 | let templ = permChild $ RunClosure ($(mkClosure 'notifyMe) self) 1065 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 1066 | forM_ specs $ \s -> void $ startNewChild sup s 1067 | 1068 | -- assert that we saw the startup sequence working... 1069 | children <- listChildren sup 1070 | checkStartupOrder ctx children 1071 | 1072 | let (toRestart, _) = split specs 1073 | let (restarts, _) = split children 1074 | let toStop = childKey $ last toRestart 1075 | Just (ref', _) <- lookupChild sup toStop 1076 | Just stopPid <- resolve ref' 1077 | kill stopPid "goodbye" 1078 | 1079 | -- wait for all the exit signals, so we know the children are restarting 1080 | forM_ (map fst (fst $ split children)) $ \cRef -> monitor cRef >>= waitForDown 1081 | 1082 | children' <- listChildren sup 1083 | let (restarted, notRestarted) = split children' 1084 | let restarted' = if rev then reverse restarted else restarted 1085 | let restarts' = if rev then reverse restarts else restarts 1086 | let xs = zip [fst o | o <- restarts'] restarted' 1087 | verifyStopStartOrder ctx xs restarted toStop 1088 | 1089 | let [c1, c2] = [map fst cs | cs <- [(snd $ split children), notRestarted]] 1090 | forM_ (zip c1 c2) $ \(p1, p2) -> p1 `shouldBe` equalTo p2 1091 | 1092 | restartRightWithLeftToRightRestarts :: Bool -> Context -> Process () 1093 | restartRightWithLeftToRightRestarts rev ctx@Context{..} = do 1094 | 1095 | let templ = permChild $ RunClosure $(mkStaticClosure 'obedient) 1096 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 1097 | forM_ specs $ \s -> void $ startNewChild sup s 1098 | 1099 | children <- listChildren sup 1100 | 1101 | -- assert that we saw the startup sequence working... 1102 | checkStartupOrder ctx children 1103 | 1104 | let (_, toRestart) = split specs 1105 | let (_, restarts) = split children 1106 | let toStop = childKey $ head toRestart 1107 | Just (ref', _) <- lookupChild sup toStop 1108 | Just stopPid <- resolve ref' 1109 | kill stopPid "goodbye" 1110 | -- wait for all the exit signals, so we know the children are restarting 1111 | forM_ (map fst (snd $ split children)) $ \cRef -> monitor cRef >>= waitForDown 1112 | 1113 | children' <- listChildren sup 1114 | let (notRestarted, restarted) = split children' 1115 | 1116 | let restarted' = if rev then reverse restarted else restarted 1117 | let restarts' = if rev then reverse restarts else restarts 1118 | let xs = zip [fst o | o <- restarts'] restarted' 1119 | verifyStopStartOrder ctx xs restarted toStop 1120 | 1121 | let [c1, c2] = [map fst cs | cs <- [(fst $ splitAt 3 children), notRestarted]] 1122 | forM_ (zip c1 c2) $ \(p1, p2) -> p1 `shouldBe` equalTo p2 1123 | 1124 | restartRightWithRightToLeftRestarts :: Bool -> Context -> Process () 1125 | restartRightWithRightToLeftRestarts rev ctx@Context{..} = do 1126 | let templ = permChild $ RunClosure $(mkStaticClosure 'obedient) 1127 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 1128 | forM_ specs $ \s -> void $ startNewChild sup s 1129 | 1130 | children <- listChildren sup 1131 | 1132 | -- assert that we saw the startup sequence working... 1133 | checkStartupOrder ctx children 1134 | 1135 | let (_, toRestart) = split specs 1136 | let (_, restarts) = split children 1137 | let toStop = childKey $ head toRestart 1138 | Just (ref', _) <- lookupChild sup toStop 1139 | Just stopPid <- resolve ref' 1140 | kill stopPid "goodbye" 1141 | 1142 | -- wait for all the exit signals, so we know the children are restarting 1143 | forM_ (map fst (snd $ split children)) $ \cRef -> monitor cRef >>= waitForDown 1144 | 1145 | children' <- listChildren sup 1146 | let (notRestarted, restarted) = split children' 1147 | 1148 | let (restarts', restarted') = if rev then (reverse restarts, reverse restarted) 1149 | else (restarts, restarted) 1150 | let xs = zip [fst o | o <- restarts'] restarted' 1151 | verifyStopStartOrder ctx xs (reverse restarted) toStop 1152 | 1153 | let [c1, c2] = [map fst cs | cs <- [(fst $ split children), notRestarted]] 1154 | forM_ (zip c1 c2) $ \(p1, p2) -> p1 `shouldBe` equalTo p2 1155 | 1156 | restartLeftWithRightToLeftRestarts :: Bool -> Context -> Process () 1157 | restartLeftWithRightToLeftRestarts rev ctx@Context{..} = do 1158 | let templ = permChild $ RunClosure $(mkStaticClosure 'obedient) 1159 | let specs = [templ { childKey = (show i) } | i <- [1..listSize :: Int]] 1160 | forM_ specs $ \s -> void $ startNewChild sup s 1161 | 1162 | children <- listChildren sup 1163 | 1164 | -- assert that we saw the startup sequence working... 1165 | checkStartupOrder ctx children 1166 | 1167 | -- split off 6 children to be restarted 1168 | let (toRestart, _) = split specs 1169 | let (restarts, toSurvive) = split children 1170 | let toStop = childKey $ last toRestart 1171 | Just (ref', _) <- lookupChild sup toStop 1172 | Just stopPid <- resolve ref' 1173 | kill stopPid "test process waves goodbye...." 1174 | 1175 | -- wait for all the exit signals, so we know the children are restarting 1176 | forM_ (map fst restarts) $ \cRef -> monitor cRef >>= waitForDown 1177 | 1178 | children' <- listChildren sup 1179 | let (restarted, notRestarted) = split children' 1180 | --let xs = zip [fst o | o <- restarts] restarted 1181 | let (restarts', restarted') = if rev then (reverse restarts, reverse restarted) 1182 | else (restarts, restarted) 1183 | let xs = zip [fst o | o <- restarts'] restarted' 1184 | 1185 | verifyStopStartOrder ctx xs (reverse restarted) toStop 1186 | 1187 | let [c1, c2] = [map fst cs | cs <- [toSurvive, notRestarted]] 1188 | forM_ (zip c1 c2) $ \(p1, p2) -> p1 `shouldBe` equalTo p2 1189 | 1190 | -- remote table definition and main 1191 | 1192 | myRemoteTable :: RemoteTable 1193 | myRemoteTable = Main.__remoteTable initRemoteTable 1194 | 1195 | withClosure :: (ChildStart -> ProcessId -> Process ()) 1196 | -> (Closure (Process ())) 1197 | -> ProcessId -> Process () 1198 | withClosure fn clj supervisor = do 1199 | cs <- toChildStart clj 1200 | fn cs supervisor 1201 | 1202 | withClosure' :: (ChildStart -> Context -> Process ()) 1203 | -> (Closure (Process ())) 1204 | -> Context 1205 | -> Process () 1206 | withClosure' fn clj ctx = do 1207 | cs <- toChildStart clj 1208 | fn cs ctx 1209 | 1210 | tests :: NT.Transport -> IO [Test] 1211 | tests transport = do 1212 | putStrLn $ concat [ "NOTICE: Branch Tests (Relying on Non-Guaranteed Message Order) " 1213 | , "Can Fail Intermittently" ] 1214 | localNode <- newLocalNode transport myRemoteTable 1215 | singleTestLock <- newMVar () 1216 | runProcess localNode $ do 1217 | void $ supervisionMonitor 1218 | {- 1219 | slog <- systemLogFile "supervisor.test.log" Debug return 1220 | addFormatter slog $(mkStaticClosure 'formatMxSupervisor) 1221 | -} 1222 | 1223 | let withSup sm = runInTestContext localNode singleTestLock sm 1224 | let withSup' sm = runInTestContext' localNode sm 1225 | let withSupervisor = runInTestContext localNode singleTestLock ParallelShutdown 1226 | let withSupervisor' = runInTestContext' localNode ParallelShutdown 1227 | return 1228 | [ testGroup "Supervisor Processes" 1229 | [ 1230 | testGroup "Starting And Adding Children" 1231 | [ 1232 | testCase "Normal (Managed Process) Supervisor Start Stop" 1233 | (withSupervisor restartOne [] normalStartStop) 1234 | , testCase "Add Child Without Starting" 1235 | (withSupervisor restartOne [] 1236 | (withClosure addChildWithoutRestart 1237 | $(mkStaticClosure 'blockIndefinitely))) 1238 | , testCase "Start Previously Added Child" 1239 | (withSupervisor restartOne [] 1240 | (withClosure addChildThenStart 1241 | $(mkStaticClosure 'blockIndefinitely))) 1242 | , testCase "Start Unknown Child" 1243 | (withSupervisor restartOne [] 1244 | (withClosure startUnknownChild 1245 | $(mkStaticClosure 'blockIndefinitely))) 1246 | , testCase "Add Duplicate Child" 1247 | (withSupervisor restartOne [] 1248 | (withClosure addDuplicateChild 1249 | $(mkStaticClosure 'blockIndefinitely))) 1250 | , testCase "Start Duplicate Child" 1251 | (withSupervisor restartOne [] 1252 | (withClosure startDuplicateChild 1253 | $(mkStaticClosure 'blockIndefinitely))) 1254 | , testCase "Started Temporary Child Exits With Ignore" 1255 | (withSupervisor restartOne [] 1256 | (withClosure startTemporaryChildExitsWithIgnore 1257 | $(mkStaticClosure 'exitIgnore))) 1258 | , testCase "Configured Temporary Child Exits With Ignore" 1259 | (configuredTemporaryChildExitsWithIgnore 1260 | (RunClosure $(mkStaticClosure 'exitIgnore)) withSupervisor) 1261 | , testCase "Start Bad Closure" 1262 | (withSupervisor restartOne [] 1263 | (withClosure startBadClosure 1264 | (closure (staticLabel "non-existing") empty))) 1265 | , testCase "Configured Bad Closure" 1266 | (configuredTemporaryChildExitsWithIgnore 1267 | (RunClosure $(mkStaticClosure 'exitIgnore)) withSupervisor) 1268 | , testCase "Started Non-Temporary Child Exits With Ignore" 1269 | (withSupervisor restartOne [] $ 1270 | (withClosure startNonTemporaryChildExitsWithIgnore 1271 | $(mkStaticClosure 'exitIgnore))) 1272 | , testCase "Configured Non-Temporary Child Exits With Ignore" 1273 | (configuredNonTemporaryChildExitsWithIgnore 1274 | (RunClosure $(mkStaticClosure 'exitIgnore)) withSupervisor) 1275 | ] 1276 | , testGroup "Stopping And Deleting Children" 1277 | [ 1278 | testCase "Delete Existing Child Fails" 1279 | (withSupervisor restartOne [] 1280 | (withClosure deleteExistingChild 1281 | $(mkStaticClosure 'blockIndefinitely))) 1282 | , testCase "Delete Stopped Temporary Child (Doesn't Exist)" 1283 | (withSupervisor restartOne [] 1284 | (withClosure deleteStoppedTempChild 1285 | $(mkStaticClosure 'blockIndefinitely))) 1286 | , testCase "Delete Stopped Child Succeeds" 1287 | (withSupervisor restartOne [] 1288 | (withClosure deleteStoppedChild 1289 | $(mkStaticClosure 'blockIndefinitely))) 1290 | , testCase "Restart Minus Dropped (Temp) Child" 1291 | (withSupervisor restartAll [] 1292 | (withClosure restartWithoutTempChildren 1293 | $(mkStaticClosure 'blockIndefinitely))) 1294 | , testCase "Sequential Shutdown Ordering" 1295 | (delayedAssertion 1296 | "expected the shutdown order to hold" 1297 | localNode (Just ()) sequentialShutdown) 1298 | ] 1299 | , testGroup "Stopping and Restarting Children" 1300 | [ 1301 | testCase "Permanent Children Always Restart (Closure)" 1302 | (withSupervisor restartOne [] 1303 | (withClosure permanentChildrenAlwaysRestart 1304 | $(mkStaticClosure 'blockIndefinitely))) 1305 | , testCase "Temporary Children Never Restart (Closure)" 1306 | (withSupervisor restartOne [] 1307 | (withClosure temporaryChildrenNeverRestart 1308 | $(mkStaticClosure 'blockIndefinitely))) 1309 | , testCase "Transient Children Do Not Restart When Exiting Normally (Closure)" 1310 | (withSupervisor restartOne [] 1311 | (withClosure transientChildrenNormalExit 1312 | $(mkStaticClosure 'blockIndefinitely))) 1313 | , testCase "Transient Children Do Restart When Exiting Abnormally (Closure)" 1314 | (withSupervisor restartOne [] 1315 | (withClosure transientChildrenAbnormalExit 1316 | $(mkStaticClosure 'blockIndefinitely))) 1317 | , testCase "ExitShutdown Is Considered Normal" 1318 | (withSupervisor' restartOne [] 1319 | (withClosure' transientChildrenExitShutdown 1320 | $(mkStaticClosure 'blockIndefinitely))) 1321 | , testCase "Intrinsic Children Do Restart When Exiting Abnormally (Closure)" 1322 | (withSupervisor restartOne [] 1323 | (withClosure intrinsicChildrenAbnormalExit 1324 | $(mkStaticClosure 'blockIndefinitely))) 1325 | , testCase (concat [ "Intrinsic Children Cause Supervisor Exits " 1326 | , "When Exiting Normally (Closure)"]) 1327 | (withSupervisor restartOne [] 1328 | (withClosure intrinsicChildrenNormalExit 1329 | $(mkStaticClosure 'blockIndefinitely))) 1330 | , testCase "Explicit Restart Of Running Child Fails (Closure)" 1331 | (withSupervisor restartOne [] 1332 | (withClosure explicitRestartRunningChild 1333 | $(mkStaticClosure 'blockIndefinitely))) 1334 | , testCase "Explicit Restart Of Unknown Child Fails" 1335 | (withSupervisor restartOne [] explicitRestartUnknownChild) 1336 | , testCase "Explicit Restart Whilst Child Restarting Fails (Closure)" 1337 | (withSupervisor 1338 | (RestartOne (limit (maxRestarts 500000000) (milliSeconds 1))) [] 1339 | (withClosure explicitRestartRestartingChild $(mkStaticClosure 'noOp))) 1340 | , testCase "Explicit Restart Stopped Child (Closure)" 1341 | (withSupervisor restartOne [] 1342 | (withClosure explicitRestartStoppedChild 1343 | $(mkStaticClosure 'blockIndefinitely))) 1344 | , testCase "Immediate Child Stop (Brutal Kill) (Closure)" 1345 | (withSupervisor restartOne [] 1346 | (withClosure stopChildImmediately 1347 | $(mkStaticClosure 'blockIndefinitely))) 1348 | , testCase "Child Stop Exceeds Timeout/Delay (Becomes Brutal Kill)" 1349 | (withSupervisor restartOne [] stoppingChildExceedsDelay) 1350 | , testCase "Child Stop Within Timeout/Delay" 1351 | (withSupervisor restartOne [] stoppingChildObeysDelay) 1352 | ] 1353 | -- TODO: test for init failures (expecting $ ChildInitFailed r) 1354 | , testGroup "Branch Restarts" 1355 | [ 1356 | testGroup "Restart All" 1357 | [ 1358 | testCase "Stop Child Ignores Siblings" 1359 | (stopChildIgnoresSiblings 1360 | (RunClosure $(mkStaticClosure 'blockIndefinitely)) 1361 | withSupervisor) 1362 | , testCase "Restart All, Left To Right (Sequential) Restarts" 1363 | (restartAllWithLeftToRightSeqRestarts 1364 | (RunClosure $(mkStaticClosure 'blockIndefinitely)) 1365 | withSupervisor') 1366 | , testCase "Restart All, Right To Left (Sequential) Restarts" 1367 | (withSupervisor' 1368 | (RestartAll defaultLimits (RestartEach RightToLeft)) [] 1369 | restartAllWithRightToLeftSeqRestarts) 1370 | , testCase "Restart All, Left To Right Stop, Left To Right Start" 1371 | (withSup 1372 | (SequentialShutdown LeftToRight) 1373 | (RestartAll defaultLimits (RestartInOrder LeftToRight)) [] 1374 | restartAllWithLeftToRightRestarts) 1375 | , testCase "Restart All, Right To Left Stop, Right To Left Start" 1376 | (withSup' 1377 | (SequentialShutdown RightToLeft) 1378 | (RestartAll defaultLimits (RestartInOrder RightToLeft) 1379 | ) [] 1380 | (expectRightToLeftRestarts False)) 1381 | , testCase "Restart All, Left To Right Stop, Reverse Start" 1382 | (withSup' 1383 | (SequentialShutdown LeftToRight) 1384 | (RestartAll defaultLimits (RestartRevOrder LeftToRight) 1385 | ) [] 1386 | (expectRightToLeftRestarts True)) 1387 | , testCase "Restart All, Right To Left Stop, Reverse Start" 1388 | (withSup' 1389 | (SequentialShutdown RightToLeft) 1390 | (RestartAll defaultLimits (RestartRevOrder RightToLeft) 1391 | ) [] 1392 | expectLeftToRightRestarts) 1393 | ], 1394 | testGroup "Restart Left" 1395 | [ 1396 | testCase "Restart Left, Left To Right (Sequential) Restarts" 1397 | (restartLeftWithLeftToRightSeqRestarts 1398 | (RunClosure $(mkStaticClosure 'blockIndefinitely)) 1399 | withSupervisor') 1400 | , testCase "Restart Left, Leftmost Child Dies" 1401 | (withSupervisor restartLeft [] $ 1402 | restartLeftWhenLeftmostChildDies 1403 | (RunClosure $(mkStaticClosure 'blockIndefinitely))) 1404 | , testCase "Restart Left, Left To Right Stop, Left To Right Start" 1405 | (withSupervisor' 1406 | (RestartLeft defaultLimits (RestartInOrder LeftToRight)) [] 1407 | (restartLeftWithLeftToRightRestarts False)) 1408 | , testCase "Restart Left, Right To Left Stop, Right To Left Start" 1409 | (withSupervisor' 1410 | (RestartLeft defaultLimits (RestartInOrder RightToLeft)) [] 1411 | (restartLeftWithRightToLeftRestarts True)) 1412 | , testCase "Restart Left, Left To Right Stop, Reverse Start" 1413 | (withSupervisor' 1414 | (RestartLeft defaultLimits (RestartRevOrder LeftToRight)) [] 1415 | (restartLeftWithRightToLeftRestarts False)) 1416 | , testCase "Restart Left, Right To Left Stop, Reverse Start" 1417 | (withSupervisor' 1418 | (RestartLeft defaultLimits (RestartRevOrder RightToLeft)) [] 1419 | (restartLeftWithLeftToRightRestarts True)) 1420 | ], 1421 | testGroup "Restart Right" 1422 | [ 1423 | testCase "Restart Right, Left To Right (Sequential) Restarts" 1424 | (restartRightWithLeftToRightSeqRestarts 1425 | (RunClosure $(mkStaticClosure 'blockIndefinitely)) 1426 | withSupervisor) 1427 | , testCase "Restart Right, Rightmost Child Dies" 1428 | (withSupervisor restartRight [] $ 1429 | restartRightWhenRightmostChildDies 1430 | (RunClosure $(mkStaticClosure 'blockIndefinitely))) 1431 | , testCase "Restart Right, Left To Right Stop, Left To Right Start" 1432 | (withSupervisor' 1433 | (RestartRight defaultLimits (RestartInOrder LeftToRight)) [] 1434 | (restartRightWithLeftToRightRestarts False)) 1435 | , testCase "Restart Right, Right To Left Stop, Right To Left Start" 1436 | (withSupervisor' 1437 | (RestartRight defaultLimits (RestartInOrder RightToLeft)) [] 1438 | (restartRightWithRightToLeftRestarts True)) 1439 | , testCase "Restart Right, Left To Right Stop, Reverse Start" 1440 | (withSupervisor' 1441 | (RestartRight defaultLimits (RestartRevOrder LeftToRight)) [] 1442 | (restartRightWithRightToLeftRestarts False)) 1443 | , testCase "Restart Right, Right To Left Stop, Reverse Start" 1444 | (withSupervisor' 1445 | (RestartRight defaultLimits (RestartRevOrder RightToLeft)) [] 1446 | (restartRightWithLeftToRightRestarts True)) 1447 | ] 1448 | ] 1449 | , testGroup "Restart Intensity" 1450 | [ 1451 | testCase "Three Attempts Before Successful Restart" 1452 | (restartAfterThreeAttempts 1453 | (RunClosure $(mkStaticClosure 'blockIndefinitely)) withSupervisor) 1454 | , testCase "Permanent Child Exceeds Restart Limits" 1455 | (permanentChildExceedsRestartsIntensity 1456 | (RunClosure $(mkStaticClosure 'noOp)) withSupervisor) 1457 | , testCase "Permanent Child Delayed Restart" 1458 | (delayedRestartAfterThreeAttempts withSupervisor') 1459 | ] 1460 | ] 1461 | {- , testGroup "CI" 1462 | [ testCase "Flush [NonTest]" 1463 | (withSupervisor' 1464 | (RestartRight defaultLimits (RestartInOrder LeftToRight)) [] 1465 | (\_ -> sleep $ seconds 20)) 1466 | ] 1467 | -} 1468 | ] 1469 | 1470 | main :: IO () 1471 | main = testMain $ tests 1472 | -------------------------------------------------------------------------------- /tests/TestUtils.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE TupleSections #-} 3 | {-# LANGUAGE TemplateHaskell #-} 4 | {-# LANGUAGE DeriveGeneric #-} 5 | 6 | module TestUtils 7 | ( TestResult 8 | -- ping ! 9 | , Ping(Ping) 10 | , ping 11 | , shouldBe 12 | , shouldMatch 13 | , shouldContain 14 | , shouldNotContain 15 | , shouldExitWith 16 | , expectThat 17 | -- test process utilities 18 | , TestProcessControl 19 | , startTestProcess 20 | , runTestProcess 21 | , testProcessGo 22 | , testProcessStop 23 | , testProcessReport 24 | , delayedAssertion 25 | , assertComplete 26 | , waitForExit 27 | -- logging 28 | , Logger() 29 | , newLogger 30 | , putLogMsg 31 | , stopLogger 32 | -- runners 33 | , mkNode 34 | , tryRunProcess 35 | , testMain 36 | , stash 37 | ) where 38 | 39 | #if ! MIN_VERSION_base(4,6,0) 40 | import Prelude hiding (catch) 41 | #endif 42 | import Control.Concurrent 43 | ( ThreadId 44 | , myThreadId 45 | , forkIO 46 | ) 47 | import Control.Concurrent.STM 48 | ( TQueue 49 | , newTQueueIO 50 | , readTQueue 51 | , writeTQueue 52 | ) 53 | import Control.Concurrent.MVar 54 | ( MVar 55 | , newEmptyMVar 56 | , takeMVar 57 | , putMVar 58 | ) 59 | 60 | import Control.Distributed.Process hiding (catch, finally) 61 | import Control.Distributed.Process.Node 62 | import Control.Distributed.Process.Serializable() 63 | import Control.Distributed.Process.Extras.Time 64 | import Control.Distributed.Process.Extras.Timer 65 | import Control.Distributed.Process.Extras.Internal.Types 66 | import Control.Exception (SomeException) 67 | import qualified Control.Exception as Exception 68 | import Control.Monad (forever) 69 | import Control.Monad.Catch (catch) 70 | import Control.Monad.STM (atomically) 71 | import Control.Rematch hiding (match) 72 | import Control.Rematch.Run 73 | import Test.HUnit (Assertion, assertFailure) 74 | import Test.HUnit.Base (assertBool) 75 | import Test.Framework (Test, defaultMain) 76 | import Control.DeepSeq 77 | 78 | import Network.Transport.TCP 79 | import qualified Network.Transport as NT 80 | 81 | import Data.Binary 82 | import Data.Typeable 83 | import GHC.Generics 84 | 85 | --expect :: a -> Matcher a -> Process () 86 | --expect a m = liftIO $ Rematch.expect a m 87 | 88 | expectThat :: a -> Matcher a -> Process () 89 | expectThat a matcher = case res of 90 | MatchSuccess -> return () 91 | (MatchFailure msg) -> liftIO $ assertFailure msg 92 | where res = runMatch matcher a 93 | 94 | shouldBe :: a -> Matcher a -> Process () 95 | shouldBe = expectThat 96 | 97 | shouldContain :: (Show a, Eq a) => [a] -> a -> Process () 98 | shouldContain xs x = expectThat xs $ hasItem (equalTo x) 99 | 100 | shouldNotContain :: (Show a, Eq a) => [a] -> a -> Process () 101 | shouldNotContain xs x = expectThat xs $ isNot (hasItem (equalTo x)) 102 | 103 | shouldMatch :: a -> Matcher a -> Process () 104 | shouldMatch = expectThat 105 | 106 | shouldExitWith :: (Resolvable a) => a -> DiedReason -> Process () 107 | shouldExitWith a r = do 108 | _ <- resolve a 109 | d <- receiveWait [ match (\(ProcessMonitorNotification _ _ r') -> return r') ] 110 | d `shouldBe` equalTo r 111 | 112 | waitForExit :: MVar ExitReason 113 | -> Process (Maybe ExitReason) 114 | waitForExit exitReason = do 115 | -- we *might* end up blocked here, so ensure the test doesn't jam up! 116 | self <- getSelfPid 117 | tref <- killAfter (within 10 Seconds) self "testcast timed out" 118 | tr <- liftIO $ takeMVar exitReason 119 | cancelTimer tref 120 | case tr of 121 | ExitNormal -> return Nothing 122 | other -> return $ Just other 123 | 124 | mkNode :: String -> IO LocalNode 125 | mkNode port = do 126 | Right (transport1, _) <- 127 | createTransportExposeInternals "127.0.0.1" port ("127.0.0.1",) defaultTCPParameters 128 | newLocalNode transport1 initRemoteTable 129 | 130 | -- | Run the supplied @testProc@ using an @MVar@ to collect and assert 131 | -- against its result. Uses the supplied @note@ if the assertion fails. 132 | delayedAssertion :: (Eq a) => String -> LocalNode -> a -> 133 | (TestResult a -> Process ()) -> Assertion 134 | delayedAssertion note localNode expected testProc = do 135 | result <- newEmptyMVar 136 | _ <- forkProcess localNode $ testProc result 137 | assertComplete note result expected 138 | 139 | -- | Takes the value of @mv@ (using @takeMVar@) and asserts that it matches @a@ 140 | assertComplete :: (Eq a) => String -> MVar a -> a -> IO () 141 | assertComplete msg mv a = do 142 | b <- takeMVar mv 143 | assertBool msg (a == b) 144 | 145 | -- synchronised logging 146 | 147 | data Logger = Logger { _tid :: ThreadId, msgs :: TQueue String } 148 | 149 | -- | Create a new Logger. 150 | -- Logger uses a 'TQueue' to receive and process messages on a worker thread. 151 | newLogger :: IO Logger 152 | newLogger = do 153 | tid <- liftIO $ myThreadId 154 | q <- liftIO $ newTQueueIO 155 | _ <- forkIO $ logger q 156 | return $ Logger tid q 157 | where logger q' = forever $ do 158 | msg <- atomically $ readTQueue q' 159 | putStrLn msg 160 | 161 | -- | Send a message to the Logger 162 | putLogMsg :: Logger -> String -> Process () 163 | putLogMsg logger msg = liftIO $ atomically $ writeTQueue (msgs logger) msg 164 | 165 | -- | Stop the worker thread for the given Logger 166 | stopLogger :: Logger -> IO () 167 | stopLogger = (flip Exception.throwTo) Exception.ThreadKilled . _tid 168 | 169 | -- | Given a @builder@ function, make and run a test suite on a single transport 170 | testMain :: (NT.Transport -> IO [Test]) -> IO () 171 | testMain builder = do 172 | Right (transport, _) <- createTransportExposeInternals 173 | "127.0.0.1" "0" ("127.0.0.1",) defaultTCPParameters 174 | testData <- builder transport 175 | defaultMain testData 176 | 177 | -- | Runs a /test process/ around the supplied @proc@, which is executed 178 | -- whenever the outer process loop receives a 'Go' signal. 179 | runTestProcess :: Process () -> Process () 180 | runTestProcess proc = do 181 | ctl <- expect 182 | case ctl of 183 | Stop -> return () 184 | Go -> proc >> runTestProcess proc 185 | Report p -> receiveWait [matchAny (\m -> forward m p)] >> runTestProcess proc 186 | 187 | -- | Starts a test process on the local node. 188 | startTestProcess :: Process () -> Process ProcessId 189 | startTestProcess proc = 190 | spawnLocal $ do 191 | getSelfPid >>= register "test-process" 192 | runTestProcess proc 193 | 194 | -- | Control signals used to manage /test processes/ 195 | data TestProcessControl = Stop | Go | Report ProcessId 196 | deriving (Typeable, Generic) 197 | 198 | instance Binary TestProcessControl where 199 | 200 | -- | A mutable cell containing a test result. 201 | type TestResult a = MVar a 202 | 203 | -- | Stashes a value in our 'TestResult' using @putMVar@ 204 | stash :: TestResult a -> a -> Process () 205 | stash mvar x = liftIO $ putMVar mvar x 206 | 207 | -- | Tell a /test process/ to stop (i.e., 'terminate') 208 | testProcessStop :: ProcessId -> Process () 209 | testProcessStop pid = send pid Stop 210 | 211 | -- | Tell a /test process/ to continue executing 212 | testProcessGo :: ProcessId -> Process () 213 | testProcessGo pid = send pid Go 214 | 215 | -- | A simple @Ping@ signal 216 | data Ping = Ping 217 | deriving (Typeable, Generic, Eq, Show) 218 | 219 | instance Binary Ping where 220 | instance NFData Ping where 221 | 222 | ping :: ProcessId -> Process () 223 | ping pid = send pid Ping 224 | 225 | 226 | tryRunProcess :: LocalNode -> Process () -> IO () 227 | tryRunProcess node p = do 228 | tid <- liftIO myThreadId 229 | runProcess node $ catch p (\e -> liftIO $ Exception.throwTo tid (e::SomeException)) 230 | 231 | -- | Tell a /test process/ to send a report (message) 232 | -- back to the calling process 233 | testProcessReport :: ProcessId -> Process () 234 | testProcessReport pid = do 235 | self <- getSelfPid 236 | send pid $ Report self 237 | --------------------------------------------------------------------------------