├── .gitignore
├── LICENSE
├── README.md
├── Remote.hs
├── Remote
│   ├── Call.hs
│   ├── Channel.hs
│   ├── Closure.hs
│   ├── Encoding.hs
│   ├── File.hs
│   ├── Init.hs
│   ├── Peer.hs
│   ├── Process.hs
│   ├── Reg.hs
│   └── Task.hs
├── Setup.hs
├── examples
│   ├── kmeans
│   │   ├── KMeans.hs
│   │   ├── KMeans3.hs
│   │   ├── KMeansCommon.hs
│   │   ├── MakeData.hs
│   │   ├── awsgo
│   │   ├── awskill
│   │   ├── awslist
│   │   └── kmeans
│   ├── pi
│   │   ├── Pi6.hs
│   │   ├── Pi7.hs
│   │   └── PiCommon.hs
│   └── tests
│       ├── Test-Call.hs
│       ├── Test-Channel-Merge.hs
│       ├── Test-Channel.hs
│       ├── Test-Closure.hs
│       ├── Test-MapReduce.hs
│       ├── Test-Message.hs
│       ├── Test-Task.hs
│       └── config
├── remote.cabal
└── util
    ├── Diag.hs
    └── RegServ.hs

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
dist
cabal-dev

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright Jeff Epstein 2011

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of the author nor the names of other
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Attention: This package is obsolete. Please use [distributed-process](https://github.com/haskell-distributed/distributed-process) instead.

Cloud Haskell
=============

This is **Cloud Haskell**. It's a Haskell framework for distributed applications. Basically, it's a tool for writing applications that coordinate their work on a cluster of commodity computers or virtual machines. This is useful for providing highly reliable, redundant, long-running services, as well as for building compute-intensive applications that can benefit from lots of hardware.

This package, [remote](http://hackage.haskell.org/package/remote) ([github](https://github.com/jepst/CloudHaskell)), contains the original, prototype implementation of Cloud Haskell, which now exists mainly for historical interest.
This version has been surpassed by [distributed-process](http://hackage.haskell.org/package/distributed-process) ([github](https://github.com/haskell-distributed/distributed-process)). Although the interface for the two versions is similar, the newer version has a lot of important improvements, including support for polymorphic RPC and pluggable transport back-ends. Users considering Cloud Haskell for practical ends should start with the new version.

For more information on Cloud Haskell, including motivation, sample applications, performance analysis, and a brief tutorial, I suggest the [paper](http://research.microsoft.com/en-us/um/people/simonpj/papers/parallel/remote.pdf). For even more information, try my [thesis](http://research.microsoft.com/en-us/um/people/simonpj/papers/parallel/epstein-thesis.pdf). These documents apply to the original version, but are mostly relevant to the newer version as well.

Tutorial
========

The remainder of this file contains a (slightly out-of-date) introduction to programming with Cloud Haskell. There are example programs included in the distribution.

Cloud Haskell has two interfaces:

* an interface based on message-passing between distributed processes. Think of it as Erlang (or MPI) in Haskell. We call this part the _process layer_.
* a fault-tolerant data-centric interface. We call this part the _task layer_. This layer makes it even easier to build distributed applications; the framework automatically takes care of moving your data around and recovering from hardware failure. This layer can be compared to Google's MapReduce but is in fact more flexible.


Installation
------------
Assuming you have the Haskell Platform installed, just run this command from the Cloud Haskell directory:

```
cabal install remote
```

You can then compile your own Cloud Haskell applications, or try out the programs in the examples directory.

Process layer: an introduction
------------------------------

Many programming languages expose concurrent programming as a shared memory model, wherein multiple, concurrently executing programs, or threads, can examine and manipulate variables common to them all. Coordination between threads is achieved with locks, mutexes, and other synchronization mechanisms. In Haskell, these facilities are available as `MVar`s.

In contrast, languages like Erlang eschew shared data and require that concurrent threads communicate only by message-passing. The key insight of Erlang and languages like it is that reasoning about concurrency is much easier without shared memory. Under a message-passing scheme, a thread provides a recipient, given as a thread identifier, and a unit of data; that data will be transferred to the recipient's address space and placed in a queue, where it can be retrieved by the recipient. Because data is never shared implicitly, this is a particularly good model for distributed systems.

This framework presents a combined approach to distributed programming: while it provides an Erlang-style message-passing system, it lets the programmer use existing concurrency paradigms from Haskell.

Nodes and processes
-------------------

Location is represented by a _node_. Usually, a node corresponds to an instance of the Haskell runtime system; that is, each independently executed Haskell program exists in its own node.
Multiple nodes may run concurrently on a single physical host system, but the intention is that nodes run on separate hosts, to take advantage of more hardware.

The basic unit of concurrency is the _process_ (as distinct from an OS process). A process is a concurrent calculation that can participate in messaging. There is little overhead involved in starting and executing processes, so programmers can start as many as they need.

Code that runs in a process is in the `ProcessM` monad.

Process management
------------------

Processes are created with the `spawn` function. Its type signature will help explain its operation:

```haskell
spawn :: NodeId -> Closure (ProcessM ()) -> ProcessM ProcessId
```

`spawn` takes a `NodeId`, indicating where to run the process, and a `Closure`, indicating which function will start the process. This lets the programmer start arbitrary functions on other nodes, which may be running on other hosts. Actual code is not transmitted to the other node; instead, a function identifier is sent. This works on the assumption that all connected nodes are running identical copies of the compiled Haskell binary (unlike Erlang, which allows new code to be sent to remote nodes at runtime).

We encode the function identifier used to start remote processes in a `Closure`. Closures for remotely-callable functions are automatically generated, and are named after the original function with a `__closure` suffix. Therefore, if I have a function like this:

```haskell
greet :: String -> ProcessM ()
greet name = say ("Hello, " ++ name)
```

I can run it on some node (and get its PID) like this:

```haskell
pid <- spawn someNode (greet__closure "John Baptist")
```

The `greet__closure` symbol here identifies a _closure generator_ and is automatically created by the framework from user-defined functions; see the examples or documentation for more details.

You can send messages to a process given its PID. The `send` function corresponds to Erlang's `!` operator.

```haskell
send :: (Serializable a) => ProcessId -> a -> ProcessM ()
```

Given a `ProcessId` and a chunk of serializable data (implementing both the `Data.Binary.Binary` and `Typeable` type classes), we can send a message to the given process. The message will be transmitted across the network if necessary and placed in the process's message queue. Note that `send` will accept any type of data, as long as it implements `Serializable`.

A process can receive messages by calling `expect`:

```haskell
expect :: (Serializable a) => ProcessM a
```

Note that `expect` is also polymorphic; the type of message to receive is usually inferred by the compiler. If a message of that type is in the queue, it will be returned. If multiple messages of that type are in the queue, they will be returned in FIFO order. If there are no messages of that type in the queue, the function will block until such a message arrives.
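Putting these pieces together, here is a small sketch of spawning a remote process and exchanging messages with it. This example is not from the included programs: `echo`, `pingPong`, and `someNode` are hypothetical, and we assume `echo` has been made remotely callable so that `echo__closure` exists.

```haskell
-- A hypothetical remotely-callable function: waits for a String,
-- then sends it back to the given process.
echo :: ProcessId -> ProcessM ()
echo master = do msg <- expect
                 send master (msg :: String)

-- Run on the master; someNode is a NodeId obtained elsewhere.
pingPong :: NodeId -> ProcessM ()
pingPong someNode =
   do self <- getSelfPid
      pid <- spawn someNode (echo__closure self)
      send pid "ping"
      reply <- expect
      say ("Received: " ++ reply)
```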
Channels
--------

A _channel_ provides an alternative to message transmission with `send` and `expect`. While `send` and `expect` allow transmission of messages of any type, channels require messages to be of uniform type. Channels work like a distributed equivalent of Haskell's `Control.Concurrent.Chan`. Unlike regular channels, distributed channels have distinct ends: a single receiving port and at least one sending port. Create a channel with a call to `newChannel`:

```haskell
newChannel :: (Serializable a) => ProcessM (SendPort a, ReceivePort a)
```

The resulting `SendPort` can be used with the `sendChannel` function to insert messages into the channel. The `ReceivePort` can be used to receive messages with `receiveChannel`. The `SendPort` can be serialized and sent as part of messages to other processes, which can then write to it; the `ReceivePort`, though, cannot be serialized, although it can be accessed from multiple threads on the same node.

Setup and walkthrough
---------------------

Here we'll provide a basic example of how to get started with your first project.

We'll be running a program that will estimate pi, making use of available computing resources potentially on remote systems. There will be an arbitrary number of nodes, one of which will be designated the master, and the remaining nodes will be slaves. The slaves will estimate pi in such a way that their results can be combined by the master, and an approximation will be output. The more nodes, and the longer they run, the more precise the output.

In more detail: the master will assign each slave a region of the Halton sequence, and the slaves will use elements of the sequence to estimate the ratio of points in a unit square that fall within a unit circle, and the master will sum these ratios.

Here's the procedure, step by step.

1. Compile Pi6.hs. If you have the framework installed correctly, it should be sufficient to run:

        ghc --make Pi6

2. Select the machines you want to run the program on, and select one of them to be the master. All hosts must be connected on a local area network. For the purposes of this explanation, we'll assume that you will run your master node on a machine named `masterhost` and you will run two slave nodes each on machines named `slavehost1` and `slavehost2`.

3. Copy the compiled executable Pi6 to some location on each of the three hosts.

4. For each node, we need to create a configuration file. This is a plain text file, usually named `config` and usually placed in the same directory as the executable. There are many possible settings that can be given in the configuration file, but only a few are necessary for this example; the rest have sensible defaults. On `masterhost`, create a file named `config` with the following content:

        cfgRole MASTER
        cfgHostName masterhost
        cfgKnownHosts masterhost slavehost1 slavehost2

   On `slavehost1`, create a file named `config` with the following content:

        cfgRole WORKER
        cfgHostName slavehost1
        cfgKnownHosts masterhost slavehost1 slavehost2

   On `slavehost2`, create a file named `config` with the following content:

        cfgRole WORKER
        cfgHostName slavehost2
        cfgKnownHosts masterhost slavehost1 slavehost2

A brief discussion of these settings and what they mean:

The `cfgRole` setting determines the node's initial behavior. This is a string which is used to differentiate the two kinds of nodes in this example. More complex distributed systems might have more kinds of roles. In this case, WORKER nodes do nothing on startup, but just wait for a command from the master, whereas MASTER nodes seek out worker nodes and issue them commands. A node's initial function typically dispatches on this role, as sketched below.
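For illustration only, a hedged sketch of such a dispatch (the name `initialProcess` and the bodies here are assumptions, not Pi6's actual code):

```haskell
initialProcess :: String -> ProcessM ()
initialProcess "MASTER" = say "Seeking out workers..."   -- then assign work
initialProcess "WORKER" = receiveWait []                 -- block, awaiting commands
initialProcess role     = say ("Unknown role: " ++ role)
```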
The `cfgHostName` setting indicates to each node the name of the host it's running on.

The `cfgKnownHosts` setting provides a list of hosts that form part of this distributed execution. This is necessary so that the master node can find its subservient slave nodes.

Taken together, these three settings tell each node (a) its own name, (b) the names of other nodes and (c) their behavioral relationship.

5. Now, run the Pi6 program twice on each of the worker hosts. There should now be four worker nodes awaiting instructions.

6. To start the execution, run Pi6 on the master host. You should see output like this:

        2011-02-10 11:14:38.373856 UTC 0 pid://masterhost:48079/6/ SAY Starting...
        2011-02-10 11:14:38.374345 UTC 0 pid://masterhost:48079/6/ SAY Telling slave nid://slavehost1:33716/ to look at range 0..1000000
        2011-02-10 11:14:38.376479 UTC 0 pid://masterhost:48079/6/ SAY Telling slave nid://slavehost1:45343/ to look at range 1000000..2000000
        2011-02-10 11:14:38.382236 UTC 0 pid://masterhost:48079/6/ SAY Telling slave nid://slavehost2:51739/ to look at range 2000000..3000000
        2011-02-10 11:14:38.384613 UTC 0 pid://masterhost:48079/6/ SAY Telling slave nid://slavehost2:44756/ to look at range 3000000..4000000
        2011-02-10 11:14:56.720435 UTC 0 pid://masterhost:48079/6/ SAY Done: 31416061416061416061416061

Let's talk about what's going on here.

This output is generated by the framework's logging facility. Each line of output has the following fields, left-to-right: the date and time that the log entry was generated; the importance of the message (in this case 0); the process ID of the generating process; the subsystem or component that generated this message (in this case, SAY indicates that these messages were output by a call to the `say` function); and the body of the message. From these messages, we can see that the master node discovered four nodes running on two remote hosts; for each of them, the master emits a "Telling slave..." message. Note that although we had to specify the host names where the nodes were running in the config file, the master found all nodes running on each of those hosts. The log output also tells us which range of indices of the Halton sequence were assigned to each node. Each slave, having performed its calculation, sends its results back to the master, and when the master has received responses from all slaves, it prints out its estimate of pi and ends. The slave nodes continue running, waiting for another request. At this point, we could run the master again, or we can terminate the slaves manually with Ctrl-C or the kill command.

Contributors
------------

I am grateful for the contributions of the following people to this project:

* Alan Mycroft
* Andrew P. Black
* Conrad Parker
* Dylan Lukes
* John Hughes
* John Launchbury
* Koen Claessen
* Simon Peyton-Jones
* Thomas van Noort
* Warren Harris

--------------------------------------------------------------------------------
/Remote.hs:
--------------------------------------------------------------------------------
1 | -- | Cloud Haskell (previously Remote Haskell) is a distributed computing 2 | -- framework for Haskell.
We can describe its interface 3 | -- as roughly two levels: the /process layer/, consisting of 4 | -- processes, messages, and fault monitoring; and the 5 | -- /task layer/, consisting of tasks, promises, and fault recovery. 6 | -- This summary module provides the most common interface 7 | -- functions for both layers, although advanced users might want to import names 8 | -- from the other constituent modules, as well. 9 | 10 | module Remote ( -- * The process layer 11 | remoteInit, 12 | 13 | ProcessM, NodeId, ProcessId, MatchM, 14 | getSelfPid, getSelfNode, 15 | 16 | send,sendQuiet, 17 | 18 | spawn, spawnLocal, spawnAnd, 19 | spawnLink, 20 | callRemote, callRemotePure, callRemoteIO, 21 | AmSpawnOptions(..), defaultSpawnOptions, 22 | terminate, 23 | 24 | expect, receive, receiveWait, receiveTimeout, 25 | match, matchIf, matchUnknown, matchUnknownThrow, matchProcessDown, 26 | 27 | logS, say, LogSphere, LogTarget(..), LogFilter(..), LogConfig(..), LogLevel(..), 28 | setLogConfig, setNodeLogConfig, getLogConfig, defaultLogConfig, getCfgArgs, 29 | 30 | UnknownMessageException(..), ServiceException(..), 31 | TransmitException(..), TransmitStatus(..), 32 | 33 | nameSet, nameQuery, nameQueryOrStart, 34 | 35 | linkProcess, monitorProcess, unmonitorProcess, 36 | withMonitor, MonitorAction(..), 37 | ProcessMonitorException(..), 38 | 39 | getPeers, findPeerByRole, PeerInfo, 40 | 41 | remotable, RemoteCallMetaData, Lookup, 42 | 43 | Closure, makeClosure, invokeClosure, 44 | Payload, genericPut, genericGet, Serializable, 45 | 46 | -- * Channels 47 | 48 | SendPort, ReceivePort, 49 | newChannel, sendChannel, receiveChannel, 50 | 51 | CombinedChannelAction, combinedChannelAction, 52 | combinePortsBiased, combinePortsRR, mergePortsBiased, mergePortsRR, 53 | terminateChannel, 54 | 55 | -- * The task layer 56 | 57 | TaskM, runTask, Promise, 58 | newPromise, newPromiseHere, newPromiseAtRole, newPromiseNear, 59 | toPromise, toPromiseNear, toPromiseImm, 60 | readPromise, 61 | tlogS, tsay, 62 | 63 | TaskException(..), 64 | 65 | MapReduce(..), mapReduce, 66 | chunkify, shuffle, 67 | 68 | ) where 69 | 70 | import Remote.Init 71 | import Remote.Encoding 72 | import Remote.Process 73 | import Remote.Channel 74 | import Remote.Call 75 | import Remote.Task 76 | import Remote.Peer 77 | import Remote.Reg 78 | import Remote.Closure 79 | -------------------------------------------------------------------------------- /Remote/Call.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | 3 | -- | Provides Template Haskell-based tools 4 | -- and syntactic sugar for dealing with closures 5 | module Remote.Call ( 6 | remotable, 7 | mkClosure, 8 | mkClosureRec, 9 | ) where 10 | 11 | import Language.Haskell.TH 12 | import Remote.Encoding (Payload,serialDecode,serialEncode,serialEncodePure) 13 | import Control.Monad.Trans (liftIO) 14 | import Control.Monad (liftM) 15 | import Data.Maybe (isJust) 16 | import Remote.Closure (Closure(..)) 17 | import Remote.Process (ProcessM) 18 | import Remote.Reg (putReg,RemoteCallMetaData) 19 | import Remote.Task (TaskM,serialEncodeA,serialDecodeA) 20 | 21 | ---------------------------------------------- 22 | -- * Compile-time metadata 23 | ---------------------------------------------- 24 | 25 | -- | A compile-time macro to expand a function name to its corresponding 26 | -- closure name (if such a closure exists), suitable for use with 27 | -- 'spawn', 'callRemote', etc 28 | -- In general, using the syntax @$(mkClosure foo)@ 
is the same 29 | -- as addressing the closure generator by name, that is, 30 | -- @foo__closure@. In some cases you may need to use 31 | -- 'mkClosureRec' instead. 32 | mkClosure :: Name -> Q Exp 33 | mkClosure n = do info <- reify n 34 | case info of 35 | VarI iname _ _ _ -> 36 | do let newn = mkName $ show iname ++ "__closure" 37 | newinfo <- reify newn 38 | case newinfo of 39 | VarI newiname _ _ _ -> varE newiname 40 | _ -> error $ "Unexpected type of closure symbol for "++show n 41 | _ -> error $ "No closure corresponding to "++show n 42 | 43 | -- | A variant of 'mkClosure' suitable for expanding closures 44 | -- of functions declared in the same module, including that 45 | -- of the function it's used in. The Rec stands for recursive. 46 | -- If you get the @Something is not in scope at a reify@ message 47 | -- when using mkClosure, try using this function instead. 48 | -- Using this function also turns off the static 49 | -- checks used by mkClosure, and therefore you are responsible 50 | -- for making sure that you use 'remotable' with each function 51 | -- that may be an argument of mkClosureRec 52 | mkClosureRec :: Name -> Q Exp 53 | mkClosureRec name = 54 | do e <- makeEnv 55 | inf <- reify name 56 | case inf of 57 | VarI aname atype _ _ -> 58 | case nameModule aname of 59 | Just a -> case a == loc_module (eLoc e) of 60 | False -> error "Can't use mkClosureRec across modules: use mkClosure instead" 61 | True -> do (aat,aae) <- closureInfo e aname atype 62 | sigE (return aae) (return aat) 63 | _ -> error "mkClosureRec can't figure out module of symbol" 64 | _ -> error "mkClosureRec applied to something weird" 65 | 66 | 67 | closureInfo :: Env -> Name -> Type -> Q (Type,Exp) 68 | closureInfo e named typed = 69 | do v <- theval 70 | return (thetype,v) 71 | where 72 | implFqn = loc_module (eLoc e) ++ "." 
++ nameBase named ++ "__0__impl" 73 | (params, returns) = getReturns typed 0 74 | wrapit x = case isArrowType e x of 75 | False -> AppT (eClosureT e) x 76 | True -> wrapMonad e (eClosureT e) x 77 | thetype = putParams (params ++ [wrapit (putParams returns)]) 78 | theval = lamE (map varP paramnames) (appE (appE [e|Closure|] (litE (stringL implFqn))) (appE [e|serialEncodePure|] (tupE (map varE paramnames)))) 79 | paramnames = map (\x -> mkName $ 'a' : show x) [1..(length params)] 80 | 81 | closureDecs :: Env -> Name -> Type -> Q [Dec] 82 | closureDecs e n t = 83 | do (nt,ne) <- closureInfo e n t 84 | sequence [sigD closureName (return nt), 85 | funD closureName [clause [] (normalB $ return ne) []]] 86 | where closureName = mkName $ nameBase n ++ "__closure" 87 | 88 | 89 | data Env = Env 90 | { eProcessM :: Type 91 | , eIO :: Type 92 | , eTaskM :: Type 93 | , ePayload :: Type 94 | , eLoc :: Loc 95 | , eLiftIO :: Exp 96 | , eReturn :: Exp 97 | , eClosure :: Exp 98 | , eClosureT :: Type 99 | } 100 | 101 | makeEnv :: Q Env 102 | makeEnv = 103 | do eProcessM <- [t| ProcessM |] 104 | eIO <- [t| IO |] 105 | eTaskM <- [t| TaskM |] 106 | eLoc <- location 107 | ePayload <- [t| Payload |] 108 | eLiftIO <- [e|liftIO|] 109 | eReturn <- [e|return|] 110 | eClosure <- [e|Closure|] 111 | eClosureT <- [t|Closure|] 112 | return Env { 113 | eProcessM=eProcessM, 114 | eIO = eIO, 115 | eTaskM = eTaskM, 116 | eLoc = eLoc, 117 | ePayload=ePayload, 118 | eLiftIO=eLiftIO, 119 | eReturn=eReturn, 120 | eClosure=eClosure, 121 | eClosureT=eClosureT 122 | } 123 | 124 | isMonad :: Env -> Type -> Bool 125 | isMonad e t 126 | = t == eProcessM e 127 | || t == eIO e 128 | || t == eTaskM e 129 | 130 | monadOf :: Env -> Type -> Maybe Type 131 | monadOf e (AppT m _) | isMonad e m = Just m 132 | monadOf e _ = Nothing 133 | 134 | restOf :: Env -> Type -> Type 135 | restOf e (AppT m r ) | isMonad e m = r 136 | restOf e r = r 137 | 138 | wrapMonad :: Env -> Type -> Type -> Type 139 | wrapMonad e monad val = 140 | case monadOf e val of 141 | Just t | t == monad -> val 142 | Just n -> AppT monad (restOf e val) 143 | Nothing -> AppT monad val 144 | 145 | getReturns :: Type -> Int -> ([Type],[Type]) 146 | getReturns t shift = splitAt ((length arglist - 1) - shift) arglist 147 | where arglist = getParams t 148 | 149 | countReturns :: Type -> Int 150 | countReturns t = length $ getParams t 151 | 152 | applyArgs :: Exp -> [Exp] -> Exp 153 | applyArgs f [] = f 154 | applyArgs f (l:r) = applyArgs (AppE f l) r 155 | 156 | isArrowType :: Env -> Type -> Bool 157 | isArrowType _ (AppT (AppT ArrowT _) _) = True 158 | isArrowType e t | (isJust $ monadOf e t) && isArrowType e (restOf e t) = True 159 | isArrowType _ _ = False 160 | 161 | generateDecl :: Env -> Name -> Type -> Int -> Q [Dec] 162 | generateDecl e name t shift = 163 | let 164 | implName = mkName (nameBase name ++ "__" ++ show shift ++ "__impl") 165 | implPlName = mkName (nameBase name ++ "__" ++ show shift ++ "__implPl") 166 | (params,returns) = getReturns t shift 167 | topmonad = case monadOf e $ last returns of 168 | Just p | p == (eTaskM e) -> eTaskM e 169 | _ -> eProcessM e 170 | lifter :: Exp -> ExpQ 171 | lifter x = case monadOf e $ putParams returns of 172 | Just p | p == topmonad -> return x 173 | Just p | p == eIO e -> return $ AppE (eLiftIO e) x 174 | _ -> return $ AppE (eReturn e) x 175 | serialEncoder x = case topmonad of 176 | p | p == eTaskM e -> appE [e|serialEncodeA|] x 177 | _ -> appE [e|liftIO|] (appE [e|serialEncode|] x) 178 | serialDecoder x = case topmonad of 179 | 
p | p == eTaskM e -> appE [e|serialDecodeA|] x 180 | _ -> appE [e|liftIO|] (appE [e|serialDecode|] x) 181 | paramnames = map (\x -> 'a' : show x) [1..(length params)] 182 | paramnamesP = (map (varP . mkName) paramnames) 183 | paramnamesE = (map (VarE . mkName) paramnames) 184 | 185 | just a = conP (mkName "Prelude.Just") [a] 186 | 187 | impldec = sigD implName (appT (appT arrowT (return (ePayload e))) (return $ wrapMonad e topmonad $ putParams returns)) 188 | impldef = funD implName [clause [varP (mkName "a")] 189 | (normalB (doE [bindS (varP (mkName "res")) ((serialDecoder (varE (mkName "a")))), 190 | noBindS (caseE (varE (mkName "res")) 191 | [match (just (tupP paramnamesP)) (normalB (lifter (applyArgs (VarE name) paramnamesE))) [], 192 | match wildP (normalB (appE [e|error|] (litE (stringL ("Bad decoding in closure splice of "++nameBase name))))) []]) 193 | ])) 194 | []] 195 | implPldec = sigD implPlName (return $ putParams $ [ePayload e,wrapMonad e topmonad (ePayload e)] ) 196 | implPldef = funD implPlName [clause [varP (mkName "a")] 197 | (normalB (doE [bindS (varP (mkName "res")) ( (appE (varE implName) (varE (mkName "a")))), 198 | noBindS ((serialEncoder (varE (mkName "res")))) ] )) [] ] 199 | base1 = [impldec,impldef] 200 | base2 = if isArrowType e $ putParams returns 201 | then [] 202 | else [implPldec,implPldef] 203 | in do cld <- closureDecs e name t 204 | sequence $ base1++base2++(map return cld) 205 | 206 | 207 | generateDecls :: Env -> Name -> Q [Dec] 208 | generateDecls e name = 209 | do tr <- getType name 210 | case tr of 211 | Nothing -> error "remotable applied to bad name" 212 | Just (fname,ftype) -> 213 | -- Change the following line to: [0..countReturns ftype - 1] 214 | -- to automatically enable partial closure generators 215 | liftM concat $ mapM (generateDecl e fname ftype) [0] 216 | 217 | generateMetaData :: Env -> [Dec] -> Q [Dec] 218 | generateMetaData e decls = sequence [sig,dec] 219 | where regDecls [] = [] 220 | regDecls (first:rest) = 221 | case first of 222 | SigD named _ -> named : (regDecls rest) 223 | _ -> regDecls rest 224 | registryName = (mkName "__remoteCallMetaData") 225 | paramName = mkName "x" 226 | sig = sigD registryName [t| RemoteCallMetaData |] 227 | dec = funD registryName [clause [varP paramName] (normalB (toChain (regDecls decls))) []] 228 | fqn n = (maybe (loc_module (eLoc e)++".") ((flip (++))".") (nameModule n)) ++ nameBase n 229 | app2E op l r = appE (appE op l) r 230 | toChain [] = varE paramName 231 | toChain [h] = appE (app2E [e|putReg|] (varE h) (litE $ stringL (fqn h))) (varE paramName) 232 | toChain (h:t) = appE (app2E [e|putReg|] (varE h) (litE $ stringL (fqn h))) (toChain t) 233 | 234 | -- | A compile-time macro to provide easy invocation of closures. 235 | -- To use this, follow the following steps: 236 | -- 237 | -- 1. First, enable Template Haskell in the module: 238 | -- 239 | -- > {-# LANGUAGE TemplateHaskell #-} 240 | -- > module Main where 241 | -- > import Remote.Call (remotable) 242 | -- > ... 243 | -- 244 | -- 2. Define your functions normally. Restrictions: function's type signature must be explicitly declared; no polymorphism; all parameters must implement Serializable; return value must be pure, or in one of the 'ProcessM', 'TaskM', or 'IO' monads; probably other restrictions as well. 
--
-- > greet :: String -> ProcessM ()
-- > greet name = say ("Hello, "++name)
-- > badFib :: Integer -> Integer
-- > badFib 0 = 1
-- > badFib 1 = 1
-- > badFib n = badFib (n-1) + badFib (n-2)
--
-- 3. Use the 'remotable' function to automagically generate stubs and closure generators for your functions:
--
-- > $( remotable ['greet, 'badFib] )
--
-- 'remotable' may be used only once per module.
--
-- 4. When you call 'remoteInit' (usually the first thing in your program),
-- be sure to give it the automagically generated function lookup tables
-- from all modules that use 'remotable':
--
-- > main = remoteInit (Just "config") [Main.__remoteCallMetaData, OtherModule.__remoteCallMetaData] initialProcess
--
-- 5. Now you can invoke your functions remotely. When a function expects a closure, give it the name
-- of the generated closure, rather than the name of the original function. If the function takes parameters,
-- so will the closure. To start the @greet@ function on @someNode@:
--
-- > spawn someNode (greet__closure "John Baptist")
--
-- Note that we say @greet__closure@ rather than just @greet@. If you prefer, you can use 'mkClosure' instead, i.e. @$(mkClosure 'greet)@, which will expand to @greet__closure@. To calculate a Fibonacci number remotely:
--
-- > val <- callRemotePure someNode (badFib__closure 5)
remotable :: [Name] -> Q [Dec]
remotable names =
    do env <- makeEnv
       newDecls <- liftM concat $ mapM (generateDecls env) names
       lookup <- generateMetaData env newDecls
       return $ newDecls ++ lookup

getType :: Name -> Q (Maybe (Name, Type))
getType name =
    do info <- reify name
       case info of
         VarI iname itype _ _ -> return $ Just (iname,itype)
         _ -> return Nothing

putParams :: [Type] -> Type
putParams (afst:lst:[]) = AppT (AppT ArrowT afst) lst
putParams (afst:[]) = afst
putParams (afst:lst) = AppT (AppT ArrowT afst) (putParams lst)
putParams [] = error "Unexpected parameter type in remotable processing"

getParams :: Type -> [Type]
getParams typ = case typ of
                  AppT (AppT ArrowT b) c -> b : getParams c
                  b -> [b]


--------------------------------------------------------------------------------
/Remote/Channel.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE ExistentialQuantification,DeriveDataTypeable #-}

-- | This module provides typed channels, an alternative
-- approach to interprocess messaging. Typed channels
-- can be used in combination with or instead of
-- the untyped channels available in the "Remote.Process"
-- module via 'send'.
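--
-- A sketch of typical use (not from the original documentation; assumes
-- 'spawnLocal' and 'say' from "Remote.Process"):
--
-- > do (sendport, recvport) <- newChannel
-- >    _ <- spawnLocal (sendChannel sendport "hello")
-- >    msg <- receiveChannel recvport
-- >    say msg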
module Remote.Channel (
                       -- * Basic typed channels
                       SendPort,ReceivePort,newChannel,sendChannel,receiveChannel,

                       -- * Combined typed channels
                       CombinedChannelAction,combinedChannelAction,
                       combinePortsBiased,combinePortsRR,mergePortsBiased,mergePortsRR,

                       -- * Terminate a channel
                       terminateChannel) where

import Remote.Process (ProcessM,send,getMessageType,getMessagePayload,setDaemonic,getProcess,prNodeRef,getNewMessageLocal,localFromPid,isPidLocal,TransmitException(..),TransmitStatus(..),spawnLocalAnd,ProcessId,Node,UnknownMessageException(..))
import Remote.Encoding (Serializable)

import Data.List (foldl')
import Data.Binary (Binary,get,put)
import Data.Typeable (Typeable)
import Control.Exception (throw)
import Control.Monad (when)
import Control.Monad.Trans (liftIO)
import Control.Concurrent.MVar (MVar,newEmptyMVar,takeMVar,readMVar,putMVar)
import Control.Concurrent.STM (STM,atomically,retry,orElse)
import Control.Concurrent.STM.TVar (TVar,newTVarIO,readTVar,writeTVar)

----------------------------------------------
-- * Channels
----------------------------------------------

-- | A channel is a unidirectional communication pipeline
-- with two ends: a sending port, and a receiving port.
-- This is the sending port. A process holding this
-- value can insert messages into the channel. SendPorts
-- themselves can also be sent to other processes.
-- The other side of the channel is the 'ReceivePort'.
newtype SendPort a = SendPort ProcessId deriving (Typeable)

-- | A process holding a ReceivePort can extract messages
-- from the channel, which were inserted by
-- the holder(s) of the corresponding 'SendPort'.
-- Critically, ReceivePorts, unlike SendPorts, are not serializable.
-- This means that you can only receive messages through a channel
-- on the node on which the channel was created.
data ReceivePort a = ReceivePortSimple ProcessId (MVar ())
                   | ReceivePortBiased [Node -> STM a]
                   | ReceivePortRR (TVar [Node -> STM a])

instance Binary (SendPort a) where
   put (SendPort pid) = put pid
   get = get >>= return . SendPort

-- | Creates a new channel, and returns both the 'SendPort'
-- and 'ReceivePort' thereof.
newChannel :: (Serializable a) => ProcessM (SendPort a, ReceivePort a)
newChannel = do mv <- liftIO $ newEmptyMVar
                pid <- spawnLocalAnd (body mv) setDaemonic
                return (SendPort pid,
                        ReceivePortSimple pid mv)
     where body mv = liftIO (takeMVar mv)

-- | Inserts a new value into the channel.
sendChannel :: (Serializable a) => SendPort a -> a -> ProcessM ()
sendChannel (SendPort pid) a = send pid a

-- | Extract a value from the channel, in FIFO order.
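--
-- For example (a sketch, assuming @rp :: ReceivePort String@ and 'say'
-- from "Remote.Process"):
--
-- > msg <- receiveChannel rp
-- > say msg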
receiveChannel :: (Serializable a) => ReceivePort a -> ProcessM a
receiveChannel rc = do p <- getProcess
                       channelCheckPids [rc]
                       node <- liftIO $ readMVar (prNodeRef p)
                       liftIO $ atomically $ receiveChannelImpl node rc

receiveChannelImpl :: (Serializable a) => Node -> ReceivePort a -> STM a
receiveChannelImpl node rc =
     case rc of
       ReceivePortBiased l -> foldl' orElse retry (map (\x -> x node) l)
       ReceivePortRR mv -> do tv <- readTVar mv
                              writeTVar mv (rotate tv)
                              foldl' orElse retry (map (\x -> x node) tv)
       ReceivePortSimple _ _ -> receiveChannelSimple node rc
   where rotate [] = []
         rotate (h:t) = t ++ [h]

data CombinedChannelAction b = forall a. (Serializable a) => CombinedChannelAction (ReceivePort a) (a -> b)

-- | Specifies a port and an adapter for combining ports via 'combinePortsBiased' and
-- 'combinePortsRR'.
combinedChannelAction :: (Serializable a) => ReceivePort a -> (a -> b) -> CombinedChannelAction b
combinedChannelAction = CombinedChannelAction

-- | This function lets us respond to messages on multiple channels
-- by combining several 'ReceivePort's into one. The resulting port
-- is the sum of the input ports, and will extract messages from all
-- of them in FIFO order. The input ports are specified by
-- 'combinedChannelAction', which also gives a converter function.
-- After combining, the underlying receive ports can still
-- be used independently, as well.
-- We provide two ways to combine ports, which differ in the bias
-- they demonstrate in returning messages when more than one
-- underlying channel is nonempty. combinePortsBiased will
-- check ports in the order given by its argument, and so
-- if the first channel always has a message waiting, it will
-- starve the other channels. The alternative is 'combinePortsRR'.
combinePortsBiased :: Serializable b => [CombinedChannelAction b] -> ProcessM (ReceivePort b)
combinePortsBiased chns = do mapM_ (\(CombinedChannelAction chn _ ) -> channelCheckPids [chn]) chns
                             return $ ReceivePortBiased [(\node -> receiveChannelImpl node chn >>= return . fun) | (CombinedChannelAction chn fun) <- chns]

-- | See 'combinePortsBiased'. This function differs from that one
-- in that the order that the underlying ports are checked is rotated
-- with each invocation, guaranteeing that, given enough invocations,
-- every channel will have a chance to contribute a message.
combinePortsRR :: Serializable b => [CombinedChannelAction b] -> ProcessM (ReceivePort b)
combinePortsRR chns = do mapM_ (\(CombinedChannelAction chn _ ) -> channelCheckPids [chn]) chns
                         tv <- liftIO $ newTVarIO [(\node -> receiveChannelImpl node chn >>= return . fun) | (CombinedChannelAction chn fun) <- chns]
                         return $ ReceivePortRR tv

-- | Similar to 'combinePortsBiased', with the difference that
-- the underlying ports must be of the same type, and you don't
-- have the opportunity to provide an adapter function.
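--
-- A sketch (assuming @rp1, rp2 :: ReceivePort Int@ and 'say' from
-- "Remote.Process"):
--
-- > merged <- mergePortsBiased [rp1, rp2]
-- > n <- receiveChannel merged
-- > say (show n)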
mergePortsBiased :: (Serializable a) => [ReceivePort a] -> ProcessM (ReceivePort a)
mergePortsBiased chns = do channelCheckPids chns
                           return $ ReceivePortBiased [(\node -> receiveChannelImpl node chn) | chn <- chns]

-- | Similar to 'combinePortsRR', with the difference that
-- the underlying ports must be of the same type, and you don't
-- have the opportunity to provide an adapter function.
mergePortsRR :: (Serializable a) => [ReceivePort a] -> ProcessM (ReceivePort a)
mergePortsRR chns = do channelCheckPids chns
                       tv <- liftIO $ newTVarIO [(\node -> receiveChannelImpl node chn) | chn <- chns]
                       return $ ReceivePortRR tv

channelCheckPids :: (Serializable a) => [ReceivePort a] -> ProcessM ()
channelCheckPids chns = mapM_ checkPid chns
    where checkPid (ReceivePortSimple pid _) = do islocal <- isPidLocal pid
                                                  when (not islocal)
                                                       (throw $ TransmitException QteUnknownPid)
          checkPid _ = return ()

receiveChannelSimple :: (Serializable a) => Node -> ReceivePort a -> STM a
receiveChannelSimple node (ReceivePortSimple chpid _) =
    do mmsg <- getNewMessageLocal (node) (localFromPid chpid)
       case mmsg of
          Nothing -> badPid
          Just msg -> case getMessagePayload msg of
                        Nothing -> throw $ UnknownMessageException (getMessageType msg)
                        Just q -> return q
  where badPid = throw $ TransmitException QteUnknownPid

-- | Terminate a channel. After calling this function, 'receiveChannel'
-- on that port (or on any combined port based on it) will either
-- fail or block indefinitely, and 'sendChannel' on the corresponding
-- 'SendPort' will fail. Any unread messages remaining in the channel
-- will be lost.
terminateChannel :: (Serializable a) => ReceivePort a -> ProcessM ()
terminateChannel (ReceivePortSimple _ term) = liftIO $ putMVar (term) ()
terminateChannel _ = throw $ TransmitException QteUnknownPid

--------------------------------------------------------------------------------
/Remote/Closure.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable #-}

-- | A simple type to represent a closure, that is, a function
-- and its environment. The current implementation represents
-- functions as strings, but this could theoretically be
-- swapped out for the \"static\" mechanism described in the
-- paper.
module Remote.Closure (
                       Closure(..)
                      ) where

import Data.Binary (Binary,get,put)
import Data.Typeable (Typeable)
import Remote.Encoding (Payload)

-- | A data type representing a closure, that is, a function with its environment.
-- In spirit, this is actually:
--
-- > data Closure a where
-- >    Closure :: Serializable v => Static (v -> a) -> v -> Closure a
--
-- where the Static type wraps a function with no non-static free variables.
-- We simulate this behavior by identifying top-level functions as strings.
-- See the paper for clarification.
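--
-- For illustration (an assumption for exposition, not part of the original
-- documentation): applying a generated closure generator such as
-- @greet__closure "John"@ yields, roughly,
--
-- > Closure "Main.greet__0__impl" (serialEncodePure "John")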
data Closure a = Closure String Payload
     deriving (Typeable)

instance Show (Closure a) where
     show a = case a of
                (Closure fn _pl) -> show fn

instance Binary (Closure a) where
     get = do s <- get
              v <- get
              return $ Closure s v
     put (Closure s v) = put s >> put v

--------------------------------------------------------------------------------
/Remote/Encoding.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable,CPP,FlexibleInstances,UndecidableInstances #-}

-- | This module provides the 'Serializable' type class and
-- functions to convert to and from 'Payload's. It's implemented
-- in terms of Haskell's "Data.Binary". The message sending
-- and receiving functionality in "Remote.Process" depends on this.
module Remote.Encoding (
          Serializable,
          serialEncode,
          serialEncodePure,
          serialDecode,
          serialDecodePure,
          dynamicDecodePure,
          dynamicEncodePure,
          Payload,
          DynamicPayload,
          PayloadLength,
          hPutPayload,
          hGetPayload,
          payloadLength,
          getPayloadType,
          getDynamicPayloadType,
          getPayloadContent,
          genericPut,
          genericGet) where

import Prelude hiding (id)
import qualified Prelude as Prelude

import Data.Binary (Binary,encode,decode,Put,Get,put,get,putWord8,getWord8)
import Control.Monad (liftM)
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B (hPut,hGet,length)
import Control.Exception (try,evaluate,ErrorCall)
import Data.Int (Int64)
import System.IO (Handle)
import Data.Typeable (typeOf,Typeable)
import Data.Dynamic (Dynamic,toDyn,fromDynamic,dynTypeRep)
import Data.Generics (Data,gfoldl,gunfold, toConstr,constrRep,ConstrRep(..),repConstr,extQ,extR,dataTypeOf)

-- | Data that can be sent as a message must implement
-- this class. The class has no functions of its own,
-- but instead simply requires that the type implement
-- both 'Typeable' and 'Binary'. Typeable can usually
-- be derived automatically. Binary requires the put and get
-- functions, which can be easily implemented by hand,
-- or you can use the 'genericGet' and 'genericPut' flavors,
-- which will work automatically for types implementing
-- 'Data'.
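--
-- A minimal hand-written instance might look like this (a sketch; the
-- type @Ping@ is hypothetical):
--
-- > data Ping = Ping deriving (Typeable)
-- >
-- > instance Binary Ping where
-- >    put Ping = return ()
-- >    get = return Ping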
class (Binary a,Typeable a) => Serializable a
instance (Binary a,Typeable a) => Serializable a

data Payload = Payload
    {
       payloadType :: !ByteString,
       payloadContent :: !ByteString
    } deriving (Typeable)
data DynamicPayload = DynamicPayload
    {
       dynamicPayloadContent :: Dynamic
    }
type PayloadLength = Int64

instance Binary Payload where
  put pl = put (payloadType pl) >> put (payloadContent pl)
  get = get >>= \a -> get >>= \b -> return $ Payload {payloadType = a,payloadContent=b}

payloadLength :: Payload -> PayloadLength
payloadLength (Payload t c) = B.length t + B.length c

getPayloadContent :: Payload -> ByteString
getPayloadContent = payloadContent

getPayloadType :: Payload -> String
getPayloadType pl = decode $ payloadType pl

hPutPayload :: Handle -> Payload -> IO ()
hPutPayload h (Payload t c) = B.hPut h (encode (B.length t :: PayloadLength)) >>
                              B.hPut h t >>
                              B.hPut h (encode (B.length c :: PayloadLength)) >>
                              B.hPut h c

hGetPayload :: Handle -> IO Payload
hGetPayload h = do tl <- B.hGet h (fromIntegral baseLen)
                   t <- B.hGet h (fromIntegral (decode tl :: PayloadLength))
                   cl <- B.hGet h (fromIntegral baseLen)
                   c <- B.hGet h (fromIntegral (decode cl :: PayloadLength))
                   return $ Payload {payloadType = t,payloadContent = c}
     where baseLen = B.length (encode (0::PayloadLength))

serialEncodePure :: (Serializable a) => a -> Payload
serialEncodePure a = let encoding = encode a
                      in encoding `seq` Payload {payloadType = encode $ show $ typeOf a,
                                                 payloadContent = encoding}

dynamicEncodePure :: (Serializable a) => a -> DynamicPayload
dynamicEncodePure a = DynamicPayload {dynamicPayloadContent = toDyn a}

dynamicDecodePure :: (Serializable a) => DynamicPayload -> Maybe a
dynamicDecodePure a = fromDynamic (dynamicPayloadContent a)

getDynamicPayloadType :: DynamicPayload -> String
getDynamicPayloadType a = show (dynTypeRep (dynamicPayloadContent a))

-- TODO I suspect that we will get better performance for big messages if we let this be lazy
-- see also serialDecode
serialEncode :: (Serializable a) => a -> IO Payload
serialEncode a = do encoded <- evaluate $ encode a -- this evaluate is actually necessary, it turns out; it might be better to just use strict ByteStrings
                    return $ Payload {payloadType = encode $ show $ typeOf a,
                                      payloadContent = encoded}


serialDecodePure :: (Serializable a) => Payload -> Maybe a
serialDecodePure a = (\id ->
                        let pc = payloadContent a
                         in
                           pc `seq`
                           if (decode $! payloadType a) ==
                              show (typeOf $ id undefined)
                              then Just (id $! decode pc)
                              else Nothing ) Prelude.id


serialDecode :: (Serializable a) => Payload -> IO (Maybe a)
serialDecode a = (\id ->
                     if (decode $ payloadType a) ==
                        show (typeOf $ id undefined)
                        then do
                               res <- try (evaluate $ decode (payloadContent a))
                                         :: (Serializable a) => IO (Either ErrorCall a)
                               case res of
                                 Left _ -> return $ Nothing
                                 Right v -> return $ Just $ id v
                        else return Nothing ) Prelude.id


-- | Data types that can be used in messaging must
-- be serializable, which means that they must implement
-- the 'get' and 'put' methods from 'Binary'. If you
-- are too lazy to write these functions yourself,
-- you can delegate responsibility to this function.
-- It's usually sufficient to do something like this:
--
-- > import Data.Data (Data)
-- > import Data.Typeable (Typeable)
-- > import Data.Binary (Binary, get, put)
-- > data MyType = MkMyType Foobar Int [(String, Waddle Baz)]
-- >             | MkSpatula
-- >                  deriving (Data, Typeable)
-- > instance Binary MyType where
-- >    put = genericPut
-- >    get = genericGet
genericPut :: (Data a) => a -> Put
genericPut = generic `extQ` genericString
   where generic what = fst $ gfoldl
            (\(before, a_to_b) a -> (before >> genericPut a, a_to_b a))
            (\x -> (serializeConstr (constrRep (toConstr what)), x))
            what
         genericString :: String -> Put
         genericString = put.encode

-- | This is the counterpart to 'genericPut'
genericGet :: Data a => Get a
genericGet = generic `extR` genericString
   where generic = (\id -> liftM id $ deserializeConstr $ \constr_rep ->
                     gunfold (\n -> do n' <- n
                                       g' <- genericGet
                                       return $ n' g')
                             (return)
                             (repConstr (dataTypeOf (id undefined)) constr_rep)) Prelude.id
         genericString :: Get String
         genericString = do q <- get
                            return $ decode q

serializeConstr :: ConstrRep -> Put
serializeConstr (AlgConstr ix)   = putWord8 1 >> put ix
serializeConstr (IntConstr i)    = putWord8 2 >> put i
serializeConstr (FloatConstr r)  = putWord8 3 >> put r
#if __GLASGOW_HASKELL__ >= 611
serializeConstr (CharConstr c)   = putWord8 4 >> put c
#else
serializeConstr (StringConstr c) = putWord8 4 >> put (head c)
#endif

deserializeConstr :: (ConstrRep -> Get a) -> Get a
deserializeConstr k =
      do constr_ix <- getWord8
         case constr_ix of
          1 -> get >>= \ix -> k (AlgConstr ix)
          2 -> get >>= \i -> k (IntConstr i)
          3 -> get >>= \r -> k (FloatConstr r)
#if __GLASGOW_HASKELL__ >= 611
          4 -> get >>= \c -> k (CharConstr c)
#else
          4 -> get >>= \c -> k (StringConstr (c:[]))
#endif

--------------------------------------------------------------------------------
/Remote/File.hs:
--------------------------------------------------------------------------------
module Main where

import Network.BSD (HostEntry(..),getHostName)
import Network (HostName,PortID(..),PortNumber(..),listenOn,accept,sClose,connectTo,Socket)
import Network.Socket (PortNumber(..),setSocketOption,SocketOption(..),socketPort,aNY_PORT )
import System.Timeout (timeout)
import Control.Exception (finally)
import System.IO (hPutStr,hClose,hGetContents)
import Control.Concurrent.MVar (MVar,putMVar) -- used by sendFile's port handoff

import Remote.Process (PortId,ProcessId)

data Store a = PromiseFile ProcessId

{-

newPromiseStream :: Closure (PromiseStreamReader a -> TaskM b) -> TaskM (PromiseStreamWriter a)
writePromiseStream :: PromiseStreamWriter a -> a -> TaskM ()
finalizePromiseStream :: PromiseStreamWriter a -> TaskM ()
readPromiseStream :: PromiseStreamReader a -> TaskM (Maybe a)

--------
newtype Raw = Raw String
instance Read Raw where
   readsPrec _ x = [(x,[])]

data Promise a = PromiseBasic { psRedeemer :: ProcessId, psId :: PromiseId }
               | PromiseImmediate a
               | PromiseStream ProcessId StreamId
                 deriving Typeable

data StreamInfo = StreamInfo FilePath ByteRange

Map StreamId StreamInfo

toStream :: FilePath -> Int -> ProcessM [Stream a] -- partitioning of input data
readPromise :: Promise a -> TaskM a
writeStream :: StreamWriter a -> a -> TaskM ()
newStream :: (Closure (StreamWriter a -> TaskM ())) -> TaskM (Stream a)
-}

-- The real question is: how to deal with complaints?
-- we should be able to FORCE DISTRIBUTION to certain nodes (where they can later be picked out of the cache)
-- in addition, the master might keep track of locations where copies are available
-- finally, PARTITIONING

sendFile :: FilePath -> PortId -> Int -> MVar PortId -> IO Bool
sendFile fp portid delay mrealport =
   do sock <- listenOn whichPort
      setSocketOption sock KeepAlive 1
      realPort <- socketPort sock
      putMVar mrealport (fromEnum realPort)
      goSock sock `finally` sClose sock
  where
        goSock sock =
          do
             res <- timeoutq delay $ accept sock
             case res of
                Nothing -> return False
                Just (h,_,_) ->
                   let acceptgo =
                          do file <- readFile fp
                             hPutStr h file
                             hClose h
                             return True
                    in acceptgo `finally` hClose h
        timeoutq d f = case d of
                         0 -> do r <- f
                                 return $ Just r
                         _ -> timeout d f
        whichPort = if portid /= 0
                       then PortNumber $ toEnum $ portid
                       else PortNumber aNY_PORT

receiveFile :: FilePath -> HostName -> PortId -> IO ()
receiveFile fp hostname portid =
   do h <- connectTo hostname (PortNumber $ toEnum portid)
      (do contents <- hGetContents h
          writeFile fp contents) `finally` (hClose h)

--------------------------------------------------------------------------------
/Remote/Init.hs:
--------------------------------------------------------------------------------
-- | Exposes a high-level interface for starting a node of a distributed
-- program, taking into account a local configuration file, command
-- line arguments, and commonly-used system processes.
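--
-- A minimal entry point might look like this (a sketch; the body of
-- @initialProcess@ is an assumption for illustration):
--
-- > main :: IO ()
-- > main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess
-- >   where initialProcess "MASTER" = say "hello from the master"
-- >         initialProcess _        = receiveWait []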
module Remote.Init (remoteInit,remoteInitFromConfig) where

import Remote.Peer (startDiscoveryService)
import Remote.Task (__remoteCallMetaData)
import Remote.Process (startProcessRegistryService,suppressTransmitException,pbracket,localRegistryRegisterNode,localRegistryHello,localRegistryUnregisterNode,
                       startProcessMonitorService,startNodeMonitorService,startLoggingService,startSpawnerService,ProcessM,Config(..),readConfig,initNode,startLocalRegistry,
                       forkAndListenAndDeliver,waitForThreads,roleDispatch,Node,runLocalProcess,performFinalization,startFinalizerService)
import Remote.Reg (registerCalls,RemoteCallMetaData)

import System.FilePath (FilePath)
import System.Environment (getEnvironment)
import Control.Concurrent (threadDelay)
import Control.Monad.Trans (liftIO)
import Control.Exception (finally)
import Control.Concurrent.MVar (MVar,takeMVar,putMVar,newEmptyMVar)

startServices :: ProcessM ()
startServices =
   do
      startProcessRegistryService
      startNodeMonitorService
      startProcessMonitorService
      startLoggingService
      startDiscoveryService
      startSpawnerService
      startFinalizerService (suppressTransmitException localRegistryUnregisterNode >> return ())

dispatchServices :: MVar Node -> IO ()
dispatchServices node = do mv <- newEmptyMVar
                           runLocalProcess node (startServices >> liftIO (putMVar mv ()))
                           takeMVar mv


remoteInitFromConfig :: Config -> [RemoteCallMetaData] -> (String -> ProcessM ()) -> IO ()
remoteInitFromConfig cfg metadata f =
  let
      defaultMetaData = [Remote.Task.__remoteCallMetaData]
      lookup = registerCalls (defaultMetaData ++ metadata)
   in
  do
     node <- initNode cfg lookup
     _ <- startLocalRegistry cfg False -- potentially fails silently
     forkAndListenAndDeliver node cfg
     dispatchServices node
     (roleDispatch node userFunction >> waitForThreads node) `finally` (performFinalization node)
     threadDelay 500000 -- TODO make configurable, or something
   where userFunction s = localRegistryHello >> localRegistryRegisterNode >> f s

-- | This is the usual way to create a single node of a distributed program.
-- The intent is that 'remoteInit' be called in your program's 'Main.main'
-- function. A typical call takes this form:
--
-- > main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess
--
-- This will:
--
-- 1. Read the configuration file @config@ in the current directory or, if specified, from the file whose path is given by the environment variable @RH_CONFIG@. If the given file does not exist or is invalid, an exception will be thrown.
--
-- 2. Use the configuration given in the file as well as on the command-line to create a new node. The usual system processes will be started, including logging, discovery, and spawning.
--
-- 3. Compile-time metadata, generated by 'Remote.Call.remotable', will be used for invoking closures. Metadata from each module must be explicitly mentioned.
--
-- 4. The function initialProcess will be called, given as a parameter a string indicating the value of the cfgRole setting of this node. initialProcess is provided by the user and provides an entrypoint for controlling node behavior on startup.
remoteInit :: Maybe FilePath -> [RemoteCallMetaData] -> (String -> ProcessM ()) -> IO ()
remoteInit defaultConfig metadata f =
  do
     configFileName <- getConfigFileName
     cfg <- readConfig True configFileName
     -- TODO sanity-check cfg
     remoteInitFromConfig cfg metadata f
   where getConfigFileName = do env <- getEnvironment
                                return $ maybe defaultConfig Just (lookup "RH_CONFIG" env)

--------------------------------------------------------------------------------
/Remote/Peer.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable #-}

-- | Exposes mechanisms for a program built on the "Remote.Process"
-- framework to discover nodes on the current network. Programs
-- can perform node discovery manually, or they can use "Remote.Task",
-- which does it automatically.
module Remote.Peer (PeerInfo,startDiscoveryService,getPeers,getPeersStatic,getPeersDynamic,findPeerByRole) where

import Prelude hiding (all, pi)

import Network.Socket (defaultHints,sendTo,recv,sClose,Socket,getAddrInfo,AddrInfoFlag(..),setSocketOption,addrFlags,addrSocketType,addrFamily,SocketType(..),Family(..),addrProtocol,SocketOption(..),AddrInfo,bindSocket,addrAddress,SockAddr(..),socket)
import Network.BSD (getProtocolNumber)
import Control.Concurrent.MVar (takeMVar, newMVar, modifyMVar_)
import Remote.Process (PeerInfo,pingNode,makeNodeFromHost,spawnLocalAnd,setDaemonic,TransmitStatus(..),TransmitException(..),PayloadDisposition(..),ptimeout,getSelfNode,sendSimple,cfgRole,cfgKnownHosts,cfgPeerDiscoveryPort,match,receiveWait,getSelfPid,getConfig,NodeId,PortId,ProcessM,ptry,localRegistryQueryNodes)
import Control.Monad.Trans (liftIO)
import Data.Typeable (Typeable)
import Data.Maybe (catMaybes)
import Data.Binary (Binary,get,put)
import Control.Exception (try,bracket,ErrorCall(..),throw)
import Data.List (nub)
import Control.Monad (filterM)
import qualified Data.Traversable as Traversable (mapM)
import qualified Data.Map as Map (unionsWith,insertWith,empty,lookup)

data DiscoveryInfo = DiscoveryInfo
     {
       discNodeId :: NodeId,
       discRole :: String
     } deriving (Typeable,Eq)

instance Binary DiscoveryInfo where
   put (DiscoveryInfo nid role) = put nid >> put role
   get = get >>= \nid -> get >>= \role -> return $ DiscoveryInfo nid role

getUdpSocket :: PortId -> IO (Socket,AddrInfo) -- mostly copied from Network.Socket
getUdpSocket port = do
    proto <- getProtocolNumber "udp"
    let hints = defaultHints { addrFlags = [AI_PASSIVE,AI_ADDRCONFIG]
                             , addrSocketType = Datagram
                             , addrFamily = AF_INET -- only INET supports broadcast
                             , addrProtocol = proto }
    addrs <- getAddrInfo (Just hints) Nothing (Just (show port))
    let addr = head addrs
    s <- socket (addrFamily addr) (addrSocketType addr) (addrProtocol addr)
    return (s,addr)

maxPacket :: Int
maxPacket = 1024

listenUdp :: PortId -> IO String
listenUdp port =
    bracket
        (getUdpSocket port)
        (\(s,_) -> sClose s)
        (\(sock,addr) -> do
            setSocketOption sock ReuseAddr 1
            bindSocket sock (addrAddress addr)
            msg <- recv sock maxPacket
            return msg
         )

sendBroadcast :: PortId -> String -> IO ()
sendBroadcast port str
    | length str > maxPacket = throw $ TransmitException $ QteOther $ "sendBroadcast: Specified packet is too big for UDP broadcast, having a length of " ++ (show $ length str)
too big for UDP broadcast, having a length of " ++ (show $ length str) 65 | | otherwise = bracket 66 | (getUdpSocket port >>= return . fst) 67 | (sClose) 68 | (\sock -> do 69 | setSocketOption sock Broadcast 1 70 | _res <- sendTo sock str (SockAddrInet (toEnum port) (-1)) 71 | return () 72 | ) 73 | 74 | -- | Returns information about all nodes on the current network 75 | -- that this node knows about. This function combines dynamic 76 | -- and static mechanisms. See documentation on 'getPeersStatic' 77 | -- and 'getPeersDynamic' for more info. This function depends 78 | -- on the configuration values @cfgKnownHosts@ and @cfgPeerDiscoveryPort@. 79 | getPeers :: ProcessM PeerInfo 80 | getPeers = do a <- getPeersStatic 81 | b <- getPeersDynamic 500000 82 | verifyPeerInfo $ Map.unionsWith (\x y -> nub $ x ++ y) [a,b] 83 | 84 | verifyPeerInfo :: PeerInfo -> ProcessM PeerInfo 85 | verifyPeerInfo pi = Traversable.mapM verify1 pi 86 | where verify1 = filterM pingNode -- TODO ping should require a response 87 | 88 | -- | Returns a PeerInfo, containing a list of known nodes indexed by role. 89 | -- This information is acquired by querying the local node registry on 90 | -- each of the hosts in the cfgKnownHosts entry in this node's config. 91 | -- Hostnames that don't respond are assumed to be down, and nodes running 92 | -- on them won't be included in the results. 93 | getPeersStatic :: ProcessM PeerInfo 94 | getPeersStatic = do cfg <- getConfig 95 | let peers = cfgKnownHosts cfg 96 | peerinfos <- mapM (localRegistryQueryNodes . hostToNodeId) peers 97 | return $ Map.unionsWith (\a b -> nub $ a ++ b) (catMaybes peerinfos) 98 | where hostToNodeId host = makeNodeFromHost host 0 99 | 100 | -- | Returns a PeerInfo, containing a list of known nodes indexed by role. 101 | -- This information is acquired by sending out a UDP broadcast on the 102 | -- local network; active nodes running the discovery service 103 | -- should respond with their information. 104 | -- If nodes are running outside of the local network, or if UDP broadcasts 105 | -- are disabled by firewall configuration, this won't return useful 106 | -- information; in that case, use getPeersStatic. 107 | -- This function takes a parameter indicating how long in microseconds 108 | -- to wait for hosts to respond. A number like 50000 is usually good enough, 109 | -- unless your network is highly congested or has high latency. 110 | getPeersDynamic :: Int -> ProcessM PeerInfo 111 | getPeersDynamic t = 112 | do pid <- getSelfPid 113 | cfg <- getConfig 114 | case (cfgPeerDiscoveryPort cfg) of 115 | 0 -> return Map.empty 116 | port -> do -- TODO should send broadcast multiple times in case of packet loss 117 | _ <- liftIO $ try $ sendBroadcast port (show pid) :: ProcessM (Either IOError ()) 118 | responses <- liftIO $ newMVar [] 119 | _ <- ptimeout t (receiveInfo responses) 120 | res <- liftIO $ takeMVar responses 121 | let all = map (\di -> (discRole di,[discNodeId di])) (nub res) 122 | return $ foldl (\a (k,v) -> Map.insertWith (++) k v a ) Map.empty all 123 | where receiveInfo responses = let matchInfo = match (\x -> liftIO $ modifyMVar_ responses (\m -> return (x:m))) in 124 | receiveWait [matchInfo] >> receiveInfo responses 125 | 126 | -- | Given a PeerInfo returned by getPeersDynamic or getPeersStatic, 127 | -- gives the list of nodes registered under a particular role. If no nodes of 128 | -- that role are found, the empty list is returned.
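-- For example, a sketch of locating all peers registered under an
-- illustrative role of \"WORKER\" (the role name is an assumption,
-- not something defined by this library):
--
-- > do peers <- getPeersStatic
-- >    let workers = findPeerByRole peers "WORKER"
-- >    mapM_ (say . show) workers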
129 | findPeerByRole :: PeerInfo -> String -> [NodeId] 130 | findPeerByRole disc role = maybe [] id (Map.lookup role disc) 131 | 132 | {- UNUSED 133 | findRoles :: PeerInfo -> [String] 134 | findRoles disc = Map.keys disc 135 | -} 136 | 137 | waitForDiscovery :: Int -> ProcessM Bool 138 | waitForDiscovery delay 139 | | delay <= 0 = doit 140 | | otherwise = ptimeout delay doit >>= (return . maybe False id) 141 | where doit = 142 | do cfg <- getConfig 143 | msg <- liftIO $ listenUdp (cfgPeerDiscoveryPort cfg) 144 | nodeid <- getSelfNode 145 | res <- ptry $ sendSimple (read msg) (DiscoveryInfo {discNodeId=nodeid,discRole=cfgRole cfg}) PldUser 146 | :: ProcessM (Either ErrorCall TransmitStatus) 147 | case res of 148 | Right QteOK -> return True 149 | _ -> return False 150 | 151 | -- | Starts the discovery process, allowing this node to respond to 152 | -- queries from getPeersDynamic. You don't want to call this yourself, 153 | -- as it's called for you in 'Remote.Init.remoteInit' 154 | startDiscoveryService :: ProcessM () 155 | startDiscoveryService = 156 | do cfg <- getConfig 157 | if cfgPeerDiscoveryPort cfg /= 0 158 | then spawnLocalAnd service setDaemonic >> return () 159 | else return () 160 | where service = waitForDiscovery 0 >> service 161 | -------------------------------------------------------------------------------- /Remote/Reg.hs: -------------------------------------------------------------------------------- 1 | -- | Runtime metadata functions, part of the 2 | -- RPC mechanism 3 | module Remote.Reg ( 4 | registerCalls, 5 | Lookup, 6 | Identifier, 7 | putReg, 8 | getEntryByIdent, 9 | empty, 10 | RemoteCallMetaData 11 | ) where 12 | 13 | import Data.Dynamic (Dynamic,toDyn,fromDynamic) 14 | import Data.Typeable (Typeable) 15 | import qualified Data.Map as Map (insert,lookup,Map,empty) 16 | 17 | ---------------------------------------------- 18 | -- * Runtime metadata 19 | ------------------------------ 20 | 21 | -- | Data of this type is generated at compile-time 22 | -- by 'remotable' and can be used with 'registerCalls' 23 | -- and 'remoteInit' to create a metadata lookup table, 'Lookup'. 24 | -- The name '__remoteCallMetaData' will be present 25 | -- in any module that uses 'remotable'. 26 | type RemoteCallMetaData = Lookup -> Lookup 27 | 28 | 29 | type Identifier = String 30 | 31 | data Entry = Entry { 32 | entryName :: Identifier, 33 | entryFunRef :: Dynamic 34 | } 35 | 36 | -- | Creates a metadata lookup table based on compile-time metadata. 37 | -- You probably don't want to call this function yourself, but instead 38 | -- use 'Remote.Init.remoteInit'. 
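-- Since each 'RemoteCallMetaData' is just a @Lookup -> Lookup@
-- transformer, registering several modules' worth of metadata amounts
-- to function composition, equivalent to @foldr ($) empty@. A sketch,
-- assuming a module @Main@ that uses 'remotable':
--
-- > table :: Lookup
-- > table = registerCalls [Main.__remoteCallMetaData, Remote.Task.__remoteCallMetaData]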
39 | registerCalls :: [RemoteCallMetaData] -> Lookup 40 | registerCalls [] = empty 41 | registerCalls (h:rest) = let registered = registerCalls rest 42 | in h registered 43 | 44 | makeEntry :: (Typeable a) => Identifier -> a -> Entry 45 | makeEntry ident funref = Entry {entryName=ident, entryFunRef=toDyn funref} 46 | 47 | type IdentMap = Map.Map Identifier Entry 48 | data Lookup = Lookup { identMap :: IdentMap } 49 | 50 | putReg :: (Typeable a) => a -> Identifier -> Lookup -> Lookup 51 | putReg a i l = putEntry l a i 52 | 53 | putEntry :: (Typeable a) => Lookup -> a -> Identifier -> Lookup 54 | putEntry amap value name = 55 | Lookup { 56 | identMap = Map.insert name entry (identMap amap) 57 | } 58 | where 59 | entry = makeEntry name value 60 | 61 | 62 | getEntryByIdent :: (Typeable a) => Lookup -> Identifier -> Maybe a 63 | getEntryByIdent amap ident = (Map.lookup ident (identMap amap)) >>= (\x -> fromDynamic (entryFunRef x)) 64 | 65 | empty :: Lookup 66 | empty = Lookup {identMap = Map.empty} 67 | 68 | -------------------------------------------------------------------------------- /Remote/Task.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | 3 | -- | This module provides data dependency resolution and 4 | -- fault tolerance via /promises/ (known elsewhere as /futures/). 5 | -- It's implemented in terms of the "Remote.Process" module. 6 | module Remote.Task ( 7 | -- * Tasks and promises 8 | TaskM, Promise, PromiseList(..), 9 | runTask, 10 | newPromise, newPromiseAt, newPromiseNear, newPromiseHere, newPromiseAtRole, 11 | toPromise, toPromiseAt, toPromiseNear, toPromiseImm, 12 | readPromise, 13 | 14 | -- * MapReduce 15 | MapReduce(..), 16 | mapReduce, 17 | 18 | -- * Useful auxiliaries 19 | chunkify, 20 | shuffle, 21 | tsay, 22 | tlogS, 23 | Locality(..), 24 | TaskException(..), 25 | 26 | -- * Internals, not for general use 27 | __remoteCallMetaData, serialEncodeA, serialDecodeA 28 | ) where 29 | 30 | import Remote.Reg (putReg,getEntryByIdent,RemoteCallMetaData) 31 | import Remote.Encoding (serialEncodePure,hGetPayload,hPutPayload,Payload,getPayloadContent,Serializable,serialDecode,serialEncode) 32 | import Remote.Process (roundtripQuery, ServiceException(..), TransmitStatus(..),diffTime,getConfig,Config(..),matchProcessDown,terminate,nullPid,monitorProcess,TransmitException(..),MonitorAction(..),ptry,LogConfig(..),getLogConfig,setNodeLogConfig,nodeFromPid,LogLevel(..),LogTarget(..),logS,getLookup,say,LogSphere,NodeId,ProcessM,ProcessId,PayloadDisposition(..),getSelfPid,getSelfNode,matchUnknownThrow,receiveWait,receiveTimeout,roundtripResponse,roundtripResponseAsync,roundtripQueryImpl,match,makePayloadClosure,spawn,spawnLocal,spawnLocalAnd,setDaemonic,send,makeClosure) 33 | import Remote.Closure (Closure(..)) 34 | import Remote.Peer (getPeers) 35 | 36 | import Data.Dynamic (Dynamic, toDyn, fromDynamic) 37 | import System.IO (withFile,IOMode(..)) 38 | import System.Directory (renameFile) 39 | import Data.Binary (Binary,get,put,putWord8,getWord8) 40 | import Control.Exception (SomeException,Exception,throw) 41 | import Data.Typeable (Typeable) 42 | import Control.Applicative (Applicative(..)) 43 | import Control.Monad (liftM,when) 44 | import Control.Monad.Trans (liftIO) 45 | import Control.Concurrent.MVar (MVar,modifyMVar,modifyMVar_,newMVar,newEmptyMVar,takeMVar,putMVar,readMVar,withMVar) 46 | import qualified Data.Map as Map (Map,insert,lookup,empty,insertWith',toList) 47 | import Data.List
((\\),union,nub,groupBy,sortBy,delete) 48 | import Data.Time (UTCTime,getCurrentTime) 49 | 50 | -- imports required for hashClosure; is there a lighter-weight way of doing this? 51 | import Data.Digest.Pure.MD5 (md5) 52 | import Data.ByteString.Lazy.UTF8 (fromString) 53 | import qualified Data.ByteString.Lazy as B (concat) 54 | 55 | ---------------------------------------------- 56 | -- * Promises and tasks 57 | ---------------------------------------------- 58 | 59 | 60 | type PromiseId = Integer 61 | 62 | type Hash = String 63 | 64 | data PromiseList a = PlChunk a (Promise (PromiseList a)) 65 | | PlNil deriving Typeable 66 | 67 | instance (Serializable a) => Binary (PromiseList a) where 68 | put (PlChunk a p) = putWord8 0 >> put a >> put p 69 | put PlNil = putWord8 1 70 | get = do w <- getWord8 71 | case w of 72 | 0 -> do a <- get 73 | p <- get 74 | return $ PlChunk a p 75 | 1 -> return PlNil 76 | 77 | -- | The basic data type for expressing data dependence 78 | -- in the 'TaskM' monad. A Promise represents a value that 79 | -- may or may not have been computed yet; thus, it's like 80 | -- a distributed thunk (in the sense of a non-strict unit 81 | -- of evaluation). These are created by 'newPromise' and friends, 82 | -- and the underlying value can be gotten with 'readPromise'. 83 | data Promise a = PromiseBasic { _psRedeemer :: ProcessId, _psId :: PromiseId } 84 | | PromiseImmediate a deriving Typeable 85 | -- psRedeemer should maybe be wrapped in an IORef so that it can be updated in case of node failure 86 | 87 | instance (Serializable a) => Binary (Promise a) where 88 | put (PromiseBasic a b) = putWord8 0 >> put a >> put b 89 | put (PromiseImmediate a) = putWord8 1 >> put a 90 | get = do a <- getWord8 91 | case a of 92 | 0 -> do b <- get 93 | c <- get 94 | return $ PromiseBasic b c 95 | 1 -> do b <- get 96 | return $ PromiseImmediate b 97 | 98 | -- | Stores the data produced by a promise, in one of its 99 | -- various forms. If it's currently in memory, we keep it 100 | -- as a payload, to be decoded by its ultimate user (who 101 | -- of course has the static type information), the time 102 | -- it was last touched (so we know when to flush it), 103 | -- and perhaps also a decoded version, so that it doesn't 104 | -- need to be decoded repeatedly: this makes things go a lot 105 | -- faster. If it's been flushed to disk, we keep track of 106 | -- where, and if the promise didn't complete, but threw 107 | -- an exception during its execution, we mark that here 108 | -- as well: the exception will be propagated to 109 | -- dependents. 110 | data PromiseStorage = PromiseInMemory PromiseData UTCTime (Maybe Dynamic) 111 | | PromiseOnDisk FilePath 112 | | PromiseException String 113 | 114 | type PromiseData = Payload 115 | {- UNUSED 116 | type TimeStamp = UTCTime 117 | -} 118 | 119 | -- | Keeps track of what we know about currently running promises. 120 | -- The closure and locality are provided by the initial call to 121 | -- newPromise; the nodeboss is where it is currently running. 122 | -- We need this info to deal with complaints. 123 | data PromiseRecord = PromiseRecord ProcessId (Closure PromiseData) Locality 124 | 125 | data MasterState = MasterState 126 | { 127 | -- | Promise IDs are allocated serially from here 128 | msNextId :: PromiseId, 129 | 130 | -- | All currently known nodes, with the role, node ID, and node boss. Updated asynchronously by the prober thread 131 | msNodes :: MVar [(String,NodeId,ProcessId)], 132 | 133 | -- | Given a nodeboss, which promises belong to it.
Not sure what this is good for 134 | msAllocation :: Map.Map ProcessId [PromiseId], 135 | 136 | -- | Given a promise, what do we know about it. Include its nodeboss, its closure, and its locality preference 137 | msPromises :: Map.Map PromiseId PromiseRecord, 138 | 139 | -- | The locality preference of new worker tasks, if not specified otherwise 140 | msDefaultLocality :: Locality 141 | } 142 | 143 | data MmNewPromise = MmNewPromise (Closure Payload) Locality Queueing deriving (Typeable) 144 | instance Binary MmNewPromise where 145 | get = do a <- get 146 | l <- get 147 | q <- get 148 | return $ MmNewPromise a l q 149 | put (MmNewPromise a l q) = put a >> put l >> put q 150 | 151 | data MmNewPromiseResponse = MmNewPromiseResponse ProcessId PromiseId 152 | | MmNewPromiseResponseFail deriving (Typeable) 153 | instance Binary MmNewPromiseResponse where 154 | put (MmNewPromiseResponse a b) = 155 | do putWord8 0 156 | put a 157 | put b 158 | put MmNewPromiseResponseFail = putWord8 1 159 | get = do a <- getWord8 160 | case a of 161 | 0 -> do b <- get 162 | c <- get 163 | return $ MmNewPromiseResponse b c 164 | 1 -> return MmNewPromiseResponseFail 165 | 166 | data MmStatus = MmStatus deriving Typeable 167 | instance Binary MmStatus where 168 | get = return MmStatus 169 | put MmStatus = put () 170 | 171 | data MmStatusResponse = MmStatusResponse [NodeId] (Map.Map ProcessId [PromiseId]) deriving Typeable 172 | instance Binary MmStatusResponse where 173 | get = do a <- get 174 | b <- get 175 | return $ MmStatusResponse a b 176 | put (MmStatusResponse a b) = put a >> put b 177 | 178 | data MmComplain = MmComplain ProcessId PromiseId deriving (Typeable) 179 | instance Binary MmComplain where 180 | put (MmComplain a b) = put a >> put b 181 | get = do a <- get 182 | b <- get 183 | return $ MmComplain a b 184 | 185 | data MmComplainResponse = MmComplainResponse ProcessId deriving (Typeable) 186 | instance Binary MmComplainResponse where 187 | put (MmComplainResponse a) = put a 188 | get = do a <- get 189 | return $ MmComplainResponse a 190 | 191 | data TmNewPeer = TmNewPeer NodeId deriving (Typeable) 192 | instance Binary TmNewPeer where 193 | get = do a <- get 194 | return $ TmNewPeer a 195 | put (TmNewPeer nid) = put nid 196 | 197 | data NmStart = NmStart PromiseId (Closure Payload) Queueing deriving (Typeable) 198 | instance Binary NmStart where 199 | get = do a <- get 200 | b <- get 201 | c <- get 202 | return $ NmStart a b c 203 | put (NmStart a b c) = put a >> put b >> put c 204 | 205 | data NmStartResponse = NmStartResponse Bool deriving (Typeable) 206 | instance Binary NmStartResponse where 207 | get = do a <- get 208 | return $ NmStartResponse a 209 | put (NmStartResponse a) = put a 210 | 211 | data NmRedeem = NmRedeem PromiseId deriving (Typeable) 212 | instance Binary NmRedeem where 213 | get = do a <- get 214 | return $ NmRedeem a 215 | put (NmRedeem prid) = put prid 216 | 217 | data NmRedeemResponse = NmRedeemResponse Payload 218 | | NmRedeemResponseUnknown 219 | | NmRedeemResponseException deriving (Typeable) 220 | instance Binary NmRedeemResponse where 221 | get = do a <- getWord8 222 | case a of 223 | 0 -> do b <- get 224 | return $ NmRedeemResponse b 225 | 1 -> return NmRedeemResponseUnknown 226 | 2 -> return NmRedeemResponseException 227 | put (NmRedeemResponse a) = putWord8 0 >> put a 228 | put (NmRedeemResponseUnknown) = putWord8 1 229 | put (NmRedeemResponseException) = putWord8 2 230 | 231 | data TaskException = TaskException String deriving (Show,Typeable) 232 | instance Exception 
TaskException 233 | 234 | -- | (Currently ignored.) 235 | data Queueing = QuNone 236 | | QuExclusive 237 | | QuSmall deriving (Typeable,Ord,Eq) 238 | 239 | defaultQueueing :: Queueing 240 | defaultQueueing = QuNone 241 | 242 | instance Binary Queueing where 243 | put QuNone = putWord8 0 244 | put QuExclusive = putWord8 1 245 | put QuSmall = putWord8 2 246 | get = do a <- getWord8 247 | case a of 248 | 0 -> return QuNone 249 | 1 -> return QuExclusive 250 | 2 -> return QuSmall 251 | 252 | -- | A specification of preference 253 | -- for where a promise should be allocated, 254 | -- among the nodes visible to the master. 255 | data Locality = LcUnrestricted -- ^ The promise can be placed anywhere. 256 | | LcDefault -- ^ The default preference is applied, which is for nodes having a role of NODE or WORKER 257 | | LcByRole [String] -- ^ Nodes having the given roles will be preferred 258 | | LcByNode [NodeId] -- ^ The given nodes will be preferred 259 | 260 | instance Binary Locality where 261 | put LcUnrestricted = putWord8 0 262 | put LcDefault = putWord8 1 263 | put (LcByRole a) = putWord8 2 >> put a 264 | put (LcByNode a) = putWord8 3 >> put a 265 | get = do a <- getWord8 266 | case a of 267 | 0 -> return LcUnrestricted 268 | 1 -> return LcDefault 269 | 2 -> do r <- get 270 | return $ LcByRole r 271 | 3 -> do r <- get 272 | return $ LcByNode r 273 | 274 | defaultLocality :: Locality 275 | defaultLocality = LcByRole ["WORKER","NODE"] 276 | 277 | taskError :: String -> a 278 | taskError s = throw $ TaskException s 279 | 280 | serialEncodeA :: (Serializable a) => a -> TaskM Payload 281 | serialEncodeA = liftTask . liftIO . serialEncode 282 | 283 | serialDecodeA :: (Serializable a) => Payload -> TaskM (Maybe a) 284 | serialDecodeA = liftTask . liftIO . serialDecode 285 | 286 | monitorTask :: ProcessId -> ProcessId -> ProcessM TransmitStatus 287 | monitorTask monitor monitee 288 | = do res <- ptry $ monitorProcess monitor monitee MaMonitor 289 | case res of 290 | Right _ -> return QteOK 291 | Left (ServiceException e) -> return $ QteOther e 292 | 293 | roundtripImpl :: (Serializable a, Serializable b) => ProcessId -> a -> ProcessM (Either TransmitStatus b) 294 | roundtripImpl pid dat = roundtripQueryImpl 0 PldUser pid dat id [] 295 | 296 | roundtrip :: (Serializable a, Serializable b) => ProcessId -> a -> TaskM (Either TransmitStatus b) 297 | roundtrip apid dat = 298 | TaskM $ \ts -> 299 | case Map.lookup apid (tsMonitoring ts) of 300 | Nothing -> do mypid <- getSelfPid 301 | res0 <- monitorTask mypid apid 302 | case res0 of 303 | QteOK -> 304 | do res <- roundtripImpl apid dat 305 | return (ts {tsMonitoring=Map.insert apid () (tsMonitoring ts)},res) 306 | _ -> return (ts,Left res0) 307 | Just _ -> do res <- roundtripImpl apid dat 308 | return (ts,res) 309 | 310 | -- roundtrip a b = liftTask $ roundtripQueryUnsafe PldUser a b 311 | 312 | spawnDaemonic :: ProcessM () -> ProcessM ProcessId 313 | spawnDaemonic p = spawnLocalAnd p setDaemonic 314 | 315 | runWorkerNode :: ProcessId -> NodeId -> ProcessM ProcessId 316 | runWorkerNode masterpid nid = 317 | do clo <- makeClosure "Remote.Task.runWorkerNode__impl" (masterpid) :: ProcessM (Closure (ProcessM ())) 318 | spawn nid clo 319 | 320 | runWorkerNode__impl :: Payload -> ProcessM () 321 | runWorkerNode__impl pl = 322 | do setDaemonic -- maybe it's good to have the node manager be daemonic, but probably not.
If so, the MASTERPID must be terminated when user-provided MASTERPROC ends 323 | mpid <- liftIO $ serialDecode pl 324 | case mpid of 325 | Just masterpid -> handler masterpid 326 | Nothing -> error "Failure to extract in rwn__impl" 327 | where handler masterpid = startNodeManager masterpid 328 | 329 | passthrough__implPl :: Payload -> TaskM Payload 330 | passthrough__implPl pl = return pl 331 | 332 | passthrough__closure :: (Serializable a) => a -> Closure (TaskM a) 333 | passthrough__closure a = Closure "Remote.Task.passthrough__impl" (serialEncodePure a) 334 | 335 | __remoteCallMetaData :: RemoteCallMetaData 336 | __remoteCallMetaData x = putReg runWorkerNode__impl "Remote.Task.runWorkerNode__impl" 337 | (putReg passthrough__implPl "Remote.Task.passthrough__implPl" x) 338 | 339 | updatePromiseInMemory :: PromiseStorage -> IO PromiseStorage 340 | updatePromiseInMemory (PromiseInMemory p _ d) = do utc <- getCurrentTime 341 | return $ PromiseInMemory p utc d 342 | updatePromiseInMemory other = return other 343 | 344 | makePromiseInMemory :: PromiseData -> Maybe Dynamic -> IO PromiseStorage 345 | makePromiseInMemory p dyn = do utc <- getCurrentTime 346 | return $ PromiseInMemory p utc dyn 347 | 348 | forwardLogs :: Maybe ProcessId -> ProcessM () 349 | forwardLogs masterpid = 350 | do lc <- getLogConfig 351 | selfnid <- getSelfNode 352 | let newlc = lc {logTarget = case masterpid of 353 | Just mp 354 | | nodeFromPid mp /= selfnid -> LtForward $ nodeFromPid mp 355 | _ -> LtStdout} 356 | in setNodeLogConfig newlc 357 | 358 | hashClosure :: Closure a -> Hash 359 | hashClosure (Closure s pl) = show $ md5 $ B.concat [fromString s, getPayloadContent pl] 360 | 361 | undiskify :: FilePath -> MVar PromiseStorage -> ProcessM (Maybe PromiseData) 362 | undiskify fpIn mps = 363 | do wrap $ liftIO $ modifyMVar mps (\val -> 364 | case val of 365 | PromiseOnDisk fp -> 366 | do pl <- withFile fp ReadMode hGetPayload 367 | inmem <- makePromiseInMemory pl Nothing 368 | return (inmem,Just pl) 369 | PromiseInMemory payload _ _ -> return (val,Just payload) 370 | _ -> return (val,Nothing)) 371 | where wrap a = do res <- ptry a 372 | case res of 373 | Left e -> do logS "TSK" LoCritical $ "Error reading promise from file "++fpIn++": "++show (e::IOError) 374 | return Nothing 375 | Right r -> return r 376 | 377 | diskify :: FilePath -> MVar PromiseStorage -> Bool -> ProcessM () 378 | diskify fp mps reallywrite = 379 | do cfg <- getConfig 380 | when (cfgPromiseFlushDelay cfg > 0) 381 | (handler (cfgPromiseFlushDelay cfg)) 382 | where 383 | handler delay = 384 | do _ <- receiveTimeout delay [] 385 | again <- wrap $ liftIO $ modifyMVar mps (\val -> 386 | case val of 387 | PromiseInMemory payload utc _ -> 388 | do now <- getCurrentTime 389 | if diffTime now utc > delay 390 | then do when reallywrite $ 391 | do liftIO $ withFile tmp WriteMode (\h -> hPutPayload h payload) 392 | renameFile tmp fp 393 | return (PromiseOnDisk fp,False) 394 | else return (val,True) 395 | _ -> return (val,False)) 396 | when again 397 | (diskify fp mps reallywrite) 398 | tmp = fp ++ ".tmp" 399 | wrap a = do res <- ptry a 400 | case res of 401 | Left z -> do logS "TSK" LoImportant $ "Error writing promise to disk on file "++fp++": "++show (z::IOError) 402 | return False 403 | Right v -> return v 404 | 405 | startNodeWorker :: ProcessId -> NodeBossState -> 406 | MVar PromiseStorage -> Closure Payload -> ProcessM () 407 | startNodeWorker masterpid nbs mps clo@(Closure cloname cloarg) = 408 | do self <- getSelfPid 409 | _ <- spawnLocalAnd (starter 
self) (prefix self) 410 | return () 411 | where 412 | prefix nodeboss = 413 | do self <- getSelfPid 414 | monitorProcess self nodeboss MaLink 415 | setDaemonic 416 | starter nodeboss = -- TODO try to do an undiskify here, if the promise is left over from a previous, failed run 417 | let initialState = TaskState {tsMaster=masterpid,tsNodeBoss=Just nodeboss, 418 | tsPromiseCache=nsPromiseCache nbs, tsRedeemerForwarding=nsRedeemerForwarding nbs, 419 | tsMonitoring=Map.empty} 420 | tasker = do tbl <- liftTask $ getLookup 421 | case getEntryByIdent tbl cloname of 422 | Just funval -> 423 | do val <- funval cloarg 424 | p <- liftTaskIO $ makePromiseInMemory val Nothing 425 | liftTaskIO $ putMVar mps p 426 | cfg <- liftTask $ getConfig 427 | let cachefile = cfgPromisePrefix cfg++hashClosure clo 428 | liftTask $ diskify cachefile mps True 429 | Nothing -> taskError $ "Failed looking up "++cloname++" in closure table" 430 | in do res <- ptry $ runTaskM tasker initialState :: ProcessM (Either SomeException (TaskState,())) 431 | case res of 432 | Left ex -> liftIO (putMVar mps (PromiseException (show ex))) >> throw ex 433 | Right _ -> return () 434 | 435 | data NodeBossState = 436 | NodeBossState 437 | { 438 | nsPromiseCache :: MVar (Map.Map PromiseId (MVar PromiseStorage)), 439 | nsRedeemerForwarding :: MVar (Map.Map PromiseId ProcessId) 440 | } 441 | 442 | startNodeManager :: ProcessId -> ProcessM () 443 | startNodeManager masterpid = 444 | let 445 | handler :: NodeBossState -> ProcessM a 446 | handler state = 447 | let promisecache = nsPromiseCache state 448 | nmStart = roundtripResponse (\(NmStart promise clo _queueing) -> 449 | do promisestore <- liftIO $ newEmptyMVar 450 | ret <- liftIO $ modifyMVar promisecache 451 | (\pc -> let newpc = Map.insert promise promisestore pc 452 | in return (newpc,True)) 453 | when (ret) 454 | (startNodeWorker masterpid state promisestore clo) 455 | return (NmStartResponse ret,state)) 456 | nmTermination = matchProcessDown masterpid $ 457 | do forwardLogs Nothing 458 | logS "TSK" LoInformation $ "Terminating nodeboss after my master "++show masterpid++" is gone" 459 | terminate 460 | nmRedeem = roundtripResponseAsync (\(NmRedeem promise) ans -> 461 | let answerer = do pc <- liftIO $ readMVar promisecache 462 | case Map.lookup promise pc of 463 | Nothing -> ans NmRedeemResponseUnknown 464 | Just v -> do rv <- liftIO $ readMVar v -- possibly long wait 465 | case rv of 466 | PromiseInMemory rrv _ _ -> 467 | do liftIO $ modifyMVar_ v (\_ -> updatePromiseInMemory rv) 468 | ans (NmRedeemResponse rrv) 469 | PromiseOnDisk fp -> do mpd <- undiskify fp v 470 | case mpd of 471 | Nothing -> 472 | ans (NmRedeemResponseUnknown) 473 | Just a -> 474 | ans (NmRedeemResponse a) 475 | diskify fp v False 476 | PromiseException _ -> ans NmRedeemResponseException 477 | in do _ <- spawnLocal answerer 478 | return state) False 479 | in receiveWait [nmStart, nmRedeem, nmTermination, matchUnknownThrow] >>= handler 480 | in do forwardLogs $ Just masterpid 481 | mypid <- getSelfPid 482 | monitorProcess mypid masterpid MaMonitor 483 | logS "TSK" LoInformation $ "Starting a nodeboss owned by " ++ show masterpid 484 | pc <- liftIO $ newMVar Map.empty 485 | pf <- liftIO $ newMVar Map.empty 486 | let initState = NodeBossState {nsPromiseCache=pc,nsRedeemerForwarding=pf} 487 | handler initState 488 | 489 | -- | Starts a new context for executing a 'TaskM' environment. 
490 | -- The node on which this function is run becomes a new master 491 | -- in a Task application; as a result, the application should 492 | -- only call this function once. The master will attempt to 493 | -- control all nodes that it can find; if you are going to be 494 | -- running more than one CH application on a single network, 495 | -- be sure to give each application a different network 496 | -- magic (via cfgNetworkMagic). The master TaskM environment 497 | -- created by this function can then spawn other threads, 498 | -- locally or remotely, using 'newPromise' and friends. 499 | 500 | runTask :: TaskM a -> ProcessM a 501 | runTask = startMaster 502 | 503 | startMaster :: TaskM a -> ProcessM a 504 | startMaster proc = 505 | do mvmaster <- liftIO $ newEmptyMVar 506 | mvdone <- liftIO $ newEmptyMVar 507 | master <- runMaster (masterproc mvdone mvmaster) 508 | liftIO $ putMVar mvmaster master 509 | liftIO $ takeMVar mvdone 510 | where masterproc mvdone mvmaster nodeboss = 511 | do master <- liftIO $ takeMVar mvmaster 512 | pc <- liftIO $ newMVar Map.empty 513 | pf <- liftIO $ newMVar Map.empty 514 | let initialState = TaskState {tsMaster=master,tsNodeBoss=Just nodeboss, 515 | tsPromiseCache=pc, tsRedeemerForwarding=pf, 516 | tsMonitoring=Map.empty} 517 | res <- liftM snd $ runTaskM proc initialState 518 | liftIO $ putMVar mvdone res 519 | 520 | {- UNUSED 521 | type LocationSelector = MasterState -> ProcessM (NodeId,ProcessId) 522 | -} 523 | 524 | selectLocation :: MasterState -> Locality -> ProcessM (Maybe (String,NodeId,ProcessId)) 525 | selectLocation ms locality = 526 | let nodes = msNodes ms 527 | in liftIO $ modifyMVar nodes 528 | (\n -> case n of 529 | [] -> return (n,Nothing) 530 | _ -> let dflt = (rotate n,Just $ head n) 531 | filterify f = case filter f n of 532 | [] -> return dflt 533 | (a:_) -> return ((delete a n) ++ [a],Just a) 534 | in case cond locality of 535 | LcUnrestricted -> return dflt 536 | LcDefault -> return dflt 537 | LcByRole l -> filterify (\(r,_,_) -> r `elem` l) 538 | LcByNode l -> filterify (\(_,r,_) -> r `elem` l)) 539 | where rotate [] = [] 540 | rotate (h:t) = t ++ [h] 541 | cond l = case l of 542 | LcDefault -> msDefaultLocality ms 543 | _ -> l 544 | 545 | countLocations :: MasterState -> ProcessM Int 546 | countLocations ms = liftIO $ withMVar (msNodes ms) (\a -> return $ length a) 547 | 548 | findPeers :: ProcessM [(String,NodeId)] 549 | findPeers = liftM (concat . (map (\(role,v) -> [ (role,x) | x <- v] )) . 
Map.toList) getPeers 550 | 551 | sendSilent :: (Serializable a) => ProcessId -> a -> ProcessM () 552 | sendSilent pid a = do res <- ptry $ send pid a 553 | case res of 554 | Left (TransmitException _) -> return () 555 | Right _ -> return () 556 | 557 | {- UNUSED 558 | getStatus :: TaskM () 559 | getStatus = 560 | do master <- getMaster 561 | res <- roundtrip master MmStatus 562 | case res of 563 | Left _ -> return () 564 | Right (MmStatusResponse nodes promises) -> 565 | let verboseNodes = intercalate ", " (map show nodes) 566 | verbosePromises = intercalate "\n" $ map (\(nb,l) -> (show nb)++" -- "++intercalate "," (map show l)) (Map.toList promises) 567 | in tsay $ "\nKnown nodes: " ++ verboseNodes ++ "\n\nNodebosses: " ++ verbosePromises 568 | -} 569 | 570 | runMaster :: (ProcessId -> ProcessM ()) -> ProcessM ProcessId 571 | runMaster masterproc = 572 | let 573 | probeOnce nodes seen masterpid = 574 | do recentlist <- findPeers -- TODO if a node fails to respond to a probe even once, it's gone forever; be more flexible 575 | let newseen = seen `union` recentlist 576 | let topidlist = recentlist \\ seen 577 | let cleanOut n = filter (\(_,nid,_) -> nid `elem` (map snd recentlist)) n 578 | newlypidded <- mapM (\(role,nid) -> 579 | do pid <- runWorkerNode masterpid nid 580 | return (role,nid,pid)) topidlist 581 | (_newlist,totalseen) <- liftIO $ modifyMVar nodes (\oldlist -> 582 | return ((cleanOut oldlist) ++ newlypidded,(recentlist,newseen))) 583 | let newlyadded = totalseen \\ seen 584 | mapM_ (\nid -> sendSilent masterpid (TmNewPeer nid)) (map snd newlyadded) 585 | return totalseen 586 | proberDelay = 10000000 -- how often do we check the network to see what nodes are available? 587 | prober nodes seen masterpid = 588 | do totalseen <- probeOnce nodes seen masterpid 589 | _ <- receiveTimeout proberDelay [matchUnknownThrow] 590 | prober nodes totalseen masterpid 591 | master state = 592 | let 593 | tryAlloc clo promiseid locality queueing = 594 | do ns <- selectLocation state locality 595 | case ns of 596 | Nothing -> do logS "TSK" LoCritical "Attempt to allocate a task, but no nodes found" 597 | return Nothing 598 | Just (_,nid,nodeboss) -> 599 | do res <- roundtripQuery PldUser nodeboss (NmStart promiseid clo queueing) -- roundtripQuery monitors and then unmonitors, which generates a lot of traffic; we probably don't need to do this 600 | case res of 601 | Left e -> 602 | do logS "TSK" LoImportant $ "Failed attempt to start "++show clo++" on " ++show nid ++": "++show e 603 | return Nothing 604 | Right (NmStartResponse True) -> return $ Just nodeboss 605 | _ -> do logS "TSK" LoImportant $ "Failed attempt to start "++show clo++" on " ++show nid 606 | return Nothing 607 | basicAllocate clo promiseid locality queueing = 608 | do count <- countLocations state 609 | res1 <- tryAlloc clo promiseid locality queueing 610 | case res1 of 611 | Just _ -> return res1 612 | Nothing -> -- TODO we should try all matching locations before moving on to Unrestricted 613 | do res <- stubborn count $ tryAlloc clo promiseid LcUnrestricted queueing 614 | case res of 615 | Nothing -> do logS "TSK" LoCritical $ "Terminally failed to start "++show clo 616 | return res 617 | _ -> return res 618 | statusMsg = roundtripResponse 619 | (\x -> case x of 620 | MmStatus -> 621 | do thenodes <- liftIO $ readMVar $ msNodes state 622 | let knownNodes = map (\(_,n,_) -> n) thenodes 623 | proctree = msAllocation state 624 | return (MmStatusResponse knownNodes proctree,state)) 625 | complainMsg = roundtripResponse 626 | (\x
-> case x of 627 | MmComplain procid promid -> 628 | case Map.lookup promid (msPromises state) of 629 | Nothing -> return (MmComplainResponse nullPid,state) -- failure 630 | Just (PromiseRecord curprocid curclo curlocality) 631 | | curprocid /= procid -> return (MmComplainResponse curprocid,state) 632 | | otherwise -> 633 | do res <- basicAllocate curclo promid curlocality defaultQueueing 634 | case res of 635 | Nothing -> return (MmComplainResponse nullPid,state) -- failure 636 | Just newprocid -> 637 | let newpromises = Map.insert promid (PromiseRecord newprocid curclo curlocality) (msPromises state) 638 | in return (MmComplainResponse newprocid,state {msPromises=newpromises})) 639 | promiseMsg = roundtripResponse 640 | (\x -> case x of 641 | MmNewPromise clo locality queueing -> 642 | do 643 | let promiseid = msNextId state 644 | res <- basicAllocate clo promiseid locality queueing 645 | case res of 646 | Just nodeboss -> 647 | let newstate = state {msAllocation=newAllocation,msPromises=newPromises,msNextId=promiseid+1} 648 | newAllocation = Map.insertWith' (\a b -> nub $ a++b) nodeboss [promiseid] (msAllocation state) 649 | newPromises = Map.insert promiseid (PromiseRecord nodeboss clo locality) (msPromises state) 650 | in return (MmNewPromiseResponse nodeboss promiseid,newstate) 651 | Nothing -> 652 | return (MmNewPromiseResponseFail,state)) 653 | simpleMsg = match 654 | (\x -> case x of 655 | TmNewPeer nid -> do logS "TSK" LoInformation $ "Found new peer " ++show nid 656 | return state) 657 | in receiveWait [simpleMsg, promiseMsg, complainMsg,statusMsg] >>= master -- TODO matchUnknownThrow 658 | in do nodes <- liftIO $ newMVar [] 659 | selfnode <- getSelfNode 660 | selfpid <- getSelfPid 661 | let initState = MasterState {msNextId=0, msAllocation=Map.empty, msPromises=Map.empty, msNodes=nodes, msDefaultLocality = defaultLocality} 662 | masterpid <- spawnDaemonic (master initState) 663 | seennodes <- probeOnce nodes [] masterpid 664 | let getByNid _ [] = Nothing 665 | getByNid nid ((_,n,nodeboss):xs) = if nid==n then Just nodeboss else getByNid nid xs 666 | res <- liftIO $ withMVar nodes (\n -> return $ getByNid selfnode n) 667 | _ <- case res of 668 | Nothing -> taskError "Can't find self: make sure cfgKnownHosts includes the master" 669 | Just x -> spawnLocalAnd (masterproc x) (do myself <- getSelfPid 670 | monitorProcess selfpid myself MaLinkError) 671 | _ <- spawnDaemonic (prober nodes seennodes masterpid) 672 | return masterpid 673 | 674 | stubborn :: (Monad m) => Int -> m (Maybe a) -> m (Maybe a) 675 | stubborn 0 a = a 676 | stubborn n a | n>0 677 | = do r <- a 678 | case r of 679 | Just _ -> return r 680 | Nothing -> stubborn (n-1) a 681 | 682 | -- TODO: setDefaultLocality :: Locality -> TaskM () 683 | 684 | -- | Like 'newPromise', but creates a promise whose 685 | -- value is already known. In other words, it puts 686 | -- a given, already-calculated value in a promise. 687 | -- Conceptually (but not syntactically, due to closures), 688 | -- you can consider it like this: 689 | -- 690 | -- > toPromise a = newPromise (return a) 691 | toPromise :: (Serializable a) => a -> TaskM (Promise a) 692 | toPromise = toPromiseAt LcDefault 693 | 694 | -- | A variant of 'toPromise' that lets the user 695 | -- express a locality preference, i.e. some information 696 | -- about which node will become the owner of the 697 | -- new promise. These preferences will not necessarily 698 | -- be respected.
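-- A sketch, placing an already-computed value on a node with an
-- illustrative role of \"WORKER\":
--
-- > do p <- toPromiseAt (LcByRole ["WORKER"]) ([1,2,3] :: [Int])
-- >    readPromise p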
699 | toPromiseAt :: (Serializable a) => Locality -> a -> TaskM (Promise a) 700 | toPromiseAt locality a = newPromiseAt locality (passthrough__closure a) 701 | 702 | -- | Similar to 'toPromiseAt' and 'newPromiseNear' 703 | toPromiseNear :: (Serializable a,Serializable b) => Promise b -> a -> TaskM (Promise a) 704 | toPromiseNear (PromiseImmediate _) = toPromise 705 | -- TODO should I consult tsRedeemerForwarding here? 706 | toPromiseNear (PromiseBasic prhost _prid) = toPromiseAt (LcByNode [nodeFromPid prhost]) 707 | 708 | -- | Creates an /immediate promise/, which is to say, a promise 709 | -- in name only. Unlike a regular promise (created by 'toPromise'), 710 | -- this kind of promise contains the value directly. The 711 | -- advantage is that promise redemption is very fast, requiring 712 | -- no network communication. The downside is that the 713 | -- underlying data will be copied along with the promise. 714 | -- Useful only for small data. 715 | toPromiseImm :: (Serializable a) => a -> TaskM (Promise a) 716 | toPromiseImm = return . PromiseImmediate 717 | 718 | -- | Given a function (expressed here as a closure, see "Remote.Call") 719 | -- that computes a value, returns a token identifying that value. 720 | -- This token, a 'Promise', can be moved about even if the 721 | -- value hasn't been computed yet. The computing function 722 | -- will be started somewhere among the nodes visible to the 723 | -- current master, preferring those nodes that correspond 724 | -- to the 'defaultLocality'. Afterwards, attempts to 725 | -- redeem the promise with 'readPromise' will contact the node 726 | -- where the function is executing. 727 | newPromise :: (Serializable a) => Closure (TaskM a) -> TaskM (Promise a) 728 | newPromise = newPromiseAt LcDefault 729 | 730 | -- | A variant of 'newPromise' that prefers to start 731 | -- the computing function on the same node as the caller. 732 | -- Useful if you plan to use the resulting value 733 | -- locally. 734 | newPromiseHere :: (Serializable a) => Closure (TaskM a) -> TaskM (Promise a) 735 | newPromiseHere clo = 736 | do mynode <- liftTask $ getSelfNode 737 | newPromiseAt (LcByNode [mynode]) clo 738 | 739 | -- | A variant of 'newPromise' that prefers to start 740 | -- the computing function on the same node where some 741 | -- other promise lives. The other promise is not 742 | -- evaluated. 743 | newPromiseNear :: (Serializable a, Serializable b) => Promise b -> Closure (TaskM a) -> TaskM (Promise a) 744 | newPromiseNear (PromiseImmediate _) = newPromise 745 | newPromiseNear (PromiseBasic prhost _prid) = newPromiseAt (LcByNode [nodeFromPid prhost]) 746 | 747 | -- | A variant of 'newPromise' that prefers to start 748 | -- the computing functions on some set of nodes that 749 | -- have a given role (assigned by the cfgRole configuration 750 | -- option). 751 | newPromiseAtRole :: (Serializable a) => String -> Closure (TaskM a) -> TaskM (Promise a) 752 | newPromiseAtRole role clo = newPromiseAt (LcByRole [role]) clo 753 | 754 | -- | A variant of 'newPromise' that lets the user 755 | -- specify a 'Locality'. The other flavors of newPromise, 756 | -- such as 'newPromiseAtRole', 'newPromiseNear', and 757 | -- 'newPromiseHere' are just shorthand for a call to this function.
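-- As an end-to-end sketch of the functions above: the names @slowAdd@
-- and @slowAdd__closure@ are illustrative, with the latter generated by
-- a 'remotable' declaration (see "Remote.Call"):
--
-- > slowAdd :: (Int, Int) -> TaskM Int
-- > slowAdd (a, b) = return (a + b)
-- >
-- > $( remotable ['slowAdd] )
-- >
-- > example :: ProcessM Int
-- > example = runTask $
-- >    do p <- newPromise (slowAdd__closure (3, 4))
-- >       readPromise p -- blocks until the result, 7, is available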
758 | newPromiseAt :: (Serializable a) => Locality -> Closure (TaskM a) -> TaskM (Promise a) 759 | newPromiseAt locality clo = 760 | let realclo = makePayloadClosure clo 761 | in case realclo of 762 | Just plclo -> do master <- getMaster 763 | res <- roundtrip master (MmNewPromise plclo locality defaultQueueing) 764 | case res of 765 | Right (MmNewPromiseResponse pid prid) -> return $ PromiseBasic pid prid 766 | Right (MmNewPromiseResponseFail) -> 767 | taskError $ "Spawning of closure "++show clo++" by newPromise failed" 768 | Left tms -> taskError $ "Spawning of closure "++show clo++" by newPromise resulted in "++show tms 769 | Nothing -> taskError $ "The specified closure, "++show clo++", can't produce payloads" 770 | 771 | -- | Given a promise, gets the value that is being 772 | -- calculated. If the calculation has finished, 773 | -- the owning node will be contacted and the data 774 | -- moved to the current node. If the calculation 775 | -- has not finished, this function will block 776 | -- until it has. If the calculation failed 777 | -- by throwing an exception (e.g. divide by zero), 778 | -- then this function will throw an exception as well 779 | -- (a 'TaskException'). If the node owning the 780 | -- promise is not accessible, the calculation 781 | -- will be restarted. 782 | readPromise :: (Serializable a) => Promise a -> TaskM a 783 | readPromise (PromiseImmediate a) = return a 784 | readPromise thepromise@(PromiseBasic prhost prid) = 785 | do mp <- lookupCachedPromise prid 786 | case mp of 787 | Nothing -> do fprhost <- liftM (maybe prhost id) $ lookupForwardedRedeemer prid 788 | res <- roundtrip fprhost (NmRedeem prid) 789 | case res of 790 | Left e -> do tlogS "TSK" LoInformation $ "Complaining about promise " ++ show prid ++" on " ++show fprhost++" because of "++show e 791 | complain fprhost prid 792 | Right NmRedeemResponseUnknown -> 793 | do tlogS "TSK" LoInformation $ "Complaining about promise " ++ show prid ++" on " ++show fprhost++" because allegedly unknown" 794 | complain fprhost prid 795 | Right (NmRedeemResponse thedata) -> 796 | do extracted <- extractFromPayload thedata 797 | promiseinmem <- liftTaskIO $ makePromiseInMemory thedata (Just $ toDyn extracted) 798 | putPromiseInCache prid promiseinmem 799 | return extracted 800 | Right NmRedeemResponseException -> 801 | taskError "Failed promise redemption" -- don't redeem, this is a terminal failure 802 | Just mv -> do val <- liftTaskIO $ readMVar mv -- possible long wait here 803 | case val of -- TODO these read/write MVars should be combined!
804 | PromiseInMemory v _utc thedyn -> 805 | case thedyn of 806 | Just thedynvalue -> 807 | case fromDynamic thedynvalue of 808 | Nothing -> do liftTask $ logS "TSK" LoStandard "Insufficiently dynamic promise cache" 809 | extractFromPayload v 810 | Just realval -> do updated <- liftTaskIO $ makePromiseInMemory v thedyn 811 | putPromiseInCache prid updated 812 | return realval 813 | Nothing -> do extracted <- extractFromPayload v 814 | updated <- liftTaskIO $ makePromiseInMemory v (Just $ toDyn extracted) 815 | putPromiseInCache prid updated 816 | return extracted 817 | PromiseException _ -> taskError $ "Redemption of promise failed" 818 | PromiseOnDisk fp -> do mpd <- liftTask $ undiskify fp mv 819 | _ <- liftTask $ spawnLocal $ diskify fp mv False 820 | case mpd of 821 | Just dat -> extractFromPayload dat 822 | _ -> taskError "Promise extraction from disk failed" 823 | where extractFromPayload v = do out <- liftTaskIO $ serialDecode v 824 | case out of 825 | Just r -> return r 826 | Nothing -> taskError "Unexpected payload type" 827 | complain fprhost prid = 828 | do master <- getMaster 829 | response <- roundtrip master (MmComplain fprhost prid) 830 | case response of 831 | Left a -> taskError $ "Couldn't file complaint with master about " ++ show fprhost ++ " because " ++ show a 832 | Right (MmComplainResponse newhost) 833 | | newhost == nullPid -> taskError $ "Couldn't file complaint with master about " ++ show fprhost 834 | | otherwise -> do setForwardedRedeemer prid newhost 835 | readPromise thepromise 836 | 837 | data TaskState = TaskState 838 | { 839 | tsMaster :: ProcessId, 840 | tsNodeBoss :: Maybe ProcessId, 841 | tsPromiseCache :: MVar (Map.Map PromiseId (MVar PromiseStorage)), 842 | tsRedeemerForwarding :: MVar (Map.Map PromiseId ProcessId), 843 | tsMonitoring :: Map.Map ProcessId () 844 | } 845 | 846 | data TaskM a = TaskM { runTaskM :: TaskState -> ProcessM (TaskState, a) } deriving (Typeable) 847 | 848 | instance Monad TaskM where 849 | m >>= k = TaskM $ \ts -> do 850 | (ts',a) <- runTaskM m ts 851 | (ts'',a') <- runTaskM (k a) (ts') 852 | return (ts'',a') 853 | return x = TaskM $ \ts -> return $ (ts,x) 854 | 855 | instance Functor TaskM where 856 | f `fmap` m = 857 | TaskM $ \ts -> 858 | runTaskM m ts >>= \(ts', x) -> 859 | return (ts', f x) 860 | 861 | instance Applicative TaskM where 862 | mf <*> mx = 863 | TaskM $ \ts -> 864 | runTaskM mf ts >>= \(ts', f) -> 865 | runTaskM mx ts' >>= \(ts'', x) -> 866 | return (ts'', f x) 867 | pure = return 868 | 869 | lookupForwardedRedeemer :: PromiseId -> TaskM (Maybe ProcessId) 870 | lookupForwardedRedeemer q = 871 | TaskM $ \ts -> 872 | liftIO $ withMVar (tsRedeemerForwarding ts) $ (\fwd -> 873 | let lo = Map.lookup q fwd 874 | in return (ts,lo)) 875 | 876 | setForwardedRedeemer :: PromiseId -> ProcessId -> TaskM () 877 | setForwardedRedeemer from to = 878 | TaskM $ \ts -> liftIO $ modifyMVar (tsRedeemerForwarding ts) (\fwd -> 879 | let newmap = Map.insert from to fwd 880 | in return ( newmap,(ts,()) ) ) 881 | 882 | lookupCachedPromise :: PromiseId -> TaskM (Maybe (MVar PromiseStorage)) 883 | lookupCachedPromise prid = TaskM $ \ts -> 884 | do mv <- liftIO $ withMVar (tsPromiseCache ts) 885 | (\pc -> return $ Map.lookup prid pc) 886 | return (ts,mv) 887 | 888 | putPromiseInCache :: PromiseId -> PromiseStorage -> TaskM () 889 | putPromiseInCache prid ps = TaskM $ \ts -> 890 | do liftIO $ modifyMVar_ (tsPromiseCache ts) 891 | (\pc -> do mv <- newMVar ps 892 | return $ Map.insert prid mv pc) 893 | return (ts,()) 894 | 895 | getMaster 
:: TaskM ProcessId 896 | getMaster = TaskM $ \ts -> return (ts,tsMaster ts) 897 | 898 | liftTask :: ProcessM a -> TaskM a 899 | liftTask a = TaskM $ \ts -> a >>= (\x -> return (ts,x)) 900 | 901 | liftTaskIO :: IO a -> TaskM a 902 | liftTaskIO = liftTask . liftIO 903 | 904 | -- | A Task-monadic version of 'Remote.Process.say'. 905 | -- Puts text messages in the log. 906 | tsay :: String -> TaskM () 907 | tsay a = liftTask $ say a 908 | 909 | -- | Writes various kinds of messages to the 910 | -- "Remote.Process" log. 911 | tlogS :: LogSphere -> LogLevel -> String -> TaskM () 912 | tlogS a b c = liftTask $ logS a b c 913 | 914 | ---------------------------------------------- 915 | -- * MapReduce 916 | ---------------------------------------------- 917 | 918 | -- | A data structure that stores the important 919 | -- user-provided functions that are the namesakes 920 | -- of the MapReduce algorithm. 921 | -- The number of mapper processes can be controlled 922 | -- by the user by controlling the length of the list 923 | -- returned by mtChunkify. The number of reducer 924 | -- promises is controlled by the number of values 925 | -- returned by the shuffler. 926 | -- The user must provide their own mapper and reducer. 927 | -- For many cases, the default chunkifier ('chunkify') 928 | -- and shuffler ('shuffle') are adequate. 929 | data MapReduce rawinput input middle1 middle2 result 930 | = MapReduce 931 | { 932 | mtMapper :: input -> Closure (TaskM [middle1]), 933 | mtReducer :: middle2 -> Closure (TaskM result), 934 | mtChunkify :: rawinput -> [input], 935 | mtShuffle :: [middle1] -> [middle2] 936 | } 937 | 938 | -- | A convenient way to provide the 'mtShuffle' function 939 | -- as part of 'mapReduce'. 940 | shuffle :: Ord a => [(a,b)] -> [(a,[b])] 941 | shuffle q = 942 | let semi = groupBy (\(a,_) (b,_) -> a==b) (sortBy (\(a,_) (b,_) -> compare a b) q) 943 | in map (\x -> (fst $ head x,map snd x)) semi 944 | 945 | -- | A convenient way to provide the 'mtChunkify' function 946 | -- as part of 'mapReduce'. 947 | chunkify :: Int -> [a] -> [[a]] 948 | chunkify numChunks l 949 | | numChunks <= 0 = taskError "Can't chunkify into less than one chunk" 950 | | otherwise = splitSize (ceiling ((fromIntegral (length l) / fromIntegral numChunks) :: Double)) l 951 | where 952 | splitSize _ [] = [] 953 | splitSize i v = let (first,second) = splitAt i v 954 | in first : splitSize i second 955 | 956 | -- | The MapReduce algorithm, implemented in a very 957 | -- simple form on top of the Task layer. Its 958 | -- use depends on four user-determined data types: 959 | -- 960 | -- * input -- The data type provided as the input to the algorithm as a whole and given to the mapper. 961 | -- 962 | -- * middle1 -- The output of the mapper. This may include some /key/ which is used by the shuffler to allocate data to reducers. 963 | -- If you use the default shuffler, 'shuffle', this type must have the form @Ord a => (a,b)@. 964 | -- 965 | -- * middle2 -- The output of the shuffler. The default shuffler emits a type in the form @Ord a => (a,[b])@. Each middle2 output 966 | -- by the shuffler is given to a separate reducer. 967 | -- 968 | -- * result -- The output of the reducer, upon being given a bunch of middles.
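-- As a hedged word-count sketch (the @wc*@ names are illustrative;
-- @wcMapper__closure@ and @wcReducer__closure@ would be generated by a
-- 'remotable' declaration), noting that @chunkify 3 [1..7]@ yields
-- @[[1,2,3],[4,5,6],[7]]@:
--
-- > wcMapper :: [String] -> TaskM [(String, Int)] -- input -> [middle1]
-- > wcMapper ws = return [(w, 1) | w <- ws]
-- >
-- > wcReducer :: (String, [Int]) -> TaskM (String, Int) -- middle2 -> result
-- > wcReducer (w, counts) = return (w, sum counts)
-- >
-- > $( remotable ['wcMapper, 'wcReducer] )
-- >
-- > wordCount :: MapReduce [String] [String] (String, Int) (String, [Int]) (String, Int)
-- > wordCount = MapReduce { mtMapper = wcMapper__closure,
-- >                         mtReducer = wcReducer__closure,
-- >                         mtChunkify = chunkify 10,
-- >                         mtShuffle = shuffle }
-- >
-- > -- run with: runTask (mapReduce wordCount ws)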
969 | mapReduce :: (Serializable i,Serializable k,Serializable m,Serializable r) => 970 | MapReduce ri i k m r -> ri -> TaskM [r] 971 | mapReduce mr inputs = 972 | let chunks = (mtChunkify mr) inputs 973 | in do 974 | pmapResult <- mapM (\chunk -> 975 | newPromise ((mtMapper mr) chunk) ) chunks 976 | mapResult <- mapM readPromise pmapResult 977 | let shuffled = (mtShuffle mr) (concat mapResult) 978 | pres <- mapM (\mid2 -> 979 | newPromise ((mtReducer mr) mid2)) shuffled 980 | mapM readPromise pres 981 | 982 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Distribution.Simple (defaultMain) 4 | 5 | main :: IO () 6 | main = defaultMain 7 | -------------------------------------------------------------------------------- /examples/kmeans/KMeans.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell,DeriveDataTypeable,BangPatterns #-} 2 | module Main where 3 | 4 | import Remote 5 | import Remote.Process (roundtripResponse,setRemoteNodeLogConfig,getConfig,PayloadDisposition(..),roundtripQuery,roundtripQueryMulti) 6 | import KMeansCommon 7 | 8 | import Control.Exception (try,SomeException,evaluate) 9 | import Control.Monad (liftM) 10 | import Control.Monad.Trans (liftIO) 11 | import System.Random (randomR,getStdRandom) 12 | import Data.Typeable (Typeable) 13 | import Data.Data (Data) 14 | import Control.Exception (IOException) 15 | import Data.Binary (Binary,get,put,encode,decode) 16 | import Data.Maybe (fromJust) 17 | import Data.List (minimumBy,sortBy) 18 | import Data.Time 19 | import Data.Either (rights) 20 | import qualified Data.ByteString.Lazy as B 21 | import qualified Data.Map as Map 22 | import System.IO 23 | import Debug.Trace 24 | 25 | split :: Int -> [a] -> [[a]] 26 | split numChunks l = splitSize (ceiling $ fromIntegral (length l) / fromIntegral numChunks) l 27 | where 28 | splitSize _ [] = [] -- base case; without it this would loop forever on the empty remainder 29 | splitSize i v = let (first,second) = splitAt i v in first : splitSize i second 30 | 31 | broadcast :: (Serializable a) => [ProcessId] -> a -> ProcessM () 32 | broadcast pids dat = mapM_ (\pid -> send pid dat) pids 33 | 34 | multiSpawn :: [NodeId] -> Closure (ProcessM ()) -> ProcessM [ProcessId] 35 | multiSpawn nodes f = mapM (\node -> spawnLink node f) nodes 36 | where s n = do mypid <- getSelfNode 37 | setRemoteNodeLogConfig n (LogConfig LoTrivial (LtForward mypid) LfAll) 38 | spawnLink n f 39 | 40 | mapperProcess :: ProcessM () 41 | mapperProcess = 42 | let mapProcess :: (Maybe [Vector],Maybe [ProcessId],Map.Map Int (Int,Vector)) -> ProcessM () 43 | mapProcess (mvecs,mreducers,mresult) = 44 | receiveWait 45 | [ 46 | match (\vec -> do vecs<-liftIO $ readf vec 47 | say $ "Mapper read data file" 48 | return (Just vecs,mreducers,mresult)), 49 | match (\reducers -> return (mvecs,Just reducers,mresult)), 50 | roundtripResponse (\() -> return (mresult,(mvecs,mreducers,mresult))), 51 | roundtripResponse 52 | (\clusters -> let tbl = analyze (fromJust mvecs) clustersandcenters Map.empty 53 | clustersandcenters = map (\x -> (x,clusterCenter x)) clusters 54 | reducers = fromJust mreducers 55 | target clust = reducers !!
(clust `mod` length reducers) 56 | sendout (clustid,(count,sum)) = send (target clustid) Cluster {clId = clustid,clCount=count, clSum=sum} 57 | in do say $ "calculating: "++show (length reducers)++" reducers" 58 | mapM_ sendout (Map.toList tbl) 59 | return ((),(mvecs,mreducers,tbl))), 60 | matchUnknownThrow 61 | ] >>= mapProcess 62 | getit :: Handle -> IO [Vector] 63 | getit h = do l <- liftM lines $ hGetContents h 64 | return (map read l) -- evaluate or return? 65 | readf fn = do h <- openFile fn ReadMode 66 | getit h 67 | condtrace cond s val = if cond 68 | then trace s val 69 | else val 70 | analyze :: [Vector] -> [(Cluster,Vector)] -> Map.Map Int (Int,Vector) -> Map.Map Int (Int,Vector) 71 | analyze [] _ ht = ht 72 | analyze (v:vectors) clusters ht = 73 | let theclust = assignToCluster clusters v 74 | newh = ht `seq` theclust `seq` Map.insertWith' (\(a,v1) (b,v2) -> let av = addVector v1 v2 in av `seq` (a+b,av) ) theclust (1,v) ht 75 | -- condtrace (blarg `mod` 1000 == 0) (show blarg) $ 76 | in newh `seq` analyze vectors clusters newh 77 | assignToCluster :: [(Cluster,Vector)] -> Vector -> Int 78 | assignToCluster clusters vector = 79 | let distances = map (\(x,center) -> (clId x,sqDistance center vector)) clusters 80 | in fst $ minimumBy (\(_,a) (_,b) -> compare a b) distances 81 | doit = mapProcess (Nothing,Nothing,Map.empty) 82 | in doit >> return () 83 | 84 | reducerProcess :: ProcessM () 85 | reducerProcess = let reduceProcess :: ([Cluster],[Cluster]) -> ProcessM () 86 | reduceProcess (oldclusters,clusters) = 87 | receiveWait [ 88 | roundtripResponse (\() -> return (clusters,(clusters,[]))), 89 | match (\x -> return (oldclusters,combineClusters clusters x)), 90 | matchUnknownThrow] >>= reduceProcess 91 | combineClusters :: [Cluster] -> Cluster -> [Cluster] 92 | combineClusters [] a = [a] 93 | combineClusters (fstclst:rest) clust | clId fstclst == clId clust = (Cluster {clId = clId fstclst, 94 | clCount = clCount fstclst + clCount clust, 95 | clSum = addVector (clSum fstclst) (clSum clust)}):rest 96 | combineClusters (fstclst:res) clust = fstclst:(combineClusters res clust) 97 | in reduceProcess ([],[]) >> return () 98 | 99 | 100 | $( remotable ['mapperProcess, 'reducerProcess] ) 101 | 102 | 103 | initialProcess "MASTER" = 104 | do peers <- getPeers 105 | -- say $ "Got peers: " ++ show peers 106 | cfg <- getConfig 107 | let mappers = findPeerByRole peers "MAPPER" 108 | let reducers = findPeerByRole peers "REDUCER" 109 | let numreducers = length reducers 110 | let nummappers = length mappers 111 | say $ "Got " ++ show nummappers ++ " mappers and " ++ show numreducers ++ " reducers" 112 | clusters <- liftIO $ getClusters "kmeans-clusters" 113 | say $ "Got "++show (length clusters)++" clusters" 114 | mypid <- getSelfPid 115 | 116 | mapperPids <- multiSpawn mappers mapperProcess__closure 117 | 118 | reducerPids <- multiSpawn reducers reducerProcess__closure 119 | broadcast mapperPids reducerPids 120 | mapM_ (\(pid,chunk) -> send pid chunk) (zip (mapperPids) (repeat "kmeans-points")) 121 | 122 | say "Starting iteration" 123 | starttime <- liftIO $ getCurrentTime 124 | let loop howmany clusters = do 125 | liftIO $ putStrLn $ show howmany 126 | roundtripQueryMulti PldUser mapperPids clusters :: ProcessM [Either TransmitStatus ()] 127 | res <- roundtripQueryMulti PldUser reducerPids () :: ProcessM [Either TransmitStatus [Cluster]] 128 | let newclusters = rights res 129 | let newclusters2 = (sortBy (\a b -> compare (clId a) (clId b)) (concat newclusters)) 130 | if newclusters2 == clusters 
|| howmany >= 4 131 | then do 132 | donetime <- liftIO $ getCurrentTime 133 | say $ "Converged in " ++ show howmany ++ " iterations and " ++ (show $ diffUTCTime donetime starttime) 134 | pointmaps <- mapM (\pid -> do (Right m) <- roundtripQuery PldUser pid () 135 | return (m::Map.Map Int (Int,Vector))) mapperPids 136 | let pointmap = map (\x -> sum $ map fst (Map.elems x)) pointmaps 137 | say $ "Total points: " ++ (show $ sum pointmap) 138 | -- liftIO $ writeFile "kmeans-converged" $ readableShow (Map.toList pointmap) 139 | --respoints <- roundtripQueryAsync PldUser mapperPids () :: ProcessM [Either TransmitStatus (Map.Map Int [Vector])] 140 | 141 | --liftIO $ B.writeFile "kmeans-converged" $ encode $ Map.toList $ Map.unionsWith (++) (rights respoints) 142 | else 143 | loop (howmany+1) newclusters2 144 | loop 0 clusters 145 | 146 | initialProcess "MAPPER" = receiveWait [] 147 | initialProcess "REDUCER" = receiveWait [] 148 | initialProcess _ = error "Role must be MAPPER or REDUCER or MASTER" 149 | 150 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 151 | -------------------------------------------------------------------------------- /examples/kmeans/KMeans3.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Main where 3 | 4 | import Remote 5 | 6 | import Control.Monad.Trans 7 | import Control.Monad 8 | import Data.List (minimumBy) 9 | 10 | import Debug.Trace 11 | 12 | import KMeansCommon 13 | 14 | type Line = String 15 | type Word = String 16 | 17 | mrMapper :: (Promise [Promise Vector], [Cluster]) -> TaskM [(ClusterId, Promise Vector)] 18 | mrMapper (ppoints,clusters) = 19 | do points <- readPromise ppoints 20 | tsay $ "mapping "++show (length points)++" points and "++show (length clusters)++" clusters" 21 | mapM (assign (map (\c -> (clId c,clusterCenter c)) clusters)) points 22 | where assign clusters point = 23 | let distances point = map (\(clid,center) -> (clid,sqDistance center point)) clusters 24 | assignment point = fst $ minimumBy (\(_,a) (_,b) -> compare a b) (distances point) 25 | in do vp <- readPromise point 26 | vp `seq` return (assignment vp,point) 27 | 28 | 29 | mrReducer :: (ClusterId,[Promise Vector]) -> TaskM Cluster 30 | mrReducer (cid,l) = 31 | do tsay $ "reducing cluster id " ++ show cid ++ " with " ++ show (length l) ++" points" 32 | let emptyCluster = makeCluster cid [] 33 | in foldM (\c pv -> do v <- readPromise pv 34 | c `seq` return $ addToCluster c v) emptyCluster l 35 | 36 | $( remotable ['mrMapper, 'mrReducer] ) 37 | 38 | again :: Int -> (b -> TaskM b) -> b -> TaskM b 39 | again 0 f i = tsay "last iteration" >> f i 40 | again n f i = do tsay (show n++" iterations remaining") 41 | q <- f i 42 | again (n-1) f q 43 | 44 | initialProcess "MASTER" = 45 | do setNodeLogConfig defaultLogConfig {logLevel = LoInformation} 46 | clusters <- liftIO $ getClusters "kmeans-clusters" 47 | points <- liftIO $ getPoints2 "kmeans-points" 48 | say $ "starting master" 49 | ans <- runTask $ 50 | do 51 | vpoints <- mapM toPromise points 52 | ppoints <- mapM toPromise (chunkify 5 vpoints) 53 | let myMapReduce = 54 | MapReduce 55 | { 56 | mtMapper = mrMapper__closure, 57 | mtReducer = mrReducer__closure, 58 | mtChunkify = \clusts -> [(ps,clusts) | ps <- ppoints], 59 | mtShuffle = shuffle 60 | } 61 | again 4 (mapReduce myMapReduce) clusters 62 | say $ "done" -- show ans 63 | initialProcess _ = setNodeLogConfig defaultLogConfig {logLevel = LoInformation} 64 | >> say "starting 
worker" >> receiveWait [] 65 | 66 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 67 | 68 | -------------------------------------------------------------------------------- /examples/kmeans/KMeansCommon.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | module KMeansCommon where 3 | 4 | import Data.List (foldl') 5 | import Data.Typeable (Typeable) 6 | import Data.Data (Data) 7 | import Data.Binary 8 | import Data.Array.Unboxed 9 | import qualified Data.ByteString.Lazy as B 10 | import Debug.Trace 11 | 12 | -- Change this and recompile to change the cardinality of the data set! 13 | vectorSize :: Int 14 | vectorSize = 100 15 | 16 | type ClusterId = Int 17 | data Vector = Vector !(UArray Int Double) deriving (Typeable,Eq) 18 | instance Binary Vector where put (Vector a) = put a 19 | get = do a<-get 20 | return $ Vector a 21 | 22 | instance Read Vector where 23 | readsPrec i a = let f = readsPrec i a 24 | in [(Vector $! listArray (0,vectorSize-1) (fst $ head f),snd $ head f)] 25 | 26 | instance Show Vector where 27 | showsPrec _ (Vector a) = showList $ elems a 28 | 29 | 30 | data Cluster = Cluster 31 | { 32 | clId :: !ClusterId, 33 | clCount :: !Int, 34 | clSum :: !Vector 35 | } deriving (Show,Read,Typeable,Eq) 36 | instance Binary Cluster where put (Cluster a b c) = put a>>put b>>put c 37 | get = do a<-get 38 | b<-get 39 | c<-get 40 | return $ Cluster a b c 41 | 42 | clusterCenter :: Cluster -> Vector 43 | clusterCenter cl = let (Vector arr) = clSum cl 44 | count = fromIntegral $ clCount cl 45 | newelems = case count of 46 | 0 -> replicate (vectorSize) 0 47 | _ -> map (\d -> d / count) (elems arr) 48 | in Vector $ listArray (bounds arr) newelems 49 | 50 | sqDistance :: Vector -> Vector -> Double 51 | sqDistance (Vector a1) (Vector a2) = sum $ map (\(a,b) -> let dif = a-b in dif*dif) (zip (elems a1) (elems a2)) 52 | 53 | makeCluster :: ClusterId -> [Vector] -> Cluster 54 | makeCluster clid vecs = Cluster {clId = clid, clCount = length vecs, clSum = vecsum} 55 | where vecsum = Vector sumArray 56 | sumArray = listArray (0,vectorSize-1) [ sum $ map ((flip(!))i) (map unVector vecs) | i<-[0..vectorSize-1] ] 57 | unVector (Vector x) = x 58 | 59 | 60 | addCluster :: Cluster -> Cluster -> Cluster 61 | addCluster (Cluster aclid acount asum) (Cluster bclid bcount bsum) = Cluster aclid (acount+bcount) (addVector asum bsum) 62 | 63 | addToCluster :: Cluster -> Vector -> Cluster 64 | addToCluster (Cluster aclid acount asum) v = Cluster aclid (acount+1) (addVector asum v) 65 | 66 | addVector :: Vector -> Vector -> Vector 67 | addVector (Vector a ) (Vector b) = Vector $! listArray (bounds a) (map (\(a,b) -> a+b) (zip (elems a) (elems b))) 68 | 69 | zeroVector :: Vector -> Vector 70 | zeroVector (Vector a) = Vector $! 
listArray (bounds a) (repeat 0) 71 | 72 | getPoints :: FilePath -> IO [Vector] 73 | getPoints fp = do c <- readFile fp 74 | return $ read c 75 | 76 | getPoints2 :: FilePath -> IO [Vector] 77 | getPoints2 fp = do c <- readFile fp 78 | return $ glom (lines c) 79 | where glom [] = [] 80 | glom (line:rest) = read line : glom rest 81 | 82 | getClusters :: FilePath -> IO [Cluster] 83 | getClusters fp = do c <- readFile fp 84 | return $ read c 85 | 86 | 87 | -------------------------------------------------------------------------------- /examples/kmeans/MakeData.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.Random (randomR,getStdRandom) 4 | import System.Environment 5 | import KMeansCommon 6 | import Data.Binary 7 | import System.IO 8 | import Data.Array.Unboxed 9 | import qualified Data.ByteString.Lazy as B 10 | 11 | 12 | -- vectorsPerFile = 80000 -- should be 80000 13 | numClusters = 10 14 | vectorDimensions = KMeansCommon.vectorSize 15 | minValue = -1.0 16 | maxValue = 1.0 17 | 18 | val = getStdRandom (randomR (minValue,maxValue)) 19 | vector = do vals <- mapM (const val) [1..vectorDimensions] 20 | return $ Vector $! listArray (0,vectorDimensions-1) vals 21 | file vectorsPerFile = mapM (const vector) [1..vectorsPerFile] 22 | 23 | clusters = mapM (\x -> do v <- vector 24 | return $ Cluster {clId = x,clCount=1,clSum=v}) [1..numClusters] 25 | 26 | 27 | makeBig :: Int -> IO () 28 | makeBig i = do c <- clusters 29 | withFile "kmeans-points" WriteMode (\h -> mapM (\_ -> do a <- vector ; hPutStrLn h (show a)) [1..i] ) 30 | writeFile "kmeans-clusters" $ show c 31 | 32 | main = do a <- getArgs 33 | case a of 34 | ["big",a] -> makeBig (read a) 35 | _ -> putStrLn "Syntax:\n\tMakeData big 8\n\t\nOutput is in kmeans-points and kmeans-clusters" 36 | -------------------------------------------------------------------------------- /examples/kmeans/awsgo: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import time 5 | import os 6 | import math 7 | import optparse 8 | 9 | import boto 10 | import boto.ec2 11 | import subprocess 12 | 13 | packages="" 14 | 15 | script_header="""#!/bin/bash 16 | append() 17 | { 18 | file="$1" 19 | shift 20 | $@ | tee -a "$file" 21 | } 22 | suappend() 23 | { 24 | file="$1" 25 | shift 26 | $@ | sudo tee -a "$file" 27 | } 28 | try() 29 | { 30 | log="$HOME/$1" 31 | shift 32 | msg="$1" 33 | shift 34 | (echo "------ `date`: $@") >> $log 35 | ($@ 2>&1) | tee -a "$log" | tail -1 > tmp-log 36 | ret="${PIPESTATUS[0]}" 37 | if [ 0 != "$ret" ] 38 | then 39 | echo 1>&2 "Failed during $msg: `cat tmp-log`" 40 | rm tmp-log 41 | return 1 42 | fi 43 | rm tmp-log 44 | return 0 45 | } 46 | try1() 47 | { 48 | if ! try "$@" 49 | then 50 | echo 1>&2 "Retrying..." 51 | sleep 10 52 | if ! try "$@" 53 | then 54 | echo 1>&2 "Retrying again..." 55 | sleep 20 56 | if ! try "$@" 57 | then 58 | echo 1>&2 "Giving up." 
59 | exit 1 60 | fi 61 | fi 62 | fi 63 | return 0 64 | } 65 | """ 66 | exec_args=["-cfgRoundtripTimeout=20000000","+RTS","-K20000000"] 67 | local_files = ["KMeans.hs","MakeData.hs","KMeansCommon.hs"] 68 | node_config_data = {"apts":"ghc cabal-install git-core libghc6-binary-dev libghc6-mtl-dev libghc6-network-dev libghc6-stm-dev"} 69 | node_config_script = script_header+""" 70 | cd 71 | try cfg.log "update apt repository" sudo apt-get -q -y update 72 | try cfg.log "install apt packages" sudo apt-get -q -y install %(apts)s 73 | try1 cfg.log "cabalupdate" cabal update 74 | try1 cfg.log "cabalify" cabal install pureMD5 utf8-string directory crypto-api tagged data-default cereal semigroups 75 | try cfg.log "retrieve git repository" git clone git://github.com/jepst/CloudHaskell.git 76 | cd CloudHaskell 77 | try cfg.log "configure local repository" cabal configure 78 | try1 cfg.log "build local repository" cabal build 79 | try1 cfg.log "install local repository" cabal install 80 | cd 81 | try cfg.log "compile executable" ghc -O3 --make KMeans.hs 82 | """ % node_config_data 83 | 84 | def writefile(fname,content): 85 | with open(fname,"w") as f: 86 | f.write(content) 87 | 88 | def err(s): 89 | print s 90 | sys.exit(1) 91 | 92 | def waitforallup(instances): 93 | while True: 94 | pending = False 95 | running = False 96 | for instance in instances: 97 | if instance.state == u'pending': 98 | pending = True 99 | instance.update() 100 | elif instance.state == u'running': 101 | running = True 102 | else: 103 | print("Encountered unexpected instance state: "+instance.state) # and terminate remaining instances? 104 | if pending: 105 | time.sleep(3) 106 | else: 107 | break 108 | 109 | ssh_opts=["-o","BatchMode=yes","-o","ServerAliveInterval=240","-o","StrictHostKeyChecking=no"] 110 | 111 | def allinstances_xfer(keyfile,username,instances,thefile): 112 | processes = [] 113 | for i in instances: 114 | cmd=["scp","-r","-q","-i",keyfile]+ssh_opts + thefile + [username+"@"+i.public_dns_name+":"] 115 | while True: 116 | subproc=subprocess.Popen(cmd,stderr=subprocess.STDOUT) 117 | result = subproc.wait() 118 | if result == 0: 119 | break 120 | else: 121 | print "Problem transferring",thefile,"to",i.public_dns_name," and retrying" 122 | time.sleep(60) 123 | 124 | def allinstances_recv(keyfile,username,instances,thefile): 125 | processes = [] 126 | for i in instances: 127 | cmd=["scp","-r","-q","-i",keyfile]+ssh_opts+ [ username+"@"+i.public_dns_name+":"+thefile,thefile] 128 | subproc=subprocess.Popen(cmd,stderr=subprocess.STDOUT) 129 | processes.append(subproc) 130 | for (i,p) in zip(instances,processes): 131 | result = p.wait() 132 | if result != 0: 133 | err("Error with remote receiving on instance "+i.id+" at "+i.public_dns_name) 134 | 135 | def allinstances_runscript(keyfile,username,instances,scriptmaker): 136 | processes = [] 137 | counter=0 138 | for i in instances: 139 | text = scriptmaker(i) 140 | fname="tmp-"+str(counter) # use a random name here 141 | writefile(fname,text) 142 | cmd1=["scp","-r","-q","-i",keyfile]+ssh_opts+[ fname ,username+"@"+i.public_dns_name+":"] 143 | cmd2=["ssh","-q","-i",keyfile]+ssh_opts+["-l",username,"-o","StrictHostKeyChecking=no", i.public_dns_name,"bash",fname] 144 | cmd = ["bash","-c",(" ".join(cmd1))+" ; "+(" ".join(cmd2))] 145 | subproc=subprocess.Popen(cmd,stderr=subprocess.STDOUT) 146 | processes.append(subproc) 147 | counter=counter+1 148 | time.sleep(5) 149 | for (i,p) in zip(instances,processes): 150 | result = p.wait() 151 | if result != 0: 152 | err("Error during remote
script execution on instance "+i.id+" at "+i.public_dns_name) 153 | for i in range(counter): 154 | os.remove("tmp-"+str(i)) 155 | 156 | 157 | def allinstances_exec(keyfile,username,instances,thecmd): 158 | processes = [] 159 | for i in instances: 160 | cmd=["ssh","-i",keyfile,"-l",username]+ssh_opts+[ i.public_dns_name]+thecmd 161 | # print " ".join(cmd) 162 | subproc=subprocess.Popen(cmd,stderr=subprocess.STDOUT) 163 | processes.append(subproc) 164 | for (i,p) in zip(instances,processes): 165 | result = p.wait() 166 | if result != 0: 167 | err("Error "+str(result)+" with remote process on instance "+i.id+" at "+i.public_dns_name) 168 | 169 | def allinstances_spawn(keyfile,username,instances,thecmd): 170 | processes = [] 171 | for i in instances: 172 | cmd=["ssh","-n","-q","-i",keyfile,"-l",username]+ssh_opts+[ i.public_dns_name]+thecmd 173 | subproc=subprocess.Popen(cmd,stderr=subprocess.STDOUT) 174 | processes.append(subproc) 175 | time.sleep(5) 176 | 177 | def hostscript(instance): 178 | hostsfile = "try cfg.log 'setup hostname' suappend /etc/hosts echo 127.0.1.1 `hostname`" 179 | return hostsfile 180 | 181 | def main(): 182 | parser = optparse.OptionParser() 183 | parser.add_option("-q", "--datamultiple",help="Data multiplier (default 1)",dest="data_multiple",default=1,type="int") 184 | parser.add_option("-r", "--region", help="Region (default eu-west-1)", dest="region", default="eu-west-1") 185 | parser.add_option("-i", "--image", help="Virtual machine image (default ami-311f2b45)",dest="ami",default="ami-311f2b45") 186 | # other useful ubuntu instances at 187 | # and here http://uec-images.ubuntu.com/releases/10.04/release/ 188 | # and http://aws.amazon.com/ec2/instance-types/ 189 | parser.add_option("-m", "--nmappers", help="Number of mapper nodes (default 10)",dest="mapper_nodes",type="int",default=10) 190 | parser.add_option("-n", "--nreducers", help="Number of reducer nodes (default 3)",dest="reducer_nodes",type="int",default=3) 191 | parser.add_option("-p", "--nodesperhost",help="Number of nodes to start on each host (default 4)",dest="nodes_per_host",type="int",default=4) 192 | parser.add_option("-k", "--keypair",help="Keypair for starting EC2 instances (default aws) -- will look for corresponding file in ~/.ssh/",dest="keypair",default="aws") 193 | parser.add_option("-s", "--securitygroup",help="Name of security group for launching instances (default default)",dest="securitygroup",default="default") 194 | parser.add_option("-t","--instancetype",help="Name of instance type for launching instances (default m1.small)",dest="instancetype",default="m1.small") 195 | parser.add_option("-l","--login-user",help="Username for connecting to instances (default ubuntu)",dest="username",default="ubuntu") 196 | (options, args) = parser.parse_args() 197 | 198 | if os.sep in options.keypair: 199 | s = os.path.split(options.keypair)[-1] 200 | keypair = s.split(".pem")[0] 201 | keypairfile = os.path.expanduser(options.keypair) 202 | if not os.path.exists(keypairfile): 203 | err("Specified keypair file "+keypairfile+" doesn't exist") 204 | else: 205 | keypairbase = options.keypair 206 | attempts = [os.path.join("~",".ssh",keypairbase), 207 | os.path.join("~",".ssh",keypairbase+".pem"), 208 | os.path.join("~",".ec2",keypairbase), 209 | os.path.join("~",".ec2",keypairbase+".pem") ] 210 | for attempt in attempts: 211 | path = os.path.expanduser(attempt) 212 | if os.path.exists(path): 213 | keypair = options.keypair 214 | keypairfile = path 215 | break 216 | else: 217 | err("Could not find a keyfile
matching \""+keypairbase+"\" in ~/.ssh or ~/.ec2") 218 | 219 | try: 220 | for r in boto.ec2.regions(): 221 | if r.name == options.region: 222 | region = r 223 | break 224 | else: 225 | err("Region %s not found." % options.region) 226 | except AttributeError: 227 | err("Can't connect to AWS. Set AWS_SECRET_ACCESS_KEY and AWS_ACCESS_KEY_ID environment variables to the values visible under your Security Credentials screen.") 228 | 229 | ec2 = boto.connect_ec2(region=region) 230 | 231 | num_nodes = 1 + options.mapper_nodes + options.reducer_nodes 232 | numhosts = int (math.ceil (float(num_nodes) / options.nodes_per_host)) 233 | 234 | print "Starting "+str(numhosts)+" instances of image "+options.ami+"." 235 | image = ec2.get_all_images(image_ids=[options.ami])[0] 236 | reservation = image.run(numhosts,numhosts,key_name=keypair,instance_type=options.instancetype,security_groups=[options.securitygroup]) 237 | waitforallup(reservation.instances) 238 | time.sleep(30) 239 | 240 | print "Configuring instances." 241 | allinstances_xfer(keypairfile,options.username,reservation.instances,local_files) 242 | allinstances_runscript(keypairfile,options.username,reservation.instances,lambda a: node_config_script + hostscript(a) + "\n") 243 | 244 | config_file="cfgKnownHosts "+(" ".join([i.private_dns_name for i in reservation.instances])) 245 | writefile("config",config_file) 246 | allinstances_xfer(keypairfile,options.username,reservation.instances,["config"]) 247 | 248 | print "Transfering data." 249 | mappers=[] 250 | reducers=[] 251 | master=[] 252 | insts = reservation.instances * options.nodes_per_host 253 | master.append(insts.pop(0)) 254 | for i in range(options.mapper_nodes): 255 | mappers.append(insts.pop(0)) 256 | for i in range(options.reducer_nodes): 257 | reducers.append(insts.pop(0)) 258 | allinstances_xfer(keypairfile,options.username,reservation.instances,["kmeans-points","kmeans-clusters"]) 259 | 260 | print "Starting worker nodes." 261 | allinstances_spawn(keypairfile,options.username,mappers,["./KMeans","-cfgRole=MAPPER"]+exec_args) 262 | allinstances_spawn(keypairfile,options.username,reducers,["./KMeans","-cfgRole=REDUCER"]+exec_args) 263 | time.sleep(30) 264 | 265 | print "Starting master node on "+master[0].public_dns_name+"." 266 | starttime=time.time() 267 | allinstances_exec(keypairfile,options.username,master,["./KMeans","-cfgRole=MASTER"]+exec_args) 268 | endtime=time.time() 269 | # print "Completed computation in "+str(endtime-starttime)+" seconds." 270 | 271 | # print "Retrieving results." 272 | # allinstances_recv(keypairfile,options.username,master,"kmeans-converged") 273 | 274 | print "Shutting down instances." 
275 | for i in reservation.instances: 276 | i.stop() 277 | 278 | def safe_main(): 279 | try: 280 | main() 281 | except boto.exception.EC2ResponseError as a: 282 | err("Error from EC2 service: "+a.error_message+": "+a.reason) 283 | 284 | if __name__ == "__main__": 285 | safe_main() 286 | 287 | -------------------------------------------------------------------------------- /examples/kmeans/awskill: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import optparse 4 | 5 | import boto 6 | import boto.ec2 7 | 8 | def main(): 9 | parser = optparse.OptionParser() 10 | parser.add_option("-r", "--region", help="Region (default eu-west-1)", dest="region", default="eu-west-1") 11 | (options, args) = parser.parse_args() 12 | 13 | for r in boto.ec2.regions(): 14 | if r.name == options.region: 15 | region = r 16 | break 17 | else: 18 | err("Region %s not found." % options.region) 19 | 20 | ec2 = boto.connect_ec2(region=region) 21 | 22 | reservation = ec2.get_all_instances() 23 | for i in reservation: 24 | for n in i.instances: 25 | print "Stopping ",n.public_dns_name+"." 26 | n.stop() 27 | 28 | if __name__ == "__main__": 29 | main() 30 | 31 | -------------------------------------------------------------------------------- /examples/kmeans/awslist: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import optparse 4 | 5 | import boto 6 | import boto.ec2 7 | 8 | def main(): 9 | parser = optparse.OptionParser() 10 | parser.add_option("-r", "--region", help="Region (default eu-west-1)", dest="region", default="eu-west-1") 11 | (options, args) = parser.parse_args() 12 | 13 | for r in boto.ec2.regions(): 14 | if r.name == options.region: 15 | region = r 16 | break 17 | else: 18 | err("Region %s not found." % options.region) 19 | 20 | ec2 = boto.connect_ec2(region=region) 21 | 22 | reservation = ec2.get_all_instances() 23 | for i in reservation: 24 | for n in i.instances: 25 | print "Found ",n.public_dns_name+"." 26 | 27 | if __name__ == "__main__": 28 | main() 29 | 30 | -------------------------------------------------------------------------------- /examples/kmeans/kmeans: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nmappers=2 4 | nreducers=2 5 | 6 | pids="" 7 | host=`hostname` 8 | 9 | args="-cfgRoundtripTimeout=2000000000 -cfgKnownHosts=$host +RTS -K20000000" 10 | 11 | # ghc --make KMeans || exit 1 12 | 13 | for i in $( seq 1 ${nmappers} ) 14 | do 15 | ./KMeans -cfgRole=MAPPER $args & 16 | pid=$! 17 | pids="$pids $pid" 18 | done 19 | 20 | for i in $( seq 1 ${nreducers} ) 21 | do 22 | ./KMeans -cfgRole=REDUCER $args & 23 | pid=$! 
24 | pids="$pids $pid" 25 | done 26 | 27 | ./KMeans -cfgRole=MASTER $args 28 | 29 | kill $pids 30 | -------------------------------------------------------------------------------- /examples/pi/Pi6.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Main where 3 | 4 | import Remote 5 | import PiCommon 6 | 7 | worker :: Int -> Int -> ProcessId -> ProcessM () 8 | worker count offset master = 9 | let (numin,numout) = countPairs offset count 10 | in do send master (numin,numout) 11 | logS "PI" LoInformation ("Finished mapper from offset "++show offset) 12 | 13 | $( remotable ['worker] ) 14 | 15 | initialProcess :: String -> ProcessM () 16 | initialProcess "WORKER" = 17 | receiveWait [] 18 | 19 | initialProcess "MASTER" = 20 | do { peers <- getPeers 21 | ; mypid <- getSelfPid 22 | ; let { workers = findPeerByRole peers "WORKER" 23 | ; interval = 1000000 24 | ; numberedworkers = (zip [0,interval..] workers) } 25 | ; mapM_ (\ (offset,nid) -> spawn nid (worker__closure (interval-1) offset mypid)) numberedworkers 26 | ; (x,y) <- receiveLoop (0,0) (length workers) 27 | ; let est = estimatePi (fromIntegral x) (fromIntegral y) 28 | in say ("Done: " ++ longdiv (fst est) (snd est) 20) } 29 | where 30 | estimatePi ni no | ni + no == 0 = (0,0) 31 | | otherwise = (4 * ni , ni+no) 32 | receiveLoop a 0 = return a 33 | receiveLoop (numIn,numOut) n = 34 | let 35 | resultMatch = match (\ (x,y) -> return (x::Int,y::Int)) 36 | in do { (newin,newout) <- receiveWait [resultMatch] 37 | ; let { x = numIn + newin 38 | ; y = numOut + newout } 39 | ; receiveLoop (x,y) (n-1) } 40 | 41 | initialProcess _ = error "Role must be WORKER or MASTER" 42 | 43 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/pi/Pi7.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Main where 3 | 4 | import Remote 5 | import PiCommon 6 | import Data.List (foldl') 7 | 8 | worker :: Int -> Int -> TaskM (Int,Int) 9 | worker count offset = 10 | let (numin,numout) = countPairs offset count 11 | in do tlogS "PI" LoInformation ("Finished mapper from offset "++show offset) 12 | return (numin,numout) 13 | 14 | $( remotable ['worker] ) 15 | 16 | initialProcess :: String -> ProcessM () 17 | initialProcess "WORKER" = 18 | receiveWait [] 19 | 20 | initialProcess "MASTER" = 21 | let 22 | interval = 1000000 23 | numworkers = 5 24 | numberedworkers = take numworkers [0,interval..] 
25 | clos = map (worker__closure interval) numberedworkers 26 | in runTask $ 27 | do proms <- mapM newPromise clos 28 | res <- mapM readPromise proms 29 | let (sumx,sumy) = foldl' (\(a,b) (c,d) -> (a+c,b+d)) (0,0) res 30 | let (num,den) = estimatePi (fromIntegral sumx) (fromIntegral sumy) 31 | tsay ("Done: " ++ longdiv (num) (den) 20) 32 | where 33 | estimatePi ni no | ni + no == 0 = (0,0) 34 | | otherwise = (4 * ni , ni+no) 35 | 36 | initialProcess _ = error "Role must be WORKER or MASTER" 37 | 38 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 39 | 40 | 41 | -------------------------------------------------------------------------------- /examples/pi/PiCommon.hs: -------------------------------------------------------------------------------- 1 | module PiCommon where 2 | 3 | import Data.List 4 | import Data.Array 5 | 6 | type Number = Double 7 | 8 | data Seq = Seq {k::Int, x::Number, base::Int, q::Array Int Number, tobreak::Bool} 9 | 10 | haltonSeq :: Int -> Int -> [Number] 11 | haltonSeq offset thebase = let 12 | digits = 64::Int 13 | seqSetup :: Seq -> Int -> Number -> (Seq, Number) 14 | seqSetup s j _ = 15 | let dj = (k s `mod` base s) 16 | news = s {k = (k s - dj) `div` fromIntegral (base s), x = x s + (fromIntegral dj * ((q s) ! (j+1)))} 17 | in 18 | (news,fromIntegral dj) 19 | seqContinue :: Seq -> Int -> Number -> (Seq, Number) 20 | seqContinue s j dj = 21 | if tobreak s 22 | then (s,dj) 23 | else 24 | let newdj = dj+1 25 | newx = x s + (q s) ! (j+1) 26 | in 27 | if newdj < fromIntegral (base s) 28 | then (s {x=newx,tobreak=True},newdj) 29 | else (s {x = newx - if j==0 then 1 else (q s) ! j},0) 30 | 31 | initialState base = let q = array (0,digits*2) [(i,v) | i <- [0..digits*2], let v = if i == 0 then 1 else ((q ! 
((i)-1))/fromIntegral base)] 32 | in Seq {k=fromIntegral offset,x=0,tobreak=False,base=base,q=q} 33 | theseq base = let 34 | first :: (Int,[Number],Seq) 35 | first = foldl' (\(n,li,s) _ -> let (news,r) = seqSetup s n 0 36 | in (n+1,r:li,news)) (0,[],initialState base) [0..digits] 37 | second :: [Number] -> Seq -> (Int,[Number],Seq) 38 | second d s = foldl' (\(n,li,s) dj -> let (news,r) = seqContinue s n dj 39 | in (n+1,r:li,news)) (0,[],s {tobreak=False}) d 40 | in let (_,firstd,firsts) = first 41 | therest1 :: [([Number],Seq)] 42 | therest1 = iterate (\(d,s) -> let (_,newd,news) = second (reverse d) s in (newd,news)) (firstd,firsts) 43 | therest :: [Number] 44 | therest = map (\(_,s) -> x s) therest1 45 | in therest 46 | in (theseq thebase) 47 | 48 | haltonPairs :: Int -> [(Number,Number)] 49 | haltonPairs offset = 50 | zip (haltonSeq offset 2) (haltonSeq offset 3) 51 | 52 | countPairs :: Int -> Int -> (Int,Int) 53 | countPairs offset count = 54 | let range = take count (haltonPairs offset) 55 | numout = length (filter outCircle range) 56 | in (count-numout,numout) 57 | where 58 | outCircle (x,y) = 59 | let fx=x-0.5 60 | fy=y-0.5 61 | in fx*fx + fy*fy > 0.25 62 | 63 | longdiv :: Integer -> Integer -> Integer -> String 64 | longdiv _ 0 _ = "" 65 | longdiv numer denom places = 66 | let attempt = numer `div` denom 67 | in if places==0 68 | then "" 69 | else shows attempt (longdiv2 (numer - attempt*denom) denom (places -1)) 70 | where longdiv2 numer denom places | numer `rem` denom == 0 = "0" 71 | | otherwise = longdiv (numer * 10) denom places 72 | 73 | -------------------------------------------------------------------------------- /examples/tests/Test-Call.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-} 2 | module Main where 3 | 4 | import Remote 5 | 6 | import Control.Exception (throw) 7 | import Data.Generics (Data) 8 | import Data.Maybe (fromJust) 9 | import Data.Typeable (Typeable) 10 | import Control.Monad (when,forever) 11 | import Control.Monad.Trans (liftIO) 12 | import Data.Char (isUpper) 13 | import Control.Concurrent (threadDelay) 14 | import Data.Binary 15 | import Control.Concurrent.MVar 16 | 17 | sayHi :: ProcessId -> ProcessM () 18 | sayHi s = do liftIO $ threadDelay 500000 19 | say $ "Hi there, " ++ show s ++ "!" 
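countPairs in PiCommon.hs drives a Monte Carlo estimate of pi: each Halton point lands in the unit square, and it falls inside the inscribed circle of radius 0.5 exactly when fx*fx + fy*fy <= 0.25, so the in-circle fraction tends to pi/4. A small sketch of the closing arithmetic under that reading (the function name is illustrative and not part of PiCommon):

```
-- The inscribed circle covers pi/4 of the unit square, so
-- pi is approximately 4 * inside / (inside + outside).
estimateFromCounts :: Int -> Int -> Double
estimateFromCounts numIn numOut =
  4 * fromIntegral numIn / fromIntegral (numIn + numOut)
```

estimatePi in Pi6.hs and Pi7.hs keeps the same ratio as an integer numerator and denominator instead, so that longdiv can print the quotient digit by digit to any precision.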
20 | 21 | add :: Int -> Int -> Int 22 | add a b = a + b 23 | 24 | badFac :: Integer -> Integer 25 | badFac 0 = 1 26 | badFac 1 = 1 27 | badFac n = badFac (n-1) + badFac (n-2) 28 | 29 | remotable ['sayHi, 'add, 'badFac] 30 | 31 | 32 | while :: (Monad m) => m Bool -> m () 33 | while a = do f <- a 34 | when (f) 35 | (while a >> return ()) 36 | return () 37 | 38 | initialProcess "MASTER" = do 39 | mypid <- getSelfPid 40 | mynode <- getSelfNode 41 | peers <- getPeers 42 | 43 | let slaves = findPeerByRole peers "SLAVE" 44 | case slaves of 45 | (somenode:_) -> 46 | do say $ "Running badFac on " ++show somenode 47 | res <- callRemotePure somenode (badFac__closure 40) 48 | say $ "Got result: " ++ show res 49 | _ -> say "Couldn't find a SLAVE node to run program on; start a SLAVE, then the MASTER" 50 | 51 | 52 | return () 53 | initialProcess "SLAVE" = receiveWait [] 54 | initialProcess _ = liftIO $ putStrLn "Please use parameter -cfgRole=MASTER or -cfgRole=SLAVE" 55 | 56 | testSend = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 57 | 58 | main = testSend 59 | 60 | -------------------------------------------------------------------------------- /examples/tests/Test-Channel-Merge.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-} 2 | module Main where 3 | 4 | -- This example demonstrates the difference between 5 | -- the biased and round-robin methods for merging 6 | -- channels. Run this program twice, once with 7 | -- the parameter "biased" and once with "rr"; 8 | -- the order that the messages will be received 9 | -- in will change, even though the order that they 10 | -- are sent in does not. 11 | 12 | import Remote 13 | 14 | channelCombiner args = case args of 15 | ["biased"] -> combinePortsBiased 16 | ["rr"] -> combinePortsRR 17 | _ -> error "Please specify 'biased' or 'rr' on the command line" 18 | 19 | initialProcess _ = do 20 | mypid <- getSelfPid 21 | args <- getCfgArgs 22 | 23 | (sendchan,recvchan) <- newChannel 24 | (sendchan2,recvchan2) <- newChannel 25 | 26 | spawnLocal $ mapM_ (sendChannel sendchan) [1..(26::Int)] 27 | spawnLocal $ mapM_ (sendChannel sendchan2) ['A'..'Z'] 28 | 29 | merged <- (channelCombiner args) [combinedChannelAction recvchan show,combinedChannelAction recvchan2 show] 30 | let go = do item <- receiveChannel merged 31 | say $ "Got: " ++ show item 32 | go 33 | go 34 | 35 | main = remoteInit (Just "config") [] initialProcess 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /examples/tests/Test-Channel.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-} 2 | module Main where 3 | 4 | -- Simple example of putting data in a channel 5 | -- and then taking it out. 
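Both of these channel examples lean on the same typed primitives: newChannel yields a matched send/receive pair, and the combiners in Test-Channel-Merge decide the order in which several receive ports are drained. A rough standalone sketch of the biased merge in use, assuming only the API already exercised above (demoMerge is an illustrative name):

```
demoMerge :: ProcessM ()
demoMerge =
  do (s1, r1) <- newChannel
     (s2, r2) <- newChannel
     _ <- spawnLocal $ mapM_ (sendChannel s1) [1 .. 3 :: Int]
     _ <- spawnLocal $ mapM_ (sendChannel s2) "abc"
     -- The biased combiner favors its first port; combinePortsRR
     -- would alternate between the two instead.
     merged <- combinePortsBiased [ combinedChannelAction r1 show
                                  , combinedChannelAction r2 show ]
     item <- receiveChannel merged
     say $ "Got first: " ++ show item
```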
6 | 7 | import Remote 8 | 9 | import Data.Binary (Binary,get,put) 10 | import Data.Char (isUpper) 11 | import Data.Generics (Data) 12 | import Data.Typeable (Typeable) 13 | import Prelude hiding (catch) 14 | import Data.Typeable (typeOf) 15 | import Control.Monad.Trans 16 | import Control.Exception 17 | import Control.Monad 18 | import Data.Maybe (fromJust) 19 | import Control.Concurrent 20 | 21 | initialProcess "NODE" = do 22 | 23 | (sendchan,recvchan) <- newChannel 24 | 25 | a <- spawnLocal $ do 26 | sendChannel sendchan "hi" 27 | sendChannel sendchan "lumpy" 28 | liftIO $ threadDelay 1000000 29 | sendChannel sendchan "spatula" 30 | sendChannel sendchan "noodle" 31 | mapM_ (sendChannel sendchan) (map show [1..1000]) 32 | liftIO $ threadDelay 500000 33 | receiveChannel recvchan >>= liftIO . print 34 | receiveChannel recvchan >>= liftIO . print 35 | receiveChannel recvchan >>= liftIO . print 36 | receiveChannel recvchan >>= liftIO . print 37 | receiveChannel recvchan >>= liftIO . print 38 | receiveChannel recvchan >>= liftIO . print 39 | receiveChannel recvchan >>= liftIO . print 40 | receiveChannel recvchan >>= liftIO . print 41 | receiveChannel recvchan >>= liftIO . print 42 | receiveChannel recvchan >>= liftIO . print 43 | 44 | 45 | 46 | main = remoteInit (Just "config") [] initialProcess 47 | 48 | 49 | -------------------------------------------------------------------------------- /examples/tests/Test-Closure.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Main where 3 | 4 | -- This file contains some examples of closures, which are the 5 | -- way to express a function invocation in Cloud Haskell. You 6 | -- need to use closures to run code on a remote system, which is, 7 | -- after all, the whole point. See the documentation on the function 8 | -- 'remotable' for more details. Understanding this example is 9 | -- essential to effectively using Cloud Haskell. 10 | 11 | import Remote 12 | import Remote.Call (mkClosure) 13 | 14 | import Data.List (sort) 15 | 16 | -- Step #1: Define the functions that we would 17 | -- like to call remotely. These are just regular 18 | -- functions. 19 | 20 | sayHi :: String -> ProcessM () 21 | sayHi s = say $ "Greetings, " ++ s 22 | 23 | sayHiPM :: String -> ProcessM String 24 | sayHiPM s = say ("Hello, "++s) >> 25 | return (sort s) 26 | 27 | sayHiIO :: String -> IO String 28 | sayHiIO s = putStrLn ("Hello, " ++ s) >> 29 | return (reverse s) 30 | 31 | sayHiPure :: String -> String 32 | sayHiPure s = "Hello, " ++ s 33 | 34 | simpleSum :: Int -> Int -> Int 35 | simpleSum a b = a + b + 1 36 | 37 | -- You can also manually call a closure using invokeClosure. 38 | -- This is what functions like spawn and callRemote do 39 | -- internally. 40 | runAnother :: Closure Int -> ProcessM String 41 | runAnother c = do mi <- invokeClosure c 42 | case mi of 43 | Just i -> return $ concat $ replicate i "Starscream" 44 | Nothing -> return "No good" 45 | 46 | -- This is a partial closure: some parameters are provided by the caller, 47 | -- and some are provided after the closure is invoked. You have to 48 | -- write the function in a funny way to make this work automatically, 49 | -- but otherwise it's pretty straightforward. 50 | funnyHi :: String -> ProcessM (Int -> ProcessM ()) 51 | funnyHi s = return $ \i -> say ("Hello, " ++ (concat $ replicate i (reverse s))) 52 | 53 | -- Step #2: Automagically generate closures 54 | -- for these functions using remotable.
For each 55 | -- given function n, remotable will create a 56 | -- closure for that function named n__closure. 57 | -- You can then use that closure with spawn, callRemote, 58 | -- and invokeClosure. See examples below. 59 | 60 | remotable ['sayHi, 'sayHiIO,'sayHiPure, 'sayHiPM, 'funnyHi, 'runAnother, 'simpleSum] 61 | 62 | initialProcess _ = do 63 | mynid <- getSelfNode 64 | 65 | -- spawn and callRemote (and their variants) run 66 | -- a function on a given node. We indicate which 67 | -- node by giving a node ID (to keep it simple 68 | -- we do everything on one node in this 69 | -- example), and we indicate which function to run 70 | -- by providing its closure. 71 | 72 | -- A simple spawn. Does not block, and the result 73 | -- we get back is the PID of the new process. 74 | p <- spawn mynid (sayHi__closure "Zoltan") 75 | say $ "Got result " ++ show p 76 | 77 | -- callRemote is like a synchronous version of spawn. 78 | -- It will block until the function ends, and returns 79 | -- its result. 80 | v <- callRemote mynid ( sayHiPM__closure "Jaroslav") 81 | say $ "Got result " ++ v 82 | 83 | -- We need a different function to call closures in the 84 | -- IO monad. Also, instead of using the "something__closure" 85 | -- syntax, you can call the Template Haskell mkClosure 86 | -- function, which expands to the same thing. 87 | w <- callRemoteIO mynid ( $(mkClosure 'sayHiIO) "Noodle") 88 | say $ "Got result " ++ show w 89 | 90 | -- Yet another version of callRemote for nonmonadic functions. 91 | q <- callRemotePure mynid (sayHiPure__closure "Spatula") 92 | say $ "Got result " ++ show q 93 | 94 | -- We can even give closures to closures. They can in turn run 95 | -- them indirectly (with spawn or callRemote) or directly 96 | -- (with invokeClosure). 97 | x <- callRemote mynid (runAnother__closure (simpleSum__closure 1 1)) 98 | say $ "Got result " ++ x 99 | 100 | -- This function takes some parameters after closure invocation. 101 | -- So the value we get back from invokeClosure is actually 102 | -- a partially evaluated function. 103 | mfunnyFun <- invokeClosure (funnyHi__closure "Antwerp") 104 | case mfunnyFun of 105 | Just funnyFun -> funnyFun 3 106 | Nothing -> say "No good" 107 | 108 | return () 109 | 110 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 111 | 112 | -------------------------------------------------------------------------------- /examples/tests/Test-MapReduce.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Main where 3 | 4 | -- A simple word frequency counter using the task layer's mapreduce.
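The mapper below emits one (word, 1) pair per occurrence, and mtShuffle regroups those pairs by key before each reducer sums its group. A plain-list model of that regrouping step, written only to make the data flow concrete (the Task layer supplies the real shuffle):

```
import Data.Function (on)
import Data.List (groupBy, sortBy)

-- Regroup mapper output [(k,v)] into reducer input [(k,[v])]:
-- sort by key, group equal keys, and collect their values.
shuffleModel :: Ord k => [(k, v)] -> [(k, [v])]
shuffleModel =
  map (\g -> (fst (head g), map snd g))
    . groupBy ((==) `on` fst)
    . sortBy (compare `on` fst)
```

For example, shuffleModel [("a",1),("b",1),("a",1)] yields [("a",[1,1]),("b",[1])], which is exactly the shape mrReducer consumes.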
5 | 6 | import Remote 7 | 8 | import Control.Monad.Trans 9 | import Control.Monad 10 | 11 | import Debug.Trace 12 | 13 | type Line = String 14 | type Word = String 15 | 16 | mrMapper :: [Line] -> TaskM [(Word, Int)] 17 | mrMapper lines = 18 | return (concatMap (\line -> map (\w -> (w,1)) (words line)) lines) 19 | 20 | mrReducer :: (Word,[Int]) -> TaskM (Word,Int) 21 | mrReducer (w,p) = 22 | return (w,sum p) 23 | 24 | $( remotable ['mrMapper, 'mrReducer] ) 25 | 26 | myMapReduce = MapReduce 27 | { 28 | mtMapper = mrMapper__closure, 29 | mtReducer = mrReducer__closure, 30 | mtChunkify = chunkify 5, 31 | mtShuffle = shuffle 32 | } 33 | 34 | initialProcess "MASTER" = 35 | do args <- getCfgArgs 36 | case args of 37 | [filename] -> 38 | do file <- liftIO $ readFile filename 39 | ans <- runTask $ 40 | do mapReduce myMapReduce (lines file) 41 | say $ show ans 42 | _ -> say "When starting MASTER, please also provide a filename on the command line" 43 | initialProcess "WORKER" = receiveWait [] 44 | initialProcess _ = say "You need to start this program as either a MASTER or a WORKER. Set the appropriate value of cfgRole on the command line or in the config file." 45 | 46 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 47 | 48 | -------------------------------------------------------------------------------- /examples/tests/Test-Message.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable,TemplateHaskell #-} 2 | module Main where 3 | 4 | -- A convoluted example showing some different ways 5 | -- messages can be passed between processes. 6 | 7 | import Remote 8 | 9 | import Data.Typeable (Typeable) 10 | import Data.Data (Data) 11 | import Data.Binary (Binary,get,put) 12 | import System.Random (randomR,getStdRandom) 13 | import Control.Concurrent (threadDelay) 14 | import Control.Monad (when) 15 | import Control.Monad.Trans (liftIO) 16 | 17 | data Chunk = Chunk Int deriving (Typeable,Data) 18 | 19 | instance Binary Chunk where 20 | get = genericGet 21 | put = genericPut 22 | 23 | slaveWorker :: ProcessM () 24 | slaveWorker = 25 | receiveWait [match(\sndport -> mapM (sendrand sndport) [0..50] >> sendChannel sndport (Chunk 0))] 26 | where sendrand sndport _ = 27 | do newval <- liftIO $ getStdRandom (randomR (1,100)) 28 | sendChannel sndport (Chunk newval) 29 | 30 | $( remotable ['slaveWorker] ) 31 | 32 | testNode :: ProcessId -> ProcessM () 33 | testNode pid = 34 | let getvals mainpid rcv = 35 | do (Chunk val) <- receiveChannel rcv 36 | send mainpid val 37 | when (val /= 0) 38 | (getvals mainpid rcv) 39 | in 40 | do (sendport, receiveport) <- newChannel 41 | self <- getSelfPid 42 | send pid sendport 43 | spawnLocal $ getvals self receiveport 44 | return () 45 | 46 | 47 | initialProcess "MASTER" = 48 | do peers <- getPeers 49 | let slaves = findPeerByRole peers "SLAVE" 50 | 51 | slavepids <- mapM (\slave -> spawn slave slaveWorker__closure) slaves 52 | mapM_ testNode slavepids 53 | 54 | let getsome = 55 | do res <- receiveWait [match(\i -> say ("Got " ++ show (i::Int)) >> return (i/=0))] 56 | when res getsome 57 | mapM_ (const getsome) slaves 58 | 59 | initialProcess "SLAVE" = receiveWait [] 60 | 61 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 62 | 63 | -------------------------------------------------------------------------------- /examples/tests/Test-Task.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell
#-} 2 | module Main where 3 | 4 | -- A demonstration of the data dependency resolution 5 | -- of the Task layer. Each newPromise spawns a task 6 | -- that calculates a value. Calls to adder and diver 7 | -- take integers, but calls to merger take Promises, 8 | -- which are an expression of the value computed 9 | -- or yet-to-be-computed by another task. The 10 | -- call to readPromise will retrieve that value 11 | -- or wait until it's available. 12 | 13 | -- You can run this program on one node, or 14 | -- on several; the task layer will automatically 15 | -- allocate tasks to whatever nodes it can talk to. 16 | -- Unlike the Process layer, the programmer doesn't 17 | -- need to specify the location. 18 | 19 | import Remote 20 | 21 | adder :: Int -> Int -> TaskM Int 22 | adder a b = do return $ a + b 23 | 24 | diver :: Int -> Int -> TaskM Int 25 | diver a b = do return $ a `div` b 26 | 27 | merger :: Promise Int -> Promise Int -> TaskM Int 28 | merger a b = do a1 <- readPromise a 29 | b1 <- readPromise b 30 | return $ a1+b1 31 | 32 | 33 | $( remotable ['diver,'adder, 'merger] ) 34 | 35 | initialProcess "MASTER" = 36 | do ans <- runTask $ 37 | do 38 | a <- newPromise (diver__closure 300 9) 39 | b <- newPromise (diver__closure 300 5) 40 | c <- newPromise (diver__closure 300 1) 41 | d <- newPromise (adder__closure 300 9) 42 | e <- newPromise (adder__closure 300 5) 43 | f <- newPromise (adder__closure 300 1) 44 | g <- newPromise (merger__closure a b) 45 | h <- newPromise (merger__closure g c) 46 | i <- newPromise (merger__closure c d) 47 | j <- newPromise (merger__closure e f) 48 | k <- newPromise (merger__closure i j) 49 | l <- newPromise (merger__closure k g) 50 | readPromise l 51 | say $ show ans 52 | initialProcess "WORKER" = receiveWait [] 53 | 54 | main = remoteInit (Just "config") [Main.__remoteCallMetaData] initialProcess 55 | 56 | -------------------------------------------------------------------------------- /examples/tests/config: -------------------------------------------------------------------------------- 1 | # config 2 | # Cloud Haskell config file. 3 | # Most Cloud Haskell applications will look for this file 4 | # in the current directory, although it's ultimately up 5 | # to the programmer which filename to use or whether to 6 | # use a config file at all. You may override 7 | # the default location with the RH_CONFIG 8 | # environment variable. 9 | # All the options given in this file may 10 | # also be given on the command line of a 11 | # Cloud Haskell application. Options on the 12 | # command line override options in a config 13 | # file. For example, to set the role of an 14 | # application, use the following command 15 | # line: 16 | # ./MyApp -cfgRole=MASTER 17 | # 18 | # This is a reasonable default config file. 19 | 20 | # The role of a node determines its initial behavior. 21 | # Typical roles include MASTER or WORKER. Typically, 22 | # when starting an application, this value must be 23 | # specified, either here or on the command line. 24 | # Default: NODE 25 | cfgRole NODE 26 | 27 | # If specified, this option will override the 28 | # default assignment of host name, which forms 29 | # part of all node IDs and process IDs originating 30 | # on this node. 31 | ### cfgHostName somehostname 32 | 33 | # If nonzero, will force the node to listen for 34 | # new incoming connections on a specific port. 35 | # Otherwise, the port is assigned by the OS.
36 | # Default: 0 37 | cfgListenPort 0 38 | 39 | # This is the UDP port on which local network 40 | # broadcasts are sent to discover peers. 41 | # If 0, no dynamic peer discovery is performed. 42 | # This value must be the same for all nodes 43 | # that want to participate in dynamic discovery 44 | # with each other. 45 | # Default: 38813 46 | cfgPeerDiscoveryPort 38813 47 | 48 | # A unique token that nodes exchange when they 49 | # communicate. The intent is to prevent different 50 | # applications on the same network from mixing 51 | # up their nodes. This value must be the same 52 | # for all nodes that want to communicate with each 53 | # other. 54 | # Default: MAGIC 55 | cfgNetworkMagic MAGIC 56 | 57 | # Specifies the port where the node registration 58 | # server will bind. This value must be the same 59 | # for all nodes that want to communicate with each 60 | # other. 61 | # Default: 38813 62 | cfgLocalRegistryListenPort 38813 63 | 64 | # A list of hosts where nodes will be searched for. 65 | # All hosts that you wish to communicate with should 66 | # be included in this list. By host name, we mean 67 | # the DNS name or IP address of a computer accessible 68 | # over the network interface where a node registry 69 | # may be running. Hostnames are separated by spaces. 70 | ### cfgKnownHosts somehostname anotherhostname thirdhostname 71 | 72 | # The maximum delay, in microseconds, to tolerate 73 | # when waiting for responses from system services. In an extremely congested 74 | # network environment, you may want to increase this. 75 | # If 0, applications waiting for dead services 76 | # may wait indefinitely. 77 | # Default: 10000000 78 | cfgRoundtripTimeout 10000000 79 | 80 | # The maximum number of concurrent outgoing TCP 81 | # connections (for sending messages) from this node. 82 | # If outgoing message density is higher than this 83 | # number, some processes will block. 84 | # Default: 50 85 | cfgMaxOutgoing 50 86 | 87 | # Relates to the task layer only. The amount of 88 | # time, in microseconds, to wait for an unused 89 | # promise to flush to a disk file. Flushing is good, 90 | # because it frees up memory for other data, but 91 | # if this value is too low, heavy disk access 92 | # may slow down your program. If 0, promises 93 | # are never flushed to disk. 94 | # Default: 5000000 95 | cfgPromiseFlushDelay 5000000 96 | 97 | # Relates to the task layer only. The prefix 98 | # of the files to be written when they 99 | # are flushed. May contain a full path.
100 | # Default: rpromise- 101 | cfgPromisePrefix rpromise- 102 | 103 | 104 | -------------------------------------------------------------------------------- /remote.cabal: -------------------------------------------------------------------------------- 1 | Name: remote 2 | Version: 0.1.1 3 | Cabal-Version: >=1.8 4 | Description: Fault-tolerant distributed computing framework 5 | synopsis: Cloud Haskell 6 | License: BSD3 7 | License-file: LICENSE 8 | Extra-Source-Files: README.md 9 | Author: Jeff Epstein 10 | Maintainer: Jeff Epstein 11 | Build-Type: Simple 12 | tested-with: GHC ==6.12.1 13 | Category: Distributed Computing 14 | 15 | extra-source-files: 16 | examples/kmeans/KMeans.hs 17 | examples/kmeans/KMeans3.hs 18 | examples/kmeans/KMeansCommon.hs 19 | examples/kmeans/MakeData.hs 20 | examples/kmeans/awsgo 21 | examples/kmeans/awskill 22 | examples/kmeans/awslist 23 | examples/kmeans/kmeans 24 | examples/pi/Pi6.hs 25 | examples/pi/Pi7.hs 26 | examples/pi/PiCommon.hs 27 | examples/tests/Test-Call.hs 28 | examples/tests/Test-Channel-Merge.hs 29 | examples/tests/Test-Channel.hs 30 | examples/tests/Test-Closure.hs 31 | examples/tests/Test-MapReduce.hs 32 | examples/tests/Test-Message.hs 33 | examples/tests/Test-Task.hs 34 | examples/tests/config 35 | 36 | source-repository head 37 | Type: git 38 | Location: git://github.com/jepst/CloudHaskell.git 39 | 40 | library 41 | Build-Depends: base >= 4 && < 5, time, filepath, containers, network, syb, mtl, binary, bytestring, template-haskell, stm, pureMD5, utf8-string, directory 42 | ghc-options: -Wall 43 | Extensions: TemplateHaskell, FlexibleInstances, UndecidableInstances, CPP, ExistentialQuantification, DeriveDataTypeable 44 | Exposed-Modules: Remote.Process, Remote.Encoding, Remote.Call, Remote.Reg, Remote.Peer, Remote.Init, Remote.Closure, Remote.Channel, Remote.Task, Remote 45 | 46 | -------------------------------------------------------------------------------- /util/Diag.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | -- This is a diagnostic program that will 4 | -- give you feedback about what it sees 5 | -- as your current configuration of Cloud Haskell. 6 | -- If your program isn't able to talk to remote 7 | -- nodes, run this program to check your configuration. 8 | 9 | import Remote (remoteInit, getPeers, getSelfNode, ProcessM) 10 | import Remote.Process (hostFromNid,getConfig,cfgNetworkMagic,cfgKnownHosts,cfgPeerDiscoveryPort) 11 | 12 | import qualified Data.Map as Map (elems) 13 | import Control.Monad.Trans (liftIO) 14 | import Data.List (intercalate,nub) 15 | 16 | s :: String -> ProcessM () 17 | s = liftIO . putStrLn 18 | 19 | orNone :: [String] -> String 20 | orNone [] = "None" 21 | orNone a = intercalate "," a 22 | 23 | initialProcess myRole = 24 | do s "Cloud Haskell diagnostics\n" 25 | mynid <- getSelfNode 26 | peers <- getPeers 27 | cfg <- getConfig 28 | s $ "I seem to be running on host \""++hostFromNid mynid++"\".\nIf that's wrong, set it using the cfgHostName option.\n" 29 | s $ "My role is \""++myRole++"\".\nIf that's wrong, set it using the cfgRole option.\n" 30 | s $ "My magic is \""++cfgNetworkMagic cfg++"\".\nIf that's wrong, set it using the cfgNetworkMagic option.\n" 31 | s $ "I will look for nodes on the following hosts: " ++ orNone (cfgKnownHosts cfg) 32 | s $ if cfgPeerDiscoveryPort cfg > 0 33 | then "I will also look for nodes on the local network." 34 | else "I will not look for nodes on the local network other than those named above." 
35 | let hosts = orNone $ nub $ map (hostFromNid) (concat $ Map.elems peers) 36 | s $ "I have found nodes on the following hosts: "++hosts++".\nIf I'm not finding all the nodes you expected, make sure they:" 37 | s $ "\tare running\n\tare not behind a firewall\n\thave the same magic\n\tare listed in cfgKnownHosts" 38 | 39 | main = remoteInit (Just "config") [] initialProcess 40 | 41 | -------------------------------------------------------------------------------- /util/RegServ.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | -- This is the standalone node registry service. 4 | 5 | import Remote.Process (standaloneLocalRegistry) 6 | 7 | main = standaloneLocalRegistry "config" 8 | --------------------------------------------------------------------------------
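Every executable in this repository, from the k-means drivers to these utilities, shares one bootstrap shape: remoteInit reads the config file, registers any metadata generated by remotable, and dispatches on the node's role. A minimal skeleton of that pattern, using only entry points that appear in the sources above:

```
module Main (main) where

import Remote (ProcessM, receiveWait, remoteInit, say)

-- Masters do their work and exit; every other role parks in
-- receiveWait [] so the node stays alive to host spawned processes.
initialProcess :: String -> ProcessM ()
initialProcess "MASTER" = say "Hello from the master node"
initialProcess _        = receiveWait []

main :: IO ()
main = remoteInit (Just "config") [] initialProcess
```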