├── Setup.hs
├── .gitignore
├── CONTRIBUTING.md
├── REPOS
├── src
│   ├── Common
│   │   └── PrimeFactors.hs
│   ├── WorkPushing
│   │   ├── SimpleLocalnet.hs
│   │   ├── WorkPushing.hs
│   │   └── Azure.hs
│   ├── WorkStealing
│   │   ├── SimpleLocalnet.hs
│   │   └── WorkStealing.hs
│   ├── TypedWorkPushing
│   │   ├── SimpleLocalnet.hs
│   │   ├── TypedWorkPushing.hs
│   │   └── Azure.hs
│   ├── MasterSlave
│   │   ├── SimpleLocalnet.hs
│   │   ├── Azure.hs
│   │   └── MasterSlave.hs
│   └── MapReduce
│       ├── CountWords.hs
│       ├── MapReduce.hs
│       ├── MonoDistrMapReduce.hs
│       ├── SimpleLocalnet.hs
│       ├── KMeans.hs
│       └── PolyDistrMapReduce.hs
├── README.md
├── LICENSE
└── distributed-process-demos.cabal

/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist/
2 | .cabal-sandbox
3 | cabal.sandbox.config
4 | .stack*
5 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | See https://github.com/haskell-distributed/cloud-haskell/blob/master/CONTRIBUTING.md.
2 | 
--------------------------------------------------------------------------------
/REPOS:
--------------------------------------------------------------------------------
1 | azure-service-api
2 | distributed-process-azure
3 | 
4 | distributed-process-simplelocalnet
5 | distributed-static
6 | network-transport
7 | network-transport-composed
8 | network-transport-inmemory
9 | network-transport-tcp
10 | network-transport-tests
11 | rank1dynamic
12 | 
--------------------------------------------------------------------------------
/src/Common/PrimeFactors.hs:
--------------------------------------------------------------------------------
1 | -- | Prime factorization
2 | --
3 | -- Written by Dan Weston
4 | --
5 | module PrimeFactors where
6 | 
7 | primes :: [Integer]
8 | primes = primes' (2:[3,5..])
9 |   where
10 |     primes' (x:xs) = x : primes' (filter (notDivisorOf x) xs)
11 |     notDivisorOf d n = n `mod` d /= 0
12 | 
13 | factors :: [Integer] -> Integer -> [Integer]
14 | factors qs@(p:ps) n
15 |   | n <= 1    = []
16 |   | m == 0    = p : factors qs d
17 |   | otherwise = factors ps n
18 |   where
19 |     (d,m) = n `divMod` p
20 | 
21 | primeFactors :: Integer -> [Integer]
22 | primeFactors = factors primes
23 | 
24 | numPrimeFactors :: Integer -> Integer
25 | numPrimeFactors = fromIntegral . length . 
primeFactors 26 | -------------------------------------------------------------------------------- /src/WorkPushing/SimpleLocalnet.hs: -------------------------------------------------------------------------------- 1 | import System.Environment (getArgs) 2 | import Control.Distributed.Process 3 | import Control.Distributed.Process.Node (initRemoteTable) 4 | import Control.Distributed.Process.Backend.SimpleLocalnet 5 | import qualified WorkPushing 6 | 7 | rtable :: RemoteTable 8 | rtable = WorkPushing.__remoteTable initRemoteTable 9 | 10 | main :: IO () 11 | main = do 12 | args <- getArgs 13 | 14 | case args of 15 | ["master", host, port, n] -> do 16 | backend <- initializeBackend host port rtable 17 | startMaster backend $ \slaves -> do 18 | result <- WorkPushing.master (read n) slaves 19 | liftIO $ print result 20 | ["slave", host, port] -> do 21 | backend <- initializeBackend host port rtable 22 | startSlave backend 23 | -------------------------------------------------------------------------------- /src/WorkStealing/SimpleLocalnet.hs: -------------------------------------------------------------------------------- 1 | import System.Environment (getArgs) 2 | import Control.Distributed.Process 3 | import Control.Distributed.Process.Node (initRemoteTable) 4 | import Control.Distributed.Process.Backend.SimpleLocalnet 5 | import qualified WorkStealing 6 | 7 | rtable :: RemoteTable 8 | rtable = WorkStealing.__remoteTable initRemoteTable 9 | 10 | main :: IO () 11 | main = do 12 | args <- getArgs 13 | 14 | case args of 15 | ["master", host, port, n] -> do 16 | backend <- initializeBackend host port rtable 17 | startMaster backend $ \slaves -> do 18 | result <- WorkStealing.master (read n) slaves 19 | liftIO $ print result 20 | ["slave", host, port] -> do 21 | backend <- initializeBackend host port rtable 22 | startSlave backend 23 | -------------------------------------------------------------------------------- /src/TypedWorkPushing/SimpleLocalnet.hs: -------------------------------------------------------------------------------- 1 | import System.Environment (getArgs) 2 | import Control.Distributed.Process 3 | import Control.Distributed.Process.Node (initRemoteTable) 4 | import Control.Distributed.Process.Backend.SimpleLocalnet 5 | import qualified TypedWorkPushing 6 | 7 | rtable :: RemoteTable 8 | rtable = TypedWorkPushing.__remoteTable initRemoteTable 9 | 10 | main :: IO () 11 | main = do 12 | args <- getArgs 13 | 14 | case args of 15 | ["master", host, port, n] -> do 16 | backend <- initializeBackend host port rtable 17 | startMaster backend $ \slaves -> do 18 | result <- TypedWorkPushing.master (read n) slaves 19 | liftIO $ print result 20 | ["slave", host, port] -> do 21 | backend <- initializeBackend host port rtable 22 | startSlave backend 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # distributed-process-demos 2 | [![travis](https://secure.travis-ci.org/haskell-distributed/distributed-process-demos.png)](http://travis-ci.org/haskell-distributed/distributed-process-demos) 3 | [![Release](https://img.shields.io/hackage/v/distributed-process-demos.svg)](https://hackage.haskell.org/package/distributed-process-demos) 4 | 5 | This repository is part of Cloud Haskell. 6 | 7 | See http://haskell-distributed.github.com for documentation, user guides, 8 | tutorials and assistance. 
9 | 10 | ## Getting Help / Raising Issues 11 | 12 | Please visit the [bug tracker](https://github.com/haskell-distributed/distributed-process-demos/issues) to submit issues. You can contact the distributed-haskell@googlegroups.com mailing list for help and comments. 13 | 14 | ## License 15 | 16 | This package is made available under a 3-clause BSD-style license. 17 | -------------------------------------------------------------------------------- /src/MasterSlave/SimpleLocalnet.hs: -------------------------------------------------------------------------------- 1 | import System.Environment (getArgs) 2 | import Control.Exception (evaluate) 3 | import Control.Distributed.Process 4 | import Control.Distributed.Process.Node (initRemoteTable) 5 | import Control.Distributed.Process.Backend.SimpleLocalnet 6 | import qualified MasterSlave 7 | 8 | rtable :: RemoteTable 9 | rtable = MasterSlave.__remoteTable initRemoteTable 10 | 11 | main :: IO () 12 | main = do 13 | args <- getArgs 14 | 15 | case args of 16 | ["master", host, port, strN, strSpawnStrategy] -> do 17 | backend <- initializeBackend host port rtable 18 | n <- evaluate $ read strN 19 | spawnStrategy <- evaluate $ read strSpawnStrategy 20 | startMaster backend $ \slaves -> do 21 | result <- MasterSlave.master n spawnStrategy slaves 22 | liftIO $ print result 23 | ["slave", host, port] -> do 24 | backend <- initializeBackend host port rtable 25 | startSlave backend 26 | -------------------------------------------------------------------------------- /src/MapReduce/CountWords.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TupleSections #-} 2 | module CountWords 3 | ( Document 4 | , localCountWords 5 | , distrCountWords 6 | , __remoteTable 7 | ) where 8 | 9 | import Control.Distributed.Process 10 | import Control.Distributed.Process.Closure 11 | import MapReduce 12 | import MonoDistrMapReduce hiding (__remoteTable) 13 | import Prelude hiding (Word) 14 | 15 | type Document = String 16 | type Word = String 17 | type Frequency = Int 18 | 19 | countWords :: MapReduce FilePath Document Word Frequency Frequency 20 | countWords = MapReduce { 21 | mrMap = const (map (, 1) . 
words) 22 | , mrReduce = const sum 23 | } 24 | 25 | localCountWords :: Map FilePath Document -> Map Word Frequency 26 | localCountWords = localMapReduce countWords 27 | 28 | countWords_ :: () -> MapReduce FilePath Document Word Frequency Frequency 29 | countWords_ () = countWords 30 | 31 | remotable ['countWords_] 32 | 33 | distrCountWords :: [NodeId] -> Map FilePath Document -> Process (Map Word Frequency) 34 | distrCountWords = distrMapReduce ($(mkClosure 'countWords_) ()) 35 | -------------------------------------------------------------------------------- /src/WorkPushing/WorkPushing.hs: -------------------------------------------------------------------------------- 1 | module WorkPushing where 2 | 3 | import Control.Monad 4 | import Control.Distributed.Process 5 | import Control.Distributed.Process.Closure 6 | import PrimeFactors 7 | 8 | slave :: ProcessId -> Process () 9 | slave them = forever $ do 10 | n <- expect 11 | send them (numPrimeFactors n) 12 | 13 | remotable ['slave] 14 | 15 | -- | Wait for n integers and sum them all up 16 | sumIntegers :: Int -> Process Integer 17 | sumIntegers = go 0 18 | where 19 | go :: Integer -> Int -> Process Integer 20 | go !acc 0 = return acc 21 | go !acc n = do 22 | m <- expect 23 | go (acc + m) (n - 1) 24 | 25 | master :: Integer -> [NodeId] -> Process Integer 26 | master n slaves = do 27 | us <- getSelfPid 28 | 29 | -- Start slave processes 30 | slaveProcesses <- forM slaves $ \nid -> spawn nid ($(mkClosure 'slave) us) 31 | 32 | -- Distribute 1 .. n amongst the slave processes 33 | spawnLocal $ forM_ (zip [1 .. n] (cycle slaveProcesses)) $ 34 | \(m, them) -> send them m 35 | 36 | -- Wait for the result 37 | sumIntegers (fromIntegral n) 38 | -------------------------------------------------------------------------------- /src/TypedWorkPushing/TypedWorkPushing.hs: -------------------------------------------------------------------------------- 1 | module TypedWorkPushing where 2 | 3 | import Control.Monad 4 | import Control.Distributed.Process 5 | import Control.Distributed.Process.Closure 6 | import PrimeFactors 7 | 8 | slave :: SendPort Integer -> ReceivePort Integer -> Process () 9 | slave results todo = forever $ do 10 | n <- receiveChan todo 11 | sendChan results (numPrimeFactors n) 12 | 13 | sdictInteger :: SerializableDict Integer 14 | sdictInteger = SerializableDict 15 | 16 | remotable ['slave, 'sdictInteger] 17 | 18 | -- | Wait for n integers and sum them all up 19 | sumIntegers :: ReceivePort Integer -> Int -> Process Integer 20 | sumIntegers rport = go 0 21 | where 22 | go :: Integer -> Int -> Process Integer 23 | go !acc 0 = return acc 24 | go !acc n = do 25 | m <- receiveChan rport 26 | go (acc + m) (n - 1) 27 | 28 | master :: Integer -> [NodeId] -> Process Integer 29 | master n slaves = do 30 | (sport, rport) <- newChan 31 | 32 | -- Start slave processes 33 | slaveProcesses <- forM slaves $ \nid -> 34 | spawnChannel $(mkStatic 'sdictInteger) nid ($(mkClosure 'slave) sport) 35 | 36 | -- Distribute 1 .. n amongst the slave processes 37 | spawnLocal $ forM_ (zip [1 .. 
n] (cycle slaveProcesses)) $
38 |     \(m, them) -> sendChan them m
39 | 
40 |   -- Wait for the result
41 |   sumIntegers rport (fromIntegral n)
42 | 
--------------------------------------------------------------------------------
/src/MasterSlave/Azure.hs:
--------------------------------------------------------------------------------
1 | import System.Environment (getArgs)
2 | import Control.Monad
3 | import Control.Distributed.Process
4 | import Control.Distributed.Process.Closure
5 | import Control.Distributed.Process.Backend.Azure
6 | import qualified MasterSlave
7 | 
8 | azureMaster :: ([NodeId], Integer) -> Backend -> Process ()
9 | azureMaster (slaves, n) _backend = do
10 |   result <- MasterSlave.master n slaves
11 |   mapM_ terminateNode slaves
12 |   remoteSend result
13 | 
14 | remotable ['azureMaster]
15 | 
16 | printResult :: LocalProcess ()
17 | printResult = do
18 |   result <- localExpect :: LocalProcess Integer
19 |   liftIO $ print result
20 | 
21 | main :: IO ()
22 | main = do
23 |   args <- getArgs
24 |   case args of
25 |     "onvm":args' -> onVmMain (__remoteTable . MasterSlave.__remoteTable) args'
26 |     [sid, x509, pkey, user, cloudService, n] -> do
27 |       params <- defaultAzureParameters sid x509 pkey
28 |       let params' = params { azureSshUserName = user }
29 |       backend <- initializeBackend params' cloudService
30 |       vms <- findVMs backend
31 |       print vms
32 |       nids <- forM vms $ \vm -> spawnNodeOnVM backend vm "8080"
33 |       callOnVM backend (head vms) "8081" $
34 |         ProcessPair ($(mkClosure 'azureMaster) (nids, read n :: Integer))
35 |                     printResult
36 |     _ ->
37 |       error "Invalid command line arguments"
38 | 
--------------------------------------------------------------------------------
/src/WorkPushing/Azure.hs:
--------------------------------------------------------------------------------
1 | import System.Environment (getArgs)
2 | import Control.Monad
3 | import Control.Distributed.Process
4 | import Control.Distributed.Process.Closure
5 | import Control.Distributed.Process.Backend.Azure
6 | import qualified WorkPushing
7 | 
8 | azureMaster :: ([NodeId], Integer) -> Backend -> Process ()
9 | azureMaster (slaves, n) _backend = do
10 |   result <- WorkPushing.master n slaves
11 |   mapM_ terminateNode slaves
12 |   remoteSend result
13 | 
14 | remotable ['azureMaster]
15 | 
16 | printResult :: LocalProcess ()
17 | printResult = do
18 |   result <- localExpect :: LocalProcess Integer
19 |   liftIO $ print result
20 | 
21 | main :: IO ()
22 | main = do
23 |   args <- getArgs
24 |   case args of
25 |     "onvm":args' -> onVmMain (__remoteTable . WorkPushing.__remoteTable) args'
26 |     [sid, x509, pkey, user, cloudService, n] -> do
27 |       params <- defaultAzureParameters sid x509 pkey
28 |       let params' = params { azureSshUserName = user }
29 |       backend <- initializeBackend params' cloudService
30 |       vms <- findVMs backend
31 |       print vms
32 |       nids <- forM vms $ \vm -> spawnNodeOnVM backend vm "8080"
33 |       callOnVM backend (head vms) "8081" $
34 |         ProcessPair ($(mkClosure 'azureMaster) (nids, read n :: Integer))
35 |                     printResult
36 |     _ ->
37 |       error "Invalid command line arguments"
38 | 
--------------------------------------------------------------------------------
/src/TypedWorkPushing/Azure.hs:
--------------------------------------------------------------------------------
1 | import System.Environment (getArgs)
2 | import Control.Monad
3 | import Control.Distributed.Process
4 | import Control.Distributed.Process.Closure
5 | import Control.Distributed.Process.Backend.Azure
6 | import qualified TypedWorkPushing
7 | 
8 | azureMaster :: ([NodeId], Integer) -> Backend -> Process ()
9 | azureMaster (slaves, n) _backend = do
10 |   result <- TypedWorkPushing.master n slaves
11 |   mapM_ terminateNode slaves
12 |   remoteSend result
13 | 
14 | remotable ['azureMaster]
15 | 
16 | printResult :: LocalProcess ()
17 | printResult = do
18 |   result <- localExpect :: LocalProcess Integer
19 |   liftIO $ print result
20 | 
21 | main :: IO ()
22 | main = do
23 |   args <- getArgs
24 |   case args of
25 |     "onvm":args' -> onVmMain (__remoteTable . TypedWorkPushing.__remoteTable) args'
26 |     [sid, x509, pkey, user, cloudService, n] -> do
27 |       params <- defaultAzureParameters sid x509 pkey
28 |       let params' = params { azureSshUserName = user }
29 |       backend <- initializeBackend params' cloudService
30 |       vms <- findVMs backend
31 |       print vms
32 |       nids <- forM vms $ \vm -> spawnNodeOnVM backend vm "8080"
33 |       callOnVM backend (head vms) "8081" $
34 |         ProcessPair ($(mkClosure 'azureMaster) (nids, read n :: Integer))
35 |                     printResult
36 |     _ ->
37 |       error "Invalid command line arguments"
38 | 
--------------------------------------------------------------------------------
/src/WorkStealing/WorkStealing.hs:
--------------------------------------------------------------------------------
1 | module WorkStealing where
2 | 
3 | import Control.Monad
4 | import Control.Distributed.Process
5 | import Control.Distributed.Process.Closure
6 | import PrimeFactors
7 | 
8 | slave :: (ProcessId, ProcessId) -> Process ()
9 | slave (master, workQueue) = do
10 |   us <- getSelfPid
11 |   go us
12 |   where
13 |     go us = do
14 |       -- Ask the queue for work
15 |       send workQueue us
16 | 
17 |       -- If there is work, do it, otherwise terminate
18 |       receiveWait
19 |         [ match $ \n  -> send master (numPrimeFactors n) >> go us
20 |         , match $ \() -> return ()
21 |         ]
22 | 
23 | remotable ['slave]
24 | 
25 | -- | Wait for n integers and sum them all up
26 | sumIntegers :: Int -> Process Integer
27 | sumIntegers = go 0
28 |   where
29 |     go :: Integer -> Int -> Process Integer
30 |     go !acc 0 = return acc
31 |     go !acc n = do
32 |       m <- expect
33 |       go (acc + m) (n - 1)
34 | 
35 | master :: Integer -> [NodeId] -> Process Integer
36 | master n slaves = do
37 |   us <- getSelfPid
38 | 
39 |   workQueue <- spawnLocal $ do
40 |     -- Reply with the next bit of work to be done
41 |     forM_ [1 .. 
n] $ \m -> do 42 | them <- expect 43 | send them m 44 | 45 | -- Once all the work is done, tell the slaves to terminate 46 | forever $ do 47 | pid <- expect 48 | send pid () 49 | 50 | -- Start slave processes 51 | forM_ slaves $ \nid -> spawn nid ($(mkClosure 'slave) (us, workQueue)) 52 | 53 | -- Wait for the result 54 | sumIntegers (fromIntegral n) 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Edsko de Vries 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Edsko de Vries nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /src/MasterSlave/MasterSlave.hs: -------------------------------------------------------------------------------- 1 | module MasterSlave where 2 | 3 | import Control.Monad 4 | import Control.Distributed.Process 5 | import Control.Distributed.Process.Closure 6 | import PrimeFactors 7 | 8 | slave :: (ProcessId, Integer) -> Process () 9 | slave (pid, n) = send pid (numPrimeFactors n) 10 | 11 | remotable ['slave] 12 | 13 | -- | Wait for n integers and sum them all up 14 | sumIntegers :: Int -> Process Integer 15 | sumIntegers = go 0 16 | where 17 | go :: Integer -> Int -> Process Integer 18 | go !acc 0 = return acc 19 | go !acc n = do 20 | m <- expect 21 | go (acc + m) (n - 1) 22 | 23 | data SpawnStrategy = SpawnSyncWithReconnect 24 | | SpawnSyncNoReconnect 25 | | SpawnAsync 26 | deriving (Show, Read) 27 | 28 | master :: Integer -> SpawnStrategy -> [NodeId] -> Process Integer 29 | master n spawnStrategy slaves = do 30 | us <- getSelfPid 31 | 32 | -- Distribute 1 .. n amongst the slave processes 33 | spawnLocal $ case spawnStrategy of 34 | SpawnSyncWithReconnect -> 35 | forM_ (zip [1 .. n] (cycle slaves)) $ \(m, there) -> do 36 | them <- spawn there ($(mkClosure 'slave) (us, m)) 37 | reconnect them 38 | SpawnSyncNoReconnect -> 39 | forM_ (zip [1 .. 
n] (cycle slaves)) $ \(m, there) -> do 40 | _them <- spawn there ($(mkClosure 'slave) (us, m)) 41 | return () 42 | SpawnAsync -> 43 | forM_ (zip [1 .. n] (cycle slaves)) $ \(m, there) -> do 44 | spawnAsync there ($(mkClosure 'slave) (us, m)) 45 | _ <- expectTimeout 0 :: Process (Maybe DidSpawn) 46 | return () 47 | 48 | -- Wait for the result 49 | sumIntegers (fromIntegral n) 50 | -------------------------------------------------------------------------------- /src/MapReduce/MapReduce.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable, GADTs #-} 2 | module MapReduce 3 | ( -- * Map-reduce skeleton and implementation 4 | MapReduce(..) 5 | , localMapReduce 6 | -- * Map-reduce algorithmic components 7 | , reducePerKey 8 | , groupByKey 9 | -- * Re-exports from Data.Map 10 | , Map 11 | ) where 12 | 13 | import Data.Typeable (Typeable) 14 | import Data.Map (Map) 15 | import qualified Data.Map as Map (mapWithKey, fromListWith, toList) 16 | import Control.Arrow (second) 17 | 18 | -------------------------------------------------------------------------------- 19 | -- Definition of a MapReduce skeleton and a local implementation -- 20 | -------------------------------------------------------------------------------- 21 | 22 | -- | MapReduce skeleton 23 | data MapReduce k1 v1 k2 v2 v3 = MapReduce { 24 | mrMap :: k1 -> v1 -> [(k2, v2)] 25 | , mrReduce :: k2 -> [v2] -> v3 26 | } deriving (Typeable) 27 | 28 | -- | Local (non-distributed) implementation of the map-reduce algorithm 29 | -- 30 | -- This can be regarded as the specification of map-reduce; see 31 | -- /Google’s MapReduce Programming Model---Revisited/ by Ralf Laemmel 32 | -- (). 33 | localMapReduce :: forall k1 k2 v1 v2 v3. Ord k2 => 34 | MapReduce k1 v1 k2 v2 v3 35 | -> Map k1 v1 36 | -> Map k2 v3 37 | localMapReduce mr = reducePerKey mr . groupByKey . mapPerKey mr 38 | 39 | reducePerKey :: MapReduce k1 v1 k2 v2 v3 -> Map k2 [v2] -> Map k2 v3 40 | reducePerKey mr = Map.mapWithKey (mrReduce mr) 41 | 42 | groupByKey :: Ord k2 => [(k2, v2)] -> Map k2 [v2] 43 | groupByKey = Map.fromListWith (++) . map (second return) 44 | 45 | mapPerKey :: MapReduce k1 v1 k2 v2 v3 -> Map k1 v1 -> [(k2, v2)] 46 | mapPerKey mr = concatMap (uncurry (mrMap mr)) . 
Map.toList
47 | 
--------------------------------------------------------------------------------
/src/MapReduce/MonoDistrMapReduce.hs:
--------------------------------------------------------------------------------
1 | -- | Monomorphic "single-shot" distributed implementation of map-reduce
2 | module MonoDistrMapReduce (distrMapReduce, __remoteTable) where
3 | 
4 | import Data.Map (Map)
5 | import qualified Data.Map as Map (size, toList)
6 | import Control.Monad (forM_, replicateM, replicateM_)
7 | import Control.Distributed.Process
8 | import Control.Distributed.Process.Closure
9 | import MapReduce (MapReduce(..), reducePerKey, groupByKey)
10 | 
11 | --------------------------------------------------------------------------------
12 | -- Simple distributed implementation                                          --
13 | --------------------------------------------------------------------------------
14 | 
15 | mapperProcess :: (ProcessId, ProcessId, Closure (MapReduce String String String Int Int))
16 |               -> Process ()
17 | mapperProcess (master, workQueue, mrClosure) = do
18 |   us <- getSelfPid
19 |   mr <- unClosure mrClosure
20 |   go us mr
21 |   where
22 |     go us mr = do
23 |       -- Ask the queue for work
24 |       send workQueue us
25 | 
26 |       -- Wait for a reply; if there is work, do it and repeat; otherwise, exit
27 |       receiveWait
28 |         [ match $ \(key, val) -> send master (mrMap mr key val) >> go us mr
29 |         , match $ \() -> return ()
30 |         ]
31 | 
32 | remotable ['mapperProcess]
33 | 
34 | distrMapReduce :: Closure (MapReduce String String String Int Int)
35 |                -> [NodeId]
36 |                -> Map String String
37 |                -> Process (Map String Int)
38 | distrMapReduce mrClosure mappers input = do
39 |   mr <- unClosure mrClosure
40 |   master <- getSelfPid
41 | 
42 |   workQueue <- spawnLocal $ do
43 |     -- Return the next bit of work to be done
44 |     forM_ (Map.toList input) $ \(key, val) -> do
45 |       them <- expect
46 |       send them (key, val)
47 | 
48 |     -- Once all the work is done, tell the mappers to terminate
49 |     replicateM_ (length mappers) $ do
50 |       them <- expect
51 |       send them ()
52 | 
53 |   -- Start the mappers
54 |   forM_ mappers $ \nid -> spawn nid ($(mkClosure 'mapperProcess) (master, workQueue, mrClosure))
55 | 
56 |   -- Wait for the partial results
57 |   partials <- replicateM (Map.size input) expect
58 | 
59 |   -- We reduce on this node
60 |   return (reducePerKey mr . groupByKey . concat $ partials)
61 | 
--------------------------------------------------------------------------------
/src/MapReduce/SimpleLocalnet.hs:
--------------------------------------------------------------------------------
1 | import System.Environment (getArgs)
2 | import System.IO
3 | import Control.Applicative
4 | import Control.Monad
5 | import System.Random
6 | import Control.Distributed.Process
7 | import Control.Distributed.Process.Node (initRemoteTable)
8 | import Control.Distributed.Process.Backend.SimpleLocalnet
9 | import Data.Map (Map)
10 | import Data.Array (Array, listArray)
11 | import qualified Data.Map as Map (fromList)
12 | 
13 | import qualified CountWords
14 | import qualified PolyDistrMapReduce
15 | import qualified MonoDistrMapReduce
16 | import qualified KMeans
17 | 
18 | rtable :: RemoteTable
19 | rtable = PolyDistrMapReduce.__remoteTable
20 |        . MonoDistrMapReduce.__remoteTable
21 |        . CountWords.__remoteTable
22 |        . 
KMeans.__remoteTable 23 | $ initRemoteTable 24 | 25 | main :: IO () 26 | main = do 27 | args <- getArgs 28 | 29 | case args of 30 | -- Local word count 31 | "local" : "count" : files -> do 32 | input <- constructInput files 33 | print $ CountWords.localCountWords input 34 | 35 | -- Distributed word count 36 | "master" : host : port : "count" : files -> do 37 | input <- constructInput files 38 | backend <- initializeBackend host port rtable 39 | startMaster backend $ \slaves -> do 40 | result <- CountWords.distrCountWords slaves input 41 | liftIO $ print result 42 | 43 | -- Local k-means 44 | "local" : "kmeans" : [] -> do 45 | points <- replicateM 50000 randomPoint 46 | withFile "plot.data" WriteMode $ KMeans.createGnuPlot $ 47 | KMeans.localKMeans (arrayFromList points) (take 5 points) 5 48 | 49 | -- Distributed k-means 50 | "master" : host : port : "kmeans" : [] -> do 51 | points <- replicateM 50000 randomPoint 52 | backend <- initializeBackend host port rtable 53 | startMaster backend $ \slaves -> do 54 | result <- KMeans.distrKMeans (arrayFromList points) (take 5 points) slaves 5 55 | liftIO $ withFile "plot.data" WriteMode $ KMeans.createGnuPlot result 56 | 57 | -- Generic slave for distributed examples 58 | "slave" : host : port : [] -> do 59 | backend <- initializeBackend host port rtable 60 | startSlave backend 61 | 62 | -------------------------------------------------------------------------------- 63 | -- Auxiliary -- 64 | -------------------------------------------------------------------------------- 65 | 66 | constructInput :: [FilePath] -> IO (Map FilePath CountWords.Document) 67 | constructInput files = do 68 | contents <- mapM readFile files 69 | return . Map.fromList $ zip files contents 70 | 71 | randomPoint :: IO KMeans.Point 72 | randomPoint = (,) <$> randomIO <*> randomIO 73 | 74 | arrayFromList :: [e] -> Array Int e 75 | arrayFromList xs = listArray (0, length xs - 1) xs 76 | -------------------------------------------------------------------------------- /src/MapReduce/KMeans.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TupleSections #-} 2 | module KMeans 3 | ( Point 4 | , Cluster 5 | , localKMeans 6 | , distrKMeans 7 | , createGnuPlot 8 | , __remoteTable 9 | ) where 10 | 11 | import System.IO 12 | import Data.List (minimumBy) 13 | import Data.Function (on) 14 | import Data.Array (Array, (!), bounds) 15 | import qualified Data.Map as Map (fromList, elems, toList, size) 16 | import Control.Distributed.Process 17 | import Control.Distributed.Process.Closure 18 | import MapReduce 19 | import PolyDistrMapReduce hiding (__remoteTable) 20 | 21 | type Point = (Double, Double) 22 | type Cluster = (Double, Double) 23 | 24 | average :: Fractional a => [a] -> a 25 | average xs = sum xs / fromIntegral (length xs) 26 | 27 | distanceSq :: Point -> Point -> Double 28 | distanceSq (x1, y1) (x2, y2) = a * a + b * b 29 | where 30 | a = x2 - x1 31 | b = y2 - y1 32 | 33 | nearest :: Point -> [Cluster] -> Cluster 34 | nearest p = minimumBy (compare `on` distanceSq p) 35 | 36 | center :: [Point] -> Point 37 | center ps = let (xs, ys) = unzip ps in (average xs, average ys) 38 | 39 | kmeans :: Array Int Point -> MapReduce (Int, Int) [Cluster] Cluster Point ([Point], Point) 40 | kmeans points = MapReduce { 41 | mrMap = \(lo, hi) cs -> [ let p = points ! i in (nearest p cs, p) 42 | | i <- [lo .. 
hi] 43 | ] 44 | , mrReduce = \_ ps -> (ps, center ps) 45 | } 46 | 47 | localKMeans :: Array Int Point 48 | -> [Cluster] 49 | -> Int 50 | -> Map Cluster ([Point], Point) 51 | localKMeans points cs iterations = go (iterations - 1) 52 | where 53 | mr :: [Cluster] -> Map Cluster ([Point], Point) 54 | mr = localMapReduce (kmeans points) . trivialSegmentation 55 | 56 | go :: Int -> Map Cluster ([Point], Point) 57 | go 0 = mr cs 58 | go n = mr . map snd . Map.elems . go $ n - 1 59 | 60 | trivialSegmentation :: [Cluster] -> Map (Int, Int) [Cluster] 61 | trivialSegmentation cs' = Map.fromList [(bounds points, cs')] 62 | 63 | dictIn :: SerializableDict ((Int, Int), [Cluster]) 64 | dictIn = SerializableDict 65 | 66 | dictOut :: SerializableDict [(Cluster, Point)] 67 | dictOut = SerializableDict 68 | 69 | remotable ['kmeans, 'dictIn, 'dictOut] 70 | 71 | distrKMeans :: Array Int Point 72 | -> [Cluster] 73 | -> [NodeId] 74 | -> Int 75 | -> Process (Map Cluster ([Point], Point)) 76 | distrKMeans points cs mappers iterations = 77 | distrMapReduce $(mkStatic 'dictIn) 78 | $(mkStatic 'dictOut) 79 | ($(mkClosure 'kmeans) points) 80 | mappers 81 | (go (iterations - 1)) 82 | where 83 | go :: Int 84 | -> (Map (Int, Int) [Cluster] -> Process (Map Cluster ([Point], Point))) 85 | -> Process (Map Cluster ([Point], Point)) 86 | go 0 iteration = 87 | iteration (Map.fromList $ map (, cs) segments) 88 | go n iteration = do 89 | clusters <- go (n - 1) iteration 90 | let centers = map snd $ Map.elems clusters 91 | iteration (Map.fromList $ map (, centers) segments) 92 | 93 | segments :: [(Int, Int)] 94 | segments = let (lo, _) = bounds points in dividePoints numPoints lo 95 | 96 | dividePoints :: Int -> Int -> [(Int, Int)] 97 | dividePoints pointsLeft offset 98 | | pointsLeft <= pointsPerMapper = [(offset, offset + pointsLeft - 1)] 99 | | otherwise = let offset' = offset + pointsPerMapper in 100 | (offset, offset' - 1) 101 | : dividePoints (pointsLeft - pointsPerMapper) offset' 102 | 103 | pointsPerMapper :: Int 104 | pointsPerMapper = 105 | ceiling (toRational numPoints / toRational (length mappers)) 106 | 107 | numPoints :: Int 108 | numPoints = let (lo, hi) = bounds points in hi - lo + 1 109 | 110 | -- | Create a gnuplot data file for the output of the k-means algorithm 111 | -- 112 | -- To plot the data, use 113 | -- 114 | -- > plot "<>" u 1:2:3 with points palette 115 | createGnuPlot :: Map KMeans.Cluster ([KMeans.Point], KMeans.Point) -> Handle -> IO () 116 | createGnuPlot clusters h = 117 | mapM_ printPoint . flatten . zip colors . Map.toList $ clusters 118 | where 119 | printPoint (x, y, color) = 120 | hPutStrLn h $ show x ++ " " ++ show y ++ " " ++ show color 121 | 122 | flatten :: [(Float, (KMeans.Cluster, ([KMeans.Point], KMeans.Point)))] 123 | -> [(Double, Double, Float)] 124 | flatten = concatMap (\(color, (_, (points, _))) -> map (\(x, y) -> (x, y, color)) points) 125 | 126 | colors :: [Float] 127 | colors = [0, 1 / fromIntegral (Map.size clusters) .. 
1] 128 | -------------------------------------------------------------------------------- /src/MapReduce/PolyDistrMapReduce.hs: -------------------------------------------------------------------------------- 1 | -- | Polymorphic "multi-shot" distributed implementation of map-reduce 2 | module PolyDistrMapReduce (distrMapReduce, __remoteTable) where 3 | 4 | import Data.Typeable (Typeable) 5 | import Data.Binary (encode) 6 | import Data.ByteString.Lazy (ByteString) 7 | import Data.Map (Map) 8 | import qualified Data.Map as Map (size, toList) 9 | import Control.Monad (forM_, replicateM, replicateM_) 10 | import Control.Distributed.Process 11 | import Control.Distributed.Process.Serializable (Serializable) 12 | import Control.Distributed.Process.Closure 13 | import Control.Distributed.Static (closureApply, staticCompose, staticApply) 14 | import MapReduce (MapReduce(..), reducePerKey, groupByKey) 15 | 16 | -------------------------------------------------------------------------------- 17 | -- Simple distributed implementation -- 18 | -------------------------------------------------------------------------------- 19 | 20 | matchDict :: forall a b. SerializableDict a -> (a -> Process b) -> Match b 21 | matchDict SerializableDict = match 22 | 23 | sendDict :: forall a. SerializableDict a -> ProcessId -> a -> Process () 24 | sendDict SerializableDict = send 25 | 26 | sdictProcessIdPair :: SerializableDict (ProcessId, ProcessId) 27 | sdictProcessIdPair = SerializableDict 28 | 29 | mapperProcess :: forall k1 v1 k2 v2 v3. 30 | SerializableDict (k1, v1) 31 | -> SerializableDict [(k2, v2)] 32 | -> (ProcessId, ProcessId) 33 | -> MapReduce k1 v1 k2 v2 v3 34 | -> Process () 35 | mapperProcess dictIn dictOut (master, workQueue) mr = getSelfPid >>= go 36 | where 37 | go us = do 38 | -- Ask the queue for work 39 | send workQueue us 40 | 41 | -- Wait for a reply; if there is work, do it and repeat; otherwise, exit 42 | receiveWait 43 | [ matchDict dictIn $ \(key, val) -> do 44 | sendDict dictOut master (mrMap mr key val) 45 | go us 46 | , match $ \() -> 47 | return () 48 | ] 49 | 50 | remotable ['mapperProcess, 'sdictProcessIdPair] 51 | 52 | mapperProcessClosure :: forall k1 v1 k2 v2 v3. 53 | (Typeable k1, Typeable v1, Typeable k2, Typeable v2, Typeable v3) 54 | => Static (SerializableDict (k1, v1)) 55 | -> Static (SerializableDict [(k2, v2)]) 56 | -> Closure (MapReduce k1 v1 k2 v2 v3) 57 | -> ProcessId 58 | -> ProcessId 59 | -> Closure (Process ()) 60 | mapperProcessClosure dictIn dictOut mr master workQueue = 61 | closure decoder (encode (master, workQueue)) `closureApply` mr 62 | where 63 | decoder :: Static (ByteString -> MapReduce k1 v1 k2 v2 v3 -> Process ()) 64 | decoder = 65 | ($(mkStatic 'mapperProcess) `staticApply` dictIn `staticApply` dictOut) 66 | `staticCompose` 67 | staticDecode $(mkStatic 'sdictProcessIdPair) 68 | 69 | distrMapReduce :: forall k1 k2 v1 v2 v3 a. 
70 | (Serializable k1, Serializable v1, Serializable k2, Serializable v2, Serializable v3, Ord k2) 71 | => Static (SerializableDict (k1, v1)) 72 | -> Static (SerializableDict [(k2, v2)]) 73 | -> Closure (MapReduce k1 v1 k2 v2 v3) 74 | -> [NodeId] 75 | -> ((Map k1 v1 -> Process (Map k2 v3)) -> Process a) 76 | -> Process a 77 | distrMapReduce dictIn dictOut mr mappers p = do 78 | mr' <- unClosure mr 79 | master <- getSelfPid 80 | 81 | workQueue <- spawnChannelLocal $ \queue -> do 82 | let go :: Process () 83 | go = do 84 | mWork <- receiveChan queue 85 | case mWork of 86 | Just (key, val) -> do 87 | -- As long there is work, make it available to the mappers 88 | them <- expect 89 | send them (key, val) 90 | go 91 | Nothing -> do 92 | -- Tell the mappers to terminate 93 | replicateM_ (length mappers) $ do 94 | them <- expect 95 | send them () 96 | 97 | -- Tell the master that the slaves are terminated 98 | send master () 99 | go 100 | 101 | -- Start the mappers 102 | let workQueuePid = sendPortProcessId (sendPortId workQueue) 103 | forM_ mappers $ \nid -> 104 | spawn nid (mapperProcessClosure dictIn dictOut mr master workQueuePid) 105 | 106 | let iteration :: Map k1 v1 -> Process (Map k2 v3) 107 | iteration input = do 108 | -- Make work available to the mappers 109 | mapM_ (sendChan workQueue . Just) (Map.toList input) 110 | 111 | -- Wait for the partial results 112 | partials <- replicateM (Map.size input) expect 113 | 114 | -- We reduce on this node 115 | return (reducePerKey mr' . groupByKey . concat $ partials) 116 | 117 | result <- p iteration 118 | 119 | -- Terminate the wrappers 120 | sendChan workQueue Nothing 121 | expect :: Process () 122 | 123 | return result 124 | -------------------------------------------------------------------------------- /distributed-process-demos.cabal: -------------------------------------------------------------------------------- 1 | -- Initial distributed-process-demos.cabal generated by cabal init. 
For 2 | -- further documentation, see http://haskell.org/cabal/users-guide/ 3 | 4 | name: distributed-process-demos 5 | version: 0.1.0.0 6 | synopsis: Cloud Haskell Demo Applications 7 | -- description: 8 | homepage: http://haskell-distributed.github.com 9 | license: BSD3 10 | license-file: LICENSE 11 | author: Edsko de Vries 12 | maintainer: edsko@well-typed.com, watson.timothy@gmail.com 13 | copyright: Well-Typed LLP 14 | category: Control 15 | build-type: Simple 16 | cabal-version: >=1.8 17 | 18 | flag use-simplelocalnet 19 | description: Use the SimpleLocalnet backend 20 | default: True 21 | 22 | flag use-azure 23 | description: Use the Azure backend 24 | default: True 25 | 26 | executable distributed-process-simplelocalnet-masterslave 27 | main-is: SimpleLocalnet.hs 28 | other-modules: MasterSlave 29 | PrimeFactors 30 | hs-source-dirs: src/MasterSlave src/Common 31 | build-depends: base >=4.5 && <5, 32 | distributed-process >= 0.3.2 33 | extensions: BangPatterns, CPP, TemplateHaskell 34 | ghc-options: -Wall 35 | if flag(use-simplelocalnet) 36 | build-depends: distributed-process-simplelocalnet 37 | cpp-options: -DUSE_SIMPLELOCALNET 38 | else 39 | buildable: False 40 | 41 | executable distributed-process-azure-masterslave 42 | main-is: Azure.hs 43 | other-modules: MasterSlave 44 | PrimeFactors 45 | hs-source-dirs: src/MasterSlave src/Common 46 | build-depends: base >=4.5 && <5, 47 | distributed-process >= 0.3.2 48 | extensions: BangPatterns, CPP, TemplateHaskell 49 | ghc-options: -Wall 50 | if flag(use-azure) 51 | build-depends: distributed-process-azure 52 | cpp-options: -DUSE_AZURE 53 | else 54 | buildable: False 55 | 56 | executable distributed-process-simplelocalnet-workpushing 57 | main-is: SimpleLocalnet.hs 58 | other-modules: WorkPushing 59 | PrimeFactors 60 | hs-source-dirs: src/WorkPushing src/Common 61 | build-depends: base >=4.5 && <5, 62 | distributed-process >= 0.3.2 63 | extensions: BangPatterns, CPP, TemplateHaskell 64 | ghc-options: -Wall 65 | if flag(use-simplelocalnet) 66 | build-depends: distributed-process-simplelocalnet 67 | cpp-options: -DUSE_SIMPLELOCALNET 68 | else 69 | buildable: False 70 | 71 | executable distributed-process-azure-workpushing 72 | main-is: Azure.hs 73 | other-modules: WorkPushing 74 | PrimeFactors 75 | hs-source-dirs: src/WorkPushing src/Common 76 | build-depends: base >=4.5 && <5, 77 | distributed-process >= 0.3.2 78 | extensions: BangPatterns, CPP, TemplateHaskell 79 | ghc-options: -Wall 80 | if flag(use-azure) 81 | build-depends: distributed-process-azure 82 | cpp-options: -DUSE_AZURE 83 | else 84 | buildable: False 85 | 86 | executable distributed-process-simplelocalnet-typedworkpushing 87 | main-is: SimpleLocalnet.hs 88 | other-modules: TypedWorkPushing 89 | PrimeFactors 90 | hs-source-dirs: src/TypedWorkPushing src/Common 91 | build-depends: base >=4.5 && <5, 92 | distributed-process >= 0.3.2 93 | extensions: BangPatterns, CPP, TemplateHaskell 94 | ghc-options: -Wall 95 | if flag(use-simplelocalnet) 96 | build-depends: distributed-process-simplelocalnet 97 | cpp-options: -DUSE_SIMPLELOCALNET 98 | else 99 | buildable: False 100 | 101 | executable distributed-process-azure-typedworkpushing 102 | main-is: Azure.hs 103 | other-modules: TypedWorkPushing 104 | PrimeFactors 105 | hs-source-dirs: src/TypedWorkPushing src/Common 106 | build-depends: base >=4.5 && <5, 107 | distributed-process >= 0.3.2 108 | extensions: BangPatterns, CPP, TemplateHaskell 109 | ghc-options: -Wall 110 | if flag(use-azure) 111 | build-depends: 
distributed-process-azure 112 | cpp-options: -DUSE_AZURE 113 | else 114 | buildable: False 115 | 116 | executable distributed-process-simplelocalnet-workstealing 117 | main-is: SimpleLocalnet.hs 118 | other-modules: WorkStealing 119 | PrimeFactors 120 | hs-source-dirs: src/WorkStealing src/Common 121 | build-depends: base >=4.5 && <5, 122 | distributed-process >= 0.3.2 123 | extensions: BangPatterns, CPP, TemplateHaskell 124 | ghc-options: -Wall 125 | if flag(use-simplelocalnet) 126 | build-depends: distributed-process-simplelocalnet 127 | cpp-options: -DUSE_SIMPLELOCALNET 128 | else 129 | buildable: False 130 | 131 | executable distributed-process-simplelocalnet-mapreduce 132 | main-is: SimpleLocalnet.hs 133 | other-modules: CountWords 134 | KMeans 135 | MapReduce 136 | MonoDistrMapReduce 137 | PolyDistrMapReduce 138 | hs-source-dirs: src/MapReduce 139 | build-depends: base >=4.5 && <5, 140 | distributed-process >= 0.3.2, 141 | distributed-static >= 0.2.0, 142 | containers >= 0.4 && < 0.6, 143 | bytestring >= 0.9 && < 0.11, 144 | binary >= 0.5 && < 0.8, 145 | array >= 0.4 && < 0.6, 146 | random >= 1.0 && < 1.2 147 | extensions: BangPatterns 148 | CPP 149 | KindSignatures 150 | ScopedTypeVariables 151 | TemplateHaskell 152 | ghc-options: -Wall -rtsopts -with-rtsopts=-K64M 153 | if flag(use-simplelocalnet) 154 | build-depends: distributed-process-simplelocalnet 155 | cpp-options: -DUSE_SIMPLELOCALNET 156 | else 157 | buildable: False 158 | --------------------------------------------------------------------------------
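
For orientation, here is a minimal sketch (not a file of this repository) of how the `MapReduce` skeleton and `localMapReduce` from src/MapReduce/MapReduce.hs are meant to be used, mirroring the word-count instance in src/MapReduce/CountWords.hs; the module name `Demo` and the sample input are illustrative only:

```haskell
module Demo where

import qualified Data.Map as Map
import MapReduce (MapReduce(..), localMapReduce)

-- A word-count instance of the skeleton: the mapper turns one document into
-- (word, 1) pairs, the reducer sums the counts per word.
wordCount :: MapReduce FilePath String String Int Int
wordCount = MapReduce
  { mrMap    = \_path doc -> [ (w, 1) | w <- words doc ]
  , mrReduce = \_word ns  -> sum ns
  }

-- Running the skeleton locally over a one-document, in-memory corpus.
sample :: Map.Map String Int
sample = localMapReduce wordCount (Map.fromList [("a.txt", "to be or not to be")])
-- sample == fromList [("be",2),("not",1),("or",1),("to",2)]
```

The distributed variants (MonoDistrMapReduce, PolyDistrMapReduce) accept the same skeleton, but ship the map step to mapper processes on remote nodes via a `Closure` and perform the reduce step on the master node.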