├── LICENSE ├── MR.cabal ├── README ├── Setup.hs └── src ├── Distributed.hs ├── Distributed ├── Storage.hs └── Storage │ ├── Common.hs │ ├── InMemory.hs │ ├── InMemory │ └── Server.hs │ └── Simple.hs ├── Main.hs ├── config_master └── config_storage /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, JP Embedded Solutions Limited 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MR.cabal: -------------------------------------------------------------------------------- 1 | name: MR 2 | version: 0.1 3 | cabal-version: >= 1.2 4 | build-type: Simple 5 | data-files: config_master, config_storage 6 | data-dir: src 7 | 8 | executable MR 9 | 10 | hs-source-dirs: src 11 | main-is: Main.hs 12 | build-depends: base, 13 | binary, 14 | remote, 15 | bytestring, 16 | directory, 17 | template-haskell 18 | other-modules: Distributed.Storage,Distributed.Storage.Common,Distributed.Storage.InMemory,Distributed 19 | 20 | library 21 | hs-source-dirs: src 22 | exposed-modules: Distributed,Distributed.Storage,Distributed.Storage.InMemory 23 | other-modules: Distributed.Storage.InMemory.Server,Distributed.Storage.Common 24 | 25 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | SIMPLE CLOUDHASKELL STORAGE SERVICE 2 | =================================== 3 | 4 | This code is distributed under the terms of the GPL, so use it, change it, etc but 5 | please acknowledge its origin. 6 | 7 | Installation 8 | ============ 9 | 10 | (1) Download CloudHaskell from git://github.com/jepst/CloudHaskell.git 11 | (2) Install it with 'cabal install' in its top level directory 12 | (3) Download the storage server code from git://github.com/Julianporter/Distributed-Haskell.git 13 | onto two machines 14 | (4) Modify the two files 'config_master' and 'config_storage' to refer to your two machines' 15 | names. I have used the names 'joker.local' for the server and 'batman.local' for the client 16 | (4) On each machine install the application with 'cabal install' in its top level directory 17 | 18 | Testing 19 | ======= 20 | 21 | (1) On the server machine invoke 'MR -s' 22 | (2) On the client machine invoke 'MR -c' (if you do this first, you will get an exception 23 | complaining that no server is running) 24 | 25 | You will see debugging output on both machines. The client sends two slugs, each of five items 26 | of type (Int,String), to the server, does an exchange and reads them back. 27 | 28 | Generally 29 | ========= 30 | 31 | The server and client API are in Storage.hs, and are fully documented. The client programme 32 | in Main.hs gives a good idea of simple CloudHaskell. 33 | 34 | Further Reading 35 | =============== 36 | 37 | http://jpembeddedsolutions.wordpress.com/ 38 | 39 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /src/Distributed.hs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdstmporter/Distributed-Haskell/a176f51f5cf197de2717f41b74f1ca3e71ae863a/src/Distributed.hs -------------------------------------------------------------------------------- /src/Distributed/Storage.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Defines the storage server's client API, providing the ability to 'push', 'pull' and 'exchange' data. 3 | module Distributed.Storage (push,pull,exchange) where 4 | 5 | 6 | import Remote (ProcessM,expect) 7 | import Data.Binary (Binary,encode,decode) 8 | import Data.ByteString.Lazy (ByteString) 9 | import Control.Applicative ((<$>)) 10 | import Distributed (Service,sendToService) 11 | import Distributed.Storage.Common 12 | 13 | -- | Client API function to append a list of type @['Data.ByteString.Lazy.ByteString']@ to the outputs list in the store. 14 | push :: (Binary a) => Service -- ^ the datastore 15 | -> [a] -- ^ the data to send 16 | -> ProcessM() -- ^ null marker 17 | push s x = do 18 | sendToService s $ Push (encode <$> x) -- send a message 19 | 20 | -- | Client API function to pull the inputs list of type @['Data.ByteString.Lazy.ByteString']@ from the store. 21 | pull :: (Binary a) => Service -- ^ the datastore 22 | -> ProcessM [a] -- ^ data from server wrapped in 'Remote.ProcessM' 23 | pull s = do 24 | sendToService s (Pull::DataMessage) -- send a message 25 | x <- expect -- wait for response 26 | return $ decode <$> x 27 | 28 | -- | Client API function to do exchange: replaces the inputs list with the outputs list and 29 | -- initialises the inputs list to @[]@ 30 | exchange :: Service -- ^ the datastore 31 | -> ProcessM() -- ^ null marker 32 | exchange s = do 33 | sendToService s (Exchange::DataMessage) -- send message 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/Distributed/Storage/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | 3 | -- | Common type for message passing by distributed stores. Used only by store implementations. 4 | module Distributed.Storage.Common where 5 | 6 | import Data.ByteString.Lazy (ByteString) 7 | import Data.Binary (Binary,Get,Put,get,put) 8 | import Data.Typeable 9 | 10 | 11 | -- | Basic type that is used to pass messages between client and server. Messages 12 | -- either push some data, request a pull, or request an exchange. Derives 'Data.Typeable.Typeable' 13 | -- as part of making it 'Remote.Serializable'. 14 | data DataMessage = Push [ByteString] | Pull | Exchange 15 | deriving(Typeable) 16 | 17 | -- | Make it 'Data.Binary.Binary' to enable 'Remote.Serializable'. Standard boilerplate. 18 | instance Binary DataMessage where 19 | put (Push kv) = put 'P' >> put kv 20 | put Exchange = put 'U' 21 | put Pull = put 'G' 22 | get = do 23 | h <- get :: Get Char 24 | case h of 25 | 'P' -> do 26 | kv <- get 27 | return (Push kv) 28 | 'G' -> return Pull 29 | 'U' -> return Exchange 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/Distributed/Storage/InMemory.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | 3 | -- | The wrapper module for the in memory service. Provides a single method that gives a 4 | -- handle on the server instance and then re-exports methods required from other packages. 5 | module Distributed.Storage.InMemory (Service,service,metadata,push,pull,exchange,getInMemoryServer) where 6 | 7 | import Distributed 8 | import Distributed.Storage 9 | import Distributed.Storage.InMemory.Server 10 | 11 | -- | Method to get the various bits of CloudHaskell machinery associated to a server. Allows 12 | -- other parts of 'Distributed.Storage' to abstract away from the actual implementation. 13 | getInMemoryServer :: ServerHandle -- ^ The 'Distributed.ServerHandle' object. 14 | getInMemoryServer = makeServer storageServer__closure Distributed.Storage.InMemory.Server.__remoteCallMetaData 15 | 16 | -------------------------------------------------------------------------------- /src/Distributed/Storage/InMemory/Server.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | 3 | -- | The InMemory storage server. A simple loop that sits waiting for messages from clients and responds 4 | -- appropriately. Stores data in a @['Data.ByteString.Lazy']@. 5 | module Distributed.Storage.InMemory.Server where 6 | 7 | import Remote (ProcessM,expect,send,remotable,say) 8 | import Data.ByteString.Lazy (ByteString) 9 | import Distributed.Storage.Common 10 | 11 | 12 | -- | The server code. Executes within the 'Remote.ProcessM' monad, as it involves message passing. 13 | -- It waits for messages with 'Remote.expect' and then process the data accordingly (as well as 14 | -- producing log messages. 15 | getData :: [ByteString] -- ^ data from clients 16 | -> [ByteString] -- ^ data to send to clients 17 | -> ProcessM () -- ^ null marker; the function never returns 18 | getData ins outs = do 19 | (pid,m) <- expect 20 | case m of 21 | Push x -> do 22 | say $ "PID " ++ show pid ++ " adding data" 23 | getData (ins ++ x) outs 24 | Pull -> do 25 | say $ "PID " ++ show pid ++ " getting data" 26 | send pid outs 27 | getData ins outs 28 | Exchange -> do 29 | say $ "PID " ++ show pid ++ " exchanging" 30 | getData [] ins 31 | 32 | 33 | -- | The server itself. Simply invokes 'getData' with empty initial stores. 34 | storageServer :: ProcessM () 35 | storageServer = getData [] [] 36 | 37 | -- | CloudHaskell boilerplate 38 | $( remotable ['storageServer] ) 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/Distributed/Storage/Simple.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell,DeriveDataTypeable #-} 2 | 3 | -- | Module that defines the a simple distributed storage service. 4 | -- 5 | -- The server runs as a CloudHaskell service, and the API consists of three 6 | -- functions to push, pull and exchange data. All client interaction takes 7 | -- place within the @ProcessM@ monad. 8 | -- 9 | -- The basic model is this: the data stored is in the form of @['Data.ByteString.Lazy.ByteString']@. 10 | -- 11 | -- (1) The server holds an /inputs list/, which clients can request a copy of with the 12 | -- function 'pullFromStore'. 13 | -- 14 | -- (2) It holds an /outputs list/, to which instances of @['Data.ByteString.Lazy.ByteString']@ that 15 | -- clients send to the server with 'pushToStore' are appended. 16 | -- 17 | -- (3) There is an operation 'exchangeStore' that replaces the inputs list with 18 | -- the contents of the outputs list, and initialises the outputs list to @[]@. 19 | -- 20 | -- The idea is that it be used in a distributed round-based computation, where 21 | -- the inputs list holds the input data for the processing nodes, which use 22 | -- 'pullFromStore' to get it. They then process it and each pushes its output 23 | -- back to the server with 'pushToStore', resulting in the server appending it 24 | -- to the outputs list. Then, when a round is completed a 25 | -- master invokes 'exchangeStore' to turn the concatenated outputs into the 26 | -- inputs for the next round. 27 | module Distributed.Storage.Simple ( 28 | pushToStore,storageServer__closure,__remoteCallMetaData, 29 | pullFromStore, 30 | exchangeStore) where 31 | 32 | import Remote 33 | import Data.Binary (Binary,encode,decode) 34 | import Data.Typeable 35 | import Data.ByteString.Lazy (ByteString) 36 | import Control.Applicative ((<$>)) 37 | import Distributed.Storage.Common 38 | 39 | 40 | 41 | 42 | 43 | 44 | -- | The server code. Executes within the 'Remote.ProcessM' monad, as it involves message passing. 45 | getData :: [ByteString] -- ^ data from clients 46 | -> [ByteString] -- ^ data to send to clients 47 | -> ProcessM () -- ^ null marker; the function never returns 48 | getData ins outs = do 49 | (pid,m) <- expect 50 | case m of 51 | Push x -> do 52 | say $ "PID " ++ show pid ++ " adding data" 53 | getData (ins ++ x) outs 54 | Pull -> do 55 | say $ "PID " ++ show pid ++ " getting data" 56 | send pid outs 57 | getData ins outs 58 | Exchange -> do 59 | say $ "PID " ++ show pid ++ " exchanging" 60 | getData [] ins 61 | 62 | 63 | -- | The server itself. Simply invokes 'getData' 64 | storageServer :: ProcessM () 65 | storageServer = getData [] [] 66 | 67 | 68 | $( remotable ['storageServer] ) 69 | 70 | 71 | -- | Client API function to append a list of type @['Data.ByteString.Lazy.ByteString']@ to the outputs list in the store. 72 | pushToStore :: (Binary a) => ProcessId -- ^ client's unique identifier 73 | -> ProcessId -- ^ server's unique identifier 74 | -> [a] -- ^ the data to send 75 | -> ProcessM() -- ^ null marker 76 | pushToStore myPid slavePid xs = do 77 | send slavePid (myPid,Push (encode <$> xs)) 78 | 79 | -- | Client API function to pull the inputs list of type @['Data.ByteString.Lazy.ByteString']@ from the store. 80 | pullFromStore :: (Binary a) => ProcessId -- ^ client's unique identifier 81 | -> ProcessId -- ^ server's unique identifier 82 | -> ProcessM [a] -- ^ data from server wrapped in 'ProcessM' 83 | pullFromStore myPid slavePid = do 84 | send slavePid (myPid,Pull::DataMessage) 85 | kv <- expect 86 | return $ decode <$> kv 87 | 88 | -- | Client API function to do exchange: replaces the inputs list with the outputs list and 89 | -- initialises the inputs list to @[]@ 90 | exchangeStore :: ProcessId -- ^ client's unique identifier 91 | -> ProcessId -- ^ server's unique identifier 92 | -> ProcessM() -- ^ null marker 93 | exchangeStore myPid slavePid = do 94 | send slavePid (myPid,Exchange::DataMessage) -------------------------------------------------------------------------------- /src/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import System.Environment (getArgs) 4 | import System.Directory (copyFile) 5 | import Paths_MR 6 | import Distributed.Storage.InMemory 7 | import Remote 8 | 9 | 10 | 11 | -- | Simple interaction with server; put two slugs of data then get them back 12 | doStuff :: Service -> -- ^ the datastore 13 | ProcessM() -- ^ null marker 14 | doStuff d = do 15 | say $ "Putting data to PID " -- ++ show slavePid 16 | push d ([(1,"a"),(2,"b"),(3,"c"),(4,"d"),(5,"e")]::[(Int,String)]) 17 | say $ "Putting data to PID " -- ++ show slavePid 18 | push d ([(6,"a"),(7,"b"),(8,"c"),(9,"d"),(0,"e")]::[(Int,String)]) 19 | say $ "Exchanging on PID " -- ++ show slavePid 20 | exchange d 21 | say $ "Pulling from PID " -- ++ show slavePid 22 | xs <- (pull d)::ProcessM [(Int,String)] 23 | say $ "Got "++show xs 24 | return () 25 | 26 | -- | run the CloudHaskell process 27 | initialProcess :: String -> -- ^ The CloudHaskell role I am running in 28 | ProcessM () -- ^ Nill return 29 | initialProcess "STORAGE" = receiveWait [] 30 | initialProcess "MASTER" = do 31 | let s = getInMemoryServer 32 | d <- service s "STORAGE" 33 | doStuff d 34 | return () 35 | initialProcess _ = error "Role must be STORAGE or MASTER" 36 | 37 | 38 | -- | Print usage string 39 | usage :: IO() 40 | usage = putStrLn "Usage:\n\tMR -c : run as client\n\tMR -s : run as server" 41 | 42 | -- | Main method. Accepts command-line argument "-s" for server and "-c" for client. 43 | main::IO() 44 | main = do 45 | args <- getArgs 46 | case args of 47 | [] -> usage 48 | _ -> do 49 | case (head args) of 50 | "-c" -> init "config_master" 51 | "-s" -> init "config_storage" 52 | _ -> usage 53 | where 54 | init confFile = do 55 | conf <- getDataFileName confFile 56 | putStrLn $ "Using config file " ++ show conf 57 | copyFile conf ".config" -- get the right config file in the right place 58 | let s = getInMemoryServer 59 | remoteInit (Just ".config") [metadata s] initialProcess -------------------------------------------------------------------------------- /src/config_master: -------------------------------------------------------------------------------- 1 | cfgRole MASTER 2 | cfgHostName imac.local 3 | cfgKnownHosts imac.local joker.local 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/config_storage: -------------------------------------------------------------------------------- 1 | cfgRole STORAGE 2 | cfgHostName joker.local 3 | cfgKnownHosts imac.local joker.local --------------------------------------------------------------------------------