├── .gitignore
├── oceanstore
│   └── oceanstore
│       ├── logging.go
│       ├── oceanstore_test.go
│       ├── ocean_local_impl_test.go
│       ├── client.go
│       ├── ocean_rpc_impl.go
│       ├── listener.go
│       ├── guid.go
│       ├── guid_test.go
│       ├── util.go
│       ├── oceanstore.go
│       ├── inode_test.go
│       ├── ocean_local_impl.go
│       ├── ocean_rpc_api.go
│       └── inode.go
├── raft
│   ├── shell.go
│   ├── cliClient.go
│   ├── README.md
│   └── raft
│       ├── rpcMessages.go
│       ├── utils.go
│       ├── config.go
│       ├── testingPolicy.go
│       ├── listener.go
│       ├── raftRPCImpl.go
│       ├── clientRPCApi.go
│       ├── raftLocalImpl.go
│       ├── logging.go
│       ├── machine.go
│       ├── testUtils.go
│       ├── client.go
│       ├── raft_test.go
│       ├── raft.go
│       ├── raftRPCApi.go
│       ├── persistenceAPI.go
│       └── persistenceImpl.go
├── chord
│   ├── chord
│   │   ├── simple_test.go
│   │   ├── kv_store_test.go
│   │   ├── finger_test.go
│   │   ├── util_test.go
│   │   ├── finger.go
│   │   ├── util.go
│   │   ├── node_rpc_impl.go
│   │   ├── kv_store.go
│   │   ├── node_local_impl.go
│   │   ├── chord.go
│   │   └── node_rpc_api.go
│   ├── utils
│   │   └── listener.go
│   ├── cli.go
│   └── README.md
├── tapestry
│   ├── tapestry
│   │   ├── helper_methods.go
│   │   ├── routingtable_test.go
│   │   ├── blobstore.go
│   │   ├── tapestry-client.go
│   │   ├── backpointers.go
│   │   ├── objectstore.go
│   │   ├── tapestry-local_test.go
│   │   ├── tapestry-remote.go
│   │   ├── id.go
│   │   ├── routingtable.go
│   │   ├── tapestry-rpcimpl.go
│   │   ├── tapestry.go
│   │   ├── id_test.go
│   │   └── tapestry_test.go
│   ├── cli.go
│   └── README.md
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.iml
3 | raft/raft/XVlBzgbaiC/
--------------------------------------------------------------------------------
/oceanstore/oceanstore/logging.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/oceanstore_test.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/ocean_local_impl_test.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
--------------------------------------------------------------------------------
/raft/shell.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "./raft"
4 |
5 | type Shell struct {
6 | r *raft.RaftNode
7 | c *raft.Client
8 | done chan bool
9 | }
10 |
--------------------------------------------------------------------------------
/chord/chord/simple_test.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func TestSimple(t *testing.T) {
8 | _, err := CreateNode(nil)
9 | if err != nil {
10 | t.Errorf("Unable to create node, received error:%v\n", err)
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/raft/cliClient.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "./raft"
4 |
5 | func clientInit(shell *Shell, args []string) error {
6 | return shell.c.SendRequest(raft.HASH_CHAIN_INIT, []byte(args[1]))
7 | }
8 |
9 | func clientHash(shell *Shell, args []string) error {
10 | return shell.c.SendRequest(raft.HASH_CHAIN_ADD, []byte{})
11 | }
--------------------------------------------------------------------------------
/raft/raft/rpcMessages.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | //type AppendEntriesMsg struct {
4 | // request *AppendEntriesRequest
5 | // reply chan AppendEntriesReply
6 | //}
7 | //
8 | //type RequestVoteMsg struct {
9 | // request *RequestVoteRequest
10 | // reply chan RequestVoteReply
11 | //}
12 | //
13 | //type ClientRequestMsg struct {
14 | // request *ClientRequest
15 | // reply chan ClientReply
16 | //}
17 | //
18 | //type RegisterClientMsg struct {
19 | // request *RegisterClientRequest
20 | // reply chan RegisterClientReply
21 | //}
22 |
--------------------------------------------------------------------------------
/raft/raft/utils.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | type UInt64Slice []uint64
4 |
5 | func (p UInt64Slice) Len() int {
6 | return len(p)
7 | }
8 |
9 | func (p UInt64Slice) Swap(i, j int) {
10 | p[i], p[j] = p[j], p[i]
11 | }
12 |
13 | func (p UInt64Slice) Less(i, j int) bool {
14 | return p[i] < p[j]
15 | }
16 |
17 | func (r *RaftNode) hasMajority(N uint64) bool {
18 | numNodes := len(r.GetOtherNodes())
19 | sum := 1
20 | for k, v := range r.matchIndex {
21 | if k != r.Id && v >= N {
22 | sum++
23 | }
24 | }
25 | if sum > numNodes/2 {
26 | return true
27 | }
28 | return false
29 | }
--------------------------------------------------------------------------------
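One common way a sort.Interface helper like UInt64Slice is used in Raft implementations is to sort the matchIndex values and take the median, which gives the highest log index replicated on a majority. The actual call sites here would be in raft.go, which this dump does not include, so the sketch below is illustrative only (majorityMatchIndex is a made-up name); it redeclares UInt64Slice just so it runs standalone.

package main

import (
	"fmt"
	"sort"
)

// UInt64Slice mirrors the helper from utils.go above.
type UInt64Slice []uint64

func (p UInt64Slice) Len() int           { return len(p) }
func (p UInt64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
func (p UInt64Slice) Less(i, j int) bool { return p[i] < p[j] }

// majorityMatchIndex returns the highest log index replicated on a majority,
// assuming matchIndexes includes an entry for the leader's own last index.
func majorityMatchIndex(matchIndexes []uint64) uint64 {
	sorted := make(UInt64Slice, len(matchIndexes))
	copy(sorted, matchIndexes)
	sort.Sort(sorted)
	// In ascending order, the entry at index (n-1)/2 and everything above it
	// covers at least a majority of the n nodes.
	return sorted[(len(sorted)-1)/2]
}

func main() {
	// 5-node cluster: index 7 is held by 3 of 5 nodes, so it can be committed.
	fmt.Println(majorityMatchIndex([]uint64{9, 7, 7, 4, 3})) // 7
}
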
/tapestry/tapestry/helper_methods.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import "testing"
4 |
5 | var port int
6 |
7 | func equal_ids(id1, id2 ID) bool {
8 | if SharedPrefixLength(id1, id2) == DIGITS {
9 | return true
10 | }
11 | return false
12 | }
13 |
14 | func makeTapestryNode(id ID, addr string, t *testing.T) *TapestryNode {
15 | tapestry, err := start(id, port, addr)
16 |
17 | if err != nil {
18 | 		t.Fatalf("Error while making a tapestry: %v", err)
19 | }
20 |
21 | port++
22 | return tapestry.local
23 | }
24 |
25 | func makeTapestry(id ID, addr string, t *testing.T) *Tapestry {
26 | tapestry, err := start(id, port, addr)
27 |
28 | if err != nil {
29 | 		t.Fatalf("Error while making a tapestry: %v", err)
30 | }
31 |
32 | port++
33 | return tapestry
34 | }
--------------------------------------------------------------------------------
/raft/raft/config.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "time"
6 | )
7 |
8 | type Config struct {
9 | ElectionTimeout time.Duration
10 | HeartbeatFrequency time.Duration
11 | ClusterSize int
12 | NodeIdSize int
13 | LogPath string
14 | }
15 |
16 | func DefaultConfig() *Config {
17 | config := new(Config)
18 | config.ClusterSize = 3
19 | config.ElectionTimeout = time.Millisecond * 150
20 | config.HeartbeatFrequency = time.Millisecond * 50
21 | config.NodeIdSize = 2
22 | config.LogPath = "raftlogs"
23 | return config
24 | }
25 |
26 | func CheckConfig(config *Config) error {
27 | if config.ElectionTimeout < config.HeartbeatFrequency {
28 | return fmt.Errorf("The election timeout (%v) is less than the heartbeat frequency (%v)", config.ElectionTimeout, config.HeartbeatFrequency)
29 | }
30 | return nil
31 | }
32 |
--------------------------------------------------------------------------------
/tapestry/tapestry/routingtable_test.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | /*This test adds 100,000 nodes to the table and removes them, checking
8 | that all were deleted.*/
9 |
10 | func TestAddAndRemove(t *testing.T) {
11 | NUM_NODES := 100000
12 | me := Node{RandomID(), ""}
13 | table := NewRoutingTable(me)
14 | nodes := make([]Node, NUM_NODES)
15 | for i := 0; i < NUM_NODES; i++ {
16 | nodes[i] = Node{RandomID(), ""}
17 | table.Add(nodes[i])
18 | }
19 | for i := 0; i < NUM_NODES; i++ {
20 | table.Remove(nodes[i])
21 | }
22 |
23 | for i := 0; i < DIGITS; i++ {
24 | for j := 0; j < BASE; j++ {
25 | if len(*(table.rows[i][j])) > 1 {
26 | t.Errorf("Nodes were not deleted from table.")
27 | }
28 | if len(*(table.rows[i][j])) == 1 &&
29 | !equal_ids(me.Id, (*(table.rows[i][j]))[0].Id) {
30 | t.Errorf("Nodes were not deleted from table.")
31 | }
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/raft/raft/testingPolicy.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "errors"
6 | )
7 |
8 | var ErrorTestingPolicyDenied = errors.New("testing policy has denied this communication")
9 |
10 | type TestingPolicy struct {
11 | pauseWorld bool
12 | rpcPolicy map[string]bool
13 | }
14 |
15 | func NewTesting() *TestingPolicy {
16 | var tp TestingPolicy
17 | tp.rpcPolicy = make(map[string]bool)
18 | return &tp
19 | }
20 |
21 | func (tp *TestingPolicy) IsDenied(a, b NodeAddr) bool {
22 | if tp.pauseWorld {
23 | return true
24 | }
25 | commStr := getCommId(a, b)
26 | denied, exists := tp.rpcPolicy[commStr]
27 | return exists && denied
28 | }
29 |
30 | func getCommId(a, b NodeAddr) string {
31 | return fmt.Sprintf("%v_%v", a.Id, b.Id)
32 | }
33 |
34 | func (tp *TestingPolicy) RegisterPolicy(a, b NodeAddr, denied bool) {
35 | 	commStr := getCommId(a, b)
36 | 	tp.rpcPolicy[commStr] = denied
37 | }
38 |
39 | func (tp *TestingPolicy) PauseWorld(on bool) {
40 | tp.pauseWorld = on
41 | }
--------------------------------------------------------------------------------
/chord/chord/kv_store_test.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "testing"
5 | "strconv"
6 | "math/rand"
7 | )
8 |
9 | func TestRemotePutAndGetBundleRandom(t *testing.T) {
10 | nNodes := 10
11 | numRange := 100
12 | base := make(map[int]int64, numRange)
13 | result := make(map[int]int64, numRange)
14 | nodes, _ := CreateNNodesRandom(nNodes)
15 |
16 | for i := 0; i < numRange; i++ {
17 | base[i] = int64(i * i)
18 | //Now we randomly pick a node and put the value in it
19 | 		nodeIndex := rand.Intn(nNodes)
20 | Put(nodes[nodeIndex], strconv.Itoa(i), strconv.Itoa(i*i))
21 | }
22 |
23 | for i := 0; i < numRange; i++ {
24 | 		nodeIndex := rand.Intn(nNodes)
25 | val, _ := Get(nodes[nodeIndex], strconv.Itoa(i))
26 | result[i], _ = strconv.ParseInt(val, 10, 32)
27 | }
28 |
29 | equal := true
30 | for i := 0; i < numRange; i++ {
31 | if result[i] != base[i] {
32 | equal = false
33 | }
34 | }
35 | if !equal {
36 | 		t.Errorf("TestRemotePutAndGetBundleRandom: values read back do not match the values that were put")
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/client.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import "fmt"
4 |
5 | const MAX_RETRIES = 10
6 |
7 | type Client struct {
8 | LocalAddr string
9 | Id uint64
10 | OceanServ OceanAddr
11 | }
12 |
13 | func CreateClient(remoteAddr OceanAddr) (cp *Client, err error) {
14 | fmt.Println("Oceanstore Create client")
15 | cp = new(Client)
16 |
17 | request := ConnectRequest{}
18 | var reply *ConnectReply
19 |
20 | retries := 0
21 | for retries < MAX_RETRIES {
22 | reply, err = ConnectRPC(&remoteAddr, request)
23 | if err == nil || err.Error() != "EOF" {
24 | break
25 | }
26 | retries++
27 | }
28 | if err != nil {
29 | fmt.Println(err)
30 | if err.Error() == "EOF" {
31 | err = fmt.Errorf("Could not access the ocean server.")
32 | }
33 | return
34 | }
35 |
36 | if !reply.Ok {
37 | 		err = fmt.Errorf("Could not register Client.")
38 | }
39 |
40 | fmt.Println("Create client reply:", reply, err)
41 | cp.Id = reply.Id
42 | cp.OceanServ = remoteAddr
43 |
44 | return
45 | }
46 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/ocean_rpc_impl.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "net"
5 | "net/rpc"
6 | "fmt"
7 | )
8 |
9 | type OceanRPCServer struct {
10 | node *OceanNode
11 | listener net.Listener
12 | rpc *rpc.Server
13 | }
14 |
15 | func newOceanstoreRPCServer(ocean *OceanNode) (server *OceanRPCServer) {
16 | server = new(OceanRPCServer)
17 | server.node = ocean
18 | server.rpc = rpc.NewServer()
19 | 	listener, _, err := OpenListener()
20 | 	if err != nil {
21 | 		panic(err)
22 | 	}
23 | 	server.rpc.RegisterName(listener.Addr().String(), server)
24 | 	server.listener = listener
25 |
26 |
27 | go func() {
28 | for {
29 | conn, err := server.listener.Accept()
30 | if err != nil {
31 | 				fmt.Printf("Oceanstore RPC server accept error: %v\n", err)
32 | continue
33 | }
34 | go server.rpc.ServeConn(conn)
35 | }
36 | }()
37 |
38 | return
39 | }
40 |
41 | func (server *OceanRPCServer) ConnectImpl(req *ConnectRequest, rep *ConnectReply) error {
42 | rvreply, err := server.node.connect(req)
43 | *rep = rvreply
44 | return err
45 | }
46 |
--------------------------------------------------------------------------------
/raft/raft/listener.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "net"
5 | "math/rand"
6 | "time"
7 | "os"
8 | "fmt"
9 | "syscall"
10 | )
11 |
12 | // ephemeral port range
13 | const LOW_PORT int = 32768
14 | const HIGH_PORT int = 61000
15 |
16 | func OpenListener() (net.Listener, int, error) {
17 | rand.Seed(time.Now().UTC().UnixNano())
18 | port := rand.Intn(HIGH_PORT - LOW_PORT) + LOW_PORT
19 | conn, err := OpenPort(port)
20 | if err != nil {
21 | if addrInUse(err) {
22 | time.Sleep(time.Millisecond * 100)
23 | return OpenListener()
24 | } else {
25 | return nil, 0, err //TODO check if I should use -1 for invalid port
26 | }
27 | }
28 | return conn, port, err
29 | }
30 |
31 | func addrInUse(err error) bool {
32 | if opErr, ok := err.(*net.OpError); ok {
33 | if osErr, ok := opErr.Err.(*os.SyscallError); ok {
34 | return osErr.Err == syscall.EADDRINUSE
35 | }
36 | }
37 | return false
38 | }
39 |
40 | func OpenPort(port int) (net.Listener, error) {
41 | hostname, err := os.Hostname()
42 | if err != nil {
43 | return nil, err
44 | }
45 | addr := fmt.Sprintf("%v:%v", hostname, port)
46 | conn, err := net.Listen("tcp4", addr)
47 | return conn, err
48 | }
49 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/listener.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "net"
5 | "math/rand"
6 | "time"
7 | "os"
8 | "fmt"
9 | "syscall"
10 | )
11 |
12 | // ephemeral port range
13 | const LOW_PORT int = 32768
14 | const HIGH_PORT int = 61000
15 |
16 | func OpenListener() (net.Listener, int, error) {
17 | rand.Seed(time.Now().UTC().UnixNano())
18 | port := rand.Intn(HIGH_PORT - LOW_PORT) + LOW_PORT
19 | conn, err := OpenPort(port)
20 | if err != nil {
21 | if addrInUse(err) {
22 | time.Sleep(time.Millisecond * 100)
23 | return OpenListener()
24 | } else {
25 | return nil, 0, err //TODO check if I should use -1 for invalid port
26 | }
27 | }
28 | return conn, port, err
29 | }
30 |
31 | func addrInUse(err error) bool {
32 | if opErr, ok := err.(*net.OpError); ok {
33 | if osErr, ok := opErr.Err.(*os.SyscallError); ok {
34 | return osErr.Err == syscall.EADDRINUSE
35 | }
36 | }
37 | return false
38 | }
39 |
40 | func OpenPort(port int) (net.Listener, error) {
41 | hostname, err := os.Hostname()
42 | if err != nil {
43 | return nil, err
44 | }
45 | addr := fmt.Sprintf("%v:%v", hostname, port)
46 | conn, err := net.Listen("tcp4", addr)
47 | return conn, err
48 | }
--------------------------------------------------------------------------------
/chord/utils/listener.go:
--------------------------------------------------------------------------------
1 | /* Purpose: Library code to help create a TCP-based listening socket. */
2 |
3 | package utils
4 |
5 | import (
6 | "fmt"
7 | "math/rand"
8 | "net"
9 | "os"
10 | "syscall"
11 | "time"
12 | )
13 |
14 | // Ephemeral port range
15 | const LOW_PORT int = 32768
16 | const HIGH_PORT int = 61000
17 |
18 | // Errno to support windows machines
19 | const WIN_EADDRINUSE = syscall.Errno(10048)
20 |
21 | // Listens on a random port in the defined ephemeral range, retries if port is already in use
22 | func OpenListener() (net.Listener, int, error) {
23 | rand.Seed(time.Now().UTC().UnixNano())
24 | port := rand.Intn(HIGH_PORT-LOW_PORT) + LOW_PORT
25 | hostname, err := os.Hostname()
26 | if err != nil {
27 | return nil, -1, err
28 | }
29 |
30 | addr := fmt.Sprintf("%v:%v", hostname, port)
31 | conn, err := net.Listen("tcp4", addr)
32 | if err != nil {
33 | if addrInUse(err) {
34 | time.Sleep(100 * time.Millisecond)
35 | return OpenListener()
36 | } else {
37 | return nil, -1, err
38 | }
39 | }
40 | return conn, port, err
41 | }
42 |
43 | func addrInUse(err error) bool {
44 | if opErr, ok := err.(*net.OpError); ok {
45 | if osErr, ok := opErr.Err.(*os.SyscallError); ok {
46 | return osErr.Err == syscall.EADDRINUSE || osErr.Err == WIN_EADDRINUSE
47 | }
48 | }
49 | return false
50 | }
--------------------------------------------------------------------------------
/chord/chord/finger_test.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "math"
5 | "testing"
6 | "time"
7 | "fmt"
8 | )
9 |
10 | func TestInitFingerTable(t *testing.T) {
11 | var res, expected []byte
12 | m := int(math.Pow(2, KEY_LENGTH))
13 | for i := 0; i < m; i++ {
14 | node, _ := CreateDefinedNode(nil, []byte{byte(i)})
15 | for j := 0; j < KEY_LENGTH; j++ {
16 | res = node.FingerTable[j].Start
17 | expected = []byte{byte((i + int(math.Pow(float64(2), float64(j)))) % m)}
18 | if !EqualIds(res, expected) {
19 | t.Errorf("[%v] BAD ENTRY %v: %v != %v", i, j, res, expected)
20 | }
21 | }
22 | }
23 | }
24 |
25 | /*
26 | Makes 26 nodes, waits a few seconds, and checks that every entry points to its next multiple of 10 from "Start"
27 | (e.g., Start = 178 should always point to 180)
28 | */
29 | func TestFixNextFinger(t *testing.T) {
30 | nodes, _ := CreateNNodes(26)
31 | time.Sleep(time.Second * 5)
32 | for i := 0; i < 26; i++ {
33 | node := nodes[i]
34 | for j := 0; j < KEY_LENGTH; j++ {
35 | start := node.FingerTable[j].Start
36 | pointer := node.FingerTable[j].Node
37 | var expected []byte
38 | if start[0]%10 == 0 {
39 | expected = []byte{byte(start[0])}
40 | } else {
41 | expected = []byte{byte(((start[0]/10 + 1) % 26) * 10)}
42 | }
43 |
44 | if !EqualIds(pointer.Id, expected) {
45 | fmt.Printf("[%v] Error at\nStart: %v, Node: %v, expected: %v",
46 | node.Id, start, pointer.Id, expected)
47 | }
48 | }
49 | }
50 | }
--------------------------------------------------------------------------------
/oceanstore/oceanstore/guid.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "../../raft/raft"
5 | "fmt"
6 | )
7 |
8 | func (ocean *OceanNode) getRaftVguid(aguid Aguid, id uint64) (Vguid, error) {
9 | // Get the raft client struct
10 | c, ok := ocean.clients[id]
11 | if !ok {
12 | panic("Attempted to get client from id, but not found.")
13 | }
14 |
15 | res, err := c.SendRequestWithResponse(raft.GET, []byte(aguid))
16 | if err != nil {
17 | return "", err
18 | }
19 | if res.Status != raft.OK {
20 | return "", fmt.Errorf("Could not get response from raft.")
21 | }
22 |
23 | return Vguid(res.Response), nil
24 | }
25 |
26 | func (ocean *OceanNode) setRaftVguid(aguid Aguid, vguid Vguid, id uint64) error {
27 | // Get the raft client struct
28 | c, ok := ocean.clients[id]
29 | if !ok {
30 | panic("Attempted to get client from id, but not found.")
31 | }
32 |
33 | data := fmt.Sprintf("%v:%v", aguid, vguid)
34 |
35 | res, err := c.SendRequestWithResponse(raft.SET, []byte(data))
36 | if err != nil {
37 | return err
38 | }
39 | if res.Status != raft.OK {
40 | return fmt.Errorf("Could not get response from raft.")
41 | }
42 | return nil
43 | }
44 |
45 | func (ocean *OceanNode) removeRaftVguid(aguid Aguid, id uint64) error {
46 | // Get the raft client struct
47 | c, ok := ocean.clients[id]
48 | if !ok {
49 | panic("Attempted to get client from id, but not found.")
50 | }
51 |
52 | res, err := c.SendRequestWithResponse(raft.REMOVE, []byte(aguid))
53 | if err != nil {
54 | return err
55 | }
56 | if res.Status != raft.OK {
57 | return fmt.Errorf("Could not get response from raft.")
58 | }
59 |
60 | return nil
61 | }
62 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/guid_test.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "testing"
5 | "strings"
6 | "time"
7 | "fmt"
8 | )
9 |
10 | func TestRaftMap(t *testing.T) {
11 | ocean, err := Start()
12 | if err != nil {
13 | 		t.Errorf("Could not init oceanstore: %v", err)
14 | return
15 | }
16 | time.Sleep(time.Millisecond * 1000)
17 |
18 | fmt.Println(ocean.Local)
19 | client := ocean.raftClient
20 |
21 | err = ocean.setRaftVguid("DEAD", "BEEF", client.Id)
22 | if err != nil {
23 | 		t.Errorf("Could not set raft vguid: %v", err)
24 | return
25 | }
26 |
27 | response, err := ocean.getRaftVguid("DEAD", client.Id)
28 | if err != nil {
29 | 		t.Errorf("Could not get raft vguid: %v", err)
30 | return
31 | }
32 |
33 | 	ok := strings.Split(string(response), ":")[0]
34 | 	vguid := strings.Split(string(response), ":")[1]
35 |
36 | 	if ok != "SUCCESS" {
37 | 		t.Errorf("Could not get raft vguid: %v", response)
38 | 	}
39 |
40 | 	if vguid != "BEEF" {
41 | 		t.Errorf("Raft didn't return the correct vguid. BEEF != %s", vguid)
42 | 	}
43 |
44 | // Reset aguid to another vguid
45 | err = ocean.setRaftVguid("DEAD", "B004", client.Id)
46 | if err != nil {
47 | 		t.Errorf("Could not set raft vguid: %v", err)
48 | return
49 | }
50 |
51 | response, err = ocean.getRaftVguid("DEAD", client.Id)
52 | if err != nil {
53 | 		t.Errorf("Could not get raft vguid: %v", err)
54 | return
55 | }
56 |
57 | 	ok = strings.Split(string(response), ":")[0]
58 | 	vguid = strings.Split(string(response), ":")[1]
59 |
60 | 	if ok != "SUCCESS" {
61 | 		t.Errorf("Could not get raft vguid: %v", response)
62 | 	}
63 |
64 | 	if vguid != "B004" {
65 | 		t.Errorf("Raft didn't return the correct vguid. B004 != %s", vguid)
66 | 	}
67 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Oceanstore
2 |
3 | Oceanstore is a distributed file system. Its design is motivated by the paper _OceanStore: An Architecture for Global-Scale Persistent Storage_.
4 |
5 | # Usage Example
6 | WIP
7 |
8 | # File System - Abstractions
9 | The two primitive file system objects are files and directories. A file is a single collection of sequential bytes, and directories provide a way to organize files hierarchically.
10 | * A data block is a fixed-length array of bytes; a file consists of a number of data blocks.
11 | * An indirect block stores references to the ordered list of data blocks that make up a file.
12 | * An inode maintains the metadata associated with a file. The inode of a file points to direct blocks or indirect blocks; the inode of a directory points to the inodes of files or other directories.
13 |
14 | # File System Operations
15 |
16 | ## Lookup
17 | Find the inode of the root. Traverse the directories/files in its indirect block to find the first directory/file in the path. Repeat the search until we reach the end of the path.
18 |
19 | ## Reading and Writing
20 | To read from or write to a file, we need:
21 | * Location: the starting offset in the file for reading or writing.
22 | * Buffer: while reading, the contents of the file are copied into the buffer; while writing, the contents of the buffer are written into the file.
23 |
24 | If we write past the end of the file, we need to allocate new data blocks and add their references to the indirect block. We also choose the block size. Given the starting offset and the number of bytes to read or write, it is easy to find the relevant blocks, with the start position in the first block and the end position in the last block.
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
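The read/write description in the README above boils down to a little offset arithmetic. A minimal sketch, assuming a 4 KB block size and a hypothetical blockRange helper; the repository's real block-size constant and inode layout live in oceanstore/oceanstore/inode.go, which is not reproduced in this dump.

package main

import "fmt"

const BLOCK_SIZE = 4096 // assumed block size, for illustration only

// blockRange computes which data blocks a read or write of `length` bytes
// starting at `offset` touches, plus the positions within the first and last
// blocks. length must be greater than zero.
func blockRange(offset, length uint64) (firstBlock, lastBlock, startInFirst, endInLast uint64) {
	firstBlock = offset / BLOCK_SIZE
	lastBlock = (offset + length - 1) / BLOCK_SIZE
	startInFirst = offset % BLOCK_SIZE
	endInLast = (offset + length - 1) % BLOCK_SIZE
	return
}

func main() {
	// Reading 5000 bytes at offset 6000 touches bytes 1904..4095 of block 1
	// and bytes 0..2807 of block 2.
	fmt.Println(blockRange(6000, 5000)) // 1 2 1904 2807
}
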
/raft/README.md:
--------------------------------------------------------------------------------
1 | # Raft
2 |
3 | Raft is a consensus protocol. Its design is motivated by the paper _In Search of an Understandable Consensus Algorithm_. I would highly recommend a visualization for understanding Raft.
4 |
5 | # Usage Example
6 | Cli-node serves as a console for interacting with raft, creating nodes and querying state on the local nodes. It provides the following commands:
7 | * Debug
8 | * Recv
9 | * Send
10 | * Disable
11 | * Enable
12 | * state
13 | * Exit
14 |
15 | Testing-policy simulates different network splits to ensure correct behaviour under partitions.
16 |
17 | # State Machine
18 | Software that makes use of Raft works by interpreting the entries in a log as input to a state machine. In this project, the [state machine](https://github.com/sattiwari/oceanstore/blob/master/raft/raft/machine.go) calculates the next step of a hash chain. Cli-client supports interaction with the state machine. It provides the following commands:
19 | * Init (value): sends an initial value for hashing to the replicated state machine
20 | * Hash: instructs the state machine to perform another round of hashing
21 |
22 | # Elections
23 | Leader election consists of a Raft cluster deciding which of the nodes in the cluster should be the leader for a given term. Raft_states contains the logic for a Raft node being in one of the three states: FOLLOWER, CANDIDATE, and LEADER.
24 |
25 | # Log Replication
26 | Log replication consists of making sure that the raft state machine is up to date across a majority of nodes in the cluster. It is based on AppendEntries (heartbeat), periodically initiated by the leader.
27 |
28 | # Client Interaction
29 | The client sends requests to the cluster and gets replies with the results once the corresponding log entries have been committed and applied to the state machine. If the Raft node a client connects to is not the leader, the node returns a hint pointing to the leader.
30 |
--------------------------------------------------------------------------------
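A minimal sketch of the hash-chain state machine described above, assuming SHA-256 and made-up type and method names; the repository's actual state machine lives in raft/raft/machine.go, which is not reproduced in this dump. Init roughly corresponds to the HASH_CHAIN_INIT command and Add to HASH_CHAIN_ADD.

package main

import (
	"crypto/sha256"
	"fmt"
)

// hashChain keeps the latest link of the chain as its only state.
type hashChain struct {
	state []byte
}

// Init seeds the chain with an initial value.
func (h *hashChain) Init(seed []byte) {
	h.state = seed
}

// Add performs one more round of hashing and returns the new link.
func (h *hashChain) Add() []byte {
	sum := sha256.Sum256(h.state)
	h.state = sum[:]
	return h.state
}

func main() {
	var chain hashChain
	chain.Init([]byte("hello"))
	fmt.Printf("%x\n", chain.Add()) // first round
	fmt.Printf("%x\n", chain.Add()) // second round
}
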
/raft/raft/raftRPCImpl.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "net/rpc"
6 | )
7 |
8 | type RaftRPCServer struct {
9 | node *RaftNode
10 | }
11 |
12 | func (server *RaftRPCServer) startRpcServer() {
13 | for {
14 | if server.node.IsShutDown {
15 | fmt.Printf("(%v) Shutting down RPC server\n", server.node.Id)
16 | return
17 | }
18 | conn, err := server.node.Listener.Accept()
19 | if err != nil {
20 | if !server.node.IsShutDown {
21 | fmt.Printf("(%v) Raft RPC server accept error: %v\n", server.node.Id, err)
22 | }
23 | continue
24 | }
25 | if !server.node.IsShutDown {
26 | go rpc.ServeConn(conn)
27 | } else {
28 | conn.Close()
29 | }
30 | }
31 | }
32 |
33 | func (server *RaftRPCServer) JoinImpl(req *JoinRequest, reply *JoinReply) error {
34 | err := server.node.Join(req)
35 | reply.Success = err == nil
36 | return err
37 | }
38 |
39 | func (server *RaftRPCServer) StartNodeImpl(req *StartNodeRequest, reply *StartNodeReply) error {
40 | err := server.node.StartNode(req)
41 | reply.Success = err == nil
42 | return err
43 | }
44 |
45 | func (server *RaftRPCServer) RequestVoteImpl(req *RequestVoteRequest, reply *RequestVoteReply) error {
46 | if server.node.Testing.IsDenied(req.CandidateId, *server.node.GetLocalAddr()) {
47 | return ErrorTestingPolicyDenied
48 | }
49 | rvreply, err := server.node.RequestVote(req)
50 | *reply = rvreply
51 | return err
52 | }
53 |
54 | func (server *RaftRPCServer) ClientRequestImpl(req *ClientRequest, reply *ClientReply) error {
55 | rvreply, err := server.node.ClientRequest(req)
56 | *reply = rvreply
57 | return err
58 | }
59 |
60 | func (server *RaftRPCServer) RegisterClientImpl(req *RegisterClientRequest, reply *RegisterClientReply) error {
61 | rvreply, err := server.node.RegisterClient(req)
62 | *reply = rvreply
63 | return err
64 | }
65 |
66 | func (server *RaftRPCServer) AppendEntriesImpl(req *AppendEntriesRequest, reply *AppendEntriesReply) error {
67 | if server.node.Testing.IsDenied(req.LeaderId, *server.node.GetLocalAddr()) {
68 | return ErrorTestingPolicyDenied
69 | }
70 | aereply, err := server.node.AppendEntries(req)
71 | *reply = aereply
72 | return err
73 | }
--------------------------------------------------------------------------------
/raft/raft/clientRPCApi.go:
--------------------------------------------------------------------------------
1 | package raft
2 | //
3 | //type ClientStatus int
4 | //
5 | //const (
6 | // OK ClientStatus = iota
7 | // NOT_LEADER
8 | // ELECTION_IN_PROGRESS
9 | // REQ_FAILED
10 | //)
11 | //
12 | //type FsmCommand int
13 | //
14 | //const (
15 | // HASH_CHAIN_ADD FsmCommand = iota
16 | // HASH_CHAIN_INIT
17 | // CLIENT_REGISTRATION
18 | // INIT
19 | // NOOP
20 | //)
21 | //
22 | //type ClientRequest struct {
23 | // //unique id associated with client session. Recevied via previous register client call
24 | // ClientId uint64
25 | //
26 | // //avoids duplicates
27 | // SequenceNumber uint64
28 | //
29 | // //Command to be executed by state machine. It may affect state
30 | // Command FsmCommand
31 | //
32 | // //Data to accompany the command to state machine
33 | // Data []byte
34 | //}
35 | //
36 | //type ClientReply struct {
37 | // //OK if the state machine successfully applied command
38 | // Status ClientStatus
39 | //
40 | // //state machine response
41 | // Response string
42 | //
43 | // //a non leader node should reply the correct leader
44 | // LeaderHint NodeAddr
45 | //}
46 | //
47 | //func ClientRequestRPC(remoteNode *NodeAddr, request ClientRequest) (*ClientReply, error) {
48 | // var reply ClientReply
49 | // err := makeRemoteCall(remoteNode, "ClientRequestImpl", request, &reply)
50 | // if err != nil {
51 | // return nil, err
52 | // }
53 | // return &reply, nil
54 | //}
55 | //
56 | //type RegisterClientRequest struct {
57 | // // The client address invoking request
58 | // FromNode NodeAddr
59 | //}
60 | //
61 | //type RegisterClientReply struct {
62 | // //ok if the state machine registered client
63 | // Status ClientStatus
64 | //
65 | //// unique id for the client session
66 | // ClientId uint64
67 | //
68 | // // if the node contacted is not leader, it tells the correct leader
69 | // LeaderHint NodeAddr
70 | //}
71 | //
72 | //func RegisterClientRPC(remoteNode *NodeAddr, request RegisterClientRequest) (*RegisterClientReply, error) {
73 | // var reply RegisterClientReply
74 | // err := makeRemoteCall(remoteNode, "RegisterClientImpl", request, &reply)
75 | // if err != nil {
76 | // return nil, err
77 | // }
78 | // return &reply, nil
79 | //}
80 | //
81 |
--------------------------------------------------------------------------------
/chord/chord/util_test.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 | )
7 |
8 | func TestHashKey(t *testing.T) {
9 | key := HashKey("Im a string")
10 | sameKey := HashKey("Im a string")
11 |
12 | if !bytes.Equal(key, sameKey) {
13 | t.Errorf("Hash keys made by the same string are not equal.")
14 | }
15 |
16 | differentKey := HashKey("Im another string, totally different.")
17 | if bytes.Equal(key, differentKey) {
18 | t.Errorf("Hash keys made by the different strings are equal.")
19 | }
20 | }
21 |
22 | func TestBetweenSimple(t *testing.T) {
23 | A := []byte{10}
24 | B := []byte{15}
25 | C := []byte{20}
26 |
27 | // B is between A and C...
28 | if !Between(B, A, C) {
29 | t.Errorf("Between does not return true when it should. %v < %v < %v",
30 | A[0], B[0], C[0])
31 | }
32 | // ...but it shouldn't be between C and A
33 | if Between(B, C, A) {
34 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v",
35 | C[0], B[0], A[0])
36 | }
37 | // Between shouldn't be right inclusive.
38 | if Between(B, A, B) {
39 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v",
40 | A[0], B[0], B[0])
41 | }
42 |
43 | 	if !Between(A, C, B) {
44 | 		t.Errorf("Between does not return true when it should. %v < %v < %v",
45 | 			C[0], A[0], B[0])
46 | 	}
47 | if Between(A, B, C) {
48 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v",
49 | B[0], A[0], C[0])
50 | }
51 | }
52 |
53 | func TestBetweenEdge(t *testing.T) {
54 | A := []byte{230}
55 | B := []byte{15}
56 | C := []byte{80}
57 |
58 | // B is between A and C...
59 | if !Between(B, A, C) {
60 | t.Errorf("Between does not return true when it should. %v < %v < %v",
61 | A[0], B[0], C[0])
62 | }
63 | // ...but it shouldn't be between C and A
64 | 	if Between(B, C, A) {
65 | 		t.Errorf("Between returns true when it shouldn't. %v < %v < %v",
66 | 			C[0], B[0], A[0])
67 | 	}
68 | }
69 |
70 | func TestBetweenEdge2(t *testing.T) {
71 | A := []byte{20}
72 | B := []byte{90}
73 | C := []byte{0}
74 |
75 | // B is between A and C...
76 | if !Between(B, A, C) {
77 | t.Errorf("Between does not return true when it should. %v < %v < %v",
78 | A[0], B[0], C[0])
79 | }
80 | }
81 |
82 |
--------------------------------------------------------------------------------
/tapestry/tapestry/blobstore.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import "fmt"
4 |
5 | /*
6 | This is a utility class tacked on to the tapestry DOLR.
7 | */
8 | type BlobStore struct {
9 | blobs map[string]Blob
10 | }
11 |
12 | type Blob struct {
13 | bytes []byte
14 | done chan bool
15 | }
16 |
17 | type BlobStoreRPC struct {
18 | store *BlobStore
19 | }
20 |
21 | /*
22 | Create a new blobstore
23 | */
24 | func NewBlobStore() *BlobStore {
25 | bs := new(BlobStore)
26 | bs.blobs = make(map[string]Blob)
27 | return bs
28 | }
29 |
30 | /*
31 | For RPC server registration
32 | */
33 | func NewBlobStoreRPC(store *BlobStore) *BlobStoreRPC {
34 | rpc := new(BlobStoreRPC)
35 | rpc.store = store
36 | return rpc
37 | }
38 |
39 | /*
40 | Remove all blobs and unregister them all
41 | */
42 | func (bs *BlobStore) DeleteAll() {
43 | // unregister every blob
44 | for _, blob := range bs.blobs {
45 | blob.done <- true
46 | }
47 | // clear the map
48 | bs.blobs = make(map[string]Blob)
49 | }
50 |
51 | /*
52 | Remove the blob and unregister it
53 | */
54 | func (bs *BlobStore) Delete(key string) bool {
55 | // If a previous blob exists, unregister it
56 | previous, exists := bs.blobs[key]
57 | if exists {
58 | previous.done <- true
59 | }
60 | delete(bs.blobs, key)
61 | return exists
62 | }
63 |
64 | /*
65 | Store bytes in the blobstore
66 | */
67 | func (bs *BlobStore) Put(key string, blob []byte, unregister chan bool) {
68 | // If a previous blob exists, delete it
69 | bs.Delete(key)
70 |
71 | // Register the new one
72 | bs.blobs[key] = Blob{blob, unregister}
73 | }
74 |
75 | /*
76 | Get bytes from the blobstore
77 | */
78 | func (bs *BlobStore) Get(key string) ([]byte, bool) {
79 | blob, exists := bs.blobs[key]
80 | if exists {
81 | return blob.bytes, true
82 | } else {
83 | return nil, false
84 | }
85 | }
86 |
87 | /*
88 | Fetches the specified blob from the remote node
89 | */
90 | func FetchRemoteBlob(remote Node, key string) (blob *[]byte, err error) {
91 | fmt.Printf("FetchRemoteBlob %v %v", key, remote)
92 | err = makeRemoteCall(remote.Address, "BlobStoreRPC", "Fetch", key, &blob)
93 | return
94 | }
95 |
96 | /*
97 | Invoked over RPC to fetch bytes from the blobstore
98 | */
99 | func (rpc *BlobStoreRPC) Fetch(key string, blob *[]byte) error {
100 | b, exists := rpc.store.blobs[key]
101 | if exists {
102 | *blob = b.bytes
103 | }
104 | return nil
105 | }
--------------------------------------------------------------------------------
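The unregister channel threaded through Put above is how a publisher learns that its blob was later deleted or replaced; note that Delete performs a blocking send on that channel, so something must be receiving on it. A usage sketch, with publishBlob as a hypothetical caller sitting in the same package (the real republishing logic lives elsewhere in the tapestry package):

// publishBlob stores data under key and reacts when the blob is unregistered.
func publishBlob(bs *BlobStore, key string, data []byte) {
	done := make(chan bool)
	bs.Put(key, data, done)

	go func() {
		// Blocks until Delete, DeleteAll, or a replacing Put signals the
		// channel; at that point the caller should stop advertising the key.
		<-done
		fmt.Printf("blob %v was unregistered\n", key)
	}()
}
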
/chord/chord/finger.go:
--------------------------------------------------------------------------------
1 | /* Purpose: Finger table related functions for a given Chord node. */
2 |
3 | package chord
4 |
5 | import (
6 | "time"
7 | "math/big"
8 | "log"
9 | "fmt"
10 | )
11 |
12 | /* A single finger table entry */
13 | type FingerEntry struct {
14 | Start []byte /* ID hash of (n + 2^i) mod (2^m) */
15 | Node *RemoteNode /* RemoteNode that Start points to */
16 | }
17 |
18 | /* Create initial finger table that only points to itself, will be fixed later */
19 | func (node *Node) initFingerTable() {
20 | // Create an array of FingerEntries of length KEY_LENGTH
21 | node.FingerTable = make([]FingerEntry, KEY_LENGTH)
22 |
23 | for i := range node.FingerTable {
24 | // FingerEntry pointing to node
25 | newEntry := new(FingerEntry)
26 | newEntry.Start = fingerMath(node.Id, i, KEY_LENGTH)
27 | newEntry.Node = node.RemoteSelf
28 | node.FingerTable[i] = *newEntry
29 | }
30 | node.Successor = node.RemoteSelf
31 | }
32 |
33 | /* Called periodically (in a separate goroutine) to fix entries in our finger table. */
34 | func (node *Node) fixNextFinger(ticker *time.Ticker) {
35 | 	for range ticker.C {
36 | 		// Fix one finger table entry per tick, cycling through the table.
37 | 		next_hash := fingerMath(node.Id, node.next, KEY_LENGTH)
38 | 		successor, err := node.findSuccessor(next_hash)
39 | 		if err != nil {
40 | 			log.Fatal(err)
41 | 		}
42 | 		node.ftLock.Lock()
43 | 		node.FingerTable[node.next].Node = successor
44 | 		node.ftLock.Unlock()
45 | 		node.next += 1
46 | 		if node.next >= KEY_LENGTH {
47 | 			node.next = 1
48 | 		}
49 | 	}
50 | }
51 |
52 |
53 | /* (n + 2^i) mod (2^m) */
54 | func fingerMath(n []byte, i int, m int) []byte {
55 | two := &big.Int{}
56 | two.SetInt64(2)
57 |
58 | N := &big.Int{}
59 | N.SetBytes(n)
60 |
61 | // 2^i
62 | I := &big.Int{}
63 | I.SetInt64(int64(i))
64 | I.Exp(two, I, nil)
65 |
66 | // 2^m
67 | M := &big.Int{}
68 | M.SetInt64(int64(m))
69 | M.Exp(two, M, nil)
70 |
71 | result := &big.Int{}
72 | result.Add(N, I)
73 | result.Mod(result, M)
74 |
75 | // Big int gives an empty array if value is 0.
76 | // Here is a way for us to still return a 0 byte
77 | zero := &big.Int{}
78 | zero.SetInt64(0)
79 | if result.Cmp(zero) == 0 {
80 | return []byte{0}
81 | }
82 |
83 | return result.Bytes()
84 | }
85 |
86 | /* Print contents of a node's finger table */
87 | func PrintFingerTable(node *Node) {
88 | fmt.Printf("[%v] FingerTable:\n", HashStr(node.Id))
89 | for _, val := range node.FingerTable {
90 | fmt.Printf("\t{start:%v\tnodeLoc:%v %v}\n",
91 | HashStr(val.Start), HashStr(val.Node.Id), val.Node.Addr)
92 | }
93 | }
--------------------------------------------------------------------------------
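A couple of concrete values for fingerMath above, written as a test-style sketch that assumes an 8-bit keyspace (KEY_LENGTH = 8, as the single-byte IDs in the chord tests suggest); TestFingerMathExamples is not a test from the repository.

func TestFingerMathExamples(t *testing.T) {
	// (10 + 2^3) mod 2^8 = 18
	if !EqualIds(fingerMath([]byte{10}, 3, 8), []byte{18}) {
		t.Errorf("expected 18")
	}
	// (250 + 2^3) mod 2^8 = 2, wrapping around the ring
	if !EqualIds(fingerMath([]byte{250}, 3, 8), []byte{2}) {
		t.Errorf("expected 2")
	}
	// (0 + 2^0) mod 2^8 = 1
	if !EqualIds(fingerMath([]byte{0}, 0, 8), []byte{1}) {
		t.Errorf("expected 1")
	}
}
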
/oceanstore/oceanstore/util.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "strconv"
5 | "strings"
6 | "../../tapestry/tapestry"
7 | "math/rand"
8 | )
9 |
10 | func removeExcessSlashes(path string) string {
11 | var firstNonSlash, lastNonSlash, start int
12 |
13 | onlySlashes := true
14 | str := path
15 |
16 | length := len(path)
17 |
18 | // Nothing to do
19 | if path[0] != '/' && path[length-1] != '/' {
20 | return str
21 | }
22 |
23 | // Get the first non slash
24 | for i := 0; i < length; i++ {
25 | if str[i] != '/' {
26 | onlySlashes = false
27 | firstNonSlash = i
28 | break
29 | }
30 | }
31 |
32 | // Get the last non slash
33 | for i := length - 1; i >= 0; i-- {
34 | if str[i] != '/' {
35 | lastNonSlash = i
36 | break
37 | }
38 | }
39 |
40 | // Guaranteed to be the root path
41 | if onlySlashes {
42 | str = "/"
43 | return str
44 | } else {
45 | length = lastNonSlash - firstNonSlash + 1
46 | if str[0] == '/' {
47 | start = firstNonSlash - 1
48 | length++
49 | } else {
50 | start = 0
51 | }
52 |
53 | str = path[start : start+length]
54 | }
55 |
56 | length = len(str)
57 | for i := 0; i < length; i++ {
58 | if i+1 == length {
59 | break
60 | }
61 |
62 | if str[i] == '/' && str[i+1] == '/' {
63 | str = str[:i] + str[i+1:]
64 | length -= 1
65 | i -= 1
66 | }
67 | }
68 |
69 | return str
70 | }
71 |
72 | func hashToGuid(id tapestry.ID) Guid {
73 | s := ""
74 | for i := 0; i < tapestry.DIGITS; i++ {
75 | s += strconv.FormatUint(uint64(byte(id[i])), tapestry.BASE)
76 | }
77 | return Guid(strings.ToUpper(s))
78 | }
79 |
80 | func (ocean *OceanNode) getRandomTapestryNode() tapestry.Node {
81 | index := rand.Int() % TAPESTRY_NODES
82 | return ocean.tnodes[index].GetLocalNode()
83 | }
84 |
85 | // Puts the contents of the ID inside the given byte
86 | // Starting at 'start' position
87 | func IdIntoByte(bytes []byte, id *tapestry.ID, start int) {
88 | for i := 0; i < tapestry.DIGITS; i++ {
89 | bytes[start+i] = byte(id[i])
90 | }
91 | }
92 |
93 | // Helper function used in 'ls'
94 | func makeString(elements [FILES_PER_INODE + 2]string) string {
95 | ret := ""
96 | for _, s := range elements {
97 | if s == "" {
98 | break
99 | }
100 | ret += "\t" + s
101 | }
102 | return ret
103 | }
104 |
105 | func AguidIntoByte(bytes []byte, aguid Aguid, start uint32) {
106 | for i := uint32(0); i < tapestry.DIGITS; i++ {
107 | bytes[start+i] = byte(aguid[i])
108 | }
109 | }
110 |
111 | func MakeZeros(bytes []byte, start uint32) {
112 | for i := uint32(0); i < tapestry.DIGITS; i++ {
113 | bytes[start+i] = 0
114 | }
115 | }
--------------------------------------------------------------------------------
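To make the slash-collapsing logic of removeExcessSlashes above concrete: leading and interior runs of '/' collapse to a single slash, trailing slashes are dropped, and a path consisting only of slashes becomes the root. A test-style sketch of that behaviour (TestRemoveExcessSlashes is not a test from the repository):

func TestRemoveExcessSlashes(t *testing.T) {
	cases := map[string]string{
		"//a//b/": "/a/b", // collapse doubles, keep one leading slash, drop the trailing one
		"/":       "/",    // a path of only slashes is the root
		"a/b":     "a/b",  // already clean, returned unchanged
	}
	for in, want := range cases {
		if got := removeExcessSlashes(in); got != want {
			t.Errorf("removeExcessSlashes(%q) = %q, want %q", in, got, want)
		}
	}
}
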
/chord/cli.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "./chord"
5 | "bufio"
6 | "flag"
7 | "fmt"
8 | "log"
9 | "math/big"
10 | "os"
11 | "strings"
12 | )
13 |
14 | func NodeStr(node *chord.Node) string {
15 | var succ []byte
16 | var pred []byte
17 | if node.Successor != nil {
18 | succ = node.Successor.Id
19 | }
20 | if node.Predecessor != nil {
21 | pred = node.Predecessor.Id
22 | }
23 |
24 | return fmt.Sprintf("Node-%v: {succ:%v, pred:%v}", node.Id, succ, pred)
25 | }
26 |
27 | func main() {
28 | countPtr := flag.Int("count", 5, "Total number of Chord nodes to start up in this process")
29 | addrPtr := flag.String("addr", "", "Address of a node in the Chord ring you wish to join")
30 | idPtr := flag.String("id", "", "ID of a node in the Chord ring you wish to join")
31 | flag.Parse()
32 |
33 | var parent *chord.RemoteNode
34 | if *addrPtr == "" {
35 | parent = nil
36 | } else {
37 | parent = new(chord.RemoteNode)
38 | val := big.NewInt(0)
39 | val.SetString(*idPtr, 10)
40 | parent.Id = val.Bytes()
41 | parent.Addr = *addrPtr
42 | fmt.Printf("Attach this node to id:%v, addr:%v\n", parent.Id, parent.Addr)
43 | }
44 |
45 | var err error
46 | nodes := make([]*chord.Node, *countPtr)
47 | for i, _ := range nodes {
48 | nodes[i], err = chord.CreateNode(parent)
49 | if err != nil {
50 | fmt.Println("Unable to create new node!")
51 | log.Fatal(err)
52 | }
53 | if parent == nil {
54 | parent = nodes[i].RemoteSelf
55 | }
56 | fmt.Printf("Created -id %v -addr %v\n", chord.HashStr(nodes[i].Id), nodes[i].Addr)
57 | }
58 |
59 | for {
60 | fmt.Printf("quit|node|table|addr|data|get|put > ")
61 | reader := bufio.NewReader(os.Stdin)
62 | line, _ := reader.ReadString('\n')
63 | line = strings.TrimSpace(line)
64 | args := strings.SplitN(line, " ", 3)
65 |
66 | switch args[0] {
67 | case "node":
68 | for _, node := range nodes {
69 | fmt.Println(NodeStr(node))
70 | }
71 | case "table":
72 | for _, node := range nodes {
73 | chord.PrintFingerTable(node)
74 | }
75 | case "addr":
76 | for _, node := range nodes {
77 | fmt.Println(node.Addr)
78 | }
79 | case "data":
80 | for _, node := range nodes {
81 | chord.PrintDataStore(node)
82 | }
83 | case "get":
84 | if len(args) > 1 {
85 | val, err := chord.Get(nodes[0], args[1])
86 | if err != nil {
87 | fmt.Println(err)
88 | } else {
89 | fmt.Println(val)
90 | }
91 | }
92 | case "put":
93 | if len(args) > 2 {
94 | err := chord.Put(nodes[0], args[1], args[2])
95 | if err != nil {
96 | fmt.Println(err)
97 | }
98 | }
99 | case "quit":
100 | fmt.Println("goodbye")
101 | for _, node := range nodes {
102 | chord.ShutdownNode(node)
103 | }
104 | return
105 | default:
106 | continue
107 | }
108 | }
109 | }
--------------------------------------------------------------------------------
/chord/chord/util.go:
--------------------------------------------------------------------------------
1 | /* Purpose: Utility functions to help with dealing with ID hashes in Chord. */
2 |
3 | package chord
4 |
5 | import (
6 | "crypto/sha1"
7 | "math/big"
8 | "bytes"
9 | )
10 |
11 | /* Hash a string to its appropriate size */
12 | func HashKey(key string) []byte {
13 | h := sha1.New()
14 | h.Write([]byte(key))
15 | v := h.Sum(nil)
16 | return v[:KEY_LENGTH/8]
17 | }
18 |
19 | /* Convert a []byte to a big.Int string, useful for debugging/logging */
20 | func HashStr(keyHash []byte) string {
21 | keyInt := big.Int{}
22 | keyInt.SetBytes(keyHash)
23 | return keyInt.String()
24 | }
25 |
26 | //On the Chord ring, X is between (A : B)
27 | func Between(nodeX, nodeA, nodeB []byte) bool {
28 |
29 | xInt := big.Int{}
30 | xInt.SetBytes(nodeX)
31 |
32 | aInt := big.Int{}
33 | aInt.SetBytes(nodeA)
34 |
35 | bInt := big.Int{}
36 | bInt.SetBytes(nodeB)
37 |
38 | var result bool
39 | if aInt.Cmp(&bInt) == 0 {
40 | result = false
41 | } else if aInt.Cmp(&bInt) < 0 {
42 | result = (xInt.Cmp(&aInt) == 1 && xInt.Cmp(&bInt) == -1)
43 | } else {
44 | result = !(xInt.Cmp(&bInt) == 1 && xInt.Cmp(&aInt) == -1)
45 | }
46 |
47 | return result
48 | }
49 |
50 | func EqualIds(a, b []byte) bool {
51 | return bytes.Equal(a, b)
52 | }
53 |
54 | func CreateNNodes(n int) ([]*Node, error) {
55 | if n == 0 {
56 | return nil, nil
57 | }
58 | nodes := make([]*Node, n)
59 |
60 | id := []byte{byte(0)}
61 | curr, err := CreateDefinedNode(nil, id)
62 | nodes[0] = curr
63 | if err != nil {
64 | return nil, err
65 | }
66 |
67 | for i := 1; i < n; i++ {
68 | id := []byte{byte(i * 10)}
69 | curr, err := CreateDefinedNode(nodes[0].RemoteSelf, id)
70 | nodes[i] = curr
71 | if err != nil {
72 | return nil, err
73 | }
74 | }
75 |
76 | return nodes, nil
77 | }
78 |
79 | /* Is X between (A : B] */
80 | func BetweenRightIncl(nodeX, nodeA, nodeB []byte) bool {
81 |
82 | xInt := big.Int{}
83 | xInt.SetBytes(nodeX)
84 |
85 | aInt := big.Int{}
86 | aInt.SetBytes(nodeA)
87 |
88 | bInt := big.Int{}
89 | bInt.SetBytes(nodeB)
90 |
91 | var result bool
92 | if aInt.Cmp(&bInt) == 0 {
93 | result = true
94 | } else if aInt.Cmp(&bInt) < 0 {
95 | result = (xInt.Cmp(&aInt) == 1 && xInt.Cmp(&bInt) <= 0)
96 | } else {
97 | result = !(xInt.Cmp(&bInt) == 1 && xInt.Cmp(&aInt) <= 0)
98 | }
99 |
100 | return result
101 | }
102 |
103 | func CreateNNodesRandom(n int) ([]*Node, error) {
104 | if n == 0 {
105 | return nil, nil
106 | }
107 | nodes := make([]*Node, n)
108 |
109 | curr, err := CreateNode(nil)
110 | nodes[0] = curr
111 | if err != nil {
112 | return nil, err
113 | }
114 |
115 | for i := 1; i < n; i++ {
116 | curr, err := CreateNode(nodes[0].RemoteSelf)
117 | nodes[i] = curr
118 | if err != nil {
119 | return nil, err
120 | }
121 | }
122 |
123 | return nodes, nil
124 | }
--------------------------------------------------------------------------------
/raft/raft/raftLocalImpl.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | )
6 |
7 | func (r *RaftNode) Join(req *JoinRequest) error {
8 | r.mutex.Lock()
9 | defer r.mutex.Unlock()
10 |
11 | if len(r.GetOtherNodes()) == r.conf.ClusterSize {
12 | for _, otherNode := range r.GetOtherNodes() {
13 | if otherNode.Id == req.FromAddr.Id {
14 | StartNodeRPC(otherNode, r.GetOtherNodes())
15 | return nil
16 | }
17 | }
18 | r.Error("Warning! Unrecognized node tried to join after all other nodes have joined.\n")
19 | return fmt.Errorf("All nodes have already joined this Raft cluster\n")
20 | } else {
21 | r.AppendOtherNodes(req.FromAddr)
22 | }
23 | return nil
24 | }
25 |
26 | func (r *RaftNode) StartNode(req *StartNodeRequest) error {
27 | r.mutex.Lock()
28 | defer r.mutex.Unlock()
29 |
30 | r.SetOtherNodes(req.OtherNodes)
31 | r.printOtherNodes("StartNode")
32 |
33 | // Start the Raft finite-state-machine, initially in follower state
34 | go r.run()
35 |
36 | return nil
37 | }
38 |
39 | type RequestVoteMsg struct {
40 | request *RequestVoteRequest
41 | reply chan RequestVoteReply
42 | }
43 |
44 | func (r *RaftNode) RequestVote(req *RequestVoteRequest) (RequestVoteReply, error) {
45 | r.Out("RequestVote request received\n")
46 | reply := make(chan RequestVoteReply)
47 | r.requestVote <- RequestVoteMsg{req, reply}
48 | return <-reply, nil
49 | }
50 |
51 | type AppendEntriesMsg struct {
52 | request *AppendEntriesRequest
53 | reply chan AppendEntriesReply
54 | }
55 |
56 | func (r *RaftNode) AppendEntries(req *AppendEntriesRequest) (AppendEntriesReply, error) {
57 | r.Debug("AppendEntries request received\n")
58 | reply := make(chan AppendEntriesReply)
59 | r.appendEntries <- AppendEntriesMsg{req, reply}
60 | return <-reply, nil
61 | }
62 |
63 | type ClientRequestMsg struct {
64 | request *ClientRequest
65 | reply chan ClientReply
66 | }
67 |
68 | func (r *RaftNode) ClientRequest(req *ClientRequest) (ClientReply, error) {
69 | r.Debug("ClientRequest request received\n")
70 | reply := make(chan ClientReply)
71 | cr, exists := r.CheckRequestCache(*req)
72 | if exists {
73 | return *cr, nil
74 | } else {
75 | r.clientRequest <- ClientRequestMsg{req, reply}
76 | return <-reply, nil
77 | }
78 | }
79 |
80 | type RegisterClientMsg struct {
81 | request *RegisterClientRequest
82 | reply chan RegisterClientReply
83 | }
84 |
85 | func (r *RaftNode) RegisterClient(req *RegisterClientRequest) (RegisterClientReply, error) {
86 | 	r.Debug("RegisterClient request received\n")
87 | reply := make(chan RegisterClientReply)
88 | r.registerClient <- RegisterClientMsg{req, reply}
89 | return <-reply, nil
90 | }
91 |
92 | func (r *RaftNode) printOtherNodes(ctx string) {
93 | otherStr := fmt.Sprintf("%v (%v) r.OtherNodes = [", ctx, r.Id)
94 | for _, otherNode := range r.GetOtherNodes() {
95 | otherStr += fmt.Sprintf("%v,", otherNode.Id)
96 | }
97 | Out.Printf(otherStr[:len(otherStr)-1] + "]\n")
98 | }
99 |
--------------------------------------------------------------------------------
/chord/README.md:
--------------------------------------------------------------------------------
1 | # Chord
2 |
3 | Chord is a distributed hash table (DHT) protocol. Its design is motivated by the paper _Chord: A Scalable Peer-to-peer Lookup Service for Internet Applications_. Chord distributes objects over a dynamic network of nodes, and implements a protocol for finding these objects once they have been placed in the network.
4 |
5 | # Usage Example
6 | [cli](cli.go) serves as a console for interacting with chord, creating nodes and querying state on the local nodes. It provides the following commands:
7 | * node: display node ID, successor, and predecessor
8 | * table: display finger table information for node(s)
9 | * addr: display node listener address(es)
10 | * data: display datastore(s) for node(s)
11 | * get: get value from Chord ring associated with this key
12 | * put: put key/value into Chord ring
13 | * quit: quit node(s)
14 |
15 | # Keys
16 | The hashed value of the key takes the form of an m-bit unsigned integer. Thus, the keyspace for the DHT resides between 0 and 2^m - 1, inclusive. The current implementation uses SHA-1 for hashing.
17 |
18 | # The Ring
19 | Each node in the system also has a hash value (a hash of its name, i.e. its IP address and port). Chord orders the nodes in a circular fashion, in which each node’s successor is the node with the next highest hash.
20 |
21 | # Overlay Network (Finger Table)
22 | To locate the node at which a particular key-value pair is stored, we need to find the successor to the hash value of the key. Linear search would be very slow on a large network of nodes, so Chord uses an overlay network: it maintains a finger table at each node. The number of entries in the finger table is equal to m, where m is the number of bits representing a hash in the keyspace of the DHT (e.g., 128). Entry i in the table, with 0 <= i < m, is the node which the owner of the table believes is the successor for the hash h + 2^i (h is the current node’s hash).
23 |
24 | # Lookup in Chord
25 | When node A services a request to find the successor of the key k, it first determines whether its own successor is the owner of k (the successor is simply entry 0 in the finger table). If it is, then A returns its successor in response to the request. Otherwise, node A finds node B in its finger table such that B has the largest hash smaller than the hash of k, and forwards the request to B.
26 |
27 | # Dynamics
28 | Chord supports the dynamic addition and removal of nodes from the network. Each node calls [stabilize](chord/node_local_impl.go#L32) and [fixNextFinger](chord/finger.go#L34) functions periodically to determine the successor and predecessor relationship between nodes as they are added to the network.
29 |
30 | # Future Work
31 | Support fault tolerance by maintaining a list of successors. This would need the keys to be replicated across a number of nodes.
32 |
--------------------------------------------------------------------------------
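The lookup rule described in the README above can be sketched directly against the types in chord/chord: if the key falls between this node and its successor, the successor owns it; otherwise forward towards the finger with the largest hash that still precedes the key. This is an illustrative sketch only, not the repository's findSuccessor (which lives in node_local_impl.go and is not reproduced in this dump); findSuccessorSketch is a made-up name.

// findSuccessorSketch resolves which node is responsible for keyHash.
func (node *Node) findSuccessorSketch(keyHash []byte) *RemoteNode {
	// The successor owns every key in (node.Id, successor.Id].
	if BetweenRightIncl(keyHash, node.Id, node.Successor.Id) {
		return node.Successor
	}
	// Otherwise pick the closest preceding finger and let it continue the
	// lookup (in the real code this step is an RPC to that node).
	for i := KEY_LENGTH - 1; i >= 0; i-- {
		finger := node.FingerTable[i].Node
		if Between(finger.Id, node.Id, keyHash) {
			return finger
		}
	}
	return node.Successor
}
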
/raft/raft/logging.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "log"
7 | "os"
8 | )
9 |
10 | var Debug *log.Logger
11 | var Out *log.Logger
12 | var Error *log.Logger
13 |
14 | // Initialize the loggers
15 | func init() {
16 | Debug = log.New(ioutil.Discard, "", log.Ltime|log.Lshortfile)
17 | Out = log.New(os.Stdout, "", log.Ltime|log.Lshortfile)
18 | Error = log.New(os.Stdout, "ERROR: ", log.Ltime|log.Lshortfile)
19 | }
20 |
21 | // Turn debug on or off
22 | func SetDebug(enabled bool) {
23 | if enabled {
24 | Debug = log.New(os.Stdout, "", log.Ldate|log.Ltime|log.Lshortfile)
25 | } else {
26 | Debug = log.New(ioutil.Discard, "", log.Ldate|log.Ltime|log.Lshortfile)
27 | }
28 | }
29 |
30 | func (r *RaftNode) Out(formatString string, args ...interface{}) {
31 | Out.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...)))
32 | }
33 |
34 | func (r *RaftNode) Debug(formatString string, args ...interface{}) {
35 | Debug.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...)))
36 | }
37 |
38 | func (r *RaftNode) Error(formatString string, args ...interface{}) {
39 | Error.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...)))
40 | }
41 |
42 | func (s NodeState) String() string {
43 | switch s {
44 | case FOLLOWER_STATE:
45 | return "follower"
46 | case CANDIDATE_STATE:
47 | return "candidate"
48 | case LEADER_STATE:
49 | return "leader"
50 | case JOIN_STATE:
51 | return "joining"
52 | default:
53 | return "unknown"
54 | }
55 | }
56 |
57 | func FsmCommandString(cmd FsmCommand) string {
58 | switch cmd {
59 | case HASH_CHAIN_ADD:
60 | return "hash-chain-add"
61 | case HASH_CHAIN_INIT:
62 | return "hash-chain-init"
63 | case CLIENT_REGISTRATION:
64 | return "client-registration"
65 | case INIT:
66 | return "init"
67 | case NOOP:
68 | return "noop"
69 | default:
70 | return "unknown"
71 | }
72 | }
73 |
74 | func (r *RaftNode) ShowState() {
75 | fmt.Printf("Current node state:\n")
76 | for i, otherNode := range r.GetOtherNodes() {
77 | fmt.Printf("%v - %v", i, otherNode)
78 | local := *r.GetLocalAddr()
79 |
80 | if local == otherNode {
81 | fmt.Printf(" (local node)")
82 | }
83 | if r.LeaderAddress != nil &&
84 | otherNode == *r.LeaderAddress {
85 | fmt.Printf(" (leader node)")
86 | }
87 | fmt.Printf("\n")
88 |
89 | }
90 | fmt.Printf("Current term: %v\n", r.GetCurrentTerm())
91 | fmt.Printf("Current state: %v\n", r.State)
92 | fmt.Printf("Current commit index: %v\n", r.commitIndex)
93 | fmt.Printf("Current next index: %v\n", r.nextIndex)
94 | fmt.Printf("Current match index: %v\n", r.matchIndex)
95 | fmt.Printf("Current fileMap: %v\n", r.fileMap)
96 | }
97 |
98 | func (r *RaftNode) PrintLogCache() {
99 | fmt.Printf("Node %v LogCache:\n", r.Id)
100 | for _, entry := range r.logCache {
101 | fmt.Printf(" idx:%v, term:%v\n", entry.Index, entry.TermId)
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/oceanstore/oceanstore/oceanstore.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "../../raft/raft"
5 | "../../tapestry/tapestry"
6 | "math/rand"
7 | )
8 |
9 | const TAPESTRY_NODES = 3
10 | const RAFT_NODES = 1
11 |
12 | type OceanAddr struct {
13 | Addr string
14 | }
15 |
16 | type Vguid string
17 | type Aguid string
18 | type Guid string
19 |
20 | type OceanNode struct {
21 | tnodes []*tapestry.Tapestry
22 | rnodes []*raft.RaftNode
23 | rootV uint32
24 | clientPaths map[uint64]string // client id -> curpath
25 | clients map[uint64]*raft.Client // client id -> client
26 |
27 | Local OceanAddr
28 | raftClient *raft.Client
29 | server *OceanRPCServer
30 | }
31 |
32 | func Start() (p *OceanNode, err error) {
33 | var ocean OceanNode
34 | p = &ocean
35 | ocean.tnodes = make([]*tapestry.Tapestry, TAPESTRY_NODES)
36 | ocean.rnodes = make([]*raft.RaftNode, RAFT_NODES)
37 | ocean.clientPaths = make(map[uint64]string)
38 | ocean.clients = make(map[uint64]*raft.Client)
39 |
40 | // Start running the tapestry nodes. --------------
41 | t, err := tapestry.Start(0, "")
42 | if err != nil {
43 | panic(err)
44 | }
45 |
46 | ocean.tnodes[0] = t
47 | for i := 1; i < TAPESTRY_NODES; i++ {
48 | t, err = tapestry.Start(0, ocean.tnodes[0].GetLocalAddr())
49 | if err != nil {
50 | panic(err)
51 | }
52 | ocean.tnodes[i] = t
53 | }
54 |
55 | ocean.rnodes, err = raft.CreateLocalCluster(raft.DefaultConfig())
56 | if err != nil {
57 | panic(err)
58 | }
59 |
60 | // RPC server --------------------------------------
61 | ocean.server = newOceanstoreRPCServer(p)
62 | ocean.Local = OceanAddr{ocean.server.listener.Addr().String()}
63 | // -------------------------------------------------
64 |
65 | // Create ocean raft client. Persist until raft is settled
66 | client, err := CreateClient(ocean.Local)
67 | for err != nil {
68 | client, err = CreateClient(ocean.Local)
69 | }
70 |
71 | ocean.raftClient = ocean.clients[client.Id]
72 | if ocean.raftClient == nil {
73 | panic("Could not retrieve ocean raft client.")
74 | }
75 |
76 | // Create the root node ----------------------------
77 | _, err = ocean.mkdir(&MkdirRequest{ocean.raftClient.Id, "/"})
78 | if err != nil {
79 | panic("Could not create root node")
80 | }
81 |
82 | return
83 | }
84 |
85 | func (ocean *OceanNode) getCurrentDir(id uint64) string {
86 | curdir, ok := ocean.clientPaths[id]
87 | if !ok {
88 | panic("Did not found the current path of a client that is supposed to be registered")
89 | }
90 | return curdir
91 | }
92 |
93 | func randSeq(n int) string {
94 | var letters = []rune("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
95 | b := make([]rune, n)
96 | for i := range b {
97 | b[i] = letters[rand.Intn(len(letters))]
98 | }
99 | return string(b)
100 | }
101 |
102 | func (puddle *OceanNode) getRandomRaftNode() *raft.RaftNode {
103 | index := rand.Int() % RAFT_NODES
104 | return puddle.rnodes[index]
105 | }
--------------------------------------------------------------------------------
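Start wires the whole stack together: it boots the tapestry nodes, brings up a local raft cluster, starts the RPC server, and finally creates the root directory through the raft client. A minimal usage sketch, with the error handling and print purely illustrative:

    ocean, err := Start()
    if err != nil {
        panic(err) // Start panics internally on most failures, so err is rarely non-nil here
    }
    fmt.Printf("oceanstore listening on %v\n", ocean.Local.Addr)
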
/chord/chord/node_rpc_impl.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "errors"
7 | )
8 |
9 | /* Validate that we're executing this RPC on the intended node */
10 | func validateRpc(node *Node, reqId []byte) error {
11 | if !bytes.Equal(node.Id, reqId) {
12 | errStr := fmt.Sprintf("Node ids do not match %v, %v", node.Id, reqId)
13 | return errors.New(errStr)
14 | }
15 | return nil
16 | }
17 |
18 | func (node *Node) GetPredecessorId(req *RemoteId, reply *IdReply) error {
19 | if err := validateRpc(node, req.Id); err != nil {
20 | return err
21 | }
22 | // Predecessor may be nil, which is okay.
23 | if node.Predecessor == nil {
24 | reply.Id = nil
25 | reply.Addr = ""
26 | reply.Valid = false
27 | } else {
28 | reply.Id = node.Predecessor.Id
29 | reply.Addr = node.Predecessor.Addr
30 | reply.Valid = true
31 | }
32 | return nil
33 | }
34 |
35 | func (node *Node) SetPredecessorId(req *UpdateReq, reply *RpcOkay) error {
36 | if err := validateRpc(node, req.FromId); err != nil {
37 | return err
38 | }
39 | // Predecessor may be nil, so replace the pointer instead of mutating through it.
40 | node.Predecessor = &RemoteNode{Id: req.UpdateId, Addr: req.UpdateAddr}
41 | reply.Ok = true
42 | return nil
43 | }
44 |
45 | func (node *Node) GetSuccessorId(req *RemoteId, reply *IdReply) error {
46 | if err := validateRpc(node, req.Id); err != nil {
47 | return err
48 | }
49 |
50 | reply.Id = node.Successor.Id
51 | reply.Addr = node.Successor.Addr
52 | reply.Valid = true
53 | return nil
54 | }
55 |
56 | func (node *Node) SetSuccessorId(req *UpdateReq, reply *RpcOkay) error {
57 | if err := validateRpc(node, req.FromId); err != nil {
58 | return err
59 | }
60 | node.Successor.Id = req.UpdateId
61 | node.Successor.Addr = req.UpdateAddr
62 | reply.Ok = true
63 | return nil
64 | }
65 |
66 | func (node *Node) FindSuccessor(query *RemoteQuery, reply *IdReply) error {
67 | if err := validateRpc(node, query.FromId); err != nil {
68 | return err
69 | }
70 | remNode, err := node.findSuccessor(query.Id)
71 | if err != nil {
72 | reply.Valid = false
73 | return err
74 | }
75 | reply.Id = remNode.Id
76 | reply.Addr = remNode.Addr
77 | reply.Valid = true
78 | return nil
79 | }
80 |
81 | func (node *Node) ClosestPrecedingFinger(query *RemoteQuery, reply *IdReply) error {
82 | if err := validateRpc(node, query.FromId); err != nil {
83 | return err
84 | }
85 | // Walk the finger table from the top, looking for a finger between this node and the queried id.
86 | for i := KEY_LENGTH - 1; i >= 0; i-- {
87 | if BetweenRightIncl(node.FingerTable[i].Node.Id, node.Id, query.Id) {
88 | reply.Id = node.FingerTable[i].Node.Id
89 | reply.Addr = node.FingerTable[i].Node.Addr
90 | reply.Valid = true
91 | return nil
92 | }
93 | }
94 |
95 | reply.Valid = false
96 | return errors.New("There is no closest preceding finger")
97 | }
98 |
99 | func (node *Node) Notify(req *NotifyReq, reply *RpcOkay) error {
100 | if err := validateRpc(node, req.NodeId); err != nil {
101 | reply.Ok = false
102 | return err
103 | }
104 | remote_node := new(RemoteNode)
105 | remote_node.Id = req.UpdateId
106 | remote_node.Addr = req.UpdateAddr
107 | node.notify(remote_node)
108 | reply.Ok = true
109 | return nil
110 | }
111 |
--------------------------------------------------------------------------------
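Every handler above first runs validateRpc against the id carried in the request, so the client-side wrappers (such as GetPredecessorId_RPC used in node_local_impl.go, defined elsewhere in the package) must stamp the target node's own Id into the request. A hypothetical sketch of that calling convention, with the RemoteId field layout and the makeRemoteCall signature assumed from the handlers and callers shown here:

    // Hypothetical wrapper: ask a remote node for its predecessor.
    // The request carries remote.Id so validateRpc on the far side passes.
    func getPredecessorSketch(remote *RemoteNode) (*RemoteNode, error) {
        var reply IdReply
        err := makeRemoteCall(remote, "GetPredecessorId", RemoteId{Id: remote.Id}, &reply)
        if err != nil || !reply.Valid {
            return nil, err
        }
        return &RemoteNode{Id: reply.Id, Addr: reply.Addr}, nil
    }
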
/raft/raft/machine.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "crypto/md5"
5 | "fmt"
6 | "strings"
7 | )
8 |
9 | func (r *RaftNode) processLog(entry LogEntry) ClientReply {
10 | Out.Printf("%v\n", entry)
11 | status := OK
12 | response := ""
13 | switch entry.Command {
14 | case HASH_CHAIN_INIT:
15 | if r.hash == nil {
16 | r.hash = entry.Data
17 | response = fmt.Sprintf("%v", r.hash)
18 | } else {
19 | status = REQ_FAILED
20 | response = "The hash chain should only be initialized once!"
21 | }
22 | case HASH_CHAIN_ADD:
23 | if r.hash == nil {
24 | status = REQ_FAILED
25 | response = "The hash chain hasn't been initialized yet"
26 | } else {
27 | sum := md5.Sum(r.hash)
28 | fmt.Printf("hash is changing from %v to %v\n", r.hash, sum)
29 | r.hash = sum[:]
30 | response = fmt.Sprintf("%v", r.hash)
31 | }
32 | // For the remaining commands it is unclear how they should interact with the hash chain.
33 | //TODO: Do the byte[] and string casting for entry.Data
34 | case REMOVE:
35 | //So by now we have received consensus, we need to delete
36 | r.requestMutex.Lock()
37 | key := string(entry.Data)
38 | if entry.Data == nil {
39 | response = "FAIL:The key cannot be nil"
40 | } else if val, ok := r.fileMap[key]; ok {
41 | delete(r.fileMap, key)
42 | response = "SUCCESS:" + val
43 | } else {
44 | response = "FAIL:The key does not exist"
45 | }
46 | r.requestMutex.Unlock()
47 | case SET:
48 | r.requestMutex.Lock()
49 | if entry.Data == nil {
50 | response = "FAIL:The key cannot be nil"
51 | } else {
52 | keyVal := string(entry.Data)
53 | keyValAr := strings.Split(keyVal, ":")
54 | r.fileMap[keyValAr[0]] = keyValAr[1]
55 | response = "SUCCESS:" + keyValAr[1]
56 | }
57 | r.requestMutex.Unlock()
58 |
59 | case LOCK:
60 | r.lockMapMtx.Lock()
61 | key := string(entry.Data)
62 | if entry.Data == nil {
63 | response = "FAIL:The key cannot be nil"
64 | } else if _, ok := r.lockMap[key]; ok {
65 | // The key is already locked.
66 | response = "FAIL:The key is locked"
67 | } else {
68 | // The key is unlocked, so lock it.
69 | r.lockMap[key] = true
70 | response = "SUCCESS:Key " + key + " is now locked"
71 | }
72 | r.lockMapMtx.Unlock()
73 |
74 | case UNLOCK:
75 | r.lockMapMtx.Lock()
76 | key := string(entry.Data)
77 | if entry.Data == nil {
78 | response = "FAIL:The key cannot be nil"
79 | } else {
80 | // Unlock unconditionally; it is up to the caller not to unlock
81 | // a key that belongs to someone else.
82 | delete(r.lockMap, key)
83 | response = "SUCCESS:Key " + key + " is now unlocked"
84 | }
85 | r.lockMapMtx.Unlock()
86 |
87 | default:
88 | response = "Success!"
89 | }
90 |
91 | reply := ClientReply{
92 | Status: status,
93 | Response: response,
94 | LeaderHint: *r.GetLocalAddr(),
95 | }
96 |
97 | if entry.CacheId != "" {
98 | r.AddRequest(entry.CacheId, reply)
99 | }
100 |
101 | r.requestMutex.Lock()
102 | msg, exists := r.requestMap[entry.Index]
103 | if exists {
104 | msg.reply <- reply
105 | delete(r.requestMap, entry.Index)
106 | }
107 | r.requestMutex.Unlock()
108 |
109 | return reply
110 | }
111 |
--------------------------------------------------------------------------------
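The SET branch above splits entry.Data on ":" into a key and a value, while REMOVE, LOCK, and UNLOCK treat the whole payload as the key. A sketch of how a caller would encode a SET through the raft client, with the key and value made up for illustration:

    // Sketch: encode "key:value" the way processLog expects, then submit it.
    func setSketch(c *Client) error {
        payload := []byte("somekey" + ":" + "somevalue")
        return c.SendRequest(SET, payload)
    }
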
/tapestry/tapestry/tapestry-client.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import "fmt"
4 |
5 | // Invoke tapestry.Store on a remote tapestry node
6 | func TapestryStore(remote Node, key string, value []byte) error {
7 | fmt.Printf("Making remote TapestryStore call\n")
8 | return makeRemoteNodeCall(remote, "TapestryStore", StoreRequest{remote, key, value}, &StoreResponse{})
9 | }
10 |
11 | // Invoke tapestry.Lookup on a remote tapestry node
12 | func TapestryLookup(remote Node, key string) (nodes []Node, err error) {
13 | fmt.Printf("Making remote TapestryLookup call\n")
14 | var rsp LookupResponse
15 | err = makeRemoteNodeCall(remote, "TapestryLookup", LookupRequest{remote, key}, &rsp)
16 | nodes = rsp.Nodes
17 | return
18 | }
19 |
20 | // Get data from a tapestry node. Looks up key then fetches directly
21 | func TapestryGet(remote Node, key string) ([]byte, error) {
22 | fmt.Printf("Making remote TapestryGet call\n")
23 | // Lookup the key
24 | replicas, err := TapestryLookup(remote, key)
25 | if err != nil {
26 | return nil, err
27 | }
28 | if len(replicas) == 0 {
29 | return nil, fmt.Errorf("No replicas returned for key %v", key)
30 | }
31 |
32 | // Contact replicas
33 | var errs []error
34 | for _, replica := range replicas {
35 | blob, err := FetchRemoteBlob(replica, key)
36 | if err != nil {
37 | errs = append(errs, err)
38 | }
39 | if blob != nil {
40 | return *blob, nil
41 | }
42 | }
43 |
44 | return nil, fmt.Errorf("Error contacting replicas, %v: %v", replicas, errs)
45 | }
46 |
47 | func TapestryRemove(remote Node, key string) (success bool, err error) {
48 | fmt.Printf("Making remote TapestryRemove call\n")
49 | var rsp RemoveResponse
50 | err = makeRemoteNodeCall(remote, "TapestryRemove", RemoveRequest{remote, key}, &rsp)
51 | success = rsp.Removed
52 | return
53 | }
54 |
55 | type StoreRequest struct {
56 | To Node
57 | Key string
58 | Value []byte
59 | }
60 |
61 | type StoreResponse struct {
62 | }
63 |
64 | type LookupRequest struct {
65 | To Node
66 | Key string
67 | }
68 |
69 | type LookupResponse struct {
70 | Nodes []Node
71 | }
72 |
73 | type RemoveRequest struct {
74 | To Node
75 | Key string
76 | }
77 |
78 | type RemoveResponse struct {
79 | Removed bool
80 | }
81 |
82 | // Server: extension method to open up Store via RPC
83 | func (server *TapestryRPCServer) TapestryStore(req StoreRequest, rsp *StoreResponse) (err error) {
84 | fmt.Printf("Received remote invocation of Tapestry.Store\n")
85 | return server.tapestry.Store(req.Key, req.Value)
86 | }
87 |
88 | // Server: extension method to open up Lookup via RPC
89 | func (server *TapestryRPCServer) TapestryLookup(req LookupRequest, rsp *LookupResponse) (err error) {
90 | fmt.Printf("Received remote invocation of Tapestry.Lookup\n")
91 | rsp.Nodes, err = server.tapestry.Lookup(req.Key)
92 | return
93 | }
94 |
95 | // Server: extension method to open up Remove via RPC
96 | func (server *TapestryRPCServer) TapestryRemove(req RemoveRequest, rsp *RemoveResponse) (err error) {
97 | fmt.Printf("Received remote invocation of Tapestry.Remove\n")
98 | rsp.Removed = server.tapestry.Remove(req.Key)
99 | return
100 | }
--------------------------------------------------------------------------------
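These client helpers compose: TapestryGet is just TapestryLookup followed by a direct blob fetch from one of the returned replicas. A sketch of a store-then-fetch round trip, where entry is assumed to be the Node of any live tapestry node:

    // Sketch: publish a blob under a key, then read it back through any node.
    func storeAndFetchSketch(entry Node) ([]byte, error) {
        if err := TapestryStore(entry, "photo.jpg", []byte("...blob bytes...")); err != nil {
            return nil, err
        }
        return TapestryGet(entry, "photo.jpg")
    }
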
/raft/raft/testUtils.go:
--------------------------------------------------------------------------------
1 | package raft
2 | //
3 | //import (
4 | // "fmt"
5 | // "time"
6 | // "math/rand"
7 | //)
8 | //
9 | ////TODO - move away the code to find majority element
10 | //func getLeader(nodes []*RaftNode) *RaftNode {
11 | // it := 1
12 | // var leader *RaftNode = nil
13 | // for leader == nil && it < 50 {
14 | // fmt.Printf("iteration %v\n", it)
15 | // time.Sleep(time.Millisecond * 200)
16 | // clusterSize := nodes[0].conf.ClusterSize
17 | // idCountMap := make(map[string]int, clusterSize)
18 | // for _, n := range nodes {
19 | // if n.LeaderAddress != nil {
20 | // idCountMap[n.LeaderAddress.Id]++
21 | // }
22 | // }
23 | // fmt.Printf("node id to count map %v\n\n", idCountMap)
24 | // var id string
25 | // max := -1
26 | // for k,v := range idCountMap {
27 | // if max < v {
28 | // max = v
29 | // id = k
30 | // }
31 | // }
32 | // if max > clusterSize / 2 {
33 | // for _,node := range nodes {
34 | // if node.LeaderAddress.Id == id {
35 | // return node
36 | // }
37 | // }
38 | // }
39 | // it++
40 | // }
41 | // return leader
42 | //}
43 | //
44 | //func randSeq(n int) string {
45 | // var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
46 | // b := make([]rune, n)
47 | // for i := range b {
48 | // b[i] = letters[rand.Intn(len(letters))]
49 | // }
50 | // return string(b)
51 | //}
52 | //
53 | //func checkNodes(nodes []*RaftNode, clusterSize int) bool {
54 | // for _, n := range nodes {
55 | // if len(n.GetOtherNodes()) != clusterSize {
56 | // Error.Println(len(n.GetOtherNodes()), clusterSize)
57 | // return false
58 | // }
59 | // }
60 | // return true
61 | //}
62 | //
63 | //func printNodes(nodes []*RaftNode) {
64 | // for _, n := range nodes {
65 | // n.PrintLogCache()
66 | // n.ShowState()
67 | // }
68 | //}
69 | //
70 | //func checkMajorityTerms(nodes []*RaftNode) bool {
71 | // sums := make(map[uint64]int, nodes[0].conf.ClusterSize)
72 | // for _, n := range nodes {
73 | // sums[n.GetCurrentTerm()]++
74 | // }
75 | // max := -1
76 | // for _, v := range sums {
77 | // if v > max {
78 | // max = v
79 | // }
80 | // }
81 | //
82 | // if max > len(nodes)/2 {
83 | // return true
84 | // }
85 | // return false
86 | //}
87 | //
88 | //func checkMajorityCommitIndex(nodes []*RaftNode) bool {
89 | // sums := make(map[uint64]int, nodes[0].conf.ClusterSize)
90 | // for _, n := range nodes {
91 | // sums[n.commitIndex]++
92 | // }
93 | // max := -1
94 | // for _, v := range sums {
95 | // if v > max {
96 | // max = v
97 | // }
98 | // }
99 | //
100 | // if max > len(nodes)/2 {
101 | // return true
102 | // }
103 | // return false
104 | //}
105 | //
106 | //func checkLogOrder(nodes []*RaftNode) bool {
107 | // for _, n := range nodes {
108 | // prevIndex := int64(-1)
109 | // prevTerm := int64(-1)
110 | // seen := make(map[uint64]bool)
111 | // for _, entry := range n.logCache {
112 | // if seen[entry.Index] || int64(entry.Index)-1 != prevIndex || int64(entry.Term) < prevTerm {
113 | // return false
114 | // }
115 | //
116 | // seen[entry.Index] = true
117 | // prevIndex = int64(entry.Index)
118 | // prevTerm = int64(entry.Term)
119 | // }
120 | // }
121 | // return true
122 | //}
123 | //
124 | //func shutdownNodes(nodes []*RaftNode) {
125 | // for _, n := range nodes {
126 | // n.IsShutDown = true
127 | // n.gracefulExit <- true
128 | // }
129 | // time.Sleep(time.Millisecond * 200)
130 | //}
--------------------------------------------------------------------------------
/chord/chord/kv_store.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "log"
5 | "fmt"
6 | )
7 |
8 | /* Get a value in the datastore, provided an arbitrary node in the ring */
9 | func Get(node *Node, key string) (string, error) {
10 | remNode, err := node.locate(key)
11 | if err != nil {
12 | log.Fatal(err)
13 | }
14 | return Get_RPC(remNode, key)
15 | }
16 |
17 | /* Put a key/value in the datastore, provided an arbitrary node in the ring */
18 | func Put(node *Node, key string, value string) error {
19 | remNode, err := node.locate(key)
20 | if err != nil {
21 | log.Fatal(err)
22 | }
23 | return Put_RPC(remNode, key, value)
24 | }
25 |
26 | /* Internal helper method to find the appropriate node in the ring */
27 | func (node *Node) locate(key string) (*RemoteNode, error) {
28 | id := HashKey(key)
29 | return node.findSuccessor(id)
30 | }
31 |
32 | /* Print the contents of a node's data store */
33 | func PrintDataStore(node *Node) {
34 | fmt.Printf("Node-%v datastore: %v\n", HashStr(node.Id), node.dataStore)
35 | }
36 |
37 | func (node *Node) GetLocal(req *KeyValueReq, reply *KeyValueReply) error {
38 | if err := validateRpc(node, req.NodeId); err != nil {
39 | return err
40 | }
41 | (&node.dsLock).RLock()
42 | key := req.Key
43 | val := node.dataStore[key]
44 | reply.Key = key
45 | reply.Value = val
46 | (&node.dsLock).RUnlock()
47 | return nil
48 | }
49 |
50 | func (node *Node) PutLocal(req *KeyValueReq, reply *KeyValueReply) error {
51 | if err := validateRpc(node, req.NodeId); err != nil {
52 | return err
53 | }
54 | (&node.dsLock).Lock()
55 | key := req.Key
56 | val := req.Value
57 | node.dataStore[key] = val
58 | reply.Key = key
59 | reply.Value = val
60 | (&node.dsLock).Unlock()
61 | return nil
62 | }
63 |
64 | /* When we discover a new predecessor we may need to transfer some keys to it */
65 | /* This pushes any keys that now belong to the new predecessor over to it.
66 | This was eliminated by the TAs because of its redundancy. */
67 | func (node *Node) obtainNewKeys() error {
68 | //lock the local db and get the keys
69 | (&node.dsLock).Lock()
70 | for key, val := range node.dataStore {
71 | keyByte := HashKey(key)
72 | if !BetweenRightIncl(keyByte, node.Predecessor.Id, node.Id) {
73 | //means we send it to the predecessor
74 | err := Put_RPC(node.Predecessor, key, val)
75 | if err != nil {
76 | (&node.dsLock).Unlock()
77 | return err
78 | }
79 | //then we delete it locally
80 | delete(node.dataStore, key)
81 | }
82 | }
83 | //unlock the db
84 | (&node.dsLock).Unlock()
85 | return nil
86 | }
87 |
88 | /* Find locally stored keys that are between (predId : fromId], any of
89 | these nodes should be moved to fromId */
90 | func (node *Node) TransferKeys(req *TransferReq, reply *RpcOkay) error {
91 | if err := validateRpc(node, req.NodeId); err != nil {
92 | return err
93 | }
94 | (&node.dsLock).Lock()
95 | for key, val := range node.dataStore {
96 | keyByte := HashKey(key)
97 | pred := req.PredId
98 | if pred == nil {
99 | pred = node.Id
100 | }
101 | if BetweenRightIncl(keyByte, pred, req.FromId) {
102 | //means we send it to the requester, because it belongs to them
103 | err := Put_RPC(node.Predecessor, key, val)
104 | if err != nil {
105 | (&node.dsLock).Unlock()
106 | reply.Ok = false
107 | return err
108 | }
109 | //then we delete it locally
110 | delete(node.dataStore, key)
111 | }
112 | }
113 | (&node.dsLock).Unlock()
114 | reply.Ok = true
115 | return nil
116 | }
--------------------------------------------------------------------------------
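Get and Put accept any node in the ring and route the request to the responsible node via locate and findSuccessor. A sketch of exercising the store on a fresh single-node ring, with the key and value chosen only for illustration:

    // Sketch: on a single-node ring, every key is owned by the node we created.
    func kvSketch() (string, error) {
        node, err := CreateNode(nil)
        if err != nil {
            return "", err
        }
        if err := Put(node, "course", "distributed-systems"); err != nil {
            return "", err
        }
        return Get(node, "course")
    }
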
/tapestry/tapestry/backpointers.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "sync"
5 | )
6 |
7 | /*
8 | Backpointers are stored by level, like the routing table
9 | A backpointer at level n indicates that the backpointer shares a prefix of length n with this node
10 | Access to the backpointers is managed by a lock
11 | */
12 | type Backpointers struct {
13 | local Node // the local tapestry node
14 | sets [DIGITS]*NodeSet // backpointers
15 | }
16 |
17 | /*
18 | Represents a set of nodes. The implementation is just a wrapped map, and access is controlled with a mutex.
19 | */
20 | type NodeSet struct {
21 | mutex sync.Mutex
22 | data map[Node]bool
23 | }
24 |
25 | /*
26 | Creates and returns a new backpointer set
27 | */
28 | func NewBackpointers(me Node) *Backpointers {
29 | b := new(Backpointers)
30 | b.local = me
31 | for i := 0; i < DIGITS; i++ {
32 | b.sets[i] = NewNodeSet()
33 | }
34 | return b
35 | }
36 |
37 | /*
38 | Add a backpointer for the provided node
39 | Returns true if a new backpointer was added
40 | */
41 | func (b *Backpointers) Add(node Node) bool {
42 | if b.local != node {
43 | return b.level(node).Add(node)
44 | }
45 | return false
46 | }
47 |
48 | /*
49 | Remove a backpointer for the provided node, if it existed
50 | Returns true if the backpointer existed and was subsequently removed
51 | */
52 | func (b *Backpointers) Remove(node Node) bool {
53 | if b.local != node {
54 | return b.level(node).Remove(node)
55 | }
56 | return false
57 | }
58 |
59 | /*
60 | Get all backpointers at the provided level
61 | */
62 | func (b *Backpointers) Get(level int) []Node {
63 | return b.sets[level].Nodes()
64 | }
65 |
66 | // gets the node set for the level that the specified node should occupy
67 | func (b *Backpointers) level(node Node) *NodeSet {
68 | return b.sets[SharedPrefixLength(b.local.Id, node.Id)]
69 | }
70 |
71 | /*
72 | Create a new node set
73 | */
74 | func NewNodeSet() *NodeSet {
75 | s := new(NodeSet)
76 | s.data = make(map[Node]bool)
77 | return s
78 | }
79 |
80 | /*
81 | Add the given node to the node set if it isn't already in the set
82 | Returns true if the node was added; false if it already existed
83 | */
84 | func (s *NodeSet) Add(n Node) bool {
85 | s.mutex.Lock()
86 | _, exists := s.data[n]
87 | s.data[n] = true
88 | s.mutex.Unlock()
89 | return !exists
90 | }
91 |
92 | /*
93 | Add all of the nodes to the node set
94 | */
95 | func (s *NodeSet) AddAll(nodes []Node) {
96 | s.mutex.Lock()
97 | for _, node := range nodes {
98 | s.data[node] = true
99 | }
100 | s.mutex.Unlock()
101 | }
102 |
103 | /*
104 | Remove the given node from the node set if it's currently in the set
105 | Returns true if the node was removed; false if it was not in the set
106 | */
107 | func (s *NodeSet) Remove(n Node) bool {
108 | s.mutex.Lock()
109 | _, exists := s.data[n]
110 | delete(s.data, n)
111 | s.mutex.Unlock()
112 | return exists
113 | }
114 |
115 | /*
116 | Test whether the specified node is contained in the set
117 | */
118 | func (s *NodeSet) Contains(n Node) (b bool) {
119 | s.mutex.Lock()
120 | b = s.data[n]
121 | s.mutex.Unlock()
122 | return
123 | }
124 |
125 | /*
126 | Returns the size of the set
127 | */
128 | func (s *NodeSet) Size() int {
129 | s.mutex.Lock()
130 | size := len(s.data)
131 | s.mutex.Unlock()
132 | return size
133 | }
134 |
135 | /*
136 | Get all nodes in the set as a slice
137 | */
138 | func (s *NodeSet) Nodes() []Node {
139 | s.mutex.Lock()
140 | nodes := make([]Node, 0, len(s.data))
141 | for node := range s.data {
142 | nodes = append(nodes, node)
143 | }
144 | s.mutex.Unlock()
145 | return nodes
146 | }
--------------------------------------------------------------------------------
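Backpointers are bucketed by shared prefix length, and each bucket is a NodeSet whose Add and Remove report whether the set actually changed. A small sketch of that contract, using a zero-valued Node purely for illustration:

    // Sketch: Add returns true only when the node was not already present.
    func nodeSetSketch() (bool, bool) {
        var n Node
        set := NewNodeSet()
        first := set.Add(n)  // true: newly inserted
        second := set.Add(n) // false: already in the set
        return first, second
    }
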
/chord/chord/node_local_impl.go:
--------------------------------------------------------------------------------
1 | /* Purpose: Local Chord node functions to interact with the Chord ring. */
2 |
3 | package chord
4 |
5 | import (
6 | "fmt"
7 | "time"
8 | "log"
9 | "errors"
10 | )
11 |
12 | // This node is trying to join an existing ring that a remote node is a part of (i.e., other)
13 | func (node *Node) join(other *RemoteNode) error {
14 | // Handle case of "other" being nil (first node on ring).
15 | if other == nil {
16 | return nil
17 | }
18 |
19 | node.Predecessor = nil
20 | succ, err := FindSuccessor_RPC(other, node.Id)
21 | if EqualIds(succ.Id, node.Id) {
22 | return errors.New("node already exists")
23 | }
24 | node.ftLock.Lock()
25 | node.Successor = succ
26 | node.FingerTable[0].Node = succ
27 | node.ftLock.Unlock()
28 | return err
29 | }
30 |
31 | // Thread 2: Pseudocode from figure 7 of chord paper
32 | func (node *Node) stabilize(ticker *time.Ticker) {
33 | for _ = range ticker.C {
34 | if node.IsShutdown {
35 | fmt.Printf("[%v-stabilize] Shutting down stabilize timer\n", HashStr(node.Id))
36 | ticker.Stop()
37 | return
38 | }
39 |
40 | pred, err := GetPredecessorId_RPC(node.Successor)
41 |
42 | if err != nil {
43 | log.Fatal("GetPredecessorId_RPC error: " + err.Error())
44 | }
45 |
46 | if pred != nil && BetweenRightIncl(pred.Id, node.Id, node.Successor.Id) {
47 | node.ftLock.Lock()
48 | node.Successor = pred
49 | node.FingerTable[0].Node = pred
50 | node.ftLock.Unlock()
51 | }
52 |
53 | // If you are your own successor, do not notify yourself.
54 | if !EqualIds(node.Successor.Id, node.Id) {
55 | err = Notify_RPC(node.Successor, node.RemoteSelf)
56 | if err != nil {
57 | log.Fatal("Notify_RPC error: " + err.Error())
58 | }
59 | }
60 | }
61 | }
62 |
63 | // Pseudocode from figure 7 of chord paper
64 | func (node *Node) notify(remoteNode *RemoteNode) {
65 |
66 | //TODO implement this method
67 | }
68 |
69 | // Pseudocode from figure 4 of chord paper
70 | func (node *Node) findSuccessor(id []byte) (*RemoteNode, error) {
71 | // Check if id is between me and my immediate successor.
72 | // Check if I'm my own successor.
73 | // If so, return it.
74 | if BetweenRightIncl(id, node.Id, node.Successor.Id) ||
75 | EqualIds(node.Successor.Id, node.Id) {
76 |
77 | return node.Successor, nil
78 | }
79 |
80 | n, err := node.findPredecessor(id)
81 | if err != nil {
82 | log.Fatal("findPredecessor error: " + err.Error())
83 | }
84 |
85 | return FindSuccessor_RPC(n, id)
86 |
87 | }
88 |
89 | // Pseudocode from figure 4 of chord paper
90 | func (node *Node) findPredecessor(id []byte) (*RemoteNode, error) {
91 | curr := node.RemoteSelf
92 | succ, err := GetSuccessorId_RPC(curr)
93 |
94 | // Loop while id is not between the current node and the calculated successor.
95 | for !Between(id, curr.Id, succ.Id) && !EqualIds(curr.Id, succ.Id) {
96 | curr, err = ClosestPrecedingFinger_RPC(curr, id)
97 | if err != nil {
98 | log.Fatal("ClosestPrecedingFinger_RPC error: " + err.Error())
99 | }
100 |
101 | succ, err = GetSuccessorId_RPC(curr)
102 | if err != nil {
103 | log.Fatal("GetSuccessorId_RPC error: " + err.Error())
104 | }
105 | }
106 | return curr, err
107 | }
108 |
109 | /* Find the closest preceding finger from a remote node for an ID */
110 | func ClosestPrecedingFinger_RPC(remoteNode *RemoteNode, id []byte) (*RemoteNode, error) {
111 | if remoteNode == nil {
112 | return nil, errors.New("RemoteNode is empty!")
113 | }
114 | var reply IdReply
115 | err := makeRemoteCall(remoteNode, "ClosestPrecedingFinger", RemoteQuery{remoteNode.Id, id}, &reply)
116 |
117 | rNode := new(RemoteNode)
118 | rNode.Id = reply.Id
119 | rNode.Addr = reply.Addr
120 | return rNode, err
121 | }
--------------------------------------------------------------------------------
/tapestry/tapestry/objectstore.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "sync"
5 | "time"
6 | "fmt"
7 | )
8 |
9 | /*
10 | Objects advertised to the tapestry get stored in the object store of the object's root node.
11 | An object can be advertised by multiple nodes
12 | Objects time out after some amount of time if the advertising node is not heard from
13 | */
14 | type ObjectStore struct {
15 | mutex sync.Mutex // to manage concurrent access to the object store
16 | data map[string]map[Node]*time.Timer // multimap: stores multiple nodes per key, and each node has a timeout
17 | }
18 |
19 | /*
20 | Create a new objectstore
21 | */
22 | func NewObjectStore() *ObjectStore {
23 | m := new(ObjectStore)
24 | m.data = make(map[string]map[Node]*time.Timer)
25 | return m
26 | }
27 |
28 | /*
29 | Get the nodes that are advertising a given key
30 | */
31 | func (store *ObjectStore) Get(key string) (replicas []Node) {
32 | store.mutex.Lock()
33 |
34 | replicas = slice(store.data[key])
35 |
36 | store.mutex.Unlock()
37 |
38 | return
39 | }
40 |
41 | /*
42 | Removes and returns all objects that should be transferred to the remote node
43 | */
44 | func (store *ObjectStore) GetTransferRegistrations(local Node, remote Node) map[string][]Node {
45 | transfer := make(map[string][]Node)
46 | store.mutex.Lock()
47 |
48 | for key, values := range store.data {
49 | // Compare the first digit after the prefix
50 | if Hash(key).BetterChoice(remote.Id, local.Id) {
51 | transfer[key] = slice(values)
52 | }
53 | }
54 |
55 | for key, _ := range transfer {
56 | delete(store.data, key)
57 | }
58 |
59 | store.mutex.Unlock()
60 | return transfer
61 | }
62 |
63 | /*
64 | Registers the specified node as having advertised the key. Times out after the specified duration.
65 | */
66 | func (store *ObjectStore) Register(key string, replica Node, timeout time.Duration) bool {
67 | store.mutex.Lock()
68 |
69 | // Get the value set for the object
70 | _, exists := store.data[key]
71 | if !exists {
72 | store.data[key] = make(map[Node]*time.Timer)
73 | }
74 |
75 | // Add the value to the value set
76 | timer, exists := store.data[key][replica]
77 | if !exists {
78 | store.data[key][replica] = store.newTimeout(key, replica, timeout)
79 | } else {
80 | timer.Reset(TIMEOUT)
81 | }
82 |
83 | store.mutex.Unlock()
84 |
85 | return !exists
86 | }
87 |
88 | /*
89 | Registers all of the provided nodes and keys.
90 | */
91 | func (store *ObjectStore) RegisterAll(replicamap map[string][]Node, timeout time.Duration) {
92 | store.mutex.Lock()
93 |
94 | for key, replicas := range replicamap {
95 | _, exists := store.data[key]
96 | if !exists {
97 | store.data[key] = make(map[Node]*time.Timer)
98 | }
99 | for _, replica := range replicas {
100 | store.data[key][replica] = store.newTimeout(key, replica, timeout)
101 | }
102 | }
103 |
104 | store.mutex.Unlock()
105 | }
106 |
107 | /*
108 | Utility method. Creates an expiry timer for the (key, value) pair.
109 | */
110 | func (store *ObjectStore) newTimeout(key string, replica Node, timeout time.Duration) *time.Timer {
111 | expire := func() {
112 | fmt.Printf("Expiring %v for node %v\n", key, replica)
113 |
114 | store.mutex.Lock()
115 |
116 | timer, exists := store.data[key][replica]
117 | if exists {
118 | timer.Stop()
119 | delete(store.data[key], replica)
120 | if len(store.data[key]) == 0 {
121 | delete(store.data, key)
122 | }
123 | }
124 |
125 | store.mutex.Unlock()
126 | }
127 |
128 | return time.AfterFunc(timeout, expire)
129 | }
130 |
131 |
132 | // Utility function to get the keys of a map
133 | func slice(valmap map[Node]*time.Timer) (values []Node) {
134 | for value, _ := range valmap {
135 | values = append(values, value)
136 | }
137 | return
138 | }
139 |
--------------------------------------------------------------------------------
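Register installs an expiry timer per (key, replica) pair, and re-registering resets the timer instead of adding a duplicate entry. A sketch of a root node registering an advertisement, with the replica argument and timeout chosen only for illustration:

    // Sketch: register an advertisement that expires unless it is refreshed.
    func registerSketch(replica Node) bool {
        store := NewObjectStore()
        fresh := store.Register("photo.jpg", replica, 10*time.Second)
        // fresh is true the first time; a later Register for the same pair
        // returns false and just resets the expiry timer.
        return fresh
    }
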
/raft/raft/client.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "time"
5 | )
6 |
7 | const MAX_RETRIES = 5
8 |
9 | type Client struct {
10 | LocalAddr *NodeAddr
11 | Id uint64
12 | Leader NodeAddr
13 | SeqNum uint64
14 | }
15 |
16 | func CreateClient(remoteAddr NodeAddr) (cp *Client, err error) {
17 | cp = new(Client)
18 |
19 | request := RegisterClientRequest{}
20 |
21 | var reply *RegisterClientReply
22 |
23 | retries := 0
24 |
25 | LOOP:
26 | for retries < MAX_RETRIES {
27 | reply, err = RegisterClientRPC(&remoteAddr, request)
28 | if err != nil {
29 | return
30 | }
31 | switch reply.Status {
32 | case OK:
33 | Out.Printf("%v is the leader. Client successfully created.\n", remoteAddr)
34 | break LOOP
35 | case REQ_FAILED:
36 | Error.Printf("Request failed...\n")
37 | retries++
38 | case NOT_LEADER:
39 | // The person we've contacted isn't the leader. Use
40 | // their hint to find the leader
41 | Out.Printf("%v is not the leader, but thinks that %v is\n", remoteAddr, reply.LeaderHint)
42 | remoteAddr = reply.LeaderHint
43 | case ELECTION_IN_PROGRESS:
44 | // An election is in progress. Accept the hint
45 | // and wait an appropriate amount of time, so the
46 | // election can finish.
47 | Out.Printf("%v is not the leader, but thinks that %v is\n", remoteAddr, reply.LeaderHint)
48 | remoteAddr = reply.LeaderHint
49 | time.Sleep(time.Millisecond * 200)
50 | default:
51 | }
52 | }
53 |
54 | // We've registered with the leader.
55 | cp.Id = reply.ClientId
56 | cp.Leader = remoteAddr
57 |
58 | return
59 | }
60 |
61 | func (c *Client) SendRequest(command FsmCommand, data []byte) (err error) {
62 |
63 | request := ClientRequest{
64 | c.Id,
65 | c.SeqNum,
66 | command,
67 | data,
68 | }
69 | c.SeqNum += 1
70 |
71 | var reply *ClientReply
72 |
73 | retries := 0
74 |
75 | LOOP:
76 | for retries < MAX_RETRIES {
77 | reply, err = ClientRequestRPC(&c.Leader, request)
78 | if err != nil {
79 | return
80 | }
81 | switch reply.Status {
82 | case OK:
83 | Debug.Printf("%v is the leader\n", c.Leader)
84 | Out.Printf("Request returned \"%v\".\n", reply.Response)
85 | break LOOP
86 | case REQ_FAILED:
87 | Error.Printf("Request failed: %v\n", reply.Response)
88 | retries++
89 | break LOOP
90 | case NOT_LEADER:
91 | // The person we've contacted isn't the leader. Use
92 | // their hint to find the leader
93 | c.Leader = reply.LeaderHint
94 | case ELECTION_IN_PROGRESS:
95 | // An election is in progress. Accept the hint
96 | // and wait an appropriate amount of time, so the
97 | // election can finish.
98 | c.Leader = reply.LeaderHint
99 | time.Sleep(time.Millisecond * 200)
100 | }
101 | }
102 | return
103 | }
104 | // Similar to SendRequest above, but returns the server's reply.
105 | func (c *Client) SendRequestWithResponse(command FsmCommand, data []byte) (reply *ClientReply, err error) {
106 | request := ClientRequest{
107 | c.Id,
108 | c.SeqNum,
109 | command,
110 | data,
111 | }
112 | c.SeqNum += 1
113 |
114 | //var reply *ClientReply
115 |
116 | retries := 0
117 | for retries < MAX_RETRIES {
118 | reply, err = ClientRequestRPC(&c.Leader, request)
119 | if err != nil {
120 | return nil, err
121 | }
122 | switch reply.Status {
123 | case OK:
124 | Debug.Printf("%v is the leader\n", c.Leader)
125 | Out.Printf("Request returned \"%v\".\n", reply.Response)
126 | return reply, nil
127 | case REQ_FAILED:
128 | Error.Printf("Request failed: %v\n", reply.Response)
129 | retries++
130 | return reply, nil
131 | case NOT_LEADER:
132 | // The person we've contacted isn't the leader. Use
133 | // their hint to find the leader
134 | c.Leader = reply.LeaderHint
135 | case ELECTION_IN_PROGRESS:
136 | // An election is in progress. Accept the hint
137 | // and wait an appropriate amount of time, so the
138 | // election can finish.
139 | c.Leader = reply.LeaderHint
140 | time.Sleep(time.Millisecond * 200)
141 | }
142 | }
143 | return nil, nil
144 | }
--------------------------------------------------------------------------------
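CreateClient chases NOT_LEADER and ELECTION_IN_PROGRESS hints until it registers with the actual leader, and SendRequest repeats the same dance per request. A sketch of driving the hash-chain state machine through a client, where addr is assumed to be the NodeAddr of any live cluster member:

    // Sketch: register with the cluster, seed the hash chain, then extend it.
    func hashChainSketch(addr NodeAddr) error {
        client, err := CreateClient(addr)
        if err != nil {
            return err
        }
        if err := client.SendRequest(HASH_CHAIN_INIT, []byte("seed")); err != nil {
            return err
        }
        return client.SendRequest(HASH_CHAIN_ADD, []byte{})
    }
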
/tapestry/tapestry/tapestry-local_test.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "testing"
5 | )
6 | /*Helper function to make sure that a target node is the same
7 | as the expected node*/
8 | func CheckFindRoot(node *TapestryNode, target ID, expected ID,
9 | t *testing.T) {
10 | result, _ := node.findRoot(node.node, target)
11 | if !equal_ids(result.Id, expected) {
12 | t.Errorf("%v: findRoot of %v is not %v (gives %v)", node.node.Id,
13 | target, expected, result.Id)
14 | }
15 | }
16 |
17 | /* This test checks that find root works from all nodes */
18 | func TestFindRootAndLeave(t *testing.T) {
19 | if DIGITS != 4 {
20 | t.Errorf("Test wont work unless DIGITS is set to 4.")
21 | }
22 |
23 | port = 58000
24 | id := ID{5, 8, 3, 15}
25 | mainNode := makeTapestryNode(id, "", t)
26 | id = ID{7, 0, 0xd, 1}
27 | node1 := makeTapestryNode(id, mainNode.node.Address, t)
28 | id = ID{7, 0, 0xf, 5}
29 | node2 := makeTapestryNode(id, mainNode.node.Address, t)
30 | id = ID{7, 0, 0xf, 0xa}
31 | node3 := makeTapestryNode(id, mainNode.node.Address, t)
32 |
33 | // Checks all possible combinations between all nodes to find
34 | // a given route.
35 | id = ID{3, 0xf, 8, 0xa}
36 | CheckFindRoot(mainNode, id, mainNode.node.Id, t)
37 | CheckFindRoot(node1, id, mainNode.node.Id, t)
38 | CheckFindRoot(node2, id, mainNode.node.Id, t)
39 | CheckFindRoot(node3, id, mainNode.node.Id, t)
40 | id = ID{5, 2, 0, 0xc}
41 | CheckFindRoot(mainNode, id, mainNode.node.Id, t)
42 | CheckFindRoot(node1, id, mainNode.node.Id, t)
43 | CheckFindRoot(node2, id, mainNode.node.Id, t)
44 | CheckFindRoot(node3, id, mainNode.node.Id, t)
45 | id = ID{5, 8, 0xf, 0xf}
46 | CheckFindRoot(mainNode, id, mainNode.node.Id, t)
47 | CheckFindRoot(node1, id, mainNode.node.Id, t)
48 | CheckFindRoot(node2, id, mainNode.node.Id, t)
49 | CheckFindRoot(node3, id, mainNode.node.Id, t)
50 | id = ID{7, 0, 0xc, 3}
51 | CheckFindRoot(mainNode, id, node1.node.Id, t)
52 | CheckFindRoot(node1, id, node1.node.Id, t)
53 | CheckFindRoot(node2, id, node1.node.Id, t)
54 | CheckFindRoot(node3, id, node1.node.Id, t)
55 | id = ID{6, 0, 0xf, 4}
56 | CheckFindRoot(mainNode, id, node2.node.Id, t)
57 | CheckFindRoot(node1, id, node2.node.Id, t)
58 | CheckFindRoot(node2, id, node2.node.Id, t)
59 | CheckFindRoot(node3, id, node2.node.Id, t)
60 | id = ID{7, 0, 0xa, 2}
61 | CheckFindRoot(mainNode, id, node1.node.Id, t)
62 | CheckFindRoot(node1, id, node1.node.Id, t)
63 | CheckFindRoot(node2, id, node1.node.Id, t)
64 | CheckFindRoot(node3, id, node1.node.Id, t)
65 | id = ID{6, 3, 9, 5}
66 | CheckFindRoot(mainNode, id, node1.node.Id, t)
67 | CheckFindRoot(node1, id, node1.node.Id, t)
68 | CheckFindRoot(node2, id, node1.node.Id, t)
69 | CheckFindRoot(node3, id, node1.node.Id, t)
70 | id = ID{6, 8, 3, 0xf}
71 | CheckFindRoot(mainNode, id, node1.node.Id, t)
72 | CheckFindRoot(node1, id, node1.node.Id, t)
73 | CheckFindRoot(node2, id, node1.node.Id, t)
74 | CheckFindRoot(node3, id, node1.node.Id, t)
75 | id = ID{6, 3, 0xe, 5}
76 | CheckFindRoot(mainNode, id, node2.node.Id, t)
77 | CheckFindRoot(node1, id, node2.node.Id, t)
78 | CheckFindRoot(node2, id, node2.node.Id, t)
79 | CheckFindRoot(node3, id, node2.node.Id, t)
80 | id = ID{6, 3, 0xe, 9}
81 | CheckFindRoot(mainNode, id, node3.node.Id, t)
82 | CheckFindRoot(node1, id, node3.node.Id, t)
83 | CheckFindRoot(node2, id, node3.node.Id, t)
84 | CheckFindRoot(node3, id, node3.node.Id, t)
85 | id = ID{0xb, 0xe, 0xe, 0xf}
86 | CheckFindRoot(mainNode, id, mainNode.node.Id, t)
87 | CheckFindRoot(node1, id, mainNode.node.Id, t)
88 | CheckFindRoot(node2, id, mainNode.node.Id, t)
89 | CheckFindRoot(node3, id, mainNode.node.Id, t)
90 |
91 | // Check if after node leaves, tables get updated.
92 | mainNode.tapestry.Leave()
93 |
94 | id = ID{3, 0xf, 8, 0xa}
95 | CheckFindRoot(node1, id, node1.node.Id, t)
96 | CheckFindRoot(node2, id, node1.node.Id, t)
97 | CheckFindRoot(node3, id, node1.node.Id, t)
98 | id = ID{5, 2, 0, 0xc}
99 | CheckFindRoot(node1, id, node1.node.Id, t)
100 | CheckFindRoot(node2, id, node1.node.Id, t)
101 | CheckFindRoot(node3, id, node1.node.Id, t)
102 | id = ID{5, 8, 0xf, 0xf}
103 | CheckFindRoot(node1, id, node2.node.Id, t)
104 | CheckFindRoot(node2, id, node2.node.Id, t)
105 | CheckFindRoot(node3, id, node2.node.Id, t)
106 |
107 | node1.tapestry.Leave()
108 | node2.tapestry.Leave()
109 | node3.tapestry.Leave()
110 | }
--------------------------------------------------------------------------------
/tapestry/tapestry/tapestry-remote.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "net/rpc"
5 | "fmt"
6 | )
7 |
8 | /*
9 | The methods defined in this file parallel the methods defined in tapestry-local.
10 | These methods take an additional argument, the node on which the method should be invoked.
11 | Calling any of these methods will invoke the corresponding method on the specified remote node.
12 | */
13 |
14 | // Remote API: ping an address to get tapestry node info
15 | func (tapestry *Tapestry) hello(address string) (rsp Node, err error) {
16 | err = makeRemoteCall(address, "TapestryRPCServer", "Hello", tapestry.local.node, &rsp)
17 | return
18 | }
19 |
20 | // Helper function to make a remote call on a tapestry node
21 | func makeRemoteNodeCall(remote Node, method string, req interface{}, rsp interface{}) error {
22 | fmt.Printf("%v(%v)\n", method, req)
23 | return makeRemoteCall(remote.Address, "TapestryRPCServer", method, req, rsp)
24 | }
25 |
26 | // Helper function to make a remote call to an address
27 | func makeRemoteCall(address string, structtype string, method string, req interface{}, rsp interface{}) error {
28 | // Dial the server
29 | client, err := rpc.Dial("tcp", address)
30 | if err != nil {
31 | return err
32 | }
33 |
34 | // Make the request
35 | fqm := fmt.Sprintf("%v.%v", structtype, method)
36 | err = client.Call(fqm, req, rsp)
37 |
38 | client.Close()
39 | if err != nil {
40 | return err
41 | }
42 |
43 | return nil
44 | }
45 |
46 | // Remote API: makes a remote call to the Register function
47 | func (tapestry *Tapestry) register(remote Node, replica Node, key string) (bool, error) {
48 | var rsp RegisterResponse
49 | err := makeRemoteNodeCall(remote, "Register", RegisterRequest{remote, replica, key}, &rsp)
50 | return rsp.IsRoot, err
51 | }
52 |
53 | // Remote API: makes a remote call to the GetNextHop function
54 | func (tapestry *Tapestry) getNextHop(remote Node, id ID) (bool, Node, error) {
55 | var rsp NextHopResponse
56 | err := makeRemoteNodeCall(remote, "GetNextHop", NextHopRequest{remote, id}, &rsp)
57 | return rsp.HasNext, rsp.Next, err
58 | }
59 |
60 | // Remote API: makes a remote call to the RemoveBadNodes function
61 | func (tapestry *Tapestry) removeBadNodes(remote Node, toremove []Node) error {
62 | return makeRemoteNodeCall(remote, "RemoveBadNodes", RemoveBadNodesRequest{remote, toremove}, &Node{})
63 | }
64 |
65 | // Remote API: makes a remote call to the Fetch function
66 | func (tapestry *Tapestry) fetch(remote Node, key string) (bool, []Node, error) {
67 | var rsp FetchResponse
68 | err := makeRemoteNodeCall(remote, "Fetch", FetchRequest{remote, key}, &rsp)
69 | return rsp.IsRoot, rsp.Values, err
70 | }
71 |
72 | // Remote API: makes a remote call to the AddBackpointer function
73 | func (tapestry *Tapestry) addBackpointer(remote Node, toAdd Node) error {
74 | return makeRemoteNodeCall(remote, "AddBackpointer", NodeRequest{remote, toAdd}, &Node{})
75 | }
76 |
77 | // Remote API: makes a remote call to the RemoveBackpointer function
78 | func (tapestry *Tapestry) removeBackpointer(remote Node, toRemove Node) error {
79 | return makeRemoteNodeCall(remote, "RemoveBackpointer", NodeRequest{remote, toRemove}, &Node{})
80 | }
81 |
82 | // Remote API: makes a remote call to the GetBackpointers function
83 | func (tapestry *Tapestry) getBackpointers(remote Node, from Node, level int) (neighbours []Node, err error) {
84 | err = makeRemoteNodeCall(remote, "GetBackpointers", GetBackpointersRequest{remote, from, level}, &neighbours)
85 | return
86 | }
87 |
88 | // Remote API: makes a remote call to the AddNode function
89 | func (tapestry *Tapestry) addNode(remote Node, newnode Node) (neighbours []Node, err error) {
90 | err = makeRemoteNodeCall(remote, "AddNode", NodeRequest{remote, newnode}, &neighbours)
91 | return
92 | }
93 |
94 | // Remote API: makes a remote call to the AddNodeMulticast function
95 | func (tapestry *Tapestry) addNodeMulticast(remote Node, newnode Node, level int) (neighbours []Node, err error) {
96 | err = makeRemoteNodeCall(remote, "AddNodeMulticast", AddNodeMulticastRequest{remote, newnode, level}, &neighbours)
97 | return
98 | }
99 |
100 | func (tapestry *Tapestry) transfer(remote Node, from Node, data map[string][]Node) error {
101 | return makeRemoteNodeCall(remote, "Transfer", TransferRequest{remote, from, data}, &Node{})
102 | }
103 |
104 | // Remote API: makes a remote call to the NotifyLeave function
105 | func (tapestry *Tapestry) notifyLeave(remote Node, from Node, replacement *Node) (err error) {
106 | return makeRemoteNodeCall(remote, "NotifyLeave", NotifyLeaveRequest{remote, from, replacement}, &Node{})
107 | }
108 |
--------------------------------------------------------------------------------
/chord/chord/chord.go:
--------------------------------------------------------------------------------
1 | /* Purpose: Chord struct and related functions to create new nodes, etc. */
2 | /* */
3 |
4 | package chord
5 |
6 | import (
7 | "../utils"
8 | "fmt"
9 | "log"
10 | "net"
11 | "net/rpc"
12 | "sync"
13 | "time"
14 | )
15 |
16 | // Number of bits (i.e. M value), assumes <= 128 and divisible by 8
17 | const KEY_LENGTH = 8
18 |
19 | /* Non-local node representation */
20 | type RemoteNode struct {
21 | Id []byte
22 | Addr string
23 | }
24 |
25 | /* Local node representation */
26 | type Node struct {
27 | Id []byte /* Unique Node ID */
28 | Listener net.Listener /* Node listener socket */
29 | Addr string /* String of listener address */
30 | Successor *RemoteNode /* This Node's successor */
31 | Predecessor *RemoteNode /* This Node's predecessor */
32 | RemoteSelf *RemoteNode /* Remote node of our self */
33 | IsShutdown bool /* Is node in process of shutting down? */
34 | FingerTable []FingerEntry /* Finger table entries */
35 | ftLock sync.RWMutex /* RWLock for finger table */
36 | dataStore map[string]string /* Local datastore for this node */
37 | dsLock sync.RWMutex /* RWLock for datastore */
38 | next int
39 | }
40 |
41 | /* Creates a Chord node with a pre-defined ID (useful for testing) */
42 | func CreateDefinedNode(parent *RemoteNode, definedId []byte) (*Node, error) {
43 | node := new(Node)
44 | err := node.init(parent, definedId)
45 | if err != nil {
46 | return nil, err
47 | }
48 | return node, err
49 | }
50 |
51 | /* Create Chord node with random ID based on listener address */
52 | func CreateNode(parent *RemoteNode) (*Node, error) {
53 | node := new(Node)
54 | err := node.init(parent, nil)
55 | if err != nil {
56 | return nil, err
57 | }
58 | return node, err
59 | }
60 |
61 | /* Initialize a Chord node, start listener, rpc server, and go routines */
62 | func (node *Node) init(parent *RemoteNode, definedId []byte) error {
63 | if KEY_LENGTH > 128 || KEY_LENGTH%8 != 0 {
64 | log.Fatal(fmt.Sprintf("KEY_LENGTH of %v is not supported! Must be <= 128 and divisible by 8", KEY_LENGTH))
65 | }
66 |
67 | listener, _, err := utils.OpenListener()
68 | if err != nil {
69 | return err
70 | }
71 |
72 | node.Id = HashKey(listener.Addr().String())
73 | if definedId != nil {
74 | node.Id = definedId
75 | }
76 |
77 | node.Listener = listener
78 | node.Addr = listener.Addr().String()
79 | node.IsShutdown = false
80 | node.dataStore = make(map[string]string)
81 | node.next = 1
82 |
83 | // Populate RemoteNode that points to self
84 | node.RemoteSelf = new(RemoteNode)
85 | node.RemoteSelf.Id = node.Id
86 | node.RemoteSelf.Addr = node.Addr
87 |
88 | // Populate finger table
89 | node.initFingerTable()
90 |
91 | // Join this node to the same chord ring as parent
92 | err = node.join(parent)
93 | if err != nil {
94 | return err
95 | }
96 |
97 | // Thread 1: start RPC server on this connection
98 | rpc.RegisterName(node.Addr, node)
99 | go node.startRpcServer()
100 |
101 | // Thread 2: kick off timer to stabilize periodically
102 | ticker1 := time.NewTicker(time.Millisecond * 100) //freq
103 | go node.stabilize(ticker1)
104 |
105 | // Thread 3: kick off timer to fix finger table periodically
106 | ticker2 := time.NewTicker(time.Millisecond * 90) //freq
107 | go node.fixNextFinger(ticker2)
108 |
109 | return err
110 | }
111 |
112 | /* Go routine to accept and process RPC requests */
113 | func (node *Node) startRpcServer() {
114 | for {
115 | if node.IsShutdown {
116 | fmt.Printf("[%v] Shutting down RPC server\n", HashStr(node.Id))
117 | return
118 | }
119 | if conn, err := node.Listener.Accept(); err != nil {
120 | log.Fatal("accept error: " + err.Error())
121 | } else {
122 | go rpc.ServeConn(conn)
123 | }
124 | }
125 | }
126 |
127 | /* Shutdown a specified Chord node (gracefully) */
128 | func ShutdownNode(node *Node) {
129 | node.IsShutdown = true
130 | // Wait for go routines to quit, should be enough time.
131 | time.Sleep(time.Millisecond * 2000)
132 | node.Listener.Close()
133 |
134 | //We first disconnect ourselves from our own successors and predecessors
135 | err := SetSuccessorId_RPC(node.Predecessor, node.Successor)
136 | if err != nil {
137 | log.Fatal(err)
138 | }
139 | err = SetPredecessorId_RPC(node.Successor, node.Predecessor)
140 | if err != nil {
141 | log.Fatal(err)
142 | }
143 | //We then transfer the keys to our successor
144 | (&node.dsLock).Lock()
145 | for key, val := range node.dataStore {
146 | err := Put_RPC(node.Successor, key, val)
147 | if err != nil {
148 | //TODO handle error, particularly decide what to do with the ones not transferred
149 | (&node.dsLock).Unlock()
150 | log.Fatal(err)
151 | }
152 | //then we delete it locally
153 | delete(node.dataStore, key)
154 | }
155 | (&node.dsLock).Unlock()
156 | }
--------------------------------------------------------------------------------
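CreateNode hashes the listener address into an ID while CreateDefinedNode lets tests pin the ID; joining only needs a RemoteNode that is already in the ring. A sketch of forming a two-node ring:

    // Sketch: the first node starts a ring; the second joins through it.
    func twoNodeRingSketch() (*Node, *Node, error) {
        first, err := CreateNode(nil)
        if err != nil {
            return nil, nil, err
        }
        second, err := CreateNode(first.RemoteSelf)
        if err != nil {
            return nil, nil, err
        }
        return first, second, nil
    }
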
/tapestry/tapestry/id.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "math/rand"
5 | "math/big"
6 | "bytes"
7 | "fmt"
8 | "crypto/sha1"
9 | "time"
10 | )
11 |
12 | /*
13 | An ID is just a typedef'ed digit array
14 | */
15 | type ID [DIGITS]Digit
16 |
17 | /*
18 | A digit is just a typedef'ed uint8
19 | */
20 | type Digit uint8
21 |
22 | /*
23 | Generates a random ID
24 | */
25 | func RandomID() ID {
26 | var id ID
27 | for i := range id {
28 | id[i] = Digit(random.Intn(BASE))
29 | }
30 | return id
31 | }
32 |
33 | /*
34 | Returns the length of the prefix that is shared by the two IDs
35 | */
36 | func SharedPrefixLength(a ID, b ID) int {
37 | count := 0
38 | for i := 0; i < DIGITS && a[i] == b[i]; i++ {
39 | count++
40 | }
41 | return count
42 | }
43 |
44 | /*
45 | Used by Tapestry's surrogate routing. Given IDs first and second, which is the better choice?
46 | The "better choice" is the ID that:
47 | - has the longest shared prefix with id
48 | - if both have prefix of length n, which id has a better (n+1)th digit?
49 | - if both have the same (n+1)th digit, consider (n+2)th digit, etc.
50 | Returns true if the first ID is the better choice. Returns false if second ID is closer or if first==second
51 | */
52 | func (id ID) BetterChoice(first ID, second ID) bool {
53 | fPrefix := SharedPrefixLength(first, id)
54 | sPrefix := SharedPrefixLength(second, id)
55 | if fPrefix != sPrefix || (sPrefix == DIGITS && fPrefix == DIGITS) {
56 | // If the prefix lengths differ (or both IDs fully match id), the longer prefix wins.
57 | return fPrefix > sPrefix
58 | }
59 | // The prefixes are equally long but shorter than DIGITS, so break the tie digit by digit.
60 | index := sPrefix
61 | start := id[index]
62 | target := id[index]
63 | madeAChoice := false
64 | for !madeAChoice {
65 | // Count how far (mod BASE) each candidate's digit is ahead of id's digit; the smaller distance wins.
66 | fDigit := first[index] % BASE
67 | sDigit := second[index] % BASE
68 | fDistance := 0
69 | sDistance := 0
70 | for sDigit != target {
71 | sDistance++
72 | target++
73 | target = target % BASE
74 | }
75 | target = start
76 | for fDigit != target {
77 | fDistance++
78 | target++
79 | target = target % BASE
80 | }
81 |
82 |
83 | if fDistance == sDistance {
84 | if index == DIGITS-1 {
85 | return false
86 | } else {
87 | index++
88 | target = id[index]
89 | start = id[index]
90 | }
91 | } else {
92 | //fmt.Printf("fDistance: %v, sDistance: %v, target: %v, fDigit: %v, sDigit: %v\n", fDistance, sDistance, target, fDigit, sDigit)
93 | return fDistance < sDistance
94 | }
95 | }
96 | return false
97 | }
98 |
99 | /*
100 | Used when inserting nodes into Tapestry's routing table. If the routing table has multiple candidate nodes for a slot,
101 | then it chooses the node that is closer to the local node.
102 | In a production Tapestry implementation, closeness is determined by looking at the round-trip-times (RTTs) between (a, id) and (b, id),
103 | and the node with the shorter RTT is closer.
104 | In my implementation, I have decided to define closeness as the absolute value of the difference between a and b.
105 | This is NOT the same as the implementation of BetterChoice.
106 | Returns true if a is closer than b. Returns false if b is closer than a, or if a == b.
107 | */
108 | func (id ID) Closer(first ID, second ID) bool {
109 |
110 | firstNum := first.big()
111 | secondNum := second.big()
112 | idNum := id.big()
113 |
114 | difF := big.NewInt(0)
115 | difS := big.NewInt(0)
116 |
117 | difF.Sub(firstNum, idNum)
118 | difS.Sub(secondNum, idNum)
119 | difF.Abs(difF)
120 | difS.Abs(difS)
121 |
122 | if difF.Cmp(difS) == -1 {
123 | return true
124 | } else {
125 | return false
126 | }
127 | }
128 |
129 | /*
130 | Helper function: convert an ID to a big int.
131 | */
132 | func (id ID) big() (b *big.Int) {
133 | b = big.NewInt(0)
134 | base := big.NewInt(BASE)
135 | for _, digit := range id {
136 | b.Mul(b, base)
137 | b.Add(b, big.NewInt(int64(digit)))
138 | }
139 | return b
140 | }
141 |
142 | /*
143 | String representation of an ID is hexstring of each digit
144 | */
145 | func (id ID) String() string {
146 | var buf bytes.Buffer
147 | for _, d := range id {
148 | buf.WriteString(d.String())
149 | }
150 | return buf.String()
151 | }
152 |
153 | /*
154 | Hashes the string to an ID
155 | */
156 | func Hash(key string) (id ID) {
157 | // Sha-hash the key
158 | sha := sha1.New()
159 | sha.Write([]byte(key))
160 | hash := sha.Sum([]byte{})
161 |
162 | // Store in an ID
163 | for i := range id {
164 | id[i] = Digit(hash[(i/2)%len(hash)])
165 | if i%2 == 0 {
166 | id[i] >>= 4
167 | }
168 | id[i] %= BASE
169 | }
170 |
171 | return id
172 | }
173 |
174 | /*
175 | String representation of a digit is its hex value
176 | */
177 | func (digit Digit) String() string {
178 | return fmt.Sprintf("%X", byte(digit))
179 | }
180 |
181 | // generate random node ID
182 | var random = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
--------------------------------------------------------------------------------
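BetterChoice first compares shared prefix lengths and only falls back to the digit-distance tie-break when they are equal. A worked sketch of the prefix-length case, assuming DIGITS is at least 4 as in the tests:

    // Sketch: a shares 2 leading digits with id, b shares only 1, so a wins.
    func betterChoiceSketch() bool {
        id := ID{5, 8, 3, 0xf}
        a := ID{5, 8, 0, 0}
        b := ID{5, 0, 0, 0}
        // SharedPrefixLength(a, id) == 2, SharedPrefixLength(b, id) == 1
        return id.BetterChoice(a, b) // true
    }
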
/oceanstore/oceanstore/inode_test.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "testing"
5 | "time"
6 | )
7 |
8 | func TestGobEncoding(t *testing.T) {
9 | inode := new(Inode)
10 | inode.name = "Test inode"
11 | inode.filetype = 1
12 | inode.size = 666
13 | inode.indirect = "F666"
14 |
15 | bytes, err := inode.GobEncode()
16 | if err != nil {
17 | t.Errorf("Gob encode didn't work.")
18 | }
19 |
20 | sameInode := new(Inode)
21 | sameInode.GobDecode(bytes)
22 |
23 | if inode.name != sameInode.name {
24 | t.Errorf("Name not the same\n\t%v != %v.", inode.name, sameInode.name)
25 | }
26 |
27 | if inode.filetype != sameInode.filetype {
28 | t.Errorf("Filetype not the same\n\t%v != %v.", inode.filetype, sameInode.filetype)
29 | }
30 |
31 | if inode.size != sameInode.size {
32 | t.Errorf("Size not the same\n\t%v != %v.", inode.size, sameInode.size)
33 | }
34 |
35 | if inode.indirect != sameInode.indirect {
36 | t.Errorf("Indirect not the same\n\t%v != %v.", inode.indirect, sameInode.indirect)
37 | }
38 | }
39 |
40 | func TestInodeStorage(t *testing.T) {
41 | ocean, err := Start()
42 | if err != nil {
43 | t.Errorf("Could not init oceanstore: %v", err)
44 | return
45 | }
46 | time.Sleep(time.Millisecond * 500)
47 |
48 | client := ocean.raftClient
49 |
50 | inode := new(Inode)
51 | inode.name = "Test inode"
52 | inode.filetype = 1
53 | inode.size = 666
54 | inode.indirect = "F666"
55 |
56 | inode2 := new(Inode)
57 | inode2.name = "Test inode2"
58 | inode2.filetype = 0
59 | inode2.size = 66
60 | inode2.indirect = "BEEF"
61 |
62 |
63 | err = ocean.storeInode("/path/one", inode, client.Id)
64 | if err != nil {
65 | t.Errorf("Error storing Inode: %v", err)
66 | return
67 | }
68 | err = ocean.storeInode("/second/path", inode2, client.Id)
69 | if err != nil {
70 | t.Errorf("Error storing Inode2: %v", err)
71 | return
72 | }
73 |
74 | sameInode, err := ocean.getInode("/path/one", client.Id)
75 | if err != nil {
76 | t.Errorf("Error geting Inode: %v", err)
77 | return
78 | }
79 | sameInode2, err := ocean.getInode("/second/path", client.Id)
80 | if err != nil {
81 | t.Errorf("Error geting Inode2: %v", err)
82 | return
83 | }
84 |
85 | if inode.name != sameInode.name {
86 | t.Errorf("Name not the same\n\t%v != %v.", inode.name, sameInode.name)
87 | }
88 | if inode.filetype != sameInode.filetype {
89 | t.Errorf("Filetype not the same\n\t%v != %v.", inode.filetype, sameInode.filetype)
90 | }
91 | if inode.size != sameInode.size {
92 | t.Errorf("Size not the same\n\t%v != %v.", inode.size, sameInode.size)
93 | }
94 | if inode.indirect != sameInode.indirect {
95 | t.Errorf("Indirect not the same\n\t%v != %v.", inode.indirect, sameInode.indirect)
96 | }
97 |
98 | if inode2.name != sameInode2.name {
99 | t.Errorf("Name not the same\n\t%v != %v.", inode2.name, sameInode2.name)
100 | }
101 | if inode2.filetype != sameInode2.filetype {
102 | t.Errorf("Filetype not the same\n\t%v != %v.", inode2.filetype, sameInode2.filetype)
103 | }
104 | if inode2.size != sameInode2.size {
105 | t.Errorf("Size not the same\n\t%v != %v.", inode2.size, sameInode2.size)
106 | }
107 | if inode2.indirect != sameInode2.indirect {
108 | t.Errorf("Indirect not the same\n\t%v != %v.", inode2.indirect, sameInode2.indirect)
109 | }
110 | }
111 |
112 | func TestInodeReplacement(t *testing.T) {
113 | puddle, err := Start()
114 | if err != nil {
115 | 		t.Errorf("Could not init oceanstore: %v", err)
116 | 		return
117 | }
118 | time.Sleep(time.Millisecond * 500)
119 | client := puddle.raftClient
120 |
121 | inode := new(Inode)
122 | inode.name = "Test inode"
123 | inode.filetype = 1
124 | inode.size = 666
125 | inode.indirect = "F666"
126 |
127 | err = puddle.storeInode("/path/one", inode, client.Id)
128 | if err != nil {
129 | t.Errorf("Error storing Inode: %v", err)
130 | return
131 | }
132 |
133 | /*
134 | err = puddle.removeKey("/path/one")
135 | if err != nil {
136 | t.Errorf("Error removing key \"/path/one\": %v", err)
137 | return
138 | }*/
139 |
140 | inode2 := new(Inode)
141 | 	inode2.name = "Replacement inode"
142 | inode2.filetype = 1
143 | inode2.size = 50
144 | inode2.indirect = "DEAD"
145 |
146 | err = puddle.storeInode("/path/one", inode2, client.Id)
147 | if err != nil {
148 | t.Errorf("Error storing Inode: %v", err)
149 | return
150 | }
151 |
152 | sameInode2, err := puddle.getInode("/path/one", client.Id)
153 | if err != nil {
154 | 		t.Errorf("Error getting Inode: %v", err)
155 | return
156 | }
157 | if sameInode2 == nil {
158 | 		t.Errorf("getInode returned a nil inode")
159 | return
160 | }
161 |
162 | if inode2.name != sameInode2.name {
163 | t.Errorf("Name not the same\n\t%v != %v.", inode2.name, sameInode2.name)
164 | }
165 | 	if inode2.filetype != sameInode2.filetype {
166 | 		t.Errorf("Filetype not the same\n\t%v != %v.", inode2.filetype, sameInode2.filetype)
167 | 	}
168 | 	if inode2.size != sameInode2.size {
169 | 		t.Errorf("Size not the same\n\t%v != %v.", inode2.size, sameInode2.size)
170 | 	}
171 | 	if inode2.indirect != sameInode2.indirect {
172 | 		t.Errorf("Indirect not the same\n\t%v != %v.", inode2.indirect, sameInode2.indirect)
173 | 	}
174 | }
--------------------------------------------------------------------------------
/tapestry/cli.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "./tapestry"
5 | "bufio"
6 | "fmt"
7 | "os"
8 | "strings"
9 | "flag"
10 | )
11 |
12 | func printHelp() {
13 | fmt.Println("Commands:")
14 | fmt.Println(" - help Prints this help message")
15 | fmt.Println(" - table Prints this node's routing table")
16 | fmt.Println(" - backpointers Prints this node's backpointers")
17 | fmt.Println(" - objects Prints the advertised objects that are registered to this node")
18 | fmt.Println(" - put Stores the provided key-value pair on the local node and advertises the key to the tapestry")
19 | fmt.Println(" - lookup Looks up the specified key in the tapestry and prints its location")
20 | fmt.Println(" - get Looks up the specified key in the tapestry, then fetches the value from one of the replicas")
21 | fmt.Println(" - remove Remove the specified key from the tapestry")
22 | fmt.Println(" - list List the blobs being stored and advertised by the local node")
23 | fmt.Println(" - leave Instructs the local node to gracefully leave the tapestry")
24 | fmt.Println(" - kill Leaves the tapestry without graceful exit")
25 | fmt.Println(" - exit Quit this CLI")
26 | }
27 |
28 | func CLI(t *tapestry.Tapestry, done chan bool) {
29 |
30 | printHelp()
31 | for {
32 | reader := bufio.NewReader(os.Stdin)
33 | fmt.Print("> ")
34 | text, _ := reader.ReadString('\n')
35 | text = strings.TrimSpace(text)
36 | splits := strings.Split(text, " ")
37 | command := strings.ToLower(splits[0])
38 | switch command {
39 | case "quit", "exit":
40 | {
41 | done <- true
42 | return
43 | }
44 | case "table":
45 | {
46 | t.PrintRoutingTable()
47 | }
48 | case "backpointers":
49 | {
50 | t.PrintBackpointers()
51 | }
52 | case "replicas", "data", "objects":
53 | {
54 | t.PrintObjectStore()
55 | }
56 | case "leave":
57 | {
58 | t.Leave()
59 | }
60 | case "put", "add", "store":
61 | {
62 | if len(splits) < 3 {
63 | 				fmt.Printf("Insufficient arguments for %s, expected: %s <key> <value>\n", command, command)
64 | } else {
65 | key := splits[1]
66 | bytes := []byte(splits[2])
67 | err := t.Store(key, bytes)
68 | if err != nil {
69 | fmt.Println(err)
70 | }
71 | }
72 | }
73 | case "list", "listblobs":
74 | {
75 | t.PrintBlobStore()
76 | }
77 | case "lookup", "find":
78 | {
79 | if len(splits) < 2 {
80 | 				fmt.Printf("Insufficient arguments for %s, expected: %s <key>\n", command, command)
81 | } else {
82 | key := splits[1]
83 | replicas, err := t.Lookup(key)
84 | if err != nil {
85 | fmt.Println(err)
86 | } else {
87 | fmt.Printf("%v: %v\n", key, replicas)
88 | }
89 | }
90 | }
91 | case "get":
92 | {
93 | if len(splits) < 2 {
94 | 				fmt.Printf("Insufficient arguments for %s, expected: %s <key>\n", command, command)
95 | } else {
96 | key := splits[1]
97 | bytes, err := t.Get(key)
98 | if err != nil {
99 | fmt.Println(err)
100 | } else {
101 | fmt.Printf("%v: %v\n", key, string(bytes))
102 | }
103 | }
104 | }
105 | case "remove":
106 | {
107 | if len(splits) < 2 {
108 | 				fmt.Printf("Insufficient arguments for %s, expected: %s <key>\n", command, command)
109 | } else {
110 | key := splits[1]
111 | exists := t.Remove(key)
112 | if !exists {
113 | fmt.Printf("This node is not advertising %v\n", key)
114 | }
115 | }
116 | }
117 | case "help", "commands":
118 | {
119 | printHelp()
120 | }
121 | case "kill":
122 | {
123 | t.Kill()
124 | }
125 | default:
126 | {
127 | fmt.Printf("Unknown command %s\n", text)
128 | }
129 | }
130 | }
131 | }
132 |
133 | func main() {
134 | var port int
135 | var addr string
136 |
137 | flag.IntVar(&port, "port", 0, "The server port to bind to. Defaults to a random port.")
138 | flag.StringVar(&addr, "connect", "", "An existing node to connect to. If left blank, does not attempt to connect to another node.")
139 | flag.Parse()
140 |
141 | switch {
142 | case port != 0 && addr != "":
143 | {
144 | fmt.Printf("Starting a node on port %v and connecting to %v\n", port, addr)
145 | }
146 | case port != 0:
147 | {
148 | fmt.Printf("Starting a standalone node on port %v\n", port)
149 | }
150 | case addr != "":
151 | {
152 | fmt.Printf("Starting a node on a random port and connecting to %v\n", addr)
153 | }
154 | default:
155 | {
156 | fmt.Printf("Starting a standalone node on a random port\n")
157 | }
158 | }
159 |
160 | t, err := tapestry.Start(port, addr)
161 |
162 | if err != nil {
163 | fmt.Printf("Error starting tapestry node: %v\n", err)
164 | return
165 | }
166 |
167 | fmt.Printf("Successfully started: %v\n", t)
168 |
169 | // Kick off CLI, await exit
170 | done := make(chan bool)
171 | go CLI(t, done)
172 |
173 | 	// Block until the CLI signals that the user has exited
174 | 	<-done
175 |
176 | fmt.Println("Closing tapestry")
177 | t.Leave()
178 | }
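179 | 
180 | // Illustrative invocations (a sketch only; the host and port values are placeholders,
181 | // and the flag names match the flag.IntVar/flag.StringVar definitions in main above):
182 | //
183 | //	go run cli.go -port 2000                        // standalone node on port 2000
184 | //	go run cli.go -connect somehost:2000            // random port, join an existing node
185 | //	go run cli.go -port 2001 -connect somehost:2000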
--------------------------------------------------------------------------------
/raft/raft/raft_test.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "testing"
7 | "time"
8 | )
9 |
10 | func TestLeaderElection(t *testing.T) {
11 | config := DefaultConfig()
12 | config.ClusterSize = 5
13 | config.LogPath = randSeq(10)
14 |
15 | nodes, err := CreateLocalCluster(config)
16 | if err != nil {
17 | 		t.Errorf("Could not create nodes: %v", err)
18 | return
19 | }
20 | time.Sleep(time.Millisecond * 500)
21 | if !checkNodes(nodes, config.ClusterSize) {
22 | t.Errorf("CreateLocalCluster FAILED")
23 | return
24 | }
25 |
26 | 	fmt.Printf("Searching for a leader\n")
27 | 	leader := getLeader(nodes)
28 | 	fmt.Printf("Leader search finished\n")
29 | if leader == nil {
30 | 		t.Errorf("Could not find a leader")
31 | fmt.Printf("# nodes: %v\n", len(nodes))
32 | printNodes(nodes)
33 | return
34 | }
35 |
36 | time.Sleep(time.Millisecond * 500)
37 | if !checkMajorityTerms(nodes) {
38 | t.Errorf("Nodes are not on the same term (%v)", leader.GetCurrentTerm())
39 | }
40 | if !checkMajorityCommitIndex(nodes) {
41 | 		t.Errorf("Nodes don't have the same commit index (%v)", leader.commitIndex)
42 | }
43 | if !checkLogOrder(nodes) {
44 | 		t.Errorf("Node logs are not in a consistent order")
45 | printNodes(nodes)
46 | }
47 |
48 | 	fmt.Printf("Disabling node: %v\n", leader.Id)
49 | leader.Testing.PauseWorld(true)
50 | disableLeader := leader
51 | time.Sleep(time.Millisecond * 100)
52 | leader = getLeader(nodes)
53 | if leader == nil {
54 | 		t.Errorf("Could not find a new leader after pausing the old one")
55 | return
56 | }
57 |
58 | fmt.Printf("We now enable %v\n", disableLeader.Id)
59 | disableLeader.Testing.PauseWorld(false)
60 | time.Sleep(time.Millisecond * 100)
61 | leader = getLeader(nodes)
62 | if leader == nil {
63 | 		t.Errorf("Could not find a leader after re-enabling the old one")
64 | return
65 | }
66 | time.Sleep(time.Millisecond * 500)
67 | if !checkMajorityTerms(nodes) {
68 | t.Errorf("Nodes are not on the same term (%v)", leader.GetCurrentTerm())
69 | }
70 | if !checkMajorityCommitIndex(nodes) {
71 | 		t.Errorf("Nodes don't have the same commit index (%v)", leader.commitIndex)
72 | }
73 | if !checkLogOrder(nodes) {
74 | 		t.Errorf("Node logs are not in a consistent order")
75 | printNodes(nodes)
76 | }
77 |
78 | 	fmt.Println("TestLeaderElection passed")
79 | shutdownNodes(nodes)
80 | }
81 |
82 | func checkLogOrder(nodes []*RaftNode) bool {
83 | for _, n := range nodes {
84 | prevIndex := int64(-1)
85 | prevTerm := int64(-1)
86 | seen := make(map[uint64]bool)
87 | for _, entry := range n.logCache {
88 | if seen[entry.Index] || int64(entry.Index)-1 != prevIndex || int64(entry.TermId) < prevTerm {
89 | return false
90 | }
91 |
92 | seen[entry.Index] = true
93 | prevIndex = int64(entry.Index)
94 | prevTerm = int64(entry.TermId)
95 | }
96 | }
97 | return true
98 | }
99 |
100 | // Loops until it finds a majority leader in nodes.
101 | func getLeader(nodes []*RaftNode) *RaftNode {
102 | //Check all and make sure that leader matches
103 | 	// nil until a majority of nodes agree on a leader
104 | 	var leader *RaftNode
105 | it := 1
106 | for leader == nil && it < 50 {
107 | 		fmt.Printf("getLeader attempt %v\n", it)
108 | time.Sleep(time.Millisecond * 200)
109 | sums := make(map[string]int, nodes[0].conf.ClusterSize)
110 | for _, n := range nodes {
111 | if n.LeaderAddress != nil {
112 | sums[n.LeaderAddress.Id]++
113 | }
114 | }
115 | 		fmt.Printf("leader counts: %v\n", sums)
116 | var maxNode string
117 | max := -1
118 | for k, v := range sums {
119 | if v > max {
120 | maxNode = k
121 | max = v
122 | }
123 | }
124 |
125 | if max > len(nodes)/2 {
126 | for _, n := range nodes {
127 | if maxNode == n.Id {
128 | leader = n
129 | }
130 | }
131 | }
132 | it++
133 | }
134 |
135 | if it >= 50 {
136 | return nil
137 | }
138 | return leader
139 | }
140 |
141 | func checkMajorityTerms(nodes []*RaftNode) bool {
142 | sums := make(map[uint64]int, nodes[0].conf.ClusterSize)
143 | for _, n := range nodes {
144 | sums[n.GetCurrentTerm()]++
145 | }
146 | max := -1
147 | for _, v := range sums {
148 | if v > max {
149 | max = v
150 | }
151 | }
152 |
153 | if max > len(nodes)/2 {
154 | return true
155 | }
156 | return false
157 | }
158 |
159 | func checkMajorityCommitIndex(nodes []*RaftNode) bool {
160 | sums := make(map[uint64]int, nodes[0].conf.ClusterSize)
161 | for _, n := range nodes {
162 | sums[n.commitIndex]++
163 | }
164 | max := -1
165 | for _, v := range sums {
166 | if v > max {
167 | max = v
168 | }
169 | }
170 |
171 | if max > len(nodes)/2 {
172 | return true
173 | }
174 | return false
175 | }
176 |
177 | func checkNodes(nodes []*RaftNode, clusterSize int) bool {
178 | for _, n := range nodes {
179 | if len(n.GetOtherNodes()) != clusterSize {
180 | return false
181 | }
182 | }
183 | return true
184 | }
185 |
186 | func printNodes(nodes []*RaftNode) {
187 | for _, n := range nodes {
188 | n.PrintLogCache()
189 | n.ShowState()
190 | }
191 | }
192 |
193 | func removeLogs(nodes []*RaftNode) {
194 | for _, n := range nodes {
195 | n.RemoveLogs()
196 | }
197 | }
198 |
199 | func shutdownNodes(nodes []*RaftNode) {
200 | for _, n := range nodes {
201 | n.IsShutDown = true
202 | n.gracefulExit <- true
203 | }
204 | time.Sleep(time.Millisecond * 200)
205 | }
206 |
207 | func randSeq(n int) string {
208 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
209 | b := make([]rune, n)
210 | for i := range b {
211 | b[i] = letters[rand.Intn(len(letters))]
212 | }
213 | return string(b)
214 | }
--------------------------------------------------------------------------------
/chord/chord/node_rpc_api.go:
--------------------------------------------------------------------------------
1 | package chord
2 |
3 | import (
4 | "errors"
5 | "net/rpc"
6 | "fmt"
7 | )
8 |
9 | type RemoteId struct {
10 | Id []byte
11 | }
12 |
13 | type RemoteQuery struct {
14 | FromId []byte
15 | Id []byte
16 | }
17 |
18 | type IdReply struct {
19 | Id []byte
20 | Addr string
21 | Valid bool
22 | }
23 |
24 | type KeyValueReq struct {
25 | NodeId []byte
26 | Key string
27 | Value string
28 | }
29 |
30 | type KeyValueReply struct {
31 | Key string
32 | Value string
33 | }
34 |
35 | type RpcOkay struct {
36 | Ok bool
37 | }
38 |
39 | type UpdateReq struct {
40 | FromId []byte
41 | UpdateId []byte
42 | UpdateAddr string
43 | }
44 |
45 | type NotifyReq struct {
46 | NodeId []byte
47 | NodeAddr string
48 | UpdateId []byte
49 | UpdateAddr string
50 | }
51 |
52 | type TransferReq struct {
53 | NodeId []byte
54 | FromId []byte
55 | FromAddr string
56 | PredId []byte
57 | }
58 |
59 | /* RPC connection map cache */
60 | var connMap = make(map[string]*rpc.Client)
61 |
62 | /* Find the successor node of a given ID in the entire ring */
63 | func FindSuccessor_RPC(remoteNode *RemoteNode, id []byte) (*RemoteNode, error) {
64 | if remoteNode == nil {
65 | return nil, errors.New("RemoteNode is empty!")
66 | }
67 | var reply IdReply
68 | 	err := makeRemoteCall(remoteNode, "FindSuccessor", RemoteQuery{remoteNode.Id, id}, &reply)
69 | 	if err != nil {
70 | 		return nil, err
71 | 	}
72 | 
73 | 	return &RemoteNode{Id: reply.Id, Addr: reply.Addr}, nil
74 | }
75 |
76 | /* Helper function to make a call to a remote node */
77 | func makeRemoteCall(remoteNode *RemoteNode, method string, req interface{}, rsp interface{}) error {
78 | // Dial the server if we don't already have a connection to it
79 | remoteNodeAddrStr := remoteNode.Addr
80 | var err error
81 | client, ok := connMap[remoteNodeAddrStr]
82 | if !ok {
83 | client, err = rpc.Dial("tcp", remoteNode.Addr)
84 | if err != nil {
85 | return err
86 | }
87 | connMap[remoteNodeAddrStr] = client
88 | }
89 |
90 | // Make the request
91 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method)
92 | err = client.Call(uniqueMethodName, req, rsp)
93 | if err != nil {
94 | return err
95 | }
96 |
97 | return nil
98 | }
99 |
100 | /* Get the predecessor ID of a remote node */
101 | func GetPredecessorId_RPC(remoteNode *RemoteNode) (*RemoteNode, error) {
102 | var reply IdReply
103 | err := makeRemoteCall(remoteNode, "GetPredecessorId", RemoteId{remoteNode.Id}, &reply)
104 | if err != nil {
105 | return nil, err
106 | }
107 |
108 | if !reply.Valid {
109 | 		return nil, nil // no error, but the remote node has no predecessor set
110 | 	}
111 |
112 | rNode := new(RemoteNode)
113 | rNode.Id = reply.Id
114 | rNode.Addr = reply.Addr
115 | return rNode, err
116 | }
117 |
118 | /* Get the successor ID of a remote node */
119 | func GetSuccessorId_RPC(remoteNode *RemoteNode) (*RemoteNode, error) {
120 | var reply IdReply
121 | err := makeRemoteCall(remoteNode, "GetSuccessorId", RemoteId{remoteNode.Id}, &reply)
122 | if err != nil {
123 | return nil, err
124 | }
125 | rNode := new(RemoteNode)
126 | rNode.Id = reply.Id
127 | rNode.Addr = reply.Addr
128 | return rNode, err
129 | }
130 |
131 | /* Get a value from a remote node's datastore for a given key */
132 | func Get_RPC(locNode *RemoteNode, key string) (string, error) {
133 | if locNode == nil {
134 | return "", errors.New("RemoteNode is empty!")
135 | }
136 |
137 | var reply KeyValueReply
138 | req := KeyValueReq{locNode.Id, key, ""}
139 | err := makeRemoteCall(locNode, "GetLocal", &req, &reply)
140 |
141 | return reply.Value, err
142 | }
143 |
144 | /* Put a key/value into a datastore on a remote node */
145 | func Put_RPC(locNode *RemoteNode, key string, value string) error {
146 | if locNode == nil {
147 | return errors.New("RemoteNode is empty!")
148 | }
149 |
150 | var reply KeyValueReply
151 | req := KeyValueReq{locNode.Id, key, value}
152 | err := makeRemoteCall(locNode, "PutLocal", &req, &reply)
153 |
154 | return err
155 | }
156 |
157 | /* Set the predecessor ID of a remote node */
158 | func SetPredecessorId_RPC(remoteNode, newPred *RemoteNode) error {
159 | var reply RpcOkay
160 | var req UpdateReq
161 | req.FromId = remoteNode.Id
162 | if newPred != nil {
163 | req.UpdateId = newPred.Id
164 | req.UpdateAddr = newPred.Addr
165 | }
166 |
167 | err := makeRemoteCall(remoteNode, "SetPredecessorId", &req, &reply)
168 | if err != nil {
169 | return err
170 | }
171 | if !reply.Ok {
172 | 		return fmt.Errorf("RPC replied not valid from %v", remoteNode.Id)
173 | }
174 |
175 | return err
176 | }
177 |
178 | /* Set the successor ID of a remote node */
179 | func SetSuccessorId_RPC(remoteNode, newSucc *RemoteNode) error {
180 | var reply RpcOkay
181 | var req UpdateReq
182 | req.FromId = remoteNode.Id
183 | req.UpdateId = newSucc.Id
184 | req.UpdateAddr = newSucc.Addr
185 |
186 | err := makeRemoteCall(remoteNode, "SetSuccessorId", &req, &reply)
187 | if err != nil {
188 | return err
189 | }
190 | if !reply.Ok {
191 | 		return fmt.Errorf("RPC replied not valid from %v", remoteNode.Id)
192 | }
193 |
194 | return err
195 | }
196 |
197 | /* Notify a remote node that we believe we are its predecessor */
198 | func Notify_RPC(remoteNode, us *RemoteNode) error {
199 | if remoteNode == nil {
200 | return errors.New("RemoteNode is empty!")
201 | }
202 | var reply RpcOkay
203 | var req NotifyReq
204 | req.NodeId = remoteNode.Id
205 | req.NodeAddr = remoteNode.Addr
206 | req.UpdateId = us.Id
207 | req.UpdateAddr = us.Addr
208 |
209 | // must send us and intended node
210 | 	err := makeRemoteCall(remoteNode, "Notify", &req, &reply)
211 | 	if err == nil && !reply.Ok {
212 | 		return fmt.Errorf("RPC replied not valid from %v", remoteNode.Id)
213 | 	}
214 | 
215 | 	return err
216 | }
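217 | 
218 | // Note (a sketch, not wired in here): connMap caches one rpc.Client per remote address
219 | // but never evicts an entry, so a remote node that restarts can leave a stale, broken
220 | // connection in the cache. The oceanstore version of makeRemoteCall drops the cached
221 | // client whenever a call fails, e.g.:
222 | //
223 | //	err = client.Call(uniqueMethodName, req, rsp)
224 | //	if err != nil {
225 | //		client.Close()
226 | //		delete(connMap, remoteNodeAddrStr)
227 | //		return err
228 | //	}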
--------------------------------------------------------------------------------
/oceanstore/oceanstore/ocean_local_impl.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | "../../tapestry/tapestry"
7 | "../../raft/raft"
8 | )
9 |
10 | func (ocean *OceanNode) mkdir(req *MkdirRequest) (MkdirReply, error) {
11 | fmt.Println("Entered mkdir")
12 | reply := MkdirReply{}
13 |
14 | 	path := req.Path
15 | 	clientId := req.ClientId
16 | 	if len(path) == 0 {
17 | 		return reply, fmt.Errorf("Empty path")
18 | 	}
19 | 
20 | 	if path[0] != '/' {
21 | 		path = ocean.getCurrentDir(clientId) + "/" + path
22 | 	}
23 | 	path = removeExcessSlashes(path)
24 | 	length := len(path)
25 | 
26 | 	if (length > 2 && path[length-1] == '.' && path[length-2] == '.') ||
27 | 		path[length-1] == '.' {
28 | 		return reply, fmt.Errorf("Path may not end in '.' or '..'")
29 | 	}
30 |
31 | dirInode, name, fullPath, dirPath, err := ocean.dir_namev(path, clientId)
32 | if err != nil {
33 | fmt.Println(err)
34 | return reply, err
35 | }
36 |
37 | // File we are about to make should not exist.
38 | _, err = ocean.getInode(fullPath, clientId)
39 | if err == nil {
40 | return reply, fmt.Errorf("There already exists a file/dir with that name.")
41 | }
42 |
43 | // This is the root node creation.
44 | if dirInode == nil {
45 |
46 | // Create the root Inode and its block
47 | newDirInode := CreateDirInode(name)
48 | newDirBlock := CreateBlock()
49 |
50 | // Set block paths for the indirect block and dot references
51 | blockPath := fmt.Sprintf("%v:%v", fullPath, "indirect") // this will be '/:indirect'
52 |
53 | // Hash the dot references to put them on the indirect block.
54 | blockHash := tapestry.Hash(blockPath)
55 |
56 | // Save the root Inode indirect block in tapestry
57 | ocean.storeIndirectBlock(fullPath, newDirBlock.bytes, clientId)
58 |
59 | newDirInode.indirect = hashToGuid(blockHash)
60 | fmt.Println(blockHash, "->", newDirInode.indirect)
61 |
62 | // Save the root Inode
63 | ocean.storeInode(fullPath, newDirInode, clientId)
64 |
65 | } else {
66 | // Get indirect block from the directory that is going to create
67 | // the node
68 | dirBlock, err := ocean.getInodeBlock(dirPath, clientId)
69 | if err != nil {
70 | fmt.Println(err)
71 | return reply, err
72 | }
73 |
74 | // Create new inode and block
75 | newDirInode := CreateDirInode(name)
76 | newDirBlock := CreateBlock()
77 |
78 | // Declare block paths
79 | blockPath := fmt.Sprintf("%v:%v", fullPath, "indirect")
80 |
81 | // Get hashes
82 | newDirInodeHash := tapestry.Hash(fullPath)
83 |
84 | 		fmt.Printf("Dirpath: %v\n", dirPath)
85 | 		fmt.Printf("Fullpath: %v\n", fullPath)
86 | 		fmt.Printf("blockPath: %v\n", blockPath)
87 | 		fmt.Printf("newDirInodeHash: %v\n", newDirInodeHash)
88 |
89 | // Write the new dir to the old dir and increase its size
90 | IdIntoByte(dirBlock, &newDirInodeHash, int(dirInode.size))
91 | dirInode.size += tapestry.DIGITS
92 |
93 | bytes := make([]byte, tapestry.DIGITS)
94 | IdIntoByte(bytes, &newDirInodeHash, 0)
95 | newDirInode.indirect = Guid(ByteIntoAguid(bytes, 0))
96 | 		fmt.Println(newDirInodeHash, "->", newDirInode.indirect)
97 |
98 | // Save both blocks in tapestry
99 | ocean.storeIndirectBlock(fullPath, newDirBlock.bytes, clientId)
100 | ocean.storeIndirectBlock(dirPath, dirBlock, clientId)
101 |
102 | // Encode both inodes
103 | ocean.storeInode(dirPath, dirInode, clientId)
104 | ocean.storeInode(fullPath, newDirInode, clientId)
105 | }
106 |
107 | reply.Ok = true
108 | return reply, nil
109 | }
110 |
111 | func (ocean *OceanNode) dir_namev(pathname string, id uint64) (*Inode, string, string, string, error) {
112 |
113 | path := removeExcessSlashes(pathname)
114 | lastSlash := strings.LastIndex(path, "/")
115 | var dirPath, name string
116 |
117 | fmt.Println("Last slash:", lastSlash)
118 |
119 | if lastSlash == 0 && len(path) != 1 {
120 | return ocean.getRootInode(id), pathname[1:], pathname, "/", nil
121 | } else if lastSlash == 0 {
122 | return nil, "/", "/", "", nil
123 | } else if lastSlash != -1 && len(path) != 1 { // K. all good
124 | dirPath = path[:lastSlash]
125 | name = path[lastSlash+1:]
126 | } else if lastSlash == -1 { // No slashes at all (relative path probably)
127 | dirPath = ocean.getCurrentDir(id)
128 | name = path
129 | } else {
130 | 		panic("unreachable: every combination of lastSlash and path length is handled above")
131 | }
132 |
133 | path = removeExcessSlashes(path)
134 |
135 | if dirPath[0] != '/' {
136 | dirPath = ocean.getCurrentDir(id) + "/" + dirPath
137 | }
138 |
139 | dirInode, err := ocean.getInode(dirPath, id)
140 | if err != nil { // Dir path does not exist
141 | fmt.Println(err)
142 | return nil, "", "", "", err
143 | }
144 |
145 | dirPath = removeExcessSlashes(dirPath)
146 | fullPath := removeExcessSlashes(dirPath + "/" + name)
147 |
148 | return dirInode, name, fullPath, dirPath, nil
149 | }
150 |
151 | func (ocean *OceanNode) getRootInode(id uint64) *Inode {
152 | inode, err := ocean.getInode("/", id)
153 | if err != nil {
154 | panic("Root inode not found!")
155 | }
156 | return inode
157 | }
158 |
159 | func (ocean *OceanNode) connect(req *ConnectRequest) (ConnectReply, error) {
160 | reply := ConnectReply{}
161 | // addr := req.FromNode.Addr
162 | // raftNode := puddle.getRandomRaftNode()
163 | // fromAddr := raft.NodeAddr{raft.AddrToId(addr, raftNode.GetConfig().NodeIdSize), addr}
164 |
165 | raftAddr := ocean.getRandomRaftNode().GetLocalAddr()
166 |
167 | client, err := raft.CreateClient(*raftAddr)
168 | if err != nil {
169 | fmt.Println(err)
170 | return ConnectReply{false, 0}, err
171 | }
172 |
173 | // Clients that just started the connection should start in root node.
174 | ocean.clientPaths[client.Id] = "/"
175 | ocean.clients[client.Id] = client
176 |
177 | reply.Ok = true
178 | reply.Id = client.Id
179 | fmt.Println("connect reply:", reply)
180 | return reply, nil
181 | }
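182 | 
183 | /*
184 |    Layout sketch, as implied by mkdir and dir_namev above: each path maps to an inode
185 |    stored under tapestry.Hash(path), and that inode's indirect block is stored under
186 |    tapestry.Hash(path + ":indirect"). Creating a directory appends the new child inode's
187 |    hash (tapestry.DIGITS bytes) to the parent's indirect block and grows the parent's
188 |    size by tapestry.DIGITS, so a directory's size counts the hashes of its children.
189 | */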
--------------------------------------------------------------------------------
/raft/raft/raft.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "../../tapestry/tapestry"
5 | "crypto/sha1"
6 | "math/big"
7 | "net"
8 | "net/rpc"
9 | "os"
10 | "sync"
11 | "time"
12 | )
13 |
14 | /* A node can be in one of four possible states: the three Raft states plus a join state */
15 | type NodeState int
16 |
17 | // Tapestry's id
18 | type ID tapestry.ID
19 |
20 | const (
21 | FOLLOWER_STATE NodeState = iota
22 | CANDIDATE_STATE
23 | LEADER_STATE
24 | JOIN_STATE
25 | )
26 |
27 | type RaftNode struct {
28 | Id string
29 | Listener net.Listener
30 | listenPort int
31 |
32 | //At any given time each server is in one of three states: leader, follower, or candidate.
33 | State NodeState
34 | LeaderAddress *NodeAddr
35 |
36 | conf *Config
37 | IsShutDown bool
38 | RPCServer *RaftRPCServer
39 | mutex sync.Mutex
40 | Testing *TestingPolicy
41 |
42 | logCache []LogEntry
43 |
44 | //file descriptors and values for persistent state
45 | logFileDescriptor FileData
46 | metaFileDescriptor FileData
47 | stableState NodeStableState
48 | ssMutex sync.Mutex
49 |
50 | //leader specific volatile state
51 | commitIndex uint64
52 | lastApplied uint64
53 | leaderMutex map[string]uint64
54 | nextIndex map[string]uint64
55 | matchIndex map[string]uint64
56 |
57 | // channels to send and rcv RPC messages
58 | appendEntries chan AppendEntriesMsg
59 | requestVote chan RequestVoteMsg
60 | clientRequest chan ClientRequestMsg
61 | registerClient chan RegisterClientMsg
62 | gracefulExit chan bool
63 |
64 | // the replicated state machine
65 | hash []byte
66 | requestMutex sync.Mutex
67 | requestMap map[uint64]ClientRequestMsg
68 |
69 | fileMap map[string]string
70 | fileMapMtx sync.Mutex
71 | lockMap map[string]bool
72 | lockMapMtx sync.Mutex
73 | }
74 |
75 | type NodeAddr struct {
76 | Id string
77 | Addr string
78 | }
79 |
80 | func CreateNode(localPort int, leaderAddr *NodeAddr, conf *Config) (rp *RaftNode, err error) {
81 | var r RaftNode
82 | rp = &r
83 | var conn net.Listener
84 |
85 | r.IsShutDown = false
86 | r.conf = conf
87 |
88 | //init rpc channels
89 | r.appendEntries = make(chan AppendEntriesMsg)
90 | r.requestVote = make(chan RequestVoteMsg)
91 | r.clientRequest = make(chan ClientRequestMsg)
92 | r.registerClient = make(chan RegisterClientMsg)
93 | r.gracefulExit = make(chan bool)
94 |
95 | r.hash = nil
96 | r.requestMap = make(map[uint64]ClientRequestMsg)
97 |
98 | r.commitIndex = 0
99 | r.lastApplied = 0
100 | r.nextIndex = make(map[string]uint64)
101 | r.matchIndex = make(map[string]uint64)
102 |
103 | r.fileMap = make(map[string]string)
104 | r.lockMap = make(map[string]bool)
105 | r.Testing = NewTesting()
106 | r.Testing.PauseWorld(false)
107 |
108 | if localPort != 0 {
109 | conn, err = OpenPort(localPort)
110 | } else {
111 | conn, localPort, err = OpenListener()
112 | }
113 |
114 | if err != nil {
115 | return nil, err
116 | }
117 |
118 | // create node id based on listener address
119 | r.Id = AddrToId(conn.Addr().String(), conf.NodeIdSize)
120 |
121 | r.Listener = conn
122 | r.listenPort = localPort
123 | Out.Printf("started node with id %v, listening at %v", r.Id, conn.Addr().String())
124 |
125 | freshNode, err := r.initStableStore()
126 | if err != nil {
127 | Error.Printf("Error initializing the stable store: %v \n", err)
128 | return nil, err
129 | }
130 |
131 | r.setLocalAddr(&NodeAddr{Id: r.Id, Addr: conn.Addr().String()})
132 |
133 | // Start RPC server
134 | r.RPCServer = &RaftRPCServer{rp}
135 | rpc.RegisterName(r.GetLocalAddr().Addr, r.RPCServer)
136 | go r.RPCServer.startRpcServer()
137 |
138 | if freshNode {
139 | r.State = JOIN_STATE
140 | if leaderAddr != nil {
141 | err = JoinRPC(leaderAddr, r.GetLocalAddr())
142 | } else {
143 | Out.Printf("Waiting to start nodes until all have joined\n")
144 | go r.startNodes()
145 | }
146 | } else {
147 | r.State = FOLLOWER_STATE
148 | go r.run()
149 | }
150 |
151 | return
152 | }
153 |
154 | func (r *RaftNode) startNodes() {
155 | r.mutex.Lock()
156 | r.AppendOtherNodes(*r.GetLocalAddr())
157 | r.mutex.Unlock()
158 |
159 | for len(r.GetOtherNodes()) < r.conf.ClusterSize {
160 | time.Sleep(time.Millisecond * 100)
161 | }
162 |
163 | for _, otherNode := range r.GetOtherNodes() {
164 | if r.Id != otherNode.Id {
165 | Out.Printf("(%v) Starting node-%v\n", r.Id, otherNode.Id)
166 | StartNodeRPC(otherNode, r.GetOtherNodes())
167 | }
168 | }
169 |
170 | // Start the Raft finite-state-machine, initially in follower state
171 | go r.run()
172 | }
173 |
174 | func CreateLocalCluster(config *Config) ([]*RaftNode, error) {
175 | if config == nil {
176 | config = DefaultConfig()
177 | }
178 | err := CheckConfig(config)
179 | if err != nil {
180 | return nil, err
181 | }
182 |
183 | nodes := make([]*RaftNode, config.ClusterSize)
184 |
185 | 	nodes[0], err = CreateNode(0, nil, config)
186 | 	if err != nil {
187 | 		return nil, err
188 | 	}
189 | 	for i := 1; i < config.ClusterSize; i++ {
190 | 		nodes[i], err = CreateNode(0, nodes[0].GetLocalAddr(), config)
191 | 		if err != nil {
192 | 			return nil, err
193 | 		}
194 | 	}
195 | 	return nodes, nil
196 | }
197 | func AddrToId(addr string, length int) string {
198 | h := sha1.New()
199 | h.Write([]byte(addr))
200 | v := h.Sum(nil)
201 | keyInt := big.Int{}
202 | keyInt.SetBytes(v[:length])
203 | return keyInt.String()
204 | }
205 |
206 | func (r *RaftNode) Exit() {
207 | Out.Printf("Abruptly shutting down node!")
208 | os.Exit(0)
209 | }
210 |
211 | func (r *RaftNode) GracefulExit() {
212 | r.Testing.PauseWorld(true)
213 | 	Out.Printf("gracefully shutting down node %v", r.Id)
214 | r.gracefulExit <- true
215 | }
216 |
217 | func (r *RaftNode) GetConfig() *Config {
218 | return r.conf
219 | }
220 |
221 | func (r *RaftNode) run() {
222 | curr := r.doFollower
223 | for curr != nil {
224 | curr = curr()
225 | }
226 | }
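227 | 
228 | // Illustrative sketch only (it mirrors what raft_test.go does and is not part of the node
229 | // API): bring up an in-process cluster with the default configuration and give it a moment
230 | // to elect a leader. The 500ms settle time is an assumption borrowed from the tests, not a
231 | // guarantee that election has completed.
232 | func ExampleLocalCluster() ([]*RaftNode, error) {
233 | 	config := DefaultConfig()
234 | 	nodes, err := CreateLocalCluster(config)
235 | 	if err != nil {
236 | 		return nil, err
237 | 	}
238 | 	// Give the nodes time to discover each other and hold an election.
239 | 	time.Sleep(time.Millisecond * 500)
240 | 	return nodes, nil
241 | }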
--------------------------------------------------------------------------------
/tapestry/tapestry/routingtable.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import "sync"
4 |
5 | /*
6 | A routing table has a number of levels equal to the number of digits in an ID (default 40)
7 | Each level has a number of slots equal to the digit base (default 16)
8 | A node that exists on level n thereby shares a prefix of length n with the local node.
9 | Access to the routing table is managed by a lock
10 | */
11 | type RoutingTable struct {
12 | local Node // the local tapestry node
13 | mutex sync.Mutex // to manage concurrent access to the routing table. could have a per-level mutex though
14 | rows [DIGITS][BASE]*[]Node // the rows of the routing table
15 | }
16 |
17 | /*
18 | Creates and returns a new routing table, placing the local node at the appropriate slot in each level of the table
19 | */
20 | func NewRoutingTable(me Node) *RoutingTable {
21 | t := new(RoutingTable)
22 | t.local = me
23 |
24 | // Create the node lists with capacity of SLOTSIZE
25 | for i := 0; i < DIGITS; i++ {
26 | for j := 0; j < BASE; j++ {
27 | slot := make([]Node, 0, SLOTSIZE)
28 | t.rows[i][j] = &slot
29 | }
30 | }
31 |
32 | // Make sure each row has at least our node in it
33 | for i := 0; i < DIGITS; i++ {
34 | slot := t.rows[i][t.local.Id[i]]
35 | *slot = append(*slot, t.local)
36 | }
37 |
38 | return t
39 | }
40 |
41 | func GetFurthest(id ID, nodes []Node) int {
42 | furthest := 0
43 | 	for i := 1; i < len(nodes); i++ {
44 | if id.Closer(nodes[furthest].Id, nodes[i].Id) {
45 | furthest = i
46 | }
47 | }
48 | return furthest
49 | }
50 |
51 | /*
52 | Adds the given node to the routing table
53 | Returns true if the node did not previously exist in the table and was subsequently added
54 | Returns the previous node in the table, if one was overwritten
55 | */
56 | func (t *RoutingTable) Add(node Node) (added bool, previous *Node) {
57 | t.mutex.Lock()
58 |
59 | // Find table slot.
60 | level := SharedPrefixLength(node.Id, t.local.Id)
61 |
62 | if level == DIGITS {
63 | added = false
64 | t.mutex.Unlock()
65 | return
66 | }
67 |
68 | // fmt.Printf("%v, %v\n", i, node.Id[i])
69 | slot := t.rows[level][node.Id[level]]
70 |
71 | // Check if it exists; if it does return false
72 | for i := 0; i < len(*slot); i++ {
73 | if SharedPrefixLength((*slot)[i].Id, node.Id) == DIGITS {
74 | added = false
75 | t.mutex.Unlock()
76 | return
77 | }
78 | }
79 |
80 | 	// Append the node to every level up to the shared-prefix level, keeping at most SLOTSIZE nodes per slot.
81 |
82 | for i := 0; i <= level; i++ {
83 | slot = t.rows[i][node.Id[i]]
84 | *slot = append(*slot, node)
85 | if len(*slot) > SLOTSIZE {
86 | furthest := GetFurthest(t.local.Id, *slot)
87 | previous = &(*slot)[furthest]
88 | *slot = append((*slot)[:furthest], (*slot)[furthest+1:]...)
89 | }
90 | }
91 |
92 | added = true
93 | t.mutex.Unlock()
94 | return
95 | }
96 |
97 | /*
98 | Removes the specified node from the routing table, if it exists
99 | Returns true if the node was in the table and was successfully removed
100 | */
101 | func (t *RoutingTable) Remove(node Node) (wasRemoved bool) {
102 | t.mutex.Lock()
103 |
104 | // Get the table slot
105 | level := SharedPrefixLength(node.Id, t.local.Id)
106 | if level == DIGITS {
107 | 		// Never delete yourself from your own routing table.
108 | wasRemoved = false
109 | t.mutex.Unlock()
110 | return
111 | }
112 |
113 | wasRemoved = false
114 |
115 | for j := 0; j <= level; j++ {
116 | slot := t.rows[j][node.Id[j]]
117 |
118 | // Find and remove node
119 | for i := 0; i < len(*slot); i++ {
120 | if SharedPrefixLength((*slot)[i].Id, node.Id) == DIGITS {
121 | *slot = append((*slot)[:i], (*slot)[i+1:]...) // This is remove in Go
122 | wasRemoved = true
123 | }
124 | }
125 | }
126 |
127 | // Return false if node was not found.
128 | t.mutex.Unlock()
129 | return
130 | }
131 |
132 | /*
133 | Search the table for the closest next-hop node for the provided ID
134 | */
135 | func (t *RoutingTable) GetNextHop(id ID) (node Node) {
136 |
137 | t.mutex.Lock()
138 |
139 | level := SharedPrefixLength(id, t.local.Id)
140 | row := t.rows[level]
141 | 	// fmt.Printf("%v: %v and %v\n", id, level, id[level])
142 | col := id[level]
143 | for len(*(row[col])) == 0 {
144 | col = (col + 1) % BASE
145 | // fmt.Printf("%v\n", col)
146 | }
147 | // fmt.Printf("%v\n", col)
148 |
149 | if len(*(row[col])) == 1 {
150 | node = (*(row[col]))[0]
151 | } else if len(*(row[col])) == 2 {
152 | if id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[1].Id) {
153 | node = (*(row[col]))[0]
154 | } else {
155 | node = (*(row[col]))[1]
156 | }
157 | 	} else { // Consider optimizing if this is too slow
158 | if id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[1].Id) &&
159 | id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[2].Id) {
160 | node = (*(row[col]))[0]
161 | } else if id.BetterChoice((*(row[col]))[1].Id, (*(row[col]))[0].Id) &&
162 | id.BetterChoice((*(row[col]))[1].Id, (*(row[col]))[2].Id) {
163 | node = (*(row[col]))[1]
164 | } else if id.BetterChoice((*(row[col]))[2].Id, (*(row[col]))[0].Id) &&
165 | id.BetterChoice((*(row[col]))[2].Id, (*(row[col]))[1].Id) {
166 | node = (*(row[col]))[2]
167 | } else {
168 | node = (*(row[col]))[0]
169 | }
170 | }
171 |
172 | t.mutex.Unlock()
173 |
174 | return
175 | }
176 |
177 | /*
178 | Get all nodes on the specified level of the routing table, EXCLUDING the local node
179 | */
180 | func (t *RoutingTable) GetLevel(level int) (nodes []Node) {
181 | t.mutex.Lock()
182 | row := t.rows[level]
183 | for i := 0; i < BASE; i++ {
184 | if t.local.Id[level] == Digit(i) {
185 | continue
186 | }
187 | for j := 0; j < len(*row[i]); j++ {
188 | if SharedPrefixLength((*(row[i]))[j].Id, t.local.Id) != DIGITS {
189 | nodes = append(nodes, (*(row[i]))[j]) // append node
190 | }
191 | }
192 | }
193 | t.mutex.Unlock()
194 | return
195 | }
196 |
197 |
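198 | /*
199 |    Routing sketch (informal): to route towards an ID, GetNextHop starts at the row equal
200 |    to the length of the prefix shared with the local node and at the column given by the
201 |    target's digit at that position. If that slot is empty it advances the column modulo
202 |    BASE until it finds a populated slot; the loop always terminates because every row
203 |    contains the local node itself. Among the up to SLOTSIZE candidates in the chosen slot,
204 |    BetterChoice selects the node considered closest to the target.
205 | */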
--------------------------------------------------------------------------------
/tapestry/tapestry/tapestry-rpcimpl.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "net"
5 | "net/rpc"
6 | "fmt"
7 | )
8 |
9 | /*
10 | Receives remote invocations of methods for the local tapestry node
11 | */
12 | type TapestryRPCServer struct {
13 | tapestry *Tapestry
14 | listener net.Listener
15 | rpc *rpc.Server
16 | }
17 |
18 | type RegisterRequest struct {
19 | To Node
20 | From Node
21 | Key string
22 | }
23 |
24 | type RegisterResponse struct {
25 | IsRoot bool
26 | }
27 |
28 | type NextHopRequest struct {
29 | To Node
30 | Id ID
31 | }
32 | type NextHopResponse struct {
33 | HasNext bool
34 | Next Node
35 | }
36 |
37 | type RemoveBadNodesRequest struct {
38 | To Node
39 | BadNodes []Node
40 | }
41 |
42 | type FetchRequest struct {
43 | To Node
44 | Key string
45 | }
46 |
47 | type FetchResponse struct {
48 | To Node
49 | IsRoot bool
50 | Values []Node
51 | }
52 |
53 | type GetBackpointersRequest struct {
54 | To Node
55 | From Node
56 | Level int
57 | }
58 |
59 | type TransferRequest struct {
60 | To Node
61 | From Node
62 | Data map[string][]Node
63 | }
64 |
65 | type NodeRequest struct {
66 | To Node
67 | Node Node
68 | }
69 |
70 | type AddNodeMulticastRequest struct {
71 | To Node
72 | NewNode Node
73 | Level int
74 | }
75 |
76 | type NotifyLeaveRequest struct {
77 | To Node
78 | From Node
79 | Replacement *Node
80 | }
81 |
82 | /*
83 | Creates the tapestry RPC server for a tapestry node. The RPC server receives remote function invocations
84 | and proxies them to the local tapestry node implementation
85 | */
86 | func newTapestryRPCServer(port int, tapestry *Tapestry) (server *TapestryRPCServer, err error) {
87 | // Create the RPC server
88 | server = new(TapestryRPCServer)
89 | server.tapestry = tapestry
90 | server.rpc = rpc.NewServer()
91 | server.rpc.Register(server)
92 | server.rpc.Register(NewBlobStoreRPC(tapestry.blobstore))
93 | server.listener, err = net.Listen("tcp", fmt.Sprintf(":%v", port))
94 | if err != nil {
95 | return nil, fmt.Errorf("Tapestry RPC server unable to listen on tcp port %v, reason: %v", port, err)
96 | }
97 |
98 | // Start the RPC server
99 | go func() {
100 | for {
101 | cxn, err := server.listener.Accept()
102 | if err != nil {
103 | fmt.Printf("Server %v closing: %s\n", port, err)
104 | return
105 | }
106 | go server.rpc.ServeConn(cxn)
107 | }
108 | }()
109 |
110 | return
111 | }
112 |
113 | func (server *TapestryRPCServer) Hello(req Node, rsp *Node) (err error) {
114 | *rsp = server.tapestry.local.node
115 | return
116 | }
117 |
118 | func (server *TapestryRPCServer) validate(expect Node) error {
119 | if server.tapestry.local.node != expect {
120 | return fmt.Errorf("Remote node expected us to be %v, but we are %v", expect, server.tapestry.local.node)
121 | }
122 | return nil
123 | }
124 |
125 | func (server *TapestryRPCServer) GetNextHop(req NextHopRequest, rsp *NextHopResponse) (err error) {
126 | err = server.validate(req.To)
127 | if err == nil {
128 | rsp.HasNext, rsp.Next, err = server.tapestry.local.GetNextHop(req.Id)
129 | }
130 | return
131 | }
132 |
133 | // Server: proxies a remote method invocation to the local node
134 | func (server *TapestryRPCServer) RemoveBadNodes(req RemoveBadNodesRequest, rsp *Node) error {
135 | err := server.validate(req.To)
136 | if err != nil {
137 | return err
138 | }
139 | return server.tapestry.local.RemoveBadNodes(req.BadNodes)
140 | }
141 |
142 | func (server *TapestryRPCServer) Fetch(req FetchRequest, rsp *FetchResponse) (err error) {
143 | err = server.validate(req.To)
144 | if err == nil {
145 | rsp.IsRoot, rsp.Values, err = server.tapestry.local.Fetch(req.Key)
146 | }
147 | return
148 | }
149 |
150 | func (server *TapestryRPCServer) AddBackpointer(req NodeRequest, rsp *Node) error {
151 | err := server.validate(req.To)
152 | if err != nil {
153 | return err
154 | }
155 | return server.tapestry.local.AddBackpointer(req.Node)
156 | }
157 |
158 | func (server *TapestryRPCServer) RemoveBackpointer(req NodeRequest, rsp *Node) error {
159 | err := server.validate(req.To)
160 | if err != nil {
161 | return err
162 | }
163 | return server.tapestry.local.RemoveBackpointer(req.Node)
164 | }
165 |
166 | func (server *TapestryRPCServer) GetBackpointers(req GetBackpointersRequest, rsp *[]Node) (err error) {
167 | err = server.validate(req.To)
168 | if err != nil {
169 | return err
170 | }
171 | backpointers, err := server.tapestry.local.GetBackpointers(req.From, req.Level)
172 | *rsp = append(*rsp, backpointers...)
173 | return
174 | }
175 |
176 | func (server *TapestryRPCServer) AddNode(req NodeRequest, rsp *[]Node) (err error) {
177 | err = server.validate(req.To)
178 | if err != nil {
179 | return
180 | }
181 | neighbours, err := server.tapestry.local.AddNode(req.Node)
182 | *rsp = append(*rsp, neighbours...)
183 | return
184 | }
185 |
186 | func (server *TapestryRPCServer) AddNodeMulticast(req AddNodeMulticastRequest, rsp *[]Node) (err error) {
187 | err = server.validate(req.To)
188 | if err != nil {
189 | return err
190 | }
191 | neighbours, err := server.tapestry.local.AddNodeMulticast(req.NewNode, req.Level)
192 | *rsp = append(*rsp, neighbours...)
193 | return err
194 | }
195 |
196 | func (server *TapestryRPCServer) Transfer(req TransferRequest, rsp *Node) error {
197 | err := server.validate(req.To)
198 | if err != nil {
199 | return err
200 | }
201 | return server.tapestry.local.Transfer(req.From, req.Data)
202 | }
203 |
204 | func (server *TapestryRPCServer) NotifyLeave(req NotifyLeaveRequest, rsp *Node) error {
205 | err := server.validate(req.To)
206 | if err != nil {
207 | return err
208 | }
209 | return server.tapestry.local.NotifyLeave(req.From, req.Replacement)
210 | }
211 |
212 | /*
213 | This method is invoked over RPC by other Tapestry nodes.
214 | Register the specified node as an advertiser of the specified key.
215 |
216 | * Check that we are the root node for the key
217 | * Add the node to the object store
218 | * Kick off a timer to remove the node if it's not advertised again after a set amount of time
219 | */
220 | func (server *TapestryRPCServer) Register(req RegisterRequest, rsp *RegisterResponse) (err error) {
221 | err = server.validate(req.To)
222 | if err == nil {
223 | rsp.IsRoot, err = server.tapestry.local.Register(req.Key, req.From)
224 | }
225 | return
226 | }
--------------------------------------------------------------------------------
/oceanstore/oceanstore/ocean_rpc_api.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "fmt"
5 | "net/rpc"
6 | )
7 |
8 | var connMap = make(map[string]*rpc.Client)
9 |
10 | type ConnectRequest struct {
11 | FromNode OceanAddr
12 | }
13 |
14 | type ConnectReply struct {
15 | Ok bool
16 | Id uint64
17 | }
18 |
19 | func ConnectRPC(remotenode *OceanAddr, request ConnectRequest) (*ConnectReply, error) {
20 | fmt.Println("(Oceanstore) RPC Connect to", remotenode.Addr)
21 | var reply ConnectReply
22 |
23 | err := makeRemoteCall(remotenode, "ConnectImpl", request, &reply)
24 | if err != nil {
25 | return nil, err
26 | }
27 |
28 | return &reply, nil
29 | }
30 |
31 | type PwdRequest struct {
32 | ClientId uint64
33 | }
34 |
35 | type PwdReply struct {
36 | Ok bool
37 | Path string
38 | }
39 |
40 | func pwdRPC(remotenode *OceanAddr, request PwdRequest) (*PwdReply, error) {
41 | var reply PwdReply
42 |
43 | err := makeRemoteCall(remotenode, "PwdImpl", request, &reply)
44 | if err != nil {
45 | return nil, err
46 | }
47 |
48 | return &reply, nil
49 | }
50 |
51 | type LsRequest struct {
52 | ClientId uint64
53 | Path string
54 | }
55 |
56 | type LsReply struct {
57 | Ok bool
58 | Elements string
59 | }
60 |
61 | func lsRPC(remotenode *OceanAddr, request LsRequest) (*LsReply, error) {
62 | var reply LsReply
63 |
64 | err := makeRemoteCall(remotenode, "LsImpl", request, &reply)
65 | if err != nil {
66 | return nil, err
67 | }
68 |
69 | return &reply, nil
70 | }
71 |
72 | type CdRequest struct {
73 | ClientId uint64
74 | Path string
75 | }
76 |
77 | type CdReply struct {
78 | Ok bool
79 | }
80 |
81 | func cdRPC(remotenode *OceanAddr, request CdRequest) (*CdReply, error) {
82 | var reply CdReply
83 |
84 | err := makeRemoteCall(remotenode, "CdImpl", request, &reply)
85 | if err != nil {
86 | return nil, err
87 | }
88 |
89 | return &reply, nil
90 | }
91 |
92 | type MvRequest struct {
93 | ClientId uint64
94 | Source string
95 | Dest string
96 | }
97 |
98 | type MvReply struct {
99 | Ok bool
100 | }
101 |
102 | func mvRPC(remotenode *OceanAddr, request MvRequest) (*MvReply, error) {
103 | var reply MvReply
104 |
105 | err := makeRemoteCall(remotenode, "MvImpl", request, &reply)
106 | if err != nil {
107 | return nil, err
108 | }
109 |
110 | return &reply, nil
111 | }
112 |
113 | type CpRequest struct {
114 | ClientId uint64
115 | Source string
116 | Dest string
117 | }
118 |
119 | type CpReply struct {
120 | Ok bool
121 | }
122 |
123 | func cpRPC(remotenode *OceanAddr, request CpRequest) (*CpReply, error) {
124 | var reply CpReply
125 |
126 | err := makeRemoteCall(remotenode, "CpImpl", request, &reply)
127 | if err != nil {
128 | return nil, err
129 | }
130 |
131 | return &reply, nil
132 | }
133 |
134 | type MkdirRequest struct {
135 | ClientId uint64
136 | Path string
137 | }
138 |
139 | type MkdirReply struct {
140 | Ok bool
141 | }
142 |
143 | func mkdirRPC(remotenode *OceanAddr, request MkdirRequest) (*MkdirReply, error) {
144 | var reply MkdirReply
145 |
146 | err := makeRemoteCall(remotenode, "MkdirImpl", request, &reply)
147 | if err != nil {
148 | return nil, err
149 | }
150 |
151 | return &reply, nil
152 | }
153 |
154 | type RmdirRequest struct {
155 | ClientId uint64
156 | Path string
157 | }
158 |
159 | type RmdirReply struct {
160 | Ok bool
161 | }
162 |
163 | func rmdirRPC(remotenode *OceanAddr, request RmdirRequest) (*RmdirReply, error) {
164 | var reply RmdirReply
165 |
166 | err := makeRemoteCall(remotenode, "RmdirImpl", request, &reply)
167 | if err != nil {
168 | return nil, err
169 | }
170 |
171 | return &reply, nil
172 | }
173 |
174 | type MkfileRequest struct {
175 | ClientId uint64
176 | Path string
177 | }
178 |
179 | type MkfileReply struct {
180 | Ok bool
181 | }
182 |
183 | func mkfileRPC(remotenode *OceanAddr, request MkfileRequest) (*MkfileReply, error) {
184 | var reply MkfileReply
185 |
186 | err := makeRemoteCall(remotenode, "MkfileImpl", request, &reply)
187 | if err != nil {
188 | return nil, err
189 | }
190 |
191 | return &reply, nil
192 | }
193 |
194 | type RmfileRequest struct {
195 | ClientId uint64
196 | Path string
197 | }
198 |
199 | type RmfileReply struct {
200 | Ok bool
201 | }
202 |
203 | func rmfileRPC(remotenode *OceanAddr, request RmfileRequest) (*RmfileReply, error) {
204 | var reply RmfileReply
205 |
206 | err := makeRemoteCall(remotenode, "RmfileImpl", request, &reply)
207 | if err != nil {
208 | return nil, err
209 | }
210 |
211 | return &reply, nil
212 | }
213 |
214 | type WritefileRequest struct {
215 | ClientId uint64
216 | Path string
217 | Location uint32
218 | Buffer []byte
219 | }
220 |
221 | type WritefileReply struct {
222 | Ok bool
223 | Written uint32
224 | }
225 |
226 | func writefileRPC(remotenode *OceanAddr, request WritefileRequest) (*WritefileReply, error) {
227 | var reply WritefileReply
228 |
229 | err := makeRemoteCall(remotenode, "WritefileImpl", request, &reply)
230 | if err != nil {
231 | return nil, err
232 | }
233 |
234 | return &reply, nil
235 | }
236 |
237 | type CatRequest struct {
238 | ClientId uint64
239 | Path string
240 | Location uint32
241 | Count uint32
242 | }
243 |
244 | type CatReply struct {
245 | Ok bool
246 | Read uint32
247 | Buffer []byte
248 | }
249 |
250 | func catRPC(remotenode *OceanAddr, request CatRequest) (*CatReply, error) {
251 | var reply CatReply
252 |
253 | err := makeRemoteCall(remotenode, "CatImpl", request, &reply)
254 | if err != nil {
255 | return nil, err
256 | }
257 |
258 | return &reply, nil
259 | }
260 |
261 | /* Helper function to make a call to a remote node */
262 | func makeRemoteCall(remoteNode *OceanAddr, method string, req interface{}, rsp interface{}) error {
263 | // Dial the server if we don't already have a connection to it
264 | remoteNodeAddrStr := remoteNode.Addr
265 | var err error
266 | client, ok := connMap[remoteNodeAddrStr]
267 | if !ok {
268 | client, err = rpc.Dial("tcp", remoteNode.Addr)
269 | if err != nil {
270 | return err
271 | }
272 | connMap[remoteNodeAddrStr] = client
273 | }
274 |
275 | // Make the request
276 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method)
277 | err = client.Call(uniqueMethodName, req, rsp)
278 | if err != nil {
279 | client.Close()
280 | delete(connMap, remoteNodeAddrStr)
281 | return err
282 | }
283 |
284 | return nil
285 | }
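286 | 
287 | // Illustrative sketch (the address below is a placeholder): a client first registers
288 | // itself via ConnectRPC to obtain a client id, then passes that id in every later request:
289 | //
290 | //	server := &OceanAddr{Addr: "localhost:4000"}
291 | //	conn, err := ConnectRPC(server, ConnectRequest{})
292 | //	if err == nil && conn.Ok {
293 | //		mkdirRPC(server, MkdirRequest{ClientId: conn.Id, Path: "/docs"})
294 | //	}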
--------------------------------------------------------------------------------
/tapestry/tapestry/tapestry.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "fmt"
5 | "net"
6 | "os"
7 | "time"
8 | "strings"
9 | )
10 |
11 | /* The Tapestry object provides the API for accessing tapestry.
12 | * It will call remote methods across RPC, and receives remote RPC
13 | * calls which get forwarded to the local node */
14 |
15 | const BASE = 16 // The base of a digit of an ID. By default, a digit is base-16
16 | const DIGITS = 40 // The number of digits in an ID. By default, an ID has 40 digits.
17 | const RETRIES = 3 // The number of retries on failure. By default we have 3 retries
18 | const K = 10 // During neighbour traversal, trim the neighbourset to this size before fetching backpointers. By default this has a value of 10
19 | const SLOTSIZE = 3  // Each slot in the routing table stores at most this many nodes. By default this is 3.
20 |
21 | // Default = 10 seconds
22 | const REPUBLISH = 1 * time.Second // object republish interval for nodes advertising objects
23 | // Default = 25 seconds
24 | const TIMEOUT = 3 * time.Second // object timeout interval for nodes storing objects
25 |
26 | /*
27 | Provides the private API for communicating with remote nodes
28 | */
29 | type Tapestry struct {
30 | local *TapestryNode // the local node
31 | server *TapestryRPCServer // receives remote method invocations and calls the corresponding local node methods
32 | blobstore *BlobStore // stores blobs on the local node
33 | }
34 |
35 | /*
36 | Public API: Start a tapestry node on the specified port.
37 | Optionally, specify the address of an existing node in the tapestry mesh to connect to, otherwise set to ""
38 | */
39 | func Start(port int, connectTo string) (*Tapestry, error) {
40 | return start(RandomID(), port, connectTo)
41 | }
42 |
43 | /*
44 | Private method, useful for testing: start a node with the specified ID rather than a random ID
45 | */
46 | func start(id ID, port int, connectTo string) (tapestry *Tapestry, err error) {
47 | // Create the tapestry object
48 | tapestry = new(Tapestry)
49 |
50 | // Create the blob store
51 | tapestry.blobstore = NewBlobStore()
52 |
53 | // Create the RPC server
54 | tapestry.server, err = newTapestryRPCServer(port, tapestry)
55 | if err != nil {
56 | return nil, err
57 | }
58 |
59 | // Get the hostname of this machine
60 | name, err := os.Hostname()
61 | if err != nil {
62 | return nil, fmt.Errorf("Unable to get hostname of local machine to start Tapestry node. Reason: %v", err)
63 | }
64 |
65 | // Get the port we are bound to
66 | _, actualport, err := net.SplitHostPort(tapestry.server.listener.Addr().String()) //fmt.Sprintf("%v:%v", name, port)
67 | if err != nil {
68 | return nil, err
69 | }
70 |
71 | // The actual address of this node
72 | address := fmt.Sprintf("%s:%s", name, actualport)
73 |
74 | // Create the local node
75 | tapestry.local = newTapestryNode(Node{id, address}, tapestry)
76 |
77 | // If specified, connect to the provided address
78 | if connectTo != "" {
79 | // Get the node we're joining
80 | node, err := tapestry.hello(connectTo)
81 | if err != nil {
82 | return nil, fmt.Errorf("Error joining existing tapestry node %v, reason: %v", address, err)
83 | }
84 | err = tapestry.local.Join(node)
85 | if err != nil {
86 | return nil, err
87 | }
88 | }
89 |
90 | return tapestry, nil
91 | }
92 |
93 | /*
94 | Store a blob on the local node and publish the key to the tapestry
95 | */
96 | func (tapestry *Tapestry) Store(key string, value []byte) error {
97 | done, err := tapestry.local.Publish(key)
98 | if err != nil {
99 | return err
100 | }
101 | tapestry.blobstore.Put(key, value, done)
102 | return nil
103 | }
104 |
105 | /*
106 | Lookup a key in the tapestry and return its root node
107 | */
108 | func (tapestry *Tapestry) Lookup(key string) ([]Node, error) {
109 | return tapestry.local.Lookup(key)
110 | }
111 |
112 | /*
113 | Lookup a key in the tapestry then fetch the corresponding blob from the remote blob store
114 | */
115 | func (tapestry *Tapestry) Get(key string) ([]byte, error) {
116 | // Lookup the key
117 | replicas, err := tapestry.Lookup(key)
118 | if err != nil {
119 | return nil, err
120 | }
121 | if len(replicas) == 0 {
122 | return nil, fmt.Errorf("No replicas returned for key %v", key)
123 | }
124 |
125 | // Contact replicas
126 | var errs []error
127 | for _, replica := range replicas {
128 | blob, err := FetchRemoteBlob(replica, key)
129 | if err != nil {
130 | errs = append(errs, err)
131 | }
132 | if blob != nil {
133 | return *blob, nil
134 | }
135 | }
136 |
137 | return nil, fmt.Errorf("Error contacting replicas, %v: %v", replicas, errs)
138 | }
139 |
140 | /*
141 | Remove the blob from the local blob store and stop advertising
142 | */
143 | func (tapestry *Tapestry) Remove(key string) bool {
144 | return tapestry.blobstore.Delete(key)
145 | }
146 |
147 | /*
148 | Leave the tapestry.
149 | */
150 | func (tapestry *Tapestry) Leave() {
151 | tapestry.blobstore.DeleteAll()
152 | tapestry.local.Leave()
153 | tapestry.server.listener.Close()
154 | }
155 |
156 | /*
157 | Kill this node without gracefully leaving the tapestry
158 | */
159 | func (tapestry *Tapestry) Kill() {
160 | tapestry.server.listener.Close()
161 | }
162 |
163 | func (tapestry *Tapestry) GetLocalAddr() string {
164 | return tapestry.local.node.Address
165 | }
166 |
167 | // Prints a routing table
168 | func (tapestry *Tapestry) PrintRoutingTable() {
169 | table := tapestry.local.table
170 | id := table.local.Id.String()
171 | for i, row := range table.rows {
172 | for j, slot := range row {
173 | for _, node := range *slot {
174 | fmt.Printf(" %v%v %v: %v %v\n", id[:i], strings.Repeat(" ", DIGITS-i+1), Digit(j), node.Address, node.Id.String())
175 | }
176 | }
177 | }
178 | }
179 |
180 | // Prints the object store
181 | func (tapestry *Tapestry) PrintObjectStore() {
182 | fmt.Printf("ObjectStore for node %v\n", tapestry.local.node)
183 | for key, values := range tapestry.local.store.data {
184 | fmt.Printf(" %v: %v\n", key, slice(values))
185 | }
186 | }
187 |
188 | // Prints the backpointers
189 | func (tapestry *Tapestry) PrintBackpointers() {
190 | bp := tapestry.local.backpointers
191 | fmt.Printf("Backpointers for node %v\n", tapestry.local.node)
192 | for i, set := range bp.sets {
193 | for _, node := range set.Nodes() {
194 | fmt.Printf(" %v %v: %v\n", i, node.Address, node.Id.String())
195 | }
196 | }
197 | }
198 |
199 | // Prints the blobstore
200 | func (tapestry *Tapestry) PrintBlobStore() {
201 | 	for k := range tapestry.blobstore.blobs {
202 | fmt.Println(k)
203 | }
204 | }
205 |
206 | func (tapestry *Tapestry) GetLocalNode() Node {
207 | return tapestry.local.node
208 | }
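209 | 
210 | // ExampleUsage is an illustrative sketch of driving the public API above; it is not
211 | // called anywhere. The key and value are arbitrary, and the one-second sleep is an
212 | // assumption to give the asynchronous publish a chance to register before the lookup.
213 | func ExampleUsage() {
214 | 	// Start a standalone node, then a second node that joins it.
215 | 	first, err := Start(0, "")
216 | 	if err != nil {
217 | 		fmt.Println(err)
218 | 		return
219 | 	}
220 | 	second, err := Start(0, first.GetLocalAddr())
221 | 	if err != nil {
222 | 		fmt.Println(err)
223 | 		return
224 | 	}
225 | 
226 | 	// Store a blob on the second node and fetch it through the first.
227 | 	if err := second.Store("demo-key", []byte("demo-value")); err != nil {
228 | 		fmt.Println(err)
229 | 		return
230 | 	}
231 | 	time.Sleep(1 * time.Second)
232 | 	blob, err := first.Get("demo-key")
233 | 	if err != nil {
234 | 		fmt.Println(err)
235 | 		return
236 | 	}
237 | 	fmt.Println(string(blob))
238 | 
239 | 	// Leave the mesh gracefully.
240 | 	second.Leave()
241 | 	first.Leave()
242 | }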
--------------------------------------------------------------------------------
/oceanstore/oceanstore/inode.go:
--------------------------------------------------------------------------------
1 | package oceanstore
2 |
3 | import (
4 | "../../tapestry/tapestry"
5 | "fmt"
6 | "bytes"
7 | "encoding/gob"
8 | "strings"
9 | "strconv"
10 | )
11 |
12 | type Filetype int
13 |
14 | const (
15 | DIR Filetype = iota
16 | FILE
17 | )
18 |
19 | const BLOCK_SIZE = uint32(4096)
20 | const FILES_PER_INODE = 4
21 |
22 | type Inode struct {
23 | name string
24 | filetype Filetype
25 | size uint32
26 | indirect Guid
27 | }
28 |
29 | type Block struct {
30 | bytes []byte
31 | }
32 |
33 | func CreateDirInode(name string) *Inode {
34 | inode := new(Inode)
35 | inode.name = name
36 | inode.filetype = DIR
37 | inode.size = 0
38 | inode.indirect = ""
39 | return inode
40 | }
41 |
42 | func CreateFileInode(name string) *Inode {
43 | inode := new(Inode)
44 | inode.name = name
45 | inode.filetype = FILE
46 | inode.size = 0
47 | inode.indirect = ""
48 | return inode
49 | }
50 |
51 | func CreateBlock() *Block {
52 | block := new(Block)
53 | block.bytes = make([]byte, BLOCK_SIZE)
54 | return block
55 | }
56 |
57 | // Gets the inode that has a given path
58 | func (ocean *OceanNode) getInode(path string, id uint64) (*Inode, error) {
59 |
60 | hash := tapestry.Hash(path)
61 |
62 | aguid := Aguid(hashToGuid(hash))
63 |
64 | // Get the vguid using raft
65 | 	bytes, err := ocean.getTapestryData(aguid, id)
66 | 	if err != nil {
67 | 		return nil, err
68 | 	}
69 | 	inode := new(Inode)
70 | 	if err := inode.GobDecode(bytes); err != nil {
71 | 		return nil, err
72 | 	}
73 |
74 | return inode, nil
75 | }
76 |
77 | func (d *Inode) GobEncode() ([]byte, error) {
78 | w := new(bytes.Buffer)
79 | encoder := gob.NewEncoder(w)
80 | err := encoder.Encode(d.name)
81 | if err != nil {
82 | return nil, err
83 | }
84 | err = encoder.Encode(d.filetype)
85 | if err != nil {
86 | return nil, err
87 | }
88 | err = encoder.Encode(d.size)
89 | if err != nil {
90 | return nil, err
91 | }
92 | err = encoder.Encode(d.indirect)
93 | if err != nil {
94 | return nil, err
95 | }
96 | return w.Bytes(), nil
97 | }
98 |
99 | func (d *Inode) GobDecode(buf []byte) error {
100 | r := bytes.NewBuffer(buf)
101 | decoder := gob.NewDecoder(r)
102 | err := decoder.Decode(&d.name)
103 | if err != nil {
104 | return err
105 | }
106 | err = decoder.Decode(&d.filetype)
107 | if err != nil {
108 | return err
109 | }
110 | err = decoder.Decode(&d.size)
111 | if err != nil {
112 | return err
113 | }
114 | return decoder.Decode(&d.indirect)
115 | }
116 |
117 | // Generic method. Gets data given an aguid.
118 | func (ocean *OceanNode) getTapestryData(aguid Aguid, id uint64) ([]byte, error) {
119 | tapestryNode := ocean.getRandomTapestryNode()
120 | response, err := ocean.getRaftVguid(aguid, id)
121 | if err != nil {
122 | return nil, err
123 | }
124 |
125 | 	parts := strings.Split(string(response), ":")
126 | 	if len(parts) < 2 || parts[0] != "SUCCESS" {
127 | 		return nil, fmt.Errorf("Could not get raft vguid: %v", response)
128 | 	}
129 | 	vguid := parts[1]
130 |
131 | data, err := tapestry.TapestryGet(tapestryNode, string(vguid))
132 | if err != nil {
133 | return nil, err
134 | }
135 | return data, nil
136 | }
137 |
138 | // Gets the inode that has a given aguid
139 | func (ocean *OceanNode) getInodeFromAguid(aguid Aguid, id uint64) (*Inode, error) {
140 | // Get the vguid using raft
141 | 	bytes, err := ocean.getTapestryData(aguid, id)
142 | 	if err != nil {
143 | 		return nil, err
144 | 	}
145 | 	inode := new(Inode)
146 | 	if err := inode.GobDecode(bytes); err != nil {
147 | 		return nil, err
148 | 	}
149 |
150 | return inode, nil
151 | }
152 |
153 | func (ocean *OceanNode) getFileBlock(key string, blockno uint32, id uint64) ([]byte, error) {
154 | blockPath := fmt.Sprintf("%v:%v", key, blockno)
155 | hash := tapestry.Hash(blockPath)
156 | aguid := Aguid(hashToGuid(hash))
157 |
158 | return ocean.getTapestryData(aguid, id)
159 | }
160 |
161 | // Gets the block of the inode of the specified key/path
162 | func (ocean *OceanNode) getInodeBlock(key string, id uint64) ([]byte, error) {
163 | blockPath := fmt.Sprintf("%v:%v", key, "indirect")
164 | hash := tapestry.Hash(blockPath)
165 | aguid := Aguid(hashToGuid(hash))
166 |
167 | return ocean.getTapestryData(aguid, id)
168 | }
169 |
170 |
171 | // Stores inode as data
172 | func (ocean *OceanNode) storeInode(path string, inode *Inode, id uint64) error {
173 |
174 | hash := tapestry.Hash(path)
175 |
176 | aguid := Aguid(hashToGuid(hash))
177 | vguid := Vguid(randSeq(tapestry.DIGITS))
178 |
179 | // Encode the inode
180 | bytes, err := inode.GobEncode()
181 | if err != nil {
182 | return err
183 | }
184 |
185 | // Set the new aguid -> vguid pair with raft
186 | err = ocean.setRaftVguid(aguid, vguid, id)
187 | if err != nil {
188 | return err
189 | }
190 |
191 | // Store data in tapestry with key: vguid
192 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), bytes)
193 | if err != nil {
194 | return err
195 | }
196 |
197 | return nil
198 | }
199 |
200 | func (ocean *OceanNode) storeIndirectBlock(inodePath string, block []byte,
201 | id uint64) error {
202 |
203 | blockPath := fmt.Sprintf("%v:%v", inodePath, "indirect")
204 | hash := tapestry.Hash(blockPath)
205 |
206 | aguid := Aguid(hashToGuid(hash))
207 | vguid := Vguid(randSeq(tapestry.DIGITS))
208 |
209 | // Set the new aguid -> vguid pair with raft
210 | err := ocean.setRaftVguid(aguid, vguid, id)
211 | if err != nil {
212 | return err
213 | }
214 |
215 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), block)
216 | if err != nil {
217 | 		return fmt.Errorf("Tapestry error: %v", err)
218 | }
219 |
220 | return nil
221 | }
222 |
223 | func (ocean *OceanNode) storeFileBlock(inodePath string, blockno uint32,
224 | block []byte, id uint64) error {
225 |
226 | blockPath := fmt.Sprintf("%v:%v", inodePath, blockno)
227 | hash := tapestry.Hash(blockPath)
228 |
229 | aguid := Aguid(hashToGuid(hash))
230 | vguid := Vguid(randSeq(tapestry.DIGITS))
231 |
232 | // Set the new aguid -> vguid pair with raft
233 | err := ocean.setRaftVguid(aguid, vguid, id)
234 | if err != nil {
235 | return err
236 | }
237 |
238 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), block)
239 | if err != nil {
240 | return err
241 | }
242 |
243 | return nil
244 | }
245 |
246 | // Removes an entry from a directory block. If it is not the last entry,
247 | // the last entry is moved into the slot of the entry being removed.
248 | func (ocean *OceanNode) removeEntryFromBlock(bytes []byte, vguid Vguid,
249 | size uint32, id uint64) error {
250 |
251 | start, err := ocean.lookupInode(bytes, vguid, size, id)
252 | if err != nil {
253 | return err
254 | }
255 | if start == size-tapestry.DIGITS { // Last one
256 | // MakeZeros(bytes, start)
257 | } else {
258 | for i := uint32(0); i < tapestry.DIGITS; i++ {
259 | bytes[start+i] = bytes[size-tapestry.DIGITS+i]
260 | }
261 | }
262 | return nil
263 | }
264 |
265 | // Finds the offset within a directory block of the entry whose aguid maps to the given vguid.
266 | func (puddle *OceanNode) lookupInode(block []byte, vguid Vguid,
267 | size uint32, id uint64) (uint32, error) {
268 | length := size / tapestry.DIGITS
269 | for i := uint32(0); i < length; i++ {
270 | curAguid := ByteIntoAguid(block, i*tapestry.DIGITS)
271 | 		res, err := puddle.getRaftVguid(curAguid, id)
272 | 		if err != nil {
273 | 			return 0, err
274 | 		}
275 | 		curVguid := Vguid(strings.Split(string(res), ":")[1])
276 | if curVguid == vguid {
277 | fmt.Println("Found:", curAguid, curVguid)
278 | return i, nil
279 | }
280 | }
281 |
282 | return 0, fmt.Errorf("Not found!")
283 | }
284 |
285 | func ByteIntoAguid(bytes []byte, start uint32) Aguid {
286 | aguid := ""
287 | for i := uint32(0); i < tapestry.DIGITS; i++ {
288 | aguid += strconv.FormatUint(uint64(bytes[start+i]), tapestry.BASE)
289 | }
290 | return Aguid(strings.ToUpper(aguid))
291 | }
--------------------------------------------------------------------------------
/tapestry/tapestry/id_test.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import "testing"
4 |
5 | // This test makes sure that the prefix length is working
6 | func TestSharedPrefixLength(t *testing.T) {
7 | a := ID{1,2,3,4,5,6,7,8,9,6,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
8 | b := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
9 | count := SharedPrefixLength(a, b)
10 | if (count != 9) {
11 | t.Errorf("The SharedPrefixLength does not work")
12 | }
13 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
14 | b = ID{2,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
15 | count = SharedPrefixLength(a, b)
16 | if (count != 0) {
17 | t.Errorf("The SharedPrefixLength does not work")
18 | }
19 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
20 | b = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
21 | count = SharedPrefixLength(a, b)
22 | if (count != 40) {
23 | t.Errorf("The SharedPrefixLength does not work")
24 | }
25 | }
26 |
27 |
28 | //This function tests several types of ID and makes sure that the output is the expected one.
29 | func TestBetterChoice(t *testing.T) {
30 | a := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
31 | b := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
32 | id := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
33 | choice := id.BetterChoice(a, b)
34 | if (choice) {//choice should be false since they are the same
35 | t.Errorf("The BetterChoice does not work")
36 | }
37 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
38 | b = ID{1,2,3,4,5,6,8,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
39 | id = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
40 | choice = id.BetterChoice(a, b)
41 | if (!choice) {//choice should be true for the prefix
42 | t.Errorf("The BetterChoice does not work")
43 | }
44 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
45 | b = ID{1,2,3,4,5,6,7,7,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
46 | id =ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
47 | choice = id.BetterChoice(a, b)
48 | 	if (!choice) {//choice should be true because we get to 6 from 8 faster than to 7
49 | 		t.Errorf("The BetterChoice does not work: %v %v %v", choice, a, b)
50 | }
51 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
52 | b = ID{1,2,3,4,5,6,7,7,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
53 | id =ID{1,2,3,4,5,6,7,6,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
54 | choice = id.BetterChoice(a, b)
55 | if (choice) {//choice should be false because it is closer to get to b (7) than 8
56 | 		t.Errorf("The BetterChoice does not work: %v %v %v", choice, a, b)
57 | }
58 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4, 4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
59 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4, 5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
60 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
61 | choice = id.BetterChoice(a, b)
62 | if (!choice) {//choice should be true because it is faster to get to a (4) from 2 than to b(5)
63 | 		t.Errorf("The BetterChoice does not work: %v %v %v", choice, a, b)
64 | }
65 | a = ID{13,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
66 | b = ID{7,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
67 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
68 | choice = id.BetterChoice(a, b)
69 | 	if (choice) {//choice should be false: at the very beginning, 7 (b) is closer to 1 than 13 (a)
70 | 		t.Errorf("The BetterChoice does not work: %v %v %v", choice, a, b)
71 | }
72 | }
73 |
74 | //test for the Closer function.
75 | func TestCloser(t *testing.T) {
76 |
77 | a := ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
78 | id := ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
79 | b := ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5}
80 | choice := id.Closer(a, b)
81 | if (choice) {//Answer should be false because they are the same ids
82 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
83 | }
84 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12, 1,2,3,0, 4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 13,8,9,12,13,0,9,8,5}
85 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,11, 2,8,9,12,13,0,9,8,5}
86 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12, 1,2,3,0, 4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 10,8,9,12,13,0,9,8,5}
87 | choice = id.Closer(a, b)
88 | if (choice) {//Answer should be false because b is closer in absolute value
89 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
90 | }
91 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3, 0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 15,8,9,12,13,0,9,8,5}
92 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13, 13,8,9,12,13,0,9,8,5}
93 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3, 0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 12,8,9,12,13,0,9,8,5}
94 | choice = id.Closer(a, b)
95 | if (choice) {//Answer should be false because b is closer in absolute value
96 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
97 | }
98 | //some more obvious
99 | a = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2}
100 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
101 | b = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}
102 | choice = id.Closer(a, b)
103 | if (choice) {//Answer should be false because b is closer in absolute value
104 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
105 | }
106 | //This one is tricky because it goes to the other digit
107 | a = ID {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13}
108 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15}
109 | b = ID {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0}
110 | choice = id.Closer(a, b)
111 | if (choice) {//Answer should be false because of the base change
112 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
113 | }
114 | a = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2}
115 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5}
116 | b = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}
117 | choice = id.Closer(a, b)
118 | if (choice) {//Answer should be false because a has a 1
119 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
120 | }
121 | a = ID {1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2}
122 | id = ID{0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5}
123 | b = ID {1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}
124 | choice = id.Closer(a, b)
125 | if (choice) {//Answer should be false (b)
126 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
127 | }
128 | a = ID {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2}
129 | id = ID{0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
130 | b = ID {0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,0}
131 | choice = id.Closer(a, b)
132 | if (choice) {//Answer should be b
133 | 		t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
134 | }
135 | }
--------------------------------------------------------------------------------
/tapestry/README.md:
--------------------------------------------------------------------------------
1 | # Tapestry
2 |
3 | Tapestry is a distributed object location and retrieval (DOLR) system. Its design is based on the paper Tapestry: A Resilient Global-Scale Overlay for
4 | Service Deployment. It is an overlay network that implements simple key-based routing.
5 |
6 | # Usage Example
7 | [cli](cli.go) serves as a console for interacting with tapestry, creating nodes and querying state on the local nodes. It provides the following commands:
8 | * table Print this node’s routing table
9 | * backpointers Print this node’s backpointers
10 | * objects Print the object replicas stored on this node
11 | * put Stores the provided key-value pair on the local node and advertises the key to the tapestry
12 | * lookup Looks up the specified key in the tapestry and prints its location
13 | * get Looks up the specified key in the tapestry, then fetches the value from one of the returned replicas
14 | * remove Remove the value stored locally for the provided key and stops advertising the key to the tapestry
15 | * list List the keys currently being advertised by the local node
16 | * leave Instructs the local node to gracefully leave the Tapestry
17 | * kill Leaves the tapestry without graceful exit
18 | * exit Quit the CLI
19 |
20 | # Identifying Nodes and Objects
21 | Nodes and objects in the Tapestry network are each assigned a sequence of n base-16 digits globally unique identifier.
22 |
23 | # Root Nodes
24 | In order to make it possible for any node in the network to find the location of an object, a single node is appointed as the “root” node for that object. A root node is the one which shares the same hash value as the object.
25 |
26 | # Surrogate Nodes
27 | There are typically fewer nodes in the network than possible values in the space of hash values, so a “surrogate” node for an object is chosen to be the one with a hash value that shares as many prefix digits with the object’s hash value as possible.
28 |
29 | # Selecting the Surrogate Node
30 | Starting at the leftmost digit d, we take the set of nodes that have d as the leftmost digit of their hashes as well. If no such set of nodes exists, it is necessary to deterministically choose another set. To do this, we can try to find a set of nodes that share the digit d + 1 as their leftmost hash digit. Until a non-empty set of nodes is found, the value of the digit we are searching with increases (modulo the base of the hash-value). Once the set has been found the same logic can be applied for the next digit in the hash, choosing from the set of nodes we identified with the previous digit. When this algorithm has been applied for every digit, only one node will be left and that node is the surrogate.
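
A minimal, self-contained sketch of this digit search in Go. IDs are modeled as plain `[]int` slices and all names are illustrative; this is not the repo's `findRoot` implementation.

```go
package main

import "fmt"

const base = 16

// nextNonEmptyDigit keeps only the candidates whose digit at position pos equals
// d, d+1, d+2, ... (mod base), returning the first non-empty subset it finds.
func nextNonEmptyDigit(candidates [][]int, pos, d int) [][]int {
	for i := 0; i < base; i++ {
		digit := (d + i) % base
		var matched [][]int
		for _, id := range candidates {
			if id[pos] == digit {
				matched = append(matched, id)
			}
		}
		if len(matched) > 0 {
			return matched
		}
	}
	return candidates
}

// surrogate applies the digit search position by position until one node is left.
func surrogate(nodes [][]int, objectHash []int) []int {
	candidates := nodes
	for pos := 0; pos < len(objectHash) && len(candidates) > 1; pos++ {
		candidates = nextNonEmptyDigit(candidates, pos, objectHash[pos])
	}
	return candidates[0]
}

func main() {
	nodes := [][]int{{5, 8, 3, 15}, {7, 0, 13, 1}, {9, 0, 15, 5}}
	fmt.Println(surrogate(nodes, []int{7, 1, 0, 0})) // [7 0 13 1]
}
```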
31 |
32 | # Routing Tables
33 | In order to allow nodes to locate objects stored at other nodes, each node maintains a routing table that stores references to a subset of the nodes in the network.
34 |
35 | # Backpointer Tables
36 | Backpointers are references to every node in the network which refers to the local node in their own routing tables. These are useful in maintaining routing tables in a dynamic network. When the local node adds or removes a remote node from its routing table, it notifies the remote node, who will then update its backpointer table.
37 |
38 | # Prefix Routing
39 | A node that matches some number of digits from the object’s hash may be chosen from the routing table. In turn, the selected node’s routing table is inspected and the next node in the route to the surrogate is chosen. At each successive node in the route, the number of digits that match the object’s hash value increases until the last digit has been matched and the surrogate node has been reached. This type of routing is called “prefix routing”. [findRoot](tapestry/tapestry-local.go#L94) has this logic.
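
A sketch of that hop-by-hop loop, using the package's `Node`, `ID`, `DIGITS` and `SharedPrefixLength`; `nextHop` is a stand-in for the routing-table lookup that `findRoot` performs (assumed here to return a neighbor matching a strictly longer prefix), not an actual function in the repo:

```go
// route repeatedly asks the current node for a neighbor that matches at least
// one more prefix digit of the target hash, until no closer neighbor exists.
func route(start Node, target ID, nextHop func(from Node, level int) (Node, bool)) Node {
	current := start
	for {
		level := SharedPrefixLength(current.Id, target)
		if level == DIGITS {
			return current // every digit matched: current is the root
		}
		next, ok := nextHop(current, level)
		if !ok {
			return current // nothing matches a longer prefix: current is the surrogate
		}
		current = next
	}
}
```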
40 |
41 | # Publishing and Retrieving Objects
42 | When an object is “published” by a node, that node routes towards the root node for the key, then registers itself on that node as a location of the key. Multiple nodes can publish the same object. A tapestry client wishing to lookup the object will first route to the root node of the object. The root node then informs the client of which Tapestry nodes are the ones that have published the object. The client then directly contacts one or more of those publishers to retrieve the actual object data.
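
For example, assuming `node0` and `node1` have already been created (as in the tests or via the CLI), a publish followed by a lookup looks like this usage sketch:

```go
node0.Store("spoon", []byte("cuchara")) // node0 keeps the blob and registers itself at the root of Hash("spoon")
value, err := node1.Get("spoon")        // node1 routes to the root, learns the replica locations, fetches the blob
if err == nil {
	fmt.Println(string(value)) // "cuchara"
}
```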
43 |
44 | # Adding Tapestry Nodes
45 | The new node is assigned its ID and then routes towards the root node for that id. The root node initiates the transfer of all keys that should now be stored on the new node. The new node then iteratively traverses backpointers, starting from the root node, to populate its own routing table.
46 |
47 | # Acknowledged Multicast
48 | If the new node has a shared prefix of length n with its root, then any other node that also has a shared prefix of length n is called need-to-know node. The root node performs an acknowledged multicast when it is contacted by the new node. The multicast eventually returns the full set of need-to-know nodes from the Tapestry. The multicast is a recursive call — the root node contacts all nodes on levels >= n of its routing table; those nodes contact all nodes on levels >= n + 1 of their routing tables; and so on. A node that is contacted during the multicast will initiate background transfer of relevant object references to the new node, trigger multicast to the next level of its routing table, then merge and return the resulting lists of nodes (removing duplicates). [AddNodeMulticast](tapestry/tapestry-local.go#L321) has this logic.
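
The shape of the recursion can be pictured with a toy model; everything below (string node names, an explicit per-level table map) is deliberately simplified and is not the repo's `AddNodeMulticast` API:

```go
// multicast returns the set of nodes reached when "node" is contacted at the
// given level. In the real system each reached node would also start a
// background transfer of relevant object references to the new node.
func multicast(tables map[string]map[int][]string, node string, level, maxLevel int) map[string]bool {
	reached := map[string]bool{node: true}
	for l := level; l < maxLevel; l++ {
		for _, neighbor := range tables[node][l] { // contact everything at levels >= level
			for n := range multicast(tables, neighbor, l+1, maxLevel) { // neighbors recurse one level deeper
				reached[n] = true // merging into a set removes duplicates
			}
		}
	}
	return reached
}
```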
49 |
50 | # Backpointer Traversal
51 | Once the multicast has completed, the root node returns the list of need-to-know nodes to the new node. The new node uses this list as an initial neighbor set to populate its routing table. The node iteratively contacts the nodes, asking for their backpointers.
52 |
53 | # Graceful Exit
54 | Tapestry is extremely fault tolerant, so a node could leave without notifying any other nodes. However, a node can gracefully exit the Tapestry, too. When a node gracefully exits, it notifies all of the nodes in its backpointer table of the leave. As part of this notification, it consults its own routing table to find a suitable replacement for the other node’s routing table.
55 |
56 | # Fault Tolerance
57 | The following mechanisms ensure that there is no single point of failure in the system:
58 |
59 | * Errors While Routing
60 | When routing towards a surrogate node, it is possible that a communication failure with any of the intermediate nodes could impede the search. For this reason, routing tables store lists of nodes rather than a single node at each slot. If a failed node is encountered, the node that is searching can request that the failed node be removed from any routing tables it encounters, and resume its search at the last node it communicated with successfully. If the last node it communicated with successfully is no longer responding, it should communicate with the last successful node before that.
61 |
62 | * Loss of Root Node
63 | Published objects continually republish themselves at regular intervals. This ensures that if a surrogate node goes down, a new surrogate node will eventually take its place.
64 |
65 | * Loss of replicas
66 | Finally, applications built on top of Tapestry might wish to ensure that an object remains available at all times, even if the node that published it fails.
67 | Multiple tapestry nodes can publish the same object. This means that client applications can learn of multiple locations of the object, so if the object becomes unavailable in one of these locations, the client can simply contact another of the nodes. This ensures that an object remains available at all times, even if the node that published it fails.
68 |
69 | * Miscellaneous
70 | The cases listed above are the common issues which can arise due to network errors. There are other more obscure ways in which surrogates may become unreachable for a short time when nodes join or fail in a certain order. Tapestry’s method for dealing with this is to assume that there are enough seeded hash values for a given object that not all seeds will become unreachable due to such errors, and those which do become unreachable will be corrected when the replica performs its periodic republishing.
71 |
72 | # Future Work
73 | Reduce the number of hops in object lookups. This can be done by caching the nodes encountered along the path to the surrogate node when the location of an object is published to its surrogate node.
74 |
--------------------------------------------------------------------------------
/raft/raft/raftRPCApi.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "net/rpc"
6 | )
7 |
8 | /* RPC connection map cache */
9 | var connMap = make(map[string]*rpc.Client)
10 |
11 | /* */
12 | /* Join RPC, used when a node in the cluster is first starting up so */
13 | /* it can notify a leader what its listening address is.             */
14 | /* */
15 | type JoinRequest struct {
16 | RemoteNode NodeAddr
17 | FromAddr NodeAddr
18 | }
19 |
20 | type JoinReply struct {
21 | Success bool
22 | }
23 |
24 | func JoinRPC(remoteNode *NodeAddr, fromAddr *NodeAddr) error {
25 | request := JoinRequest{RemoteNode: *remoteNode, FromAddr: *fromAddr}
26 | var reply JoinReply
27 | err := makeRemoteCall(remoteNode, "JoinImpl", request, &reply)
28 | if err != nil {
29 | return err
30 | }
31 | if !reply.Success {
32 | return fmt.Errorf("Unable to join Raft cluster\n")
33 | }
34 | return err
35 | }
36 |
37 | /* */
38 | /* StartNode RPC, once the first node in the cluster has all of the */
39 | /* addresses for all other nodes in the cluster it can then tell them */
40 | /* to transition into Follower state and start the Raft protocol. */
41 | /* */
42 | type StartNodeRequest struct {
43 | RemoteNode NodeAddr
44 | OtherNodes []NodeAddr
45 | }
46 |
47 | type StartNodeReply struct {
48 | Success bool
49 | }
50 |
51 | func StartNodeRPC(remoteNode NodeAddr, otherNodes []NodeAddr) error {
52 | request := StartNodeRequest{}
53 | request.RemoteNode = remoteNode
54 |
55 | request.OtherNodes = make([]NodeAddr, len(otherNodes))
56 | for i, n := range otherNodes {
57 | request.OtherNodes[i].Addr = n.Addr
58 | request.OtherNodes[i].Id = n.Id
59 | }
60 |
61 | var reply StartNodeReply
62 | err := makeRemoteCall(&remoteNode, "StartNodeImpl", request, &reply)
63 | if err != nil {
64 | return err
65 | }
66 | return err
67 | }
68 |
69 | /* */
70 | /* Raft RequestVote RPC, invoked by candidates to gather votes */
71 | /* */
72 | type RequestVoteRequest struct {
73 | /* The candidate's current term Id */
74 | Term uint64
75 |
76 | 	/* The candidate Id currently requesting a node to vote for it. */
77 | CandidateId NodeAddr
78 |
79 | /* The index of the candidate's last log entry */
80 | LastLogIndex uint64
81 |
82 | /* The term of the candidate's last log entry */
83 | LastLogTerm uint64
84 |
85 | CurrentIndex uint64
86 | }
87 |
88 | type RequestVoteReply struct {
89 | /* The current term, for candidate to update itself */
90 | Term uint64
91 |
92 | /* True means candidate received vote */
93 | VoteGranted bool
94 | }
95 |
96 | func (r *RaftNode) RequestVoteRPC(remoteNode *NodeAddr, request RequestVoteRequest) (*RequestVoteReply, error) {
97 | if r.Testing.IsDenied(*r.GetLocalAddr(), *remoteNode) {
98 | return nil, ErrorTestingPolicyDenied
99 | }
100 | var reply RequestVoteReply
101 | err := makeRemoteCall(remoteNode, "RequestVoteImpl", request, &reply)
102 | if err != nil {
103 | return nil, err
104 | }
105 | return &reply, err
106 | }
107 |
108 | /* */
109 | /* Raft AppendEntries RPC, invoked by leader to replicate log entries; */
110 | /* also used as a heartbeat between leaders and followers. */
111 | /* */
112 | type AppendEntriesRequest struct {
113 | /* The leader's term */
114 | Term uint64
115 |
116 | /* The ID of the leader, so that followers can redirect clients */
117 | LeaderId NodeAddr
118 |
119 | /* The index of the log entry immediately preceding new ones */
120 | PrevLogIndex uint64
121 |
122 | /* The term of the prevLogIndex entry */
123 | PrevLogTerm uint64
124 |
125 | /* The log entries the follower needs to store (empty for */
126 | /* heartbeat; may send more than one for efficiency) */
127 | Entries []LogEntry
128 |
129 | /* The leader's commitIndex */
130 | LeaderCommit uint64
131 | }
132 |
133 | type AppendEntriesReply struct {
134 | /* The current term, for leader to update itself */
135 | Term uint64
136 |
137 | /* True if follower contained entry matching prevLogIndex and prevLogTerm*/
138 | Success bool
139 | }
140 |
141 | func (r *RaftNode) AppendEntriesRPC(remoteNode *NodeAddr, request AppendEntriesRequest) (*AppendEntriesReply, error) {
142 | if r.Testing.IsDenied(*r.GetLocalAddr(), *remoteNode) {
143 | return nil, ErrorTestingPolicyDenied
144 | }
145 | var reply AppendEntriesReply
146 | err := makeRemoteCall(remoteNode, "AppendEntriesImpl", request, &reply)
147 | if err != nil {
148 | return nil, err
149 | }
150 | return &reply, err
151 | }
152 |
153 | /* A client request can end in one of these statuses */
154 | type ClientStatus int
155 |
156 | const (
157 | OK ClientStatus = iota
158 | NOT_LEADER
159 | ELECTION_IN_PROGRESS
160 | REQ_FAILED
161 | )
162 |
163 | type FsmCommand int
164 |
165 | const (
166 | HASH_CHAIN_ADD FsmCommand = iota
167 | HASH_CHAIN_INIT
168 | CLIENT_REGISTRATION
169 | INIT
170 | NOOP
171 | //Adding commands to interact with filesystem
172 | REMOVE //For deleting
173 | //Commands that modify the map that raft is in charge of
174 | GET //for querying
175 | SET //for modifying files
176 | LOCK
177 | UNLOCK
178 | )
179 |
180 | type ClientRequest struct {
181 | /* The unique client ID associated with this client session (received */
182 | /* via a previous RegisterClient call). */
183 | ClientId uint64
184 |
185 | /* A sequence number is associated to request to avoid duplicates */
186 | SequenceNum uint64
187 |
188 | /* Command to be executed on the state machine; it may affect state */
189 | Command FsmCommand
190 |
191 | /* Data to accompany the command to the state machine; it may affect state */
192 | Data []byte
193 | }
194 |
195 | type ClientReply struct {
196 | /* OK if state machine successfully applied command */
197 | Status ClientStatus
198 |
199 | /* State machine output, if successful */
200 | Response string
201 |
202 | /* In cases where the client contacted a non-leader, the node should */
203 | /* reply with the correct current leader. */
204 | LeaderHint NodeAddr
205 | }
206 |
207 | func ClientRequestRPC(remoteNode *NodeAddr, request ClientRequest) (*ClientReply, error) {
208 | var reply ClientReply
209 | err := makeRemoteCall(remoteNode, "ClientRequestImpl", request, &reply)
210 | if err != nil {
211 | return nil, err
212 | }
213 | return &reply, err
214 | }
215 |
216 | type RegisterClientRequest struct {
217 | /* The client address invoking request */
218 | FromNode NodeAddr
219 | }
220 |
221 | type RegisterClientReply struct {
222 | /* OK if state machine registered client */
223 | Status ClientStatus
224 |
225 | /* Unique ID for client session */
226 | ClientId uint64
227 |
228 | /* In cases where the client contacted a non-leader, the node should */
229 | /* reply with the correct current leader. */
230 | LeaderHint NodeAddr
231 | }
232 |
233 | func RegisterClientRPC(remoteNode *NodeAddr, request RegisterClientRequest) (*RegisterClientReply, error) {
234 | var reply RegisterClientReply
235 | err := makeRemoteCall(remoteNode, "RegisterClientImpl", request, &reply)
236 | if err != nil {
237 | return nil, err
238 | }
239 | return &reply, err
240 | }
241 |
242 | /* Helper function to make a call to a remote node */
243 | func makeRemoteCall(remoteNode *NodeAddr, method string, req interface{}, rsp interface{}) error {
244 | // Dial the server if we don't already have a connection to it
245 | remoteNodeAddrStr := remoteNode.Addr
246 | var err error
247 | client, ok := connMap[remoteNodeAddrStr]
248 | if !ok {
249 | client, err = rpc.Dial("tcp", remoteNode.Addr)
250 | if err != nil {
251 | return err
252 | }
253 | connMap[remoteNodeAddrStr] = client
254 | }
255 |
256 | // Make the request
257 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method)
258 | // fmt.Println(uniqueMethodName)
259 | err = client.Call(uniqueMethodName, req, rsp)
260 | if err != nil {
261 | client.Close()
262 | delete(connMap, remoteNodeAddrStr)
263 | return err
264 | }
265 |
266 | return nil
267 | }
268 |
--------------------------------------------------------------------------------
/tapestry/tapestry/tapestry_test.go:
--------------------------------------------------------------------------------
1 | package tapestry
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | "time"
7 | )
8 |
9 | /*
10 | PART 1) This test inserts several nodes and objects and
11 | PART 2) then inserts others to make sure that the root changes to the new nodes.
12 |
13 | PART 3) At the end it also makes sure that there is no replica in the previous node after the timeout.
14 | */
15 | func TestChangeRoot(t *testing.T) {
16 | if DIGITS != 4 {
17 | 		t.Errorf("Test won't work unless DIGITS is set to 4.")
18 | return
19 | }
20 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second {
21 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.")
22 | return
23 | }
24 | //PART 1)
25 | port = 58000
26 | id := ID{5, 8, 3, 15}
27 | node0 := makeTapestry(id, "", t)
28 | id = ID{7, 0, 0xd, 1}
29 | node1 := makeTapestry(id, node0.local.node.Address, t)
30 | id = ID{9, 0, 0xf, 5}
31 | node2 := makeTapestry(id, node0.local.node.Address, t)
32 | id = ID{0xb, 0, 0xf, 0xa}
33 | node3 := makeTapestry(id, node0.local.node.Address, t)
34 |
35 | node0.Store("spoon", []byte("cuchara"))
36 | node1.Store("table", []byte("mesa"))
37 | node2.Store("chair", []byte("silla"))
38 | node3.Store("fork", []byte("tenedor"))
39 |
40 | //The root for the node is
41 | root, _ := node3.local.findRoot(node3.local.node, Hash("fork"))
42 | if !equal_ids(root.Id, node0.local.node.Id) {
43 | 		t.Errorf("The root for the fork is not node0, it's %v\n", root.Id)
44 | }
45 | //PART 2) Now we insert a new node
46 | id = ID{0x5, 2, 0xa, 0xa}
47 | node4 := makeTapestry(id, node2.local.node.Address, t)
48 | node4.Store("napkin", []byte("servilleta"))
49 |
50 | 	//We wait for the timeout to expire
51 | 	time.Sleep(TIMEOUT + time.Second)
52 |
53 | //The root for fork should have changed to node4
54 | fmt.Printf("hash for fork: %v\n", Hash("fork"))
55 | fmt.Printf("hash for spoon: %v\n", Hash("spoon"))
56 | fmt.Printf("hash for table: %v\n", Hash("table"))
57 | fmt.Printf("hash for chair: %v\n", Hash("chair"))
58 | root2, _ := node2.local.findRoot(node2.local.node, Hash("fork"))
59 | if !equal_ids(root2.Id, node4.local.node.Id) {
60 | 		t.Errorf("The root for the fork is not node4, it's %v\n", root2.Id)
61 | }
62 | //PART 3) We now make sure that the replica is no longer in the previous node
63 | replica := node0.local.store.Get("fork")
64 | if len(replica) != 0 {
65 | t.Errorf("This node still has a replica for another node %v", replica)
66 | }
67 |
68 | node1.Leave()
69 | node2.Leave()
70 | node3.Leave()
71 | node4.Leave()
72 | node0.Leave()
73 | }
74 |
75 | /*
76 | This test is the same as the previous one but it does not wait for a timeout. It tests the transfer of keys during
77 | AddNodeMulticast, where keys are transferred to the newly joining node.*/
78 | func TestTransferKeys(t *testing.T) {
79 | if DIGITS != 4 {
80 | 		t.Errorf("Test won't work unless DIGITS is set to 4.")
81 | return
82 | }
83 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second {
84 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.")
85 | return
86 | }
87 | port = 58000
88 | id := ID{5, 8, 3, 15}
89 | node0 := makeTapestry(id, "", t)
90 | id = ID{7, 0, 0xd, 1}
91 | node1 := makeTapestry(id, node0.local.node.Address, t)
92 | id = ID{9, 0, 0xf, 5}
93 | node2 := makeTapestry(id, node0.local.node.Address, t)
94 | id = ID{0xb, 0, 0xf, 0xa}
95 | node3 := makeTapestry(id, node0.local.node.Address, t)
96 |
97 | node0.Store("spoon", []byte("cuchara"))
98 | node1.Store("table", []byte("mesa"))
99 | node2.Store("chair", []byte("silla"))
100 | node3.Store("fork", []byte("tenedor"))
101 |
102 | //The root for the node is
103 | root, _ := node3.local.findRoot(node3.local.node, Hash("fork"))
104 | if !equal_ids(root.Id, node0.local.node.Id) {
105 | 		t.Errorf("The root for the fork is not node0, it's %v\n", root.Id)
106 | }
107 | //Now we insert a new node
108 | id = ID{0x5, 2, 0xa, 0xa}
109 | node4 := makeTapestry(id, node2.local.node.Address, t)
110 | node4.Store("napkin", []byte("servilleta"))
111 |
112 | 	//The root for fork should have changed to node4
113 | fmt.Printf("hash for fork: %v\n", Hash("fork"))
114 | fmt.Printf("hash for spoon: %v\n", Hash("spoon"))
115 | fmt.Printf("hash for table: %v\n", Hash("table"))
116 | fmt.Printf("hash for chair: %v\n", Hash("chair"))
117 | root2, _ := node2.local.findRoot(node2.local.node, Hash("fork"))
118 | if !equal_ids(root2.Id, node4.local.node.Id) {
119 | 		t.Errorf("The root for the fork is not node4, it's %v\n", root2.Id)
120 | }
121 | //We now make sure that the replica is no longer in the previous node
122 | replica := node0.local.store.Get("fork")
123 | if len(replica) != 0 {
124 | t.Errorf("This node still has a replica for another node %v", replica)
125 | }
126 |
127 | node1.Leave()
128 | node2.Leave()
129 | node3.Leave()
130 | node4.Leave()
131 | node0.Leave()
132 | }
133 |
134 | /*
135 | PART 1) This test first adds several nodes, and adds several objects to the nodes.
136 | It then checks for the existence of objects from several nodes.
137 |
138 | PART 2)Then it deletes one node and makes sure that the object it had
139 | "spoon" is no longer available
140 |
141 | PART 3)Then a new node with the "spoon" object joins and makes sure that it is available through another node.
142 | */
143 | func TestPublishAndRegister(t *testing.T) {
144 | if DIGITS != 4 {
145 | 		t.Errorf("Test won't work unless DIGITS is set to 4.")
146 | return
147 | }
148 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second {
149 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.")
150 | return
151 | }
152 | //PART 1
153 | port = 58000
154 | id := ID{5, 8, 3, 15}
155 | node0 := makeTapestry(id, "", t)
156 | id = ID{7, 0, 0xd, 1}
157 | node1 := makeTapestry(id, node0.local.node.Address, t)
158 | id = ID{9, 0, 0xf, 5}
159 | node2 := makeTapestry(id, node0.local.node.Address, t)
160 | id = ID{0xb, 0, 0xf, 0xa}
161 | node3 := makeTapestry(id, node0.local.node.Address, t)
162 |
163 | node0.Store("spoon", []byte("cuchara"))
164 | node1.Store("table", []byte("mesa"))
165 | node2.Store("chair", []byte("silla"))
166 | node3.Store("fork", []byte("tenedor"))
167 |
168 | time.Sleep(time.Second * 5)
169 |
170 | // Objects should persist after TIMEOUT seconds because
171 | // publish is called every two seconds.
172 | result, err := node1.Get("spoon")
173 | CheckGet(err, result, "cuchara", t)
174 | result, err = node2.Get("table")
175 | CheckGet(err, result, "mesa", t)
176 | result, err = node3.Get("chair")
177 | CheckGet(err, result, "silla", t)
178 | result, err = node0.Get("fork")
179 | CheckGet(err, result, "tenedor", t)
180 |
181 | // PART 2) Root node of Hash(spoon) should no longer have a record
182 | // of this object after node0 leaves after TIMEOUT seconds.
183 | root := FindRootOfHash([]*Tapestry{node1, node2, node3}, Hash("chair"))
184 | fmt.Printf("The root is: %v and the node0 id is: %v", root, node0.local.node.Id)
185 | node0.Leave()
186 | //fmt.Printf("The root is: %v and the node0 id is: %v", root.local.node.Id, node0.local.node.Id)
187 | if root == nil {
188 | t.Errorf("Could not find Root of Hash")
189 | } else {
190 | replicas := root.local.store.Get("spoon")
191 | 		if len(replicas) == 0 {
192 | t.Errorf("Replica of 'spoon' not in root node. What?")
193 | } else {
194 | time.Sleep(time.Second * 5)
195 | replicas = root.local.store.Get("spoon")
196 | if len(replicas) != 0 {
197 | t.Errorf("Replica of 'spoon' is in root node after node containing it left.")
198 | }
199 | }
200 | }
201 | //PART 3) We add a new node that contains spoon and we should find it.
202 | id = ID{0x5, 2, 0xa, 0xa}
203 | node4 := makeTapestry(id, node2.local.node.Address, t)
204 | node4.Store("spoon", []byte("cuchara"))
205 | time.Sleep(time.Second * 5)
206 | replicas, _ := node1.local.tapestry.Get("spoon")
207 | fmt.Printf("id of root is: %v\n", root.local.node.Id)
208 | if len(replicas) == 0 {
209 | t.Errorf("'spoon' is not there even after a new node containing it joined")
210 | }
211 |
212 | node1.Leave()
213 | node2.Leave()
214 | node3.Leave()
215 | node4.Leave()
216 | }
217 |
218 | /*Helper function to compare a result with an expected string.*/
219 | func CheckGet(err error, result []byte, expected string, t *testing.T) {
220 | if err != nil {
221 | t.Errorf("Get errored out. returned: %v", err)
222 | return
223 | }
224 |
225 | if string(result) != expected {
226 | t.Errorf("Get(\"%v\") did not return expected result '%v'",
227 | string(result), expected)
228 | }
229 | }
230 | /*Helper function that returns the root of an ID from a slice of nodes*/
231 | func FindRootOfHash(nodes []*Tapestry, hash ID) *Tapestry {
232 | if len(nodes) == 0 {
233 | return nil
234 | }
235 | root, _ := nodes[0].local.findRoot(nodes[0].local.node, hash)
236 |
237 | for _, node := range nodes {
238 | if equal_ids(node.local.node.Id, root.Id) {
239 | return node
240 | }
241 | }
242 |
243 | return nil
244 | }
--------------------------------------------------------------------------------
/raft/raft/persistenceAPI.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "os"
7 | )
8 |
9 | type NodeStableState struct {
10 | /* Latest term the server has seen (initialized */
11 | /* to 0 on first boot, increases monotonically) */
12 | CurrentTerm uint64
13 |
14 | /* The candidate Id that received our vote in */
15 | /* the current term (or "" if none). */
16 | VotedFor string
17 |
18 | /* Our local listening address and Id */
19 | LocalAddr NodeAddr
20 |
21 | /* The addresses of everyone in our cluster */
22 | OtherNodes []NodeAddr
23 |
24 | /* Client request cache, maps a client request */
25 | /* to the response that was sent to them. */
26 | ClientRequestSequences map[string]ClientReply
27 | }
28 |
29 | type LogEntry struct {
30 | /* Index of log entry (first index = 1) */
31 | Index uint64
32 |
33 | /* The term that this entry was in when added */
34 | TermId uint64
35 |
36 | /* Command associated with this log entry in */
37 | /* the user's finite-state-machine. */
38 | Command FsmCommand
39 |
40 | /* Data associated with this log entry in the */
41 | /* user's finite-state-machine. */
42 | Data []byte
43 |
44 | /* After processing this log entry, what ID to */
45 | /* use when caching the response. Use an empty */
46 | /* string to not cache at all */
47 | CacheId string
48 | }
49 |
50 | type FileData struct {
51 | 	/* Active file descriptor for the file */
52 | fd *os.File
53 |
54 | /* Size of file after reading it in and after writes */
55 | size int64
56 |
57 | /* Filename of file */
58 | filename string
59 |
60 | /* Map from LogEntry index to size of file before that index starts */
61 | idxMap map[uint64]int64
62 |
63 | /* Is the fd open or not? */
64 | open bool
65 | }
66 |
67 | func (r *RaftNode) initStableStore() (bool, error) {
68 | freshNode := false
69 | // Create log path directory if it doesn't already exist
70 | err := os.Mkdir(r.conf.LogPath, 0777)
71 | if err == nil {
72 | Out.Printf("Created log directory: %v\n", r.conf.LogPath)
73 | }
74 | if err != nil && !os.IsExist(err) {
75 | Error.Printf("error creating dir %v\n", err)
76 | return freshNode, err
77 | }
78 |
79 | r.logFileDescriptor = FileData{
80 | fd: nil,
81 | size: 0,
82 | filename: fmt.Sprintf("%v/%d_raftlog.dat", r.conf.LogPath, r.listenPort),
83 | }
84 | r.metaFileDescriptor = FileData{
85 | fd: nil,
86 | size: 0,
87 | filename: fmt.Sprintf("%v/%d_raftmeta.dat", r.conf.LogPath, r.listenPort),
88 | }
89 | raftLogSize, raftLogExists := getFileInfo(r.logFileDescriptor.filename)
90 | r.logFileDescriptor.size = raftLogSize
91 |
92 | raftMetaSize, raftMetaExists := getFileInfo(r.metaFileDescriptor.filename)
93 | r.metaFileDescriptor.size = raftMetaSize
94 |
95 | // Previous state exists, re-populate everything
96 | if raftLogExists && raftMetaExists {
97 | fmt.Printf("Reloading previous raftlog (%v) and raftmeta (%v)\n",
98 | r.logFileDescriptor.filename, r.metaFileDescriptor.filename)
99 | // Read in previous log and populate index mappings
100 | entries, err := ReadRaftLog(&r.logFileDescriptor)
101 | if err != nil {
102 | Error.Printf("Error reading in raft log: %v\n", err)
103 | return freshNode, err
104 | }
105 | r.logCache = entries
106 |
107 | // Create append-only file descriptor for later writing out of log entries.
108 | err = openRaftLogForWrite(&r.logFileDescriptor)
109 | if err != nil {
110 | Error.Printf("Error opening raftlog for write: %v\n", err)
111 | return freshNode, err
112 | }
113 |
114 | // Read in previous metalog and set cache
115 | ss, err := ReadStableState(&r.metaFileDescriptor)
116 | if err != nil {
117 | Error.Printf("Error reading stable state: %v\n", err)
118 | return freshNode, err
119 | }
120 | r.stableState = *ss
121 |
122 | } else if (!raftLogExists && raftMetaExists) || (raftLogExists && !raftMetaExists) {
123 | Error.Println("Both raftlog and raftmeta files must exist to proceed!")
124 | err = errors.New("Both raftlog and raftmeta files must exist to start this node")
125 | return freshNode, err
126 |
127 | } else {
128 | // We now assume neither file exists, so let's create new ones
129 | freshNode = true
130 | Out.Printf("Creating new raftlog and raftmeta files")
131 | err := CreateRaftLog(&r.logFileDescriptor)
132 | if err != nil {
133 | Error.Printf("Error creating new raftlog: %v\n", err)
134 | return freshNode, err
135 | }
136 | err = CreateStableState(&r.metaFileDescriptor)
137 | if err != nil {
138 | Error.Printf("Error creating new stable state: %v\n", err)
139 | return freshNode, err
140 | }
141 |
142 | // Init other nodes to zero, this will become populated
143 | r.stableState.OtherNodes = make([]NodeAddr, 0)
144 |
145 | // Init client request cache
146 | r.stableState.ClientRequestSequences = make(map[string]ClientReply)
147 |
148 | // No previous log cache exists, so a fresh one must be created.
149 | r.logCache = make([]LogEntry, 0)
150 |
151 | // If the log is empty we need to bootstrap it by adding the first committed entry.
152 | initEntry := LogEntry{
153 | Index: 0,
154 | TermId: r.GetCurrentTerm(),
155 | Command: INIT,
156 | Data: []byte{0},
157 | }
158 | r.appendLogEntry(initEntry)
159 | r.setCurrentTerm(0)
160 | }
161 |
162 | return freshNode, nil
163 | }
164 |
165 | /* Raft metadata setters/getters */
166 | func (r *RaftNode) setCurrentTerm(newTerm uint64) {
167 | r.ssMutex.Lock()
168 | defer r.ssMutex.Unlock()
169 | if newTerm != r.stableState.CurrentTerm {
170 | Out.Printf("(%v) Setting current term from %v -> %v", r.Id, r.stableState.CurrentTerm, newTerm)
171 | }
172 | r.stableState.CurrentTerm = newTerm
173 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
174 | if err != nil {
175 | Error.Printf("Unable to flush new term to disk: %v\n", err)
176 | panic(err)
177 | }
178 | }
179 |
180 | func (r *RaftNode) GetCurrentTerm() uint64 {
181 | return r.stableState.CurrentTerm
182 | }
183 |
184 | func (r *RaftNode) setVotedFor(candidateId string) {
185 | r.ssMutex.Lock()
186 | defer r.ssMutex.Unlock()
187 | r.stableState.VotedFor = candidateId
188 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
189 | if err != nil {
190 | Error.Printf("Unable to flush new votedFor to disk: %v\n", err)
191 | panic(err)
192 | }
193 | }
194 |
195 | func (r *RaftNode) GetVotedFor() string {
196 | return r.stableState.VotedFor
197 | }
198 |
199 | func (r *RaftNode) setLocalAddr(localAddr *NodeAddr) {
200 | r.ssMutex.Lock()
201 | defer r.ssMutex.Unlock()
202 | r.stableState.LocalAddr = *localAddr
203 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
204 | if err != nil {
205 | Error.Printf("Unable to flush new localaddr to disk: %v\n", err)
206 | panic(err)
207 | }
208 | }
209 |
210 | func (r *RaftNode) GetLocalAddr() *NodeAddr {
211 | return &r.stableState.LocalAddr
212 | }
213 |
214 | func (r *RaftNode) GetOtherNodes() []NodeAddr {
215 | return r.stableState.OtherNodes
216 | }
217 |
218 | func (r *RaftNode) SetOtherNodes(nodes []NodeAddr) {
219 | r.ssMutex.Lock()
220 | defer r.ssMutex.Unlock()
221 | r.stableState.OtherNodes = nodes
222 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
223 | if err != nil {
224 | Error.Printf("Unable to flush new other nodes to disk: %v\n", err)
225 | panic(err)
226 | }
227 | }
228 |
229 | func (r *RaftNode) AppendOtherNodes(other NodeAddr) {
230 | r.ssMutex.Lock()
231 | defer r.ssMutex.Unlock()
232 | r.stableState.OtherNodes = append(r.stableState.OtherNodes, other)
233 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
234 | if err != nil {
235 | Error.Printf("Unable to flush new other nodes to disk: %v\n", err)
236 | panic(err)
237 | }
238 | }
239 |
240 | func (r *RaftNode) CheckRequestCache(clientReq ClientRequest) (*ClientReply, bool) {
241 | uniqueId := fmt.Sprintf("%v-%v", clientReq.ClientId, clientReq.SequenceNum)
242 | val, ok := r.stableState.ClientRequestSequences[uniqueId]
243 | if ok {
244 | return &val, ok
245 | } else {
246 | return nil, ok
247 | }
248 | }
249 |
250 | func (r *RaftNode) AddRequest(uniqueId string, reply ClientReply) error {
251 | r.ssMutex.Lock()
252 | defer r.ssMutex.Unlock()
253 | _, ok := r.stableState.ClientRequestSequences[uniqueId]
254 | if ok {
255 | return errors.New("Request with the same clientId and seqNum already exists!")
256 | }
257 | r.stableState.ClientRequestSequences[uniqueId] = reply
258 |
259 | err := WriteStableState(&r.metaFileDescriptor, r.stableState)
260 | if err != nil {
261 | Error.Printf("Unable to flush new client request to disk: %v\n", err)
262 | panic(err)
263 | }
264 |
265 | return nil
266 | }
267 |
268 | /* Raft log setters/getters */
269 | func (r *RaftNode) getLogEntry(index uint64) *LogEntry {
270 | if index < uint64(len(r.logCache)) {
271 | return &r.logCache[index]
272 | } else {
273 | return nil
274 | }
275 | }
276 |
277 | func (r *RaftNode) getLastLogEntry() *LogEntry {
278 | return r.getLogEntry(r.getLastLogIndex())
279 | }
280 |
281 | func (r *RaftNode) getLogEntries(start, end uint64) []LogEntry {
282 | if start < uint64(len(r.logCache)) {
283 | if end > uint64(len(r.logCache)) {
284 | end = uint64(len(r.logCache))
285 | } else {
286 | end++
287 | }
288 | return r.logCache[start:end]
289 | } else {
290 | return make([]LogEntry, 0)
291 | }
292 | }
293 |
294 | func (r *RaftNode) getLastLogIndex() uint64 {
295 | return uint64(len(r.logCache) - 1)
296 | }
297 |
298 | func (r *RaftNode) getLastLogTerm() uint64 {
299 | return r.getLogEntry(r.getLastLogIndex()).TermId
300 | }
301 |
302 | func (r *RaftNode) getLogTerm(index uint64) uint64 {
303 | return r.getLogEntry(index).TermId
304 | }
305 |
306 | func (r *RaftNode) appendLogEntry(entry LogEntry) error {
307 | // write entry to disk
308 | err := AppendLogEntry(&r.logFileDescriptor, &entry)
309 | if err != nil {
310 | return err
311 | }
312 | // update entry in cache
313 | r.logCache = append(r.logCache, entry)
314 | return nil
315 | }
316 |
317 | // Truncate file to remove everything at index and after it (an inclusive truncation!)
318 | func (r *RaftNode) truncateLog(index uint64) error {
319 | err := TruncateLog(&r.logFileDescriptor, index)
320 | if err != nil {
321 | return err
322 | }
323 |
324 | // Truncate cache as well
325 | r.logCache = r.logCache[:index]
326 | return nil
327 | }
328 |
329 | func CreateFileData(filename string) FileData {
330 | fileData := FileData{}
331 | fileData.filename = filename
332 | return fileData
333 | }
334 |
335 | func (r *RaftNode) RemoveLogs() error {
336 | r.logFileDescriptor.fd.Close()
337 | r.logFileDescriptor.open = false
338 | err := os.Remove(r.logFileDescriptor.filename)
339 | if err != nil {
340 | r.Error("Unable to remove raftlog file")
341 | return err
342 | }
343 |
344 | r.metaFileDescriptor.fd.Close()
345 | r.metaFileDescriptor.open = false
346 | err = os.Remove(r.metaFileDescriptor.filename)
347 | if err != nil {
348 | r.Error("Unable to remove raftmeta file")
349 | return err
350 | }
351 |
352 | return nil
353 | }
--------------------------------------------------------------------------------
/raft/raft/persistenceImpl.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "bytes"
5 | "encoding/gob"
6 | "errors"
7 | "fmt"
8 | "io"
9 | "os"
10 | )
11 |
12 | /* */
13 | /* Main functions to assist with interacting with log entries, etc. */
14 | /* */
15 |
16 | func openRaftLogForWrite(fileData *FileData) error {
17 | if fileExists(fileData.filename) {
18 | fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600)
19 | fileData.fd = fd
20 | fileData.open = true
21 | return err
22 | } else {
23 | return errors.New("Raftfile does not exist")
24 | }
25 | }
26 |
27 | func CreateRaftLog(fileData *FileData) error {
28 | fd, err := os.OpenFile(fileData.filename, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
29 | fileData.fd = fd
30 | fileData.size = int64(0)
31 | fileData.idxMap = make(map[uint64]int64)
32 | fileData.open = true
33 | return err
34 | }
35 |
36 | func ReadRaftLog(fileData *FileData) ([]LogEntry, error) {
37 | 	f, err := os.Open(fileData.filename)
38 | 	if err != nil {
39 | 		return nil, err
40 | 	}
41 | 	defer f.Close()
42 | 	fileData.idxMap = make(map[uint64]int64)
43 | 	entries, fileLocation := make([]LogEntry, 0), int64(0)
44 | for err != io.EOF {
45 | size, err := readStructSize(f)
46 | if err != nil {
47 | if err == io.EOF {
48 | break
49 | }
50 | Error.Printf("Error reading struct size: %v at loc: %v\n", err, fileLocation)
51 | fileData.open = false
52 | return entries, err
53 | }
54 |
55 | entry, err := readLogEntry(f, size)
56 | if err != nil {
57 | Error.Printf("Error reading log entry: %v at loc: %v\n", err, fileLocation)
58 | fileData.open = false
59 | return entries, err
60 | }
61 | fileData.idxMap[entry.Index] = fileLocation
62 | fileLocation += INT_GOB_SIZE + int64(size)
63 | entries = append(entries, *entry)
64 | }
65 |
66 | fileData.open = false
67 | return entries, nil
68 | }
69 |
70 | func AppendLogEntry(fileData *FileData, entry *LogEntry) error {
71 | sizeIdx := fileData.size
72 |
73 | logBytes, err := getLogEntryBytes(entry)
74 | if err != nil {
75 | return err
76 | }
77 | size, err := getSizeBytes(len(logBytes))
78 | if err != nil {
79 | return err
80 | }
81 |
82 | numBytesWritten, err := fileData.fd.Write(size)
83 | if err != nil {
84 | return err
85 | }
86 | if int64(numBytesWritten) != INT_GOB_SIZE {
87 | panic("int gob size is not correct, cannot proceed")
88 | }
89 | fileData.size += int64(numBytesWritten)
90 |
91 | err = fileData.fd.Sync()
92 | if err != nil {
93 | return err
94 | }
95 |
96 | numBytesWritten, err = fileData.fd.Write(logBytes)
97 | if err != nil {
98 | return err
99 | }
100 | if numBytesWritten != len(logBytes) {
101 | panic("did not write correct amount of bytes for some reason for log entry")
102 | }
103 | fileData.size += int64(numBytesWritten)
104 |
105 | err = fileData.fd.Sync()
106 | if err != nil {
107 | return err
108 | }
109 |
110 | // Update index mapping for this entry
111 | fileData.idxMap[entry.Index] = int64(sizeIdx)
112 |
113 | return nil
114 | }
115 |
116 | func TruncateLog(raftLogFd *FileData, index uint64) error {
117 | newFileSize, exist := raftLogFd.idxMap[index]
118 | if !exist {
119 | return fmt.Errorf("Truncation failed, log index %v doesn't exist\n", index)
120 | }
121 |
122 | // Windows does not allow truncation of open file, must close first
123 | raftLogFd.fd.Close()
124 | err := os.Truncate(raftLogFd.filename, newFileSize)
125 | if err != nil {
126 | return err
127 | }
128 | fd, err := os.OpenFile(raftLogFd.filename, os.O_APPEND|os.O_WRONLY, 0600)
129 | raftLogFd.fd = fd
130 |
131 | 	for i, end := index, uint64(len(raftLogFd.idxMap)); i < end; i++ {
132 | 		delete(raftLogFd.idxMap, i) // bound computed up front, since deletions shrink the map
133 | 	}
134 | raftLogFd.size = newFileSize
135 | return nil
136 | }
137 |
138 | /* */
139 | /* Main functions to assist with interacting with stable state entries, etc. */
140 | /* */
141 | func openStableStateForWrite(fileData *FileData) error {
142 | if fileExists(fileData.filename) {
143 | fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600)
144 | fileData.fd = fd
145 | fileData.open = true
146 | return err
147 | } else {
148 | return errors.New("Stable state file does not exist")
149 | }
150 | }
151 |
152 | func CreateStableState(fileData *FileData) error {
153 | fd, err := os.OpenFile(fileData.filename, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
154 | fileData.fd = fd
155 | fileData.open = true
156 | return err
157 | }
158 |
159 | func ReadStableState(fileData *FileData) (*NodeStableState, error) {
160 | 	f, err := os.Open(fileData.filename)
161 | 	if err != nil {
162 | 		return nil, err
163 | 	}
162 | stat, err := f.Stat()
163 | if err != nil {
164 | f.Close()
165 | return nil, err
166 | }
167 |
168 | ss, err := readStableStateEntry(f, int(stat.Size()))
169 | f.Close()
170 |
171 | if err != nil {
172 | // for some reason we failed to read our stable state file, try backup file.
173 | backupFilename := fmt.Sprintf("%v.bak", fileData.filename)
174 | 		fbak, err := os.Open(backupFilename)
175 | 		if err != nil {
176 | 			return nil, err
177 | 		}
178 | 
179 | 		stat, err := fbak.Stat()
180 | 		if err != nil {
181 | 			fbak.Close()
182 | 			return nil, err
183 | 		}
184 | 
185 | 		ss, err := readStableStateEntry(fbak, int(stat.Size()))
183 | if err != nil {
184 | Error.Printf("we were unable to read stable storage or its backup: %v\n", err)
185 | fbak.Close()
186 | return nil, err
187 | }
188 | fbak.Close()
189 |
190 | // we were successful reading from backup, move to live copy
191 | err = os.Remove(fileData.filename)
192 | if err != nil {
193 | return nil, err
194 | }
195 | err = copyFile(backupFilename, fileData.filename)
196 | if err != nil {
197 | return nil, err
198 | }
199 |
200 | return ss, nil
201 | }
202 |
203 | return ss, nil
204 | }
205 |
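206 | // WriteStableState persists ss using a backup-then-rewrite scheme: the current
207 | // file is copied to <filename>.bak, the live file is truncated, rewritten and
208 | // synced, and the backup is removed only once the rewrite has succeeded.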
206 | func WriteStableState(fileData *FileData, ss NodeStableState) error {
207 | // backup old stable state
208 | backupFilename := fmt.Sprintf("%v.bak", fileData.filename)
209 | err := backupStableState(fileData, backupFilename)
210 | if err != nil {
211 | return fmt.Errorf("Backup failed: %v", err)
212 | }
213 |
214 | // Windows does not allow truncation of open file, must close first
215 | fileData.fd.Close()
216 |
217 | // truncate live stable state
218 | err = os.Truncate(fileData.filename, 0)
219 | if err != nil {
220 | return fmt.Errorf("Truncation failed: %v", err)
221 | }
222 | 	fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600)
223 | 	if err != nil {
224 | 		return fmt.Errorf("Reopening stable state file failed: %v", err)
225 | 	}
226 | 	fileData.fd = fd
224 |
225 | // write out stable state to live version
226 | bytes, err := getStableStateBytes(ss)
227 | if err != nil {
228 | return err
229 | }
230 |
231 | 	numBytes, err := fileData.fd.Write(bytes)
232 | 	if err != nil {
233 | 		return fmt.Errorf("Write failed: %v", err)
234 | 	}
235 | 	if numBytes != len(bytes) {
236 | 		panic("did not write correct amount of bytes for some reason for ss")
237 | 	}
235 |
236 | err = fileData.fd.Sync()
237 | if err != nil {
238 | 		return fmt.Errorf("Sync failed: %v", err)
239 | }
240 |
241 | // remove backup file
242 | err = os.Remove(backupFilename)
243 | if err != nil && !os.IsNotExist(err) {
244 | return fmt.Errorf("Remove failed: %v", err)
245 | }
246 |
247 | return nil
248 | }
249 |
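250 | // backupStableState closes the live stable-state file, copies it to
251 | // backupFilename, and reopens the live file for appending.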
250 | func backupStableState(fileData *FileData, backupFilename string) error {
251 | if fileData.open && fileData.fd != nil {
252 | err := fileData.fd.Close()
253 | fileData.open = false
254 | if err != nil {
255 | return fmt.Errorf("Closing file failed: %v", err)
256 | }
257 | }
258 |
259 | err := os.Remove(backupFilename)
260 | if err != nil && !os.IsNotExist(err) {
261 | return fmt.Errorf("Remove failed: %v", err)
262 | }
263 |
264 | err = copyFile(fileData.filename, backupFilename)
265 | if err != nil {
266 | return fmt.Errorf("File copy failed: %v", err)
267 | }
268 |
269 | err = openStableStateForWrite(fileData)
270 | if err != nil {
271 | return fmt.Errorf("Opening stable state for writing failed: %v", err)
272 | }
273 |
274 | return nil
275 | }
276 |
277 | func copyFile(srcFile string, dstFile string) error {
278 | src, err := os.Open(srcFile)
279 | if err != nil {
280 | return err
281 | }
282 |
283 | 	dst, err := os.Create(dstFile)
284 | 	if err != nil {
285 | 		src.Close()
286 | 		return err
287 | 	}
288 | 
289 | 	_, err = io.Copy(dst, src)
290 | 	if err != nil {
291 | 		src.Close()
292 | 		dst.Close()
293 | 		return err
294 | 	}
292 |
293 | err = src.Close()
294 | if err != nil {
295 | 		Error.Printf("Error closing src file: %v\n", err)
296 | 		return err
297 | }
298 |
299 | err = dst.Close()
300 | if err != nil {
301 | 		Error.Printf("Error closing dst file: %v\n", err)
302 | 		return err
303 | }
304 | return nil
305 | }
306 |
307 | /* */
308 | /* Helper functions to assist with read/writing log entries, etc. */
309 | /* */
310 |
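311 | // INT_GOB_SIZE is the assumed length, in bytes, of a gob-encoded int size
312 | // header as produced by getSizeBytes; AppendLogEntry panics if an encoded
313 | // header ever comes out at a different length.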
311 | const INT_GOB_SIZE int64 = 5
312 |
313 | func getStableStateBytes(ss NodeStableState) ([]byte, error) {
314 | b := new(bytes.Buffer)
315 | e := gob.NewEncoder(b)
316 | err := e.Encode(ss)
317 | if err != nil {
318 | return nil, err
319 | }
320 | return b.Bytes(), nil
321 | }
322 |
323 | func getSizeBytes(size int) ([]byte, error) {
324 | b := new(bytes.Buffer)
325 | e := gob.NewEncoder(b)
326 | err := e.Encode(size)
327 | if err != nil {
328 | return nil, err
329 | }
330 | return b.Bytes(), nil
331 | }
332 |
333 | func getLogEntryBytes(entry *LogEntry) ([]byte, error) {
334 | b := new(bytes.Buffer)
335 | e := gob.NewEncoder(b)
336 | err := e.Encode(*entry)
337 | if err != nil {
338 | return nil, err
339 | }
340 | return b.Bytes(), nil
341 | }
342 |
343 | func readStructSize(f *os.File) (int, error) {
344 | // Read bytes for size value
345 | b := make([]byte, INT_GOB_SIZE)
346 | 	sizeBytes, err := io.ReadFull(f, b)
347 | if err != nil {
348 | return -1, err
349 | }
350 | if int64(sizeBytes) != INT_GOB_SIZE {
351 | panic("The raftlog may be corrupt, cannot proceed")
352 | }
353 |
354 | // Decode bytes as int, which is sizeof(LogEntry).
355 | buff := bytes.NewBuffer(b)
356 | var size int
357 | dataDecoder := gob.NewDecoder(buff)
358 | err = dataDecoder.Decode(&size)
359 | if err != nil {
360 | return -1, err
361 | }
362 |
363 | return size, nil
364 | }
365 |
366 | func readLogEntry(f *os.File, size int) (*LogEntry, error) {
367 | b := make([]byte, size)
368 | 	leSize, err := io.ReadFull(f, b)
369 | if err != nil {
370 | return nil, err
371 | }
372 | if leSize != size {
373 | panic("The raftlog may be corrupt, cannot proceed")
374 | }
375 |
376 | buff := bytes.NewBuffer(b)
377 | var entry LogEntry
378 | dataDecoder := gob.NewDecoder(buff)
379 | err = dataDecoder.Decode(&entry)
380 | if err != nil {
381 | return nil, err
382 | }
383 |
384 | return &entry, nil
385 | }
386 |
387 | func readStableStateEntry(f *os.File, size int) (*NodeStableState, error) {
388 | b := make([]byte, size)
389 | 	leSize, err := io.ReadFull(f, b)
390 | if err != nil {
391 | return nil, err
392 | }
393 | if leSize != size {
394 | panic("The stable state log may be corrupt, cannot proceed")
395 | }
396 |
397 | buff := bytes.NewBuffer(b)
398 | var ss NodeStableState
399 | dataDecoder := gob.NewDecoder(buff)
400 | err = dataDecoder.Decode(&ss)
401 | if err != nil {
402 | return nil, err
403 | }
404 |
405 | return &ss, nil
406 | }
407 |
408 | func fileExists(filename string) bool {
409 | _, err := os.Stat(filename)
410 | if err == nil {
411 | return true
412 | } else if os.IsNotExist(err) {
413 | return false
414 | } else {
415 | panic(err)
416 | }
417 | }
418 |
419 | func getFileInfo(filename string) (int64, bool) {
420 | stat, err := os.Stat(filename)
421 | if err == nil {
422 | return stat.Size(), true
423 | } else if os.IsNotExist(err) {
424 | return 0, false
425 | } else {
426 | panic(err)
427 | }
428 | }
--------------------------------------------------------------------------------