├── .gitignore ├── oceanstore └── oceanstore │ ├── logging.go │ ├── oceanstore_test.go │ ├── ocean_local_impl_test.go │ ├── client.go │ ├── ocean_rpc_impl.go │ ├── listener.go │ ├── guid.go │ ├── guid_test.go │ ├── util.go │ ├── oceanstore.go │ ├── inode_test.go │ ├── ocean_local_impl.go │ ├── ocean_rpc_api.go │ └── inode.go ├── raft ├── shell.go ├── cliClient.go ├── raft │ ├── rpcMessages.go │ ├── utils.go │ ├── config.go │ ├── testingPolicy.go │ ├── listener.go │ ├── raftRPCImpl.go │ ├── clientRPCApi.go │ ├── raftLocalImpl.go │ ├── logging.go │ ├── machine.go │ ├── testUtils.go │ ├── client.go │ ├── raft_test.go │ ├── raft.go │ ├── raftRPCApi.go │ ├── persistenceAPI.go │ └── persistenceImpl.go └── README.md ├── chord ├── chord │ ├── simple_test.go │ ├── kv_store_test.go │ ├── finger_test.go │ ├── util_test.go │ ├── finger.go │ ├── util.go │ ├── node_rpc_impl.go │ ├── kv_store.go │ ├── node_local_impl.go │ ├── chord.go │ └── node_rpc_api.go ├── utils │ └── listener.go ├── cli.go └── README.md ├── tapestry ├── tapestry │ ├── helper_methods.go │ ├── routingtable_test.go │ ├── blobstore.go │ ├── tapestry-client.go │ ├── backpointers.go │ ├── objectstore.go │ ├── tapestry-local_test.go │ ├── tapestry-remote.go │ ├── id.go │ ├── routingtable.go │ ├── tapestry-rpcimpl.go │ ├── tapestry.go │ ├── id_test.go │ └── tapestry_test.go ├── cli.go └── README.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | raft/raft/XVlBzgbaiC/ -------------------------------------------------------------------------------- /oceanstore/oceanstore/logging.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/oceanstore_test.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/ocean_local_impl_test.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | -------------------------------------------------------------------------------- /raft/shell.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "./raft" 4 | 5 | type Shell struct { 6 | r *raft.RaftNode 7 | c *raft.Client 8 | done chan bool 9 | } 10 | -------------------------------------------------------------------------------- /chord/chord/simple_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestSimple(t *testing.T) { 8 | _, err := CreateNode(nil) 9 | if err != nil { 10 | t.Errorf("Unable to create node, received error:%v\n", err) 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /raft/cliClient.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "./raft" 4 | 5 | func clientInit(shell *Shell, args []string) error { 6 | return shell.c.SendRequest(raft.HASH_CHAIN_INIT, []byte(args[1])) 7 | } 8 | 9 | func clientHash(shell *Shell, args []string) error { 10 | return shell.c.SendRequest(raft.HASH_CHAIN_ADD, []byte{}) 11 | } -------------------------------------------------------------------------------- /raft/raft/rpcMessages.go: 
-------------------------------------------------------------------------------- 1 | package raft 2 | 3 | //type AppendEntriesMsg struct { 4 | // request *AppendEntriesRequest 5 | // reply chan AppendEntriesReply 6 | //} 7 | // 8 | //type RequestVoteMsg struct { 9 | // request *RequestVoteRequest 10 | // reply chan RequestVoteReply 11 | //} 12 | // 13 | //type ClientRequestMsg struct { 14 | // request *ClientRequest 15 | // reply chan ClientReply 16 | //} 17 | // 18 | //type RegisterClientMsg struct { 19 | // request *RegisterClientRequest 20 | // reply chan RegisterClientReply 21 | //} 22 | -------------------------------------------------------------------------------- /raft/raft/utils.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | type UInt64Slice []uint64 4 | 5 | func (p UInt64Slice) Len() int { 6 | return len(p) 7 | } 8 | 9 | func (p UInt64Slice) Swap(i, j int) { 10 | p[i], p[j] = p[j], p[i] 11 | } 12 | 13 | func (p UInt64Slice) Less(i, j int) bool { 14 | return p[i] < p[j] 15 | } 16 | 17 | func (r *RaftNode) hasMajority(N uint64) bool { 18 | numNodes := len(r.GetOtherNodes()) 19 | sum := 1 20 | for k, v := range r.matchIndex { 21 | if k != r.Id && v >= N { 22 | sum++ 23 | } 24 | } 25 | if sum > numNodes/2 { 26 | return true 27 | } 28 | return false 29 | } -------------------------------------------------------------------------------- /tapestry/tapestry/helper_methods.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import "testing" 4 | 5 | var port int 6 | 7 | func equal_ids(id1, id2 ID) bool { 8 | if SharedPrefixLength(id1, id2) == DIGITS { 9 | return true 10 | } 11 | return false 12 | } 13 | 14 | func makeTapestryNode(id ID, addr string, t *testing.T) *TapestryNode { 15 | tapestry, err := start(id, port, addr) 16 | 17 | if err != nil { 18 | t.Errorf("Error while making a tapestry %v", err) 19 | } 20 | 21 | port++ 22 | return tapestry.local 23 | } 24 | 25 | func makeTapestry(id ID, addr string, t *testing.T) *Tapestry { 26 | tapestry, err := start(id, port, addr) 27 | 28 | if err != nil { 29 | t.Errorf("Error while making a tapestry %v", err) 30 | } 31 | 32 | port++ 33 | return tapestry 34 | } -------------------------------------------------------------------------------- /raft/raft/config.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | type Config struct { 9 | ElectionTimeout time.Duration 10 | HeartbeatFrequency time.Duration 11 | ClusterSize int 12 | NodeIdSize int 13 | LogPath string 14 | } 15 | 16 | func DefaultConfig() *Config { 17 | config := new(Config) 18 | config.ClusterSize = 3 19 | config.ElectionTimeout = time.Millisecond * 150 20 | config.HeartbeatFrequency = time.Millisecond * 50 21 | config.NodeIdSize = 2 22 | config.LogPath = "raftlogs" 23 | return config 24 | } 25 | 26 | func CheckConfig(config *Config) error { 27 | if config.ElectionTimeout < config.HeartbeatFrequency { 28 | return fmt.Errorf("The election timeout (%v) is less than the heartbeat frequency (%v)", config.ElectionTimeout, config.HeartbeatFrequency) 29 | } 30 | return nil 31 | } 32 | -------------------------------------------------------------------------------- /tapestry/tapestry/routingtable_test.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | /*This test adds 
100,000 nodes to the table and removes them, checking 8 | that all were deleted.*/ 9 | 10 | func TestAddAndRemove(t *testing.T) { 11 | NUM_NODES := 100000 12 | me := Node{RandomID(), ""} 13 | table := NewRoutingTable(me) 14 | nodes := make([]Node, NUM_NODES) 15 | for i := 0; i < NUM_NODES; i++ { 16 | nodes[i] = Node{RandomID(), ""} 17 | table.Add(nodes[i]) 18 | } 19 | for i := 0; i < NUM_NODES; i++ { 20 | table.Remove(nodes[i]) 21 | } 22 | 23 | for i := 0; i < DIGITS; i++ { 24 | for j := 0; j < BASE; j++ { 25 | if len(*(table.rows[i][j])) > 1 { 26 | t.Errorf("Nodes were not deleted from table.") 27 | } 28 | if len(*(table.rows[i][j])) == 1 && 29 | !equal_ids(me.Id, (*(table.rows[i][j]))[0].Id) { 30 | t.Errorf("Nodes were not deleted from table.") 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /raft/raft/testingPolicy.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "errors" 6 | ) 7 | 8 | var ErrorTestingPolicyDenied = errors.New("testing policy has denied this communication") 9 | 10 | type TestingPolicy struct { 11 | pauseWorld bool 12 | rpcPolicy map[string]bool 13 | } 14 | 15 | func NewTesting() *TestingPolicy { 16 | var tp TestingPolicy 17 | tp.rpcPolicy = make(map[string]bool) 18 | return &tp 19 | } 20 | 21 | func (tp *TestingPolicy) IsDenied(a, b NodeAddr) bool { 22 | if tp.pauseWorld { 23 | return true 24 | } 25 | commStr := getCommId(a, b) 26 | denied, exists := tp.rpcPolicy[commStr] 27 | return exists && denied 28 | } 29 | 30 | func getCommId(a, b NodeAddr) string { 31 | return fmt.Sprintf("%v_%v", a.Id, b.Id) 32 | } 33 | 34 | func (tp *TestingPolicy) RegisterPolicy(a, b NodeAddr, allowed bool) { 35 | commStr := getCommId(a, b) 36 | tp.rpcPolicy[commStr] = allowed 37 | } 38 | 39 | func (tp *TestingPolicy) PauseWorld(on bool) { 40 | tp.pauseWorld = on 41 | } -------------------------------------------------------------------------------- /chord/chord/kv_store_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "testing" 5 | "strconv" 6 | "math/rand" 7 | ) 8 | 9 | func TestRemotePutAndGetBundleRandom(t *testing.T) { 10 | nNodes := 10 11 | numRange := 100 12 | base := make(map[int]int64, numRange) 13 | result := make(map[int]int64, numRange) 14 | nodes, _ := CreateNNodesRandom(nNodes) 15 | 16 | for i := 0; i < numRange; i++ { 17 | base[i] = int64(i * i) 18 | //Now we randomly pick a node and put the value in it 19 | nodeIndex := rand.Intn(9) 20 | Put(nodes[nodeIndex], strconv.Itoa(i), strconv.Itoa(i*i)) 21 | } 22 | 23 | for i := 0; i < numRange; i++ { 24 | nodeIndex := rand.Intn(9) 25 | val, _ := Get(nodes[nodeIndex], strconv.Itoa(i)) 26 | result[i], _ = strconv.ParseInt(val, 10, 32) 27 | } 28 | 29 | equal := true 30 | for i := 0; i < numRange; i++ { 31 | if result[i] != base[i] { 32 | equal = false 33 | } 34 | } 35 | if !equal { 36 | t.Errorf("TestRemotePutAndGetBundleRandom: result") 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/client.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import "fmt" 4 | 5 | const MAX_RETRIES = 10 6 | 7 | type Client struct { 8 | LocalAddr string 9 | Id uint64 10 | OceanServ OceanAddr 11 | } 12 | 13 | func CreateClient(remoteAddr OceanAddr) (cp *Client, err error) { 14 | 
fmt.Println("Oceanstore Create client") 15 | cp = new(Client) 16 | 17 | request := ConnectRequest{} 18 | var reply *ConnectReply 19 | 20 | retries := 0 21 | for retries < MAX_RETRIES { 22 | reply, err = ConnectRPC(&remoteAddr, request) 23 | if err == nil || err.Error() != "EOF" { 24 | break 25 | } 26 | retries++ 27 | } 28 | if err != nil { 29 | fmt.Println(err) 30 | if err.Error() == "EOF" { 31 | err = fmt.Errorf("Could not access the ocean server.") 32 | } 33 | return 34 | } 35 | 36 | if !reply.Ok { 37 | fmt.Errorf("Could not register Client.") 38 | } 39 | 40 | fmt.Println("Create client reply:", reply, err) 41 | cp.Id = reply.Id 42 | cp.OceanServ = remoteAddr 43 | 44 | return 45 | } 46 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/ocean_rpc_impl.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "net" 5 | "net/rpc" 6 | "fmt" 7 | ) 8 | 9 | type OceanRPCServer struct { 10 | node *OceanNode 11 | listener net.Listener 12 | rpc *rpc.Server 13 | } 14 | 15 | func newOceanstoreRPCServer(ocean *OceanNode) (server *OceanRPCServer) { 16 | server = new(OceanRPCServer) 17 | server.node = ocean 18 | server.rpc = rpc.NewServer() 19 | listener, _, err := OpenListener() 20 | server.rpc.RegisterName(listener.Addr().String(), server) 21 | server.listener = listener 22 | 23 | if err != nil { 24 | panic("AA") 25 | } 26 | 27 | go func() { 28 | for { 29 | conn, err := server.listener.Accept() 30 | if err != nil { 31 | fmt.Printf("(%v) Raft RPC server accept error: %v\n", err) 32 | continue 33 | } 34 | go server.rpc.ServeConn(conn) 35 | } 36 | }() 37 | 38 | return 39 | } 40 | 41 | func (server *OceanRPCServer) ConnectImpl(req *ConnectRequest, rep *ConnectReply) error { 42 | rvreply, err := server.node.connect(req) 43 | *rep = rvreply 44 | return err 45 | } 46 | -------------------------------------------------------------------------------- /raft/raft/listener.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "net" 5 | "math/rand" 6 | "time" 7 | "os" 8 | "fmt" 9 | "syscall" 10 | ) 11 | 12 | // ephemeral port range 13 | const LOW_PORT int = 32768 14 | const HIGH_PORT int = 61000 15 | 16 | func OpenListener() (net.Listener, int, error) { 17 | rand.Seed(time.Now().UTC().UnixNano()) 18 | port := rand.Intn(HIGH_PORT - LOW_PORT) + LOW_PORT 19 | conn, err := OpenPort(port) 20 | if err != nil { 21 | if addrInUse(err) { 22 | time.Sleep(time.Millisecond * 100) 23 | return OpenListener() 24 | } else { 25 | return nil, 0, err //TODO check if I should use -1 for invalid port 26 | } 27 | } 28 | return conn, port, err 29 | } 30 | 31 | func addrInUse(err error) bool { 32 | if opErr, ok := err.(*net.OpError); ok { 33 | if osErr, ok := opErr.Err.(*os.SyscallError); ok { 34 | return osErr.Err == syscall.EADDRINUSE 35 | } 36 | } 37 | return false 38 | } 39 | 40 | func OpenPort(port int) (net.Listener, error) { 41 | hostname, err := os.Hostname() 42 | if err != nil { 43 | return nil, err 44 | } 45 | addr := fmt.Sprintf("%v:%v", hostname, port) 46 | conn, err := net.Listen("tcp4", addr) 47 | return conn, err 48 | } 49 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/listener.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "net" 5 | "math/rand" 6 | "time" 7 | "os" 8 | "fmt" 9 | "syscall" 10 
| ) 11 | 12 | // ephemeral port range 13 | const LOW_PORT int = 32768 14 | const HIGH_PORT int = 61000 15 | 16 | func OpenListener() (net.Listener, int, error) { 17 | rand.Seed(time.Now().UTC().UnixNano()) 18 | port := rand.Intn(HIGH_PORT - LOW_PORT) + LOW_PORT 19 | conn, err := OpenPort(port) 20 | if err != nil { 21 | if addrInUse(err) { 22 | time.Sleep(time.Millisecond * 100) 23 | return OpenListener() 24 | } else { 25 | return nil, 0, err //TODO check if I should use -1 for invalid port 26 | } 27 | } 28 | return conn, port, err 29 | } 30 | 31 | func addrInUse(err error) bool { 32 | if opErr, ok := err.(*net.OpError); ok { 33 | if osErr, ok := opErr.Err.(*os.SyscallError); ok { 34 | return osErr.Err == syscall.EADDRINUSE 35 | } 36 | } 37 | return false 38 | } 39 | 40 | func OpenPort(port int) (net.Listener, error) { 41 | hostname, err := os.Hostname() 42 | if err != nil { 43 | return nil, err 44 | } 45 | addr := fmt.Sprintf("%v:%v", hostname, port) 46 | conn, err := net.Listen("tcp4", addr) 47 | return conn, err 48 | } -------------------------------------------------------------------------------- /chord/utils/listener.go: -------------------------------------------------------------------------------- 1 | /* Purpose: Library code to help create a TCP-based listening socket. */ 2 | 3 | package utils 4 | 5 | import ( 6 | "fmt" 7 | "math/rand" 8 | "net" 9 | "os" 10 | "syscall" 11 | "time" 12 | ) 13 | 14 | // Ephemeral port range 15 | const LOW_PORT int = 32768 16 | const HIGH_PORT int = 61000 17 | 18 | // Errno to support windows machines 19 | const WIN_EADDRINUSE = syscall.Errno(10048) 20 | 21 | // Listens on a random port in the defined ephemeral range, retries if port is already in use 22 | func OpenListener() (net.Listener, int, error) { 23 | rand.Seed(time.Now().UTC().UnixNano()) 24 | port := rand.Intn(HIGH_PORT-LOW_PORT) + LOW_PORT 25 | hostname, err := os.Hostname() 26 | if err != nil { 27 | return nil, -1, err 28 | } 29 | 30 | addr := fmt.Sprintf("%v:%v", hostname, port) 31 | conn, err := net.Listen("tcp4", addr) 32 | if err != nil { 33 | if addrInUse(err) { 34 | time.Sleep(100 * time.Millisecond) 35 | return OpenListener() 36 | } else { 37 | return nil, -1, err 38 | } 39 | } 40 | return conn, port, err 41 | } 42 | 43 | func addrInUse(err error) bool { 44 | if opErr, ok := err.(*net.OpError); ok { 45 | if osErr, ok := opErr.Err.(*os.SyscallError); ok { 46 | return osErr.Err == syscall.EADDRINUSE || osErr.Err == WIN_EADDRINUSE 47 | } 48 | } 49 | return false 50 | } -------------------------------------------------------------------------------- /chord/chord/finger_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | "time" 7 | "fmt" 8 | ) 9 | 10 | func TestInitFingerTable(t *testing.T) { 11 | var res, expected []byte 12 | m := int(math.Pow(2, KEY_LENGTH)) 13 | for i := 0; i < m; i++ { 14 | node, _ := CreateDefinedNode(nil, []byte{byte(i)}) 15 | for j := 0; j < KEY_LENGTH; j++ { 16 | res = node.FingerTable[j].Start 17 | expected = []byte{byte((i + int(math.Pow(float64(2), float64(j)))) % m)} 18 | if !EqualIds(res, expected) { 19 | t.Errorf("[%v] BAD ENTRY %v: %v != %v", i, j, res, expected) 20 | } 21 | } 22 | } 23 | } 24 | 25 | /* 26 | Makes 26 nodes, waits a few seconds, and checks that every entry points to its next multiple of 10 from "Start" 27 | (ex: Start = 178 would should always point to 180) 28 | */ 29 | func TestFixNextFinger(t *testing.T) { 30 | nodes, _ := 
CreateNNodes(26) 31 | time.Sleep(time.Second * 5) 32 | for i := 0; i < 26; i++ { 33 | node := nodes[i] 34 | for j := 0; j < KEY_LENGTH; j++ { 35 | start := node.FingerTable[j].Start 36 | pointer := node.FingerTable[j].Node 37 | var expected []byte 38 | if start[0]%10 == 0 { 39 | expected = []byte{byte(start[0])} 40 | } else { 41 | expected = []byte{byte(((start[0]/10 + 1) % 26) * 10)} 42 | } 43 | 44 | if !EqualIds(pointer.Id, expected) { 45 | fmt.Printf("[%v] Error at\nStart: %v, Node: %v, expected: %v", 46 | node.Id, start, pointer.Id, expected) 47 | } 48 | } 49 | } 50 | } -------------------------------------------------------------------------------- /oceanstore/oceanstore/guid.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "../../raft/raft" 5 | "fmt" 6 | ) 7 | 8 | func (ocean *OceanNode) getRaftVguid(aguid Aguid, id uint64) (Vguid, error) { 9 | // Get the raft client struct 10 | c, ok := ocean.clients[id] 11 | if !ok { 12 | panic("Attempted to get client from id, but not found.") 13 | } 14 | 15 | res, err := c.SendRequestWithResponse(raft.GET, []byte(aguid)) 16 | if err != nil { 17 | return "", err 18 | } 19 | if res.Status != raft.OK { 20 | return "", fmt.Errorf("Could not get response from raft.") 21 | } 22 | 23 | return Vguid(res.Response), nil 24 | } 25 | 26 | func (ocean *OceanNode) setRaftVguid(aguid Aguid, vguid Vguid, id uint64) error { 27 | // Get the raft client struct 28 | c, ok := ocean.clients[id] 29 | if !ok { 30 | panic("Attempted to get client from id, but not found.") 31 | } 32 | 33 | data := fmt.Sprintf("%v:%v", aguid, vguid) 34 | 35 | res, err := c.SendRequestWithResponse(raft.SET, []byte(data)) 36 | if err != nil { 37 | return err 38 | } 39 | if res.Status != raft.OK { 40 | return fmt.Errorf("Could not get response from raft.") 41 | } 42 | return nil 43 | } 44 | 45 | func (ocean *OceanNode) removeRaftVguid(aguid Aguid, id uint64) error { 46 | // Get the raft client struct 47 | c, ok := ocean.clients[id] 48 | if !ok { 49 | panic("Attempted to get client from id, but not found.") 50 | } 51 | 52 | res, err := c.SendRequestWithResponse(raft.REMOVE, []byte(aguid)) 53 | if err != nil { 54 | return err 55 | } 56 | if res.Status != raft.OK { 57 | return fmt.Errorf("Could not get response from raft.") 58 | } 59 | 60 | return nil 61 | } 62 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/guid_test.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "testing" 5 | "strings" 6 | "time" 7 | "fmt" 8 | ) 9 | 10 | func TestRaftMap(t *testing.T) { 11 | ocean, err := Start() 12 | if err != nil { 13 | t.Errorf("Could not init puddlestore: %v", err) 14 | return 15 | } 16 | time.Sleep(time.Millisecond * 1000) 17 | 18 | fmt.Println(ocean.Local) 19 | client := ocean.raftClient 20 | 21 | err = ocean.setRaftVguid("DEAD", "BEEF", client.Id) 22 | if err != nil { 23 | t.Errorf("Could set raft vguid: %v", err) 24 | return 25 | } 26 | 27 | response, err := ocean.getRaftVguid("DEAD", client.Id) 28 | if err != nil { 29 | t.Errorf("Could get raft vguid: %v", err) 30 | return 31 | } 32 | 33 | ok := strings.Split(string(response), ":")[0] 34 | aguid := strings.Split(string(response), ":")[1] 35 | 36 | if ok != "SUCCESS" { 37 | t.Errorf("Could not get raft vguid: %v", response) 38 | } 39 | 40 | if aguid != "BEEF" { 41 | t.Errorf("Raft didn't return the correct vguid. 
BEEF != %d", aguid) 42 | } 43 | 44 | // Reset aguid to another vguid 45 | err = ocean.setRaftVguid("DEAD", "B004", client.Id) 46 | if err != nil { 47 | t.Errorf("Could set raft vguid: %v", err) 48 | return 49 | } 50 | 51 | response, err = ocean.getRaftVguid("DEAD", client.Id) 52 | if err != nil { 53 | t.Errorf("Could get raft vguid: %v", err) 54 | return 55 | } 56 | 57 | ok = strings.Split(string(response), ":")[0] 58 | aguid = strings.Split(string(response), ":")[1] 59 | 60 | if ok != "SUCCESS" { 61 | t.Errorf("Could not get raft vguid: %v", response) 62 | } 63 | 64 | if aguid != "B004" { 65 | t.Errorf("Raft didn't return the correct vguid. B004 != %d", aguid) 66 | } 67 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Oceanstore 2 | 3 | Oceanstore is a distributed file system. Its design is motivated from OceanStore: An Architecture for Global-Scale Persistent Storage paper. 4 | 5 | # Usage Example 6 | WIP 7 | 8 | # File System - Abstractions 9 | The two primitive file system objects are files and directories. A file is a single collection of sequential bytes and directories provide a way to hierarchically organize files. 10 | * Data block represents fixed length array of bytes. Files consist of a number of data blocks. 11 | * Indirect block stores references to ordered list of data blocks that make up a file. 12 | * Inode maintains the metadata associated with a file. Inode of a file points to direct blocks or indirect blocks. Inode of a directory points to inode of files or other directories. 13 | 14 | # File System Operations 15 | 16 | ## Lookup 17 | Find the inode of the root. Traverse the directories/files in its indirect block to find the first directory/file in the path. Repeat the search until we reach end of path. 18 | 19 | ## Reading and Writing 20 | To write or read from file, we need: 21 | * Location This tells the starting location in the file for reading or writing. 22 | * Buffer While reading, contents of the file are put in buffer and while writing, contents of the buffer are put into the file.
23 | 24 | If we pass the end of the file while writing, we need to add new data blocks and add their references in the indirect block. We also choose the block size. Given the starting location and the number of bytes to read or write, it is easy to find the relevant blocks, with the start position in the first block and the end position in the last block. 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /raft/README.md: -------------------------------------------------------------------------------- 1 | # Raft 2 | 3 | Raft is a consensus protocol. Its design is motivated by the paper In Search of an Understandable Consensus Algorithm. I would highly recommend this visualization for understanding Raft. 4 | 5 | # Usage Example 6 | Cli-node serves as a console for interacting with raft, creating nodes and querying state on the local nodes. It provides the following commands: 7 | * Debug 8 | * Recv 9 | * Send 10 | * Disable 11 | * Enable 12 | * state 13 | * Exit 14 | 15 | Testing-policy simulates different network splits to ensure correct behaviour under partitions. 16 | 17 | # State Machine 18 | Software that makes use of Raft works by interpreting the entries in a log as input to a state machine. In this project, the [state machine](https://github.com/sattiwari/oceanstore/blob/master/raft/raft/machine.go) calculates the next step of a hash chain. Cli-client supports interaction with the state machine. It provides the following commands: 19 | * Init (value) sends an initial value for hashing to the replicated state machine 20 | * Hash instructs the state machine to perform another round of hashing 21 | 22 | # Elections 23 | Leader election consists of a Raft cluster deciding which of the nodes in the cluster should be the leader for a given term. Raft_states contains the logic for a Raft node being in one of the three states: FOLLOWER, CANDIDATE, LEADER. 24 | 25 | # Log Replication 26 | Log replication consists of making sure that the Raft state machine is up to date across a majority of nodes in the cluster. It is based on AppendEntries (heartbeat) messages, periodically initiated by the leader. 27 | 28 | # Client Interaction 29 | Clients send requests to the cluster and get replies with the results once the corresponding log entries have been committed and fed to the state machine. If the Raft node a client connects to is not the leader, the node returns a hint to the leader node.
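The leader-hint loop can be sketched as follows. This is a self-contained illustration, not the project's client (the real client lives in raft/raft/client.go and exposes SendRequest / SendRequestWithResponse): NodeAddr, ClientStatus and ClientReply below are toy stand-ins modeled on the types outlined in raft/raft/clientRPCApi.go, and sendRPC is a hypothetical stub in place of the real RPC call.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// Toy stand-ins for the request/reply types sketched in clientRPCApi.go.
type NodeAddr struct{ Id, Addr string }

type ClientStatus int

const (
	OK ClientStatus = iota
	NOT_LEADER
	ELECTION_IN_PROGRESS
	REQ_FAILED
)

type ClientReply struct {
	Status     ClientStatus
	Response   string
	LeaderHint NodeAddr
}

// sendRPC stands in for the real client RPC; here every node except node-2 (the
// pretend leader) answers with a hint pointing at node-2, which applies the request.
func sendRPC(target NodeAddr, data []byte) (ClientReply, error) {
	if target.Id != "node-2" {
		return ClientReply{Status: NOT_LEADER, LeaderHint: NodeAddr{Id: "node-2"}}, nil
	}
	return ClientReply{Status: OK, Response: "applied"}, nil
}

// sendToLeader retries a request, following LeaderHint replies until it reaches
// the leader, as described in the Client Interaction section above.
func sendToLeader(start NodeAddr, data []byte) (ClientReply, error) {
	target := start
	for attempt := 0; attempt < 5; attempt++ {
		reply, err := sendRPC(target, data)
		if err != nil {
			return ClientReply{}, err
		}
		switch reply.Status {
		case OK:
			return reply, nil // committed and applied to the state machine
		case NOT_LEADER:
			target = reply.LeaderHint // retry against the hinted leader
		case ELECTION_IN_PROGRESS:
			time.Sleep(100 * time.Millisecond) // wait out the election, then retry
		default:
			return reply, fmt.Errorf("request failed: %v", reply.Response)
		}
	}
	return ClientReply{}, errors.New("could not reach a leader after several retries")
}

func main() {
	reply, err := sendToLeader(NodeAddr{Id: "node-1"}, []byte("hash-chain-init"))
	fmt.Println(reply.Response, err)
}
```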
30 | -------------------------------------------------------------------------------- /raft/raft/raftRPCImpl.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | type RaftRPCServer struct { 9 | node *RaftNode 10 | } 11 | 12 | func (server *RaftRPCServer) startRpcServer() { 13 | for { 14 | if server.node.IsShutDown { 15 | fmt.Printf("(%v) Shutting down RPC server\n", server.node.Id) 16 | return 17 | } 18 | conn, err := server.node.Listener.Accept() 19 | if err != nil { 20 | if !server.node.IsShutDown { 21 | fmt.Printf("(%v) Raft RPC server accept error: %v\n", server.node.Id, err) 22 | } 23 | continue 24 | } 25 | if !server.node.IsShutDown { 26 | go rpc.ServeConn(conn) 27 | } else { 28 | conn.Close() 29 | } 30 | } 31 | } 32 | 33 | func (server *RaftRPCServer) JoinImpl(req *JoinRequest, reply *JoinReply) error { 34 | err := server.node.Join(req) 35 | reply.Success = err == nil 36 | return err 37 | } 38 | 39 | func (server *RaftRPCServer) StartNodeImpl(req *StartNodeRequest, reply *StartNodeReply) error { 40 | err := server.node.StartNode(req) 41 | reply.Success = err == nil 42 | return err 43 | } 44 | 45 | func (server *RaftRPCServer) RequestVoteImpl(req *RequestVoteRequest, reply *RequestVoteReply) error { 46 | if server.node.Testing.IsDenied(req.CandidateId, *server.node.GetLocalAddr()) { 47 | return ErrorTestingPolicyDenied 48 | } 49 | rvreply, err := server.node.RequestVote(req) 50 | *reply = rvreply 51 | return err 52 | } 53 | 54 | func (server *RaftRPCServer) ClientRequestImpl(req *ClientRequest, reply *ClientReply) error { 55 | rvreply, err := server.node.ClientRequest(req) 56 | *reply = rvreply 57 | return err 58 | } 59 | 60 | func (server *RaftRPCServer) RegisterClientImpl(req *RegisterClientRequest, reply *RegisterClientReply) error { 61 | rvreply, err := server.node.RegisterClient(req) 62 | *reply = rvreply 63 | return err 64 | } 65 | 66 | func (server *RaftRPCServer) AppendEntriesImpl(req *AppendEntriesRequest, reply *AppendEntriesReply) error { 67 | if server.node.Testing.IsDenied(req.LeaderId, *server.node.GetLocalAddr()) { 68 | return ErrorTestingPolicyDenied 69 | } 70 | aereply, err := server.node.AppendEntries(req) 71 | *reply = aereply 72 | return err 73 | } -------------------------------------------------------------------------------- /raft/raft/clientRPCApi.go: -------------------------------------------------------------------------------- 1 | package raft 2 | // 3 | //type ClientStatus int 4 | // 5 | //const ( 6 | // OK ClientStatus = iota 7 | // NOT_LEADER 8 | // ELECTION_IN_PROGRESS 9 | // REQ_FAILED 10 | //) 11 | // 12 | //type FsmCommand int 13 | // 14 | //const ( 15 | // HASH_CHAIN_ADD FsmCommand = iota 16 | // HASH_CHAIN_INIT 17 | // CLIENT_REGISTRATION 18 | // INIT 19 | // NOOP 20 | //) 21 | // 22 | //type ClientRequest struct { 23 | // //unique id associated with client session. Recevied via previous register client call 24 | // ClientId uint64 25 | // 26 | // //avoids duplicates 27 | // SequenceNumber uint64 28 | // 29 | // //Command to be executed by state machine. 
It may affect state 30 | // Command FsmCommand 31 | // 32 | // //Data to accompany the command to state machine 33 | // Data []byte 34 | //} 35 | // 36 | //type ClientReply struct { 37 | // //OK if the state machine successfully applied command 38 | // Status ClientStatus 39 | // 40 | // //state machine response 41 | // Response string 42 | // 43 | // //a non leader node should reply the correct leader 44 | // LeaderHint NodeAddr 45 | //} 46 | // 47 | //func ClientRequestRPC(remoteNode *NodeAddr, request ClientRequest) (*ClientReply, error) { 48 | // var reply ClientReply 49 | // err := makeRemoteCall(remoteNode, "ClientRequestImpl", request, &reply) 50 | // if err != nil { 51 | // return nil, err 52 | // } 53 | // return &reply, nil 54 | //} 55 | // 56 | //type RegisterClientRequest struct { 57 | // // The client address invoking request 58 | // FromNode NodeAddr 59 | //} 60 | // 61 | //type RegisterClientReply struct { 62 | // //ok if the state machine registered client 63 | // Status ClientStatus 64 | // 65 | //// unique id for the client session 66 | // ClientId uint64 67 | // 68 | // // if the node contacted is not leader, it tells the correct leader 69 | // LeaderHint NodeAddr 70 | //} 71 | // 72 | //func RegisterClientRPC(remoteNode *NodeAddr, request RegisterClientRequest) (*RegisterClientReply, error) { 73 | // var reply RegisterClientReply 74 | // err := makeRemoteCall(remoteNode, "RegisterClientImpl", request, &reply) 75 | // if err != nil { 76 | // return nil, err 77 | // } 78 | // return &reply, nil 79 | //} 80 | // 81 | -------------------------------------------------------------------------------- /chord/chord/util_test.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestHashKey(t *testing.T) { 9 | key := HashKey("Im a string") 10 | sameKey := HashKey("Im a string") 11 | 12 | if !bytes.Equal(key, sameKey) { 13 | t.Errorf("Hash keys made by the same string are not equal.") 14 | } 15 | 16 | differentKey := HashKey("Im another string, totally different.") 17 | if bytes.Equal(key, differentKey) { 18 | t.Errorf("Hash keys made by the different strings are equal.") 19 | } 20 | } 21 | 22 | func TestBetweenSimple(t *testing.T) { 23 | A := []byte{10} 24 | B := []byte{15} 25 | C := []byte{20} 26 | 27 | // B is between A and C... 28 | if !Between(B, A, C) { 29 | t.Errorf("Between does not return true when it should. %v < %v < %v", 30 | A[0], B[0], C[0]) 31 | } 32 | // ...but it shouldn't be between C and A 33 | if Between(B, C, A) { 34 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v", 35 | C[0], B[0], A[0]) 36 | } 37 | // Between shouldn't be right inclusive. 38 | if Between(B, A, B) { 39 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v", 40 | A[0], B[0], B[0]) 41 | } 42 | 43 | if !Between(A, C, B) { 44 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v", 45 | C[0], A[0], B[0]) 46 | } 47 | if Between(A, B, C) { 48 | t.Errorf("Between returns true when it shouldn't. %v < %v < %v", 49 | B[0], A[0], C[0]) 50 | } 51 | } 52 | 53 | func TestBetweenEdge(t *testing.T) { 54 | A := []byte{230} 55 | B := []byte{15} 56 | C := []byte{80} 57 | 58 | // B is between A and C... 59 | if !Between(B, A, C) { 60 | t.Errorf("Between does not return true when it should. %v < %v < %v", 61 | A[0], B[0], C[0]) 62 | } 63 | // ...but it shouldn't be between C and A 64 | if Between(B, C, A) { 65 | t.Errorf("Between returns true when it shouldn't. 
%v < %v < %v", 66 | A[0], B[0], C[0]) 67 | } 68 | } 69 | 70 | func TestBetweenEdge2(t *testing.T) { 71 | A := []byte{20} 72 | B := []byte{90} 73 | C := []byte{0} 74 | 75 | // B is between A and C... 76 | if !Between(B, A, C) { 77 | t.Errorf("Between does not return true when it should. %v < %v < %v", 78 | A[0], B[0], C[0]) 79 | } 80 | } 81 | 82 | -------------------------------------------------------------------------------- /tapestry/tapestry/blobstore.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import "fmt" 4 | 5 | /* 6 | This is a utility class tacked on to the tapestry DOLR. 7 | */ 8 | type BlobStore struct { 9 | blobs map[string]Blob 10 | } 11 | 12 | type Blob struct { 13 | bytes []byte 14 | done chan bool 15 | } 16 | 17 | type BlobStoreRPC struct { 18 | store *BlobStore 19 | } 20 | 21 | /* 22 | Create a new blobstore 23 | */ 24 | func NewBlobStore() *BlobStore { 25 | bs := new(BlobStore) 26 | bs.blobs = make(map[string]Blob) 27 | return bs 28 | } 29 | 30 | /* 31 | For RPC server registration 32 | */ 33 | func NewBlobStoreRPC(store *BlobStore) *BlobStoreRPC { 34 | rpc := new(BlobStoreRPC) 35 | rpc.store = store 36 | return rpc 37 | } 38 | 39 | /* 40 | Remove all blobs and unregister them all 41 | */ 42 | func (bs *BlobStore) DeleteAll() { 43 | // unregister every blob 44 | for _, blob := range bs.blobs { 45 | blob.done <- true 46 | } 47 | // clear the map 48 | bs.blobs = make(map[string]Blob) 49 | } 50 | 51 | /* 52 | Remove the blob and unregister it 53 | */ 54 | func (bs *BlobStore) Delete(key string) bool { 55 | // If a previous blob exists, unregister it 56 | previous, exists := bs.blobs[key] 57 | if exists { 58 | previous.done <- true 59 | } 60 | delete(bs.blobs, key) 61 | return exists 62 | } 63 | 64 | /* 65 | Store bytes in the blobstore 66 | */ 67 | func (bs *BlobStore) Put(key string, blob []byte, unregister chan bool) { 68 | // If a previous blob exists, delete it 69 | bs.Delete(key) 70 | 71 | // Register the new one 72 | bs.blobs[key] = Blob{blob, unregister} 73 | } 74 | 75 | /* 76 | Get bytes from the blobstore 77 | */ 78 | func (bs *BlobStore) Get(key string) ([]byte, bool) { 79 | blob, exists := bs.blobs[key] 80 | if exists { 81 | return blob.bytes, true 82 | } else { 83 | return nil, false 84 | } 85 | } 86 | 87 | /* 88 | Fetches the specified blob from the remote node 89 | */ 90 | func FetchRemoteBlob(remote Node, key string) (blob *[]byte, err error) { 91 | fmt.Printf("FetchRemoteBlob %v %v", key, remote) 92 | err = makeRemoteCall(remote.Address, "BlobStoreRPC", "Fetch", key, &blob) 93 | return 94 | } 95 | 96 | /* 97 | Invoked over RPC to fetch bytes from the blobstore 98 | */ 99 | func (rpc *BlobStoreRPC) Fetch(key string, blob *[]byte) error { 100 | b, exists := rpc.store.blobs[key] 101 | if exists { 102 | *blob = b.bytes 103 | } 104 | return nil 105 | } -------------------------------------------------------------------------------- /chord/chord/finger.go: -------------------------------------------------------------------------------- 1 | /* Purpose: Finger table related functions for a given Chord node. 
*/ 2 | 3 | package chord 4 | 5 | import ( 6 | "time" 7 | "math/big" 8 | "log" 9 | "fmt" 10 | ) 11 | 12 | /* A single finger table entry */ 13 | type FingerEntry struct { 14 | Start []byte /* ID hash of (n + 2^i) mod (2^m) */ 15 | Node *RemoteNode /* RemoteNode that Start points to */ 16 | } 17 | 18 | /* Create initial finger table that only points to itself, will be fixed later */ 19 | func (node *Node) initFingerTable() { 20 | // Create an array of FingerEntries of length KEY_LENGTH 21 | node.FingerTable = make([]FingerEntry, KEY_LENGTH) 22 | 23 | for i := range node.FingerTable { 24 | // FingerEntry pointing to node 25 | newEntry := new(FingerEntry) 26 | newEntry.Start = fingerMath(node.Id, i, KEY_LENGTH) 27 | newEntry.Node = node.RemoteSelf 28 | node.FingerTable[i] = *newEntry 29 | } 30 | node.Successor = node.RemoteSelf 31 | } 32 | 33 | /* Called periodically (in a seperate go routine) to fix entries in our finger table. */ 34 | func (node *Node) fixNextFinger(ticker *time.Ticker) { 35 | for _ = range ticker.C { 36 | for _ = range ticker.C { 37 | next_hash := fingerMath(node.Id, node.next, KEY_LENGTH) 38 | successor, err := node.findSuccessor(next_hash) 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | node.ftLock.Lock() 43 | node.FingerTable[node.next].Node = successor 44 | node.ftLock.Unlock() 45 | node.next += 1 46 | if node.next >= KEY_LENGTH { 47 | node.next = 1 48 | } 49 | } 50 | } 51 | } 52 | 53 | /* (n + 2^i) mod (2^m) */ 54 | func fingerMath(n []byte, i int, m int) []byte { 55 | two := &big.Int{} 56 | two.SetInt64(2) 57 | 58 | N := &big.Int{} 59 | N.SetBytes(n) 60 | 61 | // 2^i 62 | I := &big.Int{} 63 | I.SetInt64(int64(i)) 64 | I.Exp(two, I, nil) 65 | 66 | // 2^m 67 | M := &big.Int{} 68 | M.SetInt64(int64(m)) 69 | M.Exp(two, M, nil) 70 | 71 | result := &big.Int{} 72 | result.Add(N, I) 73 | result.Mod(result, M) 74 | 75 | // Big int gives an empty array if value is 0. 
76 | // Here is a way for us to still return a 0 byte 77 | zero := &big.Int{} 78 | zero.SetInt64(0) 79 | if result.Cmp(zero) == 0 { 80 | return []byte{0} 81 | } 82 | 83 | return result.Bytes() 84 | } 85 | 86 | /* Print contents of a node's finger table */ 87 | func PrintFingerTable(node *Node) { 88 | fmt.Printf("[%v] FingerTable:\n", HashStr(node.Id)) 89 | for _, val := range node.FingerTable { 90 | fmt.Printf("\t{start:%v\tnodeLoc:%v %v}\n", 91 | HashStr(val.Start), HashStr(val.Node.Id), val.Node.Addr) 92 | } 93 | } -------------------------------------------------------------------------------- /oceanstore/oceanstore/util.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | "../../tapestry/tapestry" 7 | "math/rand" 8 | ) 9 | 10 | func removeExcessSlashes(path string) string { 11 | var firstNonSlash, lastNonSlash, start int 12 | 13 | onlySlashes := true 14 | str := path 15 | 16 | length := len(path) 17 | 18 | // Nothing to do 19 | if path[0] != '/' && path[length-1] != '/' { 20 | return str 21 | } 22 | 23 | // Get the first non slash 24 | for i := 0; i < length; i++ { 25 | if str[i] != '/' { 26 | onlySlashes = false 27 | firstNonSlash = i 28 | break 29 | } 30 | } 31 | 32 | // Get the last non slash 33 | for i := length - 1; i >= 0; i-- { 34 | if str[i] != '/' { 35 | lastNonSlash = i 36 | break 37 | } 38 | } 39 | 40 | // Guaranteed to be the root path 41 | if onlySlashes { 42 | str = "/" 43 | return str 44 | } else { 45 | length = lastNonSlash - firstNonSlash + 1 46 | if str[0] == '/' { 47 | start = firstNonSlash - 1 48 | length++ 49 | } else { 50 | start = 0 51 | } 52 | 53 | str = path[start : start+length] 54 | } 55 | 56 | length = len(str) 57 | for i := 0; i < length; i++ { 58 | if i+1 == length { 59 | break 60 | } 61 | 62 | if str[i] == '/' && str[i+1] == '/' { 63 | str = str[:i] + str[i+1:] 64 | length -= 1 65 | i -= 1 66 | } 67 | } 68 | 69 | return str 70 | } 71 | 72 | func hashToGuid(id tapestry.ID) Guid { 73 | s := "" 74 | for i := 0; i < tapestry.DIGITS; i++ { 75 | s += strconv.FormatUint(uint64(byte(id[i])), tapestry.BASE) 76 | } 77 | return Guid(strings.ToUpper(s)) 78 | } 79 | 80 | func (ocean *OceanNode) getRandomTapestryNode() tapestry.Node { 81 | index := rand.Int() % TAPESTRY_NODES 82 | return ocean.tnodes[index].GetLocalNode() 83 | } 84 | 85 | // Puts the contents of the ID inside the given byte 86 | // Starting at 'start' position 87 | func IdIntoByte(bytes []byte, id *tapestry.ID, start int) { 88 | for i := 0; i < tapestry.DIGITS; i++ { 89 | bytes[start+i] = byte(id[i]) 90 | } 91 | } 92 | 93 | // Helper function used in 'ls' 94 | func makeString(elements [FILES_PER_INODE + 2]string) string { 95 | ret := "" 96 | for _, s := range elements { 97 | if s == "" { 98 | break 99 | } 100 | ret += "\t" + s 101 | } 102 | return ret 103 | } 104 | 105 | func AguidIntoByte(bytes []byte, aguid Aguid, start uint32) { 106 | for i := uint32(0); i < tapestry.DIGITS; i++ { 107 | bytes[start+i] = byte(aguid[i]) 108 | } 109 | } 110 | 111 | func MakeZeros(bytes []byte, start uint32) { 112 | for i := uint32(0); i < tapestry.DIGITS; i++ { 113 | bytes[start+i] = 0 114 | } 115 | } -------------------------------------------------------------------------------- /chord/cli.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "./chord" 5 | "bufio" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "math/big" 10 | "os" 11 | "strings" 12 | ) 13 | 
14 | func NodeStr(node *chord.Node) string { 15 | var succ []byte 16 | var pred []byte 17 | if node.Successor != nil { 18 | succ = node.Successor.Id 19 | } 20 | if node.Predecessor != nil { 21 | pred = node.Predecessor.Id 22 | } 23 | 24 | return fmt.Sprintf("Node-%v: {succ:%v, pred:%v}", node.Id, succ, pred) 25 | } 26 | 27 | func main() { 28 | countPtr := flag.Int("count", 5, "Total number of Chord nodes to start up in this process") 29 | addrPtr := flag.String("addr", "", "Address of a node in the Chord ring you wish to join") 30 | idPtr := flag.String("id", "", "ID of a node in the Chord ring you wish to join") 31 | flag.Parse() 32 | 33 | var parent *chord.RemoteNode 34 | if *addrPtr == "" { 35 | parent = nil 36 | } else { 37 | parent = new(chord.RemoteNode) 38 | val := big.NewInt(0) 39 | val.SetString(*idPtr, 10) 40 | parent.Id = val.Bytes() 41 | parent.Addr = *addrPtr 42 | fmt.Printf("Attach this node to id:%v, addr:%v\n", parent.Id, parent.Addr) 43 | } 44 | 45 | var err error 46 | nodes := make([]*chord.Node, *countPtr) 47 | for i, _ := range nodes { 48 | nodes[i], err = chord.CreateNode(parent) 49 | if err != nil { 50 | fmt.Println("Unable to create new node!") 51 | log.Fatal(err) 52 | } 53 | if parent == nil { 54 | parent = nodes[i].RemoteSelf 55 | } 56 | fmt.Printf("Created -id %v -addr %v\n", chord.HashStr(nodes[i].Id), nodes[i].Addr) 57 | } 58 | 59 | for { 60 | fmt.Printf("quit|node|table|addr|data|get|put > ") 61 | reader := bufio.NewReader(os.Stdin) 62 | line, _ := reader.ReadString('\n') 63 | line = strings.TrimSpace(line) 64 | args := strings.SplitN(line, " ", 3) 65 | 66 | switch args[0] { 67 | case "node": 68 | for _, node := range nodes { 69 | fmt.Println(NodeStr(node)) 70 | } 71 | case "table": 72 | for _, node := range nodes { 73 | chord.PrintFingerTable(node) 74 | } 75 | case "addr": 76 | for _, node := range nodes { 77 | fmt.Println(node.Addr) 78 | } 79 | case "data": 80 | for _, node := range nodes { 81 | chord.PrintDataStore(node) 82 | } 83 | case "get": 84 | if len(args) > 1 { 85 | val, err := chord.Get(nodes[0], args[1]) 86 | if err != nil { 87 | fmt.Println(err) 88 | } else { 89 | fmt.Println(val) 90 | } 91 | } 92 | case "put": 93 | if len(args) > 2 { 94 | err := chord.Put(nodes[0], args[1], args[2]) 95 | if err != nil { 96 | fmt.Println(err) 97 | } 98 | } 99 | case "quit": 100 | fmt.Println("goodbye") 101 | for _, node := range nodes { 102 | chord.ShutdownNode(node) 103 | } 104 | return 105 | default: 106 | continue 107 | } 108 | } 109 | } -------------------------------------------------------------------------------- /chord/chord/util.go: -------------------------------------------------------------------------------- 1 | /* Purpose: Utility functions to help with dealing with ID hashes in Chord. 
*/ 2 | 3 | package chord 4 | 5 | import ( 6 | "crypto/sha1" 7 | "math/big" 8 | "bytes" 9 | ) 10 | 11 | /* Hash a string to its appropriate size */ 12 | func HashKey(key string) []byte { 13 | h := sha1.New() 14 | h.Write([]byte(key)) 15 | v := h.Sum(nil) 16 | return v[:KEY_LENGTH/8] 17 | } 18 | 19 | /* Convert a []byte to a big.Int string, useful for debugging/logging */ 20 | func HashStr(keyHash []byte) string { 21 | keyInt := big.Int{} 22 | keyInt.SetBytes(keyHash) 23 | return keyInt.String() 24 | } 25 | 26 | //On the Chord ring, X is between (A : B) 27 | func Between(nodeX, nodeA, nodeB []byte) bool { 28 | 29 | xInt := big.Int{} 30 | xInt.SetBytes(nodeX) 31 | 32 | aInt := big.Int{} 33 | aInt.SetBytes(nodeA) 34 | 35 | bInt := big.Int{} 36 | bInt.SetBytes(nodeB) 37 | 38 | var result bool 39 | if aInt.Cmp(&bInt) == 0 { 40 | result = false 41 | } else if aInt.Cmp(&bInt) < 0 { 42 | result = (xInt.Cmp(&aInt) == 1 && xInt.Cmp(&bInt) == -1) 43 | } else { 44 | result = !(xInt.Cmp(&bInt) == 1 && xInt.Cmp(&aInt) == -1) 45 | } 46 | 47 | return result 48 | } 49 | 50 | func EqualIds(a, b []byte) bool { 51 | return bytes.Equal(a, b) 52 | } 53 | 54 | func CreateNNodes(n int) ([]*Node, error) { 55 | if n == 0 { 56 | return nil, nil 57 | } 58 | nodes := make([]*Node, n) 59 | 60 | id := []byte{byte(0)} 61 | curr, err := CreateDefinedNode(nil, id) 62 | nodes[0] = curr 63 | if err != nil { 64 | return nil, err 65 | } 66 | 67 | for i := 1; i < n; i++ { 68 | id := []byte{byte(i * 10)} 69 | curr, err := CreateDefinedNode(nodes[0].RemoteSelf, id) 70 | nodes[i] = curr 71 | if err != nil { 72 | return nil, err 73 | } 74 | } 75 | 76 | return nodes, nil 77 | } 78 | 79 | /* Is X between (A : B] */ 80 | func BetweenRightIncl(nodeX, nodeA, nodeB []byte) bool { 81 | 82 | xInt := big.Int{} 83 | xInt.SetBytes(nodeX) 84 | 85 | aInt := big.Int{} 86 | aInt.SetBytes(nodeA) 87 | 88 | bInt := big.Int{} 89 | bInt.SetBytes(nodeB) 90 | 91 | var result bool 92 | if aInt.Cmp(&bInt) == 0 { 93 | result = true 94 | } else if aInt.Cmp(&bInt) < 0 { 95 | result = (xInt.Cmp(&aInt) == 1 && xInt.Cmp(&bInt) <= 0) 96 | } else { 97 | result = !(xInt.Cmp(&bInt) == 1 && xInt.Cmp(&aInt) <= 0) 98 | } 99 | 100 | return result 101 | } 102 | 103 | func CreateNNodesRandom(n int) ([]*Node, error) { 104 | if n == 0 { 105 | return nil, nil 106 | } 107 | nodes := make([]*Node, n) 108 | 109 | curr, err := CreateNode(nil) 110 | nodes[0] = curr 111 | if err != nil { 112 | return nil, err 113 | } 114 | 115 | for i := 1; i < n; i++ { 116 | curr, err := CreateNode(nodes[0].RemoteSelf) 117 | nodes[i] = curr 118 | if err != nil { 119 | return nil, err 120 | } 121 | } 122 | 123 | return nodes, nil 124 | } -------------------------------------------------------------------------------- /raft/raft/raftLocalImpl.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | func (r *RaftNode) Join(req *JoinRequest) error { 8 | r.mutex.Lock() 9 | defer r.mutex.Unlock() 10 | 11 | if len(r.GetOtherNodes()) == r.conf.ClusterSize { 12 | for _, otherNode := range r.GetOtherNodes() { 13 | if otherNode.Id == req.FromAddr.Id { 14 | StartNodeRPC(otherNode, r.GetOtherNodes()) 15 | return nil 16 | } 17 | } 18 | r.Error("Warning! 
Unrecognized node tried to join after all other nodes have joined.\n") 19 | return fmt.Errorf("All nodes have already joined this Raft cluster\n") 20 | } else { 21 | r.AppendOtherNodes(req.FromAddr) 22 | } 23 | return nil 24 | } 25 | 26 | func (r *RaftNode) StartNode(req *StartNodeRequest) error { 27 | r.mutex.Lock() 28 | defer r.mutex.Unlock() 29 | 30 | r.SetOtherNodes(req.OtherNodes) 31 | r.printOtherNodes("StartNode") 32 | 33 | // Start the Raft finite-state-machine, initially in follower state 34 | go r.run() 35 | 36 | return nil 37 | } 38 | 39 | type RequestVoteMsg struct { 40 | request *RequestVoteRequest 41 | reply chan RequestVoteReply 42 | } 43 | 44 | func (r *RaftNode) RequestVote(req *RequestVoteRequest) (RequestVoteReply, error) { 45 | r.Out("RequestVote request received\n") 46 | reply := make(chan RequestVoteReply) 47 | r.requestVote <- RequestVoteMsg{req, reply} 48 | return <-reply, nil 49 | } 50 | 51 | type AppendEntriesMsg struct { 52 | request *AppendEntriesRequest 53 | reply chan AppendEntriesReply 54 | } 55 | 56 | func (r *RaftNode) AppendEntries(req *AppendEntriesRequest) (AppendEntriesReply, error) { 57 | r.Debug("AppendEntries request received\n") 58 | reply := make(chan AppendEntriesReply) 59 | r.appendEntries <- AppendEntriesMsg{req, reply} 60 | return <-reply, nil 61 | } 62 | 63 | type ClientRequestMsg struct { 64 | request *ClientRequest 65 | reply chan ClientReply 66 | } 67 | 68 | func (r *RaftNode) ClientRequest(req *ClientRequest) (ClientReply, error) { 69 | r.Debug("ClientRequest request received\n") 70 | reply := make(chan ClientReply) 71 | cr, exists := r.CheckRequestCache(*req) 72 | if exists { 73 | return *cr, nil 74 | } else { 75 | r.clientRequest <- ClientRequestMsg{req, reply} 76 | return <-reply, nil 77 | } 78 | } 79 | 80 | type RegisterClientMsg struct { 81 | request *RegisterClientRequest 82 | reply chan RegisterClientReply 83 | } 84 | 85 | func (r *RaftNode) RegisterClient(req *RegisterClientRequest) (RegisterClientReply, error) { 86 | r.Debug("ClientRequest request received\n") 87 | reply := make(chan RegisterClientReply) 88 | r.registerClient <- RegisterClientMsg{req, reply} 89 | return <-reply, nil 90 | } 91 | 92 | func (r *RaftNode) printOtherNodes(ctx string) { 93 | otherStr := fmt.Sprintf("%v (%v) r.OtherNodes = [", ctx, r.Id) 94 | for _, otherNode := range r.GetOtherNodes() { 95 | otherStr += fmt.Sprintf("%v,", otherNode.Id) 96 | } 97 | Out.Printf(otherStr[:len(otherStr)-1] + "]\n") 98 | } 99 | -------------------------------------------------------------------------------- /chord/README.md: -------------------------------------------------------------------------------- 1 | # Chord 2 | 3 | Chord is a distributed hash table (DHT) protocol. Its design is motivated from Chord: A Scalable Peer-to-peer Lookup Service for Internet Applications paper. Chord distributes objects over a dynamic network of nodes, and implements a protocol for finding these objects once they have been placed in the network. 4 | 5 | # Usage Example 6 | [cli](cli.go) serves as a console for interacting with chord, creating nodes and querying state on the local nodes. 
It provides the following commands: 7 | * node display node ID, successor, and predecessor 8 | * table display finger table information for node(s) 9 | * addr display node listener address(es) 10 | * data display datastore(s) for node(s) 11 | * get get value from Chord ring associated with this key 12 | * put put key/value into Chord ring 13 | * quit quit node(s) 14 | 15 | # Keys 16 | The hashed value of a key takes the form of an m-bit unsigned integer. Thus, the keyspace for the DHT resides between 0 and 2^m - 1, inclusive. The current implementation uses SHA-1 for hashing. 17 | 18 | # The Ring 19 | Each node in the system also has a hash value (the hash of its name, i.e., IP address + port). Chord orders the nodes in a circular fashion, in which each node’s successor is the node with the next highest hash. 20 | 21 | # Overlay Network (Finger Table) 22 | To locate the node at which a particular key-value pair is stored, we need to find the successor to the hash value of the key. Linear search would be very slow on a large network of nodes, so Chord uses an overlay network. It maintains a finger table at each node. The number of entries in the finger table is equal to m, where m is the number of bits representing a hash in the keyspace of the DHT (e.g., 128). Entry i in the table, with 0 <= i < m, is the node which the owner of the table believes is the successor for the hash h + 2^i (h is the current node’s hash). 23 | 24 | # Lookup in Chord 25 | When node A services a request to find the successor of the key k, it first determines whether its own successor is the owner of k (the successor is simply entry 0 in the finger table). If it is, then A returns its successor in response to the request. Otherwise, node A finds the node B in its finger table such that B has the largest hash smaller than the hash of k, and forwards the request to B. 26 | 27 | # Dynamics 28 | Chord supports the dynamic addition and removal of nodes from the network. Each node calls the [stabilize](chord/node_local_impl.go#L32) and [fixNextFinger](chord/finger.go#L34) functions periodically to determine the successor and predecessor relationships between nodes as they are added to the network. 29 | 30 | # Future Work 31 | Support fault tolerance by maintaining a list of successors. This would require keys to be replicated across a number of nodes.
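To make the finger-table and lookup rules above concrete, here is a toy, self-contained sketch that uses small integer hashes instead of the []byte SHA-1 hashes used by the real code (compare BetweenRightIncl in chord/util.go and ClosestPrecedingFinger in chord/node_rpc_impl.go). nextHop applies the forwarding rule from the Lookup section: answer with the successor if it owns the key, otherwise forward to the closest preceding finger.

```go
package main

import "fmt"

// betweenRightIncl reports whether x lies in the half-open ring interval (a, b].
func betweenRightIncl(x, a, b int) bool {
	if a < b {
		return x > a && x <= b
	}
	return x > a || x <= b // the interval wraps around zero
}

// nextHop decides where a lookup for `key` goes from node `self`: if the key is
// owned by our successor the lookup is done, otherwise it is forwarded to the
// closest preceding finger (fingers are scanned from farthest to nearest).
func nextHop(self, successor, key int, fingers []int) (target int, done bool) {
	if betweenRightIncl(key, self, successor) {
		return successor, true
	}
	for i := len(fingers) - 1; i >= 0; i-- {
		if betweenRightIncl(fingers[i], self, key) {
			return fingers[i], false
		}
	}
	return successor, false
}

func main() {
	// Node 10 with successor 60 and finger entries pointing at 60, 110 and 200:
	fmt.Println(nextHop(10, 60, 42, []int{60, 110, 200}))  // 60 true   -> the successor owns key 42
	fmt.Println(nextHop(10, 60, 180, []int{60, 110, 200})) // 110 false -> forward the lookup to node 110
}
```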
32 | -------------------------------------------------------------------------------- /raft/raft/logging.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | ) 9 | 10 | var Debug *log.Logger 11 | var Out *log.Logger 12 | var Error *log.Logger 13 | 14 | // Initialize the loggers 15 | func init() { 16 | Debug = log.New(ioutil.Discard, "", log.Ltime|log.Lshortfile) 17 | Out = log.New(os.Stdout, "", log.Ltime|log.Lshortfile) 18 | Error = log.New(os.Stdout, "ERROR: ", log.Ltime|log.Lshortfile) 19 | } 20 | 21 | // Turn debug on or off 22 | func SetDebug(enabled bool) { 23 | if enabled { 24 | Debug = log.New(os.Stdout, "", log.Ldate|log.Ltime|log.Lshortfile) 25 | } else { 26 | Debug = log.New(ioutil.Discard, "", log.Ldate|log.Ltime|log.Lshortfile) 27 | } 28 | } 29 | 30 | func (r *RaftNode) Out(formatString string, args ...interface{}) { 31 | Out.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...))) 32 | } 33 | 34 | func (r *RaftNode) Debug(formatString string, args ...interface{}) { 35 | Debug.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...))) 36 | } 37 | 38 | func (r *RaftNode) Error(formatString string, args ...interface{}) { 39 | Error.Output(2, fmt.Sprintf("(%v/%v) %v", r.Id, r.State, fmt.Sprintf(formatString, args...))) 40 | } 41 | 42 | func (s NodeState) String() string { 43 | switch s { 44 | case FOLLOWER_STATE: 45 | return "follower" 46 | case CANDIDATE_STATE: 47 | return "candidate" 48 | case LEADER_STATE: 49 | return "leader" 50 | case JOIN_STATE: 51 | return "joining" 52 | default: 53 | return "unknown" 54 | } 55 | } 56 | 57 | func FsmCommandString(cmd FsmCommand) string { 58 | switch cmd { 59 | case HASH_CHAIN_ADD: 60 | return "hash-chain-add" 61 | case HASH_CHAIN_INIT: 62 | return "hash-chain-init" 63 | case CLIENT_REGISTRATION: 64 | return "client-registration" 65 | case INIT: 66 | return "init" 67 | case NOOP: 68 | return "noop" 69 | default: 70 | return "unknown" 71 | } 72 | } 73 | 74 | func (r *RaftNode) ShowState() { 75 | fmt.Printf("Current node state:\n") 76 | for i, otherNode := range r.GetOtherNodes() { 77 | fmt.Printf("%v - %v", i, otherNode) 78 | local := *r.GetLocalAddr() 79 | 80 | if local == otherNode { 81 | fmt.Printf(" (local node)") 82 | } 83 | if r.LeaderAddress != nil && 84 | otherNode == *r.LeaderAddress { 85 | fmt.Printf(" (leader node)") 86 | } 87 | fmt.Printf("\n") 88 | 89 | } 90 | fmt.Printf("Current term: %v\n", r.GetCurrentTerm()) 91 | fmt.Printf("Current state: %v\n", r.State) 92 | fmt.Printf("Current commit index: %v\n", r.commitIndex) 93 | fmt.Printf("Current next index: %v\n", r.nextIndex) 94 | fmt.Printf("Current match index: %v\n", r.matchIndex) 95 | fmt.Printf("Current fileMap: %v\n", r.fileMap) 96 | } 97 | 98 | func (r *RaftNode) PrintLogCache() { 99 | fmt.Printf("Node %v LogCache:\n", r.Id) 100 | for _, entry := range r.logCache { 101 | fmt.Printf(" idx:%v, term:%v\n", entry.Index, entry.TermId) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /oceanstore/oceanstore/oceanstore.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "../../raft/raft" 5 | "../../tapestry/tapestry" 6 | "math/rand" 7 | ) 8 | 9 | const TAPESTRY_NODES = 3 10 | const RAFT_NODES = 1 11 | 12 | type OceanAddr struct { 13 | Addr string 14 | } 15 | 16 | type Vguid string 
17 | type Aguid string 18 | type Guid string 19 | 20 | type OceanNode struct { 21 | tnodes []*tapestry.Tapestry 22 | rnodes []*raft.RaftNode 23 | rootV uint32 24 | clientPaths map[uint64]string // client id -> curpath 25 | clients map[uint64]*raft.Client // client id -> client 26 | 27 | Local OceanAddr 28 | raftClient *raft.Client 29 | server *OceanRPCServer 30 | } 31 | 32 | func Start() (p *OceanNode, err error) { 33 | var ocean OceanNode 34 | p = &ocean 35 | ocean.tnodes = make([]*tapestry.Tapestry, TAPESTRY_NODES) 36 | ocean.rnodes = make([]*raft.RaftNode, RAFT_NODES) 37 | ocean.clientPaths = make(map[uint64]string) 38 | ocean.clients = make(map[uint64]*raft.Client) 39 | 40 | // Start runnning the tapestry nodes. -------------- 41 | t, err := tapestry.Start(0, "") 42 | if err != nil { 43 | panic(err) 44 | } 45 | 46 | ocean.tnodes[0] = t 47 | for i := 1; i < TAPESTRY_NODES; i++ { 48 | t, err = tapestry.Start(0, ocean.tnodes[0].GetLocalAddr()) 49 | if err != nil { 50 | panic(err) 51 | } 52 | ocean.tnodes[i] = t 53 | } 54 | 55 | ocean.rnodes, err = raft.CreateLocalCluster(raft.DefaultConfig()) 56 | if err != nil { 57 | panic(err) 58 | } 59 | 60 | // RPC server -------------------------------------- 61 | ocean.server = newOceanstoreRPCServer(p) 62 | ocean.Local = OceanAddr{ocean.server.listener.Addr().String()} 63 | // ------------------------------------------------- 64 | 65 | // Create ocean raft client. Persist until raft is settled 66 | client, err := CreateClient(ocean.Local) 67 | for err != nil { 68 | client, err = CreateClient(ocean.Local) 69 | } 70 | 71 | ocean.raftClient = ocean.clients[client.Id] 72 | if ocean.raftClient == nil { 73 | panic("Could not retrieve ocean raft client.") 74 | } 75 | 76 | // Create the root node ---------------------------- 77 | _, err = ocean.mkdir(&MkdirRequest{ocean.raftClient.Id, "/"}) 78 | if err != nil { 79 | panic("Could not create root node") 80 | } 81 | 82 | return 83 | } 84 | 85 | func (ocean *OceanNode) getCurrentDir(id uint64) string { 86 | curdir, ok := ocean.clientPaths[id] 87 | if !ok { 88 | panic("Did not found the current path of a client that is supposed to be registered") 89 | } 90 | return curdir 91 | } 92 | 93 | func randSeq(n int) string { 94 | var letters = []rune("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 95 | b := make([]rune, n) 96 | for i := range b { 97 | b[i] = letters[rand.Intn(len(letters))] 98 | } 99 | return string(b) 100 | } 101 | 102 | func (puddle *OceanNode) getRandomRaftNode() *raft.RaftNode { 103 | index := rand.Int() % RAFT_NODES 104 | return puddle.rnodes[index] 105 | } -------------------------------------------------------------------------------- /chord/chord/node_rpc_impl.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "errors" 7 | ) 8 | 9 | /* Validate that we're executing this RPC on the intended node */ 10 | func validateRpc(node *Node, reqId []byte) error { 11 | if !bytes.Equal(node.Id, reqId) { 12 | errStr := fmt.Sprintf("Node ids do not match %v, %v", node.Id, reqId) 13 | return errors.New(errStr) 14 | } 15 | return nil 16 | } 17 | 18 | func (node *Node) GetPredecessorId(req *RemoteId, reply *IdReply) error { 19 | if err := validateRpc(node, req.Id); err != nil { 20 | return err 21 | } 22 | // Predecessor may be nil, which is okay. 
23 | if node.Predecessor == nil { 24 | reply.Id = nil 25 | reply.Addr = "" 26 | reply.Valid = false 27 | } else { 28 | reply.Id = node.Predecessor.Id 29 | reply.Addr = node.Predecessor.Addr 30 | reply.Valid = true 31 | } 32 | return nil 33 | } 34 | 35 | func (node *Node) SetPredecessorId(req *UpdateReq, reply *RpcOkay) error { 36 | if err := validateRpc(node, req.FromId); err != nil { 37 | return err 38 | } 39 | node.Predecessor.Id = req.UpdateId 40 | node.Predecessor.Addr = req.UpdateAddr 41 | reply.Ok = true 42 | return nil 43 | } 44 | 45 | func (node *Node) GetSuccessorId(req *RemoteId, reply *IdReply) error { 46 | if err := validateRpc(node, req.Id); err != nil { 47 | return err 48 | } 49 | 50 | reply.Id = node.Successor.Id 51 | reply.Addr = node.Successor.Addr 52 | reply.Valid = true 53 | return nil 54 | } 55 | 56 | func (node *Node) SetSuccessorId(req *UpdateReq, reply *RpcOkay) error { 57 | if err := validateRpc(node, req.FromId); err != nil { 58 | return err 59 | } 60 | node.Successor.Id = req.UpdateId 61 | node.Successor.Addr = req.UpdateAddr 62 | reply.Ok = true 63 | return nil 64 | } 65 | 66 | func (node *Node) FindSuccessor(query *RemoteQuery, reply *IdReply) error { 67 | if err := validateRpc(node, query.FromId); err != nil { 68 | return err 69 | } 70 | remNode, err := node.findSuccessor(query.Id) 71 | if err != nil { 72 | reply.Valid = false 73 | return err 74 | } 75 | reply.Id = remNode.Id 76 | reply.Addr = remNode.Addr 77 | reply.Valid = true 78 | return nil 79 | } 80 | 81 | func (node *Node) ClosestPrecedingFinger(query *RemoteQuery, reply *IdReply) error { 82 | if err := validateRpc(node, query.FromId); err != nil { 83 | return err 84 | } 85 | //remoteId and fromId 86 | for i := KEY_LENGTH - 1; i >= 0; i-- { 87 | if BetweenRightIncl(node.FingerTable[i].Node.Id, node.Id, query.Id) { 88 | reply.Id = node.FingerTable[i].Node.Id 89 | reply.Addr = node.FingerTable[i].Node.Addr 90 | reply.Valid = true 91 | return nil 92 | } 93 | } 94 | 95 | reply.Valid = false 96 | return errors.New("There is no closest preceding finger") 97 | } 98 | 99 | func (node *Node) Notify(req *NotifyReq, reply *RpcOkay) error { 100 | if err := validateRpc(node, req.NodeId); err != nil { 101 | reply.Ok = false 102 | return err 103 | } 104 | remote_node := new(RemoteNode) 105 | remote_node.Id = req.UpdateId 106 | remote_node.Addr = req.UpdateAddr 107 | node.notify(remote_node) 108 | reply.Ok = true 109 | return nil 110 | } 111 | -------------------------------------------------------------------------------- /raft/raft/machine.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "crypto/md5" 5 | "fmt" 6 | "strings" 7 | ) 8 | 9 | func (r *RaftNode) processLog(entry LogEntry) ClientReply { 10 | Out.Printf("%v\n", entry) 11 | status := OK 12 | response := "" 13 | switch entry.Command { 14 | case HASH_CHAIN_INIT: 15 | if r.hash == nil { 16 | r.hash = entry.Data 17 | response = fmt.Sprintf("%v", r.hash) 18 | } else { 19 | status = REQ_FAILED 20 | response = "The hash chain should only be initialized once!" 
21 | } 22 | case HASH_CHAIN_ADD: 23 | if r.hash == nil { 24 | status = REQ_FAILED 25 | response = "The hash chain hasn't been initialized yet" 26 | } else { 27 | sum := md5.Sum(r.hash) 28 | fmt.Printf("hash is changing from %v to %v\n", r.hash, sum) 29 | r.hash = sum[:] 30 | response = fmt.Sprintf("%v", r.hash) 31 | } 32 | // The following commands do not interact with the hash chain 33 | //TODO: Do the byte[] and string casting for entry.Data 34 | case REMOVE: 35 | //By now we have reached consensus, so we delete the key 36 | r.requestMutex.Lock() 37 | key := string(entry.Data) 38 | if entry.Data == nil { 39 | response = "FAIL:The key cannot be nil" 40 | } else if val, ok := r.fileMap[key]; ok { 41 | delete(r.fileMap, key) 42 | response = "SUCCESS:" + val 43 | } else { 44 | response = "FAIL:The key does not exist" 45 | } 46 | r.requestMutex.Unlock() 47 | case SET: 48 | r.requestMutex.Lock() 49 | if entry.Data == nil { 50 | response = "FAIL:The key cannot be nil" 51 | } else { 52 | keyVal := string(entry.Data) 53 | keyValAr := strings.Split(keyVal, ":") 54 | r.fileMap[keyValAr[0]] = keyValAr[1] 55 | response = "SUCCESS:" + keyValAr[1] 56 | } 57 | r.requestMutex.Unlock() 58 | 59 | case LOCK: 60 | r.lockMapMtx.Lock() 61 | key := string(entry.Data) 62 | if entry.Data == nil { 63 | response = "FAIL:The key cannot be nil" 64 | } else if _, ok := r.lockMap[key]; ok { 65 | //means it's already locked 66 | response = "FAIL:The key is locked" 67 | } else { 68 | //means it's unlocked, so we lock it 69 | r.lockMap[key] = true 70 | response = "SUCCESS:Key " + key + " is now locked" 71 | } 72 | r.lockMapMtx.Unlock() 73 | 74 | case UNLOCK: 75 | r.lockMapMtx.Lock() 76 | key := string(entry.Data) 77 | if entry.Data == nil { 78 | response = "FAIL:The key cannot be nil" 79 | } else { 80 | //We unlock unconditionally; it is up to the user not to unlock a key 81 | //held by someone else 82 | delete(r.lockMap, key) 83 | response = "SUCCESS:Key " + key + " is now unlocked" 84 | } 85 | r.lockMapMtx.Unlock() 86 | 87 | default: 88 | response = "Success!" 89 | } 90 | 91 | reply := ClientReply{ 92 | Status: status, 93 | Response: response, 94 | LeaderHint: *r.GetLocalAddr(), 95 | } 96 | 97 | if entry.CacheId != "" { 98 | r.AddRequest(entry.CacheId, reply) 99 | } 100 | 101 | r.requestMutex.Lock() 102 | msg, exists := r.requestMap[entry.Index] 103 | if exists { 104 | msg.reply <- reply 105 | delete(r.requestMap, entry.Index) 106 | } 107 | r.requestMutex.Unlock() 108 | 109 | return reply 110 | } 111 | -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry-client.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import "fmt" 4 | 5 | // Invoke tapestry.Store on a remote tapestry node 6 | func TapestryStore(remote Node, key string, value []byte) error { 7 | fmt.Printf("Making remote TapestryStore call\n") 8 | return makeRemoteNodeCall(remote, "TapestryStore", StoreRequest{remote, key, value}, &StoreResponse{}) 9 | } 10 | 11 | // Invoke tapestry.Lookup on a remote tapestry node 12 | func TapestryLookup(remote Node, key string) (nodes []Node, err error) { 13 | fmt.Printf("Making remote TapestryLookup call\n") 14 | var rsp LookupResponse 15 | err = makeRemoteNodeCall(remote, "TapestryLookup", LookupRequest{remote, key}, &rsp) 16 | nodes = rsp.Nodes 17 | return 18 | } 19 | 20 | // Get data from a tapestry node.
Looks up key then fetches directly 21 | func TapestryGet(remote Node, key string) ([]byte, error) { 22 | fmt.Printf("Making remote TapestryGet call\n") 23 | // Lookup the key 24 | replicas, err := TapestryLookup(remote, key) 25 | if err != nil { 26 | return nil, err 27 | } 28 | if len(replicas) == 0 { 29 | return nil, fmt.Errorf("No replicas returned for key %v", key) 30 | } 31 | 32 | // Contact replicas 33 | var errs []error 34 | for _, replica := range replicas { 35 | blob, err := FetchRemoteBlob(replica, key) 36 | if err != nil { 37 | errs = append(errs, err) 38 | } 39 | if blob != nil { 40 | return *blob, nil 41 | } 42 | } 43 | 44 | return nil, fmt.Errorf("Error contacting replicas, %v: %v", replicas, errs) 45 | } 46 | 47 | func TapestryRemove(remote Node, key string) (success bool, err error) { 48 | fmt.Printf("Making remote TapestryRemove call\n") 49 | var rsp RemoveResponse 50 | err = makeRemoteNodeCall(remote, "TapestryRemove", RemoveRequest{remote, key}, &rsp) 51 | success = rsp.Removed 52 | return 53 | } 54 | 55 | type StoreRequest struct { 56 | To Node 57 | Key string 58 | Value []byte 59 | } 60 | 61 | type StoreResponse struct { 62 | } 63 | 64 | type LookupRequest struct { 65 | To Node 66 | Key string 67 | } 68 | 69 | type LookupResponse struct { 70 | Nodes []Node 71 | } 72 | 73 | type RemoveRequest struct { 74 | To Node 75 | Key string 76 | } 77 | 78 | type RemoveResponse struct { 79 | Removed bool 80 | } 81 | 82 | // Server: extension method to open up Store via RPC 83 | func (server *TapestryRPCServer) TapestryStore(req StoreRequest, rsp *StoreResponse) (err error) { 84 | fmt.Printf("Received remote invocation of Tapestry.Store\n") 85 | return server.tapestry.Store(req.Key, req.Value) 86 | } 87 | 88 | // Server: extension method to open up Lookup via RPC 89 | func (server *TapestryRPCServer) TapestryLookup(req LookupRequest, rsp *LookupResponse) (err error) { 90 | fmt.Printf("Received remote invocation of Tapestry.Lookup\n") 91 | rsp.Nodes, err = server.tapestry.Lookup(req.Key) 92 | return 93 | } 94 | 95 | // Server: extension method to open up Remove via RPC 96 | func (server *TapestryRPCServer) TapestryRemove(req RemoveRequest, rsp *RemoveResponse) (err error) { 97 | fmt.Printf("Received remote invocation of Tapestry.Remove\n") 98 | rsp.Removed = server.tapestry.Remove(req.Key) 99 | return 100 | } -------------------------------------------------------------------------------- /raft/raft/testUtils.go: -------------------------------------------------------------------------------- 1 | package raft 2 | // 3 | //import ( 4 | // "fmt" 5 | // "time" 6 | // "math/rand" 7 | //) 8 | // 9 | ////TODO - move away the code to find majority element 10 | //func getLeader(nodes []*RaftNode) *RaftNode { 11 | // it := 1 12 | // var leader *RaftNode = nil 13 | // for leader == nil && it < 50 { 14 | // fmt.Printf("iteration %v\n", it) 15 | // time.Sleep(time.Millisecond * 200) 16 | // clusterSize := nodes[0].conf.ClusterSize 17 | // idCountMap := make(map[string]int, clusterSize) 18 | // for _, n := range nodes { 19 | // if n.LeaderAddress != nil { 20 | // idCountMap[n.LeaderAddress.Id]++ 21 | // } 22 | // } 23 | // fmt.Printf("node id to count map %v\n\n", idCountMap) 24 | // var id string 25 | // max := -1 26 | // for k,v := range idCountMap { 27 | // if max < v { 28 | // max = v 29 | // id = k 30 | // } 31 | // } 32 | // if max > clusterSize / 2 { 33 | // for _,node := range nodes { 34 | // if node.LeaderAddress.Id == id { 35 | // return node 36 | // } 37 | // } 38 | // } 39 | // it++ 40 
| // } 41 | // return leader 42 | //} 43 | // 44 | //func randSeq(n int) string { 45 | // var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 46 | // b := make([]rune, n) 47 | // for i := range b { 48 | // b[i] = letters[rand.Intn(len(letters))] 49 | // } 50 | // return string(b) 51 | //} 52 | // 53 | //func checkNodes(nodes []*RaftNode, clusterSize int) bool { 54 | // for _, n := range nodes { 55 | // if len(n.GetOtherNodes()) != clusterSize { 56 | // Error.Println(len(n.GetOtherNodes()), clusterSize) 57 | // return false 58 | // } 59 | // } 60 | // return true 61 | //} 62 | // 63 | //func printNodes(nodes []*RaftNode) { 64 | // for _, n := range nodes { 65 | // n.PrintLogCache() 66 | // n.ShowState() 67 | // } 68 | //} 69 | // 70 | //func checkMajorityTerms(nodes []*RaftNode) bool { 71 | // sums := make(map[uint64]int, nodes[0].conf.ClusterSize) 72 | // for _, n := range nodes { 73 | // sums[n.GetCurrentTerm()]++ 74 | // } 75 | // max := -1 76 | // for _, v := range sums { 77 | // if v > max { 78 | // max = v 79 | // } 80 | // } 81 | // 82 | // if max > len(nodes)/2 { 83 | // return true 84 | // } 85 | // return false 86 | //} 87 | // 88 | //func checkMajorityCommitIndex(nodes []*RaftNode) bool { 89 | // sums := make(map[uint64]int, nodes[0].conf.ClusterSize) 90 | // for _, n := range nodes { 91 | // sums[n.commitIndex]++ 92 | // } 93 | // max := -1 94 | // for _, v := range sums { 95 | // if v > max { 96 | // max = v 97 | // } 98 | // } 99 | // 100 | // if max > len(nodes)/2 { 101 | // return true 102 | // } 103 | // return false 104 | //} 105 | // 106 | //func checkLogOrder(nodes []*RaftNode) bool { 107 | // for _, n := range nodes { 108 | // prevIndex := int64(-1) 109 | // prevTerm := int64(-1) 110 | // seen := make(map[uint64]bool) 111 | // for _, entry := range n.logCache { 112 | // if seen[entry.Index] || int64(entry.Index)-1 != prevIndex || int64(entry.Term) < prevTerm { 113 | // return false 114 | // } 115 | // 116 | // seen[entry.Index] = true 117 | // prevIndex = int64(entry.Index) 118 | // prevTerm = int64(entry.Term) 119 | // } 120 | // } 121 | // return true 122 | //} 123 | // 124 | //func shutdownNodes(nodes []*RaftNode) { 125 | // for _, n := range nodes { 126 | // n.IsShutDown = true 127 | // n.gracefulExit <- true 128 | // } 129 | // time.Sleep(time.Millisecond * 200) 130 | //} -------------------------------------------------------------------------------- /chord/chord/kv_store.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "log" 5 | "fmt" 6 | ) 7 | 8 | /* Get a value in the datastore, provided an abitrary node in the ring */ 9 | func Get(node *Node, key string) (string, error) { 10 | remNode, err := node.locate(key) 11 | if err != nil { 12 | log.Fatal(err) 13 | } 14 | return Get_RPC(remNode, key) 15 | } 16 | 17 | /* Put a key/value in the datastore, provided an abitrary node in the ring */ 18 | func Put(node *Node, key string, value string) error { 19 | remNode, err := node.locate(key) 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | return Put_RPC(remNode, key, value) 24 | } 25 | 26 | /* Internal helper method to find the appropriate node in the ring */ 27 | func (node *Node) locate(key string) (*RemoteNode, error) { 28 | id := HashKey(key) 29 | return node.findSuccessor(id) 30 | } 31 | 32 | /* Print the contents of a node's data store */ 33 | func PrintDataStore(node *Node) { 34 | fmt.Printf("Node-%v datastore: %v\n", HashStr(node.Id), node.dataStore) 35 | } 36 | 
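// A minimal usage sketch of the helpers above (illustrative only; the key and
// value literals are placeholders, not data from this project). Any node that
// has already joined the ring can serve as the entry point: locate() hashes
// the key and findSuccessor routes the call to the node responsible for it.
func exampleKvUsage(entry *Node) (string, error) {
	// Store a value; it ends up on the successor of HashKey("alpha").
	if err := Put(entry, "alpha", "1"); err != nil {
		return "", err
	}
	// Read it back; any node in the same ring resolves to the same successor.
	return Get(entry, "alpha")
}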
37 | func (node *Node) GetLocal(req *KeyValueReq, reply *KeyValueReply) error { 38 | if err := validateRpc(node, req.NodeId); err != nil { 39 | return err 40 | } 41 | (&node.dsLock).RLock() 42 | key := req.Key 43 | val := node.dataStore[key] 44 | reply.Key = key 45 | reply.Value = val 46 | (&node.dsLock).RUnlock() 47 | return nil 48 | } 49 | 50 | func (node *Node) PutLocal(req *KeyValueReq, reply *KeyValueReply) error { 51 | if err := validateRpc(node, req.NodeId); err != nil { 52 | return err 53 | } 54 | (&node.dsLock).Lock() 55 | key := req.Key 56 | val := req.Value 57 | node.dataStore[key] = val 58 | reply.Key = key 59 | reply.Value = val 60 | (&node.dsLock).Unlock() 61 | return nil 62 | } 63 | 64 | /* When we discover a new predecessor we may need to transfer some keys to it */ 65 | /*Oh I think I get it, this one is to send 66 | This was eliminated by the TAs because of its redundancy */ 67 | func (node *Node) obtainNewKeys() error { 68 | //lock the local db and get the keys 69 | (&node.dsLock).Lock() 70 | for key, val := range node.dataStore { 71 | keyByte := HashKey(key) 72 | if !BetweenRightIncl(keyByte, node.Predecessor.Id, node.Id) { 73 | //means we send it to the predecessor 74 | err := Put_RPC(node.Predecessor, key, val) 75 | if err != nil { 76 | (&node.dsLock).Unlock() 77 | return err 78 | } 79 | //then we delete it locally 80 | delete(node.dataStore, key) 81 | } 82 | } 83 | //unlock the db 84 | (&node.dsLock).Unlock() 85 | return nil 86 | } 87 | 88 | /* Find locally stored keys that are between (predId : fromId], any of 89 | these nodes should be moved to fromId */ 90 | func (node *Node) TransferKeys(req *TransferReq, reply *RpcOkay) error { 91 | if err := validateRpc(node, req.NodeId); err != nil { 92 | return err 93 | } 94 | (&node.dsLock).Lock() 95 | for key, val := range node.dataStore { 96 | keyByte := HashKey(key) 97 | pred := req.PredId 98 | if pred == nil { 99 | pred = node.Id 100 | } 101 | if BetweenRightIncl(keyByte, pred, req.FromId) { 102 | //means we send it to the requester, because it belongs to them 103 | err := Put_RPC(node.Predecessor, key, val) 104 | if err != nil { 105 | (&node.dsLock).Unlock() 106 | reply.Ok = false 107 | return err 108 | } 109 | //then we delete it locally 110 | delete(node.dataStore, key) 111 | } 112 | } 113 | (&node.dsLock).Unlock() 114 | reply.Ok = true 115 | return nil 116 | } -------------------------------------------------------------------------------- /tapestry/tapestry/backpointers.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | /* 8 | Backpointers are stored by level, like the routing table 9 | A backpointer at level n indicates that the backpointer shares a prefix of length n with this node 10 | Access to the backpointers is managed by a lock 11 | */ 12 | type Backpointers struct { 13 | local Node // the local tapestry node 14 | sets [DIGITS]*NodeSet // backpointers 15 | } 16 | 17 | /* 18 | Represents a set of nodes. The implementation is just a wrapped map, and access is controlled with a mutex. 
19 | */ 20 | type NodeSet struct { 21 | mutex sync.Mutex 22 | data map[Node]bool 23 | } 24 | 25 | /* 26 | Creates and returns a new backpointer set 27 | */ 28 | func NewBackpointers(me Node) *Backpointers { 29 | b := new(Backpointers) 30 | b.local = me 31 | for i := 0; i < DIGITS; i++ { 32 | b.sets[i] = NewNodeSet() 33 | } 34 | return b 35 | } 36 | 37 | /* 38 | Add a backpointer for the provided node 39 | Returns true if a new backpointer was added 40 | */ 41 | func (b *Backpointers) Add(node Node) bool { 42 | if b.local != node { 43 | return b.level(node).Add(node) 44 | } 45 | return false 46 | } 47 | 48 | /* 49 | Remove a backpointer for the provided node, if it existed 50 | Returns true if the backpointer existed and was subsequently removed 51 | */ 52 | func (b *Backpointers) Remove(node Node) bool { 53 | if b.local != node { 54 | return b.level(node).Remove(node) 55 | } 56 | return false 57 | } 58 | 59 | /* 60 | Get all backpointers at the provided level 61 | */ 62 | func (b *Backpointers) Get(level int) []Node { 63 | return b.sets[level].Nodes() 64 | } 65 | 66 | // gets the node set for the level that the specified node should occupy 67 | func (b *Backpointers) level(node Node) *NodeSet { 68 | return b.sets[SharedPrefixLength(b.local.Id, node.Id)] 69 | } 70 | 71 | /* 72 | Create a new node set 73 | */ 74 | func NewNodeSet() *NodeSet { 75 | s := new(NodeSet) 76 | s.data = make(map[Node]bool) 77 | return s 78 | } 79 | 80 | /* 81 | Add the given node to the node set if it isn't already in the set 82 | Returns true if the node was added; false if it already existed 83 | */ 84 | func (s *NodeSet) Add(n Node) bool { 85 | s.mutex.Lock() 86 | _, exists := s.data[n] 87 | s.data[n] = true 88 | s.mutex.Unlock() 89 | return !exists 90 | } 91 | 92 | /* 93 | Add all of the nodes to the node set 94 | */ 95 | func (s *NodeSet) AddAll(nodes []Node) { 96 | s.mutex.Lock() 97 | for _, node := range nodes { 98 | s.data[node] = true 99 | } 100 | s.mutex.Unlock() 101 | } 102 | 103 | /* 104 | Remove the given node from the node set if it's currently in the set 105 | Returns true if the node was removed; false if it was not in the set 106 | */ 107 | func (s *NodeSet) Remove(n Node) bool { 108 | s.mutex.Lock() 109 | _, exists := s.data[n] 110 | delete(s.data, n) 111 | s.mutex.Unlock() 112 | return exists 113 | } 114 | 115 | /* 116 | Test whether the specified node is contained in the set 117 | */ 118 | func (s *NodeSet) Contains(n Node) (b bool) { 119 | s.mutex.Lock() 120 | b = s.data[n] 121 | s.mutex.Unlock() 122 | return 123 | } 124 | 125 | /* 126 | Returns the size of the set 127 | */ 128 | func (s *NodeSet) Size() int { 129 | s.mutex.Lock() 130 | size := len(s.data) 131 | s.mutex.Unlock() 132 | return size 133 | } 134 | 135 | /* 136 | Get all nodes in the set as a slice 137 | */ 138 | func (s *NodeSet) Nodes() []Node { 139 | s.mutex.Lock() 140 | nodes := make([]Node, 0, len(s.data)) 141 | for node := range s.data { 142 | nodes = append(nodes, node) 143 | } 144 | s.mutex.Unlock() 145 | return nodes 146 | } -------------------------------------------------------------------------------- /chord/chord/node_local_impl.go: -------------------------------------------------------------------------------- 1 | /* Purpose: Local Chord node functions to interact with the Chord ring. 
*/ 2 | 3 | package chord 4 | 5 | import ( 6 | "fmt" 7 | "time" 8 | "log" 9 | "errors" 10 | ) 11 | 12 | // This node is trying to join an existing ring that a remote node is a part of (i.e., other) 13 | func (node *Node) join(other *RemoteNode) error { 14 | // Handle case of "other" being nil (first node on ring). 15 | if other == nil { 16 | return nil 17 | } 18 | 19 | node.Predecessor = nil 20 | succ, err := FindSuccessor_RPC(other, node.Id) 21 | if EqualIds(succ.Id, node.Id) { 22 | return errors.New("node already exists") 23 | } 24 | node.ftLock.Lock() 25 | node.Successor = succ 26 | node.FingerTable[0].Node = succ 27 | node.ftLock.Unlock() 28 | return err 29 | } 30 | 31 | // Thread 2: Pseudocode from figure 7 of chord paper 32 | func (node *Node) stabilize(ticker *time.Ticker) { 33 | for _ = range ticker.C { 34 | if node.IsShutdown { 35 | fmt.Printf("[%v-stabilize] Shutting down stabilize timer\n", HashStr(node.Id)) 36 | ticker.Stop() 37 | return 38 | } 39 | 40 | pred, err := GetPredecessorId_RPC(node.Successor) 41 | 42 | if err != nil { 43 | log.Fatal("GetPredecessorId_RPC error: " + err.Error()) 44 | } 45 | 46 | if pred != nil && BetweenRightIncl(pred.Id, node.Id, node.Successor.Id) { 47 | node.ftLock.Lock() 48 | node.Successor = pred 49 | node.FingerTable[0].Node = pred 50 | node.ftLock.Unlock() 51 | } 52 | 53 | // If you are your own successor, do not notify yourself. 54 | if !EqualIds(node.Successor.Id, node.Id) { 55 | err = Notify_RPC(node.Successor, node.RemoteSelf) 56 | if err != nil { 57 | log.Fatal("Notify_RPC error: " + err.Error()) 58 | } 59 | } 60 | } 61 | } 62 | 63 | // Pseudocode from figure 7 of chord paper 64 | func (node *Node) notify(remoteNode *RemoteNode) { 65 | 66 | //TODO implement this method 67 | } 68 | 69 | // Pseudocode from figure 4 of chord paper 70 | func (node *Node) findSuccessor(id []byte) (*RemoteNode, error) { 71 | // Check if id is between me and my immediate successor. 72 | // Check if I'm my own successor. 73 | // If so, return it. 74 | if BetweenRightIncl(id, node.Id, node.Successor.Id) || 75 | EqualIds(node.Successor.Id, node.Id) { 76 | 77 | return node.Successor, nil 78 | } 79 | 80 | n, err := node.findPredecessor(id) 81 | if err != nil { 82 | log.Fatal("findPredecessor error: " + err.Error()) 83 | } 84 | 85 | return FindSuccessor_RPC(n, id) 86 | 87 | } 88 | 89 | // Pseudocode from figure 4 of chord paper 90 | func (node *Node) findPredecessor(id []byte) (*RemoteNode, error) { 91 | curr := node.RemoteSelf 92 | succ, err := GetSuccessorId_RPC(curr) 93 | 94 | // Loop while id is not between the current node and the calculated successor.
95 | for !Between(id, curr.Id, succ.Id) && !EqualIds(curr.Id, succ.Id) { 96 | curr, err = ClosestPrecedingFinger_RPC(curr, id) 97 | if err != nil { 98 | log.Fatal("ClosestPrecedingFinger_RPC error: " + err.Error()) 99 | } 100 | 101 | succ, err = GetSuccessorId_RPC(curr) 102 | if err != nil { 103 | log.Fatal("GetSuccessorId_RPC error: " + err.Error()) 104 | } 105 | } 106 | return curr, err 107 | } 108 | 109 | /* Find the closest preceding finger from a remote node for an ID */ 110 | func ClosestPrecedingFinger_RPC(remoteNode *RemoteNode, id []byte) (*RemoteNode, error) { 111 | if remoteNode == nil { 112 | return nil, errors.New("RemoteNode is empty!") 113 | } 114 | var reply IdReply 115 | err := makeRemoteCall(remoteNode, "ClosestPrecedingFinger", RemoteQuery{remoteNode.Id, id}, &reply) 116 | 117 | rNode := new(RemoteNode) 118 | rNode.Id = reply.Id 119 | rNode.Addr = reply.Addr 120 | return rNode, err 121 | } -------------------------------------------------------------------------------- /tapestry/tapestry/objectstore.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | "fmt" 7 | ) 8 | 9 | /* 10 | Objects advertised to the tapestry get stored in the object store of the object's root node. 11 | An object can be advertised by multiple nodes 12 | Objects time out after some amount of time if the advertising node is not heard from 13 | */ 14 | type ObjectStore struct { 15 | mutex sync.Mutex // to manage concurrent access to the object store 16 | data map[string]map[Node]*time.Timer // multimap: stores multiple nodes per key, and each node has a timeout 17 | } 18 | 19 | /* 20 | Create a new objectstore 21 | */ 22 | func NewObjectStore() *ObjectStore { 23 | m := new(ObjectStore) 24 | m.data = make(map[string]map[Node]*time.Timer) 25 | return m 26 | } 27 | 28 | /* 29 | Get the nodes that are advertising a given key 30 | */ 31 | func (store *ObjectStore) Get(key string) (replicas []Node) { 32 | store.mutex.Lock() 33 | 34 | replicas = slice(store.data[key]) 35 | 36 | store.mutex.Unlock() 37 | 38 | return 39 | } 40 | 41 | /* 42 | Removes and returns all objects that should be transferred to the remote node 43 | */ 44 | func (store *ObjectStore) GetTransferRegistrations(local Node, remote Node) map[string][]Node { 45 | transfer := make(map[string][]Node) 46 | store.mutex.Lock() 47 | 48 | for key, values := range store.data { 49 | // Compare the first digit after the prefix 50 | if Hash(key).BetterChoice(remote.Id, local.Id) { 51 | transfer[key] = slice(values) 52 | } 53 | } 54 | 55 | for key, _ := range transfer { 56 | delete(store.data, key) 57 | } 58 | 59 | store.mutex.Unlock() 60 | return transfer 61 | } 62 | 63 | /* 64 | Registers the specified node as having advertised the key. Times out after the specified duration. 65 | */ 66 | func (store *ObjectStore) Register(key string, replica Node, timeout time.Duration) bool { 67 | store.mutex.Lock() 68 | 69 | // Get the value set for the object 70 | _, exists := store.data[key] 71 | if !exists { 72 | store.data[key] = make(map[Node]*time.Timer) 73 | } 74 | 75 | // Add the value to the value set 76 | timer, exists := store.data[key][replica] 77 | if !exists { 78 | store.data[key][replica] = store.newTimeout(key, replica, timeout) 79 | } else { 80 | timer.Reset(TIMEOUT) 81 | } 82 | 83 | store.mutex.Unlock() 84 | 85 | return !exists 86 | } 87 | 88 | /* 89 | Registers all of the provided nodes and keys. 
90 | */ 91 | func (store *ObjectStore) RegisterAll(replicamap map[string][]Node, timeout time.Duration) { 92 | store.mutex.Lock() 93 | 94 | for key, replicas := range replicamap { 95 | _, exists := store.data[key] 96 | if !exists { 97 | store.data[key] = make(map[Node]*time.Timer) 98 | } 99 | for _, replica := range replicas { 100 | store.data[key][replica] = store.newTimeout(key, replica, timeout) 101 | } 102 | } 103 | 104 | store.mutex.Unlock() 105 | } 106 | 107 | /* 108 | Utility method. Creates an expiry timer for the (key, value) pair. 109 | */ 110 | func (store *ObjectStore) newTimeout(key string, replica Node, timeout time.Duration) *time.Timer { 111 | expire := func() { 112 | fmt.Printf("Expiring %v for node %v\n", key, replica) 113 | 114 | store.mutex.Lock() 115 | 116 | timer, exists := store.data[key][replica] 117 | if exists { 118 | timer.Stop() 119 | delete(store.data[key], replica) 120 | if len(store.data[key]) == 0 { 121 | delete(store.data, key) 122 | } 123 | } 124 | 125 | store.mutex.Unlock() 126 | } 127 | 128 | return time.AfterFunc(timeout, expire) 129 | } 130 | 131 | 132 | // Utility function to get the keys of a map 133 | func slice(valmap map[Node]*time.Timer) (values []Node) { 134 | for value, _ := range valmap { 135 | values = append(values, value) 136 | } 137 | return 138 | } 139 | -------------------------------------------------------------------------------- /raft/raft/client.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | const MAX_RETRIES = 5 8 | 9 | type Client struct { 10 | LocalAddr *NodeAddr 11 | Id uint64 12 | Leader NodeAddr 13 | SeqNum uint64 14 | } 15 | 16 | func CreateClient(remoteAddr NodeAddr) (cp *Client, err error) { 17 | cp = new(Client) 18 | 19 | request := RegisterClientRequest{} 20 | 21 | var reply *RegisterClientReply 22 | 23 | retries := 0 24 | 25 | LOOP: 26 | for retries < MAX_RETRIES { 27 | reply, err = RegisterClientRPC(&remoteAddr, request) 28 | if err != nil { 29 | return 30 | } 31 | switch reply.Status { 32 | case OK: 33 | Out.Printf("%v is the leader. Client successfully created.\n", remoteAddr) 34 | break LOOP 35 | case REQ_FAILED: 36 | Error.Printf("Request failed...\n") 37 | retries++ 38 | case NOT_LEADER: 39 | // The person we've contacted isn't the leader. Use 40 | // their hint to find the leader 41 | Out.Printf("%v is not the leader, but thinks that %v is\n", remoteAddr, reply.LeaderHint) 42 | remoteAddr = reply.LeaderHint 43 | case ELECTION_IN_PROGRESS: 44 | // An election is in progress. Accept the hint 45 | // and wait an appropriate amount of time, so the 46 | // election can finish. 47 | Out.Printf("%v is not the leader, but thinks that %v is\n", remoteAddr, reply.LeaderHint) 48 | remoteAddr = reply.LeaderHint 49 | time.Sleep(time.Millisecond * 200) 50 | default: 51 | } 52 | } 53 | 54 | // We've registered with the leader. 
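// At this point reply.ClientId identifies this client session with the leader.
// A minimal end-to-end sketch (illustrative only; the node address and payload
// below are placeholders, not values from this codebase):
//
//	c, err := CreateClient(someNodeAddr)                // any reachable node in the cluster
//	if err == nil {
//	    err = c.SendRequest(HASH_CHAIN_INIT, []byte("seed"))
//	}
//
// NOT_LEADER and ELECTION_IN_PROGRESS replies are handled by following
// LeaderHint, exactly as in the registration loop above.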
55 | cp.Id = reply.ClientId 56 | cp.Leader = remoteAddr 57 | 58 | return 59 | } 60 | 61 | func (c *Client) SendRequest(command FsmCommand, data []byte) (err error) { 62 | 63 | request := ClientRequest{ 64 | c.Id, 65 | c.SeqNum, 66 | command, 67 | data, 68 | } 69 | c.SeqNum += 1 70 | 71 | var reply *ClientReply 72 | 73 | retries := 0 74 | 75 | LOOP: 76 | for retries < MAX_RETRIES { 77 | reply, err = ClientRequestRPC(&c.Leader, request) 78 | if err != nil { 79 | return 80 | } 81 | switch reply.Status { 82 | case OK: 83 | Debug.Printf("%v is the leader\n", c.Leader) 84 | Out.Printf("Request returned \"%v\".\n", reply.Response) 85 | break LOOP 86 | case REQ_FAILED: 87 | Error.Printf("Request failed: %v\n", reply.Response) 88 | retries++ 89 | break LOOP 90 | case NOT_LEADER: 91 | // The person we've contacted isn't the leader. Use 92 | // their hint to find the leader 93 | c.Leader = reply.LeaderHint 94 | case ELECTION_IN_PROGRESS: 95 | // An election is in progress. Accept the hint 96 | // and wait an appropriate amount of time, so the 97 | // election can finish. 98 | c.Leader = reply.LeaderHint 99 | time.Sleep(time.Millisecond * 200) 100 | } 101 | } 102 | return 103 | } 104 | //Similar to the function above but it returns a response 105 | func (c *Client) SendRequestWithResponse(command FsmCommand, data []byte) (reply *ClientReply, err error) { 106 | request := ClientRequest{ 107 | c.Id, 108 | c.SeqNum, 109 | command, 110 | data, 111 | } 112 | c.SeqNum += 1 113 | 114 | //var reply *ClientReply 115 | 116 | retries := 0 117 | for retries < MAX_RETRIES { 118 | reply, err = ClientRequestRPC(&c.Leader, request) 119 | if err != nil { 120 | return nil, err 121 | } 122 | switch reply.Status { 123 | case OK: 124 | Debug.Printf("%v is the leader\n", c.Leader) 125 | Out.Printf("Request returned \"%v\".\n", reply.Response) 126 | return reply, nil 127 | case REQ_FAILED: 128 | Error.Printf("Request failed: %v\n", reply.Response) 129 | retries++ 130 | return reply, nil 131 | case NOT_LEADER: 132 | // The person we've contacted isn't the leader. Use 133 | // their hint to find the leader 134 | c.Leader = reply.LeaderHint 135 | case ELECTION_IN_PROGRESS: 136 | // An election is in progress. Accept the hint 137 | // and wait an appropriate amount of time, so the 138 | // election can finish. 
139 | c.Leader = reply.LeaderHint 140 | time.Sleep(time.Millisecond * 200) 141 | } 142 | } 143 | return nil, nil 144 | } -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry-local_test.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "testing" 5 | ) 6 | /*Helper function to make sure that a target node is the same 7 | as the expected node*/ 8 | func CheckFindRoot(node *TapestryNode, target ID, expected ID, 9 | t *testing.T) { 10 | result, _ := node.findRoot(node.node, target) 11 | if !equal_ids(result.Id, expected) { 12 | t.Errorf("%v: findRoot of %v is not %v (gives %v)", node.node.Id, 13 | target, expected, result.Id) 14 | } 15 | } 16 | 17 | /* This test checks that find root works from all nodes */ 18 | func TestFindRootAndLeave(t *testing.T) { 19 | if DIGITS != 4 { 20 | t.Errorf("Test wont work unless DIGITS is set to 4.") 21 | } 22 | 23 | port = 58000 24 | id := ID{5, 8, 3, 15} 25 | mainNode := makeTapestryNode(id, "", t) 26 | id = ID{7, 0, 0xd, 1} 27 | node1 := makeTapestryNode(id, mainNode.node.Address, t) 28 | id = ID{7, 0, 0xf, 5} 29 | node2 := makeTapestryNode(id, mainNode.node.Address, t) 30 | id = ID{7, 0, 0xf, 0xa} 31 | node3 := makeTapestryNode(id, mainNode.node.Address, t) 32 | 33 | // Checks all possible combinations between all nodes to find 34 | // a given route. 35 | id = ID{3, 0xf, 8, 0xa} 36 | CheckFindRoot(mainNode, id, mainNode.node.Id, t) 37 | CheckFindRoot(node1, id, mainNode.node.Id, t) 38 | CheckFindRoot(node2, id, mainNode.node.Id, t) 39 | CheckFindRoot(node3, id, mainNode.node.Id, t) 40 | id = ID{5, 2, 0, 0xc} 41 | CheckFindRoot(mainNode, id, mainNode.node.Id, t) 42 | CheckFindRoot(node1, id, mainNode.node.Id, t) 43 | CheckFindRoot(node2, id, mainNode.node.Id, t) 44 | CheckFindRoot(node3, id, mainNode.node.Id, t) 45 | id = ID{5, 8, 0xf, 0xf} 46 | CheckFindRoot(mainNode, id, mainNode.node.Id, t) 47 | CheckFindRoot(node1, id, mainNode.node.Id, t) 48 | CheckFindRoot(node2, id, mainNode.node.Id, t) 49 | CheckFindRoot(node3, id, mainNode.node.Id, t) 50 | id = ID{7, 0, 0xc, 3} 51 | CheckFindRoot(mainNode, id, node1.node.Id, t) 52 | CheckFindRoot(node1, id, node1.node.Id, t) 53 | CheckFindRoot(node2, id, node1.node.Id, t) 54 | CheckFindRoot(node3, id, node1.node.Id, t) 55 | id = ID{6, 0, 0xf, 4} 56 | CheckFindRoot(mainNode, id, node2.node.Id, t) 57 | CheckFindRoot(node1, id, node2.node.Id, t) 58 | CheckFindRoot(node2, id, node2.node.Id, t) 59 | CheckFindRoot(node3, id, node2.node.Id, t) 60 | id = ID{7, 0, 0xa, 2} 61 | CheckFindRoot(mainNode, id, node1.node.Id, t) 62 | CheckFindRoot(node1, id, node1.node.Id, t) 63 | CheckFindRoot(node2, id, node1.node.Id, t) 64 | CheckFindRoot(node3, id, node1.node.Id, t) 65 | id = ID{6, 3, 9, 5} 66 | CheckFindRoot(mainNode, id, node1.node.Id, t) 67 | CheckFindRoot(node1, id, node1.node.Id, t) 68 | CheckFindRoot(node2, id, node1.node.Id, t) 69 | CheckFindRoot(node3, id, node1.node.Id, t) 70 | id = ID{6, 8, 3, 0xf} 71 | CheckFindRoot(mainNode, id, node1.node.Id, t) 72 | CheckFindRoot(node1, id, node1.node.Id, t) 73 | CheckFindRoot(node2, id, node1.node.Id, t) 74 | CheckFindRoot(node3, id, node1.node.Id, t) 75 | id = ID{6, 3, 0xe, 5} 76 | CheckFindRoot(mainNode, id, node2.node.Id, t) 77 | CheckFindRoot(node1, id, node2.node.Id, t) 78 | CheckFindRoot(node2, id, node2.node.Id, t) 79 | CheckFindRoot(node3, id, node2.node.Id, t) 80 | id = ID{6, 3, 0xe, 9} 81 | CheckFindRoot(mainNode, id, node3.node.Id, t) 82 | 
CheckFindRoot(node1, id, node3.node.Id, t) 83 | CheckFindRoot(node2, id, node3.node.Id, t) 84 | CheckFindRoot(node3, id, node3.node.Id, t) 85 | id = ID{0xb, 0xe, 0xe, 0xf} 86 | CheckFindRoot(mainNode, id, mainNode.node.Id, t) 87 | CheckFindRoot(node1, id, mainNode.node.Id, t) 88 | CheckFindRoot(node2, id, mainNode.node.Id, t) 89 | CheckFindRoot(node3, id, mainNode.node.Id, t) 90 | 91 | // Check if after node leaves, tables get updated. 92 | mainNode.tapestry.Leave() 93 | 94 | id = ID{3, 0xf, 8, 0xa} 95 | CheckFindRoot(node1, id, node1.node.Id, t) 96 | CheckFindRoot(node2, id, node1.node.Id, t) 97 | CheckFindRoot(node3, id, node1.node.Id, t) 98 | id = ID{5, 2, 0, 0xc} 99 | CheckFindRoot(node1, id, node1.node.Id, t) 100 | CheckFindRoot(node2, id, node1.node.Id, t) 101 | CheckFindRoot(node3, id, node1.node.Id, t) 102 | id = ID{5, 8, 0xf, 0xf} 103 | CheckFindRoot(node1, id, node2.node.Id, t) 104 | CheckFindRoot(node2, id, node2.node.Id, t) 105 | CheckFindRoot(node3, id, node2.node.Id, t) 106 | 107 | node1.tapestry.Leave() 108 | node2.tapestry.Leave() 109 | node3.tapestry.Leave() 110 | } -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry-remote.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "net/rpc" 5 | "fmt" 6 | ) 7 | 8 | /* 9 | The methods defined in this file parallel the methods defined in tapestry-local. 10 | These methods take an additional argument, the node on which the method should be invoked. 11 | Calling any of these methods will invoke the corresponding method on the specified remote node. 12 | */ 13 | 14 | // Remote API: ping an address to get tapestry node info 15 | func (tapestry *Tapestry) hello(address string) (rsp Node, err error) { 16 | err = makeRemoteCall(address, "TapestryRPCServer", "Hello", tapestry.local.node, &rsp) 17 | return 18 | } 19 | 20 | // Helper function to makes a remote call 21 | func makeRemoteNodeCall(remote Node, method string, req interface{}, rsp interface{}) error { 22 | fmt.Printf("%v(%v)\n", method, req) 23 | return makeRemoteCall(remote.Address, "TapestryRPCServer", method, req, rsp) 24 | } 25 | 26 | // Helper function to makes a remote call 27 | func makeRemoteCall(address string, structtype string, method string, req interface{}, rsp interface{}) error { 28 | // Dial the server 29 | client, err := rpc.Dial("tcp", address) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | // Make the request 35 | fqm := fmt.Sprintf("%v.%v", structtype, method) 36 | err = client.Call(fqm, req, rsp) 37 | 38 | client.Close() 39 | if err != nil { 40 | return err 41 | } 42 | 43 | return nil 44 | } 45 | 46 | // Remote API: makes a remote call to the Register function 47 | func (tapestry *Tapestry) register(remote Node, replica Node, key string) (bool, error) { 48 | var rsp RegisterResponse 49 | err := makeRemoteNodeCall(remote, "Register", RegisterRequest{remote, replica, key}, &rsp) 50 | return rsp.IsRoot, err 51 | } 52 | 53 | // Remote API: makes a remote call to the GetNextHop function 54 | func (tapestry *Tapestry) getNextHop(remote Node, id ID) (bool, Node, error) { 55 | var rsp NextHopResponse 56 | err := makeRemoteNodeCall(remote, "GetNextHop", NextHopRequest{remote, id}, &rsp) 57 | return rsp.HasNext, rsp.Next, err 58 | } 59 | 60 | // Remote API: makes a remote call to the RemoveBadNodes function 61 | func (tapestry *Tapestry) removeBadNodes(remote Node, toremove []Node) error { 62 | return makeRemoteNodeCall(remote, 
"RemoveBadNodes", RemoveBadNodesRequest{remote, toremove}, &Node{}) 63 | } 64 | 65 | // Remote API: makes a remote call to the Fetch function 66 | func (tapestry *Tapestry) fetch(remote Node, key string) (bool, []Node, error) { 67 | var rsp FetchResponse 68 | err := makeRemoteNodeCall(remote, "Fetch", FetchRequest{remote, key}, &rsp) 69 | return rsp.IsRoot, rsp.Values, err 70 | } 71 | 72 | // Remote API: makes a remote call to the AddBackpointer function 73 | func (tapestry *Tapestry) addBackpointer(remote Node, toAdd Node) error { 74 | return makeRemoteNodeCall(remote, "AddBackpointer", NodeRequest{remote, toAdd}, &Node{}) 75 | } 76 | 77 | // Remote API: makes a remote call to the RemoveBackpointer function 78 | func (tapestry *Tapestry) removeBackpointer(remote Node, toRemove Node) error { 79 | return makeRemoteNodeCall(remote, "RemoveBackpointer", NodeRequest{remote, toRemove}, &Node{}) 80 | } 81 | 82 | // Remote API: makes a remote call to the GetBackpointers function 83 | func (tapestry *Tapestry) getBackpointers(remote Node, from Node, level int) (neighbours []Node, err error) { 84 | err = makeRemoteNodeCall(remote, "GetBackpointers", GetBackpointersRequest{remote, from, level}, &neighbours) 85 | return 86 | } 87 | 88 | // Remote API: makes a remote call to the AddNode function 89 | func (tapestry *Tapestry) addNode(remote Node, newnode Node) (neighbours []Node, err error) { 90 | err = makeRemoteNodeCall(remote, "AddNode", NodeRequest{remote, newnode}, &neighbours) 91 | return 92 | } 93 | 94 | // Remote API: makes a remote call to the AddNodeMulticast function 95 | func (tapestry *Tapestry) addNodeMulticast(remote Node, newnode Node, level int) (neighbours []Node, err error) { 96 | err = makeRemoteNodeCall(remote, "AddNodeMulticast", AddNodeMulticastRequest{remote, newnode, level}, &neighbours) 97 | return 98 | } 99 | 100 | func (tapestry *Tapestry) transfer(remote Node, from Node, data map[string][]Node) error { 101 | return makeRemoteNodeCall(remote, "Transfer", TransferRequest{remote, from, data}, &Node{}) 102 | } 103 | 104 | // Remote API: makes a remote call to the NotifyLeave function 105 | func (tapestry *Tapestry) notifyLeave(remote Node, from Node, replacement *Node) (err error) { 106 | return makeRemoteNodeCall(remote, "NotifyLeave", NotifyLeaveRequest{remote, from, replacement}, &Node{}) 107 | } 108 | -------------------------------------------------------------------------------- /chord/chord/chord.go: -------------------------------------------------------------------------------- 1 | /* Purpose: Chord struct and related functions to create new nodes, etc. */ 2 | /* */ 3 | 4 | package chord 5 | 6 | import ( 7 | "../utils" 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/rpc" 12 | "sync" 13 | "time" 14 | ) 15 | 16 | // Number of bits (i.e. M value), assumes <= 128 and divisible by 8 17 | const KEY_LENGTH = 8 18 | 19 | /* Non-local node representation */ 20 | type RemoteNode struct { 21 | Id []byte 22 | Addr string 23 | } 24 | 25 | /* Local node representation */ 26 | type Node struct { 27 | Id []byte /* Unique Node ID */ 28 | Listener net.Listener /* Node listener socket */ 29 | Addr string /* String of listener address */ 30 | Successor *RemoteNode /* This Node's successor */ 31 | Predecessor *RemoteNode /* This Node's predecessor */ 32 | RemoteSelf *RemoteNode /* Remote node of our self */ 33 | IsShutdown bool /* Is node in process of shutting down? 
*/ 34 | FingerTable []FingerEntry /* Finger table entries */ 35 | ftLock sync.RWMutex /* RWLock for finger table */ 36 | dataStore map[string]string /* Local datastore for this node */ 37 | dsLock sync.RWMutex /* RWLock for datastore */ 38 | next int 39 | } 40 | 41 | /* Creates a Chord node with a pre-defined ID (useful for testing) */ 42 | func CreateDefinedNode(parent *RemoteNode, definedId []byte) (*Node, error) { 43 | node := new(Node) 44 | err := node.init(parent, definedId) 45 | if err != nil { 46 | return nil, err 47 | } 48 | return node, err 49 | } 50 | 51 | /* Create Chord node with random ID based on listener address */ 52 | func CreateNode(parent *RemoteNode) (*Node, error) { 53 | node := new(Node) 54 | err := node.init(parent, nil) 55 | if err != nil { 56 | return nil, err 57 | } 58 | return node, err 59 | } 60 | 61 | /* Initailize a Chord node, start listener, rpc server, and go routines */ 62 | func (node *Node) init(parent *RemoteNode, definedId []byte) error { 63 | if KEY_LENGTH > 128 || KEY_LENGTH%8 != 0 { 64 | log.Fatal(fmt.Sprintf("KEY_LENGTH of %v is not supported! Must be <= 128 and divisible by 8", KEY_LENGTH)) 65 | } 66 | 67 | listener, _, err := utils.OpenListener() 68 | if err != nil { 69 | return err 70 | } 71 | 72 | node.Id = HashKey(listener.Addr().String()) 73 | if definedId != nil { 74 | node.Id = definedId 75 | } 76 | 77 | node.Listener = listener 78 | node.Addr = listener.Addr().String() 79 | node.IsShutdown = false 80 | node.dataStore = make(map[string]string) 81 | node.next = 1 82 | 83 | // Populate RemoteNode that points to self 84 | node.RemoteSelf = new(RemoteNode) 85 | node.RemoteSelf.Id = node.Id 86 | node.RemoteSelf.Addr = node.Addr 87 | 88 | // Populate finger table 89 | node.initFingerTable() 90 | 91 | // Join this node to the same chord ring as parent 92 | err = node.join(parent) 93 | if err != nil { 94 | return err 95 | } 96 | 97 | // Thread 1: start RPC server on this connection 98 | rpc.RegisterName(node.Addr, node) 99 | go node.startRpcServer() 100 | 101 | // Thread 2: kick off timer to stabilize periodically 102 | ticker1 := time.NewTicker(time.Millisecond * 100) //freq 103 | go node.stabilize(ticker1) 104 | 105 | // Thread 3: kick off timer to fix finger table periodically 106 | ticker2 := time.NewTicker(time.Millisecond * 90) //freq 107 | go node.fixNextFinger(ticker2) 108 | 109 | return err 110 | } 111 | 112 | /* Go routine to accept and process RPC requests */ 113 | func (node *Node) startRpcServer() { 114 | for { 115 | if node.IsShutdown { 116 | fmt.Printf("[%v] Shutting down RPC server\n", HashStr(node.Id)) 117 | return 118 | } 119 | if conn, err := node.Listener.Accept(); err != nil { 120 | log.Fatal("accept error: " + err.Error()) 121 | } else { 122 | go rpc.ServeConn(conn) 123 | } 124 | } 125 | } 126 | 127 | /* Shutdown a specified Chord node (gracefully) */ 128 | func ShutdownNode(node *Node) { 129 | node.IsShutdown = true 130 | // Wait for go routines to quit, should be enough time. 
131 | time.Sleep(time.Millisecond * 2000) 132 | node.Listener.Close() 133 | 134 | //We first disconnect ourselves from our own successors and predecessors 135 | err := SetSuccessorId_RPC(node.Predecessor, node.Successor) 136 | if err != nil { 137 | log.Fatal(err) 138 | } 139 | err = SetPredecessorId_RPC(node.Successor, node.Predecessor) 140 | if err != nil { 141 | log.Fatal(err) 142 | } 143 | //We then transfer the keys to our successor 144 | (&node.dsLock).Lock() 145 | for key, val := range node.dataStore { 146 | err := Put_RPC(node.Successor, key, val) 147 | if err != nil { 148 | //TODO handle error, particularly decide what to do with the ones not transfered 149 | (&node.dsLock).Unlock() 150 | log.Fatal(err) 151 | } 152 | //then we delete it locally 153 | delete(node.dataStore, key) 154 | } 155 | (&node.dsLock).Unlock() 156 | } -------------------------------------------------------------------------------- /tapestry/tapestry/id.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "math/rand" 5 | "math/big" 6 | "bytes" 7 | "fmt" 8 | "crypto/sha1" 9 | "time" 10 | ) 11 | 12 | /* 13 | An ID is just a typedef'ed digit array 14 | */ 15 | type ID [DIGITS]Digit 16 | 17 | /* 18 | A digit is just a typedef'ed uint8 19 | */ 20 | type Digit uint8 21 | 22 | /* 23 | Generates a random ID 24 | */ 25 | func RandomID() ID { 26 | var id ID 27 | for i := range id { 28 | id[i] = Digit(random.Intn(BASE)) 29 | } 30 | return id 31 | } 32 | 33 | /* 34 | Returns the length of the prefix that is shared by the two IDs 35 | */ 36 | func SharedPrefixLength(a ID, b ID) (i int) { 37 | count := 0 38 | for i := 0; i < DIGITS && a[i] == b[i]; i++ { 39 | count++ 40 | } 41 | return count 42 | } 43 | 44 | /* 45 | Used by Tapestry's surrogate routing. Given IDs first and second, which is the better choice? 46 | The "better choice" is the ID that: 47 | - has the longest shared prefix with id 48 | - if both have prefix of length n, which id has a better (n+1)th digit? 49 | - if both have the same (n+1)th digit, consider (n+2)th digit, etc. 50 | Returns true if the first ID is the better choice. 
Returns false if second ID is closer or if first==second 51 | */ 52 | func (id ID) BetterChoice(first ID, second ID) bool { 53 | fPrefix := SharedPrefixLength(first, id) 54 | sPrefix := SharedPrefixLength(second, id) 55 | if fPrefix != sPrefix || (sPrefix == DIGITS && fPrefix == DIGITS) { 56 | //If they are not the same or if they are the same in all the numbers then we return 57 | return fPrefix > sPrefix 58 | } 59 | //So they are the same, but not as long as DIGITS we need to figure out which one is better 60 | index := sPrefix 61 | start := id[index] 62 | target := id[index] 63 | madeAChoice := false 64 | for !madeAChoice { 65 | //If it stays in -1 then the digit of the first is > the digit in ID 66 | fDigit := first[index] % BASE 67 | sDigit := second[index] % BASE 68 | fDistance := 0 69 | sDistance := 0 70 | for sDigit != target { 71 | sDistance++ 72 | target++ 73 | target = target % BASE 74 | } 75 | target = start 76 | for fDigit != target { 77 | fDistance++ 78 | target++ 79 | target = target % BASE 80 | } 81 | 82 | 83 | if fDistance == sDistance { 84 | if index == DIGITS-1 { 85 | return false 86 | } else { 87 | index++ 88 | target = id[index] 89 | start = id[index] 90 | } 91 | } else { 92 | //fmt.Printf("fDistance: %v, sDistance: %v, target: %v, fDigit: %v, sDigit: %v\n", fDistance, sDistance, target, fDigit, sDigit) 93 | return fDistance < sDistance 94 | } 95 | } 96 | return false 97 | } 98 | 99 | /* 100 | Used when inserting nodes into Tapestry's routing table. If the routing table has multiple candidate nodes for a slot, 101 | then it chooses the node that is closer to the local node. 102 | In a production Tapestry implementation, closeness is determined by looking at the round-trip-times (RTTs) between (a, id) and (b, id), 103 | and the node with the shorter RTT is closer. 104 | In my implementation, I have decided to define closeness as the absolute value of the difference between a and b. 105 | This is NOT the same as the implementation of BetterChoice. 106 | Returns true if a is closer than b. Returns false if b is closer than a, or if a == b. 107 | */ 108 | func (id ID) Closer(first ID, second ID) bool { 109 | 110 | firstNum := first.big() 111 | secondNum := second.big() 112 | idNum := id.big() 113 | 114 | difF := big.NewInt(0) 115 | difS := big.NewInt(0) 116 | 117 | difF.Sub(firstNum, idNum) 118 | difS.Sub(secondNum, idNum) 119 | difF.Abs(difF) 120 | difS.Abs(difS) 121 | 122 | if difF.Cmp(difS) == -1 { 123 | return true 124 | } else { 125 | return false 126 | } 127 | } 128 | 129 | /* 130 | Helper function: convert an ID to a big int. 
131 | */ 132 | func (id ID) big() (b *big.Int) { 133 | b = big.NewInt(0) 134 | base := big.NewInt(BASE) 135 | for _, digit := range id { 136 | b.Mul(b, base) 137 | b.Add(b, big.NewInt(int64(digit))) 138 | } 139 | return b 140 | } 141 | 142 | /* 143 | String representation of an ID is hexstring of each digit 144 | */ 145 | func (id ID) String() string { 146 | var buf bytes.Buffer 147 | for _, d := range id { 148 | buf.WriteString(d.String()) 149 | } 150 | return buf.String() 151 | } 152 | 153 | /* 154 | Hashes the string to an ID 155 | */ 156 | func Hash(key string) (id ID) { 157 | // Sha-hash the key 158 | sha := sha1.New() 159 | sha.Write([]byte(key)) 160 | hash := sha.Sum([]byte{}) 161 | 162 | // Store in an ID 163 | for i := range id { 164 | id[i] = Digit(hash[(i/2)%len(hash)]) 165 | if i%2 == 0 { 166 | id[i] >>= 4 167 | } 168 | id[i] %= BASE 169 | } 170 | 171 | return id 172 | } 173 | 174 | /* 175 | String representation of a digit is its hex value 176 | */ 177 | func (digit Digit) String() string { 178 | return fmt.Sprintf("%X", byte(digit)) 179 | } 180 | 181 | // generate random node ID 182 | var random = rand.New(rand.NewSource(time.Now().UTC().UnixNano())) -------------------------------------------------------------------------------- /oceanstore/oceanstore/inode_test.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestGobEncoding(t *testing.T) { 9 | inode := new(Inode) 10 | inode.name = "Test inode" 11 | inode.filetype = 1 12 | inode.size = 666 13 | inode.indirect = "F666" 14 | 15 | bytes, err := inode.GobEncode() 16 | if err != nil { 17 | t.Errorf("Gob encode didn't work.") 18 | } 19 | 20 | sameInode := new(Inode) 21 | sameInode.GobDecode(bytes) 22 | 23 | if inode.name != sameInode.name { 24 | t.Errorf("Name not the same\n\t%v != %v.", inode.name, sameInode.name) 25 | } 26 | 27 | if inode.filetype != sameInode.filetype { 28 | t.Errorf("Name not the same\n\t%v != %v.", inode.filetype, sameInode.filetype) 29 | } 30 | 31 | if inode.size != sameInode.size { 32 | t.Errorf("Name not the same\n\t%v != %v.", inode.size, sameInode.size) 33 | } 34 | 35 | if inode.indirect != sameInode.indirect { 36 | t.Errorf("Name not the same\n\t%v != %v.", inode.indirect, sameInode.indirect) 37 | } 38 | } 39 | 40 | func TestInodeStorage(t *testing.T) { 41 | ocean, err := Start() 42 | if err != nil { 43 | return 44 | t.Errorf("Could not init oceanstore: %v", err) 45 | } 46 | time.Sleep(time.Millisecond * 500) 47 | 48 | client := ocean.raftClient 49 | 50 | inode := new(Inode) 51 | inode.name = "Test inode" 52 | inode.filetype = 1 53 | inode.size = 666 54 | inode.indirect = "F666" 55 | 56 | inode2 := new(Inode) 57 | inode2.name = "Test inode2" 58 | inode2.filetype = 0 59 | inode2.size = 66 60 | inode2.indirect = "BEEF" 61 | 62 | 63 | err = ocean.storeInode("/path/one", inode, client.Id) 64 | if err != nil { 65 | t.Errorf("Error storing Inode: %v", err) 66 | return 67 | } 68 | err = ocean.storeInode("/second/path", inode2, client.Id) 69 | if err != nil { 70 | t.Errorf("Error storing Inode2: %v", err) 71 | return 72 | } 73 | 74 | sameInode, err := ocean.getInode("/path/one", client.Id) 75 | if err != nil { 76 | t.Errorf("Error geting Inode: %v", err) 77 | return 78 | } 79 | sameInode2, err := ocean.getInode("/second/path", client.Id) 80 | if err != nil { 81 | t.Errorf("Error geting Inode2: %v", err) 82 | return 83 | } 84 | 85 | if inode.name != sameInode.name { 86 | t.Errorf("Name not the 
same\n\t%v != %v.", inode.name, sameInode.name) 87 | } 88 | if inode.filetype != sameInode.filetype { 89 | t.Errorf("Name not the same\n\t%v != %v.", inode.filetype, sameInode.filetype) 90 | } 91 | if inode.size != sameInode.size { 92 | t.Errorf("Name not the same\n\t%v != %v.", inode.size, sameInode.size) 93 | } 94 | if inode.indirect != sameInode.indirect { 95 | t.Errorf("Name not the same\n\t%v != %v.", inode.indirect, sameInode.indirect) 96 | } 97 | 98 | if inode2.name != sameInode2.name { 99 | t.Errorf("Name not the same\n\t%v != %v.", inode2.name, sameInode2.name) 100 | } 101 | if inode2.filetype != sameInode2.filetype { 102 | t.Errorf("Name not the same\n\t%v != %v.", inode2.filetype, sameInode2.filetype) 103 | } 104 | if inode2.size != sameInode2.size { 105 | t.Errorf("Name not the same\n\t%v != %v.", inode2.size, sameInode2.size) 106 | } 107 | if inode2.indirect != sameInode2.indirect { 108 | t.Errorf("Name not the same\n\t%v != %v.", inode2.indirect, sameInode2.indirect) 109 | } 110 | } 111 | 112 | func TestInodeReplacement(t *testing.T) { 113 | puddle, err := Start() 114 | if err != nil { 115 | return 116 | t.Errorf("Could not init puddlestore: %v", err) 117 | } 118 | time.Sleep(time.Millisecond * 500) 119 | client := puddle.raftClient 120 | 121 | inode := new(Inode) 122 | inode.name = "Test inode" 123 | inode.filetype = 1 124 | inode.size = 666 125 | inode.indirect = "F666" 126 | 127 | err = puddle.storeInode("/path/one", inode, client.Id) 128 | if err != nil { 129 | t.Errorf("Error storing Inode: %v", err) 130 | return 131 | } 132 | 133 | /* 134 | err = puddle.removeKey("/path/one") 135 | if err != nil { 136 | t.Errorf("Error removing key \"/path/one\": %v", err) 137 | return 138 | }*/ 139 | 140 | inode2 := new(Inode) 141 | inode2.name = "Imma replace u beaaach" 142 | inode2.filetype = 1 143 | inode2.size = 50 144 | inode2.indirect = "DEAD" 145 | 146 | err = puddle.storeInode("/path/one", inode2, client.Id) 147 | if err != nil { 148 | t.Errorf("Error storing Inode: %v", err) 149 | return 150 | } 151 | 152 | sameInode2, err := puddle.getInode("/path/one", client.Id) 153 | if err != nil { 154 | t.Errorf("Error geting Inode: %v", err) 155 | return 156 | } 157 | if sameInode2 == nil { 158 | t.Errorf("Something went wrong man") 159 | return 160 | } 161 | 162 | if inode2.name != sameInode2.name { 163 | t.Errorf("Name not the same\n\t%v != %v.", inode2.name, sameInode2.name) 164 | } 165 | if inode2.filetype != sameInode2.filetype { 166 | t.Errorf("Name not the same\n\t%v != %v.", inode2.filetype, sameInode2.filetype) 167 | } 168 | if inode2.size != sameInode2.size { 169 | t.Errorf("Name not the same\n\t%v != %v.", inode2.size, sameInode2.size) 170 | } 171 | if inode2.indirect != sameInode2.indirect { 172 | t.Errorf("Name not the same\n\t%v != %v.", inode2.indirect, sameInode2.indirect) 173 | } 174 | } -------------------------------------------------------------------------------- /tapestry/cli.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "./tapestry" 5 | "bufio" 6 | "fmt" 7 | "os" 8 | "strings" 9 | "flag" 10 | ) 11 | 12 | func printHelp() { 13 | fmt.Println("Commands:") 14 | fmt.Println(" - help Prints this help message") 15 | fmt.Println(" - table Prints this node's routing table") 16 | fmt.Println(" - backpointers Prints this node's backpointers") 17 | fmt.Println(" - objects Prints the advertised objects that are registered to this node") 18 | fmt.Println(" - put Stores the provided key-value pair on 
the local node and advertises the key to the tapestry") 19 | fmt.Println(" - lookup Looks up the specified key in the tapestry and prints its location") 20 | fmt.Println(" - get Looks up the specified key in the tapestry, then fetches the value from one of the replicas") 21 | fmt.Println(" - remove Remove the specified key from the tapestry") 22 | fmt.Println(" - list List the blobs being stored and advertised by the local node") 23 | fmt.Println(" - leave Instructs the local node to gracefully leave the tapestry") 24 | fmt.Println(" - kill Leaves the tapestry without graceful exit") 25 | fmt.Println(" - exit Quit this CLI") 26 | } 27 | 28 | func CLI(t *tapestry.Tapestry, done chan bool) { 29 | 30 | printHelp() 31 | for { 32 | reader := bufio.NewReader(os.Stdin) 33 | fmt.Print("> ") 34 | text, _ := reader.ReadString('\n') 35 | text = strings.TrimSpace(text) 36 | splits := strings.Split(text, " ") 37 | command := strings.ToLower(splits[0]) 38 | switch command { 39 | case "quit", "exit": 40 | { 41 | done <- true 42 | return 43 | } 44 | case "table": 45 | { 46 | t.PrintRoutingTable() 47 | } 48 | case "backpointers": 49 | { 50 | t.PrintBackpointers() 51 | } 52 | case "replicas", "data", "objects": 53 | { 54 | t.PrintObjectStore() 55 | } 56 | case "leave": 57 | { 58 | t.Leave() 59 | } 60 | case "put", "add", "store": 61 | { 62 | if len(splits) < 3 { 63 | fmt.Printf("Insufficient arguments for %s, expect %s \n", command, command) 64 | } else { 65 | key := splits[1] 66 | bytes := []byte(splits[2]) 67 | err := t.Store(key, bytes) 68 | if err != nil { 69 | fmt.Println(err) 70 | } 71 | } 72 | } 73 | case "list", "listblobs": 74 | { 75 | t.PrintBlobStore() 76 | } 77 | case "lookup", "find": 78 | { 79 | if len(splits) < 2 { 80 | fmt.Printf("Insufficient arguments for %s, expect %s \n", command, command) 81 | } else { 82 | key := splits[1] 83 | replicas, err := t.Lookup(key) 84 | if err != nil { 85 | fmt.Println(err) 86 | } else { 87 | fmt.Printf("%v: %v\n", key, replicas) 88 | } 89 | } 90 | } 91 | case "get": 92 | { 93 | if len(splits) < 2 { 94 | fmt.Printf("Insufficient arguments for %s, expect %s \n", command, command) 95 | } else { 96 | key := splits[1] 97 | bytes, err := t.Get(key) 98 | if err != nil { 99 | fmt.Println(err) 100 | } else { 101 | fmt.Printf("%v: %v\n", key, string(bytes)) 102 | } 103 | } 104 | } 105 | case "remove": 106 | { 107 | if len(splits) < 2 { 108 | fmt.Printf("Insufficient arguments for %s, expect %s \n", command, command) 109 | } else { 110 | key := splits[1] 111 | exists := t.Remove(key) 112 | if !exists { 113 | fmt.Printf("This node is not advertising %v\n", key) 114 | } 115 | } 116 | } 117 | case "help", "commands": 118 | { 119 | printHelp() 120 | } 121 | case "kill": 122 | { 123 | t.Kill() 124 | } 125 | default: 126 | { 127 | fmt.Printf("Unknown command %s\n", text) 128 | } 129 | } 130 | } 131 | } 132 | 133 | func main() { 134 | var port int 135 | var addr string 136 | 137 | flag.IntVar(&port, "port", 0, "The server port to bind to. Defaults to a random port.") 138 | flag.StringVar(&addr, "connect", "", "An existing node to connect to. 
If left blank, does not attempt to connect to another node.") 139 | flag.Parse() 140 | 141 | switch { 142 | case port != 0 && addr != "": 143 | { 144 | fmt.Printf("Starting a node on port %v and connecting to %v\n", port, addr) 145 | } 146 | case port != 0: 147 | { 148 | fmt.Printf("Starting a standalone node on port %v\n", port) 149 | } 150 | case addr != "": 151 | { 152 | fmt.Printf("Starting a node on a random port and connecting to %v\n", addr) 153 | } 154 | default: 155 | { 156 | fmt.Printf("Starting a standalone node on a random port\n") 157 | } 158 | } 159 | 160 | t, err := tapestry.Start(port, addr) 161 | 162 | if err != nil { 163 | fmt.Printf("Error starting tapestry node: %v\n", err) 164 | return 165 | } 166 | 167 | fmt.Printf("Successfully started: %v\n", t) 168 | 169 | // Kick off CLI, await exit 170 | done := make(chan bool) 171 | go CLI(t, done) 172 | 173 | for !(<-done) { 174 | } 175 | 176 | fmt.Println("Closing tapestry") 177 | t.Leave() 178 | } -------------------------------------------------------------------------------- /raft/raft/raft_test.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestLeaderElection(t *testing.T) { 11 | config := DefaultConfig() 12 | config.ClusterSize = 5 13 | config.LogPath = randSeq(10) 14 | 15 | nodes, err := CreateLocalCluster(config) 16 | if err != nil { 17 | t.Errorf("Could not create nodes") 18 | return 19 | } 20 | time.Sleep(time.Millisecond * 500) 21 | if !checkNodes(nodes, config.ClusterSize) { 22 | t.Errorf("CreateLocalCluster FAILED") 23 | return 24 | } 25 | 26 | fmt.Printf("Before loop\n") 27 | leader := getLeader(nodes) 28 | fmt.Printf("after loop\n") 29 | if leader == nil { 30 | t.Errorf("Not found the leader") 31 | fmt.Printf("# nodes: %v\n", len(nodes)) 32 | printNodes(nodes) 33 | return 34 | } 35 | 36 | time.Sleep(time.Millisecond * 500) 37 | if !checkMajorityTerms(nodes) { 38 | t.Errorf("Nodes are not on the same term (%v)", leader.GetCurrentTerm()) 39 | } 40 | if !checkMajorityCommitIndex(nodes) { 41 | t.Errorf("Nodes dont have the same commit index (%v)", leader.commitIndex) 42 | } 43 | if !checkLogOrder(nodes) { 44 | t.Errorf("Nodes logs are not in an ok order") 45 | printNodes(nodes) 46 | } 47 | 48 | fmt.Printf("The disabled node is: %v\n", leader.Id) 49 | leader.Testing.PauseWorld(true) 50 | disableLeader := leader 51 | time.Sleep(time.Millisecond * 100) 52 | leader = getLeader(nodes) 53 | if leader == nil { 54 | t.Errorf("Leader is not the same %v is not located in node", leader.Id) 55 | return 56 | } 57 | 58 | fmt.Printf("We now enable %v\n", disableLeader.Id) 59 | disableLeader.Testing.PauseWorld(false) 60 | time.Sleep(time.Millisecond * 100) 61 | leader = getLeader(nodes) 62 | if leader == nil { 63 | t.Errorf("Leader is not the same %v is not located in node", leader.Id) 64 | return 65 | } 66 | time.Sleep(time.Millisecond * 500) 67 | if !checkMajorityTerms(nodes) { 68 | t.Errorf("Nodes are not on the same term (%v)", leader.GetCurrentTerm()) 69 | } 70 | if !checkMajorityCommitIndex(nodes) { 71 | t.Errorf("Nodes dont have the same commit index (%v)", leader.commitIndex) 72 | } 73 | if !checkLogOrder(nodes) { 74 | t.Errorf("Nodes logs are not in an ok order") 75 | printNodes(nodes) 76 | } 77 | 78 | fmt.Println("TestLeaderElection pass") 79 | shutdownNodes(nodes) 80 | } 81 | 82 | func checkLogOrder(nodes []*RaftNode) bool { 83 | for _, n := range nodes { 84 | prevIndex := 
int64(-1) 85 | prevTerm := int64(-1) 86 | seen := make(map[uint64]bool) 87 | for _, entry := range n.logCache { 88 | if seen[entry.Index] || int64(entry.Index)-1 != prevIndex || int64(entry.TermId) < prevTerm { 89 | return false 90 | } 91 | 92 | seen[entry.Index] = true 93 | prevIndex = int64(entry.Index) 94 | prevTerm = int64(entry.TermId) 95 | } 96 | } 97 | return true 98 | } 99 | 100 | // Loops until it finds a majority leader in nodes. 101 | func getLeader(nodes []*RaftNode) *RaftNode { 102 | //Check all and make sure that leader matches 103 | var leader *RaftNode 104 | leader = nil 105 | it := 1 106 | for leader == nil && it < 50 { 107 | fmt.Printf("%v\n", it) 108 | time.Sleep(time.Millisecond * 200) 109 | sums := make(map[string]int, nodes[0].conf.ClusterSize) 110 | for _, n := range nodes { 111 | if n.LeaderAddress != nil { 112 | sums[n.LeaderAddress.Id]++ 113 | } 114 | } 115 | fmt.Printf("mapa %v\n\n\n", sums) 116 | var maxNode string 117 | max := -1 118 | for k, v := range sums { 119 | if v > max { 120 | maxNode = k 121 | max = v 122 | } 123 | } 124 | 125 | if max > len(nodes)/2 { 126 | for _, n := range nodes { 127 | if maxNode == n.Id { 128 | leader = n 129 | } 130 | } 131 | } 132 | it++ 133 | } 134 | 135 | if it >= 50 { 136 | return nil 137 | } 138 | return leader 139 | } 140 | 141 | func checkMajorityTerms(nodes []*RaftNode) bool { 142 | sums := make(map[uint64]int, nodes[0].conf.ClusterSize) 143 | for _, n := range nodes { 144 | sums[n.GetCurrentTerm()]++ 145 | } 146 | max := -1 147 | for _, v := range sums { 148 | if v > max { 149 | max = v 150 | } 151 | } 152 | 153 | if max > len(nodes)/2 { 154 | return true 155 | } 156 | return false 157 | } 158 | 159 | func checkMajorityCommitIndex(nodes []*RaftNode) bool { 160 | sums := make(map[uint64]int, nodes[0].conf.ClusterSize) 161 | for _, n := range nodes { 162 | sums[n.commitIndex]++ 163 | } 164 | max := -1 165 | for _, v := range sums { 166 | if v > max { 167 | max = v 168 | } 169 | } 170 | 171 | if max > len(nodes)/2 { 172 | return true 173 | } 174 | return false 175 | } 176 | 177 | func checkNodes(nodes []*RaftNode, clusterSize int) bool { 178 | for _, n := range nodes { 179 | if len(n.GetOtherNodes()) != clusterSize { 180 | return false 181 | } 182 | } 183 | return true 184 | } 185 | 186 | func printNodes(nodes []*RaftNode) { 187 | for _, n := range nodes { 188 | n.PrintLogCache() 189 | n.ShowState() 190 | } 191 | } 192 | 193 | func removeLogs(nodes []*RaftNode) { 194 | for _, n := range nodes { 195 | n.RemoveLogs() 196 | } 197 | } 198 | 199 | func shutdownNodes(nodes []*RaftNode) { 200 | for _, n := range nodes { 201 | n.IsShutDown = true 202 | n.gracefulExit <- true 203 | } 204 | time.Sleep(time.Millisecond * 200) 205 | } 206 | 207 | func randSeq(n int) string { 208 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 209 | b := make([]rune, n) 210 | for i := range b { 211 | b[i] = letters[rand.Intn(len(letters))] 212 | } 213 | return string(b) 214 | } -------------------------------------------------------------------------------- /chord/chord/node_rpc_api.go: -------------------------------------------------------------------------------- 1 | package chord 2 | 3 | import ( 4 | "errors" 5 | "net/rpc" 6 | "fmt" 7 | ) 8 | 9 | type RemoteId struct { 10 | Id []byte 11 | } 12 | 13 | type RemoteQuery struct { 14 | FromId []byte 15 | Id []byte 16 | } 17 | 18 | type IdReply struct { 19 | Id []byte 20 | Addr string 21 | Valid bool 22 | } 23 | 24 | type KeyValueReq struct { 25 | NodeId []byte 26 | Key 
string 27 | Value string 28 | } 29 | 30 | type KeyValueReply struct { 31 | Key string 32 | Value string 33 | } 34 | 35 | type RpcOkay struct { 36 | Ok bool 37 | } 38 | 39 | type UpdateReq struct { 40 | FromId []byte 41 | UpdateId []byte 42 | UpdateAddr string 43 | } 44 | 45 | type NotifyReq struct { 46 | NodeId []byte 47 | NodeAddr string 48 | UpdateId []byte 49 | UpdateAddr string 50 | } 51 | 52 | type TransferReq struct { 53 | NodeId []byte 54 | FromId []byte 55 | FromAddr string 56 | PredId []byte 57 | } 58 | 59 | /* RPC connection map cache */ 60 | var connMap = make(map[string]*rpc.Client) 61 | 62 | /* Find the successor node of a given ID in the entire ring */ 63 | func FindSuccessor_RPC(remoteNode *RemoteNode, id []byte) (*RemoteNode, error) { 64 | if remoteNode == nil { 65 | return nil, errors.New("RemoteNode is empty!") 66 | } 67 | var reply IdReply 68 | err := makeRemoteCall(remoteNode, "FindSuccessor", RemoteQuery{remoteNode.Id, id}, &reply) 69 | 70 | rNode := new(RemoteNode) 71 | rNode.Id = reply.Id 72 | rNode.Addr = reply.Addr 73 | return rNode, err 74 | } 75 | 76 | /* Helper function to make a call to a remote node */ 77 | func makeRemoteCall(remoteNode *RemoteNode, method string, req interface{}, rsp interface{}) error { 78 | // Dial the server if we don't already have a connection to it 79 | remoteNodeAddrStr := remoteNode.Addr 80 | var err error 81 | client, ok := connMap[remoteNodeAddrStr] 82 | if !ok { 83 | client, err = rpc.Dial("tcp", remoteNode.Addr) 84 | if err != nil { 85 | return err 86 | } 87 | connMap[remoteNodeAddrStr] = client 88 | } 89 | 90 | // Make the request 91 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method) 92 | err = client.Call(uniqueMethodName, req, rsp) 93 | if err != nil { 94 | return err 95 | } 96 | 97 | return nil 98 | } 99 | 100 | /* Get the predecessor ID of a remote node */ 101 | func GetPredecessorId_RPC(remoteNode *RemoteNode) (*RemoteNode, error) { 102 | var reply IdReply 103 | err := makeRemoteCall(remoteNode, "GetPredecessorId", RemoteId{remoteNode.Id}, &reply) 104 | if err != nil { 105 | return nil, err 106 | } 107 | 108 | if !reply.Valid { 109 | return nil, err 110 | } 111 | 112 | rNode := new(RemoteNode) 113 | rNode.Id = reply.Id 114 | rNode.Addr = reply.Addr 115 | return rNode, err 116 | } 117 | 118 | /* Get the successor ID of a remote node */ 119 | func GetSuccessorId_RPC(remoteNode *RemoteNode) (*RemoteNode, error) { 120 | var reply IdReply 121 | err := makeRemoteCall(remoteNode, "GetSuccessorId", RemoteId{remoteNode.Id}, &reply) 122 | if err != nil { 123 | return nil, err 124 | } 125 | rNode := new(RemoteNode) 126 | rNode.Id = reply.Id 127 | rNode.Addr = reply.Addr 128 | return rNode, err 129 | } 130 | 131 | /* Get a value from a remote node's datastore for a given key */ 132 | func Get_RPC(locNode *RemoteNode, key string) (string, error) { 133 | if locNode == nil { 134 | return "", errors.New("RemoteNode is empty!") 135 | } 136 | 137 | var reply KeyValueReply 138 | req := KeyValueReq{locNode.Id, key, ""} 139 | err := makeRemoteCall(locNode, "GetLocal", &req, &reply) 140 | 141 | return reply.Value, err 142 | } 143 | 144 | /* Put a key/value into a datastore on a remote node */ 145 | func Put_RPC(locNode *RemoteNode, key string, value string) error { 146 | if locNode == nil { 147 | return errors.New("RemoteNode is empty!") 148 | } 149 | 150 | var reply KeyValueReply 151 | req := KeyValueReq{locNode.Id, key, value} 152 | err := makeRemoteCall(locNode, "PutLocal", &req, &reply) 153 | 154 | return err 155 | } 156 | 
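// Illustrative sketch (added commentary, not part of the original file): one way
// a caller might combine the helpers above. examplePutGet is a hypothetical
// name; it assumes the caller has already resolved the key's successor (for
// example via FindSuccessor_RPC) and simply round-trips a value through that
// node's local datastore.
func examplePutGet(succ *RemoteNode, key, value string) error {
	if succ == nil {
		return errors.New("no successor supplied")
	}
	if err := Put_RPC(succ, key, value); err != nil {
		return err
	}
	got, err := Get_RPC(succ, key)
	if err != nil {
		return err
	}
	if got != value {
		return fmt.Errorf("round-trip mismatch: stored %q, read back %q", value, got)
	}
	return nil
}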
157 | /* Set the predecessor ID of a remote node */ 158 | func SetPredecessorId_RPC(remoteNode, newPred *RemoteNode) error { 159 | var reply RpcOkay 160 | var req UpdateReq 161 | req.FromId = remoteNode.Id 162 | if newPred != nil { 163 | req.UpdateId = newPred.Id 164 | req.UpdateAddr = newPred.Addr 165 | } 166 | 167 | err := makeRemoteCall(remoteNode, "SetPredecessorId", &req, &reply) 168 | if err != nil { 169 | return err 170 | } 171 | if !reply.Ok { 172 | return errors.New(fmt.Sprintf("RPC replied not valid from %v", remoteNode.Id)) 173 | } 174 | 175 | return err 176 | } 177 | 178 | /* Set the successor ID of a remote node */ 179 | func SetSuccessorId_RPC(remoteNode, newSucc *RemoteNode) error { 180 | var reply RpcOkay 181 | var req UpdateReq 182 | req.FromId = remoteNode.Id 183 | req.UpdateId = newSucc.Id 184 | req.UpdateAddr = newSucc.Addr 185 | 186 | err := makeRemoteCall(remoteNode, "SetSuccessorId", &req, &reply) 187 | if err != nil { 188 | return err 189 | } 190 | if !reply.Ok { 191 | return errors.New(fmt.Sprintf("RPC replied not valid from %v", remoteNode.Id)) 192 | } 193 | 194 | return err 195 | } 196 | 197 | /* Notify a remote node that we believe we are its predecessor */ 198 | func Notify_RPC(remoteNode, us *RemoteNode) error { 199 | if remoteNode == nil { 200 | return errors.New("RemoteNode is empty!") 201 | } 202 | var reply RpcOkay 203 | var req NotifyReq 204 | req.NodeId = remoteNode.Id 205 | req.NodeAddr = remoteNode.Addr 206 | req.UpdateId = us.Id 207 | req.UpdateAddr = us.Addr 208 | 209 | // must send us and intended node 210 | err := makeRemoteCall(remoteNode, "Notify", &req, &reply) 211 | if !reply.Ok { 212 | return errors.New(fmt.Sprintf("RPC replied not valid from %v", remoteNode.Id)) 213 | } 214 | 215 | return err 216 | } -------------------------------------------------------------------------------- /oceanstore/oceanstore/ocean_local_impl.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "../../tapestry/tapestry" 7 | "../../raft/raft" 8 | ) 9 | 10 | func (ocean *OceanNode) mkdir(req *MkdirRequest) (MkdirReply, error) { 11 | fmt.Println("Entered mkdir") 12 | reply := MkdirReply{} 13 | 14 | path := req.Path 15 | length := len(path) 16 | clientId := req.ClientId 17 | 18 | if path[0] != '/' { 19 | path = ocean.getCurrentDir(clientId) + "/" + path 20 | } 21 | path = removeExcessSlashes(path) 22 | 23 | if length == 0 { 24 | return reply, fmt.Errorf("Empty path") 25 | } 26 | if (length > 2 && path[length-1] == '.' && path[length-2] == '.') || 27 | path[length-1] == '.' { 28 | return reply, fmt.Errorf("There already exists a file/dir with that name.") 29 | } 30 | 31 | dirInode, name, fullPath, dirPath, err := ocean.dir_namev(path, clientId) 32 | if err != nil { 33 | fmt.Println(err) 34 | return reply, err 35 | } 36 | 37 | // File we are about to make should not exist. 38 | _, err = ocean.getInode(fullPath, clientId) 39 | if err == nil { 40 | return reply, fmt.Errorf("There already exists a file/dir with that name.") 41 | } 42 | 43 | // This is the root node creation. 44 | if dirInode == nil { 45 | 46 | // Create the root Inode and its block 47 | newDirInode := CreateDirInode(name) 48 | newDirBlock := CreateBlock() 49 | 50 | // Set block paths for the indirect block and dot references 51 | blockPath := fmt.Sprintf("%v:%v", fullPath, "indirect") // this will be '/:indirect' 52 | 53 | // Hash the dot references to put them on the indirect block. 
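// Editorial note (illustrative, not in the original source): the convention in
// this package is that an inode's AGUID is derived from Hash(<path>) and its
// indirect block's AGUID from Hash(<path> + ":indirect"). For the root
// directory being created here that works out to:
//
//	blockPath == "/:indirect"
//	newDirInode.indirect == hashToGuid(tapestry.Hash("/:indirect"))
//
// which is what lets getInodeBlock later re-derive the indirect block's
// AGUID from the path alone, without first reading the inode.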
54 | blockHash := tapestry.Hash(blockPath) 55 | 56 | // Save the root Inode indirect block in tapestry 57 | ocean.storeIndirectBlock(fullPath, newDirBlock.bytes, clientId) 58 | 59 | newDirInode.indirect = hashToGuid(blockHash) 60 | fmt.Println(blockHash, "->", newDirInode.indirect) 61 | 62 | // Save the root Inode 63 | ocean.storeInode(fullPath, newDirInode, clientId) 64 | 65 | } else { 66 | // Get indirect block from the directory that is going to create 67 | // the node 68 | dirBlock, err := ocean.getInodeBlock(dirPath, clientId) 69 | if err != nil { 70 | fmt.Println(err) 71 | return reply, err 72 | } 73 | 74 | // Create new inode and block 75 | newDirInode := CreateDirInode(name) 76 | newDirBlock := CreateBlock() 77 | 78 | // Declare block paths 79 | blockPath := fmt.Sprintf("%v:%v", fullPath, "indirect") 80 | 81 | // Get hashes 82 | newDirInodeHash := tapestry.Hash(fullPath) 83 | 84 | fmt.Printf("Dirpath: %v\n", dirPath) 85 | fmt.Printf("Fullpath: %v\n", fullPath) 86 | fmt.Printf("blockPath: %v\n", blockPath) 87 | fmt.Printf("newDirInodeHash: %v\n", newDirInodeHash) 88 | 89 | // Write the new dir to the old dir and increase its size 90 | IdIntoByte(dirBlock, &newDirInodeHash, int(dirInode.size)) 91 | dirInode.size += tapestry.DIGITS 92 | 93 | bytes := make([]byte, tapestry.DIGITS) 94 | IdIntoByte(bytes, &newDirInodeHash, 0) 95 | newDirInode.indirect = Guid(ByteIntoAguid(bytes, 0)) 96 | fmt.Println(newDirInodeHash, "->", newDirInode.indirect) 97 | 98 | // Save both blocks in tapestry 99 | ocean.storeIndirectBlock(fullPath, newDirBlock.bytes, clientId) 100 | ocean.storeIndirectBlock(dirPath, dirBlock, clientId) 101 | 102 | // Encode both inodes 103 | ocean.storeInode(dirPath, dirInode, clientId) 104 | ocean.storeInode(fullPath, newDirInode, clientId) 105 | } 106 | 107 | reply.Ok = true 108 | return reply, nil 109 | } 110 | 111 | func (ocean *OceanNode) dir_namev(pathname string, id uint64) (*Inode, string, string, string, error) { 112 | 113 | path := removeExcessSlashes(pathname) 114 | lastSlash := strings.LastIndex(path, "/") 115 | var dirPath, name string 116 | 117 | fmt.Println("Last slash:", lastSlash) 118 | 119 | if lastSlash == 0 && len(path) != 1 { 120 | return ocean.getRootInode(id), pathname[1:], pathname, "/", nil 121 | } else if lastSlash == 0 { 122 | return nil, "/", "/", "", nil 123 | } else if lastSlash != -1 && len(path) != 1 { // K. 
all good 124 | dirPath = path[:lastSlash] 125 | name = path[lastSlash+1:] 126 | } else if lastSlash == -1 { // No slashes at all (relative path probably) 127 | dirPath = ocean.getCurrentDir(id) 128 | name = path 129 | } else { 130 | panic("What should go here?") 131 | } 132 | 133 | path = removeExcessSlashes(path) 134 | 135 | if dirPath[0] != '/' { 136 | dirPath = ocean.getCurrentDir(id) + "/" + dirPath 137 | } 138 | 139 | dirInode, err := ocean.getInode(dirPath, id) 140 | if err != nil { // Dir path does not exist 141 | fmt.Println(err) 142 | return nil, "", "", "", err 143 | } 144 | 145 | dirPath = removeExcessSlashes(dirPath) 146 | fullPath := removeExcessSlashes(dirPath + "/" + name) 147 | 148 | return dirInode, name, fullPath, dirPath, nil 149 | } 150 | 151 | func (ocean *OceanNode) getRootInode(id uint64) *Inode { 152 | inode, err := ocean.getInode("/", id) 153 | if err != nil { 154 | panic("Root inode not found!") 155 | } 156 | return inode 157 | } 158 | 159 | func (ocean *OceanNode) connect(req *ConnectRequest) (ConnectReply, error) { 160 | reply := ConnectReply{} 161 | // addr := req.FromNode.Addr 162 | // raftNode := puddle.getRandomRaftNode() 163 | // fromAddr := raft.NodeAddr{raft.AddrToId(addr, raftNode.GetConfig().NodeIdSize), addr} 164 | 165 | raftAddr := ocean.getRandomRaftNode().GetLocalAddr() 166 | 167 | client, err := raft.CreateClient(*raftAddr) 168 | if err != nil { 169 | fmt.Println(err) 170 | return ConnectReply{false, 0}, err 171 | } 172 | 173 | // Clients that just started the connection should start in root node. 174 | ocean.clientPaths[client.Id] = "/" 175 | ocean.clients[client.Id] = client 176 | 177 | reply.Ok = true 178 | reply.Id = client.Id 179 | fmt.Println("connect reply:", reply) 180 | return reply, nil 181 | } -------------------------------------------------------------------------------- /raft/raft/raft.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "../../tapestry/tapestry" 5 | "crypto/sha1" 6 | "math/big" 7 | "net" 8 | "net/rpc" 9 | "os" 10 | "sync" 11 | "time" 12 | ) 13 | 14 | /* Node's can be in three possible states */ 15 | type NodeState int 16 | 17 | // Tapestry's id 18 | type ID tapestry.ID 19 | 20 | const ( 21 | FOLLOWER_STATE NodeState = iota 22 | CANDIDATE_STATE 23 | LEADER_STATE 24 | JOIN_STATE 25 | ) 26 | 27 | type RaftNode struct { 28 | Id string 29 | Listener net.Listener 30 | listenPort int 31 | 32 | //At any given time each server is in one of three states: leader, follower, or candidate. 
33 | State NodeState 34 | LeaderAddress *NodeAddr 35 | 36 | conf *Config 37 | IsShutDown bool 38 | RPCServer *RaftRPCServer 39 | mutex sync.Mutex 40 | Testing *TestingPolicy 41 | 42 | logCache []LogEntry 43 | 44 | //file descriptors and values for persistent state 45 | logFileDescriptor FileData 46 | metaFileDescriptor FileData 47 | stableState NodeStableState 48 | ssMutex sync.Mutex 49 | 50 | //leader specific volatile state 51 | commitIndex uint64 52 | lastApplied uint64 53 | leaderMutex map[string]uint64 54 | nextIndex map[string]uint64 55 | matchIndex map[string]uint64 56 | 57 | // channels to send and rcv RPC messages 58 | appendEntries chan AppendEntriesMsg 59 | requestVote chan RequestVoteMsg 60 | clientRequest chan ClientRequestMsg 61 | registerClient chan RegisterClientMsg 62 | gracefulExit chan bool 63 | 64 | // the replicated state machine 65 | hash []byte 66 | requestMutex sync.Mutex 67 | requestMap map[uint64]ClientRequestMsg 68 | 69 | fileMap map[string]string 70 | fileMapMtx sync.Mutex 71 | lockMap map[string]bool 72 | lockMapMtx sync.Mutex 73 | } 74 | 75 | type NodeAddr struct { 76 | Id string 77 | Addr string 78 | } 79 | 80 | func CreateNode(localPort int, leaderAddr *NodeAddr, conf *Config) (rp *RaftNode, err error) { 81 | var r RaftNode 82 | rp = &r 83 | var conn net.Listener 84 | 85 | r.IsShutDown = false 86 | r.conf = conf 87 | 88 | //init rpc channels 89 | r.appendEntries = make(chan AppendEntriesMsg) 90 | r.requestVote = make(chan RequestVoteMsg) 91 | r.clientRequest = make(chan ClientRequestMsg) 92 | r.registerClient = make(chan RegisterClientMsg) 93 | r.gracefulExit = make(chan bool) 94 | 95 | r.hash = nil 96 | r.requestMap = make(map[uint64]ClientRequestMsg) 97 | 98 | r.commitIndex = 0 99 | r.lastApplied = 0 100 | r.nextIndex = make(map[string]uint64) 101 | r.matchIndex = make(map[string]uint64) 102 | 103 | r.fileMap = make(map[string]string) 104 | r.lockMap = make(map[string]bool) 105 | r.Testing = NewTesting() 106 | r.Testing.PauseWorld(false) 107 | 108 | if localPort != 0 { 109 | conn, err = OpenPort(localPort) 110 | } else { 111 | conn, localPort, err = OpenListener() 112 | } 113 | 114 | if err != nil { 115 | return nil, err 116 | } 117 | 118 | // create node id based on listener address 119 | r.Id = AddrToId(conn.Addr().String(), conf.NodeIdSize) 120 | 121 | r.Listener = conn 122 | r.listenPort = localPort 123 | Out.Printf("started node with id %v, listening at %v", r.Id, conn.Addr().String()) 124 | 125 | freshNode, err := r.initStableStore() 126 | if err != nil { 127 | Error.Printf("Error initializing the stable store: %v \n", err) 128 | return nil, err 129 | } 130 | 131 | r.setLocalAddr(&NodeAddr{Id: r.Id, Addr: conn.Addr().String()}) 132 | 133 | // Start RPC server 134 | r.RPCServer = &RaftRPCServer{rp} 135 | rpc.RegisterName(r.GetLocalAddr().Addr, r.RPCServer) 136 | go r.RPCServer.startRpcServer() 137 | 138 | if freshNode { 139 | r.State = JOIN_STATE 140 | if leaderAddr != nil { 141 | err = JoinRPC(leaderAddr, r.GetLocalAddr()) 142 | } else { 143 | Out.Printf("Waiting to start nodes until all have joined\n") 144 | go r.startNodes() 145 | } 146 | } else { 147 | r.State = FOLLOWER_STATE 148 | go r.run() 149 | } 150 | 151 | return 152 | } 153 | 154 | func (r *RaftNode) startNodes() { 155 | r.mutex.Lock() 156 | r.AppendOtherNodes(*r.GetLocalAddr()) 157 | r.mutex.Unlock() 158 | 159 | for len(r.GetOtherNodes()) < r.conf.ClusterSize { 160 | time.Sleep(time.Millisecond * 100) 161 | } 162 | 163 | for _, otherNode := range r.GetOtherNodes() { 164 | if r.Id != 
otherNode.Id { 165 | Out.Printf("(%v) Starting node-%v\n", r.Id, otherNode.Id) 166 | StartNodeRPC(otherNode, r.GetOtherNodes()) 167 | } 168 | } 169 | 170 | // Start the Raft finite-state-machine, initially in follower state 171 | go r.run() 172 | } 173 | 174 | func CreateLocalCluster(config *Config) ([]*RaftNode, error) { 175 | if config == nil { 176 | config = DefaultConfig() 177 | } 178 | err := CheckConfig(config) 179 | if err != nil { 180 | return nil, err 181 | } 182 | 183 | nodes := make([]*RaftNode, config.ClusterSize) 184 | 185 | nodes[0], err = CreateNode(0, nil, config) 186 | for i := 1; i < config.ClusterSize; i++ { 187 | nodes[i], err = CreateNode(0, nodes[0].GetLocalAddr(), config) 188 | if err != nil { 189 | return nil, err 190 | } 191 | } 192 | return nodes, nil 193 | } 194 | 195 | 196 | 197 | func AddrToId(addr string, length int) string { 198 | h := sha1.New() 199 | h.Write([]byte(addr)) 200 | v := h.Sum(nil) 201 | keyInt := big.Int{} 202 | keyInt.SetBytes(v[:length]) 203 | return keyInt.String() 204 | } 205 | 206 | func (r *RaftNode) Exit() { 207 | Out.Printf("Abruptly shutting down node!") 208 | os.Exit(0) 209 | } 210 | 211 | func (r *RaftNode) GracefulExit() { 212 | r.Testing.PauseWorld(true) 213 | Out.Println("gracefully shutting down the node %v", r.Id) 214 | r.gracefulExit <- true 215 | } 216 | 217 | func (r *RaftNode) GetConfig() *Config { 218 | return r.conf 219 | } 220 | 221 | func (r *RaftNode) run() { 222 | curr := r.doFollower 223 | for curr != nil { 224 | curr = curr() 225 | } 226 | } -------------------------------------------------------------------------------- /tapestry/tapestry/routingtable.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import "sync" 4 | 5 | /* 6 | A routing table has a number of levels equal to the number of digits in an ID (default 40) 7 | Each level has a number of slots equal to the digit base (default 16) 8 | A node that exists on level n thereby shares a prefix of length n with the local node. 9 | Access to the routing table is managed by a lock 10 | */ 11 | type RoutingTable struct { 12 | local Node // the local tapestry node 13 | mutex sync.Mutex // to manage concurrent access to the routing table. 
could have a per-level mutex though 14 | rows [DIGITS][BASE]*[]Node // the rows of the routing table 15 | } 16 | 17 | /* 18 | Creates and returns a new routing table, placing the local node at the appropriate slot in each level of the table 19 | */ 20 | func NewRoutingTable(me Node) *RoutingTable { 21 | t := new(RoutingTable) 22 | t.local = me 23 | 24 | // Create the node lists with capacity of SLOTSIZE 25 | for i := 0; i < DIGITS; i++ { 26 | for j := 0; j < BASE; j++ { 27 | slot := make([]Node, 0, SLOTSIZE) 28 | t.rows[i][j] = &slot 29 | } 30 | } 31 | 32 | // Make sure each row has at least our node in it 33 | for i := 0; i < DIGITS; i++ { 34 | slot := t.rows[i][t.local.Id[i]] 35 | *slot = append(*slot, t.local) 36 | } 37 | 38 | return t 39 | } 40 | 41 | func GetFurthest(id ID, nodes []Node) int { 42 | furthest := 0 43 | for i := 1; i < SLOTSIZE; i++ { 44 | if id.Closer(nodes[furthest].Id, nodes[i].Id) { 45 | furthest = i 46 | } 47 | } 48 | return furthest 49 | } 50 | 51 | /* 52 | Adds the given node to the routing table 53 | Returns true if the node did not previously exist in the table and was subsequently added 54 | Returns the previous node in the table, if one was overwritten 55 | */ 56 | func (t *RoutingTable) Add(node Node) (added bool, previous *Node) { 57 | t.mutex.Lock() 58 | 59 | // Find table slot. 60 | level := SharedPrefixLength(node.Id, t.local.Id) 61 | 62 | if level == DIGITS { 63 | added = false 64 | t.mutex.Unlock() 65 | return 66 | } 67 | 68 | // fmt.Printf("%v, %v\n", i, node.Id[i]) 69 | slot := t.rows[level][node.Id[level]] 70 | 71 | // Check if it exists; if it does return false 72 | for i := 0; i < len(*slot); i++ { 73 | if SharedPrefixLength((*slot)[i].Id, node.Id) == DIGITS { 74 | added = false 75 | t.mutex.Unlock() 76 | return 77 | } 78 | } 79 | 80 | // Append new slot and make sure theres a 3 node maximum. 81 | 82 | for i := 0; i <= level; i++ { 83 | slot = t.rows[i][node.Id[i]] 84 | *slot = append(*slot, node) 85 | if len(*slot) > SLOTSIZE { 86 | furthest := GetFurthest(t.local.Id, *slot) 87 | previous = &(*slot)[furthest] 88 | *slot = append((*slot)[:furthest], (*slot)[furthest+1:]...) 89 | } 90 | } 91 | 92 | added = true 93 | t.mutex.Unlock() 94 | return 95 | } 96 | 97 | /* 98 | Removes the specified node from the routing table, if it exists 99 | Returns true if the node was in the table and was successfully removed 100 | */ 101 | func (t *RoutingTable) Remove(node Node) (wasRemoved bool) { 102 | t.mutex.Lock() 103 | 104 | // Get the table slot 105 | level := SharedPrefixLength(node.Id, t.local.Id) 106 | if level == DIGITS { 107 | // Never delete youself on your own routing table. 108 | wasRemoved = false 109 | t.mutex.Unlock() 110 | return 111 | } 112 | 113 | wasRemoved = false 114 | 115 | for j := 0; j <= level; j++ { 116 | slot := t.rows[j][node.Id[j]] 117 | 118 | // Find and remove node 119 | for i := 0; i < len(*slot); i++ { 120 | if SharedPrefixLength((*slot)[i].Id, node.Id) == DIGITS { 121 | *slot = append((*slot)[:i], (*slot)[i+1:]...) // This is remove in Go 122 | wasRemoved = true 123 | } 124 | } 125 | } 126 | 127 | // Return false if node was not found. 
128 | t.mutex.Unlock() 129 | return 130 | } 131 | 132 | /* 133 | Search the table for the closest next-hop node for the provided ID 134 | */ 135 | func (t *RoutingTable) GetNextHop(id ID) (node Node) { 136 | 137 | t.mutex.Lock() 138 | 139 | level := SharedPrefixLength(id, t.local.Id) 140 | row := t.rows[level] 141 | // fmt.Printf("%v: %v y %v\n", id, level, id[level]) 142 | col := id[level] 143 | for len(*(row[col])) == 0 { 144 | col = (col + 1) % BASE 145 | // fmt.Printf("%v\n", col) 146 | } 147 | // fmt.Printf("%v\n", col) 148 | 149 | if len(*(row[col])) == 1 { 150 | node = (*(row[col]))[0] 151 | } else if len(*(row[col])) == 2 { 152 | if id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[1].Id) { 153 | node = (*(row[col]))[0] 154 | } else { 155 | node = (*(row[col]))[1] 156 | } 157 | } else { // Consider optimization if its too slow 158 | if id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[1].Id) && 159 | id.BetterChoice((*(row[col]))[0].Id, (*(row[col]))[2].Id) { 160 | node = (*(row[col]))[0] 161 | } else if id.BetterChoice((*(row[col]))[1].Id, (*(row[col]))[0].Id) && 162 | id.BetterChoice((*(row[col]))[1].Id, (*(row[col]))[2].Id) { 163 | node = (*(row[col]))[1] 164 | } else if id.BetterChoice((*(row[col]))[2].Id, (*(row[col]))[0].Id) && 165 | id.BetterChoice((*(row[col]))[2].Id, (*(row[col]))[1].Id) { 166 | node = (*(row[col]))[2] 167 | } else { 168 | node = (*(row[col]))[0] 169 | } 170 | } 171 | 172 | t.mutex.Unlock() 173 | 174 | return 175 | } 176 | 177 | /* 178 | Get all nodes on the specified level of the routing table, EXCLUDING the local node 179 | */ 180 | func (t *RoutingTable) GetLevel(level int) (nodes []Node) { 181 | t.mutex.Lock() 182 | row := t.rows[level] 183 | for i := 0; i < BASE; i++ { 184 | if t.local.Id[level] == Digit(i) { 185 | continue 186 | } 187 | for j := 0; j < len(*row[i]); j++ { 188 | if SharedPrefixLength((*(row[i]))[j].Id, t.local.Id) != DIGITS { 189 | nodes = append(nodes, (*(row[i]))[j]) // append node 190 | } 191 | } 192 | } 193 | t.mutex.Unlock() 194 | return 195 | } 196 | 197 | -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry-rpcimpl.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "net" 5 | "net/rpc" 6 | "fmt" 7 | ) 8 | 9 | /* 10 | Receives remote invocations of methods for the local tapestry node 11 | */ 12 | type TapestryRPCServer struct { 13 | tapestry *Tapestry 14 | listener net.Listener 15 | rpc *rpc.Server 16 | } 17 | 18 | type RegisterRequest struct { 19 | To Node 20 | From Node 21 | Key string 22 | } 23 | 24 | type RegisterResponse struct { 25 | IsRoot bool 26 | } 27 | 28 | type NextHopRequest struct { 29 | To Node 30 | Id ID 31 | } 32 | type NextHopResponse struct { 33 | HasNext bool 34 | Next Node 35 | } 36 | 37 | type RemoveBadNodesRequest struct { 38 | To Node 39 | BadNodes []Node 40 | } 41 | 42 | type FetchRequest struct { 43 | To Node 44 | Key string 45 | } 46 | 47 | type FetchResponse struct { 48 | To Node 49 | IsRoot bool 50 | Values []Node 51 | } 52 | 53 | type GetBackpointersRequest struct { 54 | To Node 55 | From Node 56 | Level int 57 | } 58 | 59 | type TransferRequest struct { 60 | To Node 61 | From Node 62 | Data map[string][]Node 63 | } 64 | 65 | type NodeRequest struct { 66 | To Node 67 | Node Node 68 | } 69 | 70 | type AddNodeMulticastRequest struct { 71 | To Node 72 | NewNode Node 73 | Level int 74 | } 75 | 76 | type NotifyLeaveRequest struct { 77 | To Node 78 | From Node 79 | 
Replacement *Node 80 | } 81 | 82 | /* 83 | Creates the tapestry RPC server of a tapestry node. The RPC server receives function invocations, 84 | and proxies them to the tapestrynode implementations 85 | */ 86 | func newTapestryRPCServer(port int, tapestry *Tapestry) (server *TapestryRPCServer, err error) { 87 | // Create the RPC server 88 | server = new(TapestryRPCServer) 89 | server.tapestry = tapestry 90 | server.rpc = rpc.NewServer() 91 | server.rpc.Register(server) 92 | server.rpc.Register(NewBlobStoreRPC(tapestry.blobstore)) 93 | server.listener, err = net.Listen("tcp", fmt.Sprintf(":%v", port)) 94 | if err != nil { 95 | return nil, fmt.Errorf("Tapestry RPC server unable to listen on tcp port %v, reason: %v", port, err) 96 | } 97 | 98 | // Start the RPC server 99 | go func() { 100 | for { 101 | cxn, err := server.listener.Accept() 102 | if err != nil { 103 | fmt.Printf("Server %v closing: %s\n", port, err) 104 | return 105 | } 106 | go server.rpc.ServeConn(cxn) 107 | } 108 | }() 109 | 110 | return 111 | } 112 | 113 | func (server *TapestryRPCServer) Hello(req Node, rsp *Node) (err error) { 114 | *rsp = server.tapestry.local.node 115 | return 116 | } 117 | 118 | func (server *TapestryRPCServer) validate(expect Node) error { 119 | if server.tapestry.local.node != expect { 120 | return fmt.Errorf("Remote node expected us to be %v, but we are %v", expect, server.tapestry.local.node) 121 | } 122 | return nil 123 | } 124 | 125 | func (server *TapestryRPCServer) GetNextHop(req NextHopRequest, rsp *NextHopResponse) (err error) { 126 | err = server.validate(req.To) 127 | if err == nil { 128 | rsp.HasNext, rsp.Next, err = server.tapestry.local.GetNextHop(req.Id) 129 | } 130 | return 131 | } 132 | 133 | // Server: proxies a remote method invocation to the local node 134 | func (server *TapestryRPCServer) RemoveBadNodes(req RemoveBadNodesRequest, rsp *Node) error { 135 | err := server.validate(req.To) 136 | if err != nil { 137 | return err 138 | } 139 | return server.tapestry.local.RemoveBadNodes(req.BadNodes) 140 | } 141 | 142 | func (server *TapestryRPCServer) Fetch(req FetchRequest, rsp *FetchResponse) (err error) { 143 | err = server.validate(req.To) 144 | if err == nil { 145 | rsp.IsRoot, rsp.Values, err = server.tapestry.local.Fetch(req.Key) 146 | } 147 | return 148 | } 149 | 150 | func (server *TapestryRPCServer) AddBackpointer(req NodeRequest, rsp *Node) error { 151 | err := server.validate(req.To) 152 | if err != nil { 153 | return err 154 | } 155 | return server.tapestry.local.AddBackpointer(req.Node) 156 | } 157 | 158 | func (server *TapestryRPCServer) RemoveBackpointer(req NodeRequest, rsp *Node) error { 159 | err := server.validate(req.To) 160 | if err != nil { 161 | return err 162 | } 163 | return server.tapestry.local.RemoveBackpointer(req.Node) 164 | } 165 | 166 | func (server *TapestryRPCServer) GetBackpointers(req GetBackpointersRequest, rsp *[]Node) (err error) { 167 | err = server.validate(req.To) 168 | if err != nil { 169 | return err 170 | } 171 | backpointers, err := server.tapestry.local.GetBackpointers(req.From, req.Level) 172 | *rsp = append(*rsp, backpointers...) 173 | return 174 | } 175 | 176 | func (server *TapestryRPCServer) AddNode(req NodeRequest, rsp *[]Node) (err error) { 177 | err = server.validate(req.To) 178 | if err != nil { 179 | return 180 | } 181 | neighbours, err := server.tapestry.local.AddNode(req.Node) 182 | *rsp = append(*rsp, neighbours...) 
183 | return 184 | } 185 | 186 | func (server *TapestryRPCServer) AddNodeMulticast(req AddNodeMulticastRequest, rsp *[]Node) (err error) { 187 | err = server.validate(req.To) 188 | if err != nil { 189 | return err 190 | } 191 | neighbours, err := server.tapestry.local.AddNodeMulticast(req.NewNode, req.Level) 192 | *rsp = append(*rsp, neighbours...) 193 | return err 194 | } 195 | 196 | func (server *TapestryRPCServer) Transfer(req TransferRequest, rsp *Node) error { 197 | err := server.validate(req.To) 198 | if err != nil { 199 | return err 200 | } 201 | return server.tapestry.local.Transfer(req.From, req.Data) 202 | } 203 | 204 | func (server *TapestryRPCServer) NotifyLeave(req NotifyLeaveRequest, rsp *Node) error { 205 | err := server.validate(req.To) 206 | if err != nil { 207 | return err 208 | } 209 | return server.tapestry.local.NotifyLeave(req.From, req.Replacement) 210 | } 211 | 212 | /* 213 | This method is invoked over RPC by other Tapestry nodes. 214 | Register the specified node as an advertiser of the specified key. 215 | 216 | * Check that we are the root node for the key 217 | * Add the node to the object store 218 | * Kick off a timer to remove the node if it's not advertised again after a set amount of time 219 | */ 220 | func (server *TapestryRPCServer) Register(req RegisterRequest, rsp *RegisterResponse) (err error) { 221 | err = server.validate(req.To) 222 | if err == nil { 223 | rsp.IsRoot, err = server.tapestry.local.Register(req.Key, req.From) 224 | } 225 | return 226 | } -------------------------------------------------------------------------------- /oceanstore/oceanstore/ocean_rpc_api.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "fmt" 5 | "net/rpc" 6 | ) 7 | 8 | var connMap = make(map[string]*rpc.Client) 9 | 10 | type ConnectRequest struct { 11 | FromNode OceanAddr 12 | } 13 | 14 | type ConnectReply struct { 15 | Ok bool 16 | Id uint64 17 | } 18 | 19 | func ConnectRPC(remotenode *OceanAddr, request ConnectRequest) (*ConnectReply, error) { 20 | fmt.Println("(Oceanstore) RPC Connect to", remotenode.Addr) 21 | var reply ConnectReply 22 | 23 | err := makeRemoteCall(remotenode, "ConnectImpl", request, &reply) 24 | if err != nil { 25 | return nil, err 26 | } 27 | 28 | return &reply, nil 29 | } 30 | 31 | type PwdRequest struct { 32 | ClientId uint64 33 | } 34 | 35 | type PwdReply struct { 36 | Ok bool 37 | Path string 38 | } 39 | 40 | func pwdRPC(remotenode *OceanAddr, request PwdRequest) (*PwdReply, error) { 41 | var reply PwdReply 42 | 43 | err := makeRemoteCall(remotenode, "PwdImpl", request, &reply) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | return &reply, nil 49 | } 50 | 51 | type LsRequest struct { 52 | ClientId uint64 53 | Path string 54 | } 55 | 56 | type LsReply struct { 57 | Ok bool 58 | Elements string 59 | } 60 | 61 | func lsRPC(remotenode *OceanAddr, request LsRequest) (*LsReply, error) { 62 | var reply LsReply 63 | 64 | err := makeRemoteCall(remotenode, "LsImpl", request, &reply) 65 | if err != nil { 66 | return nil, err 67 | } 68 | 69 | return &reply, nil 70 | } 71 | 72 | type CdRequest struct { 73 | ClientId uint64 74 | Path string 75 | } 76 | 77 | type CdReply struct { 78 | Ok bool 79 | } 80 | 81 | func cdRPC(remotenode *OceanAddr, request CdRequest) (*CdReply, error) { 82 | var reply CdReply 83 | 84 | err := makeRemoteCall(remotenode, "CdImpl", request, &reply) 85 | if err != nil { 86 | return nil, err 87 | } 88 | 89 | return &reply, nil 90 | } 91 | 92 | 
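// Illustrative sketch (added commentary, not part of the original file): every
// wrapper in this file follows the same request/reply pattern around
// makeRemoteCall, so a client session simply chains them, threading the client
// id returned by ConnectRPC through each later request. The exampleSession
// name and the bare ConnectRequest{} are assumptions for illustration only;
// the connect implementation in ocean_local_impl.go does not currently read
// FromNode.
func exampleSession(server *OceanAddr) error {
	conn, err := ConnectRPC(server, ConnectRequest{})
	if err != nil {
		return err
	}
	if _, err := cdRPC(server, CdRequest{ClientId: conn.Id, Path: "/"}); err != nil {
		return err
	}
	pwd, err := pwdRPC(server, PwdRequest{ClientId: conn.Id})
	if err != nil {
		return err
	}
	fmt.Println("working directory:", pwd.Path)
	return nil
}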
type MvRequest struct { 93 | ClientId uint64 94 | Source string 95 | Dest string 96 | } 97 | 98 | type MvReply struct { 99 | Ok bool 100 | } 101 | 102 | func mvRPC(remotenode *OceanAddr, request MvRequest) (*MvReply, error) { 103 | var reply MvReply 104 | 105 | err := makeRemoteCall(remotenode, "MvImpl", request, &reply) 106 | if err != nil { 107 | return nil, err 108 | } 109 | 110 | return &reply, nil 111 | } 112 | 113 | type CpRequest struct { 114 | ClientId uint64 115 | Source string 116 | Dest string 117 | } 118 | 119 | type CpReply struct { 120 | Ok bool 121 | } 122 | 123 | func cpRPC(remotenode *OceanAddr, request CpRequest) (*CpReply, error) { 124 | var reply CpReply 125 | 126 | err := makeRemoteCall(remotenode, "CpImpl", request, &reply) 127 | if err != nil { 128 | return nil, err 129 | } 130 | 131 | return &reply, nil 132 | } 133 | 134 | type MkdirRequest struct { 135 | ClientId uint64 136 | Path string 137 | } 138 | 139 | type MkdirReply struct { 140 | Ok bool 141 | } 142 | 143 | func mkdirRPC(remotenode *OceanAddr, request MkdirRequest) (*MkdirReply, error) { 144 | var reply MkdirReply 145 | 146 | err := makeRemoteCall(remotenode, "MkdirImpl", request, &reply) 147 | if err != nil { 148 | return nil, err 149 | } 150 | 151 | return &reply, nil 152 | } 153 | 154 | type RmdirRequest struct { 155 | ClientId uint64 156 | Path string 157 | } 158 | 159 | type RmdirReply struct { 160 | Ok bool 161 | } 162 | 163 | func rmdirRPC(remotenode *OceanAddr, request RmdirRequest) (*RmdirReply, error) { 164 | var reply RmdirReply 165 | 166 | err := makeRemoteCall(remotenode, "RmdirImpl", request, &reply) 167 | if err != nil { 168 | return nil, err 169 | } 170 | 171 | return &reply, nil 172 | } 173 | 174 | type MkfileRequest struct { 175 | ClientId uint64 176 | Path string 177 | } 178 | 179 | type MkfileReply struct { 180 | Ok bool 181 | } 182 | 183 | func mkfileRPC(remotenode *OceanAddr, request MkfileRequest) (*MkfileReply, error) { 184 | var reply MkfileReply 185 | 186 | err := makeRemoteCall(remotenode, "MkfileImpl", request, &reply) 187 | if err != nil { 188 | return nil, err 189 | } 190 | 191 | return &reply, nil 192 | } 193 | 194 | type RmfileRequest struct { 195 | ClientId uint64 196 | Path string 197 | } 198 | 199 | type RmfileReply struct { 200 | Ok bool 201 | } 202 | 203 | func rmfileRPC(remotenode *OceanAddr, request RmfileRequest) (*RmfileReply, error) { 204 | var reply RmfileReply 205 | 206 | err := makeRemoteCall(remotenode, "RmfileImpl", request, &reply) 207 | if err != nil { 208 | return nil, err 209 | } 210 | 211 | return &reply, nil 212 | } 213 | 214 | type WritefileRequest struct { 215 | ClientId uint64 216 | Path string 217 | Location uint32 218 | Buffer []byte 219 | } 220 | 221 | type WritefileReply struct { 222 | Ok bool 223 | Written uint32 224 | } 225 | 226 | func writefileRPC(remotenode *OceanAddr, request WritefileRequest) (*WritefileReply, error) { 227 | var reply WritefileReply 228 | 229 | err := makeRemoteCall(remotenode, "WritefileImpl", request, &reply) 230 | if err != nil { 231 | return nil, err 232 | } 233 | 234 | return &reply, nil 235 | } 236 | 237 | type CatRequest struct { 238 | ClientId uint64 239 | Path string 240 | Location uint32 241 | Count uint32 242 | } 243 | 244 | type CatReply struct { 245 | Ok bool 246 | Read uint32 247 | Buffer []byte 248 | } 249 | 250 | func catRPC(remotenode *OceanAddr, request CatRequest) (*CatReply, error) { 251 | var reply CatReply 252 | 253 | err := makeRemoteCall(remotenode, "CatImpl", request, &reply) 254 | if err != nil { 
255 | return nil, err 256 | } 257 | 258 | return &reply, nil 259 | } 260 | 261 | /* Helper function to make a call to a remote node */ 262 | func makeRemoteCall(remoteNode *OceanAddr, method string, req interface{}, rsp interface{}) error { 263 | // Dial the server if we don't already have a connection to it 264 | remoteNodeAddrStr := remoteNode.Addr 265 | var err error 266 | client, ok := connMap[remoteNodeAddrStr] 267 | if !ok { 268 | client, err = rpc.Dial("tcp", remoteNode.Addr) 269 | if err != nil { 270 | return err 271 | } 272 | connMap[remoteNodeAddrStr] = client 273 | } 274 | 275 | // Make the request 276 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method) 277 | err = client.Call(uniqueMethodName, req, rsp) 278 | if err != nil { 279 | client.Close() 280 | delete(connMap, remoteNodeAddrStr) 281 | return err 282 | } 283 | 284 | return nil 285 | } -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "time" 8 | "strings" 9 | ) 10 | 11 | /* The Tapestry object provides the API for accessing tapestry. 12 | * It will call remote methods across RPC, and receives remote RPC 13 | * calls which get forwarded to the local node */ 14 | 15 | const BASE = 16 // The base of a digit of an ID. By default, a digit is base-16 16 | const DIGITS = 40 // The number of digits in an ID. By default, an ID has 40 digits. 17 | const RETRIES = 3 // The number of retries on failure. By default we have 3 retries 18 | const K = 10 // During neighbour traversal, trim the neighbourset to this size before fetching backpointers. By default this has a value of 10 19 | const SLOTSIZE = 3 // The each slot in the routing table should store this many nodes. By default this is 3. 20 | 21 | // Default = 10 22 | const REPUBLISH = 1 * time.Second // object republish interval for nodes advertising objects 23 | // Default = 25 24 | const TIMEOUT = 3 * time.Second // object timeout interval for nodes storing objects 25 | 26 | /* 27 | Provides the private API for communicating with remote nodes 28 | */ 29 | type Tapestry struct { 30 | local *TapestryNode // the local node 31 | server *TapestryRPCServer // receives remote method invocations and calls the corresponding local node methods 32 | blobstore *BlobStore // stores blobs on the local node 33 | } 34 | 35 | /* 36 | Public API: Start a tapestry node on the specified port. 37 | Optionally, specify the address of an existing node in the tapestry mesh to connect to, otherwise set to "" 38 | */ 39 | func Start(port int, connectTo string) (*Tapestry, error) { 40 | return start(RandomID(), port, connectTo) 41 | } 42 | 43 | /* 44 | Private method, useful for testing: start a node with the specified ID rather than a random ID 45 | */ 46 | func start(id ID, port int, connectTo string) (tapestry *Tapestry, err error) { 47 | // Create the tapestry object 48 | tapestry = new(Tapestry) 49 | 50 | // Create the blob store 51 | tapestry.blobstore = NewBlobStore() 52 | 53 | // Create the RPC server 54 | tapestry.server, err = newTapestryRPCServer(port, tapestry) 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | // Get the hostname of this machine 60 | name, err := os.Hostname() 61 | if err != nil { 62 | return nil, fmt.Errorf("Unable to get hostname of local machine to start Tapestry node. 
Reason: %v", err) 63 | } 64 | 65 | // Get the port we are bound to 66 | _, actualport, err := net.SplitHostPort(tapestry.server.listener.Addr().String()) //fmt.Sprintf("%v:%v", name, port) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | // The actual address of this node 72 | address := fmt.Sprintf("%s:%s", name, actualport) 73 | 74 | // Create the local node 75 | tapestry.local = newTapestryNode(Node{id, address}, tapestry) 76 | 77 | // If specified, connect to the provided address 78 | if connectTo != "" { 79 | // Get the node we're joining 80 | node, err := tapestry.hello(connectTo) 81 | if err != nil { 82 | return nil, fmt.Errorf("Error joining existing tapestry node %v, reason: %v", address, err) 83 | } 84 | err = tapestry.local.Join(node) 85 | if err != nil { 86 | return nil, err 87 | } 88 | } 89 | 90 | return tapestry, nil 91 | } 92 | 93 | /* 94 | Store a blob on the local node and publish the key to the tapestry 95 | */ 96 | func (tapestry *Tapestry) Store(key string, value []byte) error { 97 | done, err := tapestry.local.Publish(key) 98 | if err != nil { 99 | return err 100 | } 101 | tapestry.blobstore.Put(key, value, done) 102 | return nil 103 | } 104 | 105 | /* 106 | Lookup a key in the tapestry and return its root node 107 | */ 108 | func (tapestry *Tapestry) Lookup(key string) ([]Node, error) { 109 | return tapestry.local.Lookup(key) 110 | } 111 | 112 | /* 113 | Lookup a key in the tapestry then fetch the corresponding blob from the remote blob store 114 | */ 115 | func (tapestry *Tapestry) Get(key string) ([]byte, error) { 116 | // Lookup the key 117 | replicas, err := tapestry.Lookup(key) 118 | if err != nil { 119 | return nil, err 120 | } 121 | if len(replicas) == 0 { 122 | return nil, fmt.Errorf("No replicas returned for key %v", key) 123 | } 124 | 125 | // Contact replicas 126 | var errs []error 127 | for _, replica := range replicas { 128 | blob, err := FetchRemoteBlob(replica, key) 129 | if err != nil { 130 | errs = append(errs, err) 131 | } 132 | if blob != nil { 133 | return *blob, nil 134 | } 135 | } 136 | 137 | return nil, fmt.Errorf("Error contacting replicas, %v: %v", replicas, errs) 138 | } 139 | 140 | /* 141 | Remove the blob from the local blob store and stop advertising 142 | */ 143 | func (tapestry *Tapestry) Remove(key string) bool { 144 | return tapestry.blobstore.Delete(key) 145 | } 146 | 147 | /* 148 | Leave the tapestry. 
149 | */ 150 | func (tapestry *Tapestry) Leave() { 151 | tapestry.blobstore.DeleteAll() 152 | tapestry.local.Leave() 153 | tapestry.server.listener.Close() 154 | } 155 | 156 | /* 157 | Kill this node without gracefully leaving the tapestry 158 | */ 159 | func (tapestry *Tapestry) Kill() { 160 | tapestry.server.listener.Close() 161 | } 162 | 163 | func (tapestry *Tapestry) GetLocalAddr() string { 164 | return tapestry.local.node.Address 165 | } 166 | 167 | // Prints a routing table 168 | func (tapestry *Tapestry) PrintRoutingTable() { 169 | table := tapestry.local.table 170 | id := table.local.Id.String() 171 | for i, row := range table.rows { 172 | for j, slot := range row { 173 | for _, node := range *slot { 174 | fmt.Printf(" %v%v %v: %v %v\n", id[:i], strings.Repeat(" ", DIGITS-i+1), Digit(j), node.Address, node.Id.String()) 175 | } 176 | } 177 | } 178 | } 179 | 180 | // Prints the object store 181 | func (tapestry *Tapestry) PrintObjectStore() { 182 | fmt.Printf("ObjectStore for node %v\n", tapestry.local.node) 183 | for key, values := range tapestry.local.store.data { 184 | fmt.Printf(" %v: %v\n", key, slice(values)) 185 | } 186 | } 187 | 188 | // Prints the backpointers 189 | func (tapestry *Tapestry) PrintBackpointers() { 190 | bp := tapestry.local.backpointers 191 | fmt.Printf("Backpointers for node %v\n", tapestry.local.node) 192 | for i, set := range bp.sets { 193 | for _, node := range set.Nodes() { 194 | fmt.Printf(" %v %v: %v\n", i, node.Address, node.Id.String()) 195 | } 196 | } 197 | } 198 | 199 | // Prints the blobstore 200 | func (tapestry *Tapestry) PrintBlobStore() { 201 | for k, _ := range tapestry.blobstore.blobs { 202 | fmt.Println(k) 203 | } 204 | } 205 | 206 | func (tapestry *Tapestry) GetLocalNode() Node { 207 | return tapestry.local.node 208 | } -------------------------------------------------------------------------------- /oceanstore/oceanstore/inode.go: -------------------------------------------------------------------------------- 1 | package oceanstore 2 | 3 | import ( 4 | "../../tapestry/tapestry" 5 | "fmt" 6 | "bytes" 7 | "encoding/gob" 8 | "strings" 9 | "strconv" 10 | ) 11 | 12 | type Filetype int 13 | 14 | const ( 15 | DIR Filetype = iota 16 | FILE 17 | ) 18 | 19 | const BLOCK_SIZE = uint32(4096) 20 | const FILES_PER_INODE = 4 21 | 22 | type Inode struct { 23 | name string 24 | filetype Filetype 25 | size uint32 26 | indirect Guid 27 | } 28 | 29 | type Block struct { 30 | bytes []byte 31 | } 32 | 33 | func CreateDirInode(name string) *Inode { 34 | inode := new(Inode) 35 | inode.name = name 36 | inode.filetype = DIR 37 | inode.size = 0 38 | inode.indirect = "" 39 | return inode 40 | } 41 | 42 | func CreateFileInode(name string) *Inode { 43 | inode := new(Inode) 44 | inode.name = name 45 | inode.filetype = FILE 46 | inode.size = 0 47 | inode.indirect = "" 48 | return inode 49 | } 50 | 51 | func CreateBlock() *Block { 52 | block := new(Block) 53 | block.bytes = make([]byte, BLOCK_SIZE) 54 | return block 55 | } 56 | 57 | // Gets the inode that has a given path 58 | func (ocean *OceanNode) getInode(path string, id uint64) (*Inode, error) { 59 | 60 | hash := tapestry.Hash(path) 61 | 62 | aguid := Aguid(hashToGuid(hash)) 63 | 64 | // Get the vguid using raft 65 | bytes, err := ocean.getTapestryData(aguid, id) 66 | 67 | inode := new(Inode) 68 | err = inode.GobDecode(bytes) 69 | if err != nil { 70 | fmt.Println(bytes) 71 | return nil, err 72 | } 73 | 74 | return inode, nil 75 | } 76 | 77 | func (d *Inode) GobEncode() ([]byte, error) { 78 | w := new(bytes.Buffer) 
79 | encoder := gob.NewEncoder(w) 80 | err := encoder.Encode(d.name) 81 | if err != nil { 82 | return nil, err 83 | } 84 | err = encoder.Encode(d.filetype) 85 | if err != nil { 86 | return nil, err 87 | } 88 | err = encoder.Encode(d.size) 89 | if err != nil { 90 | return nil, err 91 | } 92 | err = encoder.Encode(d.indirect) 93 | if err != nil { 94 | return nil, err 95 | } 96 | return w.Bytes(), nil 97 | } 98 | 99 | func (d *Inode) GobDecode(buf []byte) error { 100 | r := bytes.NewBuffer(buf) 101 | decoder := gob.NewDecoder(r) 102 | err := decoder.Decode(&d.name) 103 | if err != nil { 104 | return err 105 | } 106 | err = decoder.Decode(&d.filetype) 107 | if err != nil { 108 | return err 109 | } 110 | err = decoder.Decode(&d.size) 111 | if err != nil { 112 | return err 113 | } 114 | return decoder.Decode(&d.indirect) 115 | } 116 | 117 | // Generic method. Gets data given an aguid. 118 | func (ocean *OceanNode) getTapestryData(aguid Aguid, id uint64) ([]byte, error) { 119 | tapestryNode := ocean.getRandomTapestryNode() 120 | response, err := ocean.getRaftVguid(aguid, id) 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | ok := strings.Split(string(response), ":")[0] 126 | vguid := strings.Split(string(response), ":")[1] 127 | if ok != "SUCCESS" { 128 | return nil, fmt.Errorf("Could not get raft vguid: %v", response) 129 | } 130 | 131 | data, err := tapestry.TapestryGet(tapestryNode, string(vguid)) 132 | if err != nil { 133 | return nil, err 134 | } 135 | return data, nil 136 | } 137 | 138 | // Gets the inode that has a given aguid 139 | func (ocean *OceanNode) getInodeFromAguid(aguid Aguid, id uint64) (*Inode, error) { 140 | // Get the vguid using raft 141 | bytes, err := ocean.getTapestryData(aguid, id) 142 | 143 | inode := new(Inode) 144 | err = inode.GobDecode(bytes) 145 | if err != nil { 146 | fmt.Println(bytes) 147 | return nil, err 148 | } 149 | 150 | return inode, nil 151 | } 152 | 153 | func (ocean *OceanNode) getFileBlock(key string, blockno uint32, id uint64) ([]byte, error) { 154 | blockPath := fmt.Sprintf("%v:%v", key, blockno) 155 | hash := tapestry.Hash(blockPath) 156 | aguid := Aguid(hashToGuid(hash)) 157 | 158 | return ocean.getTapestryData(aguid, id) 159 | } 160 | 161 | // Gets the block of the inode of the specified key/path 162 | func (ocean *OceanNode) getInodeBlock(key string, id uint64) ([]byte, error) { 163 | blockPath := fmt.Sprintf("%v:%v", key, "indirect") 164 | hash := tapestry.Hash(blockPath) 165 | aguid := Aguid(hashToGuid(hash)) 166 | 167 | return ocean.getTapestryData(aguid, id) 168 | } 169 | 170 | 171 | // Stores inode as data 172 | func (ocean *OceanNode) storeInode(path string, inode *Inode, id uint64) error { 173 | 174 | hash := tapestry.Hash(path) 175 | 176 | aguid := Aguid(hashToGuid(hash)) 177 | vguid := Vguid(randSeq(tapestry.DIGITS)) 178 | 179 | // Encode the inode 180 | bytes, err := inode.GobEncode() 181 | if err != nil { 182 | return err 183 | } 184 | 185 | // Set the new aguid -> vguid pair with raft 186 | err = ocean.setRaftVguid(aguid, vguid, id) 187 | if err != nil { 188 | return err 189 | } 190 | 191 | // Store data in tapestry with key: vguid 192 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), bytes) 193 | if err != nil { 194 | return err 195 | } 196 | 197 | return nil 198 | } 199 | 200 | func (ocean *OceanNode) storeIndirectBlock(inodePath string, block []byte, 201 | id uint64) error { 202 | 203 | blockPath := fmt.Sprintf("%v:%v", inodePath, "indirect") 204 | hash := tapestry.Hash(blockPath) 205 | 206 
| aguid := Aguid(hashToGuid(hash)) 207 | vguid := Vguid(randSeq(tapestry.DIGITS)) 208 | 209 | // Set the new aguid -> vguid pair with raft 210 | err := ocean.setRaftVguid(aguid, vguid, id) 211 | if err != nil { 212 | return err 213 | } 214 | 215 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), block) 216 | if err != nil { 217 | return fmt.Errorf("Tapestry error") 218 | } 219 | 220 | return nil 221 | } 222 | 223 | func (ocean *OceanNode) storeFileBlock(inodePath string, blockno uint32, 224 | block []byte, id uint64) error { 225 | 226 | blockPath := fmt.Sprintf("%v:%v", inodePath, blockno) 227 | hash := tapestry.Hash(blockPath) 228 | 229 | aguid := Aguid(hashToGuid(hash)) 230 | vguid := Vguid(randSeq(tapestry.DIGITS)) 231 | 232 | // Set the new aguid -> vguid pair with raft 233 | err := ocean.setRaftVguid(aguid, vguid, id) 234 | if err != nil { 235 | return err 236 | } 237 | 238 | err = tapestry.TapestryStore(ocean.getRandomTapestryNode(), string(vguid), block) 239 | if err != nil { 240 | return err 241 | } 242 | 243 | return nil 244 | } 245 | 246 | // Removes an entry from a directory block. If it not the last entry, 247 | // It moves and replaces the last entry with the removing entry. 248 | func (ocean *OceanNode) removeEntryFromBlock(bytes []byte, vguid Vguid, 249 | size uint32, id uint64) error { 250 | 251 | start, err := ocean.lookupInode(bytes, vguid, size, id) 252 | if err != nil { 253 | return err 254 | } 255 | if start == size-tapestry.DIGITS { // Last one 256 | // MakeZeros(bytes, start) 257 | } else { 258 | for i := uint32(0); i < tapestry.DIGITS; i++ { 259 | bytes[start+i] = bytes[size-tapestry.DIGITS+i] 260 | } 261 | } 262 | return nil 263 | } 264 | 265 | // Get the inode that has a specific vuid from a directory block. 
266 | func (puddle *OceanNode) lookupInode(block []byte, vguid Vguid, 267 | size uint32, id uint64) (uint32, error) { 268 | length := size / tapestry.DIGITS 269 | for i := uint32(0); i < length; i++ { 270 | curAguid := ByteIntoAguid(block, i*tapestry.DIGITS) 271 | res, err := puddle.getRaftVguid(curAguid, id) 272 | curVguid := Vguid(strings.Split(string(res), ":")[1]) 273 | if err != nil { 274 | return 0, err 275 | } 276 | if curVguid == vguid { 277 | fmt.Println("Found:", curAguid, curVguid) 278 | return i, nil 279 | } 280 | } 281 | 282 | return 0, fmt.Errorf("Not found!") 283 | } 284 | 285 | func ByteIntoAguid(bytes []byte, start uint32) Aguid { 286 | aguid := "" 287 | for i := uint32(0); i < tapestry.DIGITS; i++ { 288 | aguid += strconv.FormatUint(uint64(bytes[start+i]), tapestry.BASE) 289 | } 290 | return Aguid(strings.ToUpper(aguid)) 291 | } -------------------------------------------------------------------------------- /tapestry/tapestry/id_test.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import "testing" 4 | 5 | // This test makes sure that the prefix length is working 6 | func TestSharedPrefixLength(t *testing.T) { 7 | a := ID{1,2,3,4,5,6,7,8,9,6,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 8 | b := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 9 | count := SharedPrefixLength(a, b) 10 | if (count != 9) { 11 | t.Errorf("The SharedPrefixLength does not work") 12 | } 13 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 14 | b = ID{2,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 15 | count = SharedPrefixLength(a, b) 16 | if (count != 0) { 17 | t.Errorf("The SharedPrefixLength does not work") 18 | } 19 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 20 | b = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 21 | count = SharedPrefixLength(a, b) 22 | if (count != 40) { 23 | t.Errorf("The SharedPrefixLength does not work") 24 | } 25 | } 26 | 27 | 28 | //This function tests several types of ID and makes sure that the output is the expected one.*/ 29 | func TestBetterChoice(t *testing.T) { 30 | a := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 31 | b := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 32 | id := ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 33 | choice := id.BetterChoice(a, b) 34 | if (choice) {//choice should be false since they are the same 35 | t.Errorf("The BetterChoice does not work") 36 | } 37 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 38 | b = ID{1,2,3,4,5,6,8,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 39 | id = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 40 | choice = id.BetterChoice(a, b) 41 | if (!choice) {//choice should be true for the prefix 42 | t.Errorf("The BetterChoice does not work") 43 | } 44 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 45 | b = 
ID{1,2,3,4,5,6,7,7,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 46 | id =ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 47 | choice = id.BetterChoice(a, b) 48 | if (!choice) {//choice should be true becuase we get to 6 from 8 faster than to 7 49 | t.Errorf("The BetterChoice does not work", choice, a, b) 50 | } 51 | a = ID{1,2,3,4,5,6,7,8,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 52 | b = ID{1,2,3,4,5,6,7,7,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 53 | id =ID{1,2,3,4,5,6,7,6,9,5,11,12,13,14,15,0,2,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 54 | choice = id.BetterChoice(a, b) 55 | if (choice) {//choice should be false because it is closer to get to b (7) than 8 56 | t.Errorf("The BetterChoice does not work", choice, a, b) 57 | } 58 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4, 4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 59 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4, 5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 60 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 61 | choice = id.BetterChoice(a, b) 62 | if (!choice) {//choice should be true because it is faster to get to a (4) from 2 than to b(5) 63 | t.Errorf("The BetterChoice does not work", choice, a, b) 64 | } 65 | a = ID{13,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 66 | b = ID{7,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,5,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 67 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 68 | choice = id.BetterChoice(a, b) 69 | if (choice) {//choice should be false at the very beginning, 7 (b)is closer to 1 than 13(a) 70 | t.Errorf("The BetterChoice does not work", choice, a, b) 71 | } 72 | } 73 | 74 | //test for the Closer function. 
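// Judging from the cases below, Closer(a, b) is expected to return true only when
// a is strictly closer to the receiving ID than b, comparing the IDs as base-16
// numbers by absolute distance; identical arguments should yield false.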
75 | func TestCloser(t *testing.T) { 76 | 77 | a := ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 78 | id := ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 79 | b := ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3,0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13,2,8,9,12,13,0,9,8,5} 80 | choice := id.Closer(a, b) 81 | if (choice) {//Answer should be false because they are the same ids 82 | t.Errorf("The Closer does not work", choice, a, b) 83 | } 84 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12, 1,2,3,0, 4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 13,8,9,12,13,0,9,8,5} 85 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,11, 2,8,9,12,13,0,9,8,5} 86 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12, 1,2,3,0, 4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 10,8,9,12,13,0,9,8,5} 87 | choice = id.Closer(a, b) 88 | if (choice) {//Answer should be false because b is closer in absolute value 89 | t.Errorf("The Closer does not work", choice, a, b) 90 | } 91 | a = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3, 0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 15,8,9,12,13,0,9,8,5} 92 | id = ID{1,2,3,4,5,6,7,6,9,5,10,12,13,13,15,0,2,2,3,0,2,12,15,13,15,13,2,5,10,11,13, 13,8,9,12,13,0,9,8,5} 93 | b = ID{1,2,3,4,5,6,7,6,9,5,11,12,1,2,3, 0,4,4,3,0,2,12,15,13,15,13,2,5,10,11,13, 12,8,9,12,13,0,9,8,5} 94 | choice = id.Closer(a, b) 95 | if (choice) {//Answer should be false because b is closer in absolute value 96 | t.Errorf("The Closer does not work", choice, a, b) 97 | } 98 | //some more obvious 99 | a = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2} 100 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} 101 | b = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1} 102 | choice = id.Closer(a, b) 103 | if (choice) {//Answer should be false because b is closer in absolute value 104 | t.Errorf("The Closer does not work", choice, a, b) 105 | } 106 | //This one is tricky because it goes to the other digit 107 | a = ID {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13} 108 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15} 109 | b = ID {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0} 110 | choice = id.Closer(a, b) 111 | if (choice) {//Answer should be false because of the base change 112 | t.Errorf("The Closer does not work", choice, a, b) 113 | } 114 | a = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2} 115 | id = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5} 116 | b = ID{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1} 117 | choice = id.Closer(a, b) 118 | if (choice) {//Answer should be false because a has a 1 119 | t.Errorf("The Closer does not work", choice, a, b) 120 | } 121 | a = ID {1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2} 122 | id = ID{0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5} 123 | b = ID {1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1} 124 | choice = id.Closer(a, b) 125 | if (choice) {//Answer should be false (b) 126 | t.Errorf("The Closer does not work", choice, a, b) 127 | } 128 | a = ID {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2} 129 | id = 
ID{0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9}
130 | b = ID {0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,0}
131 | choice = id.Closer(a, b)
132 | if (choice) {//Answer should be b
133 | t.Errorf("The Closer does not work: %v %v %v", choice, a, b)
134 | }
135 | }
--------------------------------------------------------------------------------
/tapestry/README.md:
--------------------------------------------------------------------------------
1 | # Tapestry
2 |
3 | Tapestry is a distributed object location and retrieval (DOLR) system. Its design is based on the paper Tapestry: A Resilient Global-Scale Overlay for
4 | Service Deployment. It is an overlay network that implements simple key-based routing.
5 |
6 | # Usage Example
7 | [cli](cli.go) serves as a console for interacting with tapestry, creating nodes and querying state on the local nodes. It provides the following commands:
8 | * table Print this node’s routing table
9 | * backpointers Print this node’s backpointers
10 | * objects Print the object replicas stored on this node
11 | * put Stores the provided key-value pair on the local node and advertises the key to the tapestry
12 | * lookup Looks up the specified key in the tapestry and prints its location
13 | * get Looks up the specified key in the tapestry, then fetches the value from one of the returned replicas
14 | * remove Removes the value stored locally for the provided key and stops advertising the key to the tapestry
15 | * list List the keys currently being advertised by the local node
16 | * leave Instructs the local node to gracefully leave the Tapestry
17 | * kill Leaves the tapestry without graceful exit
18 | * exit Quit the CLI
19 |
20 | # Identifying Nodes and Objects
21 | Nodes and objects in the Tapestry network are each assigned a globally unique identifier consisting of a sequence of n base-16 digits.
22 |
23 | # Root Nodes
24 | In order to make it possible for any node in the network to find the location of an object, a single node is appointed as the “root” node for that object. A root node is the one which shares the same hash value as the object.
25 |
26 | # Surrogate Nodes
27 | There are typically fewer nodes in the network than possible values in the space of hash values, so a “surrogate” node for an object is chosen to be the one whose hash value shares as many prefix digits with the object’s hash value as possible.
28 |
29 | # Selecting the Surrogate Node
30 | Starting at the leftmost digit d, we take the set of nodes that have d as the leftmost digit of their hashes as well. If no such set of nodes exists, it is necessary to deterministically choose another set. To do this, we can try to find a set of nodes that share the digit d + 1 as their leftmost hash digit. Until a non-empty set of nodes is found, the value of the digit we are searching with increases (modulo the base of the hash value). Once the set has been found, the same logic can be applied for the next digit in the hash, choosing from the set of nodes we identified with the previous digit. When this algorithm has been applied for every digit, only one node will be left and that node is the surrogate.
31 |
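To make the digit-by-digit narrowing concrete, the following is a minimal, self-contained sketch of the selection loop described above; it is not this project's implementation. Node IDs are modeled as plain `[]int` slices, the candidate set is assumed to be non-empty, and any tie that remains after the last digit is broken arbitrarily.

```go
package main

import "fmt"

const base = 16

// chooseSurrogate narrows a candidate set digit by digit: at each position it
// keeps the candidates matching the object's digit, and if none match it
// advances the digit (mod 16) until some candidate does.
func chooseSurrogate(candidates [][]int, objectID []int) []int {
	for pos := 0; pos < len(objectID) && len(candidates) > 1; pos++ {
		for offset := 0; offset < base; offset++ {
			digit := (objectID[pos] + offset) % base
			var next [][]int
			for _, c := range candidates {
				if c[pos] == digit {
					next = append(next, c)
				}
			}
			if len(next) > 0 {
				candidates = next
				break
			}
		}
	}
	return candidates[0]
}

func main() {
	nodes := [][]int{{5, 8, 3}, {7, 0, 13}, {9, 0, 15}}
	fmt.Println(chooseSurrogate(nodes, []int{7, 1, 0})) // prints [7 0 13]
}
```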
32 | # Routing Tables
33 | In order to allow nodes to locate objects stored at other nodes, each node maintains a routing table that stores references to a subset of the nodes in the network.
34 |
35 | # Backpointer Tables
36 | Backpointers are references to every node in the network that refers to the local node in its own routing table. These are useful in maintaining routing tables in a dynamic network. When the local node adds or removes a remote node from its routing table, it notifies the remote node, which then updates its backpointer table.
37 |
38 | # Prefix Routing
39 | A node that matches some number of digits from the object’s hash may be chosen from the routing table. In turn, the selected node’s routing table is inspected and the next node in the route to the surrogate is chosen. At each successive node in the route, the number of digits that match the object’s hash value increases until the last digit has been matched and the surrogate node has been reached. This type of routing is called “prefix routing”. [findRoot](tapestry/tapestry-local.go#L94) has this logic.
40 |
41 | # Publishing and Retrieving Objects
42 | When an object is “published” by a node, that node routes towards the root node for the key, then registers itself on that node as a location of the key. Multiple nodes can publish the same object. A tapestry client wishing to look up the object will first route to the root node of the object. The root node then informs the client of which Tapestry nodes are the ones that have published the object. The client then directly contacts one or more of those publishers to retrieve the actual object data.
43 |
44 | # Adding Tapestry Nodes
45 | The new node is assigned its ID and then routes towards the root node for that ID. The root node initiates the transfer of all keys that should now be stored on the new node. The new node then iteratively traverses backpointers, starting from the root node, to populate its own routing table.
46 |
47 | # Acknowledged Multicast
48 | If the new node has a shared prefix of length n with its root, then any other node that also has a shared prefix of length n is called a need-to-know node. The root node performs an acknowledged multicast when it is contacted by the new node. The multicast eventually returns the full set of need-to-know nodes from the Tapestry. The multicast is a recursive call: the root node contacts all nodes on levels >= n of its routing table; those nodes contact all nodes on levels >= n + 1 of their routing tables; and so on. A node that is contacted during the multicast will initiate background transfer of relevant object references to the new node, trigger multicast to the next level of its routing table, then merge and return the resulting lists of nodes (removing duplicates). [AddNodeMulticast](tapestry/tapestry-local.go#L321) has this logic.
49 |
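As a rough illustration of the recursion just described, here is a small, self-contained sketch; it is not the project's `AddNodeMulticast`. Nodes are reduced to an ID plus a level-indexed routing table, the RPCs become plain function calls, and the background transfer of object references is left out.

```go
package main

import (
	"fmt"
	"sort"
)

// node is a toy stand-in for a Tapestry node: an ID plus a routing table
// indexed by level (the length of the prefix shared with the local ID).
type node struct {
	id    string
	table [][]*node
}

// ackMulticast returns this node's ID merged with the de-duplicated results of
// multicasting to every node on levels >= level of its table, each of which is
// asked to continue at level+1.
func ackMulticast(n *node, level int, seen map[string]bool) []string {
	if seen[n.id] {
		return nil
	}
	seen[n.id] = true
	result := []string{n.id}
	for l := level; l < len(n.table); l++ {
		for _, next := range n.table[l] {
			result = append(result, ackMulticast(next, level+1, seen)...)
		}
	}
	return result
}

func main() {
	c := &node{id: "997"}
	b := &node{id: "983", table: [][]*node{{}, {c}}}
	a := &node{id: "975", table: [][]*node{{}, {b, c}}}
	got := ackMulticast(a, 1, map[string]bool{})
	sort.Strings(got)
	fmt.Println(got) // prints [975 983 997]
}
```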
50 | # Backpointer Traversal
51 | Once the multicast has completed, the root node returns the list of need-to-know nodes to the new node. The new node uses this list as an initial neighbor set to populate its routing table. The node iteratively contacts the nodes, asking for their backpointers.
52 |
53 | # Graceful Exit
54 | Tapestry is extremely fault tolerant, so a node could leave without notifying any other nodes. However, a node can gracefully exit the Tapestry, too. When a node gracefully exits, it notifies all of the nodes in its backpointer table of the leave. As part of this notification, it consults its own routing table to suggest a suitable replacement for itself in the other node’s routing table.
55 |
56 | # Fault Tolerance
57 | The following mechanisms ensure that there is no single point of failure in the system:
58 |
59 | * Errors While Routing
60 | When routing towards a surrogate node, it is possible that a communication failure with any of the intermediate nodes could impede the search. For this reason, routing tables store lists of nodes rather than a single node at each slot. If a failed node is encountered, the node that is searching can request that the failed node be removed from any routing tables it encounters, and resume its search at the last node it communicated with successfully. If the last node it communicated with successfully is no longer responding, it should communicate with the last successful node before that.
61 |
62 | * Loss of Root Node
63 | Nodes that have published an object continually republish it at regular intervals. This ensures that if a surrogate node goes down, a new surrogate node will eventually take its place.
64 |
65 | * Loss of Replicas
66 | Finally, applications built on top of Tapestry might wish to ensure that an object remains available at all times, even if the node that published it fails.
67 | Multiple tapestry nodes can publish the same object, so client applications can learn of multiple locations for it; if the object becomes unavailable at one of these locations, the client can simply contact another of the publishing nodes.
68 |
69 | * Miscellaneous
70 | The cases listed above are the common issues which can arise due to network errors. There are other more obscure ways in which surrogates may become unreachable for a short time when nodes join or fail in a certain order. Tapestry’s method for dealing with this is to assume that there are enough seeded hash values for a given object that not all seeds will become unreachable due to such errors, and those which do become unreachable will be corrected when the replica performs its periodic republishing.
71 |
72 | # Future Work
73 | Reduce the number of hops in object lookups. This can be done by caching the nodes encountered along the path to the surrogate node when the location of an object is published to its surrogate node.
74 |
--------------------------------------------------------------------------------
/raft/raft/raftRPCApi.go:
--------------------------------------------------------------------------------
1 | package raft
2 |
3 | import (
4 | "fmt"
5 | "net/rpc"
6 | )
7 |
8 | /* RPC connection map cache */
9 | var connMap = make(map[string]*rpc.Client)
10 |
11 | /* */
12 | /* Join RPC, used when a node in the cluster is first starting up so */
13 | /* it can notify a leader what their listening address is. */
14 | /* */
15 | type JoinRequest struct {
16 | RemoteNode NodeAddr
17 | FromAddr NodeAddr
18 | }
19 |
20 | type JoinReply struct {
21 | Success bool
22 | }
23 |
24 | func JoinRPC(remoteNode *NodeAddr, fromAddr *NodeAddr) error {
25 | request := JoinRequest{RemoteNode: *remoteNode, FromAddr: *fromAddr}
26 | var reply JoinReply
27 | err := makeRemoteCall(remoteNode, "JoinImpl", request, &reply)
28 | if err != nil {
29 | return err
30 | }
31 | if !reply.Success {
32 | return fmt.Errorf("Unable to join Raft cluster\n")
33 | }
34 | return err
35 | }
36 |
37 | /* */
38 | /* StartNode RPC, once the first node in the cluster has all of the */
39 | /* addresses for all other nodes in the cluster it can then tell them */
40 | /* to transition into Follower state and start the Raft protocol.
*/ 41 | /* */ 42 | type StartNodeRequest struct { 43 | RemoteNode NodeAddr 44 | OtherNodes []NodeAddr 45 | } 46 | 47 | type StartNodeReply struct { 48 | Success bool 49 | } 50 | 51 | func StartNodeRPC(remoteNode NodeAddr, otherNodes []NodeAddr) error { 52 | request := StartNodeRequest{} 53 | request.RemoteNode = remoteNode 54 | 55 | request.OtherNodes = make([]NodeAddr, len(otherNodes)) 56 | for i, n := range otherNodes { 57 | request.OtherNodes[i].Addr = n.Addr 58 | request.OtherNodes[i].Id = n.Id 59 | } 60 | 61 | var reply StartNodeReply 62 | err := makeRemoteCall(&remoteNode, "StartNodeImpl", request, &reply) 63 | if err != nil { 64 | return err 65 | } 66 | return err 67 | } 68 | 69 | /* */ 70 | /* Raft RequestVote RPC, invoked by candidates to gather votes */ 71 | /* */ 72 | type RequestVoteRequest struct { 73 | /* The candidate's current term Id */ 74 | Term uint64 75 | 76 | /* The cadidate Id currently requesting a node to vote for it. */ 77 | CandidateId NodeAddr 78 | 79 | /* The index of the candidate's last log entry */ 80 | LastLogIndex uint64 81 | 82 | /* The term of the candidate's last log entry */ 83 | LastLogTerm uint64 84 | 85 | CurrentIndex uint64 86 | } 87 | 88 | type RequestVoteReply struct { 89 | /* The current term, for candidate to update itself */ 90 | Term uint64 91 | 92 | /* True means candidate received vote */ 93 | VoteGranted bool 94 | } 95 | 96 | func (r *RaftNode) RequestVoteRPC(remoteNode *NodeAddr, request RequestVoteRequest) (*RequestVoteReply, error) { 97 | if r.Testing.IsDenied(*r.GetLocalAddr(), *remoteNode) { 98 | return nil, ErrorTestingPolicyDenied 99 | } 100 | var reply RequestVoteReply 101 | err := makeRemoteCall(remoteNode, "RequestVoteImpl", request, &reply) 102 | if err != nil { 103 | return nil, err 104 | } 105 | return &reply, err 106 | } 107 | 108 | /* */ 109 | /* Raft AppendEntries RPC, invoked by leader to replicate log entries; */ 110 | /* also used as a heartbeat between leaders and followers. 
*/ 111 | /* */ 112 | type AppendEntriesRequest struct { 113 | /* The leader's term */ 114 | Term uint64 115 | 116 | /* The ID of the leader, so that followers can redirect clients */ 117 | LeaderId NodeAddr 118 | 119 | /* The index of the log entry immediately preceding new ones */ 120 | PrevLogIndex uint64 121 | 122 | /* The term of the prevLogIndex entry */ 123 | PrevLogTerm uint64 124 | 125 | /* The log entries the follower needs to store (empty for */ 126 | /* heartbeat; may send more than one for efficiency) */ 127 | Entries []LogEntry 128 | 129 | /* The leader's commitIndex */ 130 | LeaderCommit uint64 131 | } 132 | 133 | type AppendEntriesReply struct { 134 | /* The current term, for leader to update itself */ 135 | Term uint64 136 | 137 | /* True if follower contained entry matching prevLogIndex and prevLogTerm*/ 138 | Success bool 139 | } 140 | 141 | func (r *RaftNode) AppendEntriesRPC(remoteNode *NodeAddr, request AppendEntriesRequest) (*AppendEntriesReply, error) { 142 | if r.Testing.IsDenied(*r.GetLocalAddr(), *remoteNode) { 143 | return nil, ErrorTestingPolicyDenied 144 | } 145 | var reply AppendEntriesReply 146 | err := makeRemoteCall(remoteNode, "AppendEntriesImpl", request, &reply) 147 | if err != nil { 148 | return nil, err 149 | } 150 | return &reply, err 151 | } 152 | 153 | /* Node's can be in three possible states */ 154 | type ClientStatus int 155 | 156 | const ( 157 | OK ClientStatus = iota 158 | NOT_LEADER 159 | ELECTION_IN_PROGRESS 160 | REQ_FAILED 161 | ) 162 | 163 | type FsmCommand int 164 | 165 | const ( 166 | HASH_CHAIN_ADD FsmCommand = iota 167 | HASH_CHAIN_INIT 168 | CLIENT_REGISTRATION 169 | INIT 170 | NOOP 171 | //Adding commands to interact with filesystem 172 | REMOVE //For deleting 173 | //Commands that modify the map that raft is in charge of 174 | GET //for querying 175 | SET //for modifying files 176 | LOCK 177 | UNLOCK 178 | ) 179 | 180 | type ClientRequest struct { 181 | /* The unique client ID associated with this client session (received */ 182 | /* via a previous RegisterClient call). */ 183 | ClientId uint64 184 | 185 | /* A sequence number is associated to request to avoid duplicates */ 186 | SequenceNum uint64 187 | 188 | /* Command to be executed on the state machine; it may affect state */ 189 | Command FsmCommand 190 | 191 | /* Data to accompany the command to the state machine; it may affect state */ 192 | Data []byte 193 | } 194 | 195 | type ClientReply struct { 196 | /* OK if state machine successfully applied command */ 197 | Status ClientStatus 198 | 199 | /* State machine output, if successful */ 200 | Response string 201 | 202 | /* In cases where the client contacted a non-leader, the node should */ 203 | /* reply with the correct current leader. */ 204 | LeaderHint NodeAddr 205 | } 206 | 207 | func ClientRequestRPC(remoteNode *NodeAddr, request ClientRequest) (*ClientReply, error) { 208 | var reply ClientReply 209 | err := makeRemoteCall(remoteNode, "ClientRequestImpl", request, &reply) 210 | if err != nil { 211 | return nil, err 212 | } 213 | return &reply, err 214 | } 215 | 216 | type RegisterClientRequest struct { 217 | /* The client address invoking request */ 218 | FromNode NodeAddr 219 | } 220 | 221 | type RegisterClientReply struct { 222 | /* OK if state machine registered client */ 223 | Status ClientStatus 224 | 225 | /* Unique ID for client session */ 226 | ClientId uint64 227 | 228 | /* In cases where the client contacted a non-leader, the node should */ 229 | /* reply with the correct current leader. 
*/ 230 | LeaderHint NodeAddr 231 | } 232 | 233 | func RegisterClientRPC(remoteNode *NodeAddr, request RegisterClientRequest) (*RegisterClientReply, error) { 234 | var reply RegisterClientReply 235 | err := makeRemoteCall(remoteNode, "RegisterClientImpl", request, &reply) 236 | if err != nil { 237 | return nil, err 238 | } 239 | return &reply, err 240 | } 241 | 242 | /* Helper function to make a call to a remote node */ 243 | func makeRemoteCall(remoteNode *NodeAddr, method string, req interface{}, rsp interface{}) error { 244 | // Dial the server if we don't already have a connection to it 245 | remoteNodeAddrStr := remoteNode.Addr 246 | var err error 247 | client, ok := connMap[remoteNodeAddrStr] 248 | if !ok { 249 | client, err = rpc.Dial("tcp", remoteNode.Addr) 250 | if err != nil { 251 | return err 252 | } 253 | connMap[remoteNodeAddrStr] = client 254 | } 255 | 256 | // Make the request 257 | uniqueMethodName := fmt.Sprintf("%v.%v", remoteNodeAddrStr, method) 258 | // fmt.Println(uniqueMethodName) 259 | err = client.Call(uniqueMethodName, req, rsp) 260 | if err != nil { 261 | client.Close() 262 | delete(connMap, remoteNodeAddrStr) 263 | return err 264 | } 265 | 266 | return nil 267 | } 268 | -------------------------------------------------------------------------------- /tapestry/tapestry/tapestry_test.go: -------------------------------------------------------------------------------- 1 | package tapestry 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | /* 10 | PART 1) This test inserts several nodes and objects and 11 | PART 2) then inserts others to make sure that the root changes to the new nodes. 12 | 13 | PART 3) At the end it also makes sure that there is no replica in the previous node after the timeout. 14 | */ 15 | func TestChangeRoot(t *testing.T) { 16 | if DIGITS != 4 { 17 | t.Errorf("Test wont work unless DIGITS is set to 4.") 18 | return 19 | } 20 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second { 21 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.") 22 | return 23 | } 24 | //PART 1) 25 | port = 58000 26 | id := ID{5, 8, 3, 15} 27 | node0 := makeTapestry(id, "", t) 28 | id = ID{7, 0, 0xd, 1} 29 | node1 := makeTapestry(id, node0.local.node.Address, t) 30 | id = ID{9, 0, 0xf, 5} 31 | node2 := makeTapestry(id, node0.local.node.Address, t) 32 | id = ID{0xb, 0, 0xf, 0xa} 33 | node3 := makeTapestry(id, node0.local.node.Address, t) 34 | 35 | node0.Store("spoon", []byte("cuchara")) 36 | node1.Store("table", []byte("mesa")) 37 | node2.Store("chair", []byte("silla")) 38 | node3.Store("fork", []byte("tenedor")) 39 | 40 | //The root for the node is 41 | root, _ := node3.local.findRoot(node3.local.node, Hash("fork")) 42 | if !equal_ids(root.Id, node0.local.node.Id) { 43 | t.Errorf("The root for the fork is not node0, its %v\n", root.Id) 44 | } 45 | //PART 2) Now we insert a new node 46 | id = ID{0x5, 2, 0xa, 0xa} 47 | node4 := makeTapestry(id, node2.local.node.Address, t) 48 | node4.Store("napkin", []byte("servilleta")) 49 | 50 | //We wait the timeout 51 | time.Sleep(TIMEOUT + 1) 52 | 53 | //The root for fork should have changed to node4 54 | fmt.Printf("hash for fork: %v\n", Hash("fork")) 55 | fmt.Printf("hash for spoon: %v\n", Hash("spoon")) 56 | fmt.Printf("hash for table: %v\n", Hash("table")) 57 | fmt.Printf("hash for chair: %v\n", Hash("chair")) 58 | root2, _ := node2.local.findRoot(node2.local.node, Hash("fork")) 59 | if !equal_ids(root2.Id, node4.local.node.Id) { 60 | t.Errorf("The root for the fork is not 
node4, its %v\n", root2.Id) 61 | } 62 | //PART 3) We now make sure that the replica is no longer in the previous node 63 | replica := node0.local.store.Get("fork") 64 | if len(replica) != 0 { 65 | t.Errorf("This node still has a replica for another node %v", replica) 66 | } 67 | 68 | node1.Leave() 69 | node2.Leave() 70 | node3.Leave() 71 | node4.Leave() 72 | node0.Leave() 73 | } 74 | 75 | /* 76 | This test is the same as the previous but it does not have a timeout. It tests the transfer of keys during the 77 | AddNodeMulticast where keys are transfered to new joining node.*/ 78 | func TestTransferKeys(t *testing.T) { 79 | if DIGITS != 4 { 80 | t.Errorf("Test wont work unless DIGITS is set to 4.") 81 | return 82 | } 83 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second { 84 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.") 85 | return 86 | } 87 | port = 58000 88 | id := ID{5, 8, 3, 15} 89 | node0 := makeTapestry(id, "", t) 90 | id = ID{7, 0, 0xd, 1} 91 | node1 := makeTapestry(id, node0.local.node.Address, t) 92 | id = ID{9, 0, 0xf, 5} 93 | node2 := makeTapestry(id, node0.local.node.Address, t) 94 | id = ID{0xb, 0, 0xf, 0xa} 95 | node3 := makeTapestry(id, node0.local.node.Address, t) 96 | 97 | node0.Store("spoon", []byte("cuchara")) 98 | node1.Store("table", []byte("mesa")) 99 | node2.Store("chair", []byte("silla")) 100 | node3.Store("fork", []byte("tenedor")) 101 | 102 | //The root for the node is 103 | root, _ := node3.local.findRoot(node3.local.node, Hash("fork")) 104 | if !equal_ids(root.Id, node0.local.node.Id) { 105 | t.Errorf("The root for the fork is not node0, its %v\n", root.Id) 106 | } 107 | //Now we insert a new node 108 | id = ID{0x5, 2, 0xa, 0xa} 109 | node4 := makeTapestry(id, node2.local.node.Address, t) 110 | node4.Store("napkin", []byte("servilleta")) 111 | 112 | // //The root for spoon should have changed to node4 113 | fmt.Printf("hash for fork: %v\n", Hash("fork")) 114 | fmt.Printf("hash for spoon: %v\n", Hash("spoon")) 115 | fmt.Printf("hash for table: %v\n", Hash("table")) 116 | fmt.Printf("hash for chair: %v\n", Hash("chair")) 117 | root2, _ := node2.local.findRoot(node2.local.node, Hash("fork")) 118 | if !equal_ids(root2.Id, node4.local.node.Id) { 119 | t.Errorf("The root for the fork is not node4, its %v\n", root2.Id) 120 | } 121 | //We now make sure that the replica is no longer in the previous node 122 | replica := node0.local.store.Get("fork") 123 | if len(replica) != 0 { 124 | t.Errorf("This node still has a replica for another node %v", replica) 125 | } 126 | 127 | node1.Leave() 128 | node2.Leave() 129 | node3.Leave() 130 | node4.Leave() 131 | node0.Leave() 132 | } 133 | 134 | /* 135 | PART 1) This test first adds several nodes, and adds several objects to the nodes. 136 | It then checks for the existance of objects from several nodes. 137 | 138 | PART 2)Then it deletes one node and makes sure that the object it had 139 | "spoon" is no longer available 140 | 141 | PART 3)Then a new node with the "spoon" object joins and makes sure that it is available through another node. 
142 | */
143 | func TestPublishAndRegister(t *testing.T) {
144 | if DIGITS != 4 {
145 | t.Errorf("Test won't work unless DIGITS is set to 4.")
146 | return
147 | }
148 | if TIMEOUT > 3*time.Second && REPUBLISH > 2*time.Second {
149 | t.Errorf("Test will take too long unless TIMEOUT is set to 3 and REPUBLISH is set to 2.")
150 | return
151 | }
152 | //PART 1
153 | port = 58000
154 | id := ID{5, 8, 3, 15}
155 | node0 := makeTapestry(id, "", t)
156 | id = ID{7, 0, 0xd, 1}
157 | node1 := makeTapestry(id, node0.local.node.Address, t)
158 | id = ID{9, 0, 0xf, 5}
159 | node2 := makeTapestry(id, node0.local.node.Address, t)
160 | id = ID{0xb, 0, 0xf, 0xa}
161 | node3 := makeTapestry(id, node0.local.node.Address, t)
162 |
163 | node0.Store("spoon", []byte("cuchara"))
164 | node1.Store("table", []byte("mesa"))
165 | node2.Store("chair", []byte("silla"))
166 | node3.Store("fork", []byte("tenedor"))
167 |
168 | time.Sleep(time.Second * 5)
169 |
170 | // Objects should persist after TIMEOUT seconds because
171 | // publish is called every two seconds.
172 | result, err := node1.Get("spoon")
173 | CheckGet(err, result, "cuchara", t)
174 | result, err = node2.Get("table")
175 | CheckGet(err, result, "mesa", t)
176 | result, err = node3.Get("chair")
177 | CheckGet(err, result, "silla", t)
178 | result, err = node0.Get("fork")
179 | CheckGet(err, result, "tenedor", t)
180 |
181 | // PART 2) Root node of Hash(spoon) should no longer have a record
182 | // of this object after node0 leaves after TIMEOUT seconds.
183 | root := FindRootOfHash([]*Tapestry{node1, node2, node3}, Hash("chair"))
184 | fmt.Printf("The root is: %v and the node0 id is: %v", root, node0.local.node.Id)
185 | node0.Leave()
186 | //fmt.Printf("The root is: %v and the node0 id is: %v", root.local.node.Id, node0.local.node.Id)
187 | if root == nil {
188 | t.Errorf("Could not find Root of Hash")
189 | } else {
190 | replicas := root.local.store.Get("spoon")
191 | if len(replicas) == 0 {
192 | t.Errorf("Replica of 'spoon' not in root node. What?")
193 | } else {
194 | time.Sleep(time.Second * 5)
195 | replicas = root.local.store.Get("spoon")
196 | if len(replicas) != 0 {
197 | t.Errorf("Replica of 'spoon' is in root node after node containing it left.")
198 | }
199 | }
200 | }
201 | //PART 3) We add a new node that contains spoon and we should find it.
202 | id = ID{0x5, 2, 0xa, 0xa}
203 | node4 := makeTapestry(id, node2.local.node.Address, t)
204 | node4.Store("spoon", []byte("cuchara"))
205 | time.Sleep(time.Second * 5)
206 | replicas, _ := node1.local.tapestry.Get("spoon")
207 | fmt.Printf("id of root is: %v\n", root.local.node.Id)
208 | if len(replicas) == 0 {
209 | t.Errorf("'spoon' is not there even after a new node containing it joined")
210 | }
211 |
212 | node1.Leave()
213 | node2.Leave()
214 | node3.Leave()
215 | node4.Leave()
216 | }
217 |
218 | /*Helper function to compare a result with an expected string.*/
219 | func CheckGet(err error, result []byte, expected string, t *testing.T) {
220 | if err != nil {
221 | t.Errorf("Get errored out.
returned: %v", err) 222 | return 223 | } 224 | 225 | if string(result) != expected { 226 | t.Errorf("Get(\"%v\") did not return expected result '%v'", 227 | string(result), expected) 228 | } 229 | } 230 | /*Helper function that returns the root of an ID from a slice of nodes*/ 231 | func FindRootOfHash(nodes []*Tapestry, hash ID) *Tapestry { 232 | if len(nodes) == 0 { 233 | return nil 234 | } 235 | root, _ := nodes[0].local.findRoot(nodes[0].local.node, hash) 236 | 237 | for _, node := range nodes { 238 | if equal_ids(node.local.node.Id, root.Id) { 239 | return node 240 | } 241 | } 242 | 243 | return nil 244 | } -------------------------------------------------------------------------------- /raft/raft/persistenceAPI.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | type NodeStableState struct { 10 | /* Latest term the server has seen (initialized */ 11 | /* to 0 on first boot, increases monotonically) */ 12 | CurrentTerm uint64 13 | 14 | /* The candidate Id that received our vote in */ 15 | /* the current term (or "" if none). */ 16 | VotedFor string 17 | 18 | /* Our local listening address and Id */ 19 | LocalAddr NodeAddr 20 | 21 | /* The addresses of everyone in our cluster */ 22 | OtherNodes []NodeAddr 23 | 24 | /* Client request cache, maps a client request */ 25 | /* to the response that was sent to them. */ 26 | ClientRequestSequences map[string]ClientReply 27 | } 28 | 29 | type LogEntry struct { 30 | /* Index of log entry (first index = 1) */ 31 | Index uint64 32 | 33 | /* The term that this entry was in when added */ 34 | TermId uint64 35 | 36 | /* Command associated with this log entry in */ 37 | /* the user's finite-state-machine. */ 38 | Command FsmCommand 39 | 40 | /* Data associated with this log entry in the */ 41 | /* user's finite-state-machine. */ 42 | Data []byte 43 | 44 | /* After processing this log entry, what ID to */ 45 | /* use when caching the response. Use an empty */ 46 | /* string to not cache at all */ 47 | CacheId string 48 | } 49 | 50 | type FileData struct { 51 | /* Active file descriptor of to file */ 52 | fd *os.File 53 | 54 | /* Size of file after reading it in and after writes */ 55 | size int64 56 | 57 | /* Filename of file */ 58 | filename string 59 | 60 | /* Map from LogEntry index to size of file before that index starts */ 61 | idxMap map[uint64]int64 62 | 63 | /* Is the fd open or not? 
*/ 64 | open bool 65 | } 66 | 67 | func (r *RaftNode) initStableStore() (bool, error) { 68 | freshNode := false 69 | // Create log path directory if it doesn't already exist 70 | err := os.Mkdir(r.conf.LogPath, 0777) 71 | if err == nil { 72 | Out.Printf("Created log directory: %v\n", r.conf.LogPath) 73 | } 74 | if err != nil && !os.IsExist(err) { 75 | Error.Printf("error creating dir %v\n", err) 76 | return freshNode, err 77 | } 78 | 79 | r.logFileDescriptor = FileData{ 80 | fd: nil, 81 | size: 0, 82 | filename: fmt.Sprintf("%v/%d_raftlog.dat", r.conf.LogPath, r.listenPort), 83 | } 84 | r.metaFileDescriptor = FileData{ 85 | fd: nil, 86 | size: 0, 87 | filename: fmt.Sprintf("%v/%d_raftmeta.dat", r.conf.LogPath, r.listenPort), 88 | } 89 | raftLogSize, raftLogExists := getFileInfo(r.logFileDescriptor.filename) 90 | r.logFileDescriptor.size = raftLogSize 91 | 92 | raftMetaSize, raftMetaExists := getFileInfo(r.metaFileDescriptor.filename) 93 | r.metaFileDescriptor.size = raftMetaSize 94 | 95 | // Previous state exists, re-populate everything 96 | if raftLogExists && raftMetaExists { 97 | fmt.Printf("Reloading previous raftlog (%v) and raftmeta (%v)\n", 98 | r.logFileDescriptor.filename, r.metaFileDescriptor.filename) 99 | // Read in previous log and populate index mappings 100 | entries, err := ReadRaftLog(&r.logFileDescriptor) 101 | if err != nil { 102 | Error.Printf("Error reading in raft log: %v\n", err) 103 | return freshNode, err 104 | } 105 | r.logCache = entries 106 | 107 | // Create append-only file descriptor for later writing out of log entries. 108 | err = openRaftLogForWrite(&r.logFileDescriptor) 109 | if err != nil { 110 | Error.Printf("Error opening raftlog for write: %v\n", err) 111 | return freshNode, err 112 | } 113 | 114 | // Read in previous metalog and set cache 115 | ss, err := ReadStableState(&r.metaFileDescriptor) 116 | if err != nil { 117 | Error.Printf("Error reading stable state: %v\n", err) 118 | return freshNode, err 119 | } 120 | r.stableState = *ss 121 | 122 | } else if (!raftLogExists && raftMetaExists) || (raftLogExists && !raftMetaExists) { 123 | Error.Println("Both raftlog and raftmeta files must exist to proceed!") 124 | err = errors.New("Both raftlog and raftmeta files must exist to start this node") 125 | return freshNode, err 126 | 127 | } else { 128 | // We now assume neither file exists, so let's create new ones 129 | freshNode = true 130 | Out.Printf("Creating new raftlog and raftmeta files") 131 | err := CreateRaftLog(&r.logFileDescriptor) 132 | if err != nil { 133 | Error.Printf("Error creating new raftlog: %v\n", err) 134 | return freshNode, err 135 | } 136 | err = CreateStableState(&r.metaFileDescriptor) 137 | if err != nil { 138 | Error.Printf("Error creating new stable state: %v\n", err) 139 | return freshNode, err 140 | } 141 | 142 | // Init other nodes to zero, this will become populated 143 | r.stableState.OtherNodes = make([]NodeAddr, 0) 144 | 145 | // Init client request cache 146 | r.stableState.ClientRequestSequences = make(map[string]ClientReply) 147 | 148 | // No previous log cache exists, so a fresh one must be created. 149 | r.logCache = make([]LogEntry, 0) 150 | 151 | // If the log is empty we need to bootstrap it by adding the first committed entry. 
152 | initEntry := LogEntry{ 153 | Index: 0, 154 | TermId: r.GetCurrentTerm(), 155 | Command: INIT, 156 | Data: []byte{0}, 157 | } 158 | r.appendLogEntry(initEntry) 159 | r.setCurrentTerm(0) 160 | } 161 | 162 | return freshNode, nil 163 | } 164 | 165 | /* Raft metadata setters/getters */ 166 | func (r *RaftNode) setCurrentTerm(newTerm uint64) { 167 | r.ssMutex.Lock() 168 | defer r.ssMutex.Unlock() 169 | if newTerm != r.stableState.CurrentTerm { 170 | Out.Printf("(%v) Setting current term from %v -> %v", r.Id, r.stableState.CurrentTerm, newTerm) 171 | } 172 | r.stableState.CurrentTerm = newTerm 173 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 174 | if err != nil { 175 | Error.Printf("Unable to flush new term to disk: %v\n", err) 176 | panic(err) 177 | } 178 | } 179 | 180 | func (r *RaftNode) GetCurrentTerm() uint64 { 181 | return r.stableState.CurrentTerm 182 | } 183 | 184 | func (r *RaftNode) setVotedFor(candidateId string) { 185 | r.ssMutex.Lock() 186 | defer r.ssMutex.Unlock() 187 | r.stableState.VotedFor = candidateId 188 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 189 | if err != nil { 190 | Error.Printf("Unable to flush new votedFor to disk: %v\n", err) 191 | panic(err) 192 | } 193 | } 194 | 195 | func (r *RaftNode) GetVotedFor() string { 196 | return r.stableState.VotedFor 197 | } 198 | 199 | func (r *RaftNode) setLocalAddr(localAddr *NodeAddr) { 200 | r.ssMutex.Lock() 201 | defer r.ssMutex.Unlock() 202 | r.stableState.LocalAddr = *localAddr 203 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 204 | if err != nil { 205 | Error.Printf("Unable to flush new localaddr to disk: %v\n", err) 206 | panic(err) 207 | } 208 | } 209 | 210 | func (r *RaftNode) GetLocalAddr() *NodeAddr { 211 | return &r.stableState.LocalAddr 212 | } 213 | 214 | func (r *RaftNode) GetOtherNodes() []NodeAddr { 215 | return r.stableState.OtherNodes 216 | } 217 | 218 | func (r *RaftNode) SetOtherNodes(nodes []NodeAddr) { 219 | r.ssMutex.Lock() 220 | defer r.ssMutex.Unlock() 221 | r.stableState.OtherNodes = nodes 222 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 223 | if err != nil { 224 | Error.Printf("Unable to flush new other nodes to disk: %v\n", err) 225 | panic(err) 226 | } 227 | } 228 | 229 | func (r *RaftNode) AppendOtherNodes(other NodeAddr) { 230 | r.ssMutex.Lock() 231 | defer r.ssMutex.Unlock() 232 | r.stableState.OtherNodes = append(r.stableState.OtherNodes, other) 233 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 234 | if err != nil { 235 | Error.Printf("Unable to flush new other nodes to disk: %v\n", err) 236 | panic(err) 237 | } 238 | } 239 | 240 | func (r *RaftNode) CheckRequestCache(clientReq ClientRequest) (*ClientReply, bool) { 241 | uniqueId := fmt.Sprintf("%v-%v", clientReq.ClientId, clientReq.SequenceNum) 242 | val, ok := r.stableState.ClientRequestSequences[uniqueId] 243 | if ok { 244 | return &val, ok 245 | } else { 246 | return nil, ok 247 | } 248 | } 249 | 250 | func (r *RaftNode) AddRequest(uniqueId string, reply ClientReply) error { 251 | r.ssMutex.Lock() 252 | defer r.ssMutex.Unlock() 253 | _, ok := r.stableState.ClientRequestSequences[uniqueId] 254 | if ok { 255 | return errors.New("Request with the same clientId and seqNum already exists!") 256 | } 257 | r.stableState.ClientRequestSequences[uniqueId] = reply 258 | 259 | err := WriteStableState(&r.metaFileDescriptor, r.stableState) 260 | if err != nil { 261 | Error.Printf("Unable to flush new client request to disk: %v\n", err) 262 | panic(err) 263 | 
} 264 | 265 | return nil 266 | } 267 | 268 | /* Raft log setters/getters */ 269 | func (r *RaftNode) getLogEntry(index uint64) *LogEntry { 270 | if index < uint64(len(r.logCache)) { 271 | return &r.logCache[index] 272 | } else { 273 | return nil 274 | } 275 | } 276 | 277 | func (r *RaftNode) getLastLogEntry() *LogEntry { 278 | return r.getLogEntry(r.getLastLogIndex()) 279 | } 280 | 281 | func (r *RaftNode) getLogEntries(start, end uint64) []LogEntry { 282 | if start < uint64(len(r.logCache)) { 283 | if end > uint64(len(r.logCache)) { 284 | end = uint64(len(r.logCache)) 285 | } else { 286 | end++ 287 | } 288 | return r.logCache[start:end] 289 | } else { 290 | return make([]LogEntry, 0) 291 | } 292 | } 293 | 294 | func (r *RaftNode) getLastLogIndex() uint64 { 295 | return uint64(len(r.logCache) - 1) 296 | } 297 | 298 | func (r *RaftNode) getLastLogTerm() uint64 { 299 | return r.getLogEntry(r.getLastLogIndex()).TermId 300 | } 301 | 302 | func (r *RaftNode) getLogTerm(index uint64) uint64 { 303 | return r.getLogEntry(index).TermId 304 | } 305 | 306 | func (r *RaftNode) appendLogEntry(entry LogEntry) error { 307 | // write entry to disk 308 | err := AppendLogEntry(&r.logFileDescriptor, &entry) 309 | if err != nil { 310 | return err 311 | } 312 | // update entry in cache 313 | r.logCache = append(r.logCache, entry) 314 | return nil 315 | } 316 | 317 | // Truncate file to remove everything at index and after it (an inclusive truncation!) 318 | func (r *RaftNode) truncateLog(index uint64) error { 319 | err := TruncateLog(&r.logFileDescriptor, index) 320 | if err != nil { 321 | return err 322 | } 323 | 324 | // Truncate cache as well 325 | r.logCache = r.logCache[:index] 326 | return nil 327 | } 328 | 329 | func CreateFileData(filename string) FileData { 330 | fileData := FileData{} 331 | fileData.filename = filename 332 | return fileData 333 | } 334 | 335 | func (r *RaftNode) RemoveLogs() error { 336 | r.logFileDescriptor.fd.Close() 337 | r.logFileDescriptor.open = false 338 | err := os.Remove(r.logFileDescriptor.filename) 339 | if err != nil { 340 | r.Error("Unable to remove raftlog file") 341 | return err 342 | } 343 | 344 | r.metaFileDescriptor.fd.Close() 345 | r.metaFileDescriptor.open = false 346 | err = os.Remove(r.metaFileDescriptor.filename) 347 | if err != nil { 348 | r.Error("Unable to remove raftmeta file") 349 | return err 350 | } 351 | 352 | return nil 353 | } -------------------------------------------------------------------------------- /raft/raft/persistenceImpl.go: -------------------------------------------------------------------------------- 1 | package raft 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "os" 10 | ) 11 | 12 | /* */ 13 | /* Main functions to assist with interacting with log entries, etc. 
*/ 14 | /* */ 15 | 16 | func openRaftLogForWrite(fileData *FileData) error { 17 | if fileExists(fileData.filename) { 18 | fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600) 19 | fileData.fd = fd 20 | fileData.open = true 21 | return err 22 | } else { 23 | return errors.New("Raftfile does not exist") 24 | } 25 | } 26 | 27 | func CreateRaftLog(fileData *FileData) error { 28 | fd, err := os.OpenFile(fileData.filename, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600) 29 | fileData.fd = fd 30 | fileData.size = int64(0) 31 | fileData.idxMap = make(map[uint64]int64) 32 | fileData.open = true 33 | return err 34 | } 35 | 36 | func ReadRaftLog(fileData *FileData) ([]LogEntry, error) { 37 | f, err := os.Open(fileData.filename) 38 | defer f.Close() 39 | fileData.idxMap = make(map[uint64]int64) 40 | 41 | entries := make([]LogEntry, 0) 42 | 43 | fileLocation := int64(0) 44 | for err != io.EOF { 45 | size, err := readStructSize(f) 46 | if err != nil { 47 | if err == io.EOF { 48 | break 49 | } 50 | Error.Printf("Error reading struct size: %v at loc: %v\n", err, fileLocation) 51 | fileData.open = false 52 | return entries, err 53 | } 54 | 55 | entry, err := readLogEntry(f, size) 56 | if err != nil { 57 | Error.Printf("Error reading log entry: %v at loc: %v\n", err, fileLocation) 58 | fileData.open = false 59 | return entries, err 60 | } 61 | fileData.idxMap[entry.Index] = fileLocation 62 | fileLocation += INT_GOB_SIZE + int64(size) 63 | entries = append(entries, *entry) 64 | } 65 | 66 | fileData.open = false 67 | return entries, nil 68 | } 69 | 70 | func AppendLogEntry(fileData *FileData, entry *LogEntry) error { 71 | sizeIdx := fileData.size 72 | 73 | logBytes, err := getLogEntryBytes(entry) 74 | if err != nil { 75 | return err 76 | } 77 | size, err := getSizeBytes(len(logBytes)) 78 | if err != nil { 79 | return err 80 | } 81 | 82 | numBytesWritten, err := fileData.fd.Write(size) 83 | if err != nil { 84 | return err 85 | } 86 | if int64(numBytesWritten) != INT_GOB_SIZE { 87 | panic("int gob size is not correct, cannot proceed") 88 | } 89 | fileData.size += int64(numBytesWritten) 90 | 91 | err = fileData.fd.Sync() 92 | if err != nil { 93 | return err 94 | } 95 | 96 | numBytesWritten, err = fileData.fd.Write(logBytes) 97 | if err != nil { 98 | return err 99 | } 100 | if numBytesWritten != len(logBytes) { 101 | panic("did not write correct amount of bytes for some reason for log entry") 102 | } 103 | fileData.size += int64(numBytesWritten) 104 | 105 | err = fileData.fd.Sync() 106 | if err != nil { 107 | return err 108 | } 109 | 110 | // Update index mapping for this entry 111 | fileData.idxMap[entry.Index] = int64(sizeIdx) 112 | 113 | return nil 114 | } 115 | 116 | func TruncateLog(raftLogFd *FileData, index uint64) error { 117 | newFileSize, exist := raftLogFd.idxMap[index] 118 | if !exist { 119 | return fmt.Errorf("Truncation failed, log index %v doesn't exist\n", index) 120 | } 121 | 122 | // Windows does not allow truncation of open file, must close first 123 | raftLogFd.fd.Close() 124 | err := os.Truncate(raftLogFd.filename, newFileSize) 125 | if err != nil { 126 | return err 127 | } 128 | fd, err := os.OpenFile(raftLogFd.filename, os.O_APPEND|os.O_WRONLY, 0600) 129 | raftLogFd.fd = fd 130 | 131 | for i := index; i < uint64(len(raftLogFd.idxMap)); i++ { 132 | delete(raftLogFd.idxMap, i) 133 | } 134 | raftLogFd.size = newFileSize 135 | return nil 136 | } 137 | 138 | /* */ 139 | /* Main functions to assist with interacting with stable state entries, etc. 
*/
140 | /* */
141 | func openStableStateForWrite(fileData *FileData) error {
142 | if fileExists(fileData.filename) {
143 | fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600)
144 | fileData.fd = fd
145 | fileData.open = true
146 | return err
147 | } else {
148 | return errors.New("Stable state file does not exist")
149 | }
150 | }
151 |
152 | func CreateStableState(fileData *FileData) error {
153 | fd, err := os.OpenFile(fileData.filename, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
154 | fileData.fd = fd
155 | fileData.open = true
156 | return err
157 | }
158 |
159 | func ReadStableState(fileData *FileData) (*NodeStableState, error) {
160 | f, err := os.Open(fileData.filename)
161 | if err != nil { return nil, err }
162 | stat, err := f.Stat()
163 | if err != nil {
164 | f.Close()
165 | return nil, err
166 | }
167 |
168 | ss, err := readStableStateEntry(f, int(stat.Size()))
169 | f.Close()
170 |
171 | if err != nil {
172 | // for some reason we failed to read our stable state file, try backup file.
173 | backupFilename := fmt.Sprintf("%v.bak", fileData.filename)
174 | fbak, err := os.Open(backupFilename)
175 | if err != nil { return nil, err }
176 | stat, err := fbak.Stat()
177 | if err != nil {
178 | fbak.Close()
179 | return nil, err
180 | }
181 |
182 | ss, err := readStableStateEntry(fbak, int(stat.Size()))
183 | if err != nil {
184 | Error.Printf("we were unable to read stable storage or its backup: %v\n", err)
185 | fbak.Close()
186 | return nil, err
187 | }
188 | fbak.Close()
189 |
190 | // we were successful reading from backup, move to live copy
191 | err = os.Remove(fileData.filename)
192 | if err != nil {
193 | return nil, err
194 | }
195 | err = copyFile(backupFilename, fileData.filename)
196 | if err != nil {
197 | return nil, err
198 | }
199 |
200 | return ss, nil
201 | }
202 |
203 | return ss, nil
204 | }
205 |
206 | func WriteStableState(fileData *FileData, ss NodeStableState) error {
207 | // backup old stable state
208 | backupFilename := fmt.Sprintf("%v.bak", fileData.filename)
209 | err := backupStableState(fileData, backupFilename)
210 | if err != nil {
211 | return fmt.Errorf("Backup failed: %v", err)
212 | }
213 |
214 | // Windows does not allow truncation of open file, must close first
215 | fileData.fd.Close()
216 |
217 | // truncate live stable state
218 | err = os.Truncate(fileData.filename, 0)
219 | if err != nil {
220 | return fmt.Errorf("Truncation failed: %v", err)
221 | }
222 | fd, err := os.OpenFile(fileData.filename, os.O_APPEND|os.O_WRONLY, 0600)
223 | fileData.fd = fd
224 |
225 | // write out stable state to live version
226 | bytes, err := getStableStateBytes(ss)
227 | if err != nil {
228 | return err
229 | }
230 |
231 | numBytes, err := fileData.fd.Write(bytes)
232 | if numBytes != len(bytes) {
233 | panic("did not write correct amount of bytes for some reason for ss")
234 | }
235 |
236 | err = fileData.fd.Sync()
237 | if err != nil {
238 | return fmt.Errorf("Sync #2 failed: %v", err)
239 | }
240 |
241 | // remove backup file
242 | err = os.Remove(backupFilename)
243 | if err != nil && !os.IsNotExist(err) {
244 | return fmt.Errorf("Remove failed: %v", err)
245 | }
246 |
247 | return nil
248 | }
249 |
250 | func backupStableState(fileData *FileData, backupFilename string) error {
251 | if fileData.open && fileData.fd != nil {
252 | err := fileData.fd.Close()
253 | fileData.open = false
254 | if err != nil {
255 | return fmt.Errorf("Closing file failed: %v", err)
256 | }
257 | }
258 |
259 | err := os.Remove(backupFilename)
260 | if err != nil && !os.IsNotExist(err) {
261 | return fmt.Errorf("Remove
failed: %v", err) 262 | } 263 | 264 | err = copyFile(fileData.filename, backupFilename) 265 | if err != nil { 266 | return fmt.Errorf("File copy failed: %v", err) 267 | } 268 | 269 | err = openStableStateForWrite(fileData) 270 | if err != nil { 271 | return fmt.Errorf("Opening stable state for writing failed: %v", err) 272 | } 273 | 274 | return nil 275 | } 276 | 277 | func copyFile(srcFile string, dstFile string) error { 278 | src, err := os.Open(srcFile) 279 | if err != nil { 280 | return err 281 | } 282 | 283 | dst, err := os.Create(dstFile) 284 | if err != nil { 285 | return err 286 | } 287 | 288 | _, err = io.Copy(dst, src) 289 | if err != nil { 290 | return err 291 | } 292 | 293 | err = src.Close() 294 | if err != nil { 295 | fmt.Errorf("Error closing src file") 296 | return err 297 | } 298 | 299 | err = dst.Close() 300 | if err != nil { 301 | fmt.Errorf("Error closing dst file") 302 | return err 303 | } 304 | return nil 305 | } 306 | 307 | /* */ 308 | /* Helper functions to assist with read/writing log entries, etc. */ 309 | /* */ 310 | 311 | const INT_GOB_SIZE int64 = 5 312 | 313 | func getStableStateBytes(ss NodeStableState) ([]byte, error) { 314 | b := new(bytes.Buffer) 315 | e := gob.NewEncoder(b) 316 | err := e.Encode(ss) 317 | if err != nil { 318 | return nil, err 319 | } 320 | return b.Bytes(), nil 321 | } 322 | 323 | func getSizeBytes(size int) ([]byte, error) { 324 | b := new(bytes.Buffer) 325 | e := gob.NewEncoder(b) 326 | err := e.Encode(size) 327 | if err != nil { 328 | return nil, err 329 | } 330 | return b.Bytes(), nil 331 | } 332 | 333 | func getLogEntryBytes(entry *LogEntry) ([]byte, error) { 334 | b := new(bytes.Buffer) 335 | e := gob.NewEncoder(b) 336 | err := e.Encode(*entry) 337 | if err != nil { 338 | return nil, err 339 | } 340 | return b.Bytes(), nil 341 | } 342 | 343 | func readStructSize(f *os.File) (int, error) { 344 | // Read bytes for size value 345 | b := make([]byte, INT_GOB_SIZE) 346 | sizeBytes, err := f.Read(b) 347 | if err != nil { 348 | return -1, err 349 | } 350 | if int64(sizeBytes) != INT_GOB_SIZE { 351 | panic("The raftlog may be corrupt, cannot proceed") 352 | } 353 | 354 | // Decode bytes as int, which is sizeof(LogEntry). 
355 | buff := bytes.NewBuffer(b) 356 | var size int 357 | dataDecoder := gob.NewDecoder(buff) 358 | err = dataDecoder.Decode(&size) 359 | if err != nil { 360 | return -1, err 361 | } 362 | 363 | return size, nil 364 | } 365 | 366 | func readLogEntry(f *os.File, size int) (*LogEntry, error) { 367 | b := make([]byte, size) 368 | leSize, err := f.Read(b) 369 | if err != nil { 370 | return nil, err 371 | } 372 | if leSize != size { 373 | panic("The raftlog may be corrupt, cannot proceed") 374 | } 375 | 376 | buff := bytes.NewBuffer(b) 377 | var entry LogEntry 378 | dataDecoder := gob.NewDecoder(buff) 379 | err = dataDecoder.Decode(&entry) 380 | if err != nil { 381 | return nil, err 382 | } 383 | 384 | return &entry, nil 385 | } 386 | 387 | func readStableStateEntry(f *os.File, size int) (*NodeStableState, error) { 388 | b := make([]byte, size) 389 | leSize, err := f.Read(b) 390 | if err != nil { 391 | return nil, err 392 | } 393 | if leSize != size { 394 | panic("The stable state log may be corrupt, cannot proceed") 395 | } 396 | 397 | buff := bytes.NewBuffer(b) 398 | var ss NodeStableState 399 | dataDecoder := gob.NewDecoder(buff) 400 | err = dataDecoder.Decode(&ss) 401 | if err != nil { 402 | return nil, err 403 | } 404 | 405 | return &ss, nil 406 | } 407 | 408 | func fileExists(filename string) bool { 409 | _, err := os.Stat(filename) 410 | if err == nil { 411 | return true 412 | } else if os.IsNotExist(err) { 413 | return false 414 | } else { 415 | panic(err) 416 | } 417 | } 418 | 419 | func getFileInfo(filename string) (int64, bool) { 420 | stat, err := os.Stat(filename) 421 | if err == nil { 422 | return stat.Size(), true 423 | } else if os.IsNotExist(err) { 424 | return 0, false 425 | } else { 426 | panic(err) 427 | } 428 | } --------------------------------------------------------------------------------