├── pkg ├── util │ └── service │ │ └── service.go ├── actuator │ ├── mock │ │ ├── mock_actuator_test.go │ │ └── mock_actuator.go │ ├── util │ │ └── parents.go │ ├── rpc │ │ └── actuator.go │ └── actuator.go ├── api │ ├── load_info.go │ ├── storage.go │ ├── types.go │ ├── key.go │ ├── range_info.go │ ├── range_id.go │ ├── node_id.go │ ├── placement_state.go │ ├── action.go │ ├── command.go │ ├── range_state.go │ ├── zzz_action_string.go │ ├── zzz_range_state.go │ ├── node.go │ ├── zzz_placement_state.go │ ├── meta.go │ ├── zzz_remote_state.go │ ├── remote_state.go │ └── remote.go ├── ranje │ ├── range_getter.go │ ├── range_state.go │ ├── placement_state_transition.go │ ├── constraint.go │ ├── replication_config.go │ └── placement.go ├── proto │ ├── conv │ │ ├── range_id.go │ │ ├── node_id.go │ │ ├── meta.go │ │ ├── load_info.go │ │ ├── range_info.go │ │ ├── range_state.go │ │ ├── placement_state.go │ │ └── remote_state.go │ ├── controller.proto │ ├── debug.proto │ ├── node.proto │ ├── ranje.proto │ └── gen │ │ └── controller_grpc.pb.go ├── roster │ ├── node_info.go │ ├── node_getter.go │ ├── roster_test.go │ └── node.go ├── rangelet │ ├── storage │ │ └── null │ │ │ └── null_storage.go │ ├── server_test.go │ ├── mirror │ │ ├── mirror_test.go │ │ └── mirror.go │ └── server.go ├── persister │ ├── interface.go │ └── consul │ │ └── consul.go ├── test │ ├── fake_storage │ │ └── fake_storage.go │ ├── fake_node │ │ └── barrier.go │ └── fake_nodes │ │ └── fake_nodes.go ├── orchestrator │ ├── operations.go │ ├── server_debug.go │ └── server_orchestrator.go ├── discovery │ ├── mock │ │ ├── mock_discovery.go │ │ └── mock_discoverer.go │ ├── interface.go │ └── consul │ │ ├── consul_discovery.go │ │ └── consul_discoverer.go └── keyspace │ ├── replication_state.go │ ├── replication_state_test.go │ └── x_test.go ├── examples ├── cache │ ├── .gitignore │ ├── Procfile │ ├── bin │ │ └── dev.sh │ └── README.md └── kv │ ├── Brewfile │ ├── Procfile │ ├── bin │ ├── gen-proto.sh │ ├── client.sh │ ├── get.sh │ ├── put.sh │ └── dev.sh │ ├── test │ ├── hammer.bats │ ├── controller.bats │ └── test_helper.bash │ ├── proto │ ├── kv.proto │ └── gen │ │ └── kv_grpc.pb.go │ ├── main.go │ ├── pkg │ ├── proxy │ │ ├── server.go │ │ └── proxy.go │ └── node │ │ ├── fetcher.go │ │ ├── control.go │ │ ├── data.go │ │ └── node.go │ ├── tools │ └── hammer │ │ └── config.json │ └── README.md ├── bin ├── test.sh └── gen-proto.sh ├── .vscode ├── settings.json └── launch.json ├── .gitignore ├── .github └── workflows │ └── go.yml ├── docs ├── place.md ├── move.md ├── join.md └── split.md ├── LICENSE ├── cmd ├── rangerd │ ├── main.go │ └── controller.go └── dumbbal │ └── main.go └── go.mod /pkg/util/service/service.go: -------------------------------------------------------------------------------- 1 | package service 2 | -------------------------------------------------------------------------------- /pkg/actuator/mock/mock_actuator_test.go: -------------------------------------------------------------------------------- 1 | package mock 2 | -------------------------------------------------------------------------------- /examples/cache/.gitignore: -------------------------------------------------------------------------------- 1 | # binaries 2 | /cache 3 | /rangerd 4 | /rangerctl 5 | -------------------------------------------------------------------------------- /pkg/api/load_info.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type LoadInfo struct { 4 | Keys int 5 | Splits 
[]Key 6 | } 7 | -------------------------------------------------------------------------------- /examples/kv/Brewfile: -------------------------------------------------------------------------------- 1 | brew "consul" 2 | brew "foreman" 3 | brew "protoc-gen-go" 4 | brew "protoc-gen-go-grpc" 5 | -------------------------------------------------------------------------------- /pkg/api/storage.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type Storage interface { 4 | Read() []*RangeInfo 5 | Write() 6 | } 7 | -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euxo pipefail 3 | cd "$(dirname "$0")/.." 4 | 5 | go test -count=1 -vet=all -v ./... 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "protoc": { 3 | "options": [ 4 | "--proto_path=pkg/proto" 5 | ] 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /examples/cache/Procfile: -------------------------------------------------------------------------------- 1 | controller: ./rangerd -addr "127.0.0.1:$PORT" 2 | node: ./cache -grpc "127.0.0.1:1$PORT" -http "127.0.0.1:2$PORT" 3 | -------------------------------------------------------------------------------- /examples/kv/Procfile: -------------------------------------------------------------------------------- 1 | controller: ./rangerd -addr "127.0.0.1:$PORT" 2 | proxy: ./kv -proxy -addr "127.0.0.1:$PORT" 3 | node: ./kv -node -addr "127.0.0.1:$PORT" -chaos 4 | -------------------------------------------------------------------------------- /pkg/api/types.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type Placement struct { 4 | Node string 5 | State PlacementState 6 | } 7 | 8 | type Parent struct { 9 | Meta Meta 10 | Parents []RangeID 11 | Placements []Placement 12 | } 13 | -------------------------------------------------------------------------------- /bin/gen-proto.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euxo pipefail 3 | cd "$(dirname "$0")" 4 | 5 | pushd ../pkg/proto 6 | protoc --go_out=gen --go_opt=paths=source_relative --go-grpc_out=gen --go-grpc_opt=paths=source_relative ./*.proto 7 | popd 8 | -------------------------------------------------------------------------------- /examples/kv/bin/gen-proto.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euxo pipefail 3 | cd "$(dirname "$0")" 4 | 5 | pushd ../proto 6 | protoc --go_out=gen --go_opt=paths=source_relative --go-grpc_out=gen --go-grpc_opt=paths=source_relative ./*.proto 7 | popd 8 | -------------------------------------------------------------------------------- /pkg/ranje/range_getter.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | import "github.com/adammck/ranger/pkg/api" 4 | 5 | // RangeGetter allows callers to fetch a Range from its RangeIdent. 
6 | type RangeGetter interface { 7 | GetRange(rID api.RangeID) (*Range, error) 8 | } 9 | -------------------------------------------------------------------------------- /pkg/api/key.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | // Key is a point in the keyspace. 4 | type Key string 5 | 6 | // Special case representing both negative and positive infinity. 7 | // Don't compare anything against this! Always check for it explicitly. 8 | const ZeroKey Key = "" 9 | -------------------------------------------------------------------------------- /examples/kv/bin/client.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | case $# in 5 | "3") 6 | #set -x 7 | echo "$3" | grpcurl -plaintext -d @ localhost:"$1" "$2" 8 | ;; 9 | 10 | *) 11 | echo "Usage: $0 port symbol data" 12 | exit 1 13 | ;; 14 | esac 15 | -------------------------------------------------------------------------------- /pkg/api/range_info.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | // RangeInfo represents something we know about a Range on a Node at a moment in 4 | // time. These are emitted and cached by the Roster to anyone who cares. 5 | type RangeInfo struct { 6 | Meta Meta 7 | State RemoteState 8 | Info LoadInfo 9 | } 10 | -------------------------------------------------------------------------------- /pkg/api/range_id.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import "fmt" 4 | 5 | // RangeID is the unique identity of a range. 6 | type RangeID uint64 7 | 8 | // ZeroRange is not a valid RangeID. 9 | const ZeroRange RangeID = 0 10 | 11 | func (id RangeID) String() string { 12 | return fmt.Sprintf("%d", id) 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # binaries 2 | __debug_bin 3 | /cmd/dumbbal/dumbbal 4 | /cmd/rangerctl/rangerctl 5 | /cmd/rangerd/rangerd 6 | /examples/kv/kv 7 | /examples/kv/rangerctl 8 | /examples/kv/rangerd 9 | /examples/kv/tools/hammer/hammer 10 | 11 | # no need to lock development deps 12 | /examples/kv/Brewfile.lock.json 13 | -------------------------------------------------------------------------------- /examples/kv/bin/get.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | case $# in 5 | "2") 6 | #set -x 7 | echo '{"key": "'"$(echo -n "$2" | base64)"'"}'\ 8 | | grpcurl -plaintext -d @ localhost:"$1" kv.KV.Get 9 | ;; 10 | 11 | *) 12 | echo "Usage: $0 port key" 13 | exit 1 14 | ;; 15 | esac 16 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "type":"go", 6 | "name": "Launch Controller", 7 | "request": "launch", 8 | "program": "${workspaceFolder}/cmd/rangerd", 9 | "args": ["-addr", ":5200"], 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /pkg/api/node_id.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | // NodeID is the unique identity of a node. 4 | // N.b. 
we used to just use naked strings for this, so it's possible that some 5 | // of those hanging around. Use this instead. 6 | type NodeID string 7 | 8 | const ZeroNodeID NodeID = "" 9 | 10 | func (nID NodeID) String() string { 11 | return string(nID) 12 | } 13 | -------------------------------------------------------------------------------- /examples/kv/bin/put.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | case $# in 5 | "3") 6 | #set -x 7 | echo '{"key": "'"$(echo -n "$2" | base64)"'", "value": "'"$(echo -n "$3" | base64)"'"}'\ 8 | | grpcurl -plaintext -d @ localhost:"$1" kv.KV.Put 9 | ;; 10 | 11 | *) 12 | echo "Usage: $0 port key value" 13 | exit 1 14 | ;; 15 | esac 16 | -------------------------------------------------------------------------------- /pkg/api/placement_state.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type PlacementState uint8 4 | 5 | const ( 6 | // Should never be in this state. Indicates an deserializing error. 7 | PsUnknown PlacementState = iota 8 | 9 | PsPending 10 | PsInactive 11 | PsActive 12 | PsMissing 13 | PsDropped 14 | ) 15 | 16 | //go:generate stringer -type=PlacementState -output=zzz_placement_state.go 17 | -------------------------------------------------------------------------------- /examples/cache/bin/dev.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | cd "$(dirname "$0")/.." 4 | 5 | if ! consul info 1>/dev/null 2>/dev/null; then 6 | echo "Error: Consul is not running" 7 | exit 1 8 | fi 9 | 10 | go build # cache 11 | go build "$(dirname "$0")"/../../../cmd/rangerctl 12 | go build "$(dirname "$0")"/../../../cmd/rangerd 13 | foreman start -m controller=1,node=3 14 | -------------------------------------------------------------------------------- /examples/kv/bin/dev.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | cd "$(dirname "$0")/.." 4 | 5 | if ! consul info 1>/dev/null 2>/dev/null; then 6 | echo "Error: Consul is not running" 7 | exit 1 8 | fi 9 | 10 | go build # kv 11 | go build "$(dirname "$0")"/../../../cmd/rangerctl 12 | go build "$(dirname "$0")"/../../../cmd/rangerd 13 | foreman start -m controller=1,proxy=1,node=3 14 | -------------------------------------------------------------------------------- /pkg/proto/conv/range_id.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | func RangeIDFromProto(p uint64) (api.RangeID, error) { 10 | id := api.RangeID(p) 11 | 12 | if id == api.ZeroRange { 13 | return id, errors.New("missing: key") 14 | } 15 | 16 | return id, nil 17 | } 18 | 19 | func RangeIDToProto(ident api.RangeID) uint64 { 20 | return uint64(ident) 21 | } 22 | -------------------------------------------------------------------------------- /pkg/roster/node_info.go: -------------------------------------------------------------------------------- 1 | package roster 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | // TODO: Make node ID a proper type like range ID. 
10 | 11 | type NodeInfo struct { 12 | Time time.Time 13 | NodeID api.NodeID 14 | Ranges []api.RangeInfo 15 | 16 | // Expired is true when the node was automatically expired because we 17 | // haven't been able to probe it in a while. 18 | Expired bool 19 | } 20 | -------------------------------------------------------------------------------- /pkg/api/action.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | // Action represents each of the state transitions that ranger can ask a node 4 | // to make. (They're named for the RPC interface, but it's pluggable.) They're 5 | // exposed here for testing. See also Command. 6 | type Action uint8 7 | 8 | const ( 9 | NoAction Action = iota 10 | Prepare 11 | Activate 12 | Deactivate 13 | Drop 14 | ) 15 | 16 | //go:generate stringer -type=Action -output=zzz_action_string.go 17 | -------------------------------------------------------------------------------- /pkg/proto/conv/node_id.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | var ErrMissingNodeID = errors.New("missing node ID") 10 | 11 | func NodeIDFromProto(p string) (api.NodeID, error) { 12 | id := api.NodeID(p) 13 | 14 | if id == api.ZeroNodeID { 15 | return id, ErrMissingNodeID 16 | } 17 | 18 | return id, nil 19 | } 20 | 21 | func NodeIDToProto(nID api.NodeID) string { 22 | return string(nID) 23 | } 24 | -------------------------------------------------------------------------------- /pkg/rangelet/storage/null/null_storage.go: -------------------------------------------------------------------------------- 1 | package null 2 | 3 | import "github.com/adammck/ranger/pkg/api" 4 | 5 | // NullStorage doesn't persist ranges. Read returns no ranges, and Write does 6 | // nothing. This is useful for clients which don't care about range persistance 7 | // across restarts. 8 | type NullStorage struct { 9 | } 10 | 11 | func (s *NullStorage) Read() []*api.RangeInfo { 12 | return []*api.RangeInfo{} 13 | } 14 | 15 | func (s *NullStorage) Write() { 16 | } 17 | -------------------------------------------------------------------------------- /pkg/persister/interface.go: -------------------------------------------------------------------------------- 1 | package persister 2 | 3 | import "github.com/adammck/ranger/pkg/ranje" 4 | 5 | type Persister interface { 6 | 7 | // GetRanges returns the latest snapshot of all known ranges. It's called 8 | // once, at controller startup. 9 | GetRanges() ([]*ranje.Range, error) 10 | 11 | // PutRanges writes all of the given Ranges to the store. Implementations 12 | // must be transactional, so either they all succeed or none do. 
13 | PutRanges([]*ranje.Range) error 14 | } 15 | -------------------------------------------------------------------------------- /pkg/api/command.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Command struct { 8 | RangeIdent RangeID 9 | NodeIdent NodeID 10 | Action Action 11 | } 12 | 13 | func (t Command) String() string { 14 | return fmt.Sprintf("%s(R%d, %s)", t.Action, t.RangeIdent, t.NodeIdent) 15 | } 16 | 17 | func (t Command) Less(other Command) bool { 18 | if t.RangeIdent != other.RangeIdent { 19 | return t.RangeIdent < other.RangeIdent 20 | } else { 21 | return t.NodeIdent < other.NodeIdent 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /pkg/test/fake_storage/fake_storage.go: -------------------------------------------------------------------------------- 1 | package fake_storage 2 | 3 | import ( 4 | "github.com/adammck/ranger/pkg/api" 5 | ) 6 | 7 | type storage struct { 8 | infos []*api.RangeInfo 9 | } 10 | 11 | func NewFakeStorage(rangeInfos map[api.RangeID]*api.RangeInfo) *storage { 12 | infos := []*api.RangeInfo{} 13 | for _, ri := range rangeInfos { 14 | infos = append(infos, ri) 15 | } 16 | 17 | return &storage{infos} 18 | } 19 | 20 | func (s *storage) Read() []*api.RangeInfo { 21 | return s.infos 22 | } 23 | 24 | func (s *storage) Write() { 25 | } 26 | -------------------------------------------------------------------------------- /pkg/roster/node_getter.go: -------------------------------------------------------------------------------- 1 | package roster 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | // NodeGetter allows callers to get a Node from its NodeIdent. 10 | type NodeGetter interface { 11 | NodeByIdent(nID api.NodeID) (*Node, error) 12 | } 13 | 14 | // ErrNodeNotFound is returned by NodeGetter implementations when NodeByIdent is 15 | // called with a node which does not exist. 16 | type ErrNodeNotFound struct { 17 | NodeID api.NodeID 18 | } 19 | 20 | func (e ErrNodeNotFound) Error() string { 21 | return fmt.Sprintf("no such node: %s", e.NodeID) 22 | } 23 | -------------------------------------------------------------------------------- /pkg/api/range_state.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type RangeState uint8 4 | 5 | const ( 6 | RsUnknown RangeState = iota 7 | 8 | // The range is active, i.e. it should be placed on the appropriate number 9 | // of nodes and left alone until we decide to supersede it with another 10 | // range by joining or splitting. 11 | RsActive 12 | 13 | RsSubsuming 14 | 15 | // The range has finished being split or joined, has been dropped from all 16 | // nodes, and will never be placed on any node again. 
17 | RsObsolete 18 | ) 19 | 20 | //go:generate stringer -type=RangeState -output=zzz_range_state.go 21 | -------------------------------------------------------------------------------- /pkg/ranje/range_state.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | import ( 4 | "github.com/adammck/ranger/pkg/api" 5 | ) 6 | 7 | type RangeStateTransition struct { 8 | from api.RangeState 9 | to api.RangeState 10 | } 11 | 12 | var RangeStateTransitions []RangeStateTransition 13 | 14 | func init() { 15 | RangeStateTransitions = []RangeStateTransition{ 16 | {api.RsActive, api.RsSubsuming}, 17 | {api.RsSubsuming, api.RsObsolete}, 18 | {api.RsSubsuming, api.RsObsolete}, 19 | 20 | {api.RsActive, api.RsSubsuming}, 21 | {api.RsSubsuming, api.RsObsolete}, 22 | {api.RsSubsuming, api.RsObsolete}, 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | 14 | - name: Check out repo 15 | uses: actions/checkout@v3 16 | 17 | - name: Set up Go 18 | uses: actions/setup-go@v3 19 | with: 20 | go-version: 1.18 21 | 22 | - name: Build 23 | run: go build -v ./... 24 | 25 | - name: Test 26 | run: go test -v ./... 27 | 28 | - name: Test with Race Detector 29 | run: go test -v -race -count=10 ./... 30 | -------------------------------------------------------------------------------- /pkg/proto/conv/meta.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "github.com/adammck/ranger/pkg/api" 5 | pb "github.com/adammck/ranger/pkg/proto/gen" 6 | ) 7 | 8 | func MetaFromProto(m *pb.RangeMeta) (api.Meta, error) { 9 | id, err := RangeIDFromProto(m.Ident) 10 | if err != nil { 11 | return api.Meta{}, err 12 | } 13 | 14 | return api.Meta{ 15 | Ident: id, 16 | Start: api.Key(m.Start), 17 | End: api.Key(m.End), 18 | }, nil 19 | } 20 | 21 | func MetaToProto(m api.Meta) *pb.RangeMeta { 22 | return &pb.RangeMeta{ 23 | Ident: RangeIDToProto(m.Ident), 24 | Start: []byte(m.Start), 25 | End: []byte(m.End), 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/kv/test/hammer.bats: -------------------------------------------------------------------------------- 1 | setup_file() { 2 | go build 3 | } 4 | 5 | setup() { 6 | # TODO: Use this instead: https://github.com/ztombol/bats-docs#homebrew 7 | load '/Users/adammck/code/src/github.com/bats-core/bats-support/load.bash' 8 | load '/Users/adammck/code/src/github.com/bats-core/bats-assert/load.bash' 9 | load test_helper 10 | start_consul 11 | } 12 | 13 | teardown() { 14 | stop_cmds 15 | } 16 | 17 | @test "hammer" { 18 | start_node 8001 19 | start_node 8002 20 | start_node 8003 21 | start_proxy 8000 22 | start_controller 9000 23 | 24 | sleep 0.5 25 | 26 | go run tools/hammer/main.go -addr localhost:8000 -duration 10s 27 | } 28 | -------------------------------------------------------------------------------- /docs/place.md: -------------------------------------------------------------------------------- 1 | # Place 2 | 3 | When a range isn't assigned to any node, we **place** it on a node (a). 4 | 5 | 1. Prepare(a) 6 | 2. 
Activate(a) 7 | 8 | [_TestPlace_](https://cs.github.com/adammck/ranger?q=symbol%3ATestPlace) 9 | 10 | ## Failures 11 | 12 | If step 1 fails, abort the place: 13 | 14 | 1. Prepare(a) 15 | 16 | [_TestPlaceFailure_Prepare_](https://cs.github.com/adammck/ranger?q=symbol%3ATestPlaceFailure_Prepare) 17 | 18 | --- 19 | 20 | If step 2 fails, drop the placement and abort the place: 21 | 22 | 1. Prepare(a) 23 | 2. Activate(a) 24 | 3. Drop(a) 25 | 26 | [_TestPlaceFailure_Activate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestPlaceFailure_Activate) 27 | -------------------------------------------------------------------------------- /pkg/proto/conv/load_info.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "github.com/adammck/ranger/pkg/api" 5 | pb "github.com/adammck/ranger/pkg/proto/gen" 6 | ) 7 | 8 | func LoadInfoFromProto(li *pb.LoadInfo) api.LoadInfo { 9 | splits := make([]api.Key, len(li.Splits)) 10 | for i := range li.Splits { 11 | splits[i] = api.Key(li.Splits[i]) 12 | } 13 | 14 | return api.LoadInfo{ 15 | Keys: int(li.Keys), 16 | Splits: splits, 17 | } 18 | } 19 | 20 | func LoadInfoToProto(li api.LoadInfo) *pb.LoadInfo { 21 | splits := make([]string, len(li.Splits)) 22 | for i := range li.Splits { 23 | splits[i] = string(li.Splits[i]) 24 | } 25 | 26 | return &pb.LoadInfo{ 27 | Keys: uint64(li.Keys), 28 | Splits: splits, 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /pkg/orchestrator/operations.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import "github.com/adammck/ranger/pkg/api" 4 | 5 | // TODO: Split this into Add, Remove 6 | type OpMove struct { 7 | Range api.RangeID 8 | Src api.NodeID 9 | Dest api.NodeID 10 | Err chan error 11 | } 12 | 13 | type OpSplit struct { 14 | Range api.RangeID 15 | Key api.Key 16 | 17 | // TODO: Update this interface (and the proto) to accomodate replication. 18 | // Currently only the first placement can be placed deliberately. Others 19 | // just go wherever. 20 | Left api.NodeID 21 | Right api.NodeID 22 | 23 | Err chan error 24 | } 25 | 26 | type OpJoin struct { 27 | Left api.RangeID 28 | Right api.RangeID 29 | Dest api.NodeID 30 | Err chan error 31 | } 32 | -------------------------------------------------------------------------------- /pkg/api/zzz_action_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=Action -output=zzz_action_string.go"; DO NOT EDIT. 2 | 3 | package api 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[NoAction-0] 12 | _ = x[Prepare-1] 13 | _ = x[Activate-2] 14 | _ = x[Deactivate-3] 15 | _ = x[Drop-4] 16 | } 17 | 18 | const _Action_name = "NoActionPrepareActivateDeactivateDrop" 19 | 20 | var _Action_index = [...]uint8{0, 8, 15, 23, 33, 37} 21 | 22 | func (i Action) String() string { 23 | if i >= Action(len(_Action_index)-1) { 24 | return "Action(" + strconv.FormatInt(int64(i), 10) + ")" 25 | } 26 | return _Action_name[_Action_index[i]:_Action_index[i+1]] 27 | } 28 | -------------------------------------------------------------------------------- /pkg/proto/conv/range_info.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | pb "github.com/adammck/ranger/pkg/proto/gen" 8 | ) 9 | 10 | func RangeInfoFromProto(r *pb.RangeInfo) (api.RangeInfo, error) { 11 | if r.Meta == nil { 12 | return api.RangeInfo{}, fmt.Errorf("missing: meta") 13 | } 14 | 15 | m, err := MetaFromProto(r.Meta) 16 | if err != nil { 17 | return api.RangeInfo{}, fmt.Errorf("parsing meta: %v", err) 18 | } 19 | 20 | return api.RangeInfo{ 21 | Meta: m, 22 | State: RemoteStateFromProto(r.State), 23 | Info: LoadInfoFromProto(r.Info), 24 | }, nil 25 | } 26 | 27 | func RangeInfoToProto(ri api.RangeInfo) *pb.RangeInfo { 28 | return &pb.RangeInfo{ 29 | Meta: MetaToProto(ri.Meta), 30 | State: RemoteStateToProto(ri.State), 31 | Info: LoadInfoToProto(ri.Info), 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /pkg/api/zzz_range_state.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=RangeState -output=zzz_range_state.go"; DO NOT EDIT. 2 | 3 | package api 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[RsUnknown-0] 12 | _ = x[RsActive-1] 13 | _ = x[RsSubsuming-2] 14 | _ = x[RsObsolete-3] 15 | } 16 | 17 | const _RangeState_name = "RsUnknownRsActiveRsSubsumingRsObsolete" 18 | 19 | var _RangeState_index = [...]uint8{0, 9, 17, 28, 38} 20 | 21 | func (i RangeState) String() string { 22 | if i >= RangeState(len(_RangeState_index)-1) { 23 | return "RangeState(" + strconv.FormatInt(int64(i), 10) + ")" 24 | } 25 | return _RangeState_name[_RangeState_index[i]:_RangeState_index[i+1]] 26 | } 27 | -------------------------------------------------------------------------------- /pkg/api/node.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "errors" 5 | ) 6 | 7 | var ErrNotFound = errors.New("not found") 8 | 9 | type Node interface { 10 | 11 | // GetLoadInfo returns the LoadInfo for the given range. 12 | // Implementations should return NotFound if (from their point of view) the 13 | // range doesn't exist. This can happen when GetLoadInfo and Prepare and/or 14 | // Drop are racing. 15 | GetLoadInfo(rID RangeID) (LoadInfo, error) 16 | 17 | // Prepare. 18 | Prepare(m Meta, p []Parent) error 19 | 20 | // Activate 21 | Activate(rID RangeID) error 22 | 23 | // Deactivate 24 | Deactivate(rID RangeID) error 25 | 26 | // Drop 27 | // Range state will be set to NsDropping before calling this. If an error is 28 | // returned, the range will be forgotten. 
If no error is returned, the range 29 | // state will be set to NsDroppingError. 30 | Drop(rID RangeID) error 31 | } 32 | -------------------------------------------------------------------------------- /examples/kv/proto/kv.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | option go_package = "github.com/adammck/ranger/examples/kv/proto"; 3 | 4 | package kv; 5 | 6 | message GetRequest { 7 | string key = 1; 8 | } 9 | 10 | message GetResponse { 11 | bytes value = 1; 12 | } 13 | 14 | message PutRequest { 15 | string key = 1; 16 | bytes value = 2; 17 | } 18 | 19 | message PutResponse { 20 | } 21 | 22 | message DumpRequest { 23 | uint64 range_ident = 1; 24 | } 25 | 26 | message Pair { 27 | string key = 1; 28 | bytes value = 2; 29 | } 30 | 31 | message DumpResponse { 32 | repeated Pair pairs = 1; 33 | } 34 | 35 | service KV { 36 | rpc Get (GetRequest) returns (GetResponse) {} 37 | rpc Put (PutRequest) returns (PutResponse) {} 38 | 39 | // Only used for nodes to transfer state. 40 | // Move this to a separate service so the proxy doesn't have to implement it. 41 | rpc Dump (DumpRequest) returns (DumpResponse) {} 42 | } 43 | -------------------------------------------------------------------------------- /pkg/api/zzz_placement_state.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=PlacementState -output=zzz_placement_state.go"; DO NOT EDIT. 2 | 3 | package api 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[PsUnknown-0] 12 | _ = x[PsPending-1] 13 | _ = x[PsInactive-2] 14 | _ = x[PsActive-3] 15 | _ = x[PsMissing-4] 16 | _ = x[PsDropped-5] 17 | } 18 | 19 | const _PlacementState_name = "PsUnknownPsPendingPsInactivePsActivePsMissingPsDropped" 20 | 21 | var _PlacementState_index = [...]uint8{0, 9, 18, 28, 36, 45, 54} 22 | 23 | func (i PlacementState) String() string { 24 | if i >= PlacementState(len(_PlacementState_index)-1) { 25 | return "PlacementState(" + strconv.FormatInt(int64(i), 10) + ")" 26 | } 27 | return _PlacementState_name[_PlacementState_index[i]:_PlacementState_index[i+1]] 28 | } 29 | -------------------------------------------------------------------------------- /pkg/api/meta.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // Meta is a range minus all the state. 8 | // Should be immutable after construction. 9 | // TODO: Rename this to RangeMeta. 10 | type Meta struct { 11 | Ident RangeID 12 | Start Key // inclusive 13 | End Key // exclusive 14 | } 15 | 16 | // String returns a string like: 1234 (aaa, bbb] 17 | func (m Meta) String() string { 18 | var s, e string 19 | 20 | if m.Start == ZeroKey { 21 | s = "[-inf" 22 | } else { 23 | s = fmt.Sprintf("(%s", m.Start) 24 | } 25 | 26 | if m.End == ZeroKey { 27 | e = "+inf]" 28 | } else { 29 | e = fmt.Sprintf("%s]", m.End) 30 | } 31 | 32 | return fmt.Sprintf("%s %s, %s", m.Ident.String(), s, e) 33 | } 34 | 35 | func (m Meta) Contains(k Key) bool { 36 | if m.Start != ZeroKey { 37 | if k < m.Start { 38 | return false 39 | } 40 | } 41 | 42 | if m.End != ZeroKey { 43 | // Note that the range end is exclusive! 
44 | if k >= m.End { 45 | return false 46 | } 47 | } 48 | 49 | return true 50 | } 51 | -------------------------------------------------------------------------------- /pkg/api/zzz_remote_state.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=RemoteState -output=zzz_remote_state.go"; DO NOT EDIT. 2 | 3 | package api 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[NsUnknown-0] 12 | _ = x[NsInactive-1] 13 | _ = x[NsActive-2] 14 | _ = x[NsPreparing-3] 15 | _ = x[NsActivating-4] 16 | _ = x[NsDeactivating-5] 17 | _ = x[NsDropping-6] 18 | _ = x[NsNotFound-7] 19 | } 20 | 21 | const _RemoteState_name = "NsUnknownNsInactiveNsActiveNsPreparingNsActivatingNsDeactivatingNsDroppingNsNotFound" 22 | 23 | var _RemoteState_index = [...]uint8{0, 9, 19, 27, 38, 50, 64, 74, 84} 24 | 25 | func (i RemoteState) String() string { 26 | if i >= RemoteState(len(_RemoteState_index)-1) { 27 | return "RemoteState(" + strconv.FormatInt(int64(i), 10) + ")" 28 | } 29 | return _RemoteState_name[_RemoteState_index[i]:_RemoteState_index[i+1]] 30 | } 31 | -------------------------------------------------------------------------------- /pkg/proto/conv/range_state.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | pb "github.com/adammck/ranger/pkg/proto/gen" 9 | ) 10 | 11 | func RangeStateFromProto(rs pb.RangeState) api.RangeState { 12 | switch rs { 13 | case pb.RangeState_RS_UNKNOWN: 14 | return api.RsUnknown 15 | case pb.RangeState_RS_ACTIVE: 16 | return api.RsActive 17 | case pb.RangeState_RS_SUBSUMING: 18 | return api.RsSubsuming 19 | case pb.RangeState_RS_OBSOLETE: 20 | return api.RsObsolete 21 | } 22 | 23 | log.Printf("warn: unknown pb.RangeState: %#v", rs) 24 | return api.RsUnknown 25 | } 26 | 27 | func RangeStateToProto(rs api.RangeState) pb.RangeState { 28 | switch rs { 29 | case api.RsUnknown: 30 | return pb.RangeState_RS_UNKNOWN 31 | case api.RsActive: 32 | return pb.RangeState_RS_ACTIVE 33 | case api.RsSubsuming: 34 | return pb.RangeState_RS_SUBSUMING 35 | case api.RsObsolete: 36 | return pb.RangeState_RS_OBSOLETE 37 | } 38 | 39 | panic(fmt.Sprintf("unknown RangeState: %#v", rs)) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/api/remote_state.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | type RemoteState uint8 4 | 5 | const ( 6 | 7 | // Should never be in this state. Indicates a bug. 8 | NsUnknown RemoteState = iota 9 | 10 | // Stable states 11 | NsInactive 12 | NsActive 13 | 14 | // During transitions 15 | NsPreparing // Pending -> Inactive 16 | NsActivating // Inactive -> Active 17 | NsDeactivating // Active -> Inactive 18 | NsDropping // Inactive -> NotFound 19 | 20 | // Special case: This is never returned by probes, since those only include 21 | // the state of ranges which the node has. This is returned by redundant 22 | // Drop RPCs which instruct nodes to drop a range that they don't have. 23 | // (Maybe it was already dropped, or maybe the node never had it. Can't 24 | // know.) 
This is a success, not an error, because those RPCs may be 25 | // received multiple times during a normal drop, and should be treated 26 | // idempotently. But we don't want to return NsUnknown, because we do know. 27 | NsNotFound 28 | ) 29 | 30 | //go:generate stringer -type=RemoteState -output=zzz_remote_state.go 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Adam Mckaig 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pkg/discovery/mock/mock_discovery.go: -------------------------------------------------------------------------------- 1 | package mock 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | // TODO: Methods to add/remove remotes. 
10 | type MockDiscovery struct { 11 | Remotes map[string][]api.Remote 12 | sync.RWMutex 13 | } 14 | 15 | func New() *MockDiscovery { 16 | return &MockDiscovery{ 17 | Remotes: map[string][]api.Remote{}, 18 | } 19 | } 20 | 21 | // interface 22 | 23 | func (d *MockDiscovery) Start() error { 24 | return nil 25 | } 26 | 27 | func (d *MockDiscovery) Stop() error { 28 | return nil 29 | } 30 | 31 | func (d *MockDiscovery) Get(name string) ([]api.Remote, error) { 32 | d.RLock() 33 | defer d.RUnlock() 34 | 35 | rems, ok := d.Remotes[name] 36 | if !ok { 37 | return []api.Remote{}, nil 38 | } 39 | 40 | return rems, nil 41 | } 42 | 43 | // test helpers 44 | 45 | func (d *MockDiscovery) Set(name string, remotes []api.Remote) { 46 | d.Lock() 47 | defer d.Unlock() 48 | d.Remotes[name] = remotes 49 | } 50 | 51 | func (d *MockDiscovery) Add(name string, remote api.Remote) { 52 | d.Lock() 53 | defer d.Unlock() 54 | d.Remotes[name] = append(d.Remotes[name], remote) 55 | } 56 | -------------------------------------------------------------------------------- /pkg/ranje/placement_state_transition.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | ) 8 | 9 | type PlacementStateTransition struct { 10 | from api.PlacementState 11 | to api.PlacementState 12 | } 13 | 14 | var PlacementStateTransitions []PlacementStateTransition 15 | 16 | func init() { 17 | PlacementStateTransitions = []PlacementStateTransition{ 18 | // Happy Path 19 | {api.PsPending, api.PsInactive}, // Prepare 20 | {api.PsInactive, api.PsActive}, // Activate 21 | {api.PsActive, api.PsInactive}, // Deactivate 22 | {api.PsInactive, api.PsDropped}, // Drop 23 | 24 | // Node crashed (or placement mysteriously vanished) 25 | {api.PsPending, api.PsMissing}, 26 | {api.PsInactive, api.PsMissing}, 27 | {api.PsActive, api.PsMissing}, 28 | 29 | // Recovery? 30 | {api.PsMissing, api.PsDropped}, 31 | } 32 | } 33 | 34 | func CanTransitionPlacement(from, to api.PlacementState) error { 35 | for _, t := range PlacementStateTransitions { 36 | if t.from == from && t.to == to { 37 | return nil 38 | } 39 | } 40 | 41 | return fmt.Errorf("invalid transition: from=%s, to:%s", from.String(), to.String()) 42 | } 43 | -------------------------------------------------------------------------------- /pkg/discovery/interface.go: -------------------------------------------------------------------------------- 1 | // This is not a general-purpose service discovery interface! This is just the 2 | // specific things that I need for this library, to avoid letting Consul details 3 | // get all over the place. 4 | 5 | package discovery 6 | 7 | import "github.com/adammck/ranger/pkg/api" 8 | 9 | // Discoverable is an interface to make oneself discoverable (by type). For 10 | // environments where discoverability is implicit, this is unnecessary. 11 | type Discoverable interface { 12 | Start() error 13 | Stop() error 14 | } 15 | 16 | // Discoverer is an interface to find other services by type (e.g. node). 17 | type Discoverer interface { 18 | Discover(svcName string, add, remove func(api.Remote)) Getter 19 | } 20 | 21 | // Getter is returned by Discoverer.Discover. 22 | type Getter interface { 23 | 24 | // Get returns all of the currently known remotes. 25 | // TODO: Support some kind of filters here, like region and AZ. 26 | // TODO: Update callers to use add/remove callbacks and remove this method. 
27 | Get() ([]api.Remote, error) 28 | 29 | // Stop terminates this getter. It should not call the remove callback for 30 | // any known remotes. Get will return no results after this is called. 31 | Stop() error 32 | } 33 | -------------------------------------------------------------------------------- /cmd/rangerd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "log" 7 | "os" 8 | "os/signal" 9 | "syscall" 10 | "time" 11 | ) 12 | 13 | func main() { 14 | addrLis := flag.String("addr", "localhost:8000", "address to start grpc server on") 15 | addrPub := flag.String("pub-addr", "", "address for other nodes to reach this (default: same as -addr)") 16 | interval := flag.Duration("interval", 250*time.Millisecond, "frequency of orchestration loop") 17 | once := flag.Bool("once", false, "perform one rebalance cycle and exit") 18 | flag.Parse() 19 | 20 | if *addrPub == "" { 21 | *addrPub = *addrLis 22 | } 23 | 24 | // Replace default logger. 25 | // TODO: Switch to a better logging package. 26 | log.Default().SetOutput(os.Stdout) 27 | log.Default().SetPrefix("") 28 | log.Default().SetFlags(0) 29 | 30 | cmd, err := New(*addrLis, *addrPub, *interval, *once) 31 | if err != nil { 32 | exit(err) 33 | } 34 | 35 | ctx, cancel := context.WithCancel(context.Background()) 36 | 37 | sig := make(chan os.Signal, 1) 38 | signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM) 39 | 40 | go func() { 41 | <-sig 42 | cancel() 43 | }() 44 | 45 | err = cmd.Run(ctx) 46 | if err != nil { 47 | exit(err) 48 | } 49 | } 50 | 51 | func exit(err error) { 52 | log.Fatalf("Error: %s", err) 53 | } 54 | -------------------------------------------------------------------------------- /pkg/proto/conv/placement_state.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | pb "github.com/adammck/ranger/pkg/proto/gen" 9 | ) 10 | 11 | func PlacementStateFromProto(ps pb.PlacementState) api.PlacementState { 12 | switch ps { 13 | case pb.PlacementState_PS_UNKNOWN: 14 | return api.PsUnknown 15 | case pb.PlacementState_PS_PENDING: 16 | return api.PsPending 17 | case pb.PlacementState_PS_INACTIVE: 18 | return api.PsInactive 19 | case pb.PlacementState_PS_ACTIVE: 20 | return api.PsActive 21 | case pb.PlacementState_PS_MISSING: 22 | return api.PsMissing 23 | case pb.PlacementState_PS_DROPPED: 24 | return api.PsDropped 25 | } 26 | 27 | log.Printf("warn: unknown pb.PlacementState: %#v", ps) 28 | return api.PsUnknown 29 | } 30 | 31 | func PlacementStateToProto(ps api.PlacementState) pb.PlacementState { 32 | switch ps { 33 | case api.PsUnknown: 34 | return pb.PlacementState_PS_UNKNOWN 35 | case api.PsPending: 36 | return pb.PlacementState_PS_PENDING 37 | case api.PsInactive: 38 | return pb.PlacementState_PS_INACTIVE 39 | case api.PsActive: 40 | return pb.PlacementState_PS_ACTIVE 41 | case api.PsMissing: 42 | return pb.PlacementState_PS_MISSING 43 | case api.PsDropped: 44 | return pb.PlacementState_PS_DROPPED 45 | } 46 | 47 | panic(fmt.Sprintf("unknown PlacementState: %#v", ps)) 48 | } 49 | -------------------------------------------------------------------------------- /pkg/ranje/constraint.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | ) 9 | 10 | type Constraint struct { 11 | 
NodeID api.NodeID 12 | Not []api.NodeID 13 | } 14 | 15 | func (c Constraint) Copy() Constraint { 16 | not := make([]api.NodeID, len(c.Not)) 17 | copy(not, c.Not) 18 | 19 | return Constraint{ 20 | NodeID: c.NodeID, 21 | Not: not, 22 | } 23 | } 24 | 25 | func (c Constraint) String() string { 26 | tokens := []string{} 27 | 28 | if c.NodeID != "" { 29 | tokens = append(tokens, fmt.Sprintf("nID(%s)", c.NodeID)) 30 | } 31 | 32 | if len(c.Not) > 0 { 33 | nots := make([]string, len(c.Not)) 34 | for i := range c.Not { 35 | nots[i] = c.Not[i].String() 36 | } 37 | tokens = append(tokens, fmt.Sprintf("not(%s)", strings.Join(nots, ","))) 38 | } 39 | 40 | // TODO: Include Not nIDs in here. 41 | 42 | if len(tokens) == 0 { 43 | tokens = append(tokens, "any") 44 | } 45 | 46 | return fmt.Sprintf("Constraint{%s}", strings.Join(tokens, ",")) 47 | } 48 | 49 | func (c Constraint) WithNot(nID api.NodeID) Constraint { 50 | new := c.Copy() 51 | new.Not = append(new.Not, nID) 52 | return new 53 | } 54 | 55 | func (c Constraint) WithNodeID(nID api.NodeID) Constraint { 56 | new := c.Copy() 57 | new.NodeID = nID 58 | return new 59 | } 60 | 61 | // AnyNode is an empty constraint, which matches... any node. 62 | var AnyNode = Constraint{} 63 | -------------------------------------------------------------------------------- /docs/move.md: -------------------------------------------------------------------------------- 1 | # Move 2 | 3 | When a range is assigned to a node (a), and we want it to be assigned to a 4 | different node (b), we **move** it. 5 | 6 | 1. Prepare(b) 7 | 2. Deactivate(a) 8 | 3. Activate(b) 9 | 4. Drop(a) 10 | 11 | [_TestMove_](https://cs.github.com/adammck/ranger?q=symbol%3ATestMove) 12 | 13 | ## Failures 14 | 15 | If step 1 fails, abort the move: 16 | 17 | 1. Prepare(b) 18 | 19 | [_TestMoveFailure_Prepare_](https://cs.github.com/adammck/ranger?q=symbol%3ATestMoveFailure_Prepare) 20 | 21 | --- 22 | 23 | If step 2 fails, do nothing. We are stuck until the source placement 24 | relinquishes the range: 25 | 26 | 1. Prepare(b) 27 | 2. Deactivate(a) 28 | 29 | [_TestMoveFailure_Deactivate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestMoveFailure_Deactivate) 30 | 31 | --- 32 | 33 | If step 3 fails, reactivate the source placement, drop the destination 34 | placement, and abort the move: 35 | 36 | 1. Prepare(b) 37 | 2. Deactivate(a) 38 | 3. Activate(b) 39 | 4. Activate(a) 40 | 5. Drop(b) 41 | 42 | [_TestMoveFailure_Activate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestMoveFailure_Activate) 43 | 44 | --- 45 | 46 | If step 4 fails, do nothing but keep trying forever: 47 | 48 | 1. Prepare(b) 49 | 2. Deactivate(a) 50 | 3. Activate(b) 51 | 4. Drop(a) 52 | 53 | [_TestMoveFailure_Drop_](https://cs.github.com/adammck/ranger?q=symbol%3ATestMoveFailure_Drop) 54 | -------------------------------------------------------------------------------- /pkg/api/remote.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | // Remote represents a service listening on some remote host and port. They're 8 | // returned by discovery. This is most often used to refer to nodes/rangelets, 9 | // but isn't limited to that -- clients use it to find the controller, and I 10 | // have vague ideas about rangelets finding distributors in future. That is why 11 | // Ident is a string and not simply a NodeID. 
12 | // 13 | // Ident must be globally unique and stable within a ranger installation, since 14 | // they are used to refer to *logical* service instances (which may have state) 15 | // as they are rescheduled between machines. (For example, a k8s pod using local 16 | // storage may be rescheduled on the same host with a different ip, or even on 17 | // a different host if using e.g. an EBS volume.) 18 | // 19 | // TODO: Should we remove support for non-node/rangelet use-cases? It would 20 | // simplify the api. If not, should we store the remote type, too? 21 | type Remote struct { 22 | Ident string 23 | Host string 24 | Port int 25 | } 26 | 27 | // Addr returns an address which can be dialled to connect to the remote. 28 | func (r Remote) Addr() string { 29 | return fmt.Sprintf("%s:%d", r.Host, r.Port) 30 | } 31 | 32 | // NodeID returns the remote ident as a NodeID, since that's most often how it's 33 | // used, though it isn't one. 34 | func (r Remote) NodeID() NodeID { 35 | return NodeID(r.Ident) 36 | } 37 | -------------------------------------------------------------------------------- /pkg/test/fake_node/barrier.go: -------------------------------------------------------------------------------- 1 | package fake_node 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "sync/atomic" 7 | ) 8 | 9 | type barrier struct { 10 | desc string // just for error messages 11 | arrived uint32 12 | arrival *sync.WaitGroup 13 | release *sync.WaitGroup 14 | cb func() 15 | } 16 | 17 | func NewBarrier(desc string, n int, cb func()) *barrier { 18 | a := &sync.WaitGroup{} 19 | b := &sync.WaitGroup{} 20 | a.Add(n) 21 | b.Add(n) 22 | 23 | return &barrier{desc, 0, a, b, cb} 24 | } 25 | 26 | // Wait blocks until Arrive has been called. 27 | // TODO: Rename this to e.g. AwaitBarrier. 28 | func (b *barrier) Wait() { 29 | b.arrival.Wait() 30 | } 31 | 32 | // Release is called in tests to unblock a state transition, which is currently 33 | // blocked in Arrive. Before returning, as a convenience, it calls the callback 34 | // (which is probably a WaitGroup on the rangelet changing state) so the caller 35 | // doesn't have to wait for that itself. 36 | // TODO: Rename this to e.g. CompleteTransition. 37 | func (b *barrier) Release() { 38 | if atomic.LoadUint32(&b.arrived) == 0 { 39 | panic(fmt.Sprintf("Release called before Arrive for barrier: %s", b.desc)) 40 | } 41 | 42 | b.release.Done() 43 | b.cb() 44 | } 45 | 46 | func (b *barrier) Arrive() { 47 | if atomic.LoadUint32(&b.arrived) == 1 { 48 | panic(fmt.Sprintf("Arrive already called for barrier: %s", b.desc)) 49 | } 50 | 51 | atomic.StoreUint32(&b.arrived, 1) 52 | b.arrival.Done() 53 | b.release.Wait() 54 | } 55 | -------------------------------------------------------------------------------- /pkg/proto/controller.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = "github.com/adammck/ranger/pkg/proto"; 4 | 5 | package ranger; 6 | 7 | message MoveRequest { 8 | uint64 range = 1; 9 | 10 | // The ident of the node to assign the range to. 11 | // TODO: Also allow nodes to identified by other features. 12 | // TODO: Does this need its own message type? 13 | string node = 2; 14 | } 15 | 16 | message MoveResponse { 17 | } 18 | 19 | message SplitRequest { 20 | uint64 range = 1; 21 | 22 | // The point at which to split the two ranges. This becomes the end key of the 23 | // left range, and the start key of the right range. 
24 | bytes boundary = 2; 25 | 26 | // The ident of the node to assign the two parts to. Can be the same as the 27 | // node which the range is currently on. 28 | string node_left = 3; 29 | string node_right = 4; 30 | } 31 | 32 | message SplitResponse { 33 | } 34 | 35 | message JoinRequest { 36 | uint64 range_left = 1; 37 | uint64 range_right = 2; 38 | 39 | // The ident of the node to assign the resulting range to. 40 | string node = 3; 41 | } 42 | 43 | message JoinResponse { 44 | } 45 | 46 | service Orchestrator { 47 | 48 | // Place a range on specific node, moving it from the node it is currently 49 | // placed on, if any. 50 | rpc Move (MoveRequest) returns (MoveResponse) {} 51 | 52 | // Split a range in two. 53 | rpc Split (SplitRequest) returns (SplitResponse) {} 54 | 55 | // Join two ranges into one. 56 | rpc Join (JoinRequest) returns (JoinResponse) {} 57 | } 58 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/adammck/ranger 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/hashicorp/consul/api v1.12.0 7 | github.com/lthibault/jitterbug v2.0.0+incompatible 8 | github.com/stretchr/testify v1.7.1 9 | golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 10 | google.golang.org/grpc v1.46.2 11 | google.golang.org/protobuf v1.28.0 12 | ) 13 | 14 | require ( 15 | github.com/google/go-cmp v0.5.8 // indirect 16 | github.com/pkg/errors v0.8.1 // indirect 17 | ) 18 | 19 | require ( 20 | github.com/armon/go-metrics v0.4.0 // indirect 21 | github.com/davecgh/go-spew v1.1.1 // indirect 22 | github.com/fatih/color v1.13.0 // indirect 23 | github.com/golang/protobuf v1.5.2 // indirect 24 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 25 | github.com/hashicorp/go-hclog v1.2.0 // indirect 26 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 27 | github.com/hashicorp/go-rootcerts v1.0.2 // indirect 28 | github.com/hashicorp/golang-lru v0.5.4 // indirect 29 | github.com/hashicorp/serf v0.9.8 // indirect 30 | github.com/mattn/go-colorable v0.1.12 // indirect 31 | github.com/mattn/go-isatty v0.0.14 // indirect 32 | github.com/mitchellh/go-homedir v1.1.0 // indirect 33 | github.com/mitchellh/mapstructure v1.5.0 // indirect 34 | github.com/pmezard/go-difflib v1.0.0 // indirect 35 | golang.org/x/net v0.7.0 // indirect 36 | golang.org/x/sys v0.5.0 // indirect 37 | golang.org/x/text v0.7.0 // indirect 38 | google.golang.org/genproto v0.0.0-20220527130721-00d5c0f3be58 // indirect 39 | gopkg.in/yaml.v3 v3.0.1 // indirect 40 | gotest.tools v2.2.0+incompatible 41 | ) 42 | -------------------------------------------------------------------------------- /pkg/proto/conv/remote_state.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | pb "github.com/adammck/ranger/pkg/proto/gen" 9 | ) 10 | 11 | func RemoteStateFromProto(s pb.RangeNodeState) api.RemoteState { 12 | switch s { 13 | case pb.RangeNodeState_UNKNOWN: 14 | return api.NsUnknown 15 | case pb.RangeNodeState_INACTIVE: 16 | return api.NsInactive 17 | case pb.RangeNodeState_ACTIVE: 18 | return api.NsActive 19 | case pb.RangeNodeState_PREPARING: 20 | return api.NsPreparing 21 | case pb.RangeNodeState_ACTIVATING: 22 | return api.NsActivating 23 | case pb.RangeNodeState_DEACTIVATING: 24 | return api.NsDeactivating 25 | case pb.RangeNodeState_DROPPING: 26 | return 
api.NsDropping 27 | case pb.RangeNodeState_NOT_FOUND: 28 | return api.NsNotFound 29 | } 30 | 31 | log.Printf("warn: unknown pb.RangeNodeState: %#v", s) 32 | return api.NsUnknown 33 | } 34 | 35 | func RemoteStateToProto(rs api.RemoteState) pb.RangeNodeState { 36 | switch rs { 37 | case api.NsUnknown: 38 | return pb.RangeNodeState_UNKNOWN 39 | case api.NsInactive: 40 | return pb.RangeNodeState_INACTIVE 41 | case api.NsActive: 42 | return pb.RangeNodeState_ACTIVE 43 | case api.NsPreparing: 44 | return pb.RangeNodeState_PREPARING 45 | case api.NsActivating: 46 | return pb.RangeNodeState_ACTIVATING 47 | case api.NsDeactivating: 48 | return pb.RangeNodeState_DEACTIVATING 49 | case api.NsDropping: 50 | return pb.RangeNodeState_DROPPING 51 | case api.NsNotFound: 52 | return pb.RangeNodeState_NOT_FOUND 53 | } 54 | 55 | //return pb.RangeNodeState_UNKNOWN 56 | panic(fmt.Sprintf("unknown RemoteState: %#v", rs)) 57 | } 58 | -------------------------------------------------------------------------------- /pkg/ranje/replication_config.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | // TODO: Move this into the keyspace package. 4 | type ReplicationConfig struct { 5 | 6 | // The number of active placements that a range should aim to have, when the 7 | // keyspace is stable. Whether the number of active placements is more or 8 | // fewer than this during operations depends on MinActive and MaxActive. 9 | TargetActive int 10 | 11 | // The minimum number of active placements that a range will ever be allowed 12 | // to voluntarily have. (Sometimes the number will be lower involuntarily, 13 | // because of e.g. nodes crashing.) 14 | MinActive int 15 | 16 | // The maximum number of active placements that a range will ever be allowed 17 | // to have. 18 | MaxActive int 19 | 20 | // TODO 21 | MinPlacements int 22 | 23 | // TODO 24 | MaxPlacements int 25 | } 26 | 27 | // TODO: Docs 28 | func (rc *ReplicationConfig) Validate() error { 29 | return nil 30 | } 31 | 32 | // R1 is an example replication config for systems which want a single active 33 | // placement of each key, and can tolerate an additional inactive placement. 34 | var R1 = ReplicationConfig{ 35 | TargetActive: 1, 36 | MinActive: 0, 37 | MaxActive: 1, 38 | // 39 | MinPlacements: 1, 40 | MaxPlacements: 2, 41 | } 42 | 43 | // R1 is an example replication config for high-availability systems which want 44 | // to maintain three active placements of each key, can tolerate an additional 45 | // two placements during operations, one of which can be active. 46 | var R3 = ReplicationConfig{ 47 | TargetActive: 3, 48 | MinActive: 3, 49 | MaxActive: 4, 50 | // 51 | MinPlacements: 3, 52 | MaxPlacements: 5, // Up to two spare 53 | } 54 | -------------------------------------------------------------------------------- /pkg/discovery/mock/mock_discoverer.go: -------------------------------------------------------------------------------- 1 | package mock 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | discovery "github.com/adammck/ranger/pkg/discovery" 8 | ) 9 | 10 | type Discoverer struct { 11 | getters []*discoveryGetter 12 | 13 | // svcName (e.g. 
"node") -> remotes 14 | remotes map[string][]api.Remote 15 | remotesMu sync.RWMutex 16 | } 17 | 18 | func NewDiscoverer() *Discoverer { 19 | return &Discoverer{ 20 | remotes: map[string][]api.Remote{}, 21 | } 22 | } 23 | 24 | type discoveryGetter struct { 25 | disc *Discoverer 26 | svcName string 27 | 28 | // Functions to be called when new remotes are added and removed. 29 | add func(api.Remote) 30 | remove func(api.Remote) 31 | } 32 | 33 | func (d *Discoverer) Discover(svcName string, add, remove func(api.Remote)) discovery.Getter { 34 | dg := &discoveryGetter{ 35 | disc: d, 36 | svcName: svcName, 37 | add: add, 38 | remove: remove, 39 | } 40 | d.getters = append(d.getters, dg) 41 | return dg 42 | } 43 | 44 | func (dg *discoveryGetter) Get() ([]api.Remote, error) { 45 | dg.disc.remotesMu.RLock() 46 | defer dg.disc.remotesMu.RUnlock() 47 | 48 | remotes, ok := dg.disc.remotes[dg.svcName] 49 | if !ok { 50 | return []api.Remote{}, nil 51 | } 52 | 53 | res := make([]api.Remote, len(remotes)) 54 | copy(res, remotes) 55 | 56 | return res, nil 57 | } 58 | 59 | func (dg *discoveryGetter) Stop() error { 60 | return nil 61 | } 62 | 63 | // test helpers 64 | 65 | func (d *Discoverer) Add(svcName string, remote api.Remote) { 66 | d.remotesMu.RLock() 67 | defer d.remotesMu.RUnlock() 68 | 69 | // TODO: Need to init slice? 70 | d.remotes[svcName] = append(d.remotes[svcName], remote) 71 | 72 | for _, dg := range d.getters { 73 | dg.add(remote) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /pkg/proto/debug.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = "github.com/adammck/ranger/pkg/proto"; 4 | 5 | import "ranje.proto"; 6 | 7 | package ranger; 8 | 9 | message RangesListRequest { 10 | } 11 | 12 | message RangesListResponse { 13 | repeated RangeResponse ranges = 1; 14 | } 15 | 16 | message RangeRequest { 17 | uint64 range = 1; 18 | } 19 | 20 | // Similar to Placement (from ranje.proto), but includes extra junk for debug 21 | // views. The other one is used on the control plane, so should be minimal. 22 | message PlacementWithRangeInfo { 23 | Placement placement = 1; 24 | RangeInfo range_info = 2; 25 | } 26 | 27 | message RangeResponse { 28 | RangeMeta meta = 1; 29 | RangeState state = 2; 30 | 31 | // These two are obviously redundant when all ranges are dumped, but the 32 | // output is intended to be useful, not correct. 
33 | repeated uint64 parents = 3; 34 | repeated uint64 children = 4; 35 | 36 | repeated PlacementWithRangeInfo placements = 5; 37 | } 38 | 39 | message NodesListRequest { 40 | } 41 | 42 | message NodesListResponse { 43 | repeated NodeResponse nodes = 1; 44 | } 45 | 46 | message NodeRequest { 47 | string node = 1; 48 | } 49 | 50 | message NodeMeta { 51 | string ident = 1; 52 | string address = 2; 53 | bool want_drain = 3; 54 | } 55 | 56 | // TODO: Remove this, and use PlacementWithRangeInfo 57 | message NodeRange { 58 | RangeMeta meta = 1; 59 | PlacementState state = 3; 60 | } 61 | 62 | message NodeResponse { 63 | NodeMeta node = 1; 64 | repeated NodeRange ranges = 2; 65 | } 66 | 67 | service Debug { 68 | rpc RangesList (RangesListRequest) returns (RangesListResponse) {} 69 | rpc Range (RangeRequest) returns (RangeResponse) {} 70 | rpc NodesList (NodesListRequest) returns (NodesListResponse) {} 71 | rpc Node (NodeRequest) returns (NodeResponse) {} 72 | } 73 | -------------------------------------------------------------------------------- /pkg/keyspace/replication_state.go: -------------------------------------------------------------------------------- 1 | package keyspace 2 | 3 | import ( 4 | "log" 5 | "sort" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | ) 9 | 10 | type Repl struct { 11 | Start api.Key 12 | End api.Key 13 | Total int 14 | Active int 15 | } 16 | 17 | func (ks *Keyspace) ReplicationState() []Repl { 18 | flat := flatRanges(ks) 19 | 20 | for i := range flat { 21 | for _, r := range ks.ranges { 22 | if r.State == api.RsObsolete { 23 | continue 24 | } 25 | if r.Meta.Contains(flat[i].Start) { 26 | log.Printf("%s: r=%s, t=%d, a=%d", flat[i].Start, r.String(), len(r.Placements), r.NumPlacementsInState(api.PsActive)) 27 | flat[i].Total += len(r.Placements) 28 | flat[i].Active += r.NumPlacementsInState(api.PsActive) 29 | } 30 | } 31 | } 32 | 33 | return flat 34 | } 35 | 36 | func flatRanges(ks *Keyspace) []Repl { 37 | 38 | keyMap := make(map[api.Key]struct{}, len(ks.ranges)) 39 | for _, r := range ks.ranges { 40 | if r.State == api.RsObsolete { 41 | // Obsolete ranges can't have placements, so skip them. 
42 | continue 43 | } 44 | 45 | if r.Meta.Start != api.ZeroKey { 46 | keyMap[r.Meta.Start] = struct{}{} 47 | } 48 | 49 | if r.Meta.End != api.ZeroKey { 50 | keyMap[r.Meta.End] = struct{}{} 51 | } 52 | } 53 | 54 | i := 0 55 | keyList := make([]api.Key, len(keyMap)) 56 | for k := range keyMap { 57 | keyList[i] = k 58 | i++ 59 | } 60 | 61 | sort.Slice(keyList, func(i, j int) bool { 62 | return keyList[i] < keyList[j] 63 | }) 64 | 65 | if len(keyMap) == 0 { 66 | return []Repl{ 67 | { 68 | Start: api.ZeroKey, 69 | End: api.ZeroKey, 70 | }, 71 | } 72 | } 73 | 74 | out := make([]Repl, len(keyList)+1) 75 | for i := 0; i < len(keyList); i++ { 76 | out[i].End = keyList[i] 77 | 78 | if i < len(keyList) { 79 | out[i+1].Start = keyList[i] 80 | } 81 | } 82 | 83 | return out 84 | } 85 | -------------------------------------------------------------------------------- /pkg/test/fake_nodes/fake_nodes.go: -------------------------------------------------------------------------------- 1 | package fake_nodes 2 | 3 | import ( 4 | "fmt" 5 | 6 | "context" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | mockdisc "github.com/adammck/ranger/pkg/discovery/mock" 10 | "github.com/adammck/ranger/pkg/test/fake_node" 11 | "google.golang.org/grpc" 12 | ) 13 | 14 | type TestNodes struct { 15 | disc *mockdisc.Discoverer 16 | nodes map[api.NodeID]*fake_node.TestNode 17 | closers []func() 18 | } 19 | 20 | func NewTestNodes() *TestNodes { 21 | tn := &TestNodes{ 22 | disc: mockdisc.NewDiscoverer(), 23 | nodes: map[api.NodeID]*fake_node.TestNode{}, 24 | } 25 | 26 | return tn 27 | } 28 | 29 | func (tn *TestNodes) Close() { 30 | for _, f := range tn.closers { 31 | f() 32 | } 33 | } 34 | 35 | func (tn *TestNodes) Add(ctx context.Context, remote api.Remote, rangeInfos map[api.RangeID]*api.RangeInfo) { 36 | n, closer := fake_node.NewTestNode(ctx, remote.Addr(), rangeInfos) 37 | tn.closers = append(tn.closers, closer) 38 | tn.nodes[remote.NodeID()] = n 39 | tn.disc.Add("node", remote) 40 | } 41 | 42 | func (tn *TestNodes) Get(nID string) *fake_node.TestNode { 43 | n, ok := tn.nodes[api.NodeID(nID)] 44 | if !ok { 45 | panic(fmt.Sprintf("no such node: %s", nID)) 46 | } 47 | 48 | return n 49 | } 50 | 51 | func (tn *TestNodes) SetStrictTransitions(b bool) { 52 | for _, n := range tn.nodes { 53 | n.SetStrictTransitions(b) 54 | } 55 | } 56 | 57 | // Use this to stub out the Roster. 
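// NodeConnFactory returns the pre-built in-process connection for the test
// node whose address matches the given remote. It fails (rather than handing
// back a nil conn) if the node hasn't started listening yet, or if no test
// node has that address.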
58 | func (tn *TestNodes) NodeConnFactory(ctx context.Context, remote api.Remote) (*grpc.ClientConn, error) { 59 | for _, n := range tn.nodes { 60 | if n.Addr == remote.Addr() { 61 | 62 | if n.Conn == nil { 63 | // Fail rather than return nil connection 64 | return nil, fmt.Errorf("nil conn (called before Listen) for test node: %v", n) 65 | } 66 | 67 | return n.Conn, nil 68 | } 69 | } 70 | 71 | return nil, fmt.Errorf("no such connection: %v", remote.Addr()) 72 | } 73 | 74 | func (tn *TestNodes) Discovery() *mockdisc.Discoverer { 75 | return tn.disc 76 | } 77 | -------------------------------------------------------------------------------- /examples/kv/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "flag" 7 | "log" 8 | "math/rand" 9 | "os" 10 | "os/signal" 11 | "syscall" 12 | "time" 13 | 14 | "github.com/adammck/ranger/examples/kv/pkg/node" 15 | "github.com/adammck/ranger/examples/kv/pkg/proxy" 16 | ) 17 | 18 | type Runner interface { 19 | Run(ctx context.Context) error 20 | } 21 | 22 | func init() { 23 | rand.Seed(time.Now().UTC().UnixNano()) 24 | } 25 | 26 | func main() { 27 | fnod := flag.Bool("node", false, "start a node") 28 | fprx := flag.Bool("proxy", false, "start a proxy") 29 | 30 | addrLis := flag.String("addr", "localhost:8000", "address to start grpc server on") 31 | addrPub := flag.String("pub-addr", "", "address for other nodes to reach this (default: same as -listen)") 32 | drain := flag.Bool("drain", false, "node: drain ranges before shutting down") 33 | LogReqs := flag.Bool("log-reqs", false, "proxy, node: enable request logging") 34 | chaos := flag.Bool("chaos", false, "enable random failures and delays") 35 | flag.Parse() 36 | 37 | if *addrPub == "" { 38 | *addrPub = *addrLis 39 | } 40 | 41 | // Replace default logger. 42 | log.Default().SetOutput(os.Stdout) 43 | log.Default().SetPrefix("") 44 | log.Default().SetFlags(0) 45 | 46 | ctx, cancel := context.WithCancel(context.Background()) 47 | 48 | sig := make(chan os.Signal, 1) 49 | signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM) 50 | 51 | go func() { 52 | <-sig 53 | cancel() 54 | }() 55 | 56 | var cmd Runner 57 | var err error 58 | 59 | if *fnod && !*fprx { 60 | cmd, err = node.New(*addrLis, *addrPub, *drain, *LogReqs, *chaos) 61 | 62 | } else if !*fnod && *fprx { 63 | cmd, err = proxy.New(*addrLis, *addrPub, *LogReqs) 64 | 65 | } else { 66 | err = errors.New("must provide one of -node, -proxy") 67 | } 68 | 69 | if err != nil { 70 | exit(err) 71 | } 72 | 73 | err = cmd.Run(ctx) 74 | if err != nil { 75 | exit(err) 76 | } 77 | } 78 | 79 | func exit(err error) { 80 | log.Fatalf("Error: %s", err) 81 | } 82 | -------------------------------------------------------------------------------- /pkg/actuator/util/parents.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | "github.com/adammck/ranger/pkg/proto/conv" 8 | pb "github.com/adammck/ranger/pkg/proto/gen" 9 | "github.com/adammck/ranger/pkg/ranje" 10 | "github.com/adammck/ranger/pkg/roster" 11 | ) 12 | 13 | // TODO: Where does this belong? Probably not here! 
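// GetParents walks the ancestry of the given range and returns one pb.Parent
// per range in that ancestry (starting with the range itself, de-duplicated
// via the seen set), including current placements and node addresses, so the
// actuator can attach them to a PrepareRequest.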
14 | func GetParents(ks ranje.RangeGetter, ros roster.NodeGetter, rang *ranje.Range) []*pb.Parent { 15 | parents := []*pb.Parent{} 16 | seen := map[api.RangeID]struct{}{} 17 | addParents(ks, ros, rang, &parents, seen) 18 | return parents 19 | } 20 | 21 | func addParents(ks ranje.RangeGetter, ros roster.NodeGetter, rang *ranje.Range, parents *[]*pb.Parent, seen map[api.RangeID]struct{}) { 22 | _, ok := seen[rang.Meta.Ident] 23 | if ok { 24 | return 25 | } 26 | 27 | *parents = append(*parents, pbPlacement(ros, rang)) 28 | seen[rang.Meta.Ident] = struct{}{} 29 | 30 | for _, rID := range rang.Parents { 31 | r, err := ks.GetRange(rID) 32 | if err != nil { 33 | // TODO: Think about how to recover from this. It's bad. 34 | panic(fmt.Sprintf("getting range with ident %v: %v", rID, err)) 35 | } 36 | 37 | addParents(ks, ros, r, parents, seen) 38 | } 39 | } 40 | 41 | func pbPlacement(ros roster.NodeGetter, r *ranje.Range) *pb.Parent { 42 | 43 | // TODO: The kv example doesn't care about range history, because it has no 44 | // external write log, so can only fetch from nodes. So we can skip sending 45 | // them at all. Maybe add a controller feature flag? 46 | 47 | pbPlacements := make([]*pb.Placement, len(r.Placements)) 48 | 49 | for i, p := range r.Placements { 50 | 51 | // TODO: Don't ignore errors here. 52 | n, _ := ros.NodeByIdent(p.NodeID) 53 | 54 | node := "" 55 | if n != nil { 56 | node = n.Addr() 57 | } 58 | 59 | pbPlacements[i] = &pb.Placement{ 60 | Node: node, 61 | State: conv.PlacementStateToProto(p.StateCurrent), 62 | } 63 | } 64 | 65 | return &pb.Parent{ 66 | Range: conv.MetaToProto(r.Meta), 67 | Placements: pbPlacements, 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /examples/kv/pkg/proxy/server.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "time" 7 | 8 | pbkv "github.com/adammck/ranger/examples/kv/proto/gen" 9 | "github.com/adammck/ranger/pkg/rangelet/mirror" 10 | ) 11 | 12 | type proxyServer struct { 13 | pbkv.UnimplementedKVServer 14 | proxy *Proxy 15 | } 16 | 17 | func (ps *proxyServer) Get(ctx context.Context, req *pbkv.GetRequest) (*pbkv.GetResponse, error) { 18 | client, mres, err := ps.proxy.getClient(req.Key) 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | res, err := client.Get(ctx, req) 24 | if err != nil { 25 | log.Printf("Error: %s (method=Get, key=%s, nID=%s, state=%v)", err, req.Key, mres.NodeID(), mres.State) 26 | } else if ps.proxy.logReqs { 27 | log.Printf("Get: %s -> %s", req.Key, mres.NodeID()) 28 | } 29 | 30 | return res, err 31 | } 32 | 33 | func (ps *proxyServer) Put(ctx context.Context, req *pbkv.PutRequest) (*pbkv.PutResponse, error) { 34 | var client pbkv.KVClient 35 | var res *pbkv.PutResponse 36 | var mres mirror.Result 37 | var err error 38 | 39 | retries := 0 40 | maxRetries := 10 41 | 42 | for { 43 | 44 | client, mres, err = ps.proxy.getClient(req.Key) 45 | if err == nil { 46 | res, err = client.Put(ctx, req) 47 | if err == nil { 48 | // Success! 49 | break 50 | } 51 | } 52 | 53 | if retries >= maxRetries { 54 | break 55 | } 56 | 57 | retries += 1 58 | 59 | // TODO: Use a proper backoff lib here. 60 | // 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.28s, 2.56s, 5.12s 61 | d := time.Duration(((1<>1)*10) * time.Millisecond 62 | 63 | // Sleep but respect cancellation. 
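// A plain time.Sleep would ignore cancellation, so wait on whichever of the
// timer or the context fires first, and drain the timer channel if the
// context wins.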
64 | delay := time.NewTimer(d) 65 | select { 66 | case <-delay.C: 67 | case <-ctx.Done(): 68 | if !delay.Stop() { 69 | <-delay.C 70 | } 71 | } 72 | } 73 | 74 | if err != nil { 75 | log.Printf("Error: %s (method=Put, key=%s, node=%s, state=%v)", err, req.Key, mres.NodeID(), mres.State) 76 | } else if ps.proxy.logReqs { 77 | log.Printf("Put: %s -> %s", req.Key, mres.NodeID()) 78 | } 79 | 80 | return res, err 81 | } 82 | -------------------------------------------------------------------------------- /examples/kv/tools/hammer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "workers": [ 3 | { "prefix": "a", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 4 | { "prefix": "b", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 5 | { "prefix": "c", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 6 | { "prefix": "d", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 7 | { "prefix": "e", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 8 | { "prefix": "f", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 9 | { "prefix": "g", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 10 | { "prefix": "h", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 11 | { "prefix": "i", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 12 | { "prefix": "j", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 13 | { "prefix": "k", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 14 | { "prefix": "l", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 15 | { "prefix": "m", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 16 | { "prefix": "n", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 17 | { "prefix": "o", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 18 | { "prefix": "p", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 19 | { "prefix": "q", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 20 | { "prefix": "r", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 21 | { "prefix": "s", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 22 | { "prefix": "t", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 23 | { "prefix": "u", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 24 | { "prefix": "v", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 25 | { "prefix": "w", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 26 | { "prefix": "x", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 27 | { "prefix": "y", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } }, 28 | { "prefix": "z", "qps": { "create": 1, "read": 5, "update": 1, "delete": 0 } } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /docs/join.md: -------------------------------------------------------------------------------- 1 | # Join 2 | 3 | When two ranges (1, 2) are assigned to nodes (a, b), and we want to join them 4 | into a single range (3) assined to a different node (c), we **join** them. 5 | 6 | 1. Prepare(c, 3) 7 | 2. Deactivate 8 | 1. Deactivate(a, 1) 9 | 2. Deactivate(a, 2) 10 | 3. Activate(c, 3) 11 | 4. Drop 12 | 1. Drop(a, 1) 13 | 2. Drop(a, 2) 14 | 15 | [_TestJoin_](https://cs.github.com/adammck/ranger?q=symbol%3ATestJoin) 16 | 17 | ## Failures 18 | 19 | If step 1 fails, just abort the join: 20 | 21 | 1. 
Prepare(c, 3) 22 | 23 | [_TestJoinFailure_Prepare_](https://cs.github.com/adammck/ranger?q=symbol%3ATestJoinFailure_Prepare) 24 | 25 | --- 26 | 27 | If step 2 fails, reactivate any of the source placements which were deactivated, 28 | drop the destination placement, and abort the join: 29 | 30 | 1. Prepare(c, 3) 31 | 2. Deactivate 32 | 1. Deactivate(a, 1) 33 | 2. Deactivate(a, 2) 34 | 3. Drop(c, 3) 35 | 36 | or 37 | 38 | 1. Prepare(c, 3) 39 | 2. Deactivate 40 | 1. Deactivate(a, 1) 41 | 2. Deactivate(a, 2) 42 | 3. Activate 43 | 1. Activate(a, 2) 44 | 4. Drop(c, 3) 45 | 46 | or 47 | 48 | 1. Prepare(c, 3) 49 | 2. Deactivate 50 | 1. Deactivate(a, 1) 51 | 2. Deactivate(a, 2) 52 | 3. Activate 53 | 1. Activate(a, 1) 54 | 4. Drop(c, 3) 55 | 56 | [_TestJoinFailure_Deactivate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestJoinFailure_Deactivate) 57 | 58 | --- 59 | 60 | If step 3 fails, reactivate source placements, drop the destination placement, 61 | and abort the join: 62 | 63 | 1. Prepare(c, 3) 64 | 2. Deactivate 65 | 1. Deactivate(a, 1) 66 | 2. Deactivate(a, 2) 67 | 3. Activate(c, 3) 68 | 4. Activate 69 | 1. Activate(a, 1) 70 | 2. Activate(a, 2) 71 | 4. Drop(c, 3) 72 | 73 | [_TestJoinFailure_Activate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestJoinFailure_Activate) 74 | 75 | --- 76 | 77 | If step 4 fails, do nothing but keep trying forever until both source placements 78 | are dropped: 79 | 80 | 1. Prepare(c, 3) 81 | 2. Deactivate 82 | 1. Deactivate(a, 1) 83 | 2. Deactivate(a, 2) 84 | 3. Activate(c, 3) 85 | 4. Drop 86 | 1. Drop(a, 1) 87 | 2. Drop(a, 2) 88 | 89 | [_TestJoinFailure_Drop_](https://cs.github.com/adammck/ranger?q=symbol%3ATestJoinFailure_Drop) 90 | -------------------------------------------------------------------------------- /pkg/proto/node.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = "github.com/adammck/ranger/pkg/proto"; 4 | 5 | import "ranje.proto"; 6 | 7 | package ranger; 8 | 9 | service Node { 10 | rpc Prepare (PrepareRequest) returns (PrepareResponse) {} 11 | rpc Activate (ServeRequest) returns (ServeResponse) {} 12 | rpc Deactivate (DeactivateRequest) returns (DeactivateResponse) {} 13 | rpc Drop (DropRequest) returns (DropResponse) {} 14 | 15 | // Controller wants to know the state of the node, including its ranges. 16 | // Proxy shouldn't call this; use Ranges instead. 17 | rpc Info (InfoRequest) returns (InfoResponse) {} 18 | 19 | // Proxy wants to know what it can forward to this node. 20 | // Controller shouldn't call this; use Info instead. 21 | rpc Ranges (RangesRequest) returns (stream RangesResponse) {} 22 | } 23 | 24 | message Parent { 25 | RangeMeta range = 1; 26 | 27 | // Range IDs in here may not appear in the PrepareRequest, because at some point 28 | // the history is pruned. 29 | repeated uint64 parent = 2; 30 | 31 | // TODO: This should probably be two fields, host and port? Or node ident? 32 | repeated Placement placements = 3; 33 | } 34 | 35 | message PrepareRequest { 36 | RangeMeta range = 1; 37 | 38 | // The range(s) which this range was created from, and the nodes where they 39 | // can currently be found. This is empty is the range is brand new. Nodes may 40 | // use this info to restore the current state of the range when accepting it. 41 | // TODO: Need nested parents here? 42 | repeated Parent parents = 3; 43 | } 44 | 45 | message PrepareResponse { 46 | // TODO: Return just the state instead, like ServeResponse. 
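// The node's view of the range after handling Prepare: its meta, the state
// it is now in on that node, and current load info.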
47 | RangeInfo range_info = 1; 48 | } 49 | 50 | message ServeRequest { 51 | uint64 range = 1; 52 | bool force = 2; 53 | } 54 | 55 | message ServeResponse { 56 | RangeNodeState state = 1; 57 | } 58 | 59 | message DeactivateRequest { 60 | uint64 range = 1; 61 | } 62 | 63 | message DeactivateResponse { 64 | RangeNodeState state = 1; 65 | } 66 | 67 | message DropRequest { 68 | uint64 range = 1; 69 | bool force = 2; 70 | } 71 | 72 | message DropResponse { 73 | RangeNodeState state = 1; 74 | } 75 | 76 | message InfoRequest { 77 | } 78 | 79 | message InfoResponse { 80 | repeated RangeInfo ranges = 1; 81 | 82 | // The nod wants the controller to remove all ranges from it. Probably because 83 | // it wants to shut down gracefully. 84 | bool wantDrain = 2; 85 | } 86 | 87 | message RangesRequest { 88 | } 89 | 90 | message RangesResponse { 91 | RangeMeta meta = 1; 92 | RangeNodeState state = 2; 93 | } 94 | -------------------------------------------------------------------------------- /examples/kv/README.md: -------------------------------------------------------------------------------- 1 | # Example: Key-Value Store 2 | 3 | This is a simple distributed in-memory key-value store using Ranger. You should 4 | not use it for anything under any circumstances; there's no persistence, and it 5 | totally ignores various important edge-cases. It's intended to demonstrate the 6 | following specific use case: 7 | 8 | - One or zero active replicas. 9 | - Direct (node-to-node) state transfer. 10 | - External proxy layer. 11 | 12 | It has two components in addition to the controller: 13 | 14 | - **node**: 15 | This stateful service stores the actual data. It exposes a simple get/put 16 | interface over gRPC. It includes a _Rangelet_, which will coordinate workload 17 | assignment with the controller. 18 | - **proxy**: 19 | This stateless service connects to every node and watches the range 20 | assignments. It exposes the same get/put interface, but can transparently 21 | forward requests to the appropriate storage node. 22 | 23 | ---- 24 | 25 | ## Deps 26 | 27 | Install dependencies with Brew. 28 | Ths only works on macOS. (Sorry.) 29 | 30 | ```console 31 | $ brew bundle --file Brewfile 32 | Homebrew Bundle complete! 2 Brewfile dependencies now installed. 33 | ``` 34 | 35 | ## Usage 36 | 37 | Start consul in the background (for service discovery), and run a simple three 38 | node cluster: 39 | (This uses Foreman to keep things simple, but you can also start up the services 40 | in separate tabs or whatever, if you prefer.) 41 | 42 | ```console 43 | $ brew services run consul 44 | ==> Successfully ran `consul` (label: homebrew.mxcl.consul) 45 | $ cd ~/code/src/github.com/adammck/ranger/examples/kv 46 | $ bin/dev.sh 47 | 23:15:00 controller.1 | started with pid 18951 48 | 23:15:00 proxy.1 | started with pid 18952 49 | 23:15:01 node.1 | started with pid 18953 50 | 23:15:01 node.2 | started with pid 18954 51 | 23:15:01 node.3 | started with pid 18955 52 | ``` 53 | 54 | Run a load test: 55 | (This hammer tool is specific to the kv example, and is kind of janky. It's 56 | intended to demonstrate availability during range moves/splits/joins.) 
57 | 58 | ```console 59 | $ cd tools/hammer 60 | $ go build 61 | $ ./hammer -addr localhost:5100 -duration 60s 62 | ``` 63 | 64 | Move range 1 (the only range, for now) to node 2: 65 | 66 | ```console 67 | $ rangerctl move 1 5201 68 | ``` 69 | 70 | Split range 1 onto nodes 1 and 3: 71 | 72 | ```console 73 | $ rangerctl split 1 m 5200 5202 74 | ``` 75 | 76 | Join ranges 1 and 3 back onto node 2: 77 | 78 | ```console 79 | $ rangerctl join 2 3 5021 80 | ``` 81 | 82 | ## Tests 83 | 84 | These aren't exactly working right now. 85 | 86 | ```console 87 | $ bats test 88 | ``` 89 | 90 | ## License 91 | 92 | MIT. 93 | -------------------------------------------------------------------------------- /pkg/rangelet/server_test.go: -------------------------------------------------------------------------------- 1 | package rangelet 2 | 3 | import ( 4 | "context" 5 | "net" 6 | "testing" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | pb "github.com/adammck/ranger/pkg/proto/gen" 10 | "github.com/adammck/ranger/pkg/test/fake_storage" 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/credentials/insecure" 13 | "google.golang.org/grpc/test/bufconn" 14 | "google.golang.org/protobuf/testing/protocmp" 15 | "gotest.tools/assert" 16 | ) 17 | 18 | type rangeInfos map[api.RangeID]*api.RangeInfo 19 | 20 | func TestRanges(t *testing.T) { 21 | h := setup(t, singleRange()) 22 | req := &pb.RangesRequest{} 23 | 24 | res, err := h.client.Ranges(h.ctx, req) 25 | assert.NilError(t, err) 26 | 27 | r, err := res.Recv() 28 | assert.NilError(t, err) 29 | assert.DeepEqual(t, &pb.RangesResponse{ 30 | Meta: &pb.RangeMeta{ 31 | Ident: 1, 32 | }, 33 | State: pb.RangeNodeState_ACTIVE, 34 | }, r, protocmp.Transform()) 35 | 36 | err = h.rglt.ForceDrop(1) 37 | assert.NilError(t, err) 38 | 39 | r, err = res.Recv() 40 | assert.NilError(t, err) 41 | assert.DeepEqual(t, &pb.RangesResponse{ 42 | Meta: &pb.RangeMeta{ 43 | Ident: 1, 44 | }, 45 | State: pb.RangeNodeState_NOT_FOUND, 46 | }, r, protocmp.Transform()) 47 | 48 | err = res.CloseSend() 49 | assert.NilError(t, err) 50 | } 51 | 52 | type testHarness struct { 53 | ctx context.Context 54 | rglt *Rangelet 55 | client pb.NodeClient 56 | } 57 | 58 | func setup(t *testing.T, ri rangeInfos) *testHarness { 59 | ctx := context.Background() 60 | 61 | stor := fake_storage.NewFakeStorage(ri) 62 | rglt := newRangelet(nil, stor) 63 | ns := newNodeServer(rglt) // <-- SUT 64 | srv := grpc.NewServer() 65 | ns.Register(srv) 66 | 67 | // client 68 | conn, closer := nodeServer(ctx, srv) 69 | t.Cleanup(closer) 70 | 71 | client := pb.NewNodeClient(conn) 72 | 73 | return &testHarness{ 74 | ctx: ctx, 75 | rglt: rglt, 76 | client: client, 77 | } 78 | } 79 | 80 | func singleRange() rangeInfos { 81 | return rangeInfos{ 82 | 1: { 83 | Meta: api.Meta{Ident: 1}, 84 | State: api.NsActive, 85 | }, 86 | } 87 | } 88 | 89 | // From: https://harrigan.xyz/blog/testing-go-grpc-server-using-an-in-memory-buffer-with-bufconn/ 90 | func nodeServer(ctx context.Context, s *grpc.Server) (*grpc.ClientConn, func()) { 91 | listener := bufconn.Listen(1024 * 1024) 92 | 93 | go func() { 94 | if err := s.Serve(listener); err != nil { 95 | panic(err) 96 | } 97 | }() 98 | 99 | conn, _ := grpc.DialContext(ctx, "", grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { 100 | return listener.Dial() 101 | }), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) 102 | 103 | return conn, s.Stop 104 | } 105 | -------------------------------------------------------------------------------- 
/pkg/discovery/consul/consul_discovery.go: -------------------------------------------------------------------------------- 1 | package consul 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "strconv" 7 | "time" 8 | 9 | consulapi "github.com/hashicorp/consul/api" 10 | "google.golang.org/grpc" 11 | "google.golang.org/grpc/health" 12 | hv1 "google.golang.org/grpc/health/grpc_health_v1" 13 | ) 14 | 15 | type Discovery struct { 16 | svcName string 17 | host string 18 | port int 19 | consul *consulapi.Client 20 | hs *health.Server 21 | } 22 | 23 | func (d *Discovery) getIdent() string { 24 | if d.host == "" || d.host == "localhost" || d.host == "127.0.0.1" { 25 | return fmt.Sprintf("%d", d.port) 26 | } 27 | 28 | return fmt.Sprintf("%s:%d", d.host, d.port) 29 | } 30 | 31 | // TODO: Deactivate a consul API here, not a cfg. 32 | func New(serviceName, addr string, cfg *consulapi.Config, srv *grpc.Server) (*Discovery, error) { 33 | client, err := consulapi.NewClient(cfg) 34 | if err != nil { 35 | return nil, err 36 | } 37 | 38 | // Extract host:port from the given address. 39 | // TODO: Maybe better to do this outside? 40 | host, sPort, err := net.SplitHostPort(addr) 41 | if err != nil { 42 | return nil, err 43 | } 44 | nPort, err := strconv.Atoi(sPort) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | d := &Discovery{ 50 | svcName: serviceName, 51 | host: host, 52 | port: nPort, 53 | 54 | consul: client, 55 | hs: health.NewServer(), 56 | } 57 | 58 | d.hs.SetServingStatus("", hv1.HealthCheckResponse_SERVING) 59 | hv1.RegisterHealthServer(srv, d.hs) 60 | 61 | return d, nil 62 | } 63 | 64 | func (d *Discovery) Start() error { 65 | def := &consulapi.AgentServiceRegistration{ 66 | Name: d.svcName, 67 | ID: d.getIdent(), 68 | 69 | // How other nodes should call the service. 70 | Address: d.host, 71 | Port: d.port, 72 | 73 | Check: &consulapi.AgentServiceCheck{ 74 | GRPC: fmt.Sprintf("%s:%d", d.host, d.port), 75 | 76 | // How long to wait between checks. 77 | Interval: (3 * time.Second).String(), 78 | 79 | // How long to wait for a response before giving up. 80 | Timeout: (1 * time.Second).String(), 81 | 82 | // How long to wait after a service becomes critical (i.e. starts 83 | // returning error, unhealthy responses, or timing out) before 84 | // removing it from service discovery. Might actually take longer 85 | // than this because of Consul implementation. 86 | DeregisterCriticalServiceAfter: (10 * time.Second).String(), 87 | }, 88 | } 89 | 90 | // TODO: Send this in a loop while running, in case Consul dies. 
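// Registration happens only once, at startup; if the Consul agent loses its
// state, this service drops out of discovery until it is restarted (hence
// the TODO above).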
91 | err := d.consul.Agent().ServiceRegister(def) 92 | if err != nil { 93 | return err 94 | } 95 | 96 | return nil 97 | } 98 | 99 | func (d *Discovery) Stop() error { 100 | err := d.consul.Agent().ServiceDeregister(d.getIdent()) 101 | if err != nil { 102 | return err 103 | } 104 | 105 | return nil 106 | } 107 | -------------------------------------------------------------------------------- /examples/kv/pkg/node/fetcher.go: -------------------------------------------------------------------------------- 1 | package node 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | pbkv "github.com/adammck/ranger/examples/kv/proto/gen" 9 | "github.com/adammck/ranger/pkg/api" 10 | "golang.org/x/sync/errgroup" 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/credentials/insecure" 13 | ) 14 | 15 | type src struct { 16 | meta api.Meta 17 | node string 18 | } 19 | 20 | type fetcher struct { 21 | meta api.Meta 22 | srcs []src 23 | } 24 | 25 | func newFetcher(rm api.Meta, parents []api.Parent) *fetcher { 26 | srcs := []src{} 27 | 28 | // If this is a range move, we can just fetch the whole thing from a single 29 | // node. Writes to that node will be disabled (via Deactivate) before the 30 | // fetch occurs (via Activate). 31 | 32 | for _, par := range parents { 33 | for _, plc := range par.Placements { 34 | if plc.State == api.PsActive { 35 | src := src{meta: par.Meta, node: plc.Node} 36 | srcs = append(srcs, src) 37 | } 38 | } 39 | } 40 | 41 | // TODO: Verify that the src ranges cover the entire dest range. 42 | 43 | return &fetcher{ 44 | meta: rm, 45 | srcs: srcs, 46 | } 47 | } 48 | 49 | func (f *fetcher) Fetch(dest *Range) error { 50 | 51 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 52 | defer cancel() 53 | 54 | // Fetch each source range in parallel. 55 | g, ctx := errgroup.WithContext(ctx) 56 | for i := range f.srcs { 57 | 58 | // lol, golang 59 | // https://golang.org/doc/faq#closures_and_goroutines 60 | i := i 61 | 62 | g.Go(func() error { 63 | return fetch(ctx, dest, f.meta, f.srcs[i].node, f.srcs[i].meta) 64 | }) 65 | } 66 | 67 | if err := g.Wait(); err != nil { 68 | return err 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func fetch(ctx context.Context, dest *Range, meta api.Meta, addr string, src api.Meta) error { 75 | conn, err := grpc.DialContext( 76 | ctx, 77 | addr, 78 | grpc.WithTransportCredentials(insecure.NewCredentials()), 79 | grpc.WithBlock()) 80 | if err != nil { 81 | // TODO: Probably a bit excessive 82 | return fmt.Errorf("fetch failed: DialContext: %v", err) 83 | } 84 | 85 | defer conn.Close() 86 | client := pbkv.NewKVClient(conn) 87 | 88 | res, err := client.Dump(ctx, &pbkv.DumpRequest{RangeIdent: uint64(src.Ident)}) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | load := 0 94 | skip := 0 95 | 96 | func() { 97 | // Hold lock for duration rather than flapping. 98 | dest.dataMu.Lock() 99 | defer dest.dataMu.Unlock() 100 | for _, pair := range res.Pairs { 101 | 102 | // Ignore any keys which are not in the destination range, since we 103 | // might be reading from a dump of a superset (if this is a join). 
104 | if !meta.Contains(api.Key(pair.Key)) { 105 | skip += 1 106 | continue 107 | } 108 | 109 | dest.data[string(pair.Key)] = pair.Value 110 | load += 1 111 | } 112 | }() 113 | 114 | return nil 115 | } 116 | -------------------------------------------------------------------------------- /pkg/proto/ranje.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = "github.com/adammck/ranger/pkg/proto"; 4 | 5 | package ranger; 6 | 7 | message RangeMeta { 8 | // Unique for every range. 9 | uint64 ident = 1; 10 | 11 | // Range of the keys in this range. 12 | bytes start = 2; // inclusive 13 | bytes end = 3; // exclusive 14 | } 15 | 16 | // Sent from the controller with Prepare. 17 | // 18 | // TODO: Should include the placement index in here, so the node can verify that 19 | // the controller is talking about the same placement when it sees duplicates. 20 | // Just in case the controller has gone mad and is trying to place multiple 21 | // replicas of the same range on a single node. 22 | message Placement { 23 | string node = 1; 24 | PlacementState state = 2; 25 | } 26 | 27 | // Proto of rangelet.LoadInfo and roster.LoadInfo 28 | message LoadInfo { 29 | // Number of keys which this range contains. 30 | // Just for reporting? Not balancing? 31 | uint64 keys = 1; 32 | 33 | // Where the node would suggest that this range be split, in order for the 34 | // resulting ranges to be evenly loaded. Otherwise the mid-point between start 35 | // and end keys will be used, which is probably not an even split. 36 | repeated string splits = 2; 37 | 38 | // TODO: Generic load info? cpu/ram/network/disk? 39 | // TODO: Extra domain-specific info? 40 | } 41 | 42 | // TODO: Rename to RemoteRangeInfo, since this is the view from the remote. 43 | message RangeInfo { 44 | // TODO: Do we need the whole meta here? Maybe ID is enough? Nice to confirm 45 | // range boundaries I guess. 46 | RangeMeta meta = 1; 47 | 48 | // The state which the range is currently in, according to the node. 49 | RangeNodeState state = 2; 50 | 51 | // LoadInfo informs the controller how much load this range is applying to the 52 | // node, relative to the other ranges on that node. The controller will use 53 | // this info to rebalance ranges. 54 | LoadInfo info = 3; 55 | } 56 | 57 | // TODO: Rename to RemoteState, like the non-proto type. 58 | // Keep synced with roster/api.RemoteState (in pkg/roster/state/remote_state.go) 59 | enum RangeNodeState { 60 | UNKNOWN = 0; 61 | 62 | // Stable states 63 | INACTIVE = 1; 64 | ACTIVE = 2; 65 | 66 | // During transitions 67 | PREPARING = 3; // Pending -> Inactive 68 | ACTIVATING = 4; // Inactive -> Active 69 | DEACTIVATING = 5; // Active -> Inactive 70 | DROPPING = 6; // Inactive -> NotFound 71 | 72 | // Special case: See roster.RemoteState 73 | NOT_FOUND = 7; 74 | } 75 | 76 | // This is only for debugging purposes, for now. 77 | // Keep synced with ranje.RangeState (in pkg/ranje/range_state.go) 78 | // TODO: Remove the prefix; the const is currently e.g. RangeState_RS_ACTIVE. 79 | enum RangeState { 80 | RS_UNKNOWN = 0; 81 | RS_ACTIVE = 1; 82 | RS_SUBSUMING = 2; 83 | RS_OBSOLETE = 3; 84 | } 85 | 86 | // This is only for debugging purposes, for now. 
87 | // Keep synced with ranje.PlacementState (in pkg/ranje/placement_state.go) 88 | enum PlacementState { 89 | PS_UNKNOWN = 0; 90 | PS_PENDING = 1; 91 | PS_INACTIVE = 2; 92 | PS_ACTIVE = 3; 93 | PS_MISSING = 5; 94 | PS_DROPPED = 6; 95 | } 96 | -------------------------------------------------------------------------------- /pkg/persister/consul/consul.go: -------------------------------------------------------------------------------- 1 | package consul 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "log" 7 | "strconv" 8 | "strings" 9 | "sync" 10 | 11 | rapi "github.com/adammck/ranger/pkg/api" 12 | "github.com/adammck/ranger/pkg/ranje" 13 | capi "github.com/hashicorp/consul/api" 14 | ) 15 | 16 | type Persister struct { 17 | kv *capi.KV 18 | 19 | // keep track of the last ModifyIndex for each range. 20 | // Note that the key is a *pointer* which is weird. 21 | modifyIndex map[*ranje.Range]uint64 22 | 23 | // guards modifyIndex 24 | sync.Mutex 25 | } 26 | 27 | func New(client *capi.Client) *Persister { 28 | return &Persister{ 29 | kv: client.KV(), 30 | modifyIndex: map[*ranje.Range]uint64{}, 31 | } 32 | } 33 | 34 | func (cp *Persister) GetRanges() ([]*ranje.Range, error) { 35 | pairs, _, err := cp.kv.List("/ranges", nil) 36 | if err != nil { 37 | return nil, err 38 | } 39 | 40 | out := []*ranje.Range{} 41 | 42 | // TODO: Something less dumb than this. 43 | cp.Lock() 44 | defer cp.Unlock() 45 | 46 | for _, kv := range pairs { 47 | s := strings.SplitN(kv.Key, "/", 2) 48 | if len(s) != 2 { 49 | log.Printf("warn: invalid Consul key: %s", kv.Key) 50 | continue 51 | } 52 | 53 | key, err := strconv.ParseUint(s[1], 10, 64) 54 | if err != nil { 55 | log.Printf("warn: invalid Consul key: %s", kv.Key) 56 | continue 57 | } 58 | 59 | r := &ranje.Range{} 60 | json.Unmarshal(kv.Value, r) 61 | 62 | rID := rapi.RangeID(key) 63 | if rID != r.Meta.Ident { 64 | log.Printf("warn: mismatch between Consul KV key and encoded range: key=%v, r.meta.ident=%v", key, r.Meta.Ident) 65 | continue 66 | } 67 | 68 | // Update 69 | cp.modifyIndex[r] = kv.ModifyIndex 70 | 71 | out = append(out, r) 72 | } 73 | 74 | return out, nil 75 | } 76 | 77 | func (cp *Persister) PutRanges(ranges []*ranje.Range) error { 78 | cp.Lock() 79 | defer cp.Unlock() 80 | 81 | var ops capi.KVTxnOps 82 | keyToRange := map[string]*ranje.Range{} 83 | 84 | for _, r := range ranges { 85 | v, err := json.Marshal(r) 86 | if err != nil { 87 | return err 88 | } 89 | 90 | op := &capi.KVTxnOp{ 91 | Verb: capi.KVCAS, 92 | Key: fmt.Sprintf("ranges/%d", r.Meta.Ident), 93 | Value: v, 94 | } 95 | 96 | // Keep track of which range each key came from, so we can update the 97 | // modifyIndex cache when we receive the response. 98 | // TODO: Maybe use the op.Key as the map key here instead? 
99 | keyToRange[op.Key] = r 100 | 101 | if index, ok := cp.modifyIndex[r]; ok { 102 | op.Index = index 103 | } 104 | 105 | ops = append(ops, op) 106 | } 107 | 108 | ok, res, _, err := cp.kv.Txn(ops, nil) 109 | if err != nil { 110 | return err 111 | } 112 | if !ok { 113 | // This should never happen 114 | panic("got no err but !ok from Txn?") 115 | } 116 | if len(res.Results) != len(ops) { 117 | panic(fmt.Sprintf("expected %d result from Txn, got %d", len(ops), len(res.Results))) 118 | } 119 | 120 | for _, res := range res.Results { 121 | r := keyToRange[res.Key] 122 | cp.modifyIndex[r] = res.ModifyIndex 123 | } 124 | 125 | return nil 126 | } 127 | -------------------------------------------------------------------------------- /examples/cache/README.md: -------------------------------------------------------------------------------- 1 | # Example: Cache 2 | 3 | This is a simple distributed cache using ranger. The work it performs is totally 4 | pointless and deliberately expensive -- it recursively hashes incoming payloads 5 | ten million times -- but is cached by key, so subsequent requests for the same 6 | payload are fast. We can pretend that the workload is doing something expensive 7 | but useful, like sending some RPCs, searching through a lot of data, or caching 8 | some data from underlying storage. 9 | 10 | This demonstrates the following features: 11 | 12 | - **Separate data/control planes**: 13 | Ranger traffic (from controller to node, and between nodes) is exchanged over 14 | gRPC, but the service exposes its own endpoint(s) over a separte HTTP server. 15 | - **Dumb client**: 16 | There's no custom client to send a request; it's just cURL. 17 | - **Request Forwarding**: 18 | Incoming requests are either handled, or forwarded (via HTTP 302) to the 19 | appropriate node. This is accomplished by every node maintaining a mirror of 20 | all range assignments. 21 | - **Stateless**: 22 | Cached data is not moved between nodes when range reassignments happen. It's 23 | just thrown away. The rangelet integration is therefore very simple. 24 | 25 | ## Usage 26 | 27 | Start the thing using Foreman: 28 | (The port assignments are a huge hack.) 29 | 30 | ```console 31 | $ brew services run consul 32 | ==> Successfully ran `consul` (label: homebrew.mxcl.consul) 33 | $ cd ~/code/src/github.com/adammck/ranger/examples/cache 34 | $ bin/dev.sh 35 | 18:56:00 controller.1 | started with pid 93093 36 | 18:56:00 node.1 | started with pid 93094 37 | 18:56:00 node.2 | started with pid 93095 38 | 18:56:00 node.3 | started with pid 93096 39 | 18:56:00 controller.1 | listening on: 127.0.0.1:5000 40 | 18:56:00 node.1 | grpc listening on: 127.0.0.1:15100 41 | 18:56:00 node.1 | http listening on: 127.0.0.1:25100 42 | 18:56:00 node.2 | grpc listening on: 127.0.0.1:15101 43 | 18:56:00 node.2 | http listening on: 127.0.0.1:25101 44 | 18:56:00 node.3 | grpc listening on: 127.0.0.1:15102 45 | 18:56:00 node.3 | http listening on: 127.0.0.1:25102 46 | ``` 47 | 48 | In a separate terminal, send some requests to node 1, which is currently 49 | assigned the entire keyspace in a single range: 50 | 51 | ```console 52 | $ curl -L http://localhost:25100/a 53 | 4851381cac0c5b0c2e4a6c7e5629c6ac6db47f2a15c31d40f242a6be39ffb97d 54 | 55 | $ curl -L http://localhost:25100/b 56 | 3adbb65d20ee48ab81fc63063dc2ec38c31c7089782fc6f434627c3829eaf87c 57 | ``` 58 | 59 | Now send some requests to node 2, which is assigned nothing. 
It works, because 60 | the request is forwarded to node 1, as can be seen by showing HTTP headers: 61 | 62 | ```console 63 | $ curl -iL http://localhost:25101/c 64 | HTTP/1.1 302 Found 65 | Content-Type: text/plain 66 | Location: http://127.0.0.1:25100/c 67 | Date: Sun, 27 Nov 2022 00:57:00 GMT 68 | Content-Length: 0 69 | 70 | HTTP/1.1 200 OK 71 | Content-Type: text/plain 72 | Server: 127.0.0.1:25100 73 | Date: Sun, 27 Nov 2022 00:57:00 GMT 74 | Content-Length: 65 75 | 76 | ca4f2be4e4c9604df3b971deae26f077841f0ec34ff9a77a534988c6352566f6 77 | ``` 78 | 79 | Use 80 | [rangerctl](https://github.com/adammck/ranger/tree/master/cmd/rangerctl) 81 | to query the state of the cluster (nodes and range assignments) and initiate 82 | range operations (moves, splits, joins). This is built by `dev.sh`, so to see 83 | usage, run: 84 | 85 | ```console 86 | $ ./rangerctl 87 | ``` 88 | 89 | ## License 90 | 91 | MIT. 92 | -------------------------------------------------------------------------------- /examples/kv/pkg/proxy/proxy.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "net" 7 | 8 | pbkv "github.com/adammck/ranger/examples/kv/proto/gen" 9 | "github.com/adammck/ranger/pkg/api" 10 | "github.com/adammck/ranger/pkg/discovery" 11 | consuldisc "github.com/adammck/ranger/pkg/discovery/consul" 12 | "github.com/adammck/ranger/pkg/rangelet/mirror" 13 | consulapi "github.com/hashicorp/consul/api" 14 | "google.golang.org/grpc" 15 | "google.golang.org/grpc/codes" 16 | "google.golang.org/grpc/credentials/insecure" 17 | "google.golang.org/grpc/reflection" 18 | "google.golang.org/grpc/status" 19 | ) 20 | 21 | type Proxy struct { 22 | name string 23 | addrLis string 24 | addrPub string // do we actually need this? maybe only discovery does. 25 | srv *grpc.Server 26 | disc discovery.Discoverer 27 | mirror *mirror.Mirror 28 | 29 | // Options 30 | logReqs bool 31 | } 32 | 33 | func New(addrLis, addrPub string, logReqs bool) (*Proxy, error) { 34 | var opts []grpc.ServerOption 35 | srv := grpc.NewServer(opts...) 36 | 37 | // Register reflection service, so client can introspect (for debugging). 38 | // TODO: Make this optional. 39 | reflection.Register(srv) 40 | 41 | disc, err := consuldisc.NewDiscoverer(consulapi.DefaultConfig()) 42 | if err != nil { 43 | return nil, err 44 | } 45 | 46 | mir := mirror.New(disc).WithDialler(func(ctx context.Context, rem api.Remote) (*grpc.ClientConn, error) { 47 | return grpc.DialContext(ctx, rem.Addr(), grpc.WithTransportCredentials(insecure.NewCredentials())) 48 | }) 49 | 50 | p := &Proxy{ 51 | name: "proxy", 52 | addrLis: addrLis, 53 | addrPub: addrPub, 54 | srv: srv, 55 | disc: disc, 56 | mirror: mir, 57 | logReqs: logReqs, 58 | } 59 | 60 | ps := proxyServer{ 61 | proxy: p, 62 | } 63 | pbkv.RegisterKVServer(srv, &ps) 64 | 65 | return p, nil 66 | } 67 | 68 | func (p *Proxy) Run(ctx context.Context) error { 69 | 70 | // For the gRPC server. 71 | lis, err := net.Listen("tcp", p.addrLis) 72 | if err != nil { 73 | return err 74 | } 75 | 76 | log.Printf("listening on: %s", p.addrLis) 77 | 78 | // Start the gRPC server in a background routine. 79 | errChan := make(chan error) 80 | go func() { 81 | err := p.srv.Serve(lis) 82 | if err != nil { 83 | errChan <- err 84 | } 85 | close(errChan) 86 | }() 87 | 88 | // Block until context is cancelled, indicating that caller wants shutdown. 89 | <-ctx.Done() 90 | 91 | // Stop mirroring ranges. This isn't necessary, just cleanup. 
92 | err = p.mirror.Stop() 93 | if err != nil { 94 | return err 95 | } 96 | 97 | // Let in-flight RPCs finish and then stop serving. errChan will contain the 98 | // error returned by srv.Serve (see above) or be closed with no error. 99 | p.srv.GracefulStop() 100 | err = <-errChan 101 | if err != nil { 102 | log.Printf("error from srv.Serve: %v", err) 103 | return err 104 | } 105 | 106 | return nil 107 | } 108 | 109 | func (p *Proxy) getClient(k string) (pbkv.KVClient, mirror.Result, error) { 110 | results := p.mirror.Find(api.Key(k), api.NsActive) 111 | res := mirror.Result{} 112 | 113 | if len(results) == 0 { 114 | return nil, res, status.Errorf( 115 | codes.FailedPrecondition, 116 | "no nodes have key") 117 | } 118 | 119 | // Just pick the first one for now. 120 | // TODO: Pick a random one? Should the server-side shuffle them? 121 | res = results[0] 122 | 123 | conn, ok := p.mirror.Conn(res.NodeID()) 124 | if !ok { 125 | // This should not happen. 126 | return nil, res, status.Errorf( 127 | codes.FailedPrecondition, 128 | "no client connection for node id %s", res.NodeID()) 129 | } 130 | 131 | // We could cache it, but constructing clients is cheap. 132 | return pbkv.NewKVClient(conn), res, nil 133 | } 134 | -------------------------------------------------------------------------------- /pkg/ranje/placement.go: -------------------------------------------------------------------------------- 1 | package ranje 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "sync" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | ) 10 | 11 | // Placement represents a pair of range+node. 12 | type Placement struct { 13 | // owned by Keyspace. 14 | // 15 | // TODO: Make this a range Ident instead? Anyone who needs the range will 16 | // probably have a RangeGetter like keyspace. 17 | rang *Range 18 | 19 | // NodeID is the Ident of the node this placement is assigned to. Immutable 20 | // after construction. (Create a new placement instead of changing it.) 21 | NodeID api.NodeID 22 | 23 | // StateCurrent is the controller-side state of the placement. It reflects 24 | // the actual remote state, as reported by the Rangelet via the Roster. 25 | // 26 | // Don't access this field directly! It's only public for deserialization 27 | // from the store. Modify it via ToState. This is currently violated all 28 | // over the place. 29 | StateCurrent api.PlacementState 30 | 31 | // StateDesired is the state the Orchestator would like this placement to be 32 | // in. The Actuator is responsible for telling the remote node about this. 33 | StateDesired api.PlacementState 34 | 35 | // Set by the orchestrator to indicate that this placement should be 36 | // deactivated and dropped when possible. This won't actually happen until 37 | // it's possible to do so within the min/max placement boundaries. 38 | // 39 | // (Adding a placement without tainting the old one will result in the new 40 | // one sitting at Inactive indefinitely, since there's no reason for the old 41 | // one to deactivate itself.) 42 | Tainted bool `json:",omitempty"` 43 | 44 | // failures is updated by the actuator when an action is attempted a few 45 | // times but fails. This generally causes the placement to become wedged 46 | // until an operator intervenes. 47 | failures map[api.Action]bool 48 | 49 | // Not persisted. 50 | onDestroy func() 51 | 52 | // Guards everything. 53 | // TODO: What is "everything" ?? 54 | // TODO: Change into an RWLock, check callers. 55 | // TODO: Should this also lock the range and node? I think no? 
56 | sync.Mutex 57 | } 58 | 59 | // TODO: Get rid of this once deserialization works properly. 60 | func (p *Placement) Repair(r *Range) { 61 | if p.rang != nil { 62 | panic("tried to repair valid placementn") 63 | } 64 | 65 | p.rang = r 66 | } 67 | 68 | // TODO: Rename this to just String? 69 | func (p *Placement) LogString() string { 70 | return fmt.Sprintf("{%s %s:%s}", p.rang.Meta, p.NodeID, p.StateCurrent) 71 | } 72 | 73 | func (p *Placement) Range() *Range { 74 | return p.rang 75 | } 76 | 77 | func (p *Placement) Want(new api.PlacementState) error { 78 | if err := CanTransitionPlacement(p.StateCurrent, new); err != nil { 79 | return err 80 | } 81 | 82 | p.StateDesired = new 83 | return nil 84 | } 85 | 86 | func (p *Placement) ToState(new api.PlacementState) error { 87 | if err := CanTransitionPlacement(p.StateCurrent, new); err != nil { 88 | return err 89 | } 90 | 91 | old := p.StateCurrent 92 | p.StateCurrent = new 93 | p.failures = nil 94 | p.rang.dirty = true 95 | 96 | log.Printf("R%sP%d: %s -> %s", p.rang.Meta.Ident, p.rang.PlacementIndex(p.NodeID), old, new) 97 | 98 | return nil 99 | } 100 | 101 | func (p *Placement) OnDestroy(f func()) { 102 | p.onDestroy = f 103 | } 104 | 105 | // Failed returns true if the given action has been attempted but has failed. 106 | func (p *Placement) Failed(a api.Action) bool { 107 | if p.failures == nil { 108 | return false 109 | } 110 | 111 | return p.failures[a] 112 | } 113 | 114 | func (p *Placement) SetFailed(a api.Action, value bool) { 115 | if p.failures == nil { 116 | // lazy init, since most placements don't fail. 117 | p.failures = map[api.Action]bool{} 118 | } 119 | 120 | p.failures[a] = value 121 | } 122 | -------------------------------------------------------------------------------- /pkg/actuator/rpc/actuator.go: -------------------------------------------------------------------------------- 1 | package rpc 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/adammck/ranger/pkg/actuator/util" 9 | "github.com/adammck/ranger/pkg/api" 10 | "github.com/adammck/ranger/pkg/proto/conv" 11 | pb "github.com/adammck/ranger/pkg/proto/gen" 12 | "github.com/adammck/ranger/pkg/ranje" 13 | "github.com/adammck/ranger/pkg/roster" 14 | ) 15 | 16 | type Actuator struct { 17 | rg ranje.RangeGetter 18 | ng roster.NodeGetter 19 | } 20 | 21 | const rpcTimeout = 1 * time.Second 22 | 23 | func New(rg ranje.RangeGetter, ng roster.NodeGetter) *Actuator { 24 | return &Actuator{ 25 | rg: rg, 26 | ng: ng, 27 | } 28 | } 29 | 30 | // TODO: This is currently duplicated. 31 | // 32 | // TODO: This interface should probably only take the command -- the placement 33 | // and node can be fetched from the Getters if needed. 34 | func (a *Actuator) Command(cmd api.Command, p *ranje.Placement, n *roster.Node) error { 35 | s, err := a.cmd(cmd.Action, p, n) 36 | if err != nil { 37 | return err 38 | } 39 | 40 | // TODO: This special case is weird. It was less so when Prepare was a 41 | // separate method. Think about it or something. 
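// Prepare is the only command which introduces a range the node hasn't seen
// before, so record a fresh RangeInfo for it; the other commands refer to a
// range the roster already knows about, so only its state needs updating.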
42 | if cmd.Action == api.Prepare { 43 | n.UpdateRangeInfo(&api.RangeInfo{ 44 | Meta: p.Range().Meta, 45 | State: s, 46 | Info: api.LoadInfo{}, 47 | }) 48 | } else { 49 | n.UpdateRangeState(p.Range().Meta.Ident, s) 50 | } 51 | 52 | return nil 53 | } 54 | 55 | func (a *Actuator) cmd(action api.Action, p *ranje.Placement, n *roster.Node) (api.RemoteState, error) { 56 | ctx, cancel := context.WithTimeout(context.Background(), rpcTimeout) 57 | defer cancel() 58 | 59 | var s pb.RangeNodeState 60 | var err error 61 | 62 | switch action { 63 | case api.Prepare: 64 | s, err = give(ctx, n, p, util.GetParents(a.rg, a.ng, p.Range())) 65 | 66 | case api.Activate: 67 | s, err = serve(ctx, n, p) 68 | 69 | case api.Deactivate: 70 | s, err = take(ctx, n, p) 71 | 72 | case api.Drop: 73 | s, err = drop(ctx, n, p) 74 | 75 | default: 76 | // TODO: Use exhaustive analyzer? 77 | panic(fmt.Sprintf("unknown action: %v", action)) 78 | } 79 | 80 | if err != nil { 81 | return api.NsUnknown, err 82 | } 83 | 84 | return conv.RemoteStateFromProto(s), nil 85 | } 86 | 87 | func give(ctx context.Context, n *roster.Node, p *ranje.Placement, parents []*pb.Parent) (pb.RangeNodeState, error) { 88 | req := &pb.PrepareRequest{ 89 | Range: conv.MetaToProto(p.Range().Meta), 90 | Parents: parents, 91 | } 92 | 93 | // TODO: Retry a few times before giving up. 94 | res, err := n.Client.Prepare(ctx, req) 95 | if err != nil { 96 | return pb.RangeNodeState_UNKNOWN, err 97 | } 98 | 99 | return res.RangeInfo.State, nil 100 | } 101 | 102 | func serve(ctx context.Context, n *roster.Node, p *ranje.Placement) (pb.RangeNodeState, error) { 103 | rID := p.Range().Meta.Ident 104 | req := &pb.ServeRequest{ 105 | Range: conv.RangeIDToProto(rID), 106 | } 107 | 108 | // TODO: Retry a few times before giving up. 109 | res, err := n.Client.Activate(ctx, req) 110 | if err != nil { 111 | return pb.RangeNodeState_UNKNOWN, err 112 | } 113 | 114 | return res.State, nil 115 | } 116 | 117 | func take(ctx context.Context, n *roster.Node, p *ranje.Placement) (pb.RangeNodeState, error) { 118 | rID := p.Range().Meta.Ident 119 | req := &pb.DeactivateRequest{ 120 | Range: conv.RangeIDToProto(rID), 121 | } 122 | 123 | // TODO: Retry a few times before giving up. 124 | res, err := n.Client.Deactivate(ctx, req) 125 | if err != nil { 126 | return pb.RangeNodeState_UNKNOWN, err 127 | } 128 | 129 | return res.State, nil 130 | } 131 | 132 | func drop(ctx context.Context, n *roster.Node, p *ranje.Placement) (pb.RangeNodeState, error) { 133 | rID := p.Range().Meta.Ident 134 | req := &pb.DropRequest{ 135 | Range: conv.RangeIDToProto(rID), 136 | } 137 | 138 | // TODO: Retry a few times before giving up. 139 | res, err := n.Client.Drop(ctx, req) 140 | if err != nil { 141 | return pb.RangeNodeState_UNKNOWN, err 142 | } 143 | 144 | return res.State, nil 145 | } 146 | -------------------------------------------------------------------------------- /docs/split.md: -------------------------------------------------------------------------------- 1 | # Split 2 | 3 | When a range (1) is assigned to a node (a), and we want to split it into two 4 | separate ranges (2, 3) assigned to different nodes (b, c), we **split** it. 5 | 6 | 1. Prepare 7 | 1. Prepare(b, 2) 8 | 2. Prepare(c, 3) 9 | 2. Deactivate(a, 1) 10 | 3. Activate 11 | 1. Activate(b, 2) 12 | 2. Activate(c, 3) 13 | 4. 
Drop(a, 1) 14 | 15 | [_TestSplit_](https://cs.github.com/adammck/ranger?q=symbol%3ATestSplit) 16 | 17 | ## Failures 18 | 19 | If any of the Prepare commands in step 1 fail, just destroy the failed 20 | placement(s) and try again on some other node. The predecessor range is still 21 | active, so there is no particular harm in waiting while we try again. 22 | 23 | 1. Prepare 24 | 1. Prepare(b, 2) 25 | 2. Prepare(c, 3) 26 | 1. Prepare (retry) 27 | 1. Prepare(d, 2) 28 | 2. Prepare(e, 3) 29 | 30 | or 31 | 32 | 1. Prepare 33 | 1. Prepare(b, 2) 34 | 2. Prepare(c, 3) 35 | 2. Prepare (retry) 36 | 1. Prepare(d, 2) 37 | 38 | or 39 | 40 | 1. Prepare 41 | 1. Prepare(b, 2) 42 | 2. Prepare(c, 3) 43 | 2. Prepare (retry) 44 | 1. Prepare(d, 3) 45 | 46 | [_TestSplitFailure_Prepare_](https://cs.github.com/adammck/ranger?q=symbol%3ATestSplitFailure_Prepare) 47 | 48 | Note that cancellation isn't currently part of the Rangelet API (because the 49 | command methods don't include a context param), so we have to dumbly wait until 50 | both sides complete their Prepare before we can proceed, even if one fails fast. 51 | Not a huge deal, but pointless work. 52 | 53 | --- 54 | 55 | If step 2 fails -- the source placement failed to Deactivate -- just retry 56 | forever (and probably alert an operator). This isn't an emergency (the source 57 | placement is still active), but indicates that something is quite broken. 58 | 59 | 1. Prepare 60 | 1. Prepare(b, 2) 61 | 2. Prepare(c, 3) 62 | 2. Deactivate(a, 1) 63 | 3. Deactivate(a, 1) (retry) 64 | 65 | 66 | [_TestSplitFailure_Deactivate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestSplitFailure_Deactivate) 67 | 68 | --- 69 | 70 | If step 3 fails, deactivate any destination placements which became active (i.e. 71 | the ones which _didn't_ fail), reactivate the source placement, drop the 72 | placements which failed to activate, and retry their placement. 73 | 74 | 1. Prepare 75 | 1. Prepare(b, 2) 76 | 2. Prepare(c, 3) 77 | 2. Deactivate(a, 1) 78 | 3. Activate 79 | 1. Activate(b, 2) 80 | 2. Activate(c, 3) 81 | 4. Activate(a, 1) 82 | 5. Drop 83 | 1. Drop(b, 2) 84 | 2. Drop(c, 3) 85 | 6. Prepare (retry) 86 | 1. Prepare(d, 2) 87 | 2. Prepare(e, 3) 88 | 89 | or 90 | 91 | 1. Prepare 92 | 1. Prepare(b, 2) 93 | 2. Prepare(c, 3) 94 | 2. Deactivate(a, 1) 95 | 3. Activate 96 | 1. Activate(b, 2) 97 | 2. Activate(c, 3) 98 | 4. Deactivate 99 | 1. Deactivate(c, 3) 100 | 5. Activate(a, 1) 101 | 6. Drop 102 | 1. Drop(b, 2) 103 | 7. Prepare (retry) 104 | 1. Prepare(d, 2) 105 | 106 | or 107 | 108 | 1. Prepare 109 | 1. Prepare(b, 2) 110 | 2. Prepare(c, 3) 111 | 2. Deactivate(a, 1) 112 | 3. Activate 113 | 1. Activate(b, 2) 114 | 2. Activate(c, 3) 115 | 4. Deactivate 116 | 1. Deactivate(b, 2) 117 | 5. Activate(a, 1) 118 | 6. Drop 119 | 1. Drop(c, 3) 120 | 7. Prepare (retry) 121 | 1. Prepare(c, 3) 122 | 123 | [_TestSplitFailure_Activate_](https://cs.github.com/adammck/ranger?q=symbol%3ATestSplitFailure_Activate) 124 | 125 | This one is probably the most complex to recover from. 126 | 127 | --- 128 | 129 | If step 4 fails, do nothing but keep trying forever: 130 | 131 | 1. Prepare 132 | 1. Prepare(b, 2) 133 | 2. Prepare(c, 3) 134 | 2. Deactivate(a, 1) 135 | 3. Activate 136 | 1. Activate(b, 2) 137 | 2. Activate(c, 3) 138 | 4. 
Drop(a, 1) 139 | 140 | [_TestSplitFailure_Drop_](https://cs.github.com/adammck/ranger?q=symbol%3ATestSplitFailure_Drop) 141 | -------------------------------------------------------------------------------- /examples/kv/pkg/node/control.go: -------------------------------------------------------------------------------- 1 | package node 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "math/rand" 7 | "sort" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/adammck/ranger/pkg/api" 12 | ) 13 | 14 | func (n *Node) GetLoadInfo(rID api.RangeID) (api.LoadInfo, error) { 15 | n.rangesMu.RLock() 16 | defer n.rangesMu.RUnlock() 17 | 18 | r, ok := n.ranges[rID] 19 | if !ok { 20 | return api.LoadInfo{}, api.ErrNotFound 21 | } 22 | 23 | keys := []string{} 24 | 25 | // Find mid-point in an extremely inefficient manner. 26 | // While holding the lock, no less. 27 | func() { 28 | r.dataMu.RLock() 29 | defer r.dataMu.RUnlock() 30 | for k := range r.data { 31 | keys = append(keys, k) 32 | } 33 | }() 34 | 35 | var split api.Key 36 | if len(keys) > 2 { 37 | sort.Strings(keys) 38 | split = api.Key(keys[len(keys)/2]) 39 | } 40 | 41 | return api.LoadInfo{ 42 | Keys: len(keys), 43 | Splits: []api.Key{split}, 44 | }, nil 45 | } 46 | 47 | // Prepare: Create the range, but don't do anything with it yet. 48 | func (n *Node) Prepare(rm api.Meta, parents []api.Parent) error { 49 | if err := n.performChaos(); err != nil { 50 | return err 51 | } 52 | 53 | n.rangesMu.Lock() 54 | defer n.rangesMu.Unlock() 55 | 56 | _, ok := n.ranges[rm.Ident] 57 | if ok { 58 | panic("rangelet gave duplicate range!") 59 | } 60 | 61 | // TODO: Ideally we would perform most of the fetch here, and only exchange 62 | // the delta (keys which have changed since then) in Activate. 63 | 64 | n.ranges[rm.Ident] = &Range{ 65 | data: map[string][]byte{}, 66 | fetcher: newFetcher(rm, parents), 67 | writable: 0, 68 | } 69 | 70 | return nil 71 | } 72 | 73 | // Activate: 74 | func (n *Node) Activate(rID api.RangeID) error { 75 | if err := n.performChaos(); err != nil { 76 | return err 77 | } 78 | 79 | n.rangesMu.RLock() 80 | r, ok := n.ranges[rID] 81 | n.rangesMu.RUnlock() 82 | if !ok { 83 | panic("rangelet called Activate with unknown range!") 84 | } 85 | 86 | err := r.fetcher.Fetch(r) 87 | if err != nil { 88 | return fmt.Errorf("error fetching range: %s", err) 89 | } 90 | 91 | r.fetcher = nil 92 | atomic.StoreUint32(&r.writable, 1) 93 | 94 | return nil 95 | } 96 | 97 | // Deactivate: Disable writes to the range, because we're about to move it and I 98 | // don't have the time to implement something better today. In this example, 99 | // keys are writable on exactly one node. (Or zero, during failures!) 100 | func (n *Node) Deactivate(rID api.RangeID) error { 101 | if err := n.performChaos(); err != nil { 102 | return err 103 | } 104 | 105 | n.rangesMu.Lock() 106 | defer n.rangesMu.Unlock() 107 | 108 | r, ok := n.ranges[rID] 109 | if !ok { 110 | panic("rangelet called Deactivate with unknown range!") 111 | } 112 | 113 | // Prevent further writes to the range. 114 | atomic.StoreUint32(&r.writable, 0) 115 | 116 | return nil 117 | } 118 | 119 | // Drop: Discard the range. 
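// (In this example all of a range's data lives in the in-memory map, so
// deleting the map entry below is the entire cleanup. A node backed by real
// storage would presumably also discard its persisted copy at this point.)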
120 | func (n *Node) Drop(rID api.RangeID) error { 121 | if err := n.performChaos(); err != nil { 122 | return err 123 | } 124 | 125 | n.rangesMu.Lock() 126 | defer n.rangesMu.Unlock() 127 | 128 | _, ok := n.ranges[rID] 129 | if !ok { 130 | panic("rangelet called Drop with unknown range!") 131 | } 132 | 133 | delete(n.ranges, rID) 134 | 135 | return nil 136 | } 137 | 138 | // performChaos optionally (if chaos is enabled, via the -chaos flag) sleeps for 139 | // a random amount of time between zero and 5000ms, biased towards zero. Then 140 | // returns an error 5% of the time. This is of course intended to make our 141 | // manual testing a little more chaotic. 142 | func (n *Node) performChaos() error { 143 | if !n.chaos { 144 | return nil 145 | } 146 | 147 | ms := int(3000 * math.Pow(rand.Float64(), 2)) 148 | d := time.Duration(ms) * time.Millisecond 149 | time.Sleep(d) 150 | 151 | // TODO: This causes actual problems really fast if raised significantly. 152 | // Looks like an orchestrator bug. Look into it. 153 | if rand.Float32() < 0.05 { 154 | return fmt.Errorf("it's your unlucky day") 155 | } 156 | 157 | return nil 158 | } 159 | -------------------------------------------------------------------------------- /examples/kv/test/controller.bats: -------------------------------------------------------------------------------- 1 | setup_file() { 2 | go build 3 | } 4 | 5 | setup() { 6 | # TODO: Use this instead: https://github.com/ztombol/bats-docs#homebrew 7 | load '/Users/adammck/code/src/github.com/bats-core/bats-support/load.bash' 8 | load '/Users/adammck/code/src/github.com/bats-core/bats-assert/load.bash' 9 | load test_helper 10 | start_consul 11 | } 12 | 13 | teardown() { 14 | stop_cmds 15 | } 16 | 17 | @test "place" { 18 | start_node 8001 19 | 20 | # Try to write something to the node. This should fail, because no ranges 21 | # are assigned. 22 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 23 | assert_failure 24 | assert_line -n 0 'ERROR:' 25 | assert_line -n 1 ' Code: FailedPrecondition' 26 | assert_line -n 2 ' Message: no valid range' 27 | 28 | # Run a single rebalance cycle. 29 | ./kv -controller -addr ":9000" -once 30 | 31 | # Try the same write again. It should succeed this time, because the 32 | # controller has assigned the first (infinite) range to it. 33 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 34 | assert_success 35 | } 36 | 37 | @test "move" { 38 | start_node 8001 39 | start_node 8002 40 | start_controller 9000 41 | sleep 0.5 42 | 43 | # Write a key. 44 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 45 | assert_success 46 | 47 | # Check that we can't write it to node 2. 48 | run bin/client.sh 8002 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 49 | assert_failure 50 | assert_line -n 0 'ERROR:' 51 | assert_line -n 1 ' Code: FailedPrecondition' 52 | assert_line -n 2 ' Message: no valid range' 53 | 54 | # Move the range from node 1 to node 2. 55 | run $(ranger_client 9000) move 1 8002 56 | assert_success 57 | 58 | # Check that the range is gone from node 1. 59 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 60 | assert_failure 61 | 62 | # Check that it is now available on node 2. 63 | run bin/client.sh 8002 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 64 | assert_success 65 | } 66 | 67 | @test "split" { 68 | start_node 8001 69 | start_node 8002 70 | start_node 8003 71 | start_controller 9000 72 | sleep 0.5 73 | 74 | # Write a key on either side of the split. 
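    # ($a and $c are the base64-encoded keys "a" and "c" from test_helper.bash,
    # so they land on either side of the split boundary "b" used below.)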
75 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 76 | assert_success 77 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$c'", "value": "'$yyy'"}' 78 | assert_success 79 | 80 | # Split the range from node 1 to nodes 2 and 3. 81 | run $(ranger_client 9000) split 1 b64:"$b" 8002 8003 82 | assert_success 83 | 84 | # Check that the range is gone from node 1. 85 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 86 | assert_failure 87 | 88 | # Check that the left side is available on node 2, but the right is not. 89 | run bin/client.sh 8002 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 90 | assert_success 91 | run bin/client.sh 8002 kv.KV.Put '{"key": "'$c'", "value": "'$zzz'"}' 92 | assert_failure 93 | 94 | # Check that the right side is available on node 3, but the left is not. 95 | run bin/client.sh 8003 kv.KV.Put '{"key": "'$c'", "value": "'$zzz'"}' 96 | assert_success 97 | run bin/client.sh 8003 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 98 | assert_failure 99 | } 100 | 101 | @test "move across crash" { 102 | start_node 8001 103 | start_node 8002 104 | start_controller 9000 105 | sleep 0.5 106 | 107 | # Move the range from node 1 to node 2. 108 | run $(ranger_client 9000) move 1 8002 109 | assert_success 110 | 111 | crash_cmd 9000 112 | start_controller 9000 113 | sleep 0.5 114 | 115 | # Move the range from node 2 to node 1. 116 | run $(ranger_client 9000) move 1 8001 117 | assert_success 118 | 119 | # Check that the range is available on node 1, and not on node 2. 120 | run bin/client.sh 8001 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 121 | assert_success 122 | run bin/client.sh 8002 kv.KV.Put '{"key": "'$a'", "value": "'$zzz'"}' 123 | assert_failure 124 | } 125 | -------------------------------------------------------------------------------- /pkg/discovery/consul/consul_discoverer.go: -------------------------------------------------------------------------------- 1 | package consul 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | discovery "github.com/adammck/ranger/pkg/discovery" 9 | consulapi "github.com/hashicorp/consul/api" 10 | ) 11 | 12 | type Discoverer struct { 13 | consul *consulapi.Client 14 | } 15 | 16 | // TODO: Accept a consul API client here, not a cfg. 17 | func NewDiscoverer(cfg *consulapi.Config) (*Discoverer, error) { 18 | client, err := consulapi.NewClient(cfg) 19 | if err != nil { 20 | return nil, err 21 | } 22 | 23 | d := &Discoverer{ 24 | consul: client, 25 | } 26 | 27 | return d, nil 28 | } 29 | 30 | type discoveryGetter struct { 31 | disc *Discoverer 32 | name string 33 | 34 | // stop is closed to signal that run should stop ticking and return. 35 | stop chan bool 36 | 37 | // running can be waited on to block until run is about to return. Wait on 38 | // this after closing stop to ensure that no more ticks will happen. 39 | running sync.WaitGroup 40 | 41 | // Remotes that we know about. 42 | remotes map[string]api.Remote 43 | remotesMu sync.RWMutex 44 | 45 | // Functions to be called when new remotes are added and removed.
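	// Both are invoked synchronously from the polling goroutine (see tick), so
	// they should return promptly to avoid delaying the next poll.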
46 | add func(api.Remote) 47 | remove func(api.Remote) 48 | } 49 | 50 | func (d *Discoverer) Discover(svcName string, add, remove func(api.Remote)) discovery.Getter { 51 | dg := &discoveryGetter{ 52 | disc: d, 53 | name: svcName, 54 | stop: make(chan bool), 55 | remotes: map[string]api.Remote{}, 56 | add: add, 57 | remove: remove, 58 | } 59 | 60 | dg.running.Add(1) 61 | go dg.run() 62 | 63 | return dg 64 | } 65 | 66 | func (dg *discoveryGetter) tick() error { 67 | 68 | // Fetch all entries (remotes) for the service name. 69 | res, _, err := dg.disc.consul.Catalog().Service(dg.name, "", &consulapi.QueryOptions{}) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | seen := map[string]struct{}{} 75 | added := []api.Remote{} 76 | removed := []api.Remote{} 77 | 78 | dg.remotesMu.Lock() 79 | 80 | // Check every remote, see if it needs adding to our cache. 81 | for _, r := range res { 82 | svcID := r.ServiceID 83 | seen[svcID] = struct{}{} 84 | 85 | // Already known 86 | if _, ok := dg.remotes[svcID]; ok { 87 | continue 88 | } 89 | 90 | rem := api.Remote{ 91 | Ident: svcID, 92 | Host: r.Address, // https://github.com/hashicorp/consul/issues/2076 93 | Port: r.ServicePort, 94 | } 95 | 96 | // New remote 97 | dg.remotes[svcID] = rem 98 | added = append(added, rem) 99 | //log.Printf("Added: %s", svcID) 100 | } 101 | 102 | // Remove any nodes which have gone from consul. 103 | for svcID, rem := range dg.remotes { 104 | if _, ok := seen[svcID]; !ok { 105 | delete(dg.remotes, svcID) 106 | removed = append(removed, rem) 107 | //log.Printf("Removing: %s", svcID) 108 | } 109 | } 110 | 111 | dg.remotesMu.Unlock() 112 | 113 | // Call add/remove callbacks outside of lock. But still synchronously inside 114 | // this function, so that we won't tick again until they return. Should keep 115 | // things linear (i.e. no remotes being removed before they're added). 116 | 117 | if dg.add != nil { 118 | for _, rem := range added { 119 | dg.add(rem) 120 | } 121 | } 122 | 123 | if dg.remove != nil { 124 | for _, rem := range removed { 125 | dg.remove(rem) 126 | } 127 | } 128 | 129 | return nil 130 | } 131 | 132 | func (dg *discoveryGetter) run() { 133 | ticker := time.NewTicker(1 * time.Second) 134 | 135 | for { 136 | select { 137 | case <-ticker.C: 138 | dg.tick() 139 | case <-dg.stop: 140 | ticker.Stop() 141 | dg.running.Done() 142 | return 143 | } 144 | } 145 | } 146 | 147 | func (dg *discoveryGetter) Get() ([]api.Remote, error) { 148 | dg.remotesMu.RLock() 149 | defer dg.remotesMu.RUnlock() 150 | 151 | res := make([]api.Remote, len(dg.remotes)) 152 | i := 0 153 | for _, v := range dg.remotes { 154 | res[i] = v 155 | i += 1 156 | } 157 | 158 | return res, nil 159 | } 160 | 161 | // TODO: Could probably accomplish this with a cancellable context instead? 162 | func (dg *discoveryGetter) Stop() error { 163 | 164 | // Signal run to return instead of tick again. 165 | close(dg.stop) 166 | 167 | // Block until any in-progress ticks are finished. 
168 | dg.running.Wait() 169 | 170 | return nil 171 | } 172 | -------------------------------------------------------------------------------- /pkg/orchestrator/server_debug.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/adammck/ranger/pkg/keyspace" 8 | "github.com/adammck/ranger/pkg/proto/conv" 9 | pb "github.com/adammck/ranger/pkg/proto/gen" 10 | "github.com/adammck/ranger/pkg/ranje" 11 | "github.com/adammck/ranger/pkg/roster" 12 | "google.golang.org/grpc/codes" 13 | "google.golang.org/grpc/status" 14 | ) 15 | 16 | type debugServer struct { 17 | pb.UnsafeDebugServer 18 | orch *Orchestrator 19 | } 20 | 21 | func rangeResponse(r *ranje.Range, rost *roster.Roster) *pb.RangeResponse { 22 | parents := make([]uint64, len(r.Parents)) 23 | for i, rID := range r.Parents { 24 | parents[i] = conv.RangeIDToProto(rID) 25 | } 26 | 27 | children := make([]uint64, len(r.Children)) 28 | for i, rID := range r.Children { 29 | children[i] = conv.RangeIDToProto(rID) 30 | } 31 | 32 | res := &pb.RangeResponse{ 33 | Meta: conv.MetaToProto(r.Meta), 34 | State: conv.RangeStateToProto(r.State), 35 | Parents: parents, 36 | Children: children, 37 | } 38 | 39 | for _, p := range r.Placements { 40 | plc := &pb.PlacementWithRangeInfo{ 41 | Placement: &pb.Placement{ 42 | Node: conv.NodeIDToProto(p.NodeID), 43 | State: conv.PlacementStateToProto(p.StateCurrent), 44 | }, 45 | } 46 | 47 | // If RangeInfo is available include it. 48 | // Might not be, if the node has just vanished or forgotten the range. 49 | nod, _ := rost.NodeByIdent(p.NodeID) 50 | if nod != nil { 51 | if ri, ok := nod.Get(r.Meta.Ident); ok { 52 | plc.RangeInfo = conv.RangeInfoToProto(ri) 53 | } 54 | } 55 | 56 | res.Placements = append(res.Placements, plc) 57 | } 58 | 59 | return res 60 | } 61 | 62 | func nodeResponse(ks *keyspace.Keyspace, n *roster.Node) *pb.NodeResponse { 63 | res := &pb.NodeResponse{ 64 | Node: &pb.NodeMeta{ 65 | Ident: conv.NodeIDToProto(n.Ident()), 66 | Address: n.Addr(), 67 | WantDrain: n.WantDrain(), 68 | }, 69 | } 70 | 71 | for _, pl := range ks.PlacementsByNodeID(n.Ident()) { 72 | res.Ranges = append(res.Ranges, &pb.NodeRange{ 73 | Meta: conv.MetaToProto(pl.Range.Meta), 74 | State: conv.PlacementStateToProto(pl.Placement.StateCurrent), 75 | }) 76 | } 77 | 78 | return res 79 | } 80 | 81 | func (srv *debugServer) RangesList(ctx context.Context, req *pb.RangesListRequest) (*pb.RangesListResponse, error) { 82 | res := &pb.RangesListResponse{} 83 | 84 | ranges, unlocker := srv.orch.ks.Ranges() 85 | defer unlocker() 86 | 87 | for _, r := range ranges { 88 | r.Mutex.Lock() 89 | res.Ranges = append(res.Ranges, rangeResponse(r, srv.orch.rost)) 90 | r.Mutex.Unlock() 91 | } 92 | 93 | return res, nil 94 | } 95 | 96 | func (srv *debugServer) Range(ctx context.Context, req *pb.RangeRequest) (*pb.RangeResponse, error) { 97 | if req.Range == 0 { 98 | return nil, status.Error(codes.InvalidArgument, "missing: range") 99 | } 100 | 101 | rID, err := conv.RangeIDFromProto(req.Range) 102 | if err != nil { 103 | return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("IdentFromProto failed: %v", err)) 104 | } 105 | 106 | r, err := srv.orch.ks.GetRange(rID) 107 | if err != nil { 108 | return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("GetByIdent failed: %v", err)) 109 | } 110 | 111 | r.Mutex.Lock() 112 | defer r.Mutex.Unlock() 113 | res := rangeResponse(r, srv.orch.rost) 114 | 115 | return res, nil 116 | } 117 | 118 | 
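// Node returns debug info for a single node: its metadata (ident, address,
// whether it wants to drain) and the placements which the keyspace currently
// assigns to it.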
func (srv *debugServer) Node(ctx context.Context, req *pb.NodeRequest) (*pb.NodeResponse, error) { 119 | nID, err := conv.NodeIDFromProto(req.Node) 120 | if err != nil { 121 | return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("NodeIDFromProto failed: %v", err)) 122 | } 123 | 124 | node, err := srv.orch.rost.NodeByIdent(nID) 125 | if err != nil { 126 | return nil, status.Error(codes.InvalidArgument, err.Error()) 127 | } 128 | 129 | res := nodeResponse(srv.orch.ks, node) 130 | 131 | return res, nil 132 | } 133 | 134 | func (srv *debugServer) NodesList(ctx context.Context, req *pb.NodesListRequest) (*pb.NodesListResponse, error) { 135 | rost := srv.orch.rost 136 | rost.RLock() 137 | defer rost.RUnlock() 138 | 139 | res := &pb.NodesListResponse{} 140 | 141 | for _, n := range rost.Nodes { 142 | res.Nodes = append(res.Nodes, nodeResponse(srv.orch.ks, n)) 143 | } 144 | 145 | return res, nil 146 | } 147 | -------------------------------------------------------------------------------- /examples/kv/pkg/node/data.go: -------------------------------------------------------------------------------- 1 | package node 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "sync/atomic" 7 | 8 | pbkv "github.com/adammck/ranger/examples/kv/proto/gen" 9 | "github.com/adammck/ranger/pkg/api" 10 | "google.golang.org/grpc/codes" 11 | "google.golang.org/grpc/status" 12 | ) 13 | 14 | type kvServer struct { 15 | pbkv.UnimplementedKVServer 16 | node *Node 17 | } 18 | 19 | func init() { 20 | // Ensure that kvServer implements the KVServer interface 21 | var kvs *kvServer = nil 22 | var _ pbkv.KVServer = kvs 23 | } 24 | 25 | // Get reads a single value by its key. 26 | // 27 | // Returns Aborted if the key is not in any of the ranges assigned to this node. 28 | // This should not occur under normal circumstances. It means that the caller is 29 | // confused about which range is assigned to which node. Most likely, the range 30 | // has moved very recently and the caller hasn't heard about it yet. 31 | // 32 | // Returns NotFound If the key is not found but is within a valid range. 33 | func (s *kvServer) Get(ctx context.Context, req *pbkv.GetRequest) (*pbkv.GetResponse, error) { 34 | k := string(req.Key) 35 | if k == "" { 36 | return nil, status.Error(codes.InvalidArgument, "missing: key") 37 | } 38 | 39 | rID, ok := s.node.rglt.Find(api.Key(k)) 40 | if !ok { 41 | return nil, status.Error(codes.Aborted, "no such range") 42 | } 43 | 44 | s.node.rangesMu.RLock() 45 | r, ok := s.node.ranges[rID] 46 | s.node.rangesMu.RUnlock() 47 | if !ok { 48 | panic("rangelet found unknown range!") 49 | } 50 | 51 | r.dataMu.RLock() 52 | v, ok := r.data[k] 53 | r.dataMu.RUnlock() 54 | if !ok { 55 | return nil, status.Error(codes.NotFound, "no such key") 56 | } 57 | 58 | if s.node.logReqs { 59 | // TODO: Also log errors. 60 | log.Printf("Get: %q", k) 61 | } 62 | 63 | return &pbkv.GetResponse{ 64 | Value: v, 65 | }, nil 66 | } 67 | 68 | // Put writes a single value by its key. 69 | // 70 | // Returns Aborted if the key is not in any of the assigned ranges, like Get. 71 | // 72 | // Returns FailedPrecondition if an appropriate range is assigned but read-only. 73 | // This is normal, and occurs while a range is being moved away from the node. 74 | // The caller will have to wait until the range is available elsewhere. (Unless 75 | // the move fails, then it may become writable here!) 
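//
// Example, as exercised by the bats tests (key and value are base64-encoded
// in the JSON request body):
//
//	bin/client.sh 8001 kv.KV.Put '{"key": "YQ==", "value": "enp6"}'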
76 | func (s *kvServer) Put(ctx context.Context, req *pbkv.PutRequest) (*pbkv.PutResponse, error) { 77 | k := string(req.Key) 78 | if k == "" { 79 | return nil, status.Error(codes.InvalidArgument, "missing: key") 80 | } 81 | 82 | rID, ok := s.node.rglt.Find(api.Key(k)) 83 | if !ok { 84 | return nil, status.Error(codes.Aborted, "no such range") 85 | } 86 | 87 | s.node.rangesMu.RLock() 88 | r, ok := s.node.ranges[rID] 89 | s.node.rangesMu.RUnlock() 90 | if !ok { 91 | panic("rangelet found unknown range!") 92 | } 93 | 94 | if atomic.LoadUint32(&r.writable) == 0 { 95 | return nil, status.Error(codes.FailedPrecondition, "can't PUT to read-only range") 96 | } 97 | 98 | if req.Value == nil { 99 | r.dataMu.Lock() 100 | delete(r.data, k) 101 | r.dataMu.Unlock() 102 | } else { 103 | r.dataMu.Lock() 104 | r.data[k] = req.Value 105 | r.dataMu.Unlock() 106 | } 107 | 108 | if s.node.logReqs { 109 | // TODO: Also log errors. 110 | log.Printf("Put: %q", k) 111 | } 112 | 113 | return &pbkv.PutResponse{}, nil 114 | } 115 | 116 | // Dump is called by other nodes during range moves, splits, and joins, to fetch 117 | // data currently stores on this node. 118 | func (s *kvServer) Dump(ctx context.Context, req *pbkv.DumpRequest) (*pbkv.DumpResponse, error) { 119 | ident := api.RangeID(req.RangeIdent) 120 | if ident == 0 { 121 | return nil, status.Error(codes.InvalidArgument, "missing: range_ident") 122 | } 123 | 124 | s.node.rangesMu.RLock() 125 | r, ok := s.node.ranges[ident] 126 | s.node.rangesMu.RUnlock() 127 | if !ok { 128 | return nil, status.Error(codes.Aborted, "range not found") 129 | } 130 | 131 | if atomic.LoadUint32(&r.writable) == 1 { 132 | return nil, status.Error(codes.FailedPrecondition, "can't dump wriable range") 133 | } 134 | 135 | res := &pbkv.DumpResponse{} 136 | 137 | func() { 138 | r.dataMu.RLock() 139 | defer r.dataMu.RUnlock() 140 | for k, v := range r.data { 141 | res.Pairs = append(res.Pairs, &pbkv.Pair{Key: k, Value: v}) 142 | } 143 | }() 144 | 145 | if s.node.logReqs { 146 | log.Printf("Dumped: %s", ident) 147 | } 148 | 149 | return res, nil 150 | } 151 | -------------------------------------------------------------------------------- /cmd/rangerd/controller.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "net" 7 | "time" 8 | 9 | "github.com/adammck/ranger/pkg/actuator" 10 | rpc_actuator "github.com/adammck/ranger/pkg/actuator/rpc" 11 | "github.com/adammck/ranger/pkg/keyspace" 12 | "github.com/adammck/ranger/pkg/orchestrator" 13 | "github.com/adammck/ranger/pkg/ranje" 14 | "github.com/adammck/ranger/pkg/roster" 15 | "google.golang.org/grpc" 16 | "google.golang.org/grpc/reflection" 17 | 18 | consuldisc "github.com/adammck/ranger/pkg/discovery/consul" 19 | consulpers "github.com/adammck/ranger/pkg/persister/consul" 20 | consulapi "github.com/hashicorp/consul/api" 21 | ) 22 | 23 | type Controller struct { 24 | addrLis string 25 | addrPub string // do we actually need this? maybe only discovery does. 26 | interval time.Duration 27 | once bool // run one rebalance cycle and exit 28 | 29 | srv *grpc.Server 30 | ks *keyspace.Keyspace 31 | rost *roster.Roster 32 | act *actuator.Actuator 33 | orch *orchestrator.Orchestrator 34 | } 35 | 36 | func New(addrLis, addrPub string, interval time.Duration, once bool) (*Controller, error) { 37 | var opts []grpc.ServerOption 38 | srv := grpc.NewServer(opts...) 39 | 40 | // Register reflection service, so client can introspect (for debugging). 
41 | // TODO: Make this optional. 42 | reflection.Register(srv) 43 | 44 | // TODO: Pass in the Consul client here. 45 | disc, err := consuldisc.NewDiscoverer(consulapi.DefaultConfig()) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | api, err := consulapi.NewClient(consulapi.DefaultConfig()) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | pers := consulpers.New(api) 56 | 57 | // This loads the ranges from storage, so will fail if the persister (e.g. 58 | // Consul) isn't available. Starting with an empty keyspace should be rare. 59 | // TODO: Allow replication to be customized via flags or config. 60 | ks, err := keyspace.New(pers, ranje.R1) 61 | if err != nil { 62 | return nil, err 63 | } 64 | 65 | // TODO: Hook up the callbacks (or replace with channels) 66 | rost := roster.New(disc, nil, nil, nil) 67 | 68 | actImpl := rpc_actuator.New(ks, rost) 69 | act := actuator.New(ks, rost, time.Duration(3*time.Second), actImpl) 70 | 71 | orch := orchestrator.New(ks, rost, srv) 72 | 73 | return &Controller{ 74 | addrLis: addrLis, 75 | addrPub: addrPub, 76 | interval: interval, 77 | once: once, 78 | srv: srv, 79 | ks: ks, 80 | rost: rost, 81 | act: act, 82 | orch: orch, 83 | }, nil 84 | } 85 | 86 | func (c *Controller) Run(ctx context.Context) error { 87 | 88 | // For the gRPC server. 89 | lis, err := net.Listen("tcp", c.addrLis) 90 | if err != nil { 91 | return err 92 | } 93 | 94 | log.Printf("listening on: %s", c.addrLis) 95 | 96 | // Start the gRPC server in a background routine. 97 | errChan := make(chan error) 98 | go func() { 99 | err := c.srv.Serve(lis) 100 | if err != nil { 101 | errChan <- err 102 | } 103 | close(errChan) 104 | }() 105 | 106 | // Wait a bit for other services to come up before starting. This makes 107 | // development easier by minimizing log spam, and is no big deal in prod. 108 | time.Sleep(1 * time.Second) 109 | 110 | // Perform a single blocking probe cycle, to ensure that the first rebalance 111 | // happens after we have the current state of the nodes. 112 | c.rost.Tick() 113 | 114 | if c.once { 115 | c.orch.Tick() 116 | c.act.Tick() 117 | 118 | } else { 119 | 120 | // Periodically probe all nodes to keep their state up to date. 121 | ticker := time.NewTicker(1 * time.Second) 122 | go c.rost.Run(ticker) 123 | 124 | // Start rebalancing loop. 125 | go c.orch.Run(time.NewTicker(c.interval)) 126 | 127 | // Start the actuation loop. The interval only affects how soon 128 | // it will notice a pending actuation. Failed actuations should retry 129 | // slower than this. 130 | go c.act.Run(time.NewTicker(500 * time.Millisecond)) 131 | 132 | // Block until context is cancelled, indicating that caller wants 133 | // shutdown. 134 | if !c.once { 135 | <-ctx.Done() 136 | } 137 | } 138 | 139 | // Let in-flight commands finish. This isn't strictly necessary, but allows 140 | // us to minimize the stuff which will need reconciling at next startup. 141 | c.act.Wait() 142 | 143 | // Let in-flight incoming RPCs finish and then stop. errChan will contain 144 | // the error returned by srv.Serve (above) or be closed with no error. 
145 | c.srv.GracefulStop() 146 | err = <-errChan 147 | if err != nil { 148 | log.Printf("Error from srv.Serve: %v", err) 149 | return err 150 | } 151 | 152 | return nil 153 | } 154 | -------------------------------------------------------------------------------- /pkg/rangelet/mirror/mirror_test.go: -------------------------------------------------------------------------------- 1 | package mirror 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "log" 8 | "net" 9 | "sync" 10 | "testing" 11 | "time" 12 | 13 | "github.com/adammck/ranger/pkg/api" 14 | mock_disc "github.com/adammck/ranger/pkg/discovery/mock" 15 | "github.com/adammck/ranger/pkg/proto/conv" 16 | pb "github.com/adammck/ranger/pkg/proto/gen" 17 | "google.golang.org/grpc" 18 | "google.golang.org/grpc/credentials/insecure" 19 | "google.golang.org/grpc/test/bufconn" 20 | "gotest.tools/assert" 21 | "gotest.tools/assert/cmp" 22 | ) 23 | 24 | func TestEmpty(t *testing.T) { 25 | h := setup(t) 26 | res := h.mirror.Find(api.Key("aaa")) 27 | assert.Assert(t, cmp.Len(res, 0)) 28 | } 29 | 30 | func TestStatic(t *testing.T) { 31 | h := setup(t) 32 | h.add(t, api.Remote{ 33 | Ident: "aaa", 34 | Host: "host-aaa", 35 | Port: 1, 36 | }, []api.RangeInfo{ 37 | { 38 | Meta: api.Meta{Ident: 1, End: api.Key("ggg")}, 39 | State: api.NsActive, 40 | }, 41 | { 42 | Meta: api.Meta{Ident: 2, Start: api.Key("ggg"), End: api.Key("sss")}, 43 | State: api.NsActive, 44 | }, 45 | }) 46 | h.add(t, api.Remote{ 47 | Ident: "bbb", 48 | Host: "host-bbb", 49 | Port: 2, 50 | }, []api.RangeInfo{ 51 | { 52 | Meta: api.Meta{Ident: 3, Start: api.Key("sss")}, 53 | State: api.NsActive, 54 | }, 55 | }) 56 | 57 | // TODO: Add some kind of sync method to block until the mirror has fetched 58 | // something (or nothing) from each node. 
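	// (The mirror consumes each node's streaming Ranges RPC in the background,
	// so give those streams a moment to deliver before querying.)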
59 | time.Sleep(100 * time.Millisecond) 60 | 61 | res := h.mirror.Find(api.Key("ccc")) 62 | assert.DeepEqual(t, []Result{{ 63 | RangeID: 1, 64 | Remote: api.Remote{ 65 | Ident: "aaa", 66 | Host: "host-aaa", 67 | Port: 1, 68 | }, 69 | State: api.NsActive, 70 | }}, res) 71 | 72 | res = h.mirror.Find(api.Key("hhh")) 73 | assert.DeepEqual(t, []Result{{ 74 | RangeID: 2, 75 | Remote: api.Remote{ 76 | Ident: "aaa", 77 | Host: "host-aaa", 78 | Port: 1, 79 | }, 80 | State: api.NsActive, 81 | }}, res) 82 | 83 | res = h.mirror.Find(api.Key("zzz")) 84 | assert.DeepEqual(t, []Result{{ 85 | RangeID: 3, 86 | Remote: api.Remote{ 87 | Ident: "bbb", 88 | Host: "host-bbb", 89 | Port: 2, 90 | }, 91 | State: api.NsActive, 92 | }}, res) 93 | } 94 | 95 | // ----------------------------------------------------------------------------- 96 | 97 | type testHarness struct { 98 | disc *mock_disc.Discoverer 99 | nodes map[api.Remote]*nodeServer 100 | mirror *Mirror 101 | } 102 | 103 | func setup(t *testing.T) *testHarness { 104 | h := &testHarness{ 105 | disc: mock_disc.NewDiscoverer(), 106 | nodes: map[api.Remote]*nodeServer{}, 107 | } 108 | 109 | h.mirror = New(h.disc).WithDialler(h.dial) // SUT 110 | 111 | t.Cleanup(func() { 112 | // Ignore errors 113 | _ = h.mirror.Stop() 114 | }) 115 | 116 | return h 117 | } 118 | 119 | func (h *testHarness) add(t *testing.T, rem api.Remote, ranges []api.RangeInfo) { 120 | ns := newNodeServer(ranges) 121 | t.Cleanup(ns.stop) 122 | h.nodes[rem] = ns 123 | h.disc.Add("node", rem) 124 | } 125 | 126 | func (h *testHarness) dial(ctx context.Context, rem api.Remote) (*grpc.ClientConn, error) { 127 | node, ok := h.nodes[rem] 128 | if !ok { 129 | log.Printf("No such remote: %s", rem.Ident) 130 | return nil, fmt.Errorf("No such remote: %s", rem.Ident) 131 | } 132 | 133 | return grpc.DialContext(ctx, "", grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { 134 | return node.listener.Dial() 135 | }), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) 136 | } 137 | 138 | type nodeServer struct { 139 | pb.UnimplementedNodeServer 140 | 141 | server *grpc.Server 142 | listener *bufconn.Listener 143 | ranges []api.RangeInfo 144 | quit *sync.WaitGroup 145 | } 146 | 147 | func newNodeServer(ranges []api.RangeInfo) *nodeServer { 148 | wg := &sync.WaitGroup{} 149 | wg.Add(1) 150 | 151 | ns := &nodeServer{ 152 | server: grpc.NewServer(), 153 | listener: bufconn.Listen(1024 * 1024), 154 | ranges: ranges, 155 | quit: wg, 156 | } 157 | 158 | pb.RegisterNodeServer(ns.server, ns) 159 | 160 | go func() { 161 | if err := ns.server.Serve(ns.listener); err != nil { 162 | panic(err) 163 | } 164 | }() 165 | 166 | return ns 167 | } 168 | 169 | func (ns *nodeServer) stop() { 170 | ns.server.Stop() 171 | ns.quit.Done() 172 | } 173 | 174 | func (ns *nodeServer) Ranges(req *pb.RangesRequest, stream pb.Node_RangesServer) error { 175 | for _, ri := range ns.ranges { 176 | stream.Send(&pb.RangesResponse{ 177 | Meta: conv.MetaToProto(ri.Meta), 178 | State: conv.RemoteStateToProto(ri.State), 179 | }) 180 | } 181 | 182 | ns.quit.Wait() 183 | 184 | return io.EOF 185 | } 186 | -------------------------------------------------------------------------------- /pkg/keyspace/replication_state_test.go: -------------------------------------------------------------------------------- 1 | package keyspace 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | "github.com/adammck/ranger/pkg/ranje" 8 | "gotest.tools/assert" 9 | ) 10 | 11 | func TestFlatRanges_Empty(t 
*testing.T) { 12 | // ┌─────┐ 13 | // │ 1 a │ 14 | // └─────┘ 15 | // 16 | // Just one range covering the whole keyspace. 17 | 18 | r1 := &ranje.Range{ 19 | State: api.RsActive, 20 | Meta: api.Meta{Ident: 1}, 21 | } 22 | 23 | ks := makeKeyspace(t, r1) 24 | 25 | actual := flatRanges(ks) 26 | assert.DeepEqual(t, []Repl{ 27 | { 28 | Start: api.ZeroKey, 29 | End: api.ZeroKey, 30 | }, 31 | }, actual) 32 | } 33 | 34 | func TestFlatRanges_ThreeStable(t *testing.T) { 35 | // ┌─────┐ 36 | // ┌─────│ 1 o │─────┐ 37 | // │ └─────┘ │ 38 | // │ │ │ 39 | // ▼ ▼ ▼ 40 | // ┌─────┐ ┌─────┐ ┌─────┐ 41 | // │ 2 a │ │ 3 a │ │ 4 a │ 42 | // └─────┘ └─────┘ └─────┘ 43 | // 44 | // Three active ranges which were split out from the genesis range, which is 45 | // now obsolete. Should return flat ranges with the same boundaries. 46 | 47 | r1 := &ranje.Range{ 48 | State: api.RsObsolete, 49 | Children: []api.RangeID{2, 3, 4}, 50 | Meta: api.Meta{ 51 | Ident: 1, 52 | Start: api.ZeroKey, 53 | End: api.ZeroKey, 54 | }, 55 | } 56 | 57 | r2 := &ranje.Range{ 58 | State: api.RsActive, 59 | Parents: []api.RangeID{1}, 60 | Meta: api.Meta{ 61 | Ident: 2, 62 | Start: api.ZeroKey, 63 | End: api.Key("nnn"), 64 | }, 65 | } 66 | 67 | r3 := &ranje.Range{ 68 | State: api.RsActive, 69 | Parents: []api.RangeID{1}, 70 | Meta: api.Meta{ 71 | Ident: 3, 72 | Start: api.Key("nnn"), 73 | End: api.Key("ttt"), 74 | }, 75 | } 76 | 77 | r4 := &ranje.Range{ 78 | State: api.RsActive, 79 | Parents: []api.RangeID{1}, 80 | Meta: api.Meta{ 81 | Ident: 4, 82 | Start: api.Key("ttt"), 83 | End: api.ZeroKey, 84 | }, 85 | } 86 | 87 | ks := makeKeyspace(t, r1, r2, r3, r4) 88 | 89 | actual := flatRanges(ks) 90 | assert.DeepEqual(t, []Repl{ 91 | { 92 | Start: api.ZeroKey, 93 | End: api.Key("nnn"), 94 | }, 95 | { 96 | Start: api.Key("nnn"), 97 | End: api.Key("ttt"), 98 | }, 99 | { 100 | Start: api.Key("ttt"), 101 | End: api.ZeroKey, 102 | }, 103 | }, actual) 104 | } 105 | 106 | func TestFlatRanges_FromFixture(t *testing.T) { 107 | ks := testFixtureR3(t) 108 | 109 | actual := ks.ReplicationState() 110 | assert.DeepEqual(t, []Repl{ 111 | { 112 | Start: api.ZeroKey, 113 | End: api.Key("bbb"), 114 | Total: 6, 115 | Active: 3, 116 | }, 117 | { 118 | Start: api.Key("bbb"), 119 | End: api.Key("ccc"), 120 | Total: 6, 121 | Active: 2, 122 | }, 123 | { 124 | Start: api.Key("ccc"), 125 | End: api.Key("ddd"), 126 | Total: 6, 127 | Active: 3, 128 | }, 129 | { 130 | Start: api.Key("ddd"), 131 | End: api.ZeroKey, 132 | Total: 6, 133 | Active: 2, 134 | }, 135 | }, actual) 136 | } 137 | 138 | func TestReplicationState_OneRange(t *testing.T) { 139 | // 140 | // ┌─────┐ 141 | // │ 1 a │ 142 | // | aii | 143 | // └─────┘ 144 | // 145 | // Just one range covering the whole keyspace, but with three placements: 146 | // one active, two inactive. 
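	// Expect a single Repl spanning the whole keyspace, with Total counting
	// every placement (3) and Active counting only the active one (1).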
147 | 148 | r1 := &ranje.Range{ 149 | State: api.RsActive, 150 | Meta: api.Meta{Ident: 1}, 151 | Placements: []*ranje.Placement{ 152 | { 153 | NodeID: "aaa", 154 | StateCurrent: api.PsActive, 155 | StateDesired: api.PsActive, 156 | }, 157 | { 158 | NodeID: "bbb", 159 | StateCurrent: api.PsInactive, 160 | StateDesired: api.PsInactive, 161 | }, 162 | { 163 | NodeID: "bbb", 164 | StateCurrent: api.PsInactive, 165 | StateDesired: api.PsInactive, 166 | }, 167 | }, 168 | } 169 | 170 | ks := makeKeyspace(t, r1) 171 | 172 | actual := ks.ReplicationState() 173 | assert.DeepEqual(t, []Repl{ 174 | { 175 | Start: api.ZeroKey, 176 | End: api.ZeroKey, 177 | Total: 3, 178 | Active: 1, 179 | }, 180 | }, actual) 181 | } 182 | 183 | func TestReplicationState_FromFixture(t *testing.T) { 184 | ks := testFixtureR3(t) 185 | 186 | actual := ks.ReplicationState() 187 | assert.DeepEqual(t, []Repl{ 188 | { 189 | Start: api.ZeroKey, 190 | End: "bbb", 191 | Total: 6, 192 | Active: 3, 193 | }, 194 | { 195 | Start: "bbb", 196 | End: "ccc", 197 | Total: 6, 198 | Active: 2, 199 | }, 200 | { 201 | Start: "ccc", 202 | End: "ddd", 203 | Total: 6, 204 | Active: 3, 205 | }, 206 | { 207 | Start: "ddd", 208 | End: api.ZeroKey, 209 | Total: 6, 210 | Active: 2, 211 | }, 212 | }, actual) 213 | } 214 | -------------------------------------------------------------------------------- /pkg/rangelet/server.go: -------------------------------------------------------------------------------- 1 | package rangelet 2 | 3 | import ( 4 | "context" 5 | "io" 6 | 7 | "github.com/adammck/ranger/pkg/api" 8 | "github.com/adammck/ranger/pkg/proto/conv" 9 | pb "github.com/adammck/ranger/pkg/proto/gen" 10 | "google.golang.org/grpc" 11 | "google.golang.org/grpc/codes" 12 | "google.golang.org/grpc/status" 13 | ) 14 | 15 | type NodeServer struct { 16 | pb.UnsafeNodeServer 17 | r *Rangelet 18 | } 19 | 20 | func newNodeServer(rglt *Rangelet) *NodeServer { 21 | ns := &NodeServer{r: rglt} 22 | return ns 23 | } 24 | 25 | func (ns *NodeServer) Register(sr grpc.ServiceRegistrar) { 26 | pb.RegisterNodeServer(sr, ns) 27 | } 28 | 29 | func parentsFromProto(prot []*pb.Parent) ([]api.Parent, error) { 30 | p := []api.Parent{} 31 | 32 | for _, pp := range prot { 33 | m, err := conv.MetaFromProto(pp.Range) 34 | if err != nil { 35 | return p, err 36 | } 37 | 38 | parentIds := make([]api.RangeID, len(pp.Parent)) 39 | for i := range pp.Parent { 40 | parentIds[i] = api.RangeID(pp.Parent[i]) 41 | } 42 | 43 | placements := make([]api.Placement, len(pp.Placements)) 44 | for i := range pp.Placements { 45 | placements[i] = api.Placement{ 46 | Node: pp.Placements[i].Node, 47 | State: conv.PlacementStateFromProto(pp.Placements[i].State), 48 | } 49 | } 50 | 51 | p = append(p, api.Parent{ 52 | Meta: m, 53 | Parents: parentIds, 54 | Placements: placements, 55 | }) 56 | } 57 | 58 | return p, nil 59 | } 60 | 61 | func (ns *NodeServer) Prepare(ctx context.Context, req *pb.PrepareRequest) (*pb.PrepareResponse, error) { 62 | meta, err := conv.MetaFromProto(req.Range) 63 | if err != nil { 64 | return nil, status.Errorf(codes.InvalidArgument, "error parsing range meta: %v", err) 65 | } 66 | 67 | parents, err := parentsFromProto(req.Parents) 68 | if err != nil { 69 | return nil, status.Errorf(codes.InvalidArgument, "error parsing parents: %v", err) 70 | } 71 | 72 | ri, err := ns.r.prepare(meta, parents) 73 | if err != nil { 74 | return nil, err 75 | } 76 | 77 | return &pb.PrepareResponse{ 78 | RangeInfo: conv.RangeInfoToProto(ri), 79 | }, nil 80 | } 81 | 82 | func (ns *NodeServer) 
Activate(ctx context.Context, req *pb.ServeRequest) (*pb.ServeResponse, error) { 83 | rID, err := conv.RangeIDFromProto(req.Range) 84 | if err != nil { 85 | return nil, status.Error(codes.InvalidArgument, err.Error()) 86 | } 87 | 88 | ri, err := ns.r.serve(rID) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | return &pb.ServeResponse{ 94 | State: conv.RemoteStateToProto(ri.State), 95 | }, nil 96 | } 97 | 98 | func (ns *NodeServer) Deactivate(ctx context.Context, req *pb.DeactivateRequest) (*pb.DeactivateResponse, error) { 99 | rID, err := conv.RangeIDFromProto(req.Range) 100 | if err != nil { 101 | return nil, status.Error(codes.InvalidArgument, err.Error()) 102 | } 103 | 104 | ri, err := ns.r.take(rID) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | return &pb.DeactivateResponse{ 110 | State: conv.RemoteStateToProto(ri.State), 111 | }, nil 112 | } 113 | 114 | func (ns *NodeServer) Drop(ctx context.Context, req *pb.DropRequest) (*pb.DropResponse, error) { 115 | 116 | rID, err := conv.RangeIDFromProto(req.Range) 117 | if err != nil { 118 | return nil, status.Error(codes.InvalidArgument, err.Error()) 119 | } 120 | 121 | ri, err := ns.r.drop(rID) 122 | if err != nil { 123 | // This is NOT a failure. 124 | if err == api.ErrNotFound { 125 | return &pb.DropResponse{ 126 | State: conv.RemoteStateToProto(api.NsNotFound), 127 | }, nil 128 | } 129 | 130 | // But other errors are. 131 | return nil, err 132 | } 133 | 134 | return &pb.DropResponse{ 135 | State: conv.RemoteStateToProto(ri.State), 136 | }, nil 137 | } 138 | 139 | func (ns *NodeServer) Info(ctx context.Context, req *pb.InfoRequest) (*pb.InfoResponse, error) { 140 | err := ns.r.gatherLoadInfo() 141 | if err != nil { 142 | return nil, status.Error(codes.Internal, err.Error()) 143 | } 144 | 145 | res := &pb.InfoResponse{ 146 | WantDrain: ns.r.wantDrain(), 147 | } 148 | 149 | ns.r.walk(func(ri *api.RangeInfo) bool { 150 | res.Ranges = append(res.Ranges, conv.RangeInfoToProto(*ri)) 151 | return true 152 | }) 153 | 154 | return res, nil 155 | } 156 | 157 | func (ns *NodeServer) Ranges(req *pb.RangesRequest, stream pb.Node_RangesServer) error { 158 | conv := func(ri *api.RangeInfo) *pb.RangesResponse { 159 | return &pb.RangesResponse{ 160 | Meta: conv.MetaToProto(ri.Meta), 161 | State: conv.RemoteStateToProto(ri.State), 162 | } 163 | } 164 | 165 | var err error 166 | 167 | ns.r.watch(func(ri *api.RangeInfo) bool { 168 | err = stream.Send(conv(ri)) 169 | return (err == nil) 170 | }) 171 | 172 | if err != nil { 173 | return err 174 | } 175 | 176 | return io.EOF 177 | } 178 | -------------------------------------------------------------------------------- /pkg/orchestrator/server_orchestrator.go: -------------------------------------------------------------------------------- 1 | package orchestrator 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | "github.com/adammck/ranger/pkg/proto/conv" 10 | pb "github.com/adammck/ranger/pkg/proto/gen" 11 | "google.golang.org/grpc/codes" 12 | "google.golang.org/grpc/status" 13 | ) 14 | 15 | type orchestratorServer struct { 16 | pb.UnsafeOrchestratorServer 17 | orch *Orchestrator 18 | } 19 | 20 | func (bs *orchestratorServer) Move(ctx context.Context, req *pb.MoveRequest) (*pb.MoveResponse, error) { 21 | rID, err := getRange(bs, req.Range, "range") 22 | if err != nil { 23 | return nil, err 24 | } 25 | 26 | // NodeID is optional for this endpoint. 27 | // TODO: Verify that the NodeID is valid (at least right now), if given. 
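	// (A zero NodeID is acceptable; OpMove.Dest below may be ZeroNodeID, in
	// which case the orchestrator chooses a destination itself.)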
28 | nID, err := conv.NodeIDFromProto(req.Node) 29 | if err != nil && err != conv.ErrMissingNodeID { 30 | return nil, err 31 | } 32 | 33 | op := OpMove{ 34 | Range: rID, 35 | Dest: nID, // Might be ZeroNodeID 36 | Err: make(chan error), 37 | } 38 | 39 | bs.orch.opMovesMu.Lock() 40 | bs.orch.opMoves = append(bs.orch.opMoves, op) 41 | bs.orch.opMovesMu.Unlock() 42 | 43 | errs := []string{} 44 | for { 45 | err, ok := <-op.Err 46 | if !ok { // closed 47 | break 48 | } 49 | errs = append(errs, err.Error()) 50 | } 51 | 52 | // There's probably only one error. But who knows. 53 | if len(errs) > 0 { 54 | return nil, status.Error( 55 | codes.Aborted, 56 | fmt.Sprintf("move operation failed: %v", strings.Join(errs, "; "))) 57 | } 58 | 59 | return &pb.MoveResponse{}, nil 60 | } 61 | 62 | func (bs *orchestratorServer) Split(ctx context.Context, req *pb.SplitRequest) (*pb.SplitResponse, error) { 63 | rID, err := getRange(bs, req.Range, "range") 64 | if err != nil { 65 | return nil, err 66 | } 67 | 68 | boundary := api.Key(req.Boundary) 69 | if boundary == "" { 70 | return nil, status.Error(codes.InvalidArgument, "missing: boundary") 71 | } 72 | 73 | // NodeID (on both sides) is optional for this endpoint. 74 | // TODO: Verify that the NodeIDs are valid if given. 75 | nID1, err := conv.NodeIDFromProto(req.NodeLeft) 76 | if err != nil && err != conv.ErrMissingNodeID { 77 | return nil, err 78 | } 79 | nID2, err := conv.NodeIDFromProto(req.NodeLeft) 80 | if err != nil && err != conv.ErrMissingNodeID { 81 | return nil, err 82 | } 83 | 84 | op := OpSplit{ 85 | Range: rID, 86 | Key: boundary, 87 | Left: nID1, 88 | Right: nID2, 89 | Err: make(chan error), 90 | } 91 | 92 | bs.orch.opSplitsMu.Lock() 93 | bs.orch.opSplits[rID] = op 94 | bs.orch.opSplitsMu.Unlock() 95 | 96 | errs := []string{} 97 | for { 98 | err, ok := <-op.Err 99 | if !ok { // closed 100 | break 101 | } 102 | errs = append(errs, err.Error()) 103 | } 104 | 105 | if len(errs) > 0 { 106 | return nil, status.Error( 107 | codes.Aborted, 108 | fmt.Sprintf("split operation failed: %v", strings.Join(errs, "; "))) 109 | } 110 | 111 | return &pb.SplitResponse{}, nil 112 | } 113 | 114 | func (bs *orchestratorServer) Join(ctx context.Context, req *pb.JoinRequest) (*pb.JoinResponse, error) { 115 | left, err := getRange(bs, req.RangeLeft, "range_left") 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | right, err := getRange(bs, req.RangeRight, "range_right") 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | // NodeID is optional for this endpoint. 126 | // TODO: Verify that the NodeID is valid if given. 127 | nID, err := conv.NodeIDFromProto(req.Node) 128 | if err != nil && err != conv.ErrMissingNodeID { 129 | return nil, err 130 | } 131 | 132 | op := OpJoin{ 133 | Left: left, 134 | Right: right, 135 | Dest: nID, 136 | Err: make(chan error), 137 | } 138 | 139 | bs.orch.opJoinsMu.Lock() 140 | bs.orch.opJoins = append(bs.orch.opJoins, op) 141 | bs.orch.opJoinsMu.Unlock() 142 | 143 | errs := []string{} 144 | for { 145 | err, ok := <-op.Err 146 | if !ok { // closed 147 | break 148 | } 149 | errs = append(errs, err.Error()) 150 | } 151 | 152 | if len(errs) > 0 { 153 | return nil, status.Error( 154 | codes.Aborted, 155 | fmt.Sprintf("join operation failed: %v", strings.Join(errs, "; "))) 156 | } 157 | 158 | return &pb.JoinResponse{}, nil 159 | } 160 | 161 | // getRange examines the given range ident and returns the corresponding Range 162 | // or an error suitable for a gRPC response. 
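// (Strictly speaking, it returns the parsed RangeID; it does not look the
// range up in the keyspace.)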
163 | func getRange(bs *orchestratorServer, pbid uint64, field string) (api.RangeID, error) { 164 | if pbid == 0 { 165 | return api.ZeroRange, status.Error(codes.InvalidArgument, fmt.Sprintf("missing: %s", field)) 166 | } 167 | 168 | id, err := conv.RangeIDFromProto(pbid) 169 | if err != nil { 170 | return api.ZeroRange, status.Error(codes.InvalidArgument, fmt.Sprintf("invalid %s: %s", field, err.Error())) 171 | } 172 | 173 | return id, nil 174 | } 175 | -------------------------------------------------------------------------------- /pkg/roster/roster_test.go: -------------------------------------------------------------------------------- 1 | package roster 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | "github.com/adammck/ranger/pkg/ranje" 10 | "github.com/adammck/ranger/pkg/test/fake_nodes" 11 | "github.com/stretchr/testify/suite" 12 | ) 13 | 14 | // TODO: Remove this suite, just use funcs. 15 | type RosterSuite struct { 16 | suite.Suite 17 | ctx context.Context 18 | nodes *fake_nodes.TestNodes 19 | rost *Roster 20 | 21 | // Not important 22 | r *ranje.Range 23 | } 24 | 25 | func TestExampleTestSuite(t *testing.T) { 26 | suite.Run(t, new(RosterSuite)) 27 | } 28 | 29 | func (ts *RosterSuite) SetupTest() { 30 | ts.ctx = context.Background() 31 | 32 | // Just to avoid constructing this thing everywhere. 33 | ts.r = &ranje.Range{ 34 | Meta: api.Meta{Ident: 1}, 35 | State: api.RsActive, 36 | } 37 | 38 | // Empty by default. 39 | ts.nodes = fake_nodes.NewTestNodes() 40 | } 41 | 42 | func (ts *RosterSuite) Init() { 43 | ts.rost = New(ts.nodes.Discovery(), nil, nil, nil) 44 | ts.rost.NodeConnFactory = ts.nodes.NodeConnFactory 45 | } 46 | 47 | func (ts *RosterSuite) TestNoCandidates() { 48 | ts.Init() 49 | 50 | nID, err := ts.rost.Candidate(ts.r, ranje.AnyNode) 51 | if ts.Error(err) { 52 | ts.Equal(fmt.Errorf("no candidates available (rID=1, c=Constraint{any})"), err) 53 | } 54 | ts.Equal(nID, api.ZeroNodeID) 55 | } 56 | 57 | func (ts *RosterSuite) TestCandidateByNodeID() { 58 | 59 | aRem := api.Remote{ 60 | Ident: "test-aaa", 61 | Host: "host-aaa", 62 | Port: 1, 63 | } 64 | 65 | bRem := api.Remote{ 66 | Ident: "test-bbb", 67 | Host: "host-bbb", 68 | Port: 1, 69 | } 70 | 71 | cRem := api.Remote{ 72 | Ident: "test-ccc", 73 | Host: "host-ccc", 74 | Port: 1, 75 | } 76 | 77 | r := &ranje.Range{ 78 | Meta: api.Meta{ 79 | Ident: 1, 80 | }, 81 | State: api.RsActive, 82 | Placements: []*ranje.Placement{{ 83 | NodeID: aRem.NodeID(), 84 | StateCurrent: api.PsActive, 85 | }}, 86 | } 87 | 88 | aInfos := map[api.RangeID]*api.RangeInfo{ 89 | r.Meta.Ident: { 90 | Meta: r.Meta, 91 | State: api.NsActive, 92 | }, 93 | } 94 | 95 | ts.nodes.Add(ts.ctx, aRem, aInfos) 96 | ts.nodes.Add(ts.ctx, bRem, nil) 97 | ts.nodes.Add(ts.ctx, cRem, nil) 98 | ts.nodes.Get("test-ccc").SetWantDrain(true) 99 | 100 | ts.Init() 101 | ts.rost.Tick() 102 | 103 | // ------------------------------------------------------------------------- 104 | 105 | nID, err := ts.rost.Candidate(ts.r, ranje.Constraint{NodeID: "test-bbb"}) 106 | if ts.NoError(err) { 107 | ts.Equal(api.NodeID("test-bbb"), nID) 108 | } 109 | 110 | // This one doesn't exist 111 | _, err = ts.rost.Candidate(ts.r, ranje.Constraint{NodeID: "test-ddd"}) 112 | if ts.Error(err) { 113 | ts.Equal("no such node: test-ddd", err.Error()) 114 | } 115 | 116 | // This one already has the range 117 | _, err = ts.rost.Candidate(ts.r, ranje.Constraint{NodeID: "test-aaa"}) 118 | if ts.Error(err) { 119 | ts.Equal("node already has range: 
test-aaa", err.Error()) 120 | } 121 | 122 | // This one doesn't want any more ranges, because it's drained. 123 | // Note that we only know that the node wants to be drained because of the 124 | // roster tick, above. If we just called SetWantDrain(true) and didn't tick, 125 | // the "remote" node would want drain, but the roster wouldn't know that. 126 | _, err = ts.rost.Candidate(ts.r, ranje.Constraint{NodeID: "test-ccc"}) 127 | if ts.Error(err) { 128 | ts.Equal("node wants drain: test-ccc", err.Error()) 129 | } 130 | } 131 | 132 | func (ts *RosterSuite) TestProbeOne() { 133 | 134 | rem := api.Remote{ 135 | Ident: "test-aaa", 136 | Host: "host-aaa", 137 | Port: 1, 138 | } 139 | 140 | r := &ranje.Range{ 141 | Meta: api.Meta{ 142 | Ident: 1, 143 | Start: api.ZeroKey, 144 | End: api.Key("ggg"), 145 | }, 146 | State: api.RsActive, 147 | Placements: []*ranje.Placement{{ 148 | NodeID: rem.NodeID(), 149 | StateCurrent: api.PsActive, 150 | }}, 151 | } 152 | 153 | fakeInfos := map[api.RangeID]*api.RangeInfo{ 154 | r.Meta.Ident: { 155 | Meta: r.Meta, 156 | State: api.NsActive, 157 | Info: api.LoadInfo{ 158 | Keys: 123, 159 | }, 160 | }, 161 | } 162 | 163 | ts.nodes.Add(ts.ctx, rem, fakeInfos) 164 | ts.Init() 165 | 166 | ts.rost.Discover() 167 | 168 | // Far as the roster is concerned, this is a real node. 169 | rostNode, err := ts.rost.NodeByIdent("test-aaa") 170 | ts.Require().NoError(err) 171 | ts.Require().NotNil(rostNode) 172 | 173 | err = ts.rost.probeOne(ts.ctx, rostNode) 174 | if ts.NoError(err) { 175 | if rostInfo, ok := rostNode.Get(1); ts.True(ok) { 176 | 177 | // The "real" RangeInfo, which we got from the (fake) remote via 178 | // gRPC (via bufconn) through its rangelet should match the fake 179 | // RangeInfo above. 180 | ts.Equal(*fakeInfos[r.Meta.Ident], rostInfo) 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /examples/kv/test/test_helper.bash: -------------------------------------------------------------------------------- 1 | # map of port -> pid 2 | declare -A cmds 3 | 4 | # keys 5 | a=$(echo -n a | base64); export a 6 | a1=$(echo -n a1 | base64); export a1 7 | a2=$(echo -n a2 | base64); export a2 8 | b=$(echo -n b | base64); export b 9 | b1=$(echo -n b1 | base64); export b1 10 | b2=$(echo -n b2 | base64); export b2 11 | c=$(echo -n c | base64); export c 12 | 13 | # vals 14 | zzz=$(echo -n zzz | base64); export zzz 15 | yyy=$(echo -n yyy | base64); export yyy 16 | xxx=$(echo -n xxx | base64); export xxx 17 | www=$(echo -n www | base64); export www 18 | 19 | # Returns a command which can be `run` to access the ranger client aimed at a 20 | # controller on the given port. 21 | ranger_client() { 22 | #>&3 echo "# ranger_client $@" 23 | local port=$1 24 | shift 25 | 26 | if [[ -z "${RANGER_CLIENT}" ]]; then 27 | fail "RANGER_CLIENT must be set" 28 | fi 29 | 30 | echo "$RANGER_CLIENT" -addr "127.0.0.1:${port}" 31 | } 32 | 33 | # Fail if the given port number is currently in use. This is better than trying 34 | # to run some command and failing with a weird error. 35 | assert_port_available() { 36 | #>&3 echo "# assert_port_available $@" 37 | local port=$1 38 | nc -z localhost "$port" && fail "port $port is already in use" 39 | return 0 40 | } 41 | 42 | # Start a node in the background on the given port. 43 | # Stop it by calling defer_stop_cmds in teardown. 
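# (start_cmd registers the PID via defer_stop_cmds; teardown's stop_cmds then
# stops everything that was started.)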
44 | start_node() { 45 | #>&3 echo "# start_node $@" 46 | local port=$1 47 | start_cmd "$port" ./kv -node -addr "127.0.0.1:$port" -log-reqs 48 | } 49 | 50 | # Start a controller in the background on the given port. 51 | # Stop it by calling defer_stop_cmds in teardown. 52 | start_controller() { 53 | #>&3 echo "# start_controller $@" 54 | local port=$1 55 | start_cmd "$port" ./kv -controller -addr "127.0.0.1:$port" 56 | } 57 | 58 | start_proxy() { 59 | #>&3 echo "# start_proxy $@" 60 | local port=$1 61 | start_cmd "$port" ./kv -proxy -addr "127.0.0.1:$port" -log-reqs 62 | } 63 | 64 | start_consul() { 65 | #>&3 echo "# start_consul $@" 66 | 67 | # We don't actually set the ports here because I'm too lazy to pass them 68 | # around. This means that only a single consul-using test can run at once. 69 | # TODO: Fix this. 70 | 71 | start_cmd 8500 consul agent -dev 1>/dev/null 72 | } 73 | 74 | 75 | # Run a command which is expected to listen on a port in the background. Block 76 | # until the port is open, even if that's forever. Store the PID of the command, 77 | # so it can be killed by calling defer_stop_cmds (probably in teardown). 78 | # 79 | # Usage: start_cmd port cmd [args...] 80 | # E.g.: start_cmd 8000 81 | start_cmd() { 82 | #>&3 echo "# start_cmd $@" 83 | local port=$1 84 | shift 85 | 86 | assert_port_available "$port" 87 | "$@" & 88 | local pid=$! 89 | 90 | defer_stop_cmds "$port" "$pid" 91 | wait_port "$port" "$pid" 92 | } 93 | 94 | # Send the given signal to the command serving the given port. 95 | send_signal() { 96 | #>&3 echo "# send_signal $@" 97 | local pid=$1 98 | local signal=$2 99 | 100 | if test -n "$pid"; then # if non-empty 101 | if kill -s 0 "$pid" 2>/dev/null; then # and still running 102 | kill -s "$signal" "$pid" 103 | fi 104 | fi 105 | } 106 | 107 | stop_cmd() { 108 | #>&3 echo "# stop_cmd $@" 109 | send_signal "$pid" "INT" 110 | } 111 | 112 | # Sends SIGQUIT to the command serving the given port. 113 | crash_cmd() { 114 | #>&3 echo "# crash_cmd $@" 115 | local port=$1 116 | local pid=${cmds[$port]} 117 | 118 | if test "$pid" = ""; then 119 | fail "no command serving port $port" 120 | fi 121 | 122 | kill -s QUIT "$pid" 123 | 124 | # Block until the process is no longer running. 125 | while : ; do 126 | if ! kill -s 0 "$pid" 2>/dev/null; then 127 | break 128 | fi 129 | 130 | sleep 0.1 131 | done 132 | 133 | unset "cmds[$port]" 134 | } 135 | 136 | defer_stop_cmds() { 137 | #>&3 echo "# defer_stop_cmds $@" 138 | local port=$1 139 | local pid=$2 140 | 141 | if test -n "${cmds[$port]}"; then 142 | fail "already have command on port $port" 143 | return 144 | fi 145 | 146 | cmds[$port]=$pid 147 | } 148 | 149 | stop_cmds() { 150 | #>&3 echo "# stop_cmds $@" 151 | for pid in "${cmds[@]}"; do 152 | stop_cmd "$pid" 153 | done 154 | } 155 | 156 | # Block until either the given port is listening, or the given PID is no longer 157 | # running. 158 | wait_port() { 159 | #>&3 echo "# wait_port $@" 160 | local port=$1 161 | local pid=$2 162 | 163 | while : ; do 164 | if ! 
kill -s 0 "$pid" 2>/dev/null; then 165 | fail "program terminated while waiting for port $port to listen" 166 | fi 167 | 168 | if nc -z localhost "$port"; then 169 | return 0; 170 | fi 171 | 172 | sleep 0.1 173 | done 174 | } 175 | -------------------------------------------------------------------------------- /examples/kv/pkg/node/node.go: -------------------------------------------------------------------------------- 1 | package node 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "net" 7 | "sync" 8 | "time" 9 | 10 | "google.golang.org/grpc" 11 | "google.golang.org/grpc/reflection" 12 | 13 | pbkv "github.com/adammck/ranger/examples/kv/proto/gen" 14 | "github.com/adammck/ranger/pkg/api" 15 | "github.com/adammck/ranger/pkg/discovery" 16 | consuldisc "github.com/adammck/ranger/pkg/discovery/consul" 17 | "github.com/adammck/ranger/pkg/rangelet" 18 | "github.com/adammck/ranger/pkg/rangelet/storage/null" 19 | consulapi "github.com/hashicorp/consul/api" 20 | ) 21 | 22 | type Range struct { 23 | data map[string][]byte 24 | dataMu sync.RWMutex // guards data 25 | fetcher *fetcher 26 | writable uint32 // semantically bool, but uint so we can read atomically 27 | } 28 | 29 | type Node struct { 30 | DrainBeforeShutdown bool 31 | 32 | ranges map[api.RangeID]*Range 33 | rangesMu sync.RWMutex // guards ranges 34 | 35 | addrLis string 36 | addrPub string 37 | srv *grpc.Server 38 | disc discovery.Discoverable 39 | 40 | rglt *rangelet.Rangelet 41 | 42 | // Options 43 | logReqs bool 44 | chaos bool 45 | } 46 | 47 | func init() { 48 | // Ensure that nodeServer implements the NodeServer interface 49 | var ns *Node = nil 50 | var _ api.Node = ns 51 | } 52 | 53 | func New(addrLis, addrPub string, drainBeforeShutdown bool, logReqs bool, chaos bool) (*Node, error) { 54 | var opts []grpc.ServerOption 55 | srv := grpc.NewServer(opts...) 56 | 57 | // Register reflection service, so client can introspect (for debugging). 58 | // TODO: Make this optional. 59 | reflection.Register(srv) 60 | 61 | disc, err := consuldisc.New("node", addrPub, consulapi.DefaultConfig(), srv) 62 | if err != nil { 63 | return nil, err 64 | } 65 | 66 | n := &Node{ 67 | DrainBeforeShutdown: drainBeforeShutdown, 68 | ranges: map[api.RangeID]*Range{}, 69 | addrLis: addrLis, 70 | addrPub: addrPub, 71 | srv: srv, 72 | disc: disc, 73 | 74 | logReqs: logReqs, 75 | chaos: chaos, 76 | } 77 | 78 | rglt := rangelet.New(n, srv, &null.NullStorage{}) 79 | n.rglt = rglt 80 | 81 | kv := kvServer{node: n} 82 | pbkv.RegisterKVServer(srv, &kv) 83 | 84 | return n, nil 85 | } 86 | 87 | func (n *Node) Run(ctx context.Context) error { 88 | 89 | // For the gRPC server. 90 | lis, err := net.Listen("tcp", n.addrLis) 91 | if err != nil { 92 | return err 93 | } 94 | 95 | log.Printf("listening on: %s", n.addrLis) 96 | 97 | // Start the gRPC server in a background routine. 98 | errChan := make(chan error) 99 | go func() { 100 | err := n.srv.Serve(lis) 101 | if err != nil { 102 | errChan <- err 103 | } 104 | close(errChan) 105 | }() 106 | 107 | // Register with service discovery 108 | err = n.disc.Start() 109 | if err != nil { 110 | return err 111 | } 112 | 113 | // Block until context is cancelled, indicating that caller wants shutdown. 114 | <-ctx.Done() 115 | 116 | // We're shutting down. First drain all of the ranges off this node (which 117 | // may take a while and involve a bunch of RPCs.) If this is disabled, the 118 | // node will just disappear, and the controller will wait until it expires 119 | // before assigning the range to other nodes. 
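	// (The drain is advertised via the want-drain flag, which the controller
	// notices on its next probe; see DrainRanges below.)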
120 | if n.DrainBeforeShutdown { 121 | n.DrainRanges() 122 | } else { 123 | log.Print("not draining ranges") 124 | } 125 | 126 | // Remove ourselves from service discovery, to minimize the number of new 127 | // requests coming in. 128 | err = n.disc.Stop() 129 | if err != nil { 130 | return err 131 | } 132 | 133 | // Interrupt any in-flight RPCs and stop serving. Hopefully by this time 134 | // (after draining ranges) the only RPCs remaining are streaming range 135 | // watches or other control-plane stuff, not client requests. In production 136 | // we'd probably have two separate servers, and call GracefulStop on the 137 | // data-plane first. 138 | // 139 | // errChan will contain the error returned by srv.Serve (see above), or be 140 | // closed with no error. 141 | n.srv.Stop() 142 | err = <-errChan 143 | if err != nil { 144 | log.Printf("error from srv.Serve: %v", err) 145 | return err 146 | } 147 | 148 | return nil 149 | } 150 | 151 | // TODO: Move this to rangelet? 152 | func (n *Node) DrainRanges() { 153 | log.Print("draining ranges...") 154 | 155 | // This is included in probe responses. The next time the controller probes 156 | // this node, it will notice that the node wants to drain (probably because 157 | // it's shutting down), and start removing ranges. 158 | n.rglt.SetWantDrain(true) 159 | 160 | // Log the number of ranges remaining every five seconds while waiting. 161 | 162 | tick := time.NewTicker(5 * time.Second) 163 | done := make(chan bool) 164 | 165 | go func() { 166 | for { 167 | select { 168 | case <-done: 169 | return 170 | case <-tick.C: 171 | c := n.rglt.Len() 172 | log.Printf("ranges remaining: %d", c) 173 | } 174 | } 175 | }() 176 | 177 | // Block until the number of ranges hits zero. 178 | 179 | for { 180 | if c := n.rglt.Len(); c == 0 { 181 | break 182 | } 183 | 184 | time.Sleep(50 * time.Millisecond) 185 | } 186 | 187 | // Stop the logger. 188 | 189 | tick.Stop() 190 | done <- true 191 | 192 | log.Print("finished draining ranges.") 193 | } 194 | -------------------------------------------------------------------------------- /pkg/actuator/mock/mock_actuator.go: -------------------------------------------------------------------------------- 1 | package mock 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "sort" 7 | "strings" 8 | "sync" 9 | 10 | "github.com/adammck/ranger/pkg/api" 11 | "github.com/adammck/ranger/pkg/ranje" 12 | "github.com/adammck/ranger/pkg/roster" 13 | ) 14 | 15 | type Actuator struct { 16 | injects map[api.Command]*Inject 17 | strict bool 18 | 19 | commands []api.Command 20 | unexpected []api.Command 21 | 22 | // mu guards everything. 23 | // No need for granularity. 24 | mu sync.Mutex 25 | } 26 | 27 | func New(strict bool) *Actuator { 28 | return &Actuator{ 29 | injects: map[api.Command]*Inject{}, 30 | strict: strict, 31 | commands: []api.Command{}, 32 | unexpected: []api.Command{}, 33 | } 34 | } 35 | 36 | func (a *Actuator) Reset() { 37 | a.commands = []api.Command{} 38 | a.unexpected = []api.Command{} 39 | } 40 | 41 | func (a *Actuator) Unexpected() []api.Command { 42 | return a.unexpected 43 | } 44 | 45 | // TODO: This is currently duplicated. 46 | func (a *Actuator) Command(cmd api.Command, p *ranje.Placement, n *roster.Node) error { 47 | s, err := a.cmd(cmd.Action, p, n) 48 | if err != nil { 49 | return err 50 | } 51 | 52 | // TODO: This special case is weird. It was less so when Prepare was a 53 | // separate method. Think about it or something. 
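// (Prepare presumably creates the range's entry on the fake node, so it installs a whole RangeInfo; the other actions only update the state of an entry which already exists.)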
54 | if cmd.Action == api.Prepare { 55 | n.UpdateRangeInfo(&api.RangeInfo{ 56 | Meta: p.Range().Meta, 57 | State: s, 58 | Info: api.LoadInfo{}, 59 | }) 60 | } else { 61 | n.UpdateRangeState(p.Range().Meta.Ident, s) 62 | } 63 | 64 | return nil 65 | } 66 | 67 | // command logs a command (to be retrived later via Commands), and returns the 68 | // remote state which the (imaginary) remote node is now in, to be passed along. 69 | // to the Roster. The default return given via def, but may be overriden via 70 | // Expect to simulate failures. 71 | func (a *Actuator) cmd(action api.Action, p *ranje.Placement, n *roster.Node) (api.RemoteState, error) { 72 | cmd := api.Command{ 73 | RangeIdent: p.Range().Meta.Ident, 74 | NodeIdent: n.Ident(), 75 | Action: action, 76 | } 77 | 78 | a.mu.Lock() 79 | defer a.mu.Unlock() 80 | 81 | a.commands = append(a.commands, cmd) 82 | exp, ok := a.injects[cmd] 83 | 84 | // If strict mode is enabled (i.e. we expected all commands to be mocked), 85 | // and this command is not mocked, append it to unexpected commands. This 86 | // will (hopefully) be checked later by a test helper at the end of the 87 | // tick, to avoid the error message leading here, devoid of context. 88 | if a.strict && exp == nil { 89 | a.unexpected = append(a.unexpected, cmd) 90 | return api.NsUnknown, fmt.Errorf("no hook injected for command while strict enabled: %s", cmd.String()) 91 | } 92 | 93 | // Default (no override) is to succeed and advance to the default. 94 | if !ok { 95 | return mustDefault(action), nil 96 | } 97 | 98 | if exp.success { 99 | return exp.ns, nil 100 | } else { 101 | // TODO: Allow actual errors to be injected? 102 | return api.NsUnknown, errors.New("injected error") 103 | } 104 | } 105 | 106 | // Default resulting state of each action. Note that we don't validate that the 107 | // fake remote transition at all, because real nodes (with rangelets) can assume 108 | // whatever state they like. 109 | var defaults = map[api.Action]api.RemoteState{ 110 | api.Prepare: api.NsInactive, 111 | api.Activate: api.NsActive, 112 | api.Deactivate: api.NsInactive, 113 | api.Drop: api.NsNotFound, 114 | } 115 | 116 | func mustDefault(action api.Action) api.RemoteState { 117 | s, ok := defaults[action] 118 | if !ok { 119 | panic(fmt.Sprintf("no default state for action: %s", action)) 120 | } 121 | 122 | return s 123 | } 124 | 125 | // TODO: Make private once orch tests fixed. 126 | type Inject struct { 127 | success bool 128 | ns api.RemoteState 129 | } 130 | 131 | func (ij *Inject) Success() *Inject { 132 | ij.success = true 133 | return ij 134 | } 135 | 136 | func (ij *Inject) Failure() *Inject { 137 | ij.success = false 138 | return ij 139 | } 140 | 141 | func (ij *Inject) Response(ns api.RemoteState) *Inject { 142 | ij.ns = ns 143 | return ij 144 | } 145 | 146 | func (a *Actuator) Inject(nID api.NodeID, rID api.RangeID, act api.Action) *Inject { 147 | cmd := api.Command{ 148 | RangeIdent: rID, 149 | NodeIdent: nID, 150 | Action: act, 151 | } 152 | 153 | exp := &Inject{ 154 | success: true, 155 | ns: api.NsUnknown, 156 | } 157 | 158 | a.mu.Lock() 159 | a.injects[cmd] = exp 160 | a.mu.Unlock() 161 | 162 | return exp 163 | } 164 | 165 | // Unject removes a hook. 
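// The hook is matched by pointer identity; Unject panics if the given Inject was never registered via Inject.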
166 | func (a *Actuator) Unject(ij *Inject) { 167 | a.mu.Lock() 168 | defer a.mu.Unlock() 169 | 170 | for k, v := range a.injects { 171 | if ij == v { 172 | delete(a.injects, k) 173 | return 174 | } 175 | } 176 | 177 | panic(fmt.Sprintf("unknown inject: %v", ij)) 178 | } 179 | 180 | func (a *Actuator) Commands() string { 181 | a.mu.Lock() 182 | cmds := a.commands 183 | a.mu.Unlock() 184 | 185 | // Sort them into constant order. 186 | sort.Slice(cmds, func(i, j int) bool { 187 | return cmds[i].Less(cmds[j]) 188 | }) 189 | 190 | // Cast to strings. 191 | strs := make([]string, len(cmds)) 192 | for i := range cmds { 193 | strs[i] = cmds[i].String() 194 | } 195 | 196 | // Return a single string. 197 | return strings.Join(strs, ", ") 198 | } 199 | -------------------------------------------------------------------------------- /pkg/keyspace/x_test.go: -------------------------------------------------------------------------------- 1 | package keyspace 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/adammck/ranger/pkg/api" 7 | "github.com/adammck/ranger/pkg/ranje" 8 | ) 9 | 10 | func testFixtureR3(t *testing.T) *Keyspace { 11 | p := true 12 | 13 | // ┌─────┐ 14 | // ┌─────│ 1 o │─────┐ 15 | // │ | ... | │ 16 | // │ └─────┘ │ 17 | // ▼ ▼ 18 | // ┌─────┐ ┌─────┐ 19 | // ┌─│ 2 o │─┐ ┌─│ 3 s │─┐ 20 | // │ | ... | │ │ | iia | │ 21 | // │ └─────┘ │ │ └─────┘ │ 22 | // ▼ ▼ ▼ ▼ 23 | // ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ 24 | // │ 4 j │ │ 5 j │ │ 6 a │ │ 7 a │ 25 | // │ iaa │ │ iia │ │ aai │ │ aii │ 26 | // └─────┘ └─────┘ └─────┘ └─────┘ 27 | // │ │ 28 | // └────┬────┘ 29 | // ▼ 30 | // ┌─────┐ 31 | // │ 8 a │ 32 | // │ aii │ 33 | // └─────┘ 34 | // 35 | // R1 obsolete (was split into R2, R3 at ccc) 36 | // R2 obsolete (was split into R4, R5 at bbb) 37 | // R3 splitting into R6, R7 at ddd 38 | // R4 joining with R5 into R8 39 | // R5 joining with R4 into R8 40 | // R6 active (splitting from R3) 41 | // R7 active (splitting from R7) 42 | // R8 active (joining from R4, R5) 43 | 44 | r1 := &ranje.Range{ 45 | State: api.RsObsolete, 46 | Children: []api.RangeID{2, 3}, 47 | Meta: api.Meta{Ident: 1, Start: api.ZeroKey, End: api.ZeroKey}, 48 | Placements: []*ranje.Placement{}, 49 | } 50 | 51 | r2 := &ranje.Range{ 52 | State: api.RsObsolete, 53 | Parents: []api.RangeID{1}, 54 | Children: []api.RangeID{4, 5}, 55 | Meta: api.Meta{Ident: 2, End: api.Key("ccc")}, 56 | Placements: []*ranje.Placement{}, 57 | } 58 | 59 | r3 := &ranje.Range{ 60 | State: api.RsSubsuming, 61 | Parents: []api.RangeID{1}, 62 | Children: []api.RangeID{6, 7}, 63 | Meta: api.Meta{Ident: 3, Start: api.Key("ccc")}, 64 | } 65 | if p { 66 | r3.Placements = []*ranje.Placement{ 67 | {NodeID: "aaa", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 68 | {NodeID: "bbb", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 69 | {NodeID: "ccc", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 70 | } 71 | } 72 | 73 | r4 := &ranje.Range{ 74 | State: api.RsSubsuming, 75 | Parents: []api.RangeID{2}, 76 | Children: []api.RangeID{8}, 77 | Meta: api.Meta{Ident: 4, End: api.Key("bbb")}, 78 | } 79 | if p { 80 | r4.Placements = []*ranje.Placement{ 81 | {NodeID: "ddd", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 82 | {NodeID: "eee", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 83 | {NodeID: "fff", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 84 | } 85 | } 86 | 87 | r5 := &ranje.Range{ 88 | State: api.RsSubsuming, 89 | Parents: []api.RangeID{2}, 90 | Children: []api.RangeID{8}, 91 | Meta: 
api.Meta{Ident: 5, Start: api.Key("bbb"), End: api.Key("ccc")}, 92 | } 93 | if p { 94 | r5.Placements = []*ranje.Placement{ 95 | {NodeID: "ggg", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 96 | {NodeID: "hhh", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 97 | {NodeID: "iii", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 98 | } 99 | } 100 | 101 | r6 := &ranje.Range{ 102 | State: api.RsActive, 103 | Parents: []api.RangeID{3}, 104 | Children: []api.RangeID{}, 105 | Meta: api.Meta{Ident: 6, Start: api.Key("ccc"), End: api.Key("ddd")}, 106 | } 107 | if p { 108 | r6.Placements = []*ranje.Placement{ 109 | {NodeID: "jjj", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 110 | {NodeID: "kkk", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 111 | {NodeID: "lll", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 112 | } 113 | } 114 | 115 | r7 := &ranje.Range{ 116 | State: api.RsActive, 117 | Parents: []api.RangeID{3}, 118 | Children: []api.RangeID{}, 119 | Meta: api.Meta{Ident: 7, Start: api.Key("ddd")}, 120 | } 121 | if p { 122 | r7.Placements = []*ranje.Placement{ 123 | {NodeID: "mmm", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 124 | {NodeID: "nnn", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 125 | {NodeID: "ooo", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 126 | } 127 | } 128 | 129 | r8 := &ranje.Range{ 130 | State: api.RsActive, 131 | Parents: []api.RangeID{4, 5}, 132 | Children: []api.RangeID{}, 133 | Meta: api.Meta{Ident: 8, End: api.Key("ccc")}, 134 | } 135 | if p { 136 | r8.Placements = []*ranje.Placement{ 137 | {NodeID: "mmm", StateCurrent: api.PsActive, StateDesired: api.PsActive}, 138 | {NodeID: "nnn", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 139 | {NodeID: "ooo", StateCurrent: api.PsInactive, StateDesired: api.PsInactive}, 140 | } 141 | } 142 | 143 | return makeKeyspace(t, r1, r2, r3, r4, r5, r6, r7, r8) 144 | } 145 | 146 | func makeKeyspace(t *testing.T, ranges ...*ranje.Range) *Keyspace { 147 | pers := &FakePersister{ranges: ranges} 148 | ks, err := New(pers, ranje.R1) 149 | if err != nil { 150 | t.Fatalf("unexpected failure making keyspace: %v", err) 151 | return nil // unreachable 152 | } 153 | 154 | return ks 155 | } 156 | 157 | // mustGetRange returns a range from the given keyspace or fails the test. 158 | // TODO: This is pasted from orchestrator_test; deduplicate it. 159 | func mustGetRange(t *testing.T, ks *Keyspace, rID int) *ranje.Range { 160 | r, err := ks.GetRange(api.RangeID(rID)) 161 | if err != nil { 162 | t.Fatalf("ks.GetRange(%d): %v", rID, err) 163 | return nil // unreachable 164 | } 165 | return r 166 | } 167 | 168 | // rangeGetter partially applies mustGetRange. 169 | func rangeGetter(t *testing.T, ks *Keyspace) func(rID int) *ranje.Range { 170 | return func(rID int) *ranje.Range { 171 | return mustGetRange(t, ks, rID) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /pkg/roster/node.go: -------------------------------------------------------------------------------- 1 | package roster 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "strings" 7 | "sync" 8 | "time" 9 | 10 | "github.com/adammck/ranger/pkg/api" 11 | pb "github.com/adammck/ranger/pkg/proto/gen" 12 | "google.golang.org/grpc" 13 | ) 14 | 15 | type PlacementFailure struct { 16 | rID api.RangeID 17 | when time.Time 18 | } 19 | 20 | type Node struct { 21 | Remote api.Remote 22 | 23 | // when was this created? 
needed to drop nodes which never connect. 24 | init time.Time 25 | 26 | // When this node was last seen in service discovery. Doesn't necessarily 27 | // mean that it's actually alive, though. 28 | whenLastSeen time.Time 29 | 30 | // When this node was last successfully whenLastProbed. This means that it's 31 | // actually up and healthy enough to respond. 32 | whenLastProbed time.Time 33 | 34 | // Keep track of when placements are attempted but fail, so that we can try 35 | // placement on a different node rather than the same one again. Note that 36 | // this is (currently) volatile, and is forgotten when the controller restarts. 37 | placementFailures []PlacementFailure 38 | muPF sync.RWMutex 39 | 40 | // The gRPC connection to the actual remote node. 41 | conn *grpc.ClientConn 42 | Client pb.NodeClient 43 | 44 | // Populated by probeOne 45 | wantDrain bool 46 | ranges map[api.RangeID]*api.RangeInfo 47 | muRanges sync.RWMutex 48 | } 49 | 50 | func NewNode(remote api.Remote, conn *grpc.ClientConn) *Node { 51 | return &Node{ 52 | Remote: remote, 53 | init: time.Now(), 54 | whenLastSeen: time.Time{}, // never 55 | whenLastProbed: time.Time{}, // never 56 | placementFailures: []PlacementFailure{}, 57 | conn: conn, 58 | Client: pb.NewNodeClient(conn), 59 | ranges: make(map[api.RangeID]*api.RangeInfo), 60 | } 61 | } 62 | 63 | func (n *Node) UpdateRangeInfo(ri *api.RangeInfo) { 64 | n.muRanges.Lock() 65 | defer n.muRanges.Unlock() 66 | n.ranges[ri.Meta.Ident] = ri 67 | } 68 | 69 | func (n *Node) UpdateRangeState(rID api.RangeID, s api.RemoteState) error { 70 | n.muRanges.Lock() 71 | defer n.muRanges.Unlock() 72 | 73 | // Forget the range. If the range is not in the map, that's probably a race 74 | // condition between an RPC and a probe. That shouldn't happen, but is a 75 | // no-op anyway, so let's ignore it. 76 | if s == api.NsNotFound { 77 | delete(n.ranges, rID) 78 | return nil 79 | } 80 | 81 | ri, ok := n.ranges[rID] 82 | if ok { 83 | ri.State = s 84 | return nil 85 | } 86 | 87 | return fmt.Errorf( 88 | "missing from range cache: nID=%v, rID=%v", 89 | n.Ident(), rID) 90 | } 91 | 92 | // TODO: This is only used by tests. Maybe move it there? 93 | func (n *Node) TestString() string { 94 | n.muRanges.RLock() 95 | defer n.muRanges.RUnlock() 96 | 97 | rIDs := []api.RangeID{} 98 | for rID := range n.ranges { 99 | rIDs = append(rIDs, rID) 100 | } 101 | 102 | // Sort by (numeric) range ID to make output stable. 103 | // (Unstable sort is fine, because range IDs are unique.) 
104 | sort.Slice(rIDs, func(i, j int) bool { 105 | return uint64(rIDs[i]) < uint64(rIDs[j]) 106 | }) 107 | 108 | s := make([]string, len(rIDs)) 109 | for i, rID := range rIDs { 110 | ri := n.ranges[rID] 111 | s[i] = fmt.Sprintf("%s:%s", ri.Meta.Ident, ri.State) 112 | } 113 | 114 | return fmt.Sprintf("{%s [%s]}", n.Ident(), strings.Join(s, " ")) 115 | } 116 | 117 | func (n *Node) Get(rangeID api.RangeID) (api.RangeInfo, bool) { 118 | n.muRanges.RLock() 119 | defer n.muRanges.RUnlock() 120 | 121 | ri, ok := n.ranges[rangeID] 122 | if !ok { 123 | return api.RangeInfo{}, false 124 | } 125 | 126 | return *ri, true 127 | } 128 | 129 | func (n *Node) Ident() api.NodeID { 130 | return n.Remote.NodeID() 131 | } 132 | 133 | func (n *Node) Addr() string { 134 | return n.Remote.Addr() 135 | } 136 | 137 | func (n *Node) String() string { 138 | return fmt.Sprintf("N{%s}", n.Ident()) 139 | } 140 | 141 | func (n *Node) IsGoneFromServiceDiscovery(now time.Time) bool { 142 | return n.whenLastSeen.Before(now.Add(-10 * time.Second)) 143 | } 144 | 145 | // IsMissing returns true if this node hasn't responded to a probe in long 146 | // enough that we think it's dead, and should move its ranges elsewhere. 147 | func (n *Node) IsMissing(expireDuration time.Duration, now time.Time) bool { 148 | return (!n.whenLastProbed.IsZero()) && n.whenLastProbed.Before(now.Add(-expireDuration)) 149 | } 150 | 151 | // Utilization returns a uint in [0, 255], indicating how busy this node is. 152 | // Ranges should generally be placed on nodes with lower utilization. 153 | func (n *Node) Utilization() uint8 { 154 | n.muRanges.RLock() 155 | defer n.muRanges.RUnlock() 156 | 157 | l := len(n.ranges) 158 | if l > 255 { 159 | return 255 160 | } 161 | 162 | return uint8(l) // lol 163 | } 164 | 165 | func (n *Node) WantDrain() bool { 166 | // TODO: Use a differet lock for this! 167 | n.muRanges.RLock() 168 | defer n.muRanges.RUnlock() 169 | return n.wantDrain 170 | } 171 | 172 | // HasRange returns whether we think this node has the given range. 173 | func (n *Node) HasRange(rID api.RangeID) bool { 174 | n.muRanges.RLock() 175 | defer n.muRanges.RUnlock() 176 | ri, ok := n.ranges[rID] 177 | 178 | // Note that if we have an entry for the range, but it's NsNotFound, that 179 | // means that the node told us (in response to a command RPC) that it does 180 | // NOT have that range. I don't remember why we do that as opposed to clear 181 | // the range state. 182 | // TODO: Find out why and update this comment. Might be obsolete. 
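// Either way, an NsNotFound entry is treated here the same as having no entry at all.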
183 | 184 | return ok && !(ri.State == api.NsNotFound) 185 | } 186 | 187 | func (n *Node) PlacementFailed(rID api.RangeID, t time.Time) { 188 | n.muPF.Lock() 189 | defer n.muPF.Unlock() 190 | n.placementFailures = append(n.placementFailures, PlacementFailure{rID: rID, when: t}) 191 | } 192 | 193 | func (n *Node) PlacementFailures(rID api.RangeID, after time.Time) int { 194 | n.muPF.RLock() 195 | defer n.muPF.RUnlock() 196 | 197 | c := 0 198 | for _, pf := range n.placementFailures { 199 | if rID != api.ZeroRange && pf.rID != rID { 200 | continue 201 | } 202 | if pf.when.Before(after) { 203 | continue 204 | } 205 | 206 | c += 1 207 | } 208 | 209 | return c 210 | } 211 | -------------------------------------------------------------------------------- /cmd/dumbbal/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "os" 8 | "sort" 9 | "sync" 10 | "time" 11 | 12 | pb "github.com/adammck/ranger/pkg/proto/gen" 13 | "google.golang.org/grpc" 14 | "google.golang.org/grpc/credentials/insecure" 15 | ) 16 | 17 | func main() { 18 | w := flag.CommandLine.Output() 19 | 20 | addr := flag.String("addr", "localhost:5000", "controller address") 21 | dryRun := flag.Bool("dry-run", false, "just print operations") 22 | force := flag.Bool("force", false, "actually run operations") 23 | num := flag.Uint("num", 10, "maximum number of operations to perform") 24 | splitScore := flag.Uint64("split", 100, "score threshold to split ranges") 25 | joinScore := flag.Uint64("join", 20, "score threshold to join adjacent ranges") 26 | flag.Parse() 27 | 28 | if !*dryRun && !*force { 29 | fmt.Fprint(w, "Error: either -dry-run or -force is required\n") 30 | os.Exit(1) 31 | } 32 | 33 | // TODO: Catch signals for cancellation. 34 | ctx := context.Background() 35 | 36 | // -- Create a debug client (like rangerctl) 37 | 38 | ctxDial, cancel := context.WithTimeout(ctx, 1*time.Second) 39 | defer cancel() 40 | 41 | conn, err := grpc.DialContext(ctxDial, *addr, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock()) 42 | if err != nil { 43 | fmt.Fprintf(w, "Error dialing controller: %v\n", err) 44 | os.Exit(1) 45 | } 46 | 47 | client := pb.NewDebugClient(conn) 48 | 49 | // -- Fetch the current state of all the ranges 50 | 51 | ctxRL, cancel := context.WithTimeout(ctx, 10*time.Second) 52 | defer cancel() 53 | 54 | req := &pb.RangesListRequest{} 55 | 56 | res, err := client.RangesList(ctxRL, req) 57 | 58 | if err != nil { 59 | fmt.Fprintf(w, "Debug.RangesList returned: %v\n", err) 60 | os.Exit(1) 61 | } 62 | 63 | // -- Build list of ranges to examine 64 | 65 | type Placement struct { 66 | index int 67 | info *pb.LoadInfo 68 | } 69 | 70 | type Range struct { 71 | rID uint64 72 | placements []Placement 73 | } 74 | 75 | ranges := []Range{} 76 | 77 | for _, r := range res.Ranges { 78 | 79 | // Ignore non-active ranges (probably already being split/joined). 80 | if r.State != pb.RangeState_RS_ACTIVE { 81 | continue 82 | } 83 | 84 | placements := []Placement{} 85 | for i, p := range r.Placements { 86 | if p.Placement.State == pb.PlacementState_PS_ACTIVE { 87 | placements = append(placements, Placement{ 88 | index: i, 89 | info: p.RangeInfo.Info, 90 | }) 91 | } 92 | } 93 | 94 | // Ignore ranges with no active placements. 
95 | if len(placements) == 0 { 96 | continue 97 | } 98 | 99 | ranges = append(ranges, Range{ 100 | rID: r.Meta.Ident, 101 | placements: placements, 102 | }) 103 | } 104 | 105 | // -- Score each range 106 | 107 | type RangeWithScore struct { 108 | rID uint64 109 | score uint64 110 | split string 111 | } 112 | 113 | scores := make([]RangeWithScore, len(ranges)) 114 | 115 | for i, r := range ranges { 116 | var maxScore uint64 117 | var maxSplit string 118 | 119 | for _, p := range r.placements { 120 | s := p.info.Keys 121 | if s > maxScore { 122 | maxScore = s 123 | 124 | if len(p.info.Splits) > 0 { 125 | maxSplit = p.info.Splits[0] 126 | } 127 | } 128 | } 129 | 130 | scores[i] = RangeWithScore{ 131 | rID: r.rID, 132 | score: maxScore, 133 | split: maxSplit, 134 | } 135 | } 136 | 137 | // -- Find any ranges higher than the threshold, having splits, in descending order 138 | 139 | splits := []int{} 140 | 141 | for i, r := range scores { 142 | if r.split == "" { 143 | continue 144 | } 145 | if r.score > *splitScore { 146 | splits = append(splits, i) 147 | } 148 | } 149 | 150 | sort.Slice(splits, func(i, j int) bool { 151 | return scores[i].score > scores[j].score 152 | }) 153 | 154 | // -- Send RPCs (in parallel) to split each one 155 | 156 | var ops uint 157 | orchClient := pb.NewOrchestratorClient(conn) 158 | ctxSp, cancel := context.WithTimeout(ctx, 30*time.Second) 159 | wg := sync.WaitGroup{} 160 | 161 | for _, s := range splits { 162 | if *dryRun { 163 | fmt.Printf("Would split: range %d (score: %d) at %q\n", scores[s].rID, scores[s].score, scores[s].split) 164 | continue 165 | } 166 | if ops >= *num { 167 | continue 168 | } 169 | 170 | s := s 171 | ops += 1 172 | wg.Add(1) 173 | go func() { 174 | defer cancel() 175 | 176 | req := &pb.SplitRequest{ 177 | Range: scores[s].rID, 178 | Boundary: []byte(scores[s].split), 179 | } 180 | 181 | fmt.Printf("Splitting: range %d (score: %d)\n", scores[s].rID, scores[s].score) 182 | _, err := orchClient.Split(ctxSp, req) 183 | 184 | if err != nil { 185 | fmt.Printf("Error splitting range %d: %v\n", scores[s].rID, err) 186 | wg.Done() 187 | return 188 | } 189 | 190 | wg.Done() 191 | }() 192 | } 193 | 194 | wg.Wait() 195 | 196 | // -- Find any adjacent range pairs lower than the threshold, in ascending order 197 | 198 | joins := []int{} 199 | 200 | for i := 0; i < len(scores)-1; i++ { 201 | if scores[i].score+scores[i+1].score < *joinScore { 202 | joins = append(joins, i) 203 | } 204 | } 205 | 206 | sort.Slice(joins, func(i, j int) bool { 207 | return (scores[i].score + scores[i+1].score) < (scores[j].score + scores[j+1].score) // uhhh 208 | }) 209 | 210 | // -- Send RPCs (in parallel) to join each pair 211 | 212 | ctxJo, cancel := context.WithTimeout(ctx, 30*time.Second) 213 | 214 | for _, j := range joins { 215 | left := scores[j].rID 216 | right := scores[j+1].rID 217 | score := scores[j].score + scores[j].score 218 | if *dryRun { 219 | fmt.Printf("Would join: range %d, range %d (combined score: %d)\n", left, right, score) 220 | continue 221 | } 222 | if ops >= *num { 223 | continue 224 | } 225 | 226 | ops += 1 227 | wg.Add(1) 228 | go func() { 229 | defer cancel() 230 | 231 | req := &pb.JoinRequest{ 232 | RangeLeft: left, 233 | RangeRight: right, 234 | } 235 | 236 | fmt.Printf("Joining: range %d, range %d (combined score: %d)\n", left, right, score) 237 | _, err := orchClient.Join(ctxJo, req) 238 | 239 | if err != nil { 240 | fmt.Printf("Error joining ranges %d and %d: %v\n", left, right, err) 241 | wg.Done() 242 | return 243 | } 244 | 245 | 
wg.Done() 246 | }() 247 | } 248 | 249 | wg.Wait() 250 | 251 | // -- Exit 252 | } 253 | -------------------------------------------------------------------------------- /pkg/actuator/actuator.go: -------------------------------------------------------------------------------- 1 | package actuator 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/adammck/ranger/pkg/api" 9 | "github.com/adammck/ranger/pkg/keyspace" 10 | "github.com/adammck/ranger/pkg/ranje" 11 | "github.com/adammck/ranger/pkg/roster" 12 | ) 13 | 14 | type Impl interface { 15 | // TODO: This can probably be simplified further. Ideally just the command, 16 | // and the implementation can embed a keyspace or roster to look the other 17 | // stuff up if they want. 18 | Command(cmd api.Command, p *ranje.Placement, n *roster.Node) error 19 | } 20 | 21 | type Actuator struct { 22 | ks *keyspace.Keyspace 23 | ros roster.NodeGetter 24 | Impl Impl // TODO: Make private once orch tests fixed. 25 | 26 | // TODO: Move this into the Tick method; just pass it along. 27 | wg sync.WaitGroup 28 | 29 | // RPCs which have been sent (via orch.RPC) but not yet completed. Used to 30 | // avoid sending the same RPC redundantly every single tick. (Though RPCs 31 | // *can* be re-sent an arbitrary number of times. Rangelet will dedup them.) 32 | inFlight map[api.Command]struct{} 33 | inFlightMu sync.Mutex 34 | 35 | // TODO: Use api.Command as the key. 36 | // TODO: Trim contents periodically. 37 | failures map[api.Command][]time.Time 38 | failuresMu sync.RWMutex 39 | 40 | backoff time.Duration 41 | } 42 | 43 | func New(ks *keyspace.Keyspace, ros *roster.Roster, backoff time.Duration, impl Impl) *Actuator { 44 | return &Actuator{ 45 | ks: ks, 46 | ros: ros, 47 | Impl: impl, 48 | inFlight: map[api.Command]struct{}{}, 49 | failures: map[api.Command][]time.Time{}, 50 | backoff: backoff, 51 | } 52 | } 53 | 54 | func (a *Actuator) Run(t *time.Ticker) { 55 | // TODO: Replace this with something reactive; maybe chan from keyspace? 56 | for ; true; <-t.C { 57 | a.Tick() 58 | } 59 | } 60 | 61 | // Tick checks every placement, and actuates it (e.g. sends an RPC) if the 62 | // current state is not the desired state. 63 | func (a *Actuator) Tick() { 64 | rs, unlock := a.ks.Ranges() 65 | defer unlock() 66 | 67 | for _, r := range rs { 68 | for _, p := range r.Placements { 69 | a.consider(p) 70 | } 71 | } 72 | } 73 | 74 | func (a *Actuator) Wait() { 75 | a.wg.Wait() 76 | } 77 | 78 | var maxFailures = map[api.Action]int{ 79 | api.Prepare: 3, 80 | api.Deactivate: 3, 81 | api.Activate: 3, 82 | api.Drop: 30, // Not quite forever 83 | } 84 | 85 | func (a *Actuator) consider(p *ranje.Placement) { 86 | 87 | // nothing to do 88 | if p.StateDesired == p.StateCurrent { 89 | return 90 | } 91 | 92 | // unknown desired state 93 | if p.StateDesired == api.PsUnknown { 94 | return 95 | } 96 | 97 | action, err := actuation(p) 98 | if err != nil { 99 | // TODO: Should we return an error instead? What could the caller do with it? 100 | return 101 | } 102 | 103 | // error getting node (invalid?) 104 | n, err := a.ros.NodeByIdent(p.NodeID) 105 | if err != nil { 106 | return 107 | } 108 | 109 | // command previously failed 110 | if p.Failed(action) { 111 | return 112 | } 113 | 114 | cmd := api.Command{ 115 | RangeIdent: p.Range().Meta.Ident, 116 | NodeIdent: n.Remote.NodeID(), 117 | Action: action, 118 | } 119 | 120 | // backing off 121 | // TODO: Use a proper increasing backoff and jitter. 122 | // TODO: Also use clockwork to make this testable. 
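// For example (hypothetical values, not from this repo): with a 5s backoff and a failure recorded 2s ago, LastFailure(cmd) is after now-5s, so the command is skipped for roughly another 3s.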
123 | if a.backoff > 0 && a.LastFailure(cmd).After(time.Now().Add(-a.backoff)) { 124 | return 125 | } 126 | 127 | a.Exec(cmd, p, n) 128 | } 129 | 130 | func (a *Actuator) Exec(cmd api.Command, p *ranje.Placement, n *roster.Node) { 131 | a.inFlightMu.Lock() 132 | _, ok := a.inFlight[cmd] 133 | if !ok { 134 | a.inFlight[cmd] = struct{}{} 135 | } 136 | a.inFlightMu.Unlock() 137 | 138 | // Same command is currently in flight. This is a dupe, so drop it. 139 | if ok { 140 | return 141 | } 142 | 143 | a.wg.Add(1) 144 | 145 | go func() { 146 | 147 | // TODO: Inject some client-side chaos here, too. RPCs complete very 148 | // quickly locally, which doesn't test our in-flight thing well. 149 | 150 | err := a.Impl.Command(cmd, p, n) 151 | if err != nil { 152 | a.incrementError(cmd, p, n) 153 | } 154 | 155 | a.inFlightMu.Lock() 156 | if _, ok := a.inFlight[cmd]; !ok { 157 | // Critical this works, because could drop all RPCs. Note that we 158 | // don't release the lock, so no more RPCs even if the panic is 159 | // caught, which it shouldn't be. 160 | panic(fmt.Sprintf("no record of in-flight command: %s", cmd)) 161 | } 162 | 163 | delete(a.inFlight, cmd) 164 | a.inFlightMu.Unlock() 165 | 166 | a.wg.Done() 167 | }() 168 | } 169 | 170 | // TODO: Move this out to some outer actuator. 171 | type transitions struct { 172 | from api.PlacementState 173 | to api.PlacementState 174 | act api.Action 175 | } 176 | 177 | // TODO: Move this out to some outer actuator. 178 | // Somewhat duplicated from placement_state.go. 179 | var actuations = []transitions{ 180 | {api.PsPending, api.PsInactive, api.Prepare}, 181 | {api.PsInactive, api.PsActive, api.Activate}, 182 | {api.PsActive, api.PsInactive, api.Deactivate}, 183 | {api.PsInactive, api.PsDropped, api.Drop}, 184 | } 185 | 186 | // TODO: Move this out to some outer actuator. 187 | func actuation(p *ranje.Placement) (api.Action, error) { 188 | for _, aa := range actuations { 189 | if p.StateCurrent == aa.from && p.StateDesired == aa.to { 190 | return aa.act, nil 191 | } 192 | } 193 | 194 | return api.NoAction, fmt.Errorf( 195 | "no actuation: from=%s, to:%s", 196 | p.StateCurrent.String(), 197 | p.StateDesired.String()) 198 | } 199 | 200 | func (a *Actuator) incrementError(cmd api.Command, p *ranje.Placement, n *roster.Node) { 201 | f := 0 202 | func() { 203 | a.failuresMu.Lock() 204 | defer a.failuresMu.Unlock() 205 | 206 | t, ok := a.failures[cmd] 207 | if !ok { 208 | t = []time.Time{} 209 | } 210 | 211 | t = append(t, time.Now()) 212 | a.failures[cmd] = t 213 | 214 | f = len(t) 215 | }() 216 | 217 | if f >= maxFailures[cmd.Action] { 218 | delete(a.failures, cmd) 219 | p.SetFailed(cmd.Action, true) 220 | 221 | // TODO: Can this go somewhere else? The roster needs to know that the 222 | // failure happened so it can avoid placing ranges on the node. 223 | if cmd.Action == api.Prepare || cmd.Action == api.Activate { 224 | n.PlacementFailed(p.Range().Meta.Ident, time.Now()) 225 | } 226 | } 227 | } 228 | 229 | func (a *Actuator) LastFailure(cmd api.Command) time.Time { 230 | a.failuresMu.RLock() 231 | defer a.failuresMu.RUnlock() 232 | 233 | t, ok := a.failures[cmd] 234 | if !ok { 235 | return time.Time{} 236 | } 237 | 238 | return t[len(t)-1] 239 | } 240 | -------------------------------------------------------------------------------- /examples/kv/proto/gen/kv_grpc.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
2 | // versions: 3 | // - protoc-gen-go-grpc v1.2.0 4 | // - protoc v3.21.4 5 | // source: kv.proto 6 | 7 | package proto 8 | 9 | import ( 10 | context "context" 11 | grpc "google.golang.org/grpc" 12 | codes "google.golang.org/grpc/codes" 13 | status "google.golang.org/grpc/status" 14 | ) 15 | 16 | // This is a compile-time assertion to ensure that this generated file 17 | // is compatible with the grpc package it is being compiled against. 18 | // Requires gRPC-Go v1.32.0 or later. 19 | const _ = grpc.SupportPackageIsVersion7 20 | 21 | // KVClient is the client API for KV service. 22 | // 23 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 24 | type KVClient interface { 25 | Get(ctx context.Context, in *GetRequest, opts ...grpc.CallOption) (*GetResponse, error) 26 | Put(ctx context.Context, in *PutRequest, opts ...grpc.CallOption) (*PutResponse, error) 27 | // Only used for nodes to transfer state. 28 | // Move this to a separate service so the proxy doesn't have to implement it. 29 | Dump(ctx context.Context, in *DumpRequest, opts ...grpc.CallOption) (*DumpResponse, error) 30 | } 31 | 32 | type kVClient struct { 33 | cc grpc.ClientConnInterface 34 | } 35 | 36 | func NewKVClient(cc grpc.ClientConnInterface) KVClient { 37 | return &kVClient{cc} 38 | } 39 | 40 | func (c *kVClient) Get(ctx context.Context, in *GetRequest, opts ...grpc.CallOption) (*GetResponse, error) { 41 | out := new(GetResponse) 42 | err := c.cc.Invoke(ctx, "/kv.KV/Get", in, out, opts...) 43 | if err != nil { 44 | return nil, err 45 | } 46 | return out, nil 47 | } 48 | 49 | func (c *kVClient) Put(ctx context.Context, in *PutRequest, opts ...grpc.CallOption) (*PutResponse, error) { 50 | out := new(PutResponse) 51 | err := c.cc.Invoke(ctx, "/kv.KV/Put", in, out, opts...) 52 | if err != nil { 53 | return nil, err 54 | } 55 | return out, nil 56 | } 57 | 58 | func (c *kVClient) Dump(ctx context.Context, in *DumpRequest, opts ...grpc.CallOption) (*DumpResponse, error) { 59 | out := new(DumpResponse) 60 | err := c.cc.Invoke(ctx, "/kv.KV/Dump", in, out, opts...) 61 | if err != nil { 62 | return nil, err 63 | } 64 | return out, nil 65 | } 66 | 67 | // KVServer is the server API for KV service. 68 | // All implementations must embed UnimplementedKVServer 69 | // for forward compatibility 70 | type KVServer interface { 71 | Get(context.Context, *GetRequest) (*GetResponse, error) 72 | Put(context.Context, *PutRequest) (*PutResponse, error) 73 | // Only used for nodes to transfer state. 74 | // Move this to a separate service so the proxy doesn't have to implement it. 75 | Dump(context.Context, *DumpRequest) (*DumpResponse, error) 76 | mustEmbedUnimplementedKVServer() 77 | } 78 | 79 | // UnimplementedKVServer must be embedded to have forward compatible implementations. 
80 | type UnimplementedKVServer struct { 81 | } 82 | 83 | func (UnimplementedKVServer) Get(context.Context, *GetRequest) (*GetResponse, error) { 84 | return nil, status.Errorf(codes.Unimplemented, "method Get not implemented") 85 | } 86 | func (UnimplementedKVServer) Put(context.Context, *PutRequest) (*PutResponse, error) { 87 | return nil, status.Errorf(codes.Unimplemented, "method Put not implemented") 88 | } 89 | func (UnimplementedKVServer) Dump(context.Context, *DumpRequest) (*DumpResponse, error) { 90 | return nil, status.Errorf(codes.Unimplemented, "method Dump not implemented") 91 | } 92 | func (UnimplementedKVServer) mustEmbedUnimplementedKVServer() {} 93 | 94 | // UnsafeKVServer may be embedded to opt out of forward compatibility for this service. 95 | // Use of this interface is not recommended, as added methods to KVServer will 96 | // result in compilation errors. 97 | type UnsafeKVServer interface { 98 | mustEmbedUnimplementedKVServer() 99 | } 100 | 101 | func RegisterKVServer(s grpc.ServiceRegistrar, srv KVServer) { 102 | s.RegisterService(&KV_ServiceDesc, srv) 103 | } 104 | 105 | func _KV_Get_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 106 | in := new(GetRequest) 107 | if err := dec(in); err != nil { 108 | return nil, err 109 | } 110 | if interceptor == nil { 111 | return srv.(KVServer).Get(ctx, in) 112 | } 113 | info := &grpc.UnaryServerInfo{ 114 | Server: srv, 115 | FullMethod: "/kv.KV/Get", 116 | } 117 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 118 | return srv.(KVServer).Get(ctx, req.(*GetRequest)) 119 | } 120 | return interceptor(ctx, in, info, handler) 121 | } 122 | 123 | func _KV_Put_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 124 | in := new(PutRequest) 125 | if err := dec(in); err != nil { 126 | return nil, err 127 | } 128 | if interceptor == nil { 129 | return srv.(KVServer).Put(ctx, in) 130 | } 131 | info := &grpc.UnaryServerInfo{ 132 | Server: srv, 133 | FullMethod: "/kv.KV/Put", 134 | } 135 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 136 | return srv.(KVServer).Put(ctx, req.(*PutRequest)) 137 | } 138 | return interceptor(ctx, in, info, handler) 139 | } 140 | 141 | func _KV_Dump_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 142 | in := new(DumpRequest) 143 | if err := dec(in); err != nil { 144 | return nil, err 145 | } 146 | if interceptor == nil { 147 | return srv.(KVServer).Dump(ctx, in) 148 | } 149 | info := &grpc.UnaryServerInfo{ 150 | Server: srv, 151 | FullMethod: "/kv.KV/Dump", 152 | } 153 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 154 | return srv.(KVServer).Dump(ctx, req.(*DumpRequest)) 155 | } 156 | return interceptor(ctx, in, info, handler) 157 | } 158 | 159 | // KV_ServiceDesc is the grpc.ServiceDesc for KV service. 
160 | // It's only intended for direct use with grpc.RegisterService, 161 | // and not to be introspected or modified (even as a copy) 162 | var KV_ServiceDesc = grpc.ServiceDesc{ 163 | ServiceName: "kv.KV", 164 | HandlerType: (*KVServer)(nil), 165 | Methods: []grpc.MethodDesc{ 166 | { 167 | MethodName: "Get", 168 | Handler: _KV_Get_Handler, 169 | }, 170 | { 171 | MethodName: "Put", 172 | Handler: _KV_Put_Handler, 173 | }, 174 | { 175 | MethodName: "Dump", 176 | Handler: _KV_Dump_Handler, 177 | }, 178 | }, 179 | Streams: []grpc.StreamDesc{}, 180 | Metadata: "kv.proto", 181 | } 182 | -------------------------------------------------------------------------------- /pkg/proto/gen/controller_grpc.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 2 | // versions: 3 | // - protoc-gen-go-grpc v1.3.0 4 | // - protoc v4.23.2 5 | // source: controller.proto 6 | 7 | package proto 8 | 9 | import ( 10 | context "context" 11 | grpc "google.golang.org/grpc" 12 | codes "google.golang.org/grpc/codes" 13 | status "google.golang.org/grpc/status" 14 | ) 15 | 16 | // This is a compile-time assertion to ensure that this generated file 17 | // is compatible with the grpc package it is being compiled against. 18 | // Requires gRPC-Go v1.32.0 or later. 19 | const _ = grpc.SupportPackageIsVersion7 20 | 21 | const ( 22 | Orchestrator_Move_FullMethodName = "/ranger.Orchestrator/Move" 23 | Orchestrator_Split_FullMethodName = "/ranger.Orchestrator/Split" 24 | Orchestrator_Join_FullMethodName = "/ranger.Orchestrator/Join" 25 | ) 26 | 27 | // OrchestratorClient is the client API for Orchestrator service. 28 | // 29 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 30 | type OrchestratorClient interface { 31 | // Place a range on specific node, moving it from the node it is currently 32 | // placed on, if any. 33 | Move(ctx context.Context, in *MoveRequest, opts ...grpc.CallOption) (*MoveResponse, error) 34 | // Split a range in two. 35 | Split(ctx context.Context, in *SplitRequest, opts ...grpc.CallOption) (*SplitResponse, error) 36 | // Join two ranges into one. 37 | Join(ctx context.Context, in *JoinRequest, opts ...grpc.CallOption) (*JoinResponse, error) 38 | } 39 | 40 | type orchestratorClient struct { 41 | cc grpc.ClientConnInterface 42 | } 43 | 44 | func NewOrchestratorClient(cc grpc.ClientConnInterface) OrchestratorClient { 45 | return &orchestratorClient{cc} 46 | } 47 | 48 | func (c *orchestratorClient) Move(ctx context.Context, in *MoveRequest, opts ...grpc.CallOption) (*MoveResponse, error) { 49 | out := new(MoveResponse) 50 | err := c.cc.Invoke(ctx, Orchestrator_Move_FullMethodName, in, out, opts...) 51 | if err != nil { 52 | return nil, err 53 | } 54 | return out, nil 55 | } 56 | 57 | func (c *orchestratorClient) Split(ctx context.Context, in *SplitRequest, opts ...grpc.CallOption) (*SplitResponse, error) { 58 | out := new(SplitResponse) 59 | err := c.cc.Invoke(ctx, Orchestrator_Split_FullMethodName, in, out, opts...) 60 | if err != nil { 61 | return nil, err 62 | } 63 | return out, nil 64 | } 65 | 66 | func (c *orchestratorClient) Join(ctx context.Context, in *JoinRequest, opts ...grpc.CallOption) (*JoinResponse, error) { 67 | out := new(JoinResponse) 68 | err := c.cc.Invoke(ctx, Orchestrator_Join_FullMethodName, in, out, opts...) 
69 | if err != nil { 70 | return nil, err 71 | } 72 | return out, nil 73 | } 74 | 75 | // OrchestratorServer is the server API for Orchestrator service. 76 | // All implementations must embed UnimplementedOrchestratorServer 77 | // for forward compatibility 78 | type OrchestratorServer interface { 79 | // Place a range on specific node, moving it from the node it is currently 80 | // placed on, if any. 81 | Move(context.Context, *MoveRequest) (*MoveResponse, error) 82 | // Split a range in two. 83 | Split(context.Context, *SplitRequest) (*SplitResponse, error) 84 | // Join two ranges into one. 85 | Join(context.Context, *JoinRequest) (*JoinResponse, error) 86 | mustEmbedUnimplementedOrchestratorServer() 87 | } 88 | 89 | // UnimplementedOrchestratorServer must be embedded to have forward compatible implementations. 90 | type UnimplementedOrchestratorServer struct { 91 | } 92 | 93 | func (UnimplementedOrchestratorServer) Move(context.Context, *MoveRequest) (*MoveResponse, error) { 94 | return nil, status.Errorf(codes.Unimplemented, "method Move not implemented") 95 | } 96 | func (UnimplementedOrchestratorServer) Split(context.Context, *SplitRequest) (*SplitResponse, error) { 97 | return nil, status.Errorf(codes.Unimplemented, "method Split not implemented") 98 | } 99 | func (UnimplementedOrchestratorServer) Join(context.Context, *JoinRequest) (*JoinResponse, error) { 100 | return nil, status.Errorf(codes.Unimplemented, "method Join not implemented") 101 | } 102 | func (UnimplementedOrchestratorServer) mustEmbedUnimplementedOrchestratorServer() {} 103 | 104 | // UnsafeOrchestratorServer may be embedded to opt out of forward compatibility for this service. 105 | // Use of this interface is not recommended, as added methods to OrchestratorServer will 106 | // result in compilation errors. 
107 | type UnsafeOrchestratorServer interface { 108 | mustEmbedUnimplementedOrchestratorServer() 109 | } 110 | 111 | func RegisterOrchestratorServer(s grpc.ServiceRegistrar, srv OrchestratorServer) { 112 | s.RegisterService(&Orchestrator_ServiceDesc, srv) 113 | } 114 | 115 | func _Orchestrator_Move_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 116 | in := new(MoveRequest) 117 | if err := dec(in); err != nil { 118 | return nil, err 119 | } 120 | if interceptor == nil { 121 | return srv.(OrchestratorServer).Move(ctx, in) 122 | } 123 | info := &grpc.UnaryServerInfo{ 124 | Server: srv, 125 | FullMethod: Orchestrator_Move_FullMethodName, 126 | } 127 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 128 | return srv.(OrchestratorServer).Move(ctx, req.(*MoveRequest)) 129 | } 130 | return interceptor(ctx, in, info, handler) 131 | } 132 | 133 | func _Orchestrator_Split_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 134 | in := new(SplitRequest) 135 | if err := dec(in); err != nil { 136 | return nil, err 137 | } 138 | if interceptor == nil { 139 | return srv.(OrchestratorServer).Split(ctx, in) 140 | } 141 | info := &grpc.UnaryServerInfo{ 142 | Server: srv, 143 | FullMethod: Orchestrator_Split_FullMethodName, 144 | } 145 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 146 | return srv.(OrchestratorServer).Split(ctx, req.(*SplitRequest)) 147 | } 148 | return interceptor(ctx, in, info, handler) 149 | } 150 | 151 | func _Orchestrator_Join_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 152 | in := new(JoinRequest) 153 | if err := dec(in); err != nil { 154 | return nil, err 155 | } 156 | if interceptor == nil { 157 | return srv.(OrchestratorServer).Join(ctx, in) 158 | } 159 | info := &grpc.UnaryServerInfo{ 160 | Server: srv, 161 | FullMethod: Orchestrator_Join_FullMethodName, 162 | } 163 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 164 | return srv.(OrchestratorServer).Join(ctx, req.(*JoinRequest)) 165 | } 166 | return interceptor(ctx, in, info, handler) 167 | } 168 | 169 | // Orchestrator_ServiceDesc is the grpc.ServiceDesc for Orchestrator service. 170 | // It's only intended for direct use with grpc.RegisterService, 171 | // and not to be introspected or modified (even as a copy) 172 | var Orchestrator_ServiceDesc = grpc.ServiceDesc{ 173 | ServiceName: "ranger.Orchestrator", 174 | HandlerType: (*OrchestratorServer)(nil), 175 | Methods: []grpc.MethodDesc{ 176 | { 177 | MethodName: "Move", 178 | Handler: _Orchestrator_Move_Handler, 179 | }, 180 | { 181 | MethodName: "Split", 182 | Handler: _Orchestrator_Split_Handler, 183 | }, 184 | { 185 | MethodName: "Join", 186 | Handler: _Orchestrator_Join_Handler, 187 | }, 188 | }, 189 | Streams: []grpc.StreamDesc{}, 190 | Metadata: "controller.proto", 191 | } 192 | -------------------------------------------------------------------------------- /pkg/rangelet/mirror/mirror.go: -------------------------------------------------------------------------------- 1 | // Package mirror provides a range assignment mirror, which can maintain a map 2 | // of all range assignments via the streaming Node.Ranges endpoint provided by 3 | // the rangelet. 
This is useful for proxies and clients wishing to forward 4 | // requests to the relevant node(s). 5 | // 6 | // Note that this interface is eventually consistent, in that the orchestrator 7 | // doesn't wait for clients to ack changes to the keyspace or anything like it. 8 | // This interface doesn't even care what the orchestrator or even keyspace say; 9 | // it simply reports what placements nodes report when probed or in response to 10 | // actuations. The clients' mirror will thus always be a bit out of date. 11 | package mirror 12 | 13 | import ( 14 | "context" 15 | "errors" 16 | "io" 17 | "log" 18 | "sync" 19 | 20 | "github.com/adammck/ranger/pkg/api" 21 | "github.com/adammck/ranger/pkg/discovery" 22 | "github.com/adammck/ranger/pkg/proto/conv" 23 | pb "github.com/adammck/ranger/pkg/proto/gen" 24 | "google.golang.org/grpc" 25 | ) 26 | 27 | // Result is returned by the Find method. Don't use it for anything else. 28 | type Result struct { 29 | RangeID api.RangeID 30 | Remote api.Remote 31 | State api.RemoteState 32 | } 33 | 34 | func (r *Result) NodeID() api.NodeID { 35 | return r.Remote.NodeID() 36 | } 37 | 38 | type Dialler func(context.Context, api.Remote) (*grpc.ClientConn, error) 39 | 40 | type Mirror struct { 41 | ctx context.Context 42 | disc discovery.Getter 43 | 44 | nodes map[api.NodeID]*node 45 | nodesMu sync.RWMutex 46 | 47 | // Dialler takes a remote and returns a gRPC client connection. This is only 48 | // parameterized for testing. 49 | dialler Dialler 50 | } 51 | 52 | type node struct { 53 | closer chan bool 54 | conn *grpc.ClientConn 55 | remote api.Remote 56 | ranges []api.RangeInfo 57 | rangesMu sync.RWMutex 58 | } 59 | 60 | func (n *node) stop() { 61 | close(n.closer) 62 | n.conn.Close() 63 | } 64 | 65 | func New(disc discovery.Discoverer) *Mirror { 66 | m := &Mirror{ 67 | ctx: context.Background(), 68 | nodes: map[api.NodeID]*node{}, 69 | } 70 | m.disc = disc.Discover("node", m.add, m.remove) 71 | return m 72 | } 73 | 74 | func (m *Mirror) WithDialler(d Dialler) *Mirror { 75 | m.dialler = d 76 | return m 77 | } 78 | 79 | func (m *Mirror) add(rem api.Remote) { 80 | log.Printf("Adding: %s", rem.NodeID()) 81 | 82 | conn, err := m.dialler(m.ctx, rem) 83 | if err != nil { 84 | log.Printf("Error dialling new remote: %v", err) 85 | return 86 | } 87 | 88 | n := &node{ 89 | closer: make(chan bool, 1), 90 | conn: conn, 91 | remote: rem, 92 | } 93 | 94 | m.nodesMu.Lock() 95 | m.nodes[rem.NodeID()] = n 96 | m.nodesMu.Unlock() 97 | 98 | go run(conn, n) 99 | } 100 | 101 | func (m *Mirror) remove(rem api.Remote) { 102 | nID := rem.NodeID() 103 | log.Printf("Removing: %s", nID) 104 | 105 | m.nodesMu.Lock() 106 | 107 | n, ok := m.nodes[nID] 108 | if !ok { 109 | m.nodesMu.Unlock() 110 | return 111 | } 112 | 113 | delete(m.nodes, nID) 114 | 115 | // Release and let closers happen outside the lock. 116 | m.nodesMu.Unlock() 117 | 118 | n.stop() 119 | } 120 | 121 | func (m *Mirror) Stop() error { 122 | 123 | err := m.disc.Stop() 124 | if err != nil { 125 | // Not ideal, but we still want to stop the nodes. 126 | log.Printf("Error stopping discovery getter: %s", err) 127 | } 128 | 129 | m.nodesMu.Lock() 130 | defer m.nodesMu.Unlock() 131 | 132 | // Call all closers concurrently. 
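// (Each stop closes the node's closer channel and its gRPC connection; see node.stop above.)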
133 | wg := sync.WaitGroup{} 134 | for k := range m.nodes { 135 | wg.Add(1) 136 | n := m.nodes[k] 137 | go func() { 138 | n.stop() 139 | wg.Done() 140 | }() 141 | } 142 | wg.Wait() 143 | 144 | m.nodes = map[api.NodeID]*node{} 145 | 146 | return nil 147 | } 148 | 149 | var ErrNoSuchClientConn = errors.New("no such connection") 150 | 151 | // Conn returns a connection to the given node ID. This is just a convenience 152 | // for callers; the mirror needs to have a connection to every node in order to 153 | // receive their range assignments, so callers may wish to reuse it to exchange 154 | // other RPCs rather than creating a new one. 155 | // 156 | // Note that because the connection is owned by the Mirror, may be closed while 157 | // the user is trying to use it. If that isn't acceptable, callers should manage 158 | // their own connections. 159 | func (m *Mirror) Conn(nID api.NodeID) (*grpc.ClientConn, bool) { 160 | m.nodesMu.RLock() 161 | n, ok := m.nodes[nID] 162 | m.nodesMu.RUnlock() 163 | return n.conn, ok 164 | } 165 | 166 | func (m *Mirror) Find(key api.Key, states ...api.RemoteState) []Result { 167 | results := []Result{} 168 | 169 | m.nodesMu.RLock() 170 | defer m.nodesMu.RUnlock() 171 | 172 | // look i'm in a hurry here okay 173 | for _, n := range m.nodes { 174 | n.rangesMu.RLock() 175 | defer n.rangesMu.RUnlock() 176 | 177 | for _, ri := range n.ranges { 178 | if ri.Meta.Contains(key) { 179 | 180 | // Skip if not in one of given states. 181 | if len(states) > 0 { 182 | ok := false 183 | for _, s := range states { 184 | if ri.State == s { 185 | ok = true 186 | break 187 | } 188 | } 189 | if !ok { 190 | continue 191 | } 192 | } 193 | 194 | results = append(results, Result{ 195 | RangeID: ri.Meta.Ident, 196 | Remote: n.remote, 197 | State: ri.State, 198 | }) 199 | } 200 | } 201 | } 202 | 203 | return results 204 | } 205 | 206 | func run(conn *grpc.ClientConn, node *node) { 207 | client := pb.NewNodeClient(conn) 208 | 209 | req := &pb.RangesRequest{} 210 | stream, err := client.Ranges(context.Background(), req) 211 | if err != nil { 212 | log.Printf("Error fetching ranges: %s", err) 213 | return 214 | } 215 | 216 | for { 217 | res, err := stream.Recv() 218 | if err == io.EOF { 219 | break 220 | } 221 | if err != nil { 222 | log.Printf("Error fetching ranges: %s", err) 223 | break 224 | } 225 | 226 | update(node, res) 227 | } 228 | } 229 | 230 | func update(n *node, res *pb.RangesResponse) { 231 | log.Printf("res: %v", res) 232 | 233 | meta, err := conv.MetaFromProto(res.Meta) 234 | if err != nil { 235 | // This should never happen. 236 | log.Printf("Error parsing Range Meta from proto: %s", err) 237 | return 238 | } 239 | 240 | state := conv.RemoteStateFromProto(res.State) 241 | if state == api.NsUnknown { 242 | // This should also never happen. 243 | log.Printf("Error updating range state: got NsUnknown for rid=%s", meta.Ident) 244 | return 245 | } 246 | 247 | // TODO: This is pretty coarse, maybe optimize. 248 | n.rangesMu.Lock() 249 | defer n.rangesMu.Unlock() 250 | 251 | // Find the range by Range ID, and update the state. Meta is immutable after 252 | // range construction, so assume it hasn't changed. God help us if it has. 253 | for i := range n.ranges { 254 | if n.ranges[i].Meta.Ident == meta.Ident { 255 | if state == api.NsNotFound { 256 | // Special case: Remove the range if it's NotFound. 257 | x := len(n.ranges) - 1 258 | n.ranges[i] = n.ranges[x] 259 | n.ranges = n.ranges[:x] 260 | } else { 261 | // Normal case: Update the state. 
262 | n.ranges[i].State = state 263 | } 264 | return 265 | } 266 | } 267 | 268 | // Haven't returned? First time we're seeing this range, so insert it unless 269 | // it was NotFound (which could happen if the proxy is starting up just as a 270 | // node is dropping a range, but probably never otherwise). 271 | 272 | if state == api.NsNotFound { 273 | return 274 | } 275 | 276 | n.ranges = append(n.ranges, api.RangeInfo{ 277 | Meta: meta, 278 | State: state, 279 | }) 280 | } 281 | --------------------------------------------------------------------------------