├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── cluster-test └── test-orchestrator │ ├── analysis_examples_test.go │ ├── assertion.go │ ├── assertion_test.go │ ├── calc.go │ ├── calc_test.go │ ├── command.go │ ├── measurement.go │ ├── scenario.go │ ├── section_scanner.go │ ├── stat_analysis.go │ ├── stat_ingester.go │ ├── stat_ingester_test.go │ ├── test_yaml_parser.go │ └── udp_scanner.go ├── docs ├── architecture_design.md ├── community.md ├── conf.py ├── getting_started.md ├── index.rst ├── make-partition.md ├── partitions.md ├── programming_ringpop.md ├── references.md └── running_ringpop.md ├── package.json ├── schema ├── admin-lookup.json ├── admin-stats-response.json ├── change.json ├── labels.json ├── protocol-join-request.json ├── protocol-join-response.json ├── protocol-ping-request.json ├── protocol-ping-response.json ├── protocol-pingreq-request.json ├── protocol-pingreq-response.json └── status.json ├── test ├── README.md ├── admin-tests.js ├── bidir-full-sync-tests.js ├── double-reincarnation.js ├── events.js ├── fake-node.js ├── identity.js ├── incarnation-no-tests.js ├── it-tests.js ├── join-tests.js ├── labels.js ├── lookup-tests.js ├── membership-checksum.js ├── package.json ├── partition-healing-tests.js ├── piggyback-tests.js ├── ping-req-tests.js ├── ping-tests.js ├── protocol-join.js ├── protocol-ping-req.js ├── protocol-ping.js ├── reaping-faulty-nodes.js ├── reincarnation-source.js ├── ringpop-assert.js ├── self-eviction-tests.js ├── tap-filter ├── test-coordinator.js ├── test-util.js └── util.js └── tools ├── generate-hosts.js ├── grafana-dash ├── README.md ├── config │ └── common.json └── gen-dashboard.js ├── make_partition └── tick-cluster.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/* 2 | hosts.json 3 | npm-debug.log 4 | test/.DS_Store 5 | test/node_modules 6 | -------------------------------------------------------------------------------- 
/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "admin"] 2 | path = admin 3 | url = git@github.com:uber/ringpop-admin.git 4 | [submodule "lang/go"] 5 | path = lang/go 6 | url = git@github.com:uber/ringpop-go.git 7 | [submodule "lang/node"] 8 | path = lang/node 9 | url = git@github.com:uber/ringpop-node.git 10 | [submodule "ui"] 11 | path = ui 12 | url = git@github.com:uber/ringpop-ui.git 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Uber Technologies, Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean-tick: 2 | rm -rf node_modules 3 | 4 | install-node: 5 | npm install lang/node/ --prefix ./lang/node 6 | 7 | install-tick: 8 | npm install 9 | 10 | tick-node: 11 | ./tools/tick-cluster.js -n 10 -i node lang/node/main.js 12 | 13 | update-all: 14 | git submodule foreach git pull origin master 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ringpop-common 2 | Ringpop is a library for scalable, fault-tolerant application layer sharding. 3 | There are currently implementations of it in Node.js and Go. ringpop-common 4 | is the home for cross-language compatible tools and documentation for all 5 | of Ringpop's implementations. 6 | 7 | # Layout 8 | * admin/ is the place to find CLI tools for live operation of Ringpop 9 | * lang/ is the parent directory for all implementations (go, node) of Ringpop 10 | * test/ is the home for cross-platform integration tests 11 | * tools/ is the home for cross-platform development tools 12 | * ui/ is the home of a web visualization tool for Ringpop 13 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/analysis_examples_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "strings" 7 | ) 8 | 9 | // Check that a section scanner get's the stats between the time labels t0 and t1 10 | func ExampleSectionScanner() { 11 | s := bufio.NewScanner(strings.NewReader(stats)) 12 | scanner, _ := NewSectionScanner(s, "t0", "t1") 13 | 14 | for scanner.Scan() { 15 | fmt.Println(scanner.Text()) 16 | } 17 | 18 | // Output: 19 | // 
2016-06-15T16:11:08.246816444Z|ringpop.172_18_24_220_3000.protocol.frequency:200.833341|ms 20 | // 2016-06-15T16:11:08.246954825Z|ringpop.172_18_24_220_3000.protocol.delay:200|ms 21 | // 2016-06-15T16:11:08.247013319Z|ringpop.172_18_24_220_3000.changes.disseminate:0|g 22 | // 2016-06-15T16:11:08.247032205Z|ringpop.172_18_24_220_3000.ping.send:1|c 23 | // 2016-06-15T16:11:08.247344365Z|ringpop.172_18_24_220_3008.ping.recv:1|c 24 | } 25 | 26 | func ExampleCountAnalysis() { 27 | s := bufio.NewScanner(strings.NewReader(stats)) 28 | c1, _ := CountAnalysis(s, "ping.send") 29 | s = bufio.NewScanner(strings.NewReader(stats)) 30 | c2, _ := CountAnalysis(s, "changes.disseminate") 31 | fmt.Println(c1, c2) 32 | 33 | // Output: 34 | // 2 4 35 | } 36 | 37 | var stats = ` 38 | 2016-06-15T16:11:08.198146603Z|ringpop.172_18_24_220_3007.protocol.delay:200|ms 39 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3007.changes.disseminate:0|g 40 | 2016-06-15T16:11:08.198212784Z|ringpop.172_18_24_220_3007.ping.send:1|c 41 | 2016-06-15T16:11:08.198622397Z|ringpop.172_18_24_220_3000.ping.recv:1|c 42 | 2016-06-15T16:11:08.198694026Z|ringpop.172_18_24_220_3000.changes.disseminate:0|g 43 | 2016-06-15T16:11:08.19884693Z|ringpop.172_18_24_220_3007.ping:0.593162|ms 44 | label:t0|cmd: kill 1 45 | 2016-06-15T16:11:08.246816444Z|ringpop.172_18_24_220_3000.protocol.frequency:200.833341|ms 46 | 2016-06-15T16:11:08.246954825Z|ringpop.172_18_24_220_3000.protocol.delay:200|ms 47 | 2016-06-15T16:11:08.247013319Z|ringpop.172_18_24_220_3000.changes.disseminate:0|g 48 | 2016-06-15T16:11:08.247032205Z|ringpop.172_18_24_220_3000.ping.send:1|c 49 | 2016-06-15T16:11:08.247344365Z|ringpop.172_18_24_220_3008.ping.recv:1|c 50 | label:t1|cmd: wait-for-stable 51 | 2016-06-15T16:11:08.247388872Z|ringpop.172_18_24_220_3008.changes.disseminate:0|g 52 | 2016-06-15T16:11:08.247506122Z|ringpop.172_18_24_220_3000.ping:0.447996|ms 53 | 
2016-06-15T16:11:08.25451275Z|ringpop.172_18_24_220_3003.protocol.frequency:203.362966|ms 54 | 2016-06-15T16:11:08.254576313Z|ringpop.172_18_24_220_3003.protocol.delay:200|ms 55 | ` 56 | 57 | func ExampleChecksumAnalysis() { 58 | s := bufio.NewScanner(strings.NewReader(csumStats)) 59 | csums, _ := ChecksumsAnalysis(s) 60 | fmt.Println(csums) 61 | 62 | // Output: 63 | // 3 64 | } 65 | 66 | var csumStats = ` 67 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3005.checksum:4321|g 68 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3002.checksum:1234|g 69 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3000.checksum:1000|g 70 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3001.checksum:1234|g 71 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3003.checksum:1234|g 72 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3004.checksum:4321|g 73 | 2016-06-17T11:29:18.254046798Z|ringpop.172_18_24_192_3006.checksum:4321|g 74 | ` 75 | 76 | func ExampleConvergenceTimeAnalysis() { 77 | s := bufio.NewScanner(strings.NewReader(convtimeStats)) 78 | convtime, _ := ConvergenceTimeAnalysis(s) 79 | fmt.Println(convtime) 80 | 81 | // Output: 82 | // 8s 83 | } 84 | 85 | // time between the first and last recoreded change is 8 seconds 86 | var convtimeStats = ` 87 | 2016-06-17T11:29:15.0Z|ringpop.172_18_24_192_3000.noise 88 | 2016-06-17T11:29:16.0Z|ringpop.172_18_24_192_3000.noise 89 | 2016-06-17T11:29:17.0Z|ringpop.172_18_24_192_3000.noise 90 | 2016-06-17T11:29:18.0Z|ringpop.172_18_24_192_3000.membership-set.suspect:1|c 91 | 2016-06-17T11:29:19.0Z|ringpop.172_18_24_192_3001.membership-set.suspect:1|c 92 | 2016-06-17T11:29:20.0Z|ringpop.172_18_24_192_3002.membership-set.suspect:1|c 93 | 2016-06-17T11:29:21.0Z|ringpop.172_18_24_192_3002.membership-set.suspect:1|c 94 | 2016-06-17T11:29:21.0Z|ringpop.172_18_24_192_3000.noise 95 | 2016-06-17T11:29:21.0Z|ringpop.172_18_24_192_3000.noise 96 | 
2016-06-17T11:29:22.0Z|ringpop.172_18_24_192_3003.membership-set.suspect:1|c 97 | 2016-06-17T11:29:23.0Z|ringpop.172_18_24_192_3003.membership-set.suspect:1|c 98 | 2016-06-17T11:29:24.0Z|ringpop.172_18_24_192_3003.membership-set.suspect:1|c 99 | 2016-06-17T11:29:25.0Z|ringpop.172_18_24_192_3004.membership-set.suspect:1|c 100 | 2016-06-17T11:29:26.0Z|ringpop.172_18_24_192_3005.membership-set.suspect:1|c 101 | 2016-06-17T11:29:27.0Z|ringpop.172_18_24_192_3000.noise 102 | 2016-06-17T11:29:28.0Z|ringpop.172_18_24_192_3000.noise 103 | 2016-06-17T11:29:29.0Z|ringpop.172_18_24_192_3000.noise 104 | ` 105 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/assertion.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | package main 22 | 23 | import ( 24 | "errors" 25 | "fmt" 26 | "log" 27 | "reflect" 28 | "time" 29 | ) 30 | 31 | // An Assertion checks if a Value is equal or is contained by an interval. 32 | type Assertion struct { 33 | Type AssertionType // can be is or in 34 | 35 | // This is the Value of the Assertion in case of AssertionTypeIs 36 | // or the first Value of the interval in case of AssertionTypeIn. 37 | V1 Value 38 | 39 | // The second Value of the interval in case of AssertionTypeIn. 40 | // This value is ignored in case of AssertionTypeIs. 41 | V2 Value 42 | } 43 | 44 | // AssertionType is the type (in or is) of an Assertion 45 | type AssertionType string 46 | 47 | const ( 48 | // AssertionTypeIs is the type that is used for exact comparisons 49 | AssertionTypeIs AssertionType = "is" 50 | 51 | // AssertionTypeIn is the type that is used to check a value is contained 52 | // by an interval. 53 | AssertionTypeIn AssertionType = "in" 54 | ) 55 | 56 | // String converts an assertion to its string representation. Some examples: 57 | // 58 | // - is 4 59 | // - in (90, 110) 60 | // - in (1s, 2s) 61 | func (a *Assertion) String() string { 62 | if a == nil { 63 | return "" 64 | } 65 | if a.Type == AssertionTypeIs { 66 | return fmt.Sprintf("is %v", a.V1) 67 | } 68 | if a.Type == AssertionTypeIn { 69 | return fmt.Sprintf("in (%v,%v)", a.V1, a.V2) 70 | } 71 | 72 | log.Fatalf("Unknown assertion %s", a.Type) 73 | return "" 74 | } 75 | 76 | // Assert makes the assertion. Returns an error if the assertion failed. 
77 | func (a *Assertion) Assert(v Value) error { 78 | if a == nil { 79 | return nil 80 | } 81 | 82 | switch a.Type { 83 | case AssertionTypeIs: 84 | return equalsAssert(v, a.V1) 85 | case AssertionTypeIn: 86 | return rangeAssert(v, a.V1, a.V2) 87 | } 88 | 89 | msg := fmt.Sprintf("FAILED assertion: type must be 'in' or 'is' but is %v", a.Type) 90 | return errors.New(msg) 91 | } 92 | 93 | // isAssert checks if the Values are equal and returns an error otherwise. 94 | func equalsAssert(v, V1 Value) error { 95 | if reflect.DeepEqual(v, V1) { 96 | return nil 97 | } 98 | 99 | msg := fmt.Sprintf("FAILED assertion: expected %v got %v", V1, v) 100 | return errors.New(msg) 101 | } 102 | 103 | // inAssert checks if the Value is contained by the interval (V1, V2) and 104 | // returns an error otherwise. 105 | func rangeAssert(v, V1, V2 Value) error { 106 | // check if types match 107 | tv := reflect.TypeOf(v) 108 | tv1 := reflect.TypeOf(V1) 109 | tv2 := reflect.TypeOf(V2) 110 | if tv != tv1 || tv != tv2 { 111 | msg := fmt.Sprintf("FAILED assertion: type mismatch %v (%v,%v)", v, V1, V2) 112 | return errors.New(msg) 113 | } 114 | 115 | // convert to float for easy comparison 116 | f := toFloat64(v) 117 | f1 := toFloat64(V1) 118 | f2 := toFloat64(V2) 119 | 120 | if f < f1 || f2 < f { 121 | msg := fmt.Sprintf("FAILED assertion: %v not in (%v,%v)", v, V1, V2) 122 | return errors.New(msg) 123 | } 124 | 125 | return nil 126 | } 127 | 128 | // toFloat64 converts a value into a float64. Even is the value is a duration 129 | // because time.Duration is a uint64 which we can convert to a float64. 
130 | func toFloat64(v Value) float64 { 131 | if f, ok := v.(float64); ok { 132 | return f 133 | } 134 | return float64(v.(time.Duration)) 135 | } 136 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/assertion_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | func ExampleIsAssertion() { 9 | a := &Assertion{AssertionTypeIs, 2 * time.Second, nil} 10 | fmt.Println(a) 11 | fmt.Println(a.Assert(2 * time.Second)) 12 | fmt.Println(a.Assert(0 * time.Second).Error()) 13 | fmt.Println(a.Assert(3 * time.Second).Error()) 14 | fmt.Println() 15 | 16 | // Output: 17 | // is 2s 18 | // 19 | // FAILED assertion: expected 2s got 0 20 | // FAILED assertion: expected 2s got 3s 21 | } 22 | 23 | func ExampleInAssertion() { 24 | a := &Assertion{AssertionTypeIn, 1.0, 3.0} 25 | fmt.Println(a) 26 | fmt.Println(a.Assert(0.0)) 27 | fmt.Println(a.Assert(1.0)) 28 | fmt.Println(a.Assert(2.0)) 29 | fmt.Println(a.Assert(3.0)) 30 | fmt.Println(a.Assert(4.0)) 31 | fmt.Println(a.Assert(2 * time.Second)) 32 | 33 | // in (1,3) 34 | // FAILED assertion: 0 not in (1,3) 35 | // 36 | // 37 | // 38 | // FAILED assertion: 4 not in (1,3) 39 | // FAILED assertion: type mismatch 2s (1,3) 40 | } 41 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/calc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | // This file contains a utility for evaluating arithmetic expressions. This is 22 | // useful when parsing the tests because, for example, we want to assert that the 23 | // number of suspect declarations in a split brain is equal to 24 | // "N/2 * N/2 * 2" where N is the cluster size. 25 | 26 | package main 27 | 28 | import ( 29 | "fmt" 30 | "go/ast" 31 | "go/parser" 32 | "strconv" 33 | 34 | "github.com/pkg/errors" 35 | ) 36 | 37 | // Eval evaluates the value of an expression to a float64. It can be used 38 | // as a simple calculator. e.g: `"2+3*4" -> 14.0`. 
39 | func Eval(expression string) (f float64, err error) { 40 | // recover from panic and change the err return value 41 | defer func() { 42 | if r := recover(); r != nil { 43 | err = errors.New(fmt.Sprint(r)) 44 | } 45 | }() 46 | 47 | // parse expression 48 | expr, err := parser.ParseExpr(expression) 49 | if err != nil { 50 | msg := fmt.Sprintf("eval error for expression: \"%s\"", expression) 51 | return 0, errors.New(msg) 52 | } 53 | 54 | // evaluate expression 55 | return eval(expr), nil 56 | } 57 | 58 | // eval evaluates an ast.Expr, panicking when there is a problem. 59 | // This function is called by Eval which recovers from the panics. 60 | func eval(expr ast.Expr) float64 { 61 | switch e := expr.(type) { 62 | case *ast.ParenExpr: 63 | return eval(e.X) 64 | 65 | case *ast.BinaryExpr: 66 | return evalBin(e) 67 | 68 | case *ast.BasicLit: 69 | v, err := strconv.ParseFloat(e.Value, 64) 70 | if err != nil { 71 | panic(fmt.Sprintf("cannot convert BasicLit to float, %v", e)) 72 | } 73 | return v 74 | } 75 | 76 | panic(fmt.Sprintf("calculator doesn't handle type %T", expr)) 77 | return 0 78 | } 79 | 80 | // evalBin executes the binary operator "+", "-", "*" or "/" on two 81 | func evalBin(expr *ast.BinaryExpr) float64 { 82 | x := eval(expr.X) 83 | y := eval(expr.Y) 84 | 85 | switch expr.Op.String() { 86 | case "*": 87 | return x * y 88 | case "/": 89 | return x / y 90 | case "+": 91 | return x + y 92 | case "-": 93 | return x - y 94 | } 95 | 96 | panic(fmt.Sprintf("unsupported operator %s", expr.Op)) 97 | return 0 98 | } 99 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/calc_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func ExampleEval() { 6 | fmt.Println(Eval("")) 7 | fmt.Println(Eval("1s")) 8 | fmt.Println(Eval("2+(3*4")) 9 | fmt.Println(Eval("2+3*4)")) 10 | fmt.Println(Eval("(1.5+)*(3+4)")) 11 | 12 | 
fmt.Println(Eval("123")) 13 | fmt.Println(Eval("12.34")) 14 | fmt.Println(Eval("2+3*4")) 15 | fmt.Println(Eval("2*(3+5)")) 16 | fmt.Println(Eval("(1.5*3)*(3+4)")) 17 | fmt.Println(Eval("(1.5*(3))*(3+4)")) 18 | 19 | // Output: 20 | // 0 eval error for expression: "" 21 | // 0 eval error for expression: "1s" 22 | // 0 eval error for expression: "2+(3*4" 23 | // 0 eval error for expression: "2+3*4)" 24 | // 0 eval error for expression: "(1.5+)*(3+4)" 25 | // 123 26 | // 12.34 27 | // 14 28 | // 16 29 | // 31.5 30 | // 31.5 31 | 32 | } 33 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/command.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | package main 22 | 23 | import ( 24 | "fmt" 25 | "strings" 26 | ) 27 | 28 | var ringpopPort = "3000" 29 | 30 | // Command runs command that affect the cluster in different ways. Commands are 31 | // commenly used to form the Script field of the Scenario struct. 32 | type Command struct { 33 | // Indicates when the command is run. 34 | Label string 35 | 36 | // Cmd can be one of: 37 | // - `cluster-kill` 38 | // - `cluster-start` 39 | // - `cluster-rolling-restart` 40 | // - `network-drop ` 41 | // - `network-delay ` 42 | // - `wait-for-stable` 43 | Cmd string 44 | 45 | // The arguments of the command. 46 | Args []string 47 | } 48 | 49 | // String converts a Command to a string. 50 | func (cmd Command) String() string { 51 | return fmt.Sprintf("%s %s", cmd.Cmd, strings.Join(cmd.Args, " ")) 52 | } 53 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/measurement.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | package main 22 | 23 | import ( 24 | "fmt" 25 | "strings" 26 | 27 | "github.com/pkg/errors" 28 | ) 29 | 30 | // Value should ever be either a float64 or a time.Duration. 31 | type Value interface{} 32 | 33 | // A Measurement generates a Value which can be either a duration or a number 34 | // from ringpop stats. The Measurement can count stat occurrences, analyze 35 | // convergence time, and analyze membership checksum convergence. 36 | // 37 | // The Measurement also carries an assertion that determines whether the 38 | // measured Value is as expected. 39 | type Measurement struct { 40 | // Selects a window of the stats that we want to measure 41 | // the values should be equal to one of the Labels in the 42 | // Commands of the script. 43 | Start, End string 44 | 45 | // One of count, convtime or checksums. 46 | Quantity string 47 | 48 | // Currently only count accepts an argument, which is the statpath of 49 | // the stats we want to count. 50 | Args []string 51 | 52 | // The expected result of this measurement. 53 | Assertion *Assertion 54 | } 55 | 56 | // String converts the Measurement into a string. 57 | func (m *Measurement) String() string { 58 | strs := []string{m.Quantity} 59 | strs = append(strs, m.Args...) 60 | if m.Assertion != nil { 61 | strs = append(strs, m.Assertion.String()) 62 | } 63 | return strings.Join(strs, " ") 64 | } 65 | 66 | // Measure performs the measurement and returns the resulting value on stats 67 | // that are extracted from the given Scanner. 
68 | func (m *Measurement) Measure(s Scanner) (Value, error) { 69 | // select stats window we want to to measure on 70 | var err error 71 | s, err = NewSectionScanner(s, m.Start, m.End) 72 | if err != nil { 73 | return nil, errors.Wrapf(err, "measure %s\n", m) 74 | } 75 | switch m.Quantity { 76 | case "convtime": 77 | convtime, err := ConvergenceTimeAnalysis(s) 78 | if err != nil { 79 | return nil, errors.Wrapf(err, "measure %s\n", m) 80 | } 81 | return convtime, nil 82 | case "checksums": 83 | csums, err := ChecksumsAnalysis(s) 84 | if err != nil { 85 | return nil, errors.Wrapf(err, "measure %s\n", m) 86 | } 87 | return float64(csums), nil 88 | case "count": 89 | if len(m.Args) != 1 { 90 | msg := fmt.Sprintf("count expects one argument, has %v", m.Args) 91 | return nil, errors.New(msg) 92 | } 93 | statpath := m.Args[0] 94 | count, err := CountAnalysis(s, statpath) 95 | if err != nil { 96 | return nil, errors.Wrapf(err, "measure %s\n", m) 97 | } 98 | return float64(count), nil 99 | } 100 | 101 | msg := fmt.Sprintf("no such quantity: %s", m.Quantity) 102 | return nil, errors.New(msg) 103 | } 104 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/scenario.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | package main 22 | 23 | // A Scenario is a structure that captures the information of a single 24 | // cluster-test. It contains a script of commands that exercise different 25 | // failure conditions on a ringpop cluster. After the script has run, different 26 | // measurements on the ringpop stats that the cluster emits are executed. 27 | // It is possible to add constraints in the form of Assertions to these 28 | // measurements. 
29 | type Scenario struct { 30 | Name string 31 | Size int 32 | Desc string 33 | 34 | Script []*Command 35 | Measure []*Measurement 36 | } 37 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/section_scanner.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | // A SectionScanner wraps a scanner and filters out all data before the start- 22 | // label and after the end-label, keeping only the data between the labels. 23 | // A label indicates when what command of the script of a scenario is ran. The 24 | // lines that look like "label:t0|cmd: kill 1"" are inserted into the ringpop 25 | // stats. 
package main

import (
	"errors"
	"strings"
)

// Scanner is modelled after bufio.Scanner. It provides an interface that is
// commonly used in the following pattern.
//
// ```
// for s.Scan() {
//     // do something with s.Text()
// }
// if s.Err() != nil {
//     panic(s.Err())
// }
// ```
type Scanner interface {
	Scan() bool
	Text() string
	Err() error
}

// A SectionScanner wraps a Scanner and is itself a Scanner that only scans
// between the given Start and End labels.
type SectionScanner struct {
	Scanner
	Start string
	End   string
}

const (
	// scriptStartLabel as a start label selects everything from the very
	// first line of the wrapped Scanner.
	scriptStartLabel = ".."
	// scriptEndLabel as an end label selects everything up to the last line
	// of the wrapped Scanner.
	scriptEndLabel = ".."
)

// NewSectionScanner returns a SectionScanner given a Scanner and a start and
// end label. The wrapped Scanner is advanced up to and including the line
// that carries the start label, so that the first Scan on the result yields
// the first line of the section. When start equals scriptStartLabel nothing
// is consumed.
//
// A label line is either exactly "label:<name>" or starts with
// "label:<name>|" (e.g. "label:t0|cmd: kill 1"). The label name has to match
// completely, so start label "t1" does not match a "label:t10|..." line.
//
// An error is returned if the wrapped Scanner fails or if the start label
// isn't present.
func NewSectionScanner(scanner Scanner, start, end string) (*SectionScanner, error) {
	s := &SectionScanner{
		Scanner: scanner,
		Start:   start,
		End:     end,
	}

	if start == scriptStartLabel {
		return s, nil
	}

	// find section start
	marker := "label:" + s.Start
	for s.Scanner.Scan() {
		line := s.Text()
		if !strings.HasPrefix(line, marker) {
			continue
		}
		// Only accept the line when the label name ends right after the
		// marker; otherwise "t1" would also match "t10".
		if rest := line[len(marker):]; rest == "" || rest[0] == '|' {
			return s, nil
		}
	}

	// Distinguish a failing scanner from a label that is simply missing.
	if err := s.Scanner.Err(); err != nil {
		return nil, err
	}

	return nil, errors.New("section start not found, " + s.Start)
}

// Scan advances the wrapped Scanner by one line. It returns false when the
// wrapped Scanner is finished or when the End label is reached, and true
// otherwise.
90 | func (s *SectionScanner) Scan() bool { 91 | if s.Scanner.Scan() == false { 92 | return false 93 | } 94 | 95 | if s.End == scriptEndLabel { 96 | return true 97 | } 98 | 99 | if strings.HasPrefix(s.Scanner.Text(), "label:"+s.End) { 100 | return false 101 | } 102 | 103 | return true 104 | } 105 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/stat_analysis.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | // This file contains the static ringpop stats analysis for: convergence time; 22 | // number of converged checksums; and counting of individual stats. 
23 | 24 | package main 25 | 26 | import ( 27 | "fmt" 28 | "regexp" 29 | "strings" 30 | "time" 31 | 32 | "github.com/pkg/errors" 33 | ) 34 | 35 | const ( 36 | membershipChecksumPath = ".checksum:" 37 | changesDisseminatePath = "changes.disseminate:" 38 | membershipSetPath = "membership-set" 39 | hostportRegex = "[0-9]{1,3}_[0-9]{1,3}_[0-9]{1,3}_[0-9]{1,3}_[0-9]{1,6}" 40 | ) 41 | 42 | // CountAnalysis counts the number of occurences of stat in the scanner. 43 | func CountAnalysis(s Scanner, stat string) (int, error) { 44 | stat += ":" 45 | count := 0 46 | for s.Scan() { 47 | // TODO fetch actual count from stat line (don't just count number of lines) 48 | if ok, err := regexp.MatchString(stat, s.Text()); ok && err == nil { 49 | count++ 50 | } 51 | } 52 | if s.Err() != nil { 53 | return 0, errors.Wrap(s.Err(), "count analysis\n") 54 | } 55 | 56 | return count, nil 57 | } 58 | 59 | // ChecksumsAnalysis counts the number of unique checksums among nodes after 60 | // scanning all the stats in the scanner. 61 | func ChecksumsAnalysis(s Scanner) (int, error) { 62 | m := make(map[string]string) 63 | for s.Scan() { 64 | line := s.Text() 65 | ix := strings.Index(line, membershipChecksumPath) 66 | 67 | // filter out everything that is not a membership checksum 68 | if ix == -1 || strings.Contains(line, "ring.checksum") { 69 | continue 70 | } 71 | 72 | csum := line[ix+len(membershipChecksumPath):] 73 | if csum[len(csum)-2:] != "|g" { 74 | msg := fmt.Sprintf("membership.checksum is not a gauge. 
csum=%s", csum) 75 | return 0, errors.New(msg) 76 | } 77 | csum = csum[:len(csum)-2] 78 | 79 | r := regexp.MustCompile(hostportRegex) 80 | host := r.FindString(line) 81 | if host == "" { 82 | msg := fmt.Sprintf("membership.checksum stat \"%s\" does not contain host", line) 83 | return 0, errors.New(msg) 84 | } 85 | m[host] = csum 86 | } 87 | if s.Err() != nil { 88 | return 0, errors.Wrap(s.Err(), "checksums analysis\n") 89 | } 90 | 91 | return uniq(m), nil 92 | } 93 | 94 | // uniq returns the number of unique values in a map. 95 | func uniq(m map[string]string) int { 96 | u := make(map[string]struct{}) 97 | for _, csum := range m { 98 | u[csum] = struct{}{} 99 | } 100 | return len(u) 101 | } 102 | 103 | // ConvergenceTimeAnalysis measures the time it takes from the first changes is 104 | // applied until the last. 105 | func ConvergenceTimeAnalysis(s Scanner) (time.Duration, error) { 106 | var firstChange string 107 | var lastChange string 108 | for s.Scan() { 109 | if strings.Contains(s.Text(), membershipSetPath) { 110 | firstChange = s.Text() 111 | lastChange = s.Text() 112 | break 113 | } 114 | } 115 | if firstChange == "" { 116 | return 0, errors.New("first membership change not found in convergence time analysis") 117 | } 118 | 119 | for s.Scan() { 120 | if strings.Contains(s.Text(), membershipSetPath) { 121 | lastChange = s.Text() 122 | } 123 | } 124 | if s.Err() != nil { 125 | return 0, errors.Wrap(s.Err(), "convergence time analysis\n") 126 | } 127 | 128 | d, err := timeDiff(firstChange, lastChange) 129 | if err != nil { 130 | return 0, errors.Wrap(err, "convergence time analaysis\n") 131 | } 132 | 133 | // force millisecond precission 134 | return d / time.Millisecond * time.Millisecond, nil 135 | } 136 | 137 | // timeDiff returns the duration between two stat lines. 
138 | func timeDiff(stat1, stat2 string) (time.Duration, error) { 139 | i1 := strings.Index(stat1, "|") 140 | if i1 == -1 { 141 | msg := fmt.Sprintf("stat1 \"%s\" doesn't contain a timestamp", stat1) 142 | return 0, errors.New(msg) 143 | } 144 | i2 := strings.Index(stat2, "|") 145 | if i2 == -1 { 146 | msg := fmt.Sprintf("stat2 \"%s\" doesn't contain a timestamp", stat2) 147 | return 0, errors.New(msg) 148 | } 149 | 150 | t1, err := time.Parse(time.RFC3339Nano, stat1[:i1]) 151 | if err != nil { 152 | return 0, errors.Wrap(err, "parse timestamp stat1\n") 153 | } 154 | t2, err := time.Parse(time.RFC3339Nano, stat2[:i2]) 155 | if err != nil { 156 | return 0, errors.Wrap(err, "parse timestamp stat2\n") 157 | } 158 | 159 | return t2.Sub(t1), nil 160 | } 161 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/stat_ingester.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
// The StatIngester ingests ringpop stats with added timestamps. It analyzes
// the stream so that it knows when the cluster reaches a stable state. It
// also writes the stream to a writer for later analysis.
type StatIngester struct {
	// The writer where the stats are written to.
	writer io.Writer

	// Protects emptyNodes and wasUnstable
	sync.Mutex

	// The stat ingester listens for dissemination stats to determine if the
	// cluster has reached a stable state. When there are no changes being
	// disseminated by any node, the cluster is said to be stable.
	// emptyNodes keeps track of which nodes are empty and which nodes still
	// have changes to disseminate.
	emptyNodes map[string]bool

	// When waiting for the cluster to be stable, we first want to make sure
	// that the cluster was unstable at some point. This makes sure that any
	// failure condition we throw at the cluster has taken effect before we
	// move onto the next failure condition.
	wasUnstable bool
}

// NewStatIngester creates a new StatIngester that writes ingested stats to w.
func NewStatIngester(w io.Writer) *StatIngester {
	return &StatIngester{
		emptyNodes: make(map[string]bool),
		writer:     w,
	}
}

// sawUnstable reports, under the lock, whether a node has had changes to
// disseminate since the flag was last reset.
func (si *StatIngester) sawUnstable() bool {
	si.Lock()
	defer si.Unlock()
	return si.wasUnstable
}

// WaitForStable blocks until the cluster has reached a stable state. It first
// waits for the cluster to become unstable if it hasn't been already, and
// then blocks until the given hosts are stable again. The state is polled
// every 200 milliseconds.
func (si *StatIngester) WaitForStable(hosts []string) {
	const pollInterval = 200 * time.Millisecond

	// wait for cluster to become unstable; wasUnstable is written while
	// stats are handled, so it must only be read while holding the lock
	for !si.sawUnstable() {
		time.Sleep(pollInterval)
	}
	// wait for cluster to become stable
	for !si.IsClusterStable(hosts) {
		time.Sleep(pollInterval)
	}

	si.Lock()
	si.wasUnstable = false
	si.Unlock()
}

// IsClusterStable indicates, judging from the processed stats, whether the
// cluster is in a stable state. The input are the hosts that should be
// alive, written as "ip:port". A host for which no dissemination stat has
// been processed yet counts as unstable.
func (si *StatIngester) IsClusterStable(hosts []string) bool {
	// stats identify a host with underscores: 10.0.0.1:3000 -> 10_0_0_1_3000
	toStatName := strings.NewReplacer(".", "_", ":", "_")

	si.Lock()
	defer si.Unlock()

	for _, h := range hosts {
		if !si.emptyNodes[toStatName.Replace(h)] {
			return false
		}
	}
	return true
}
106 | func (si *StatIngester) IngestStats(s Scanner) error { 107 | for s.Scan() { 108 | // handle stat for cluster stability analysis 109 | err := si.handleStat(s.Text()) 110 | if err != nil { 111 | err = errors.Wrap(err, "stat ingestion") 112 | log.Fatalf(err.Error()) 113 | } 114 | 115 | // write stat to file 116 | _, err = fmt.Fprintln(si.writer, s.Text()) 117 | if err != nil { 118 | log.Fatalln(err) 119 | } 120 | } 121 | 122 | return nil 123 | } 124 | 125 | // InsertLabel writes a line like "label:t0|cmd: kill 1" into the stats file. 126 | // The line indicates at what time a command is run. The idea is that all stats 127 | // that are recorded between two labels can be used to measure the effect of 128 | // the command associated with the first label. 129 | func (si *StatIngester) InsertLabel(label, cmd string) { 130 | fmt.Fprintf(si.writer, "label:%s|cmd: %s\n", label, cmd) 131 | } 132 | 133 | // handleStat handles a single stat to determine cluster-stability. 134 | func (si *StatIngester) handleStat(str string) error { 135 | si.Lock() 136 | defer si.Unlock() 137 | 138 | // check if changes were disseminated 139 | changes, ok := getBetween(str, "changes.disseminate:", "|") 140 | if !ok { 141 | return nil 142 | } 143 | empty := changes == "0" 144 | 145 | // lookup hostport 146 | hostport, ok := getBetween(str, "ringpop.", ".") 147 | if !ok { 148 | msg := fmt.Sprintf("no hostport found in stat \"%s\"", str) 149 | return errors.New(msg) 150 | } 151 | 152 | if !empty { 153 | si.wasUnstable = true 154 | } 155 | si.emptyNodes[hostport] = empty 156 | 157 | return nil 158 | } 159 | 160 | // getBetween get a substring from the input buffer between before and after. 161 | // The function returns whether this was a success. 
// getBetween extracts the part of str that lies between the first occurrence
// of before and the first occurrence of after that follows it. The boolean
// is false when either of the two markers cannot be found.
func getBetween(str, before, after string) (string, bool) {
	head := strings.Index(str, before)
	if head < 0 {
		return "", false
	}

	// Only look for the closing marker behind the opening marker.
	tail := str[head+len(before):]
	length := strings.Index(tail, after)
	if length < 0 {
		return "", false
	}

	return tail[:length], true
}
2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3000.changes.disseminate:1|g 52 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3001.changes.disseminate:1|g 53 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3002.changes.disseminate:1|g 54 | 55 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3000.changes.disseminate:0|g 56 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3001.changes.disseminate:0|g 57 | 2016-06-15T16:11:08.198191045Z|ringpop.172_18_24_220_3002.changes.disseminate:1|g 58 | ` 59 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/test_yaml_parser.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | package main 22 | 23 | import ( 24 | "fmt" 25 | "reflect" 26 | "strconv" 27 | "strings" 28 | "time" 29 | 30 | "github.com/pkg/errors" 31 | 32 | "gopkg.in/yaml.v2" 33 | ) 34 | 35 | // testYaml is used to unmarshal test declared in the yaml files. 36 | type testYaml struct { 37 | Config configYaml 38 | Scenarios []*scenarioYaml 39 | } 40 | 41 | type configYaml struct { 42 | // TODO(wieger): define 43 | } 44 | 45 | // scenarioYaml captures the information of a scenario. 46 | type scenarioYaml struct { 47 | Name string 48 | Size string 49 | Desc string 50 | 51 | Script []map[string]string 52 | Measure []string 53 | Runs [][]string 54 | } 55 | 56 | func parse(bts []byte) (scns []*Scenario, err error) { 57 | defer func() { 58 | if r := recover(); r != nil { 59 | scns = nil 60 | err = errors.New(fmt.Sprint(r)) 61 | } 62 | }() 63 | 64 | return parseScenarios(bts), nil 65 | } 66 | 67 | func parseScenarios(bts []byte) []*Scenario { 68 | testYaml := &testYaml{} 69 | err := yaml.Unmarshal([]byte(bts), testYaml) 70 | if err != nil { 71 | panic("failed to unmarshal scenario yaml") 72 | } 73 | 74 | return extractScenarios(testYaml) 75 | } 76 | 77 | // extractScenarios returns a scenario for every element in the runs list. 78 | func extractScenarios(runs *testYaml) []*Scenario { 79 | var result []*Scenario 80 | for _, scenarioData := range runs.Scenarios { 81 | for _, vari := range scenarioData.Runs[0] { 82 | if vari[0] != '<' || vari[len(vari)-1] != '>' { 83 | panic(fmt.Sprintf("variable '%s' not of the form ", vari)) 84 | } 85 | } 86 | // We start at i=1 because the first entry of the runs declares the 87 | // variables names. e.g. [, , ]. 88 | for i := 1; i < len(scenarioData.Runs); i++ { 89 | s := extractScenario(scenarioData, i) 90 | result = append(result, s) 91 | } 92 | } 93 | 94 | return result 95 | } 96 | 97 | // extractScenario returns a scenario given the index of a specific run. 
98 | func extractScenario(data *scenarioYaml, runIx int) *Scenario { 99 | varsData := data.Runs[0] 100 | runData := data.Runs[runIx] 101 | defer wrapPanicf("Failed to parse scenario '%s'", data.Name) 102 | defer wrapPanicf("in run %d, [%v] = [%v]", runIx, strings.Join(varsData, ", "), strings.Join(runData, ", ")) 103 | 104 | if len(varsData) != len(runData) { 105 | msg := fmt.Sprintf("var count of run %v should match var count of %v", runData, varsData) 106 | panic(msg) 107 | } 108 | 109 | // don't find and replace on name 110 | name := data.Name 111 | desc := replace(data.Desc, varsData, runData) 112 | sizeStr := replace(data.Size, varsData, runData) 113 | 114 | // extract size 115 | size, err := strconv.Atoi(sizeStr) 116 | if err != nil { 117 | panic("size convert: " + err.Error()) 118 | } 119 | 120 | // extract script 121 | labels, cmds := extractScript(data.Script, varsData, runData) 122 | script := parseScript(labels, cmds) 123 | 124 | // extract Measure 125 | measureStrs := make([]string, len(data.Measure)) 126 | for i := range data.Measure { 127 | measureStrs[i] = replace(data.Measure[i], varsData, runData) 128 | } 129 | measure := parseMeasurements(measureStrs) 130 | 131 | return &Scenario{ 132 | Name: name, 133 | Desc: desc, 134 | Size: size, 135 | Script: script, 136 | Measure: measure, 137 | } 138 | } 139 | 140 | func extractScript(script []map[string]string, varsData, runData []string) (labels, cmds []string) { 141 | labels = make([]string, 0, len(script)) 142 | cmds = make([]string, 0, len(script)) 143 | for _, cmdData := range script { 144 | if len(cmdData) != 1 { 145 | // We are asserting that commands are one line only to comply with 146 | // the yaml that looks like: 147 | // 148 | // script: 149 | // - t0: command1 150 | // - t1: command2 151 | panic(fmt.Sprintf("command '%v' should contain exactly one entry", cmdData)) 152 | } 153 | 154 | for label, cmd := range cmdData { 155 | labels = append(labels, replace(label, varsData, runData)) 156 | cmds 
= append(cmds, replace(cmd, varsData, runData)) 157 | } 158 | } 159 | return labels, cmds 160 | } 161 | 162 | func parseScript(labels, cmdStrs []string) []*Command { 163 | defer wrapPanicf("in parse script") 164 | var cmds []*Command 165 | for i := range labels { 166 | cmd := parseCommand(labels[i], cmdStrs[i]) 167 | cmds = append(cmds, cmd) 168 | } 169 | 170 | return cmds 171 | } 172 | 173 | func parseCommand(label, cmdString string) *Command { 174 | defer wrapPanicf("in parse command '%s: %s'", label, cmdString) 175 | fields := strings.Fields(cmdString) 176 | if len(fields) == 0 { 177 | panic("empty command") 178 | } 179 | 180 | return &Command{ 181 | Label: label, 182 | Cmd: fields[0], 183 | Args: fields[1:], 184 | } 185 | } 186 | 187 | func parseMeasurements(msData []string) []*Measurement { 188 | var ms []*Measurement 189 | for _, mData := range msData { 190 | ms = append(ms, parseMeasurement(mData)) 191 | } 192 | return ms 193 | } 194 | 195 | func parseMeasurement(str string) *Measurement { 196 | defer wrapPanicf("in parse measure '%s'", str) 197 | 198 | fields := strings.Fields(str) 199 | if len(fields) < 3 { 200 | panic("contains too few fields") 201 | } 202 | 203 | measurementArgs := fields[3:] 204 | 205 | // search for optional assertion 206 | var assertion *Assertion 207 | for i, s := range measurementArgs { 208 | if s == "is" || s == "in" { 209 | interval := strings.Join(measurementArgs[i+1:], "") 210 | assertion = parseAssertion(s, interval) 211 | measurementArgs = measurementArgs[:i] 212 | } 213 | } 214 | 215 | return &Measurement{ 216 | Start: fields[0], 217 | End: fields[1], 218 | Quantity: fields[2], 219 | Args: measurementArgs, 220 | Assertion: assertion, 221 | } 222 | } 223 | 224 | func parseAssertion(typeStr string, arg string) *Assertion { 225 | defer wrapPanicf("in parse assertion '%s %s'", typeStr, arg) 226 | 227 | switch typeStr { 228 | case "is": 229 | typ := AssertionTypeIs 230 | v := parseValue(arg) 231 | return &Assertion{ 232 | Type: 
typ, 233 | V1: v, 234 | } 235 | 236 | case "in": 237 | typ := AssertionTypeIn 238 | v1, v2 := parseRange(arg) 239 | return &Assertion{ 240 | Type: typ, 241 | V1: v1, 242 | V2: v2, 243 | } 244 | } 245 | 246 | panic("not valid assertion type") 247 | } 248 | 249 | func parseRange(rng string) (v1, v2 Value) { 250 | defer wrapPanicf("in parse range '%s'", rng) 251 | 252 | if rng[0] != '(' || rng[len(rng)-1] != ')' { 253 | panic("should be enclosed by parenthesis") 254 | } 255 | split := strings.Split(rng[1:len(rng)-1], ",") 256 | if len(split) != 2 { 257 | panic("should be split by a comma") 258 | } 259 | 260 | v1 = parseValue(split[0]) 261 | v2 = parseValue(split[1]) 262 | 263 | if reflect.TypeOf(v1) != reflect.TypeOf(v2) { 264 | panic(fmt.Sprintf("types %T %T should be equal", v1, v2)) 265 | } 266 | 267 | return v1, v2 268 | } 269 | 270 | func parseValue(str string) Value { 271 | defer wrapPanicf("in parse value '%s", str) 272 | 273 | // First check if the input is a number or expression. 274 | v, err := Eval(str) 275 | if err == nil { 276 | return v 277 | } 278 | 279 | // Then check if the input is a duration. Duration check needs 280 | // to be after Eval to prevent "0" to parse as a duration. 281 | d, err := time.ParseDuration(str) 282 | if err == nil { 283 | return Value(d) 284 | } 285 | 286 | panic("value is not a number duration or expression") 287 | } 288 | 289 | // replace finds occurrences of varsData and replaces them by the respective 290 | // element in the runsData. 291 | func replace(str string, varsData []string, runData []string) string { 292 | for i := range varsData { 293 | str = strings.Replace(str, varsData[i], runData[i], -1) 294 | } 295 | return str 296 | } 297 | 298 | // wrapPanicf recovers from a panic and then starts to panic with a message 299 | // that adds to the message of the previous panic. This function should always 300 | // be defered because of the recover and is commonly at the start of a 301 | // function. 
302 | func wrapPanicf(format string, args ...interface{}) { 303 | if r := recover(); r != nil { 304 | msg := fmt.Sprintf(format, args...) 305 | panic(fmt.Sprintf("%s:\n- %v", msg, r)) 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /cluster-test/test-orchestrator/udp_scanner.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net" 5 | 6 | "github.com/pkg/errors" 7 | ) 8 | 9 | // UDPScanner listens to a port for udp messages and converts them so that they 10 | // can be read through the Scanner interface. 11 | type UDPScanner struct { 12 | buf []byte 13 | text string 14 | err error 15 | sConn *net.UDPConn 16 | } 17 | 18 | // NewUDPScanner starts listening on the specified port and returns a new 19 | // UDPScanner. 20 | func NewUDPScanner(port string) (*UDPScanner, error) { 21 | // setup udp connection 22 | sAddr, err := net.ResolveUDPAddr("udp", ":"+port) 23 | if err != nil { 24 | return nil, errors.Wrap(err, "udp scanner") 25 | } 26 | 27 | sConn, err := net.ListenUDP("udp", sAddr) 28 | if err != nil { 29 | return nil, errors.Wrap(err, "udp scanner") 30 | } 31 | 32 | return &UDPScanner{ 33 | buf: make([]byte, 1024), 34 | sConn: sConn, 35 | }, nil 36 | } 37 | 38 | // Scans the next line, and returns whether there is one. 39 | func (s *UDPScanner) Scan() bool { 40 | // read a single stat 41 | n, err := s.sConn.Read(s.buf) 42 | if err != nil { 43 | s.err = errors.Wrap(err, "udp scan") 44 | return false 45 | } 46 | 47 | s.text = string(s.buf[0:n]) 48 | 49 | return true 50 | } 51 | 52 | // Returns the scanned line. 53 | func (s *UDPScanner) Text() string { 54 | return s.text 55 | } 56 | 57 | // Returns whether an error occured during scanning. 
58 | func (s *UDPScanner) Err() error { 59 | return s.err 60 | } 61 | -------------------------------------------------------------------------------- /docs/community.md: -------------------------------------------------------------------------------- 1 | # Community 2 | 3 | ## Google Group 4 | 5 | ## Contributing 6 | 7 | ## License 8 | 9 | Ringpop is available under the MIT license. See the [LICENSE](https://github.com/uber/ringpop/blob/master/LICENSE) file for more info. 10 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Ringpop documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Feb 19 12:49:33 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('..')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 
31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.todo', 34 | 'sphinx.ext.coverage', 35 | 'sphinx.ext.viewcode', 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | from recommonmark.parser import CommonMarkParser 42 | 43 | source_parsers = { 44 | '.md': CommonMarkParser, 45 | } 46 | 47 | # The suffix of source filenames. 48 | source_suffix = ['.rst', '.md'] 49 | 50 | # The encoding of source files. 51 | #source_encoding = 'utf-8-sig' 52 | 53 | # The master toctree document. 54 | master_doc = 'index' 55 | 56 | # General information about the project. 57 | project = u'Ringpop' 58 | copyright = u'2015, Uber Technologies, Inc.' 59 | 60 | # The version info for the project you're documenting, acts as replacement for 61 | # |version| and |release|, also used in various other places throughout the 62 | # built documents. 63 | # 64 | # The short X.Y version. 65 | version = '0.1.0' 66 | # The full version, including alpha/beta/rc tags. 67 | release = '0.1.0' 68 | 69 | # The language for content autogenerated by Sphinx. Refer to documentation 70 | # for a list of supported languages. 71 | #language = None 72 | 73 | # There are two options for replacing |today|: either, you set today to some 74 | # non-false value, then it is used: 75 | #today = '' 76 | # Else, today_fmt is used as the format for a strftime call. 77 | #today_fmt = '%B %d, %Y' 78 | 79 | # List of patterns, relative to source directory, that match files and 80 | # directories to ignore when looking for source files. 81 | exclude_patterns = ['_build'] 82 | 83 | # The reST default role (used for this markup: `text`) to use for all 84 | # documents. 85 | #default_role = None 86 | 87 | # If true, '()' will be appended to :func: etc. cross-reference text. 88 | #add_function_parentheses = True 89 | 90 | # If true, the current module name will be prepended to all description 91 | # unit titles (such as .. function::). 
92 | #add_module_names = True 93 | 94 | # If true, sectionauthor and moduleauthor directives will be shown in the 95 | # output. They are ignored by default. 96 | #show_authors = False 97 | 98 | # The name of the Pygments (syntax highlighting) style to use. 99 | pygments_style = 'sphinx' 100 | 101 | # A list of ignored prefixes for module index sorting. 102 | #modindex_common_prefix = [] 103 | 104 | # If true, keep warnings as "system message" paragraphs in the built documents. 105 | #keep_warnings = False 106 | 107 | 108 | # -- Options for HTML output ---------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 112 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 113 | 114 | if not on_rtd: # only import and set the theme if we're building docs locally 115 | import sphinx_rtd_theme 116 | html_theme = 'sphinx_rtd_theme' 117 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 118 | 119 | # Theme options are theme-specific and customize the look and feel of a theme 120 | # further. For a list of options available for each theme, see the 121 | # documentation. 122 | #html_theme_options = {} 123 | 124 | # Add any paths that contain custom themes here, relative to this directory. 125 | #html_theme_path = [] 126 | 127 | # The name for this set of Sphinx documents. If None, it defaults to 128 | # " v documentation". 129 | #html_title = None 130 | 131 | # A shorter title for the navigation bar. Default is the same as html_title. 132 | #html_short_title = None 133 | 134 | # The name of an image file (relative to this directory) to place at the top 135 | # of the sidebar. 136 | #html_logo = None 137 | 138 | # The name of an image file (within the static path) to use as favicon of the 139 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 140 | # pixels large. 
141 | #html_favicon = None 142 | 143 | # Add any paths that contain custom static files (such as style sheets) here, 144 | # relative to this directory. They are copied after the builtin static files, 145 | # so a file named "default.css" will overwrite the builtin "default.css". 146 | html_static_path = ['_static'] 147 | 148 | # Add any extra paths that contain custom files (such as robots.txt or 149 | # .htaccess) here, relative to this directory. These files are copied 150 | # directly to the root of the documentation. 151 | #html_extra_path = [] 152 | 153 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 154 | # using the given strftime format. 155 | #html_last_updated_fmt = '%b %d, %Y' 156 | 157 | # If true, SmartyPants will be used to convert quotes and dashes to 158 | # typographically correct entities. 159 | #html_use_smartypants = True 160 | 161 | # Custom sidebar templates, maps document names to template names. 162 | #html_sidebars = {} 163 | 164 | # Additional templates that should be rendered to pages, maps page names to 165 | # template names. 166 | #html_additional_pages = {} 167 | 168 | # If false, no module index is generated. 169 | #html_domain_indices = True 170 | 171 | # If false, no index is generated. 172 | #html_use_index = True 173 | 174 | # If true, the index is split into individual pages for each letter. 175 | #html_split_index = False 176 | 177 | # If true, links to the reST sources are added to the pages. 178 | #html_show_sourcelink = True 179 | 180 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 181 | #html_show_sphinx = True 182 | 183 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 184 | #html_show_copyright = True 185 | 186 | # If true, an OpenSearch description file will be output, and all pages will 187 | # contain a tag referring to it. The value of this option must be the 188 | # base URL from which the finished HTML is served. 
189 | #html_use_opensearch = '' 190 | 191 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 192 | #html_file_suffix = None 193 | 194 | # Output file base name for HTML help builder. 195 | htmlhelp_basename = 'Ringpopdoc' 196 | 197 | 198 | # -- Options for LaTeX output --------------------------------------------- 199 | 200 | latex_elements = { 201 | # The paper size ('letterpaper' or 'a4paper'). 202 | #'papersize': 'letterpaper', 203 | 204 | # The font size ('10pt', '11pt' or '12pt'). 205 | #'pointsize': '10pt', 206 | 207 | # Additional stuff for the LaTeX preamble. 208 | #'preamble': '', 209 | } 210 | 211 | # Grouping the document tree into LaTeX files. List of tuples 212 | # (source start file, target name, title, 213 | # author, documentclass [howto, manual, or own class]). 214 | latex_documents = [ 215 | ('index', 'Ringpop.tex', u'Ringpop Documentation', 216 | u'Uber Technologies, Inc.', 'manual'), 217 | ] 218 | 219 | # The name of an image file (relative to this directory) to place at the top of 220 | # the title page. 221 | #latex_logo = None 222 | 223 | # For "manual" documents, if this is true, then toplevel headings are parts, 224 | # not chapters. 225 | #latex_use_parts = False 226 | 227 | # If true, show page references after internal links. 228 | #latex_show_pagerefs = False 229 | 230 | # If true, show URL addresses after external links. 231 | #latex_show_urls = False 232 | 233 | # Documents to append as an appendix to all manuals. 234 | #latex_appendices = [] 235 | 236 | # If false, no module index is generated. 237 | #latex_domain_indices = True 238 | 239 | 240 | # -- Options for manual page output --------------------------------------- 241 | 242 | # One entry per manual page. List of tuples 243 | # (source start file, name, description, authors, manual section). 
244 | man_pages = [ 245 | ('index', 'Ringpop', u'Ringpop Documentation', 246 | [u'Uber Technologies, Inc.'], 1) 247 | ] 248 | 249 | # If true, show URL addresses after external links. 250 | #man_show_urls = False 251 | 252 | 253 | # -- Options for Texinfo output ------------------------------------------- 254 | 255 | # Grouping the document tree into Texinfo files. List of tuples 256 | # (source start file, target name, title, author, 257 | # dir menu entry, description, category) 258 | texinfo_documents = [ 259 | ('index', 'Ringpop', u'Ringpop Documentation', 260 | u'Uber Technologies, Inc.', 'Ringpop', 'Application cooperation and coordination library', 261 | 'Miscellaneous'), 262 | ] 263 | 264 | # Documents to append as an appendix to all manuals. 265 | #texinfo_appendices = [] 266 | 267 | # If false, no module index is generated. 268 | #texinfo_domain_indices = True 269 | 270 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 271 | #texinfo_show_urls = 'footnote' 272 | 273 | # If true, do not generate a @detailmenu in the "Top" node's menu. 274 | #texinfo_no_detailmenu = False 275 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | If you're looking for more information about what Ringpop is all about and how it might be able to help, you've come to the right place. Let's begin by digging deeper into Ringpop and why you'd want to use it. 3 | 4 | ## What is Ringpop? 5 | As we've stated in the introduction, Ringpop is a library that maintains a consistent hash ring and can be used to arbitrarily shard the data in your application in a way that's adaptable to capacity changes and resilient to failure. 6 | 7 | Ringpop is best described by introducing its 3 core features: a membership protocol, a consistent hash ring and request forwarding.
You can find more information about each of these in the Architecture and Design section. For the eager reader, its membership protocol provides a distributed application, whose instances were once completely unaware of one another, with the ability to discover one another, self-organize and cooperate. The instances communicate over a TCP backchannel and pass information between them in an infection-style manner. Enough information is shared to allow these instances to come to an agreement, or converge, on whom the participating instances, or members, are of the distributed application. 8 | 9 | With a consistent membership view, Ringpop arranges the members along a consistent hash ring, divides up the integer keyspace into partitions and assigns ownership of the partitions to the individual instances of your application. It then projects a keyspace of your choosing, say the ID range of the objects in your application, onto that same ring and resolves an owner for each ID. In the face of failure, the underlying membership protocol is resilient and automatically reassigns ownership, also known as rebalancing, to the surviving instances. 10 | 11 | Requests that your application serves, be it ones that create new objects, update or read or delete existing ones, may be sent to any instance. Each Ringpop instance is equipped to route the request to the correct owner should the shard key, for example the object's ID, resolve to an instance that is not the one that received the original request. 12 | 13 | By maintaining a consistent hash ring based upon the information that is collected by its membership protocol and offering request forwarding as a routing convenience, Ringpop provides some very powerful and cool building blocks. What you as an application developer choose to do with these building blocks is entirely up to you. 
It may lead you to build an ultra scalable and highly available database, an actor model system, systems that are capable of electing a leader and delegating work to it, a request coalescing proxy, general purpose application-layer caches and much more. If you find some cool new ways to leverage Ringpop, let us know! 14 | 15 | ## Why use Ringpop? 16 | Ringpop is first and foremost an application developer's library. It is not an external system nor a shared infrastructure resource used by many applications. It allows your application to remain autonomous and not beholden to a dependency for its ability to scale and remain available. Ringpop promotes scalability and fault tolerance as an application layer concern while keeping complexity and operational overhead to a minimum. Application developers have to be aware how their data is distributed, what makes that data available and how their application degrades in the face of failure. When using Ringpop you are sacrificing consistency for higher availability and one must take into consideration how even higher degrees of availability are achieved through techniques like replication and application-side conflict resolution. We've found that taking ownership of your application to such a degree is not only empowering, but a very sustainable and scalable practice. 17 | 18 | Clients of your application can remain completely unaware that Ringpop is being used. They neither have to understand the underlying partitioning scheme nor who the correct recipient is for a request. There is no special technology that need exist between client and server. You may use load balancers, proxies, overlay networks, etc without fear of Ringpop incompatibilities. 19 | 20 | Ringpop offers a rich administration API to inspect and control your cooperative application and easy to use tooling to help you understand the behavior of Ringpop and your application. 
21 | 22 | Lastly, Ringpop's sharding capabilities are just one application of what we see as a collection of composable distributed systems programming building blocks. We typically want our applications to be more cooperative when something needs to be made more efficient at a large scale or a resource in your system has to have a particular home or rendezvous point. We discover new ways to use Ringpop all the time and you'll likely run into a few interesting ways too. 23 | 24 | ## Installation 25 | `npm install ringpop` 26 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Ringpop 2 | ======= 3 | 4 | Ringpop is a library that maintains a consistent hash ring atop a membership protocol. It can be used by applications to arbitrarily shard data in a scalable and fault-tolerant manner. 5 | 6 | To find out more, head to the Getting Started section below. 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | getting_started 12 | running_ringpop 13 | programming_ringpop 14 | architecture_design 15 | partitions 16 | make-partition 17 | references 18 | community 19 | -------------------------------------------------------------------------------- /docs/make-partition.md: -------------------------------------------------------------------------------- 1 | # Forming a partition 2 | 3 | With the current implementation of tick-cluster, it is non-trivial to form a 4 | partition. To understand why, we need to understand how connections are 5 | established. 6 | 7 | ## Port Allocation 8 | 9 | A ringpop instance opens a local tchannel socket (=listening tcp socket) to 10 | accept incoming connections from other ringpops. By default, on a 2-node 11 | tick-cluster, this is `127.0.0.1:3000`. Let's call it instance `a`. For 12 | instance `a` to establish a connection to instance `b` (`127.0.0.1:3001`), 13 | instance `a` will open an ephemeral port, e.g. 
`43323`, to connect to instance 14 | `b`. This connection, from `127.0.0.1:43323` (`a`) to `127.0.0.1:3001` (`b`) is 15 | used for messages initiated by node `a`. The other connection (example below), 16 | from `127.0.0.1:36113` (`b`) to `127.0.0.1:3000` (`a`), is used for messages 17 | initiated by `b`. Here is a snapshot of `lsof` from a two-node cluster: 18 | 19 | ``` 20 | root:/# lsof -Pnni 21 | COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME 22 | node 64 root 10u IPv4 217924 0t0 TCP 127.0.0.1:3000 (LISTEN) 23 | node 64 root 11u IPv4 217925 0t0 TCP 127.0.0.1:43323->127.0.0.1:3001 (ESTABLISHED) 24 | node 64 root 12u IPv4 217926 0t0 TCP 127.0.0.1:3000->127.0.0.1:36113 (ESTABLISHED) 25 | node 66 root 10u IPv4 219916 0t0 TCP 127.0.0.1:3001 (LISTEN) 26 | node 66 root 11u IPv4 219917 0t0 TCP 127.0.0.1:36113->127.0.0.1:3000 (ESTABLISHED) 27 | node 66 root 12u IPv4 219918 0t0 TCP 127.0.0.1:3001->127.0.0.1:43323 (ESTABLISHED) 28 | root:/# 29 | ``` 30 | 31 | Armed with this knowledge, we can try to make a partition. 32 | 33 | ## Manually forming a partition 34 | 35 | The naïve approach to make a partition between `a` and `b` is to block incoming 36 | connections from and to port `3000`: then no packet will leave `a`, and we will 37 | have a partition. However, this misses the fact that ephemeral connections are 38 | used for relaying traffic between nodes, and, in this case, connection from 39 | `127.0.0.1:43323` (`a`) to `127.0.0.1:3001` is established and... misses the 40 | firewall! We could block port `3001` too, but, with more nodes, that 41 | would create a cluster with N partitions (N being the number of nodes) -- not 42 | what we want. In our example, we want two partitions. 43 | 44 | With that in mind, a bit more sophistication in firewall rules is required. 
To 45 | easily create a partition in `tick-cluster` locally, we created 46 | `tools/make_partition`, which, by reading the state of the connections from 47 | `lsof`, will emit `iptables`/`pf` commands accordingly. 48 | 49 | In the example above, firewall rules to create a partition will look as follows 50 | (OS X): 51 | 52 | ``` 53 | $ sudo lsof -Pnni | ./tools/make_partition 3000 3001 --platform darwin 54 | block drop in proto tcp from 127.0.0.1 port 3000 flags S/S 55 | block drop in proto tcp from 127.0.0.1 port 3001 flags S/S 56 | block drop in proto tcp from 127.0.0.1 port 43323 to 127.0.0.1 port 3001 57 | block drop in proto tcp from 127.0.0.1 port 3001 to 127.0.0.1 port 43323 58 | block drop in proto tcp from 127.0.0.1 port 36113 to 127.0.0.1 port 3000 59 | block drop in proto tcp from 127.0.0.1 port 3000 to 127.0.0.1 port 36113 60 | ``` 61 | 62 | Linux: 63 | 64 | ``` 65 | $ sudo lsof -Pnni | ./tools/make_partition 3000 3001 --platform linux 66 | *filter 67 | -A INPUT -p tcp -s 127.0.0.1 -d 127.0.0.1 --tcp-flags RST RST -j ACCEPT 68 | -A INPUT -p tcp --syn -m state --state NEW -d 127.0.0.1 --dport 3000 -j REJECT --reject-with tcp-reset 69 | -A INPUT -p tcp --syn -m state --state NEW -d 127.0.0.1 --dport 3001 -j REJECT --reject-with tcp-reset 70 | -A INPUT -p tcp -s 127.0.0.1 --sport 43323 -d 127.0.0.1 --dport 3001 -j REJECT --reject-with tcp-reset 71 | -A INPUT -p tcp -s 127.0.0.1 --sport 3001 -d 127.0.0.1 --dport 43323 -j REJECT --reject-with tcp-reset 72 | -A INPUT -p tcp -s 127.0.0.1 --sport 36113 -d 127.0.0.1 --dport 3000 -j REJECT --reject-with tcp-reset 73 | -A INPUT -p tcp -s 127.0.0.1 --sport 3000 -d 127.0.0.1 --dport 36113 -j REJECT --reject-with tcp-reset 74 | COMMIT 75 | ``` 76 | 77 | To sum up: 78 | 79 | * New connections to the listening ports (`3000`, `3001`) will be blocked. This 80 | prevents tchannel from re-opening new valid connections. 81 | * Relevant existing connections will be terminated (e.g. `3000` to ephemeral 82 | ports).
83 | * Linux only: for the above to work, the firewall needs to explicitly accept 84 | `RST` packets. 85 | 86 | During the partition, new connections to the nodes will be impossible to make. 87 | This is important to keep in mind when using `ringpop-admin`: **invoke 88 | ringpop-admin before forming the partition**. 89 | 90 | Armed with background on how this works, we can go and make a local partition: 91 | 92 | ## Start the tick-cluster 93 | 94 | In this example, we use Node version of ringpop, but we can use `testpop` from 95 | go too: 96 | 97 | ```shell 98 | $ ./scripts/tick-cluster.js -n 4 ./main.js # node 99 | ``` 100 | 101 | ## Open the ringpop-admin to observe the cluster state 102 | 103 | We shall open `ringpop-admin partitions` and `ringpop-admin top` before making the 104 | partition. This way, the "management" connections will be open and status will 105 | be visible during the partition: 106 | 107 | ```shell 108 | $ ringpop-admin top 127.0.0.1:3000 109 | $ ringpop-admin partitions -w 1 127.0.0.1:3000 # other terminal 110 | ``` 111 | 112 | `ringpop-admin top` will show something like this: 113 | 114 | ``` 115 | Address P1 116 | 127.0.0.1:3000 alive 117 | 127.0.0.1:3001 alive 118 | 127.0.0.1:3002 alive 119 | 127.0.0.1:3003 alive 120 | 1 of 4 121 | ``` 122 | 123 | `ringpop-admin partitions` will show a single partition, updated every second: 124 | 125 | ``` 126 | 10:27:09.615 Checksum # Nodes # Alive # Suspect # Faulty Sample Host 127 | 192859590 4 4 0 0 127.0.0.1:3000 128 | 10:27:10.607 Checksum # Nodes # Alive # Suspect # Faulty Sample Host 129 | 192859590 4 4 0 0 127.0.0.1:3000 130 | ``` 131 | 132 | ## Start the partition 133 | 134 | First, check what the firewall rules would look like before applying them to the 135 | firewall (optionally, you can pass `--platform=darwin` or `--platform=linux` to 136 | the `make_partition` script): 137 | 138 | ```shell 139 | $ sudo lsof -Pnni | ./tools/make_partition 3000,3001 3002,3003 140 | ``` 141 | 142 | If you are
happy with the output, apply the rules: 143 | 144 | OS X: 145 | 146 | ```shell 147 | $ sudo -v && sudo lsof -Pnni | ./tools/make_partition 3000,3001 3002,3003 | sudo pfctl -emf - 148 | ``` 149 | 150 | Linux: 151 | 152 | ``` 153 | $ sudo -v && sudo lsof -Pnni | ./tools/make_partition 3000,3001 3002,3003 | sudo iptables-restore 154 | ``` 155 | 156 | In a few seconds, you should see output from `tick-cluster` that some of the 157 | nodes aren't able to ping each other. Let's verify we actually have a partition. 158 | 159 | ## Checking in the tools 160 | 161 | On partition, `ringpop-admin top` (opened before the partition) should display 162 | something like this: 163 | 164 | ``` 165 | Address P1 P2 166 | 127.0.0.1:3000 faulty alive 167 | 127.0.0.1:3001 faulty alive 168 | 127.0.0.1:3002 alive faulty 169 | 127.0.0.1:3003 alive faulty 170 | ``` 171 | 172 | `ringpop-admin partitions` (opened before forming a partition) shows a more 173 | high-level view: 174 | 175 | ``` 176 | 10:37:04.878 Checksum # Nodes # Alive # Suspect # Faulty Sample Host 177 | 400620880 2 2 0 2 127.0.0.1:3002 178 | 3283514511 2 2 0 2 127.0.0.1:3000 179 | ``` 180 | 181 | That's it, we have a partition! To break it, we need to wipe the firewall rules: 182 | 183 | * OS X: `pfctl -f /etc/pf.conf`. 184 | * Linux: `iptables -F`. 185 | 186 | ... and wait for partition healing to kick in, or execute the `ringpop-admin 187 | heal` [command][1]. 188 | 189 | ## Final remarks 190 | 191 | * `tools/make_partition` can only create two partitions. It can work with 192 | arbitrary partition sizes; for usage, run `tools/make_partition --help`. 193 | * `tools/make_partition` is not intended to be used in an automated way. See 194 | `--help` to learn about the limitations.
195 | 196 | 197 | [1]: https://github.com/uber/ringpop-admin 198 | -------------------------------------------------------------------------------- /docs/partitions.md: -------------------------------------------------------------------------------- 1 | # Partition Healing 2 | 3 | In the original implementation of ringpop, if a cluster is split to multiple partitions due to failing network connectivity between them, nodes in each partition declare each other as faulty, and afterward will no longer communicate. Ringpop implemented support for merging the partitions, which we call `healing`. If the network partition is healed, the ringpop cluster is still partitioned into smaller ringpop clusters that don't ping each other. We introduce Partition Healing to merge these smaller clusters back into one healthy ringpop cluster. 4 | 5 | ## Basic algorithm 6 | 7 | In order for two partitions to heal, the algorithm does the following, periodically (this is the TLDR version of it; for the full algorithm, see below): 8 | 9 | 1. Randomly select a `faulty` node. 10 | 2. Send it a `/join` request, get its membership list. 11 | 3. If the local and retrieved lists are incompatible (merging them will introduce new faulties), mark all incompatible nodes suspect. When receiving this change the respective node will reassert that it is actually alive and update its incarnation number making it compatible for merge. 12 | 4. If the local and retrieved lists are compatible (merging them will not introduce new faulties), merge the membership list with the local node's membership and disseminate the changes. 13 | 14 | We test this feature in 3 ways: 15 | 16 | 1. Unit tests asserting the correct behavior. 17 | 2. Integration tests, which will be the same for Go and Node implementations, checking the behavior of a node in isolation. 18 | 3. Manual acceptance test to see partitions actually getting healed. 
19 | 20 | ## Detailed algorithm 21 | 22 | This chapter describes a strategy to heal a partitioned ringpop cluster. The first section describes the mechanism a node triggers when it attempts to heal a partition, the second describes how, how often and when a node should trigger this mechanism. 23 | 24 | Glossary: 25 | 26 | * _Ringpop Partition_: a ringpop cluster split into two or more parts which can't communicate to each other, and believe the other parts are faulty. 27 | * _Network Partition_: an event in network infrastructure which denies two (or more) parts of ringpop cluster to communicate, causing a _Ringpop Partition_. 28 | * _Discovery Provider_: a mechanism to retrieve a list of members that should be in the ringpop cluster. Can be local (e.g. list of hosts in a file), or remote (e.g. remote member discovery service). 29 | * _Partition Healing_: mechanism in ringpop to resolve _Ringpop Partitions_. 30 | 31 | ### When and How Often 32 | 33 | Executing the algorithm from the previous section on every tick doesn't scale well. The discovery provider will be overloaded and the number of requests grows linearly with the cluster size. To deal with this issue, we introduce a separate timer that periodically ticks on a configurable duration `T`. Every tick, there is a probability `P` of executing the partition healing algorithm. 34 | 35 | Let `N` be the number of hosts the previous query to the discovery provider has given us. If we, for example set `P = 3/N` and `T = 30 s`, we get on average 6 heal attempts per minute in the entire cluster, with a 95% probability that there is at least one heal attempt in 30 seconds. This means that we only query the discovery provider six times per minute regardless of the size of the cluster. 36 | 37 | ### Partition Healing Parameters 38 | 39 | The variables below are from within ringpop, and may be exposed to application 40 | developers in the future. 41 | 42 | * `T` -- partition healer execution interval, seconds.
43 | * `P` -- probability of executing the partition healing algorithm. 44 | 45 | ### Algorithm Flow 46 | 47 | When a ringpop cluster is partitioned, some nodes are viewed alive by some nodes, and faulty by others. It's worth noting that the node, in both cases, has the same incarnation number. The goal of the heal algorithm is to make this node alive for all nodes. Since the faulty state has precedence over the alive state, we need to bump the incarnation number of the node for it to be accepted as alive by others; this has to be done on both sides. 48 | 49 | #### Part 1 -- making memberships compatible 50 | 51 | Goal of the first part is to make sure the membership lists of both partitions are _compatible_. That is, if they are merged according to the SWIM rules, no new faulty nodes should be created. To reach our goal we need to bump the incarnation numbers of nodes on both sides of the partition (because a state with a higher incarnation number always has precedence regardless of the status). Here's how we do it: 52 | 53 | 1. `c` (coordinator) downloads `t`'s (target's, in the other partition) membership list by doing a `/join` call. 54 | 2. `c` pings node `t` saying: the nodes that are faulty according to `c`, but alive according to `t`, are suspects. Then `t` will disseminate suspect messages to its own partition, and all nodes in `t`'s partition will reincarnate. 55 | 3. mark all `c`'s nodes, which are faulty according to `t`, suspect, and disseminate that information in `c`'s cluster. That will trigger the `c`'s partition to reincarnate. 56 | 57 | After reincarnations are complete, the membership lists are compatible. Now we need to merge them. 58 | 59 | #### Part 2 - merging memberships 60 | 61 | When membership lists from both partitions are _compatible_, they can be merged, and no new faulty nodes will be induced. How? 62 | 63 | 1. `c` applies the membership list of `t` locally. 64 | 2. 
`c` disseminates the changes to its own partition (according to `c`'s partition, now `t`'s partition is reachable). 65 | 3. `c` sends its membership to `t` over a ping, thus making `c`'s partition alive according to `t`. 66 | 67 | #### Conclusion 68 | 69 | Steps above describe how to heal two partitions in ringpop without inducing new faulty nodes and overloading the discovery provider. 70 | -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | Learn more about key concepts related to Ringpop. 4 | 5 | ## FAQ 6 | 7 | ## Glossary 8 | 9 | ### A 10 | - **Actor model**: Concurrent computation model used by Ringpop that allows messages to arrive concurrently, then processed one by one. Messages are placed in a mailbox based on the sharding key, and then processed one by one. Each request that’s processed one by one may result in some other request to another service, or a request for more actors to be spun up. 11 | - **Alive**: A membership status signifying the node is healthy, and not suspect, faulty, or damped. 12 | 13 | ### B 14 | 15 | - **Bad actor**: A slow node that’s overwhelmed by traffic. 16 | 17 | ### C 18 | 19 | ### D 20 | 21 | - **Damped**: Flap damping is a technique used to identify and evict bad nodes from a cluster. Flaps are detected by storing membership update history and penalizing nodes when a flap is detected. When the penalty exceeds a specified suppress limit, the node is damped. The damped status is disseminated throughout the cluster and the node is removed from the ring. 22 | 23 | ### E 24 | 25 | ### F 26 | 27 | - **Flap damping**: Flap damping is a technique used to identify and evict bad nodes from a cluster. 28 | - **FarmHash**: Hashing function used by Ringpop.
29 | - **Faulty**: A state of the node that is reached after a defined “suspect” period, where a node is unstable or not responding to pings from other nodes. A suspect period will begin, and if it ends with the node not recovering, the node is considered faulty and is removed from the ring. 30 | 31 | ### G 32 | 33 | - **Gossip**: A type of protocol where nodes disseminate information about each other using pings. 34 | 35 | ### H 36 | 37 | - **Handle or forward**: This is Ringpop’s forwarding approach. If a key hashes to an instance that is not the one that received the request, then that request is simply forwarded to the proper instance and everything is taken care of under the hood. This acts like a middleware layer for applications that before the request even gets to your business logic, it is already routed to the appropriate node. 38 | - **Hash ring**: Ringpop leverages consistent hashing to minimize the number of keys to rebalance when your application cluster is resized. Ringpop’s consistent hashing allows the nodes to rebalance themselves and evenly distribute traffic. Ringpop maintains a consistent hash ring of its members. Once members are discovered to join or leave the cluster, that information is added into the consistent hash ring. Then the instances’ addresses along that ring are hashed. 39 | 40 | ### I 41 | 42 | ### J 43 | 44 | ### K 45 | 46 | ### L 47 | 48 | ### M 49 | 50 | - **Membership list**: Ringpop uses a variation of SWIM to disseminate membership updates across the members of a membership list, which contains additional metadata like the incarnation number, instances’ addresses, and status (alive, suspect, faulty, etc.). Members ping each other in random fashion until they get through the full membership list, rotate the list, then repeat the full round of pinging. 51 | - **Multi-cast**: 52 | 53 | ### N 54 | 55 | ### O 56 | 57 | ### P 58 | 59 | - **Ping**: Ringpop uses pings to disseminate information and for fault detection. 
Members ping each other in random fashion until they get through the full membership list, rotate the list, then repeat the full round of pinging. 60 | 61 | ### Q 62 | 63 | ### R 64 | 65 | - **Replica points**: Ringpop adds a uniform number of replica points per node to spread the nodes around the ring for a more even distribution. Ringpop also adds a uniform number of replica points so the nodes and the hosts running these nodes are treated as homogeneous. 66 | - **Ringpop**: Ringpop is a library that brings application-layer sharding to your services, partitioning data in a way that’s reliable, scalable and fault tolerant. 67 | - **Ringpop forwarding**: 68 | 69 | ### S 70 | 71 | - **SERF**: Gossip-based membership that exchanges messages to quickly and efficiently communicate with nodes. 72 | - **Sharding**: A way of partitioning data, which Ringpop does at the application layer of your services in a way that’s reliable, scalable and fault tolerant. 73 | - **Suspect**: A state of the node where it is unstable or not responding to pings from other nodes. If nodes stay suspect during the pre-defined suspect period without recovering, it will then be considered faulty and removed from the ring. 74 | - **SWIM**: Scalable Weakly-consistent Infection-style Process Group Membership Protocol 75 | 76 | ### T 77 | 78 | - **TChannel**: TChannel is a network multiplexing and framing protocol for RPC. TChannel is the transport of choice for Ringpop’s proxying channel. 
79 | 80 | ### V 81 | 82 | ### W 83 | 84 | ### X 85 | 86 | ### Y 87 | 88 | ### Z 89 | 90 | ## Use Cases 91 | 92 | ## Papers 93 | 94 | - [BGP Route Flap Damping](http://www2.ensc.sfu.ca/~ljilja/cnl/pdf/steve_thesis.pdf) 95 | - [Dynamo: Amazon’s Highly Available Key-value Store](http://www.allthingsdistributed.com/files/amazon-dynamo-sosp2007.pdf) 96 | - [Efficient Reconciliation and Flow Control for Anti-Entropy Protocols](http://www.cs.cornell.edu/home/rvr/papers/flowgossip.pdf) 97 | - [Epidemic Broadcast Trees](http://www.gsd.inesc-id.pt/~jleitao/pdf/srds07-leitao.pdf) 98 | - [FarmHash](https://code.google.com/p/farmhash/) 99 | - [Riak](http://basho.com/riak/) 100 | - [SWIM Presentation Slides by Armon Dadgar from Hashicorp](https://speakerd.s3.amazonaws.com/presentations/5d140b302fbf01327e4e42c106afd3ef/2014-SWIM.pdf) 101 | - [SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf) 102 | - [TChannel](https://github.com/uber/tchannel) 103 | - [The ϕ Accrual Failure Detector](http://ddg.jaist.ac.jp/pub/HDY+04.pdf) 104 | - [Time, Clocks, and the Ordering of Events in a Distributed System](http://web.stanford.edu/class/cs240/readings/lamport.pdf) 105 | 106 | 107 | ## Presentations 108 | -------------------------------------------------------------------------------- /docs/running_ringpop.md: -------------------------------------------------------------------------------- 1 | # Running Ringpop 2 | Before we get to programming against Ringpop, let's just run it by itself and see what happens. There are several ways to accomplish this and they are documented below. There's nothing too fancy going on when Ringpop runs by itself. To reap the full potential of it, you'll need to embed it into your application and start divvying incoming requests based on the sharding key of your choice. 
No matter, we're here, in this section, to get a look at what happens in Ringpop at steady-state, how its membership protocol behaves and how to launch a standalone version of it from the command-line. Let's get to it! 3 | 4 | ## Running with tick-cluster 5 | `tick-cluster` is a utility located in the `scripts/` directory of the Ringpop repo that allows you to quickly spin up a Ringpop cluster of arbitrary size and test basic failure modes: suspending, killing and respawning nodes. 6 | 7 | To use `tick-cluster`, first clone the repo and install Ringpop's dependencies: 8 | 9 | ``` 10 | $ git clone git@github.com:uber/ringpop.git 11 | $ npm install 12 | ``` 13 | 14 | Then run `tick-cluster`: 15 | 16 | ``` 17 | $ ./scripts/tick-cluster.js [-n size-of-cluster] [-i interpreter-that-runs-program] 18 | ``` 19 | 20 | `tick-cluster` will spawn a child process for each node in the cluster. They will bootstrap themselves using an auto-generated `hosts.json` bootstrap file and converge on a single membership list within seconds. Commands can be issued against the cluster while `tick-cluster` runs. Press `h` or `?` to see which commands are available. 21 | 22 | Whenever it is specified, the program is run by an interpreter, otherwise the program should be a binary. The cluster size defaults to 5. 23 | 24 | Here's a sample of the output you may see after launching a 7-node cluster with `tick-cluster`: 25 | 26 | ``` 27 | $ ./scripts/tick-cluster.js -n 7 -i node ./main.js 28 | [init] 11:11:52.805 tick-cluster started d: debug flags, g: gossip, j: join, k: kill, K: revive all, l: sleep, p: protocol stats, q: quit, s: cluster stats, t: tick 29 | [cluster] 11:11:52.807 using 10.80.135.224 to listen 30 | [init] 11:11:52.818 started 7 procs: 76365, 76366, 76367, 76368, 76369, 76370, 76371 31 | ``` 32 | 33 | ## Running from the command-line 34 | Content coming soon... 35 | 36 | ## Administration 37 | Content coming soon... 38 | 39 | ## Configuration 40 | Content coming soon... 
41 | 42 | ## Deploying 43 | Content coming soon... 44 | 45 | ## Monitoring 46 | Ringpop emits stats by making use of the dependency it has on a Statsd- 47 | compatible client. It emits all stats with a prefix that includes its 48 | identity in the stat path, e.g. `ringpop.10_30_8_26_20600.*`; the dots 49 | and colon are replaced by underscores. The table below lists all stats 50 | that Ringpop emits: 51 | 52 | |Node.js Path|Description|Type 53 | |----|----|---- 54 | |changes.apply|Number of changes applied per membership update|gauge 55 | |changes.disseminate|Number of changes disseminated per request/response|gauge 56 | |checksum|Value of membership checksum|gauge 57 | |compute-checksum|Time required to compute membership checksum|timer 58 | |damp-req.recv|Damp-req request received|count 59 | |damp-req.send|Damp-req request sent|count 60 | |damper.damp-req.damped|Damp-req resulted in members being damped|count 61 | |damper.damp-req.error|Damp-req resulted in an error|count 62 | |damper.damp-req.inconclusive|Damp-req results were inconclusive|count 63 | |damper.flapper.added|Flap damping detected a flappy node|count 64 | |damper.flapper.removed|Flap damping removed a flappy node|count 65 | |damper.flappers|Number of current flappers|gauge 66 | |dissemination.bump-bypass|Number of times piggyback count is preserved after failed ping or ping-req|count 67 | |filtered-change|A change to be disseminated was deduped|count 68 | |full-sync|Number of full syncs transmitted|count 69 | |heal.triggered|Number of times the partition healing is initiated. 
Note: this stat will be emitted even if there is no faulty or unknown target found; the actual number of heal attempts can be measured using `heal.attempt`.|count 70 | |heal.attempt|Number of times a heal operation is performed to a target-node|count 71 | |join|Time required to complete join process successfully|timer 72 | |join.complete|Join process completed successfully|count 73 | |join.failed.destroyed|Join process failed because Ringpop had been destroyed|count 74 | |join.failed.err|Join process failed because of an error|count 75 | |join.recv|Join request received|count 76 | |join.retries|Number of retries required by join process|gauge 77 | |join.succeeded|Join process succeeded|count 78 | |lookup|Time required to perform a ring lookup|timer 79 | |make-alive|A member was declared alive|count 80 | |make-damped|A member was declared damped|count 81 | |make-faulty|A member was declared faulty|count 82 | |make-leave|A member was declared leave|count 83 | |make-suspect|A member was declared suspect|count 84 | |max-piggyback|Value of the max piggyback factor|gauge 85 | |membership-set.alive|A member was initialized in the alive state|count 86 | |membership-set.faulty|A member was initialized in the faulty state|count 87 | |membership-set.leave|A member was initialized in the leave state|count 88 | |membership-set.suspect|A member was initialized in the suspect state|count 89 | |membership-set.unknown|A member was initialized in an unknown state|count 90 | |membership-update.alive|A member was updated to be alive|count 91 | |membership-update.faulty|A member was updated to be faulty|count 92 | |membership-update.leave|A member was updated in the leave state|count 93 | |membership-update.suspect|A member was updated to be suspect|count 94 | |membership-update.unknown|A member was updated in the unknown state|count 95 | |membership.checksum-computed|Membership checksum was computed|count 96 | |not-ready.ping|Ping received before Ringpop was ready|count 97 | 
|not-ready.ping-req|Ping-req received before Ringpop was ready|count 98 | |num-members|Number of members in the membership|gauge 99 | |ping|Ping response time|timer 100 | |ping-req|Ping-req response time|timer 101 | |ping-req-ping|Indirect ping sent|timer 102 | |ping-req.other-members|Number of members selected for ping-req fanout|timer 103 | |ping-req.recv|Ping-req request received|count 104 | |ping-req.send|Ping-req request sent|count 105 | |ping.recv|Ping request received|count 106 | |ping.send|Ping request sent|count 107 | |protocol.damp-req|Damp-req response time|timer 108 | |protocol.delay|How often gossip protocol is expected to tick|timer 109 | |protocol.frequency|How often gossip protocol actually ticks|timer 110 | |refuted-update|A member refuted an update for itself|count 111 | |requestProxy.checksumsDiffer|Checksums differed when a forwarded request was received|count 112 | |requestProxy.egress|Request was forwarded|count 113 | |requestProxy.inflight|Number of inflight forwarded requests|gauge 114 | |requestProxy.ingress|Forward request was received|count 115 | |requestProxy.miscount.decrement|Number of inflight requests were miscounted after decrement|count 116 | |requestProxy.miscount.increment|Number of inflight requests were miscounted after increment|count 117 | |requestProxy.refused.eventloop|Request was refused due to event loop lag|count 118 | |requestProxy.refused.inflight|Request was refused due to number of inflight requests|count 119 | |requestProxy.retry.aborted|Forwarded request retry was aborted|count 120 | |requestProxy.retry.attempted|Forwarded request retry was attempted|count 121 | |requestProxy.retry.failed|Forwarded request failed after retries|count 122 | |requestProxy.retry.reroute.local|Forwarded request retry was rerouted to local node|count 123 | |requestProxy.retry.reroute.remote|Forwarded request retry was rerouted to remote node|count 124 | |requestProxy.retry.succeeded|Forwarded request succeeded after retries|count 125 | 
|requestProxy.send.error|Forwarded request failed|count 126 | |requestProxy.send.success|Forwarded request was successful|count 127 | |ring.change|Hash ring keyspace changed|gauge 128 | |ring.checksum-computed|Hash ring checksum was computed|count 129 | |ring.server-added|Node (and its points) added to hash ring|count 130 | |ring.server-removed|Node (and its points) removed from hash ring|count 131 | |updates|Number of membership updates applied|timer 132 | 133 | ## Benchmarks 134 | Content coming soon... 135 | 136 | ## Troubleshooting 137 | Content coming soon... 138 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ringpop-common", 3 | "description": "The home for all things Ringpop", 4 | "contributors": [ 5 | "Jeff Wolski " 6 | ], 7 | "version": "0.0.1", 8 | "repository": "git://github.com/uber/ringpop-common.git", 9 | "dependencies": { 10 | "cli-color": "^0.3.2", 11 | "commander": "^2.6.0", 12 | "farmhash": "^1.1.0", 13 | "grafana-dash-gen": "uber/grafana-dash-gen#41a17abfa174fdda048f9a1084bc302dec28e6ff", 14 | "lodash": "^4.6.1", 15 | "strformat": "^0.0.7", 16 | "uber-licence": "^2.0.1", 17 | "tchannel": "^3.6.24", 18 | "zero-config": "^5.0.0" 19 | }, 20 | "pre-commit": [ 21 | "check-licence" 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /schema/admin-lookup.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/AdminLookupResponse", 3 | "title": "/admin/lookup response", 4 | "type": "object", 5 | "properties": { 6 | "dest": { 7 | "type": "string" 8 | } 9 | }, 10 | "required": [ 11 | "dest" 12 | ], 13 | "additionalProperties": false 14 | } 15 | -------------------------------------------------------------------------------- /schema/admin-stats-response.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "id": "/StatsResponse", 3 | "title": "Admin Stats Response", 4 | "type": "object", 5 | "properties": { 6 | "hooks": { 7 | "type": "null" 8 | }, 9 | "membership": { 10 | "type": "object", 11 | "properties": { 12 | "checksum": { 13 | "type": "number" 14 | }, 15 | "members": { 16 | "type": "array", 17 | "items": { 18 | "type": "object", 19 | "properties": { 20 | "address": { 21 | "type": "string" 22 | }, 23 | "status": { 24 | "$ref": "/Status" 25 | }, 26 | "incarnationNumber": { 27 | "type": "number" 28 | }, 29 | "dampScore": { 30 | "type": "number" 31 | }, 32 | "labels": { 33 | "$ref": "/Labels" 34 | } 35 | }, 36 | "required": [ 37 | "address", 38 | "status", 39 | "incarnationNumber" 40 | ], 41 | "additionalProperties": false 42 | } 43 | } 44 | }, 45 | "required": [ 46 | "checksum", 47 | "members" 48 | ], 49 | "additionalProperties": false 50 | }, 51 | "process": { 52 | "type": "object", 53 | "properties": { 54 | "memory": { 55 | "type": "object", 56 | "properties": { 57 | "rss": { 58 | "type": "number" 59 | }, 60 | "heapTotal": { 61 | "type": "number" 62 | }, 63 | "heapUsed": { 64 | "type": "number" 65 | } 66 | }, 67 | "required": [ 68 | "rss", 69 | "heapTotal", 70 | "heapUsed" 71 | ], 72 | "additionalProperties": false 73 | }, 74 | "pid": { 75 | "type": "number" 76 | } 77 | }, 78 | "required": [ 79 | "memory", 80 | "pid" 81 | ], 82 | "additionalProperties": false 83 | }, 84 | "protocol": { 85 | "type": "object", 86 | "properties": { 87 | "timing": { 88 | "type": "object" 89 | }, 90 | "protocolRate": { 91 | "type": "number" 92 | }, 93 | "clientRate": { 94 | "type": "number" 95 | }, 96 | "serverRate": { 97 | "type": "number" 98 | }, 99 | "totalRate": { 100 | "type": "number" 101 | } 102 | }, 103 | "required": [ 104 | "timing", 105 | "protocolRate", 106 | "clientRate", 107 | "serverRate", 108 | "totalRate" 109 | ], 110 | "additionalProperties": false 111 | }, 112 | "ring": { 113 
| "type": "object", 114 | "properties": { 115 | "checksum": { 116 | "type": "number" 117 | }, 118 | "checksums": { 119 | "type": "object", 120 | "patternProperties": { 121 | "^.*$": { 122 | "type": "number" 123 | } 124 | } 125 | }, 126 | "servers": { 127 | "type": "array", 128 | "items": { 129 | "type": "string" 130 | } 131 | } 132 | }, 133 | "required": [ 134 | "checksum", 135 | "servers" 136 | ], 137 | "additionalProperties": false 138 | }, 139 | "version": { 140 | "type": "string" 141 | }, 142 | "timestamp": { 143 | "type": "number" 144 | }, 145 | "uptime": { 146 | "type": "number" 147 | }, 148 | "tchannelVersion": { 149 | "type": "string" 150 | } 151 | }, 152 | "required": [ 153 | "hooks", 154 | "membership", 155 | "process", 156 | "protocol", 157 | "ring", 158 | "version", 159 | "timestamp", 160 | "uptime", 161 | "tchannelVersion" 162 | ], 163 | "additionalProperties": false 164 | } 165 | -------------------------------------------------------------------------------- /schema/change.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/Change", 3 | "title": "Change", 4 | "type": "object", 5 | "properties": { 6 | "address": { 7 | "type": "string" 8 | }, 9 | "status": { 10 | "$ref": "/Status" 11 | }, 12 | "incarnationNumber": { 13 | "type": "number" 14 | }, 15 | "source": { 16 | "type": "string" 17 | }, 18 | "id": { 19 | "type": "string" 20 | }, 21 | "sourceIncarnationNumber": { 22 | "type": "number" 23 | }, 24 | "timestamp": { 25 | "type": "number" 26 | }, 27 | "tombstone": { 28 | "type": "boolean" 29 | }, 30 | "labels": { 31 | "$ref": "/Labels" 32 | } 33 | }, 34 | "required": [ 35 | "address", 36 | "status", 37 | "incarnationNumber" 38 | ], 39 | "additionalProperties": false 40 | } 41 | -------------------------------------------------------------------------------- /schema/labels.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/Labels", 3 | "title": 
"Labels", 4 | "type": "object", 5 | "patternProperties": { 6 | "^.*$": { 7 | "type": "string" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /schema/protocol-join-request.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/JoinRequest", 3 | "title": "Join Request", 4 | "type": "object", 5 | "properties": { 6 | "app": { 7 | "type": "string" 8 | }, 9 | "source": { 10 | "type": "string" 11 | }, 12 | "incarnationNumber": { 13 | "type": "number" 14 | }, 15 | "timeout": { 16 | "type": "number" 17 | }, 18 | "labels": { 19 | "$ref": "/Labels" 20 | } 21 | }, 22 | "required": [ 23 | "app", 24 | "source", 25 | "incarnationNumber" 26 | ], 27 | "additionalProperties": false 28 | } 29 | -------------------------------------------------------------------------------- /schema/protocol-join-response.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/JoinResponse", 3 | "title": "Join Response", 4 | "type": "object", 5 | "properties": { 6 | "app": { 7 | "type": "string" 8 | }, 9 | "coordinator": { 10 | "type": "string" 11 | }, 12 | "membership": { 13 | "type": "array", 14 | "items": { 15 | "type": "object", 16 | "properties": { 17 | "source": { 18 | "type": "string" 19 | }, 20 | "address": { 21 | "type": "string" 22 | }, 23 | "status": { 24 | "$ref": "/Status" 25 | }, 26 | "incarnationNumber": { 27 | "type": "number" 28 | }, 29 | "labels": { 30 | "$ref": "/Labels" 31 | }, 32 | 33 | "timestamp": { 34 | "type": "number" 35 | }, 36 | "sourceIncarnationNumber": { 37 | "type": "number" 38 | }, 39 | "tombstone": { 40 | "type": "boolean" 41 | } 42 | }, 43 | "required": [ 44 | "source", 45 | "address", 46 | "status", 47 | "incarnationNumber" 48 | ], 49 | "additionalProperties": false 50 | } 51 | }, 52 | "membershipChecksum": { 53 | "type": "number" 54 | } 55 | }, 56 | "required": [ 57 | "app", 58 | "coordinator", 59 | 
"membership", 60 | "membershipChecksum" 61 | ], 62 | "additionalProperties": false 63 | } 64 | -------------------------------------------------------------------------------- /schema/protocol-ping-request.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/PingRequest", 3 | "title": "Ping Request", 4 | "type": "object", 5 | "properties": { 6 | "checksum": { 7 | "type": "number" 8 | }, 9 | "changes": { 10 | "type": "array", 11 | "items": { 12 | "$ref": "/Change" 13 | } 14 | }, 15 | "source": { 16 | "type": "string" 17 | }, 18 | "app": { 19 | "type": "string" 20 | }, 21 | "sourceIncarnationNumber": { 22 | "type": "number" 23 | } 24 | }, 25 | "required": [ 26 | "checksum", 27 | "changes", 28 | "source", 29 | "sourceIncarnationNumber" 30 | ], 31 | "additionalProperties": false 32 | } 33 | -------------------------------------------------------------------------------- /schema/protocol-ping-response.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/PingResponse", 3 | "title": "Ping Response", 4 | "type": "object", 5 | "properties": { 6 | "checksum": { 7 | "type": "number" 8 | }, 9 | "changes": { 10 | "type": "array", 11 | "items": { 12 | "$ref": "/Change" 13 | } 14 | }, 15 | "source": { 16 | "type": "string" 17 | }, 18 | "app": { 19 | "type": "string" 20 | }, 21 | "sourceIncarnationNumber": { 22 | "type": "number" 23 | }, 24 | "target": { 25 | "type": "string" 26 | } 27 | }, 28 | "required": [ 29 | "changes" 30 | ], 31 | "additionalProperties": false 32 | } 33 | -------------------------------------------------------------------------------- /schema/protocol-pingreq-request.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/PingReqRequest", 3 | "title": "PingReq Request", 4 | "type": "object", 5 | "properties": { 6 | "checksum": { 7 | "type": "number" 8 | }, 9 | "changes": { 10 | "type": "array", 11 | "items": 
{ 12 | "$ref": "/Change" 13 | } 14 | }, 15 | "source": { 16 | "type": "string" 17 | }, 18 | "sourceIncarnationNumber": { 19 | "type": "number" 20 | }, 21 | "target": { 22 | "type": "string" 23 | } 24 | }, 25 | "required": [ 26 | "checksum", 27 | "changes", 28 | "source", 29 | "sourceIncarnationNumber", 30 | "target" 31 | ], 32 | "additionalProperties": false 33 | } 34 | -------------------------------------------------------------------------------- /schema/protocol-pingreq-response.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/PingReqResponse", 3 | "title": "PingReq Response", 4 | "type": "object", 5 | "properties": { 6 | "changes": { 7 | "type": "array", 8 | "items": { 9 | "$ref": "/Change" 10 | } 11 | }, 12 | "pingStatus": { 13 | "type": "boolean" 14 | }, 15 | "target": { 16 | "type": "string" 17 | } 18 | }, 19 | "required": [ 20 | "changes", 21 | "pingStatus", 22 | "target" 23 | ], 24 | "additionalProperties": false 25 | } 26 | -------------------------------------------------------------------------------- /schema/status.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "/Status", 3 | "title": "Status", 4 | "enum": [ 5 | "alive", 6 | "suspect", 7 | "faulty", 8 | "leave", 9 | "tombstone" 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # ringpop integration tests 2 | 3 | These tests can be run against any ringpop application that exposes the `/admin/*` endpoints and command-line parameters specified below. 4 | 5 | The ringpop-node and ringpop-go projects provide test executables that are compatible out-of-the-box. See the *Examples* section below for instructions on how to run the integration tests against those codebases.
6 | 7 | ## Usage 8 | 9 | it-tests.js performs an integration test on a ringpop program 10 | ``` 11 | Usage: it-tests [options] 12 | 13 | Options: 14 | -h, --help output usage information 15 | -V, --version output the version number 16 | -s, --sizes Cluster sizes to test against. Default: '[1,2,3,4,5,6,7,10,21,25,30]' 17 | -i, --interpreter Interpreter that runs program. 18 | ``` 19 | 20 | To run the tests, pass the path to a ringpop application executable: 21 | 22 | cd test/ 23 | npm install 24 | node ./it-tests.js -i node ~/uber/projects/ringpop-node/main.js 25 | 26 | ## Command-line parameters 27 | 28 | The test scripts will invoke the ringpop executable with the following command-line parameters. The application must start up and accept these parameters, otherwise the tests will fail: 29 | 30 | * `--hosts=`: Should set the path to the JSON file containing a list of bootstrap hosts. The test scripts will create this file automatically. 31 | * `--listen=`: Should set the interface address and port that the ringpop application should listen on. The test scripts will use this address to connect via TChannel and issue requests. 32 | 33 | ## Examples 34 | 35 | There are test executables included in both the [ringpop-node](https://github.com/uber/ringpop-node) and [ringpop-go](https://github.com/uber/ringpop-go) projects. 
36 | 37 | ### Running integration tests against ringpop-node 38 | 39 | Clone and install dependencies for tests in ringpop-common: 40 | 41 | git clone git@github.com:uber/ringpop-common.git 42 | (cd ringpop-common/test && npm install) 43 | 44 | Clone and install dependencies for ringpop-node: 45 | 46 | git clone git@github.com:uber/ringpop-node.git 47 | (cd ringpop-node && npm install) 48 | 49 | Run the tests: 50 | 51 | node ringpop-common/test/it-tests.js ringpop-node/main.js 52 | 53 | ### Running integration tests against ringpop-go 54 | 55 | Clone and install dependencies for tests in ringpop-common: 56 | 57 | git clone git@github.com:uber/ringpop-common.git 58 | (cd ringpop-common/test && npm install) 59 | 60 | Install ringpop-go and build the `testpop` executable: 61 | 62 | go get -u github.com/uber/ringpop-go 63 | (cd $GOPATH/src/github.com/uber/ringpop-go && make testpop) 64 | 65 | Run the tests: 66 | 67 | node ringpop-common/test/it-tests.js $GOPATH/src/github.com/uber/ringpop-go/testpop 68 | 69 | # Understanding the integration tests 70 | 71 | This is a written summary of a one-hour workshop spent digging into the integration tests and asking questions of the people who implemented them. It might be out of date (we will try to keep it up to date though), but the text below should be a good starting point while you wrap your head around the tests. 72 | 73 | ## Glossary 74 | 75 | *Slanted names* denote general concepts; names in `monospace` are common variable names. 76 | 77 | * *SUT*: Subject Under Test. Also referred to as "the real node". 78 | * *testpop*: very basic application using ringpop. Commonly used by 79 | tick-cluster. It also acts as the SUT for the integration test. 80 | * `t`: test object. Instance of `tap`. 81 | * `tc`: test coordinator. The thing that runs and controls testpop, SUT and 82 | fake nodes. Handles coordination between the fake and real nodes. 83 | * `n` : size of the cluster. 84 | * `ns`: list of cluster sizes. 
Treat it like *plural n* -- *ns*. 85 | * `nodeIx`: node index. Any variable that ends with `Ix` is an index variable. 86 | * `cb`: callback. 87 | 88 | ## High-level overview 89 | 90 | Tests are usually composed of a *Real Node* (*SUT*) and a number of fake nodes. The real node is the actual ringpop instance (`testpop`), and the fake nodes are the test harness. 91 | 92 | The test harness (fake nodes) sends messages to the real node, and listens for what the SUT sends back. The test harness then asserts whether whatever the SUT is sending back matches expectations. With this black-box structure, tests are verifying the node behaves as expected given certain messages. Therefore, this test harness is used to measure feature parity between different ringpop implementations. 93 | 94 | Here's what it looks like in the code: 95 | 96 | 1. SUT is initialized (this is done by the function `test2` in `test-util.js`). 97 | 2. User callback is executed, which returns a list of closures. See documentation of `test-util.test2()`. 98 | 3. Every closure is executed with a list of messages from the SUT. See documentation of `ringpop-assert.validate()`. 99 | 4. The closure either succeeds by calling a callback, or fails by calling a well-documented function in `t`. 100 | -------------------------------------------------------------------------------- /test/admin-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | var events = require('./events'); 22 | var test2 = require('./test-util').test2; 23 | var prepareCluster = require('./test-util').prepareCluster; 24 | var dsl = require('./ringpop-assert'); 25 | var getClusterSizes = require('./it-tests').getClusterSizes; 26 | 27 | // TODO endpoints 28 | // /admin/debugClear (NOOP in go, toggle between ping logs in node2) 29 | // /admin/debugSet 30 | 31 | test2('endpoint: /admin/gossip/stop', getClusterSizes(), 5000, prepareCluster(function(t,tc,n) { 32 | return [ 33 | dsl.callEndpoint(t, tc, '/admin/gossip/stop'), 34 | dsl.validateEventBody(t, tc, { 35 | type: events.Types.AdminGossipStop, 36 | direction: 'response' 37 | }, "Wait for AdminGossipStop response", function (response) { 38 | return true; 39 | }), 40 | 41 | dsl.wait(10), // racecondition if a ping was inbound during stopping 42 | dsl.consumePings(t, tc), 43 | dsl.wait(1000), // normally you would expect pings to be transfered in this time 44 | 45 | dsl.expectOnlyPings(t, tc, 0) 46 | ]; 47 | })); 48 | 49 | test2('endpoint: /admin/gossip/start', getClusterSizes(), 5000, prepareCluster(function(t, tc, n) { 50 | return [ 51 | dsl.callEndpoint(t, tc, '/admin/gossip/stop'), 52 | dsl.validateEventBody(t, tc, { 53 | type: events.Types.AdminGossipStop, 54 | direction: 'response' 55 | }, "Wait for AdminGossipStop response", function (response) { 56 | return true; 57 | }), 58 | 59 | dsl.wait(10), // racecondition if a ping was inbound during stopping 60 | dsl.consumePings(t, tc), 61 | 62 | dsl.callEndpoint(t, tc, '/admin/gossip/start'), 63 | dsl.validateEventBody(t, tc, { 64 | type: events.Types.AdminGossipStart, 65 | direction: 'response' 66 | }, "Wait for AdminGossipStart response", function (response) { 67 | return true; 68 | }), 69 | 70 | dsl.waitForPing(t, tc) 71 | ]; 72 | })); 73 | 74 | test2('endpoint: /admin/gossip/tick', getClusterSizes(), 5000, prepareCluster(function(t, tc, n) { 75 | return [ 76 | // stop gossip before invoking tick to make sure the tick is 
from the invocation 77 | dsl.callEndpoint(t, tc, '/admin/gossip/stop'), 78 | dsl.validateEventBody(t, tc, { 79 | type: events.Types.AdminGossipStop, 80 | direction: 'response' 81 | }, "Wait for AdminGossipStop response", function (response) { 82 | return true; 83 | }), 84 | 85 | dsl.wait(10), // racecondition if a ping was inbound during stopping 86 | dsl.consumePings(t, tc), 87 | 88 | dsl.callEndpoint(t, tc, '/admin/gossip/tick'), 89 | dsl.validateEventBody(t, tc, { 90 | type: events.Types.AdminGossipTick, 91 | direction: 'response' 92 | }, "Wait for AdminGossipTick response", function (response) { 93 | console.log("responded!"); 94 | return true; 95 | }), 96 | 97 | dsl.wait(1000), // make sure it only send 1 ping 98 | 99 | dsl.expectOnlyPings(t, tc, 1) 100 | ]; 101 | })); 102 | 103 | var lookupKey = 'Hello World ' + Math.random(); 104 | test2('endpoint: /admin/lookup ('+lookupKey+')', getClusterSizes(), 5000, prepareCluster(function(t, tc, n) { 105 | return [ 106 | dsl.callEndpoint(t, tc, '/admin/lookup', { key: lookupKey}), 107 | dsl.validateEventBody(t, tc, { 108 | type: events.Types.AdminLookup, 109 | direction: 'response' 110 | }, "Wait for AdminLookup response", function (response) { 111 | var should = tc.lookup(lookupKey); 112 | t.equal(response.body.dest, should, "Testing chosen dest"); 113 | return true; 114 | }) 115 | ]; 116 | })); 117 | 118 | test2('endpoint: /admin/stats', getClusterSizes(), 5000, prepareCluster(function(t, tc, n) { 119 | return [ 120 | dsl.callEndpoint(t, tc, '/admin/stats'), 121 | dsl.validateEventBody(t, tc, { 122 | type: events.Types.Stats, 123 | direction: 'response' 124 | }, "Wait for Stats response", function (response) { 125 | // TODO do validation of specific values, the payload structure is already validated 126 | return true; 127 | }) 128 | ]; 129 | })); 130 | 131 | test2('endpoint: /admin/member/leave', getClusterSizes(), 10000, prepareCluster(function(t, tc, n) { 132 | return [ 133 | // this makes testing the piggy backed 
status easier 134 | dsl.drainDisseminator(t, tc), 135 | 136 | // instruct node to leave cluster 137 | dsl.callEndpoint(t, tc, '/admin/member/leave'), 138 | dsl.validateEventBody(t, tc, { 139 | type: events.Types.AdminMemberLeave, 140 | direction: 'response' 141 | }, "Waiting /admin/member/leave response", function (response) { 142 | return response.arg3.toString() === 'ok' || (response.body && response.body.status === 'ok'); 143 | }), 144 | 145 | // check status in ping 146 | dsl.sendPing(t, tc, 0), 147 | dsl.validateEventBody(t, tc, { 148 | type: events.Types.Ping, 149 | direction: 'response' 150 | }, "Test if ping contains leave message for SUT", function (ping) { 151 | return ping.body && 152 | ping.body.changes && 153 | ping.body.changes.length === 1 && 154 | ping.body.changes[0].address === tc.sutHostPort && 155 | ping.body.changes[0].status === 'leave'; 156 | }) 157 | ]; 158 | })); 159 | 160 | test2('endpoint: /admin/member/join', getClusterSizes(), 10000, prepareCluster(function(t, tc, n) { 161 | return [ 162 | // this makes testing the piggy backed status easier 163 | dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point 164 | 165 | // instruct node to leave cluster 166 | dsl.callEndpoint(t, tc, '/admin/member/leave'), 167 | dsl.validateEventBody(t, tc, { 168 | type: events.Types.AdminMemberLeave, 169 | direction: 'response' 170 | }, "Waiting /admin/member/leave response", function (response) { 171 | return response.arg3.toString() === 'ok' || (response.body && response.body.status === 'ok'); 172 | }), 173 | 174 | // check status in ping 175 | dsl.sendPing(t, tc, 0), 176 | dsl.validateEventBody(t, tc, { 177 | type: events.Types.Ping, 178 | direction: 'response' 179 | }, "Test if ping contains leave message for SUT", function (ping) { 180 | return ping.body && 181 | ping.body.changes && 182 | ping.body.changes.length === 1 && 183 | ping.body.changes[0].address === tc.sutHostPort && 184 | 
ping.body.changes[0].status === 'leave'; 185 | }), 186 | 187 | // rejoin 188 | dsl.callEndpoint(t, tc, '/admin/member/join'), 189 | dsl.validateEventBody(t, tc, { 190 | type: events.Types.AdminMemberJoin, 191 | direction: 'response' 192 | }, "Waiting /admin/member/join response", function (response) { 193 | return response.arg3.toString() === 'rejoined' || (response.body && response.body.status === 'rejoined'); 194 | }), 195 | 196 | // check status in ping 197 | dsl.sendPing(t, tc, 0), 198 | dsl.validateEventBody(t, tc, { 199 | type: events.Types.Ping, 200 | direction: 'response' 201 | }, "Test if ping contains alive message for SUT", function (ping) { 202 | return ping.body && 203 | ping.body.changes && 204 | ping.body.changes.length === 1 && 205 | ping.body.changes[0].address === tc.sutHostPort && 206 | ping.body.changes[0].status === 'alive'; 207 | }) 208 | ]; 209 | })); 210 | -------------------------------------------------------------------------------- /test/bidir-full-sync-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var events = require('./events'); 22 | var test2 = require('./test-util').test2; 23 | var dsl = require('./ringpop-assert'); 24 | var prepareCluster = require('./test-util').prepareCluster; 25 | var getClusterSizes = require('./it-tests').getClusterSizes; 26 | 27 | var async = require('async'); 28 | var _ = require('lodash'); 29 | 30 | test2('bidirectional full sync test', getClusterSizes(5), 20000, 31 | prepareCluster(function(t, tc, n) { 32 | return [ 33 | dsl.assertStats(t, tc, n+1, 0, 0), 34 | dsl.drainDisseminator(t, tc), 35 | 36 | // create faulties so that the membership of the faulty nodes is 37 | // out of sync with the membership of the SUT 38 | makeThreeFakeNodesFaulty(t, tc), 39 | 40 | // this causes the SUT to full sync since the chechsums do not match 41 | dsl.sendPing(t, tc, 0), 42 | dsl.waitForPingResponse(t, tc, 0), 43 | 44 | // the SUT sends a join request to perform a bidirectional full sync 45 | dsl.waitForJoins(t, tc, 1), 46 | dsl.assertStats(t, tc, n+1-3, 0, 3), 47 | ]; 48 | }) 49 | ); 50 | 51 | // As a safety check, both the ringpop-go and ringpop-node limit the number 52 | // (defaults to 5) of concurrent bidirectional full syncs. 53 | // 54 | // A bidirectional full sync is triggered by a ping to a node without outstanding 55 | // membership changes where the checksum in the ping doesn't match the node's checksum. 56 | // This results in the node sending it's full membership. 
At the same time, the node 57 | // will send a join to the source of the ping request to get its full membership list. 58 | // 59 | // This test pauses responding to the join request to make sure there are multiple 60 | // joins happening at the same time. If the throttling is working, there should 61 | // never be more than 5 joins in the queue. 62 | test2('bidirectional full sync throttling test', getClusterSizes(3), 20000, 63 | prepareCluster(function(t, tc, n) { 64 | var numberOfFullSyncsToTrigger = Math.max(6, n*2); 65 | var expectedJoins = 5; 66 | return [ 67 | dsl.assertStats(t, tc, n+1, 0, 0), 68 | dsl.drainDisseminator(t, tc), 69 | 70 | assertReverseFullSyncThrottling(t, tc, 0, numberOfFullSyncsToTrigger, expectedJoins), 71 | 72 | // wait for the joins to be handled. 73 | dsl.waitForJoins(t, tc, expectedJoins), 74 | dsl.expectOnlyPings(t, tc), 75 | dsl.assertStats(t, tc, n+1, 0, 0) 76 | ]; 77 | }) 78 | ); 79 | 80 | 81 | function assertReverseFullSyncThrottling(t, tc, nodeIx, numberOfFullSyncsToTrigger, expectedJoins){ 82 | 83 | var cachedJoins; 84 | var originalJoinHandler; 85 | 86 | return [ 87 | // pause handling joins to stall reverse full syncs 88 | overwriteJoinHandlerWithPause(tc, nodeIx), 89 | 90 | // trigger the reverse full syncs 91 | _.times(numberOfFullSyncsToTrigger, function(){ 92 | return [ 93 | dsl.sendPing(t, tc, nodeIx, undefined, {checksum: 1}), 94 | dsl.waitForPingResponse(t, tc, nodeIx) 95 | ]; 96 | }), 97 | 98 | // verify the number of (cached) join requests 99 | verifyCachedJoins(t, expectedJoins), 100 | 101 | // process the cached join requests and restore the original join-handler.
102 | handleCachedJoinsAndRestoreJoinHandler(tc, nodeIx) 103 | ]; 104 | 105 | function overwriteJoinHandlerWithPause(tc, idx) { 106 | return function overwriteJoinHandlerWithPause(list, cb) { 107 | var fakeNode = tc.fakeNodes[idx]; 108 | 109 | cachedJoins = []; 110 | originalJoinHandler = fakeNode.endpoints['Join'].handler; 111 | fakeNode.endpoints['Join'].handler = function() { 112 | cachedJoins.push(arguments) 113 | }; 114 | 115 | cb(list); 116 | } 117 | } 118 | 119 | function verifyCachedJoins(t, expectedJoins) { 120 | return function verifyCachedJoins(list, cb) { 121 | if (cachedJoins.length < expectedJoins) { 122 | cb(null); 123 | return; 124 | } 125 | t.equal(cachedJoins.length, expectedJoins); 126 | 127 | cb(list); 128 | } 129 | } 130 | 131 | function handleCachedJoinsAndRestoreJoinHandler(tc, idx) { 132 | return function handleCachedJoinsAndRestoreJoinHandler(list, cb) { 133 | var fakeNode = tc.fakeNodes[idx]; 134 | fakeNode.endpoints['Join'].handler = originalJoinHandler; 135 | 136 | for(var i=0; i< cachedJoins.length; i++) { 137 | originalJoinHandler.apply(fakeNode, cachedJoins[i]); 138 | } 139 | 140 | cb(list); 141 | } 142 | } 143 | } 144 | 145 | // makeThreeFakeNodesFaulty marks the first three fake nodes as faulty but doesn't inform the SUT 146 | // of the changes. 147 | function makeThreeFakeNodesFaulty(t, tc) { 148 | var f = _.once(function (list, cb) { 149 | for (var i=0; i < 3; i++) { 150 | tc.fakeNodes[i].status = 'faulty'; 151 | } 152 | 153 | cb(list) 154 | }); 155 | f.callerName = 'changeStatus'; 156 | return f; 157 | } 158 | -------------------------------------------------------------------------------- /test/double-reincarnation.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc.
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | var dsl = require('./ringpop-assert'); 22 | var getClusterSizes = require('./it-tests').getClusterSizes; 23 | var prepareCluster = require('./test-util').prepareCluster; 24 | var test2 = require('./test-util').test2; 25 | 26 | test2('ringpop should not bump incarnation number when gossip is older', getClusterSizes(2), 20000, 27 | prepareCluster(function(t, tc, n) { return [ 28 | // do not disable node 29 | dsl.sendPing(t, tc, 0, { 30 | sourceIx: 0, 31 | subjectIx: 'sut', 32 | status: 'suspect', 33 | subjectIncNoDelta: -1 // send a gossip with an older incarnation number 34 | }), 35 | dsl.waitForPingResponse(t, tc, 0, 1), 36 | 37 | // assert stats relies on the incarnation number not being bumped 38 | dsl.assertStats(t, tc, n+1, 0, 0), 39 | ];}) 40 | ); -------------------------------------------------------------------------------- /test/events.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var safeJSONParse = require('./util').safeParse; 22 | 23 | var Types = { 24 | Join: 'Join', 25 | Ping: 'Ping', 26 | PingReq: 'PingReq', 27 | ProxyReq: 'ProxyReq', 28 | UnknownRequest: 'UnknownRequest', 29 | Stats: 'Stats', 30 | AdminGossipStart: 'AdminGossipStart', 31 | AdminGossipStop: 'AdminGossipStop', 32 | AdminGossipTick: 'AdminGossipTick', 33 | AdminLookup: 'AdminLookup', 34 | AdminMemberLeave: 'AdminMemberLeave', 35 | AdminMemberJoin: 'AdminMemberJoin', 36 | AdminHealPartitionDisco: 'AdminHealPartitionDisco', 37 | AdminReap: 'AdminReap' 38 | }; 39 | 40 | function endpointToEventType(endpoint) { 41 | switch(endpoint) { 42 | case '/protocol/join': 43 | return Types.Join; 44 | case '/protocol/ping': 45 | return Types.Ping; 46 | case '/protocol/ping-req': 47 | return Types.PingReq; 48 | case '/proxy/req': 49 | return Types.ProxyReq; 50 | case '/admin/stats': 51 | return Types.Stats; 52 | case '/admin/gossip/start': 53 | return Types.AdminGossipStart; 54 | case '/admin/gossip/stop': 55 | return Types.AdminGossipStop; 56 | case '/admin/gossip/tick': 57 | return Types.AdminGossipTick; 58 | case '/admin/lookup': 59 | return Types.AdminLookup; 60 | case '/admin/member/leave': 61 | return Types.AdminMemberLeave; 62 | case '/admin/member/join': 63 | return Types.AdminMemberJoin; 64 | case '/admin/healpartition/disco': 65 | return Types.AdminHealPartitionDisco; 66 | case '/admin/reap': 67 | return Types.AdminReap 68 | default: 69 | return Types.UnknownRequest; 70 | } 71 | } 72 | 73 | function RequestEvent(req, arg2, arg3, receiver) { 74 | this.type = endpointToEventType(req.endpoint); 75 | this.direction = 'request'; 76 | this.endpoint = req.endpoint; 77 | this.time = 
Date.now(); 78 | this.receiver = receiver; 79 | this.req = req; 80 | this.arg2 = arg2; 81 | this.arg3 = arg3; 82 | 83 | this.head = safeJSONParse(arg2); 84 | this.body = safeJSONParse(arg3); 85 | } 86 | 87 | function ResponseEvent(res, arg2, arg3, receiver) { 88 | this.type = endpointToEventType(res.span.name); 89 | this.direction = 'response'; 90 | this.endpoint = res.span.name; 91 | this.time = Date.now(); 92 | this.receiver = receiver; 93 | this.res = res; 94 | this.arg2 = arg2; 95 | this.arg3 = arg3; 96 | 97 | this.head = safeJSONParse(arg2); 98 | this.body = safeJSONParse(arg3); 99 | } 100 | 101 | module.exports = { 102 | Types: Types, 103 | RequestEvent: RequestEvent, 104 | ResponseEvent: ResponseEvent 105 | }; 106 | -------------------------------------------------------------------------------- /test/identity.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var _ = require('lodash'); 22 | 23 | var dsl = require('./ringpop-assert'); 24 | var events = require('./events'); 25 | var getClusterSizes = require('./it-tests').getClusterSizes; 26 | var prepareCluster = require('./test-util').prepareCluster; 27 | var prepareWithStatus = require('./test-util').prepareWithStatus; 28 | var test2 = require('./test-util').test2; 29 | 30 | test2('ringpop should set it\'s identity during bootstrap', getClusterSizes(2), 20000, function init(t, tc, callback) { 31 | tc.sutIdentity = 'identity'; 32 | 33 | callback(); 34 | }, prepareCluster(function(t, tc, n) { 35 | return [ 36 | dsl.assertStats(t, tc, n + 1, 0, 0, { 37 | 'sut': { 38 | labels: {"__identity": "identity"} 39 | } 40 | }) 41 | ]; 42 | }) 43 | ); 44 | 45 | test2('ringpop - with identity - full lookup returns correct values', getClusterSizes(1), 20000, function init(t, tc, callback) { 46 | tc.sutIdentity = 'sut'; 47 | tc.fakeNodes[0].labels = {'__identity' : 'fake-node'} 48 | 49 | callback(); 50 | }, prepareCluster(function(t, tc, n) { 51 | return dsl.assertFullHashring(t, tc, {0: 'fake-node', 'sut': 'sut'}); 52 | })); 53 | 54 | test2('ringpop - when identity changes, hashring is updated', getClusterSizes(1), 20000, prepareCluster(function(t, tc, n) { 55 | return [ 56 | dsl.assertFullHashring(t, tc), 57 | dsl.changeStatus(t, tc, 0, 0, { 58 | subjectIncNoDelta: +1, 59 | status: 'alive', 60 | labels: { 61 | '__identity': 'identity' 62 | } 63 | }), 64 | dsl.waitForPingResponse(t, tc, 0), 65 | dsl.assertFullHashring(t, tc, {0: 'identity'}), // validate change from no identity to 'identity' 66 | dsl.changeStatus(t, tc, 0, 0, { 67 | subjectIncNoDelta: +1, 68 | status: 'alive', 69 | labels: 
{ 70 | '__identity': 'identity2' 71 | } 72 | }), 73 | dsl.waitForPingResponse(t, tc, 0), 74 | dsl.assertFullHashring(t, tc, {0: 'identity2'}), // validate change from 'identity' to 'identity2' 75 | 76 | dsl.changeStatus(t, tc, 0, 0, { 77 | subjectIncNoDelta: +1, 78 | status: 'alive', 79 | labels: {} 80 | }), 81 | dsl.waitForPingResponse(t, tc, 0), 82 | dsl.assertFullHashring(t, tc) // validate change from 'identity2' to no identity 83 | ] 84 | })); 85 | -------------------------------------------------------------------------------- /test/incarnation-no-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | var events = require('./events'); 22 | var dsl = require('./ringpop-assert'); 23 | var test2 = require('./test-util').test2; 24 | var prepareCluster = require('./test-util').prepareCluster; 25 | var prepareWithStatus = require('./test-util').prepareWithStatus; 26 | var _ = require('lodash'); 27 | var getClusterSizes = require('./it-tests').getClusterSizes; 28 | 29 | test2('ringpop doesn\'t bump incarnation number after being piggybacked to alive', getClusterSizes(2), 20000, 30 | prepareCluster(function(t, tc, n) { return [ 31 | // do not disable node 32 | dsl.sendPing(t, tc, 0, 33 | {sourceIx: 0, subjectIx: 'sut', status: 'alive'}), 34 | dsl.waitForPingResponse(t, tc, 0, 1, true), 35 | // check if piggyback update has no effect on incarnation number 36 | dsl.assertStats(t, tc, n+1, 0, 0), 37 | ];}) 38 | ); 39 | 40 | test2('ringpop bumps incarnation number after being piggybacked to suspect', getClusterSizes(2), 20000, 41 | prepareCluster(function(t, tc, n) { return [ 42 | // do not disable node 43 | dsl.sendPing(t, tc, 0, 44 | {sourceIx: 0, subjectIx: 'sut', status: 'suspect'}), 45 | dsl.waitForPingResponse(t, tc, 0, 1, true), 46 | // check if piggyback update has taken effect 47 | // dsl.assertMembership(t, tc, {''}) 48 | dsl.assertBumpedIncarnationNumber(t, tc), 49 | dsl.assertStats(t, tc, n+1, 0, 0), 50 | ];}) 51 | ); 52 | 53 | test2('ringpop bumps incarnation number after being piggybacked to faulty', getClusterSizes(2), 20000, 54 | prepareCluster(function(t, tc, n) { return [ 55 | // do not disable node 56 | dsl.sendPing(t, tc, 0, 57 | {sourceIx: 0, subjectIx: 'sut', status: 'faulty'}), 58 | dsl.waitForPingResponse(t, tc, 0, 1, true), 59 | // check if piggyback update has taken effect 60 | // dsl.assertMembership(t, tc, {''}) 61 | dsl.assertBumpedIncarnationNumber(t, tc), 62 | dsl.assertStats(t, tc, n+1, 0, 0), 63 | ];}) 64 | ); 65 | 66 | -------------------------------------------------------------------------------- /test/it-tests.js: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var _ = require('lodash'); 22 | var program = require('commander'); 23 | var fs = require('fs'); 24 | var farmhash = require('farmhash'); 25 | var TestCoordinator = require('./test-coordinator'); 26 | var dsl = require('./ringpop-assert'); 27 | var programPath, programInterpreter; 28 | var clusterSizes = [1, 2, 3, 4, 5, 6, 7, 10, 21, 25, 30]; 29 | 30 | // Global counter to record how many tests have failed. 
31 | var testFailures = 0; 32 | 33 | var features = { 34 | 'join': { 35 | mandatory: true, 36 | tests: [ 37 | './join-tests' 38 | ] 39 | }, 40 | 'ping': { 41 | mandatory: true, 42 | tests: [ 43 | './ping-tests' 44 | ] 45 | }, 46 | 'ping-req':{ 47 | mandatory: true, 48 | tests: [ 49 | './ping-req-tests' 50 | ] 51 | }, 52 | 'reincarnate': { 53 | mandatory: true, 54 | tests: [ 55 | './incarnation-no-tests', 56 | './reincarnation-source', 57 | './double-reincarnation' 58 | ] 59 | }, 60 | 'gossip': { 61 | mandatory: true, 62 | tests: [ 63 | './piggyback-tests' 64 | ] 65 | }, 66 | 'admin': { 67 | mandatory: true, 68 | tests: [ 69 | './admin-tests' 70 | ] 71 | }, 72 | 'reaping-faulty-nodes': { 73 | mandatory: true, 74 | tests: [ 75 | './reaping-faulty-nodes' 76 | ] 77 | }, 78 | 'bidirectional-full-syncs': { 79 | mandatory: true, 80 | tests: [ 81 | './bidir-full-sync-tests' 82 | ] 83 | }, 84 | 'partition-healing': { 85 | mandatory: true, 86 | tests: [ 87 | './partition-healing-tests' 88 | ] 89 | }, 90 | 'self-eviction': { 91 | mandatory: false, 92 | tests: [ 93 | './self-eviction-tests' 94 | ], 95 | }, 96 | 'labels': { 97 | mandatory: false, 98 | tests: [ 99 | './labels' 100 | ] 101 | }, 102 | 'lookup': { 103 | mandatory: true, 104 | tests: [ 105 | './lookup-tests' 106 | ] 107 | }, 108 | 'identity': { 109 | mandatory: false, 110 | tests: [ 111 | './identity' 112 | ] 113 | } 114 | }; 115 | 116 | function selectFeatures(options) { 117 | var only = options.only || []; 118 | var selectedFeatures = options.features || []; 119 | 120 | if (only.length > 0) { 121 | return function (obj, feature) { 122 | // add selected features 123 | if (only.indexOf(feature) >= 0) { 124 | return true; 125 | } 126 | }; 127 | } else { 128 | return function (obj, feature) { 129 | // add selected features 130 | if (selectedFeatures.indexOf(feature) >= 0) { 131 | return true; 132 | } 133 | 134 | // always run all mandatory features 135 | if (obj.mandatory === true) { 136 | return true; 137 | } 138 | 
139 | // drop other tests 140 | return false; 141 | }; 142 | } 143 | } 144 | 145 | // collect is a commander helper function 146 | function collect(val, memo) { 147 | memo.push(val); 148 | return memo; 149 | } 150 | 151 | function main() { 152 | program 153 | .version(require('../package.json').version) 154 | .option('-s, --sizes ', 'Cluster sizes to test against. Default: \'' + 155 | JSON.stringify(clusterSizes) + '\'') 156 | .option('--enable-feature ', 'Run tests for experimental features', collect, []) 157 | .option('--only ', 'Run tests for experimental features', collect, []) 158 | .option('-i, --interpreter ', 'Interpreter that runs program.') 159 | .arguments('') 160 | .description('it-tests.js performs an integration test on a ringpop program') 161 | .action(function onAction(path, options) { 162 | programPath = path; 163 | if (programPath[0] !== '/') { 164 | programPath = './' + programPath; 165 | } 166 | programInterpreter = options.interpreter; 167 | if (options.sizes) { 168 | clusterSizes = JSON.parse(options.sizes); 169 | } 170 | }); 171 | 172 | program.parse(process.argv); 173 | 174 | if (!programPath) { 175 | console.error('Error: program is required'); 176 | process.exit(1); 177 | } 178 | 179 | if (!fs.existsSync(programPath)) { 180 | console.error('Error: program ' + programPath + ' does not exist. Check path'); 181 | process.exit(1); 182 | } 183 | 184 | var shouldRunFeature = selectFeatures({ 185 | features: program['enableFeature'], 186 | only: program['only'] 187 | }); 188 | 189 | _.each(features, function (obj, feature) { 190 | if (!shouldRunFeature(obj, feature)) { 191 | console.log("#: WARNING skipping test suite:", feature); 192 | return; 193 | } 194 | 195 | _.each(obj.tests, function (test) { 196 | require(test) 197 | }); 198 | }); 199 | // require('./network-blip-tests'); 200 | // require('./revive-tests'); 201 | 202 | // If one or more tests failed, exit with a non-zero exit code. 
203 | 204 | if (testFailures > 0) { 205 | process.exit(1); 206 | } 207 | } 208 | 209 | function getProgramPath() { 210 | return programPath; 211 | } 212 | 213 | function getProgramInterpreter() { 214 | return programInterpreter; 215 | } 216 | 217 | function getClusterSizes(min) { 218 | if (min) { 219 | return _.filter(clusterSizes, function(n) { return n >= min; }); 220 | } 221 | return clusterSizes; 222 | } 223 | 224 | // Exported function that increments the testFailures counter. Called by the 225 | // tests when a failure occurs. 226 | function incrementFailureCount() { 227 | testFailures++; 228 | } 229 | 230 | // ./util uses this so we want to export it before require('./util') happens somewhere 231 | module.exports = { 232 | getProgramInterpreter: getProgramInterpreter, 233 | getProgramPath: getProgramPath, 234 | getClusterSizes: getClusterSizes, 235 | incrementFailureCount: incrementFailureCount, 236 | }; 237 | 238 | if (require.main === module) { 239 | main(); 240 | } 241 | -------------------------------------------------------------------------------- /test/join-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var events = require('./events'); 22 | var util = require('util'); 23 | var test2 = require('./test-util').test2; 24 | var dsl = require('./ringpop-assert'); 25 | var prepareCluster = require('./test-util').prepareCluster; 26 | var prepareWithStatus = require('./test-util').prepareWithStatus; 27 | var getClusterSizes = require('./it-tests').getClusterSizes; 28 | 29 | function joinFakeCluster(n) { 30 | test2('join cluster of 1+' + n + ' nodes', [n], 20000, 31 | prepareCluster(function(t, tc, n) { return [ 32 | dsl.assertStats(t, tc, n+1, 0, 0), 33 | 34 | // Wait for a ping from the SUT and validate that it has the piggybacked information in there 35 | dsl.validateEventBody(t, tc, { 36 | type: events.Types.Ping, 37 | direction: 'request' 38 | }, "Check if node doesn't disseminate join list and only disseminates itself", function (ping) { 39 | return ping.body && ping.body.changes && 40 | ping.body.changes.length === 1 && 41 | ping.body.changes[0].status === 'alive' && 42 | ping.body.changes[0].address === tc.sutHostPort; 43 | }), 44 | 45 | dsl.expectOnlyPings(t, tc), 46 | ];}) 47 | ); 48 | } 49 | 50 | getClusterSizes().forEach(function(n) { 51 | joinFakeCluster(n); 52 | }); 53 | 54 | test2('join ringpop with fake node', getClusterSizes(), 20000, 55 | prepareCluster(function(t, tc, n) { return [ 56 | dsl.joinNewNode(t, tc, n), 57 | dsl.waitForJoinResponse(t, tc, n), 58 | dsl.wait(100), 59 | // node is supposed to disseminate itself 60 | // 
remove this node to keep in sync with membership of real node 61 | dsl.removeFakeNode(t, tc), 62 | dsl.assertStats(t, tc, n+1, 0, 0), 63 | dsl.expectOnlyPings(t, tc), 64 | ];}) 65 | ); 66 | 67 | test2('5-second suspect to faulty window on join', 68 | getClusterSizes(2), 69 | 20000, 70 | function init(t, tc, callback) { 71 | tc.addMembershipInformation('192.0.2.100:1234', 'suspect', 127); 72 | callback(); 73 | }, 74 | prepareCluster({suspect: 1}, function(t, tc, n) { 75 | return [ 76 | dsl.assertStateChange(t, tc, '192.0.2.100:1234', 'faulty', 5000, true) 77 | ]; 78 | }) 79 | ); 80 | -------------------------------------------------------------------------------- /test/lookup-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | var _ = require('lodash'); 22 | 23 | var dsl = require('./ringpop-assert'); 24 | var getClusterSizes = require('./it-tests').getClusterSizes; 25 | var prepareCluster = require('./test-util').prepareCluster; 26 | var test2 = require('./test-util').test2; 27 | 28 | test2('ringpop full lookup returns correct values', getClusterSizes(1), 20000, prepareCluster(function(t, tc, n) { 29 | return dsl.assertFullHashring(t, tc); 30 | })); 31 | 32 | test2('ringpop lookup of faulty member should return different member', getClusterSizes(2), 20000, prepareCluster(function(t, tc){ 33 | // pick a node 34 | var hostPort = tc.getFakeNodes()[0].getHostPort(); 35 | // replica point 0 36 | var key = hostPort + '0'; 37 | return [ 38 | // validate if lookup hashes to the node 39 | dsl.assertLookup(t, tc, key, hostPort), 40 | 41 | // change it to faulty so it should be removed from the ring 42 | dsl.changeStatus(t, tc, 1, 0, 'faulty'), 43 | dsl.waitForPingResponse(t, tc, 1), 44 | 45 | // assert that it does not hash to the node anymore 46 | dsl.assertLookup(t, tc, key, function(dest){ 47 | return dest !== hostPort; 48 | }) 49 | ]; 50 | })); 51 | 52 | 53 | validateLookupAfterStatusChange('suspect', true); 54 | validateLookupAfterStatusChange('faulty', false); 55 | validateLookupAfterStatusChange('tombstone', false); 56 | validateLookupAfterStatusChange('leave', false); 57 | 58 | function validateLookupAfterStatusChange(newStatus, shouldStayInRing) { 59 | test2('ringpop lookup after changing status to ' + newStatus + ' is correct', getClusterSizes(2), 20000, prepareCluster(function(t, tc) { 60 | // pick a node 61 | var hostPort = tc.getFakeNodes()[0].getHostPort(); 62 | // replica point 0 63 | var key = hostPort + '0'; 64 | return [ 65 | // validate if lookup hashes to the node 66 | dsl.assertLookup(t, tc, key, hostPort), 67 | 68 | // change it to the new status, which may remove it from the ring 69 | dsl.changeStatus(t, tc, 1, 0, newStatus), 70 | dsl.waitForPingResponse(t, tc, 1), 71
| 72 | // assert that the node stays in or leaves the ring, depending on shouldStayInRing 73 | dsl.assertLookup(t, tc, key, function validateLookup(dest) { 74 | if (shouldStayInRing) { 75 | // dest is still the same member 76 | return dest === hostPort; 77 | } else { 78 | // dest is now a different member 79 | return dest !== hostPort; 80 | } 81 | }) 82 | ]; 83 | })); 84 | } 85 | -------------------------------------------------------------------------------- /test/membership-checksum.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE.
20 | 21 | // hashing algorithm might change upon ringpop implementation 22 | var _ = require('lodash'); 23 | var farmhash = require('farmhash'); 24 | 25 | function generateChecksumString(members) { 26 | return _ 27 | .chain(members) 28 | .filter(function (member) { 29 | // remove members that are in the tombstone state 30 | return ['tombstone'].indexOf(member.status) < 0; 31 | }) 32 | .map(function (member) { 33 | var address = member.address || (member.host + ':' + member.port); 34 | 35 | var labelChecksum = 0; 36 | 37 | if (member.labels) { 38 | Object.keys(member.labels).forEach(function (key) { 39 | var value = member.labels[key]; 40 | 41 | var keyLength = Buffer.byteLength(key); 42 | var valueLength = Buffer.byteLength(value); 43 | 44 | var pos = 0; 45 | var buf = new Buffer(8 + keyLength + valueLength); 46 | buf.writeInt32BE(keyLength, pos); 47 | pos += 4 48 | pos += buf.write(key, pos); 49 | buf.writeInt32BE(valueLength, pos); 50 | pos += 4 51 | pos += buf.write(value, pos); 52 | 53 | labelChecksum ^= farmhash.fingerprint32(buf); 54 | }); 55 | } 56 | 57 | // concat all parts of the checksum string 58 | var checksumString = ''; 59 | checksumString += address; 60 | checksumString += member.status; 61 | checksumString += member.incarnationNumber; 62 | 63 | if (labelChecksum != 0) { 64 | // we only add the label checksum to the string when it is not 0 65 | // This guarantees that the change is backwards compatible and 66 | // ringpop with label support can correctly synchronize with a 67 | // version that has no labels as long as there are no labels set 68 | // on ringpop. 69 | checksumString += '#labels' + labelChecksum; 70 | } 71 | 72 | return checksumString; 73 | }) 74 | .value() 75 | .sort() 76 | .join(';'); 77 | } 78 | // entries must have address (hostport), status (e.g. 
"alive"), and incarnation numbers 79 | function checksumGo(members) { 80 | //add extra semi-colon to be compatible with the go implementation 81 | var checksumString = generateChecksumString(members) + ';'; 82 | return farmhash.fingerprint32(checksumString); 83 | } 84 | 85 | function checksumNode(members) { 86 | var checksumString = generateChecksumString(members); 87 | return farmhash.hash32(checksumString); 88 | } 89 | 90 | function checksumNodeCrossPlatform(members) { 91 | var checksumString = generateChecksumString(members); 92 | return farmhash.fingerprint32(checksumString); 93 | } 94 | 95 | function detect(members, checksum) { 96 | if(checksumGo(members) === checksum) { 97 | return checksumGo; 98 | } else if(checksumNode(members) === checksum) { 99 | return checksumNode; 100 | } else if (checksumNodeCrossPlatform(members) === checksum) { 101 | return checksumNodeCrossPlatform; 102 | } else { 103 | throw new Error('checksum method undetectable!'); 104 | } 105 | } 106 | 107 | module.exports = { 108 | detect: detect 109 | }; 110 | -------------------------------------------------------------------------------- /test/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ringpop-common", 3 | "description": "The home for all things Ringpop", 4 | "contributors": [ 5 | "Jeff Wolski " 6 | ], 7 | "version": "0.0.1", 8 | "repository": "git://github.com/uber/ringpop-common.git", 9 | "dependencies": { 10 | "async": "^0.9.2", 11 | "cli-color": "^0.3.2", 12 | "commander": "^2.6.0", 13 | "farmhash": "^1.1.0", 14 | "glob": "^5.0.14", 15 | "jshint": "^2.8.0", 16 | "jsonschema": "^1.0.2", 17 | "lodash": "^3.10.1", 18 | "node-uuid": "^1.4.3", 19 | "pre-commit": "^0.0.9", 20 | "tap-parser": "^1.2.2", 21 | "tape": "^4.2.0", 22 | "tape-catch": "^1.0.5", 23 | "tchannel": "^3.8.5", 24 | "underscore": "^1.8.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- 
/test/piggyback-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 

var events = require('./events');
var test2 = require('./test-util').test2;
var prepareCluster = require('./test-util').prepareCluster;
var dsl = require('./ringpop-assert');
var _ = require('lodash');
var getClusterSizes = require('./it-tests').getClusterSizes;

// Every scenario below first drains the disseminator of the SUT (system under
// test) and then checks which changes are piggybacked on one kind of protocol
// message. The validators expect exactly one 'suspect' change whose source is
// fake node 0, unless stated otherwise.
// NOTE(review): the numeric arguments of the dsl steps look like fake-node
// indexes (source, subject) -- confirm against ringpop-assert.js.

test2('ringpop sends piggyback info in ping request', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        dsl.changeStatus(t, tc, 0, 1, 'suspect'),
        dsl.waitForPingResponse(t, tc, 0),

        // Wait for a ping from the SUT and validate that it has the piggybacked information in there
        dsl.validateEventBody(t, tc, {
            type: events.Types.Ping,
            direction: 'request'
        }, "Test if piggybacked information is in ping request", function (ping) {
            return ping.body &&
                ping.body.changes &&
                ping.body.changes.length === 1 &&
                ping.body.changes[0] &&
                ping.body.changes[0].source === tc.fakeNodes[0].getHostPort() &&
                ping.body.changes[0].status === "suspect";
        })
    ];
}));

test2('ringpop updates its dissimination list on pingreq', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information to be piggy backed via pingreq
        dsl.sendPingReq(t, tc, 0, 2, {
            sourceIx: 0,
            subjectIx: 1,
            status: 'suspect',
        }),
        dsl.waitForPingReqResponse(t, tc, 0, 2, true),

        // Wait for a ping from the SUT and validate that it has the piggybacked information in there
        dsl.validateEventBody(t, tc, {
            type: events.Types.Ping,
            direction: 'request'
        }, "Test if piggybacked information is in ping request", function (ping) {
            return ping.body &&
                ping.body.changes &&
                ping.body.changes.length === 1 &&
                ping.body.changes[0] &&
                ping.body.changes[0].source === tc.fakeNodes[0].getHostPort() &&
                ping.body.changes[0].status === "suspect";
        })
    ];
}));

test2('ringpop sends piggyback info in ping-req response', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information to be piggy backed
        dsl.changeStatus(t, tc, 0, 1, 'suspect'),
        dsl.waitForPingResponse(t, tc, 0),

        dsl.sendPingReq(t, tc, 1, 2),

        // Wait for a ping-req response from the SUT and validate that it has the piggybacked information in there
        dsl.validateEventBody(t, tc, {
            type: events.Types.PingReq,
            direction: 'response'
        }, "Test if piggybacked information is in pingreq response", function (ping) {
            return ping.body &&
                ping.body.changes &&
                ping.body.changes.length === 1 &&
                ping.body.changes[0] &&
                ping.body.changes[0].source === tc.fakeNodes[0].getHostPort() &&
                ping.body.changes[0].status === "suspect";
        })
    ];
}));

// The timeout here is increased because it's possible to have a faulty to
// tombstone transition during the test that will also be disseminated.
// This happens when the cluster size >= 10 and causes the piggy back counter
// to double to 30.
test2('ringpop piggybacking decays', getClusterSizes(3), 40000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information to be piggy backed
        dsl.changeStatus(t, tc, 1, 2, 'suspect'),
        dsl.waitForPingResponse(t, tc, 1),

        // if the SUT decays the updates it will start pinging with 0 updates at some point
        // TODO do this with a set number of pings to the SUT to speed up the test
        dsl.drainDisseminator(t, tc)
    ];
}));

test2('ringpop piggybacking should ignore updates when it already knows about', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information that is already known to the SUT
        dsl.changeStatus(t, tc, 0, 1, 'alive'),
        dsl.waitForPingResponse(t, tc, 0),

        // TODO speed this up by sending a ping with a correct checksum
        // Wait for a ping request from the SUT; it passes when the body carries no changes at all
        dsl.validateEventBody(t, tc, {
            type: events.Types.Ping,
            direction: 'request'
        }, "Test if piggybacked information is not in ping request", function (ping) {
            return !ping.body ||
                !ping.body.changes ||
                ping.body.changes.length === 0;
        })
    ];
}));

test2('ringpop sends piggyback info in ping response', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information to be piggy backed
        dsl.changeStatus(t, tc, 0, 1, 'suspect'),
        dsl.waitForPingResponse(t, tc, 0),

        // Send ping and validate the body
        dsl.sendPing(t, tc, 2),
        dsl.validateEventBody(t, tc, {
            type: events.Types.Ping,
            direction: 'response'
        }, "Test if piggybacked information is in ping response", function (ping) {
            return ping.body &&
                ping.body.changes &&
                ping.body.changes.length === 1 &&
                ping.body.changes[0] &&
                ping.body.changes[0].source === tc.fakeNodes[0].getHostPort() &&
                ping.body.changes[0].status === "suspect";
        })
    ];
}));

test2('ringpop sends piggyback info in ping-req request', getClusterSizes(3), 20000, prepareCluster(function(t, tc, n) {
    return [
        // TODO clear the dissemination information from the SUT by flooding it with pings instead of waiting for it
        dsl.drainDisseminator(t, tc), // problem is that if decay is not working you might never get to this point

        // send information to be piggy backed
        dsl.changeStatus(t, tc, 0, 1, 'suspect'),
        dsl.waitForPingResponse(t, tc, 0),

        // cause the SUT to send a ping-req
        dsl.disableAllNodesPing(t, tc),

        // Wait for a ping-req from the SUT and validate that it has the piggybacked information in there
        dsl.validateEventBody(t, tc, {
            type: events.Types.PingReq,
            direction: 'request'
        }, "Test if piggybacked information is in pingreq request", function (ping) {
            return ping.body &&
                ping.body.changes &&
                ping.body.changes.length === 1 &&
                ping.body.changes[0] &&
                ping.body.changes[0].source === tc.fakeNodes[0].getHostPort() &&
                ping.body.changes[0].status === "suspect";
        })
    ];
}));

// --------------------------------------------------------------------------------
/test/ping-req-tests.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 

var events = require('./events');
var dsl = require('./ringpop-assert');
var test2 = require('./test-util').test2;
var prepareCluster = require('./test-util').prepareCluster;
var prepareWithStatus = require('./test-util').prepareWithStatus;
var _ = require('lodash');
var getClusterSizes = require('./it-tests').getClusterSizes;

test2('ping-req real-node with a disabled target', getClusterSizes(2), 20000,
    prepareCluster(function(t, tc, n) { return [
        // Our goal in this test is to make a Fake node (F1) do a ping-req
        // through the Real node (R) to another Fake node (F2), and see if R
        // correctly performs the ping-req: returns not-ok since F2 is disabled.

        // However, it's possible that R will determine that F2 is dead through
        // its own separate indirect ping mechanism. This will cause R to
        // declare F2 as suspect, which will interfere with the test.

        // To avoid R declaring F2 a suspect, we disable ping and ping-req in
        // all Fake nodes, so that R can never perform a successful indirect
        // health check itself.

        dsl.disableAllNodesPing(t, tc),
        dsl.sendPingReq(t, tc, 0, 1),
        dsl.waitForPingReqResponse(t, tc, 0, 1, false),
        // do not make suspect after ping status = false
        dsl.assertStats(t, tc, n+1, 0, 0),
    ];})
);

test2('ping-req real-node with enabled target', getClusterSizes(2), 20000,
    prepareCluster(function(t, tc, n) { return [
        // do not disable node
        dsl.sendPingReq(t, tc, 0, 1),
        dsl.waitForPingReqResponse(t, tc, 0, 1, true),
        // safety check
        dsl.assertStats(t, tc, n+1, 0, 0),
    ];})
);

test2('become suspect through disabling ping response', getClusterSizes(2), 20000,
    prepareCluster(function(t, tc, n) { return [
        dsl.disableNode(t, tc, 1),
        dsl.waitForPingReqs(t, tc, 3),
        dsl.wait(100),
        dsl.assertStats(t, tc, n, 1, 0, {1: {status: 'suspect'}}),
    ];})
);

test2('5-second suspect to faulty window', getClusterSizes(2), 20000,
    prepareWithStatus(1, 'suspect', function(t, tc, n) { return [
        dsl.assertStats(t, tc, n, 1, 0, {1: {status: 'suspect'}}),
        dsl.assertStateChange(t, tc, 1, 'faulty', 5000, false),
    ];})
);


// Registers a test that prepares the cluster with node 1 in `status` and then
// asserts the membership stats of the SUT.
//   ns         - cluster sizes handed to test2
//   status     - status passed to prepareWithStatus and expected for member 1
//   deltaAlive - added to the cluster size n to get the expected alive count
//   nSuspect   - expected number of suspect members
//   nFaulty    - expected number of faulty members
function testSetStatusViaPiggyback(ns, status, deltaAlive, nSuspect, nFaulty) {
    test2('prepare node with status ' + status, ns, 20000,
        prepareWithStatus(1, status, function(t, tc, n) { return [
            dsl.assertStats(t, tc, n + deltaAlive, nSuspect, nFaulty, {1: {status: status}}),
        ];})
    );
}

testSetStatusViaPiggyback(getClusterSizes(2), 'alive', 1, 0, 0);
testSetStatusViaPiggyback(getClusterSizes(2), 'suspect', 0, 1, 0);
testSetStatusViaPiggyback(getClusterSizes(2), 'faulty', 0, 0, 1);

// only runs for cluster sizes larger than 2, since the update is about node 2
test2('change nodes status to suspect piggybacked on a ping-req', _.filter(getClusterSizes(), function(n) { return n > 2; }), 20000,
    prepareCluster(function(t, tc, n) { return [
        // do not disable node
        dsl.sendPingReq(t, tc, 0, 1,
            {sourceIx: 0, subjectIx: 2, status: 'suspect'}),
        dsl.waitForPingReqResponse(t, tc, 0, 1, true),
        // check if piggyback update has taken effect
        dsl.assertStats(t, tc, n, 1, 0, {2: {status: 'suspect'}}),
    ];})
);

// --------------------------------------------------------------------------------
// /test/ping-tests.js:
// --------------------------------------------------------------------------------
// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

var events = require('./events');
var test2 = require('./test-util').test2;
var testStateTransitions = require('./test-util').testStateTransitions;
var dsl = require('./ringpop-assert');
var prepareCluster = require('./test-util').prepareCluster;
var prepareWithStatus = require('./test-util').prepareWithStatus;
var getClusterSizes = require('./it-tests').getClusterSizes;
var _ = require('lodash');

test2('fair round robin pings', getClusterSizes(7), 20000,
    prepareCluster(function(t, tc, n) { return [
        dsl.assertRoundRobinPings(t, tc, 30, 6000),
    ];})
);

test2('ping ringpop from fake-nodes', getClusterSizes(), 20000,
    prepareCluster(function(t, tc, n) {
        // Only keep the entries that are smaller than the cluster size n.
        // Declared with `var` so the list stays local to this test instead
        // of leaking into the global scope.
        var pingList = _.filter([0,1,1,1,5,6,7], function(i) { return i < n; });
        return [
            dsl.sendPings(t, tc, pingList),
            dsl.waitForPingResponses(t, tc, pingList),
        ];
    })
);

// piggyback {alive, suspect, faulty} status of fake-node
// who is {alive, suspect, faulty} with {lower, equal, higher}
// incarnation number than the fake-node (27 combinations)
testStateTransitions(getClusterSizes(2), 'alive', 'alive', 'alive', -1, {alive: 1, suspect: 0, faulty: 0});
testStateTransitions(getClusterSizes(2), 'alive', 'alive', 'alive', 0, {alive: 1, suspect: 0, faulty: 0});
testStateTransitions(getClusterSizes(2), 'alive', 'alive', 'alive', 1, {alive: 1, suspect: 0, faulty: 0});

testStateTransitions(getClusterSizes(2), 'alive', 'suspect', 'alive', -1, {alive: 1, suspect: 0, faulty: 0});
testStateTransitions(getClusterSizes(2), 'alive', 'suspect', 'suspect', 0, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'alive', 'suspect', 'suspect', 1, {alive: 0, suspect: 1, faulty: 0});

testStateTransitions(getClusterSizes(2), 'alive', 'faulty', 'alive', -1, {alive: 1, suspect: 0, faulty: 0});
testStateTransitions(getClusterSizes(2), 'alive', 'faulty', 'faulty', 0, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'alive', 'faulty', 'faulty', 1, {alive: 0, suspect: 0, faulty: 1});

testStateTransitions(getClusterSizes(2), 'suspect', 'alive', 'suspect', -1, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'suspect', 'alive', 'suspect', 0, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'suspect', 'alive', 'alive', 1, {alive: 1, suspect: 0, faulty: 0});

testStateTransitions(getClusterSizes(2), 'suspect', 'suspect', 'suspect', -1, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'suspect', 'suspect', 'suspect', 0, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'suspect', 'suspect', 'suspect', 1, {alive: 0, suspect: 1, faulty: 0});

testStateTransitions(getClusterSizes(2), 'suspect', 'faulty', 'suspect', -1, {alive: 0, suspect: 1, faulty: 0});
testStateTransitions(getClusterSizes(2), 'suspect', 'faulty', 'faulty', 0, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'suspect', 'faulty', 'faulty', 1, {alive: 0, suspect: 0, faulty: 1});

testStateTransitions(getClusterSizes(2), 'faulty', 'alive', 'faulty', -1, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'alive', 'faulty', 0, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'alive', 'alive', 1, {alive: 1, suspect: 0, faulty: 0});

testStateTransitions(getClusterSizes(2), 'faulty', 'suspect', 'faulty', -1, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'suspect', 'faulty', 0, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'suspect', 'suspect', 1, {alive: 0, suspect: 1, faulty: 0});

testStateTransitions(getClusterSizes(2), 'faulty', 'faulty', 'faulty', -1, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'faulty', 'faulty', 0, {alive: 0, suspect: 0, faulty: 1});
testStateTransitions(getClusterSizes(2), 'faulty', 'faulty', 'faulty', 1, {alive: 0, suspect: 0, faulty: 1});

// --------------------------------------------------------------------------------
// /test/protocol-join.js:
// --------------------------------------------------------------------------------
// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 | 21 | var farmhash = require('farmhash'); 22 | var makeHostPort = require('./util').makeHostPort; 23 | var _ = require('underscore'); 24 | 25 | // send and handle join requests (check bottom of file for example request) 26 | 27 | // Responding node and allNodes must have host, port, status, and incarnationNumber fields 28 | function handleJoin(req, res, respondingNode, membershipList, checksumMethod) { 29 | res.headers.as = 'raw'; 30 | res.sendOk(null, JSON.stringify(getJoinResponsePayload(respondingNode, membershipList, checksumMethod))); 31 | } 32 | 33 | // Responding node and allNodes must have host, port, status, and incarnationNumber fields 34 | function getJoinResponsePayload(respondingNode, membershipList, checksumMethod) { 35 | // add fake nodes to membership 36 | var responderHostPort = makeHostPort(respondingNode.host, respondingNode.port); 37 | 38 | var membership = membershipList.map(function(member) { 39 | return _.extend({ 40 | source: responderHostPort, 41 | address: makeHostPort(member.host, member.port), 42 | status: member.status, 43 | incarnationNumber: member.incarnationNumber, 44 | labels: member.labels 45 | }, member.extraFields); 46 | }); 47 | 48 | return { 49 | app: 'ringpop', 50 | coordinator: responderHostPort, 51 | membershipChecksum: checksumMethod(membership), 52 | membership: membership 53 | }; 54 | } 55 | 56 | module.exports = { 57 | handleJoin: handleJoin, 58 | getJoinResponsePayload: getJoinResponsePayload 59 | }; 60 | 61 | 62 | // protocol/join 63 | 64 | // JOIN REQUEST (arg3) 65 | // { "app": "ringpop", 66 | // "source": "10.80.134.35.3010", 67 | // "incarnationNumber": 12236} 68 | 69 | // JOIN RESPONSE (arg3) 70 | // { app: 'ringpop', 71 | // coordinator: '10.80.134.35:3002', 72 | // membership: 73 | // [ { source: '10.80.134.35:3002', 74 | // address: '10.80.134.35:3000', 75 | // status: 'alive', 76 | // incarnationNumber: 1439642728689 }, 77 | // { source: '10.80.134.35:3002', 78 | // address: '10.80.134.35:3004', 79 | // 
status: 'alive', 80 | // incarnationNumber: 1439642728722 }, 81 | // { source: '10.80.134.35:3002', 82 | // address: '10.80.134.35:3001', 83 | // status: 'alive', 84 | // incarnationNumber: 1439642728720 }, 85 | // { source: '10.80.134.35:3002', 86 | // address: '10.80.134.35:3002', 87 | // status: 'alive', 88 | // incarnationNumber: 1439642728707 }, 89 | // { source: '10.80.134.35:3002', 90 | // address: '10.80.134.35.3010', 91 | // status: 'alive', 92 | // incarnationNumber: 12236 }, 93 | // { source: '10.80.134.35:3002', 94 | // address: '10.80.134.35:3003', 95 | // status: 'alive', 96 | // incarnationNumber: 1439642728674 } ], 97 | // membershipChecksum: 3982923156 } 98 | 99 | -------------------------------------------------------------------------------- /test/protocol-ping-req.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var safeJSONParse = require('./util').safeParse; 22 | 23 | function handlePingReq(req, res, pingStatus, checksum) { 24 | var request = safeJSONParse(req.arg3); 25 | res.headers.as = 'raw'; 26 | var response = { 27 | changes: [], 28 | pingStatus: pingStatus, 29 | target: request.target, 30 | checksum: checksum, 31 | }; 32 | 33 | res.sendOk(null, JSON.stringify(response)); 34 | } 35 | 36 | module.exports = { 37 | handlePingReq: handlePingReq, 38 | }; 39 | 40 | 41 | // PING-REQ REQUEST (arg3) 42 | // { 43 | // "checksum": 2281494811, 44 | // "changes": [ 45 | // { 46 | // "id": "e713e71f-2299-4753-ac3a-8b296df247d3", 47 | // "source": "10.80.134.35:3000", 48 | // "sourceIncarnationNumber": 1440006862476, 49 | // "address": "10.80.134.35:3001", 50 | // "status": "suspect", 51 | // "incarnationNumber": 1337 52 | // } 53 | // ], 54 | // "source": "10.80.134.35:3000", 55 | // "sourceIncarnationNumber": 1440006862476, 56 | // "target": "10.80.134.35:3001" 57 | // } 58 | 59 | //{ 60 | // changes: ringpop.dissemination.issueAsReceiver(source, 61 | // sourceIncarnationNumber, checksum), 62 | // pingStatus: isOk, 63 | // target: target 64 | // } 65 | -------------------------------------------------------------------------------- /test/protocol-ping.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | function handlePing(res, checksum) { 22 | // TODO (wieger): validate request 23 | res.headers.as = 'raw'; 24 | res.sendOk(null, JSON.stringify({changes: [], checksum: checksum})); 25 | } 26 | 27 | function pingNotOkHandler() { 28 | // TODO (wieger): validate request 29 | console.log('pingNotOkHandler'); 30 | res.headers.as = 'raw'; 31 | res.sendNotOk(null, 'I am a fake node who does\'t like pings'); 32 | } 33 | 34 | function noResponseHandler() { 35 | console.log('noResponseHandler'); 36 | } 37 | 38 | module.exports = { 39 | handlePing: handlePing, 40 | pingNotOkHandler: pingNotOkHandler, 41 | }; 42 | 43 | // PING REQUEST 44 | // just call the endpoint (empty arg3) 45 | 46 | // PING RESPONSE (arg3) 47 | // { changes: [] } 48 | // 49 | // where changes is array of these: 50 | // { source: '10.80.134.35:3000', 51 | // address: '10.80.134.35:3006', 52 | // status: 'alive', 53 | // incarnationNumber: 1337 } 54 | -------------------------------------------------------------------------------- /test/reincarnation-source.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

var events = require('./events');
var dsl = require('./ringpop-assert');
var test2 = require('./test-util').test2;
var prepareCluster = require('./test-util').prepareCluster;
var _ = require('lodash');
var getClusterSizes = require('./it-tests').getClusterSizes;

// Pings the SUT with a change that declares the SUT itself suspect, then
// checks the change about the SUT in the ping response: it has to be 'alive',
// name the SUT as its source, and use the same value for
// sourceIncarnationNumber and incarnationNumber.
test2('ringpop gossips its reincarnation with itself as the source', getClusterSizes(2), 20000,
    prepareCluster(function(t, tc, n) { return [
        // send ping that causes the sut to reincarnate
        dsl.sendPing(t, tc, 0, {
            sourceIx: 0,
            subjectIx: 'sut',
            status: 'suspect'
        }),
        dsl.validateEventBody(t, tc, {
            type: events.Types.Ping,
            direction: 'response'
        }, 'find and validate reincarnation gossip', function (ping) {
            // pick the change that is about the SUT itself
            var reincarnation = _.find(ping.body.changes, {
                address: tc.sutHostPort
            });
            return reincarnation
                && reincarnation.source == tc.sutHostPort
                && reincarnation.sourceIncarnationNumber == reincarnation.incarnationNumber
                && reincarnation.status == 'alive';
        }),
        // if this test fails due to join requests being left in the queue the
        // test triggered a full sync.
    ];})
);

// --------------------------------------------------------------------------------
// /test/self-eviction-tests.js:
// --------------------------------------------------------------------------------
// Copyright (c) 2016 Uber Technologies, Inc.
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var _ = require('lodash'); 22 | var dsl = require('./ringpop-assert'); 23 | var events = require('./events'); 24 | var getClusterSizes = require('./it-tests').getClusterSizes; 25 | var prepareCluster = require('./test-util').prepareCluster; 26 | var test2 = require('./test-util').test2; 27 | 28 | 29 | test2('self eviction changes state to faulty and pings members', getClusterSizes(2), 20000, prepareCluster(function(t, tc, n) { 30 | return [ 31 | 32 | // disable pinging from fake nodes and stop gossip on the SUT 33 | // this is necessary to make sure all the pings assert later on 34 | // are sent from the graceful shutdown. 
35 | dsl.disableAllNodesPing(t, tc), 36 | dsl.stopGossip(t, tc), 37 | 38 | // graceful shutdown SUT (sent SIGTERM and wait till exit) 39 | dsl.waitForGracefulShutdown(t, tc), 40 | 41 | // assert pings 42 | assertValidPings(t, tc, n) 43 | ]; 44 | })); 45 | 46 | /** 47 | * Validate the pings. Assert that: 48 | * - the number of pings doesn't exceed 40% of the cluster 49 | * - the pings are all to different members 50 | * - the pings should declare the SUT faulty and originate from the SUT itself 51 | * 52 | * @param t the current test suite 53 | * @param tc the test coordinated 54 | * @param n the number of servers in the cluster 55 | * @return {assertValidPings} the assert functions 56 | */ 57 | function assertValidPings(t, tc, n) { 58 | return function assertValidPings(list, cb) { 59 | var pings = _.filter(list, { 60 | type: events.Types.Ping, 61 | direction: 'request' 62 | }); 63 | 64 | if (pings.length === 0) { 65 | // We should have at least one ping here, 66 | // otherwise self-eviction is not working. 
67 | t.fail('no pings received'); 68 | return cb(pings); 69 | } 70 | 71 | var maxPings = Math.ceil(n * 0.4); 72 | t.ok(pings.length <= maxPings, 'number of pings does not exceed 40% of cluster'); 73 | 74 | var receivers = _.pluck(pings, 'receiver'); 75 | t.ok(_.uniq(receivers).length === receivers.length, 'all pings are to unique members'); 76 | 77 | _.each(pings, function validatePing(ping) { 78 | t.equal(ping.body.changes.length, 1); 79 | 80 | var change = ping.body.changes[0]; 81 | t.equal(change.source, tc.sutHostPort); 82 | t.equal(change.status, 'faulty'); 83 | t.equal(ping.body.source, tc.sutHostPort); 84 | }); 85 | 86 | cb(_.reject(list, pings)); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /test/tap-filter: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Tap parser that filters out test success and only prints the errors and 4 | # test summary. 5 | # 6 | # The parser keeps a buffer of all output during a test (including non-tap 7 | # data). If that test has a failing assertion, the entire buffer is printed. 8 | # 9 | # This filter adds one non-standard tap output line: 10 | # 11 | # #: This is an example line that will be printed immediately 12 | # 13 | # You can use the "#:" prefix to a line and it will be passed through the 14 | # filter to stdout. This is using for printing status information. 15 | # 16 | declare test_name= 17 | declare test_error=false 18 | declare buffer= 19 | declare exit_code=0 20 | 21 | # 22 | # Add the passed-in data to the buffer. 23 | # 24 | _add_to_buffer() { 25 | for data in "$@"; do 26 | buffer="$buffer"$'\n'"$data" 27 | done 28 | } 29 | 30 | # 31 | # Print the name of the test and the contents of the current buffer. 
32 | # 33 | _print_last_test_error() { 34 | if $test_error; then 35 | echo >&2 36 | echo -e "\033[1;31m${test_name}\033[0m" >&2 37 | echo "$buffer" >&2 38 | fi 39 | } 40 | 41 | while IFS= read -r line; do 42 | case $line in 43 | "# tests"*|"# pass"*|"# fail"*) 44 | # Immediately print summary lines 45 | echo "$line" 46 | ;; 47 | 48 | \#:*) 49 | # Use the special syntax "#: Foo..." and these lines will also be 50 | # printed immediately. 51 | echo "$line" 52 | ;; 53 | 54 | \#*) 55 | # Beginning of new test... 56 | 57 | # If there was an error during the last test, print the error 58 | # before resetting 59 | _print_last_test_error 60 | 61 | # Reset the test name, buffer, etc. 62 | test_name="$line" 63 | test_error=false 64 | buffer= 65 | ;; 66 | 67 | "not ok"*) 68 | _add_to_buffer "$line" 69 | 70 | # Flag a failure 71 | test_error=true 72 | exit_code=1 73 | ;; 74 | 75 | *) 76 | # For all normal lines, add them to the buffer; the buffer will 77 | # only be printed if the test fails. 78 | _add_to_buffer "$line" 79 | ;; 80 | esac 81 | done 82 | 83 | # Print last test output if there was an error 84 | _print_last_test_error 85 | 86 | exit $exit_code 87 | -------------------------------------------------------------------------------- /test/test-util.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var dsl = require('./ringpop-assert'); 22 | var _ = require('lodash'); 23 | var TestCoordinator = require('./test-coordinator'); 24 | var getProgramPath = require('./it-tests').getProgramPath; 25 | var getProgramInterpreter = require('./it-tests').getProgramInterpreter; 26 | var main = require('./it-tests'); 27 | // test is like normal tape-catch test but also prints t.error.details if a fail occured. 28 | // tape-catch catches JS exceptions and reports them as test failures. 
var Test = require('tape-catch');

/**
 * Registers a tape-catch test and hooks its 'result' event so that every
 * failed assertion increments the suite-wide failure counter and, when the
 * failure carries `error.details`, prints those details.
 *
 * If a test coordinator was attached to the test as `t._tc` (test2 does
 * this), the index => host:port mapping of the SUT and the fake nodes is
 * printed as well, so node indexes in the details can be resolved.
 *
 * @param {string} msg name of the test
 * @param {Object|Function} opts passed through to tape-catch unchanged
 * @param {Function} [cb] passed through to tape-catch unchanged
 */
function test(msg, opts, cb) {
    var t = Test(msg, opts, cb);

    t.on('result', function onResult(res) {
        // Only assertion result objects carry `ok`/`error`. Anything else
        // emitted on 'result' (tape emits plain strings for t.comment())
        // is not a failure and must not be dereferenced.
        if (!res || typeof res !== 'object' || res.ok) {
            return;
        }

        main.incrementFailureCount();

        var details = res.error ? res.error.details : undefined;
        if (details === undefined) {
            return;
        }

        console.log('============== error details ===============');
        console.log();
        console.log(typeof details === 'object' ?
            JSON.stringify(details, null, 2) : details);
        console.log();
        console.log('============================================');
        console.log();

        if (t._tc) {
            console.log('============== node index : hostport ===============');
            console.log("sut: "+ t._tc.sutHostPort);
            t._tc.fakeNodes.forEach(function(fakeNode, i) {
                console.log(i + " => " + fakeNode.host + ":" + fakeNode.port);
            });
            console.log('====================================================');
        }
    });
}

/**
 * Defines one test per cluster size in `ns`. Each test creates a
 * TestCoordinator for that size, runs `init`, starts the coordinator and
 * then hands the closures returned by `callback` to dsl.validate().
 *
 * callback returns a list of closures, which will be validated at a later
 * stage. For documentation on validation, see documentation of
 * ringpop-assert.validate().
 *
 * @param {string} str description appended to the 'cluster-size N: ' prefix
 * @param {number[]} ns cluster sizes to run the test for
 * @param {number} deadline forwarded to dsl.validate()
 * @param {Function} [init] function(t, tc, cb) run before the coordinator
 *     is started; it must call cb() to continue. May be omitted, in which
 *     case the fourth argument is taken as `callback`.
 * @param {Function} callback function(t, tc, n) returning the closures
 */
function test2(str, ns, deadline, init, callback) {
    // Four-argument form: no init step was supplied.
    if (typeof callback === 'undefined') {
        callback = init;
        init = function(t, tc, cb) { cb(); return; };
    }

    ns.forEach(function(n) {
        test('cluster-size ' + n + ': ' + str, function(t) {
            var tc = new TestCoordinator({
                sut: {
                    program: getProgramPath(),
                    interpreter: getProgramInterpreter(),
                },
                numNodes: n,
            });
            // Expose the coordinator to the failure reporter in test().
            t._tc = tc;

            init(t, tc, function onInit() {
                tc.start(function onTCStarted() {
                    dsl.validate(t, tc, callback(t, tc, n), deadline);
                });
            });
        });
    });
}

// testStateTransitions tests state transitions from an initial state to a newState and
// asserts the final state.
During the transition an relative incarnation number 89 | // can be sent in the piggybacked ping to test different rules 90 | // 91 | // statusCounts is an hash in the following form: 92 | // { 93 | // : count 94 | // } 95 | // 96 | // The alive status is treated as a relative number that is added to the size of the 97 | // cluster being tested. eg: 98 | // { 99 | // alive: -1 100 | // } 101 | // for a clustersize of 5 will assert that there will be 4 nodes alive after the test. 102 | function testStateTransitions(ns, initial, newState, finalState, incNoDelta, statusCounts) { 103 | var ix = 1; 104 | test2('change status from ' + initial + ', to ' + newState + ' with incNoDelta ' + incNoDelta + ' via piggybacking', 105 | ns, 20000, prepareWithStatus(ix, initial, function(t, tc, n) { 106 | expectedMembers = {}; 107 | expectedMembers[ix] = {status: finalState}; 108 | 109 | // alive is a delta 110 | var counts = _.extend({}, statusCounts); 111 | counts.alive = (counts.alive || 0) + n; 112 | return [ 113 | dsl.changeStatus(t, tc, 0, 1, newState, incNoDelta), 114 | dsl.waitForPingResponse(t, tc, 0), 115 | dsl.assertStats(t, tc, counts, expectedMembers), 116 | ]; 117 | }) 118 | ); 119 | } 120 | 121 | function prepareCluster(assertOverrides, insert_fns) { 122 | if (typeof assertOverrides === 'function') { 123 | insert_fns = assertOverrides; 124 | assertOverrides = null; 125 | } 126 | assertOverrides = assertOverrides || {}; 127 | 128 | return function(t, tc, n) { 129 | return [ 130 | // By waiting for the first ping we make sure the SUT is ready to go. 131 | // We don't consume it so other tests (especially the ping-tests) 132 | // can still assert implementation details related to ping distribution. 133 | dsl.waitForPing(t, tc, false), 134 | 135 | // We're waiting for joins AFTER the first ping so we're able to detect 136 | // if there are too many joins. 
137 | dsl.waitForJoins(t, tc, n), 138 | 139 | dsl.assertDetectChecksumMethod(t, tc), 140 | dsl.assertStats(t, tc, assertOverrides.alive || (n+1), assertOverrides.suspect || 0, assertOverrides.faulty || 0), 141 | insert_fns(t, tc, n), 142 | dsl.expectOnlyPingsAndPingReqs(t, tc), 143 | ]; 144 | }; 145 | } 146 | 147 | function prepareWithStatus(ix, status, insert_fns) { 148 | var sourceIx = 0; 149 | if (ix == sourceIx) { 150 | sourceIx = 1; 151 | } 152 | 153 | return prepareCluster(function(t, tc, n) { 154 | return [ 155 | dsl.changeStatus(t, tc, sourceIx, ix, status), 156 | dsl.waitForPingResponse(t, tc, sourceIx), 157 | insert_fns(t, tc, n), 158 | ]; 159 | }); 160 | } 161 | 162 | 163 | module.exports = { 164 | test: test, 165 | test2: test2, 166 | testStateTransitions: testStateTransitions, 167 | prepareCluster: prepareCluster, 168 | prepareWithStatus: prepareWithStatus, 169 | }; 170 | -------------------------------------------------------------------------------- /test/util.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Uber Technologies, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | 21 | var color = require('cli-color'); 22 | var fs = require('fs'); 23 | var util = require('util'); 24 | 25 | function safeParse(data) { 26 | try { 27 | return JSON.parse(data); 28 | } catch (e) { 29 | return null; 30 | } 31 | } 32 | 33 | function parseArg(opt) { 34 | var args = Array.prototype.slice.call(process.argv, 2); 35 | 36 | var matches = args.filter(function(arg) { 37 | return arg.indexOf(opt) > -1; 38 | }); 39 | 40 | if (Array.isArray(matches) && matches.length) { 41 | return matches[0].split('=')[1]; 42 | } 43 | } 44 | 45 | function lpad(num, len) { 46 | var ret = String(num); 47 | while (ret.length < len) { 48 | ret = '0' + ret; 49 | } 50 | return ret; 51 | } 52 | 53 | function makeHostPort(host, port) { 54 | return util.format('%s:%d', host, port); 55 | } 56 | 57 | function formatDate() { 58 | var now = new Date(); 59 | return lpad(now.getHours(), 2) + ':' + lpad(now.getMinutes(), 2) + ':' + lpad(now.getSeconds(), 2) + '.' 
+ lpad(now.getMilliseconds(), 3); 60 | } 61 | 62 | function findLocalIP(interfaces) { 63 | interfaces = interfaces || os.networkInterfaces(); 64 | 65 | function getIPv4Addr(iface) { 66 | var addresses = interfaces[iface]; 67 | 68 | if (!Array.isArray(addresses)) { 69 | return null; 70 | } 71 | 72 | for (var i = 0; i < addresses.length; i++) { 73 | var address = addresses[i]; 74 | 75 | if (address.family === 'IPv4' && !address.internal) { 76 | return address.address; 77 | } 78 | } 79 | 80 | return null; 81 | } 82 | 83 | return getIPv4Addr('en0') || getIPv4Addr('eth0') || '127.0.0.1'; 84 | } 85 | 86 | function logMsg(who, msg) { 87 | console.log(color.blue('[' + who + '] ') + color.yellow(formatDate()) + ' ' + msg); 88 | } 89 | 90 | function range(start, end) { 91 | var res = []; 92 | for (var i = start; i <= end; i++) { 93 | res.push(i); 94 | } 95 | return res; 96 | } 97 | 98 | module.exports = { 99 | safeParse: safeParse, 100 | parseArg: parseArg, 101 | formatDate: formatDate, 102 | findLocalIP: findLocalIP, 103 | logMsg: logMsg, 104 | range: range, 105 | makeHostPort: makeHostPort, 106 | }; 107 | -------------------------------------------------------------------------------- /tools/generate-hosts.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | // Copyright (c) 2016 Uber Technologies, Inc. 
3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | // 11 | // The above copyright notice and this permission notice shall be included in 12 | // all copies or substantial portions of the Software. 13 | // 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | // THE SOFTWARE. 
21 | 22 | var fs = require('fs'); 23 | 24 | function parseArg(opt) { 25 | var args = Array.prototype.slice.call(process.argv, 2); 26 | 27 | var matches = args.filter(function(arg) { 28 | return arg.indexOf(opt) > -1; 29 | }); 30 | 31 | if (Array.isArray(matches) && matches.length) { 32 | return matches[0].split('=')[1]; 33 | } 34 | } 35 | 36 | function main(hosts, basePort, numPorts, outputFile) { 37 | var usageDie = function(msg) { 38 | console.error("error: " + msg); 39 | console.error(); 40 | console.error("usage: " + process.argv[0] + " --hosts=addresses --base-port=port --num-ports=count --output-file=filename"); 41 | console.error(); 42 | console.error(" --hosts=addresses\t- Comma-separated list of hostnames or IPs of hosts in cluster"); 43 | console.error(" --base-port=port\t- Starting port of node on host"); 44 | console.error(" --num-ports=count\t- Number of ports on which nodes will listen"); 45 | console.error(" --output-file=filename.json\t- Filename of hosts file"); 46 | process.exit(1); 47 | }; 48 | 49 | hosts = hosts || parseArg('--hosts'); 50 | if (!hosts) usageDie('invalid hosts'); 51 | basePort = basePort || parseInt(parseArg('--base-port'), 10); 52 | if (!basePort) usageDie('invalid base port'); 53 | numPorts = numPorts || parseInt(parseArg('--num-ports'), 10); 54 | if (!numPorts) usageDie('invalid num ports'); 55 | outputFile = outputFile || parseArg('--output-file') || './hosts.json'; 56 | if (!outputFile.match(/\.json$/)) usageDie('output file must have .json extension'); 57 | 58 | var nodes = []; 59 | 60 | hosts.split(',').forEach(function(host) { 61 | for (var nextPort = basePort; nextPort < basePort + numPorts; nextPort++) { 62 | nodes.push(host + ':' + nextPort); 63 | } 64 | }); 65 | 66 | fs.writeFileSync(outputFile, JSON.stringify(nodes)); 67 | 68 | return nodes; 69 | } 70 | 71 | if (require.main === module) { 72 | main(); 73 | } 74 | 75 | module.exports = main; 76 | 
-------------------------------------------------------------------------------- /tools/grafana-dash/README.md: -------------------------------------------------------------------------------- 1 | Tool to create a nice Grafana dashboard. 2 | 3 | # Usage 4 | 5 | * You can either modify the supplied example config file (`config/common.json`) or use zero-config's functionality to override selected values using command-line parameters. 6 | * Configure Graphite paths. You can use variable substitution using `{var}` tags inside paths and variables themselves, as long as there are no circular dependencies. 7 | * Configure `grafana.url` and `grafana.cookie` with your Grafana endpoint and auth-cookie. 8 | * Run `NODE_ENV=development node gen-dashboard.js [path to where your config dir is]` 9 | * Alternatively, pass the values as command-line parameters: `NODE_ENV=development node gen-dashboard.js [path to where your config dir is] --grafana.url=yoururl --grafana.cookie=auth-cookie=yourcookie --gen-dashboard.variable.dc=yourdc`. 
10 | -------------------------------------------------------------------------------- /tools/grafana-dash/config/common.json: -------------------------------------------------------------------------------- 1 | { 2 | "grafana": { 3 | "cookie": "auth-openid=", 4 | "url": "https://grafana.example.com/grafana2/api/dashboards/db/" 5 | }, 6 | "gen-dashboard": { 7 | "dashboard-title": "my-ringpop-dashboard", 8 | "template": { 9 | "system": { 10 | "process-cpu": "{system-path}.cpu.user", 11 | "process-rss": "{system-path}.memory.rss", 12 | "process-fds": "{system-path}.num_fds" 13 | }, 14 | "gossip": { 15 | "ping-send": "{counts-path}.ping.send", 16 | "ping-recv": "{counts-path}.ping.recv", 17 | "ping-p95": "{timers-path}.ping.p95", 18 | "ping-p99": "{timers-path}.ping.p99", 19 | "ping-req-send": "{counts-path}.ping-req.send", 20 | "ping-req-recv": "{counts-path}.ping-req.recv", 21 | "ping-req-p95": "{timers-path}.ping-req.p95", 22 | "ping-req-p99": "{timers-path}.ping-req.p99", 23 | "protocol-freq-p99": "{timers-path}.protocol.frequency.p99" 24 | }, 25 | "bootstrap": { 26 | "join-p95": "{timers-path}.join.p95" 27 | }, 28 | "membership": { 29 | "full-sync": "{counts-path}.full-sync", 30 | "membership-update-alive": "{counts-path}.membership-update.alive", 31 | "membership-update-suspect": "{counts-path}.membership-update.suspect", 32 | "membership-update-faulty": "{counts-path}.membership-update.faulty", 33 | "compute-checksum-p95": "{timers-path}.compute-checksum.p95", 34 | "compute-checksum-p99": "{timers-path}.compute-checksum.p99", 35 | "compute-checksum-upper": "{timers-path}.compute-checksum.upper" 36 | }, 37 | "dissemination": { 38 | "max-piggyback": "{gauges-path}.max-piggyback" 39 | } 40 | }, 41 | "variable": { 42 | "dc": "your-data-center", 43 | "system-path": "stats.{dc}.system", 44 | "counts-path": "stats.{dc}.counts", 45 | "timers-path": "stats.{dc}.timers", 46 | "gauges-path": "stats.{dc}.gauges" 47 | } 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /tools/make_partition: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Usage: 4 | sudo -v && sudo lsof -Pnni | ./make_partition 3000,3001 3002,3003 |\ 5 | sudo FIREWALL 6 | 7 | FIREWALL := 8 | pfctl -emf - # Darwin 9 | iptables-restore # Linux 10 | 11 | Clear the firewall: 12 | pfctl -f /etc/pf.conf # Darwin 13 | iptables -F # Linux 14 | Check current firewall rules: 15 | pfctl -sr # Darwin 16 | iptables -nvL # Linux 17 | 18 | === Specifying more ports === 19 | Port definitions accept port ranges. For example, this is valid: 20 | 21 | sudo lsof -Pnni | ./make_partition 3000-3009 3010-3019 22 | 23 | ... and this: 24 | 25 | sudo lsof -Pnni | ./make_partition 3001-3009,3019 3000,3010-3018 26 | 27 | === Limitations === 28 | Undefined behavior when something else is listening on the same port, but 29 | different IP address, e.g. 30 | 31 | pop 1 r 3u IPv4 84 0t0 TCP 127.0.0.1:3000 (LISTEN) 32 | met 2 m 3u IPv4 84 0t0 TCP 10.0.0.10:3000 (LISTEN) 33 | 34 | If the above holds true, it can break firewall in spectacular ways. So not 35 | intended for unattended use. 36 | """ 37 | 38 | import sys 39 | import argparse 40 | import unittest 41 | 42 | 43 | def _parse_ports(ports): 44 | ret = [] 45 | for port in ports.split(','): 46 | if '-' in port: 47 | pfrom, pto = port.split('-') 48 | for p in range(int(pfrom), int(pto)+1): 49 | ret.append(str(p)) 50 | else: 51 | ret.append(port) 52 | return ret 53 | 54 | 55 | def _port_to_owner(lsof, A, B): 56 | """Create a dictionary of {port -> 'A' | 'B'}. 57 | 58 | 1. Matches A and B to pids by '(LISTEN)' lines in lsof. 59 | 2. Matches opened ports (src_ip:src_port->dst_ip:dst_port; the src:port) to 60 | pids. 61 | 3. Matches the pids from (1) and (2) to a port -> owner dictionary. 
62 | """ 63 | port_to_pid = {} 64 | ports = A + B 65 | for line in lsof: 66 | if '(LISTEN)' in line: 67 | port = line.split()[8].split(':')[-1] 68 | if port in ports: 69 | port_to_pid[port] = line.split()[1] 70 | for line in lsof: 71 | if '(ESTABLISHED)' in line: 72 | col = line.split()[8] 73 | dst_port = col.split(':')[-1] 74 | if dst_port in port_to_pid: 75 | src_port = col.split('->')[0].split(':')[-1] 76 | port_to_pid[src_port] = line.split()[1] 77 | 78 | pids_A, pids_B = [], [] 79 | for port, pid in port_to_pid.items(): 80 | if port in A: 81 | pids_A.append(pid) 82 | elif port in B: 83 | pids_B.append(pid) 84 | 85 | port_to_owner = {} 86 | for port, pid in port_to_pid.items(): 87 | if pid in pids_A: 88 | port_to_owner[port] = 'A' 89 | elif pid in pids_B: 90 | port_to_owner[port] = 'B' 91 | else: 92 | # If there is an owner we don't understand, just skip it. 93 | # print("Unknown owner for %s" % pid) 94 | pass 95 | 96 | return port_to_owner 97 | 98 | 99 | def _block_tuples(lsof, port_to_owner, AB): 100 | """Returns IP and a list of (src_port, dst_port)-tuples.""" 101 | ret = [] 102 | for line in [l for l in lsof if '(ESTABLISHED)' in l]: 103 | col = line.split()[8] 104 | src = col.split('->')[0].split(':')[-1] 105 | dst = col.split(':')[-1] 106 | if port_to_owner.get(src) and port_to_owner.get(dst): 107 | ip = col.split('->')[0].split(':')[0] 108 | if port_to_owner[src] != port_to_owner[dst] and (dst in AB): 109 | ret.append((src, dst)) 110 | if not ret: 111 | raise ValueError("Could not find ringpops on provided ports") 112 | return ip, ret 113 | 114 | 115 | def _block_preamble_darwin(ip, ports): 116 | """Block new connections to listening ports on Darwin.""" 117 | tpl = "block return in proto tcp from %s port %s flags S/S" 118 | return [tpl % (ip, port) for port in ports] 119 | 120 | 121 | def _block_preamble_linux(ip, ports): 122 | """Block new connections to listening ports on Linux.""" 123 | rst = "-A INPUT -p tcp -s %s -d %s --tcp-flags RST RST -j ACCEPT" 
124 | tpl = ("-A INPUT -p tcp --syn -m state --state NEW " 125 | "-d %s --dport %s -j REJECT --reject-with tcp-reset") 126 | return ["*filter", rst % (ip, ip)] + [tpl % (ip, port) for port in ports] 127 | 128 | 129 | def _block_pair_darwin(ip, blocks): 130 | """Block a known connection from A to B on Darwin.""" 131 | tpl = "block return in proto tcp from %s port %s to %s port %s" 132 | ret = [] 133 | for src, dst in blocks: 134 | ret.append(tpl % (ip, src, ip, dst)) 135 | ret.append(tpl % (ip, dst, ip, src)) 136 | return ret 137 | 138 | 139 | def _block_pair_linux(ip, blocks): 140 | """Block a known connection from A to B on Linux.""" 141 | tpl = ("-A INPUT -p tcp -s %s --sport %s -d %s " 142 | "--dport %s -j REJECT --reject-with tcp-reset") 143 | ret = [] 144 | for src, dst in blocks: 145 | ret.append(tpl % (ip, src, ip, dst)) 146 | ret.append(tpl % (ip, dst, ip, src)) 147 | return ret 148 | 149 | 150 | def _block_appendix_linux(): 151 | """iptables-save requires "COMMIT" at the end.""" 152 | return ["COMMIT"] 153 | 154 | 155 | def main(): 156 | parser = argparse.ArgumentParser( 157 | formatter_class=argparse.RawDescriptionHelpFormatter, 158 | description='Generate firewall rules to create a partition.', 159 | epilog=__doc__) 160 | parser.add_argument('A', help='Ports of A, e.g. 3000,3001-3002') 161 | parser.add_argument('B', help='Ports of B, e.g. 
3003,3004') 162 | parser.add_argument( 163 | '--platform', 164 | help='darwin|linux, default: '+sys.platform, 165 | default=sys.platform) 166 | args = parser.parse_args() 167 | A, B = _parse_ports(args.A), _parse_ports(args.B) 168 | 169 | lsof = sys.stdin.readlines() 170 | port_to_owner = _port_to_owner(lsof, A, B) 171 | ip, blocks = _block_tuples(lsof, port_to_owner, A + B) 172 | 173 | if args.platform.startswith('linux'): 174 | print ("\n".join(_block_preamble_linux(ip, A+B))) 175 | print ("\n".join(_block_pair_linux(ip, blocks))) 176 | print ("\n".join(_block_appendix_linux())) 177 | elif args.platform.startswith('darwin'): 178 | print ("\n".join(_block_preamble_darwin(ip, A+B))) 179 | print ("\n".join(_block_pair_darwin(ip, blocks))) 180 | 181 | 182 | # Block scenario: 3000,3001 3002,3003 183 | SAMPLE_LSOF = """ 184 | pop 1 r 3u IPv4 80 0t0 TCP 127.0.0.1:3000 (LISTEN) 185 | pop 1 r 5u IPv4 93 0t0 TCP 127.0.0.1:3000->127.0.0.1:54602 (ESTABLISHED) 186 | pop 1 r 6u IPv4 02 0t0 TCP 127.0.0.1:3000->127.0.0.1:54603 (ESTABLISHED) 187 | pop 1 r 7u IPv4 06 0t0 TCP 127.0.0.1:3000->127.0.0.1:54607 (ESTABLISHED) 188 | pop 2 r 3u IPv4 29 0t0 TCP 127.0.0.1:3001 (LISTEN) 189 | pop 2 r 5u IPv4 31 0t0 TCP 127.0.0.1:43057->127.0.0.1:3002 (ESTABLISHED) 190 | pop 2 r 6u IPv4 34 0t0 TCP 127.0.0.1:3001->127.0.0.1:39889 (ESTABLISHED) 191 | pop 2 r 7u IPv4 33 0t0 TCP 127.0.0.1:54602->127.0.0.1:3000 (ESTABLISHED) 192 | pop 2 r 8u IPv4 39 0t0 TCP 127.0.0.1:3001->127.0.0.1:39890 (ESTABLISHED) 193 | pop 3 r 3u IPv4 91 0t0 TCP 127.0.0.1:3002 (LISTEN) 194 | pop 3 r 5u IPv4 99 0t0 TCP 127.0.0.1:3002->127.0.0.1:43057 (ESTABLISHED) 195 | pop 3 r 6u IPv4 01 0t0 TCP 127.0.0.1:54603->127.0.0.1:3000 (ESTABLISHED) 196 | pop 3 r 7u IPv4 04 0t0 TCP 127.0.0.1:60367->127.0.0.1:3003 (ESTABLISHED) 197 | pop 3 r 8u IPv4 05 0t0 TCP 127.0.0.1:39889->127.0.0.1:3001 (ESTABLISHED) 198 | pop 4 r 3u IPv4 98 0t0 TCP 127.0.0.1:3003 (LISTEN) 199 | pop 4 r 5u IPv4 36 0t0 TCP 127.0.0.1:3003->127.0.0.1:60367 
(ESTABLISHED) 200 | pop 4 r 6u IPv4 38 0t0 TCP 127.0.0.1:39890->127.0.0.1:3001 (ESTABLISHED) 201 | pop 4 r 7u IPv4 41 0t0 TCP 127.0.0.1:54607->127.0.0.1:3000 (ESTABLISHED) 202 | node 99 r 1u IPv4 83 0t0 TCP 127.0.0.1:50000->127.0.0.1:3000 (ESTABLISHED) 203 | """.split("\n") 204 | 205 | 206 | class TestMapping(unittest.TestCase): 207 | """This test case can be executed by running '$0 test'.""" 208 | def test_mapping(self): 209 | expect_block = [ 210 | ('43057', '3002'), 211 | ('54603', '3000'), 212 | ('39889', '3001'), 213 | ('39890', '3001'), 214 | ('54607', '3000'), 215 | ] 216 | A = ['3000', '3001'] 217 | B = ['3002', '3003'] 218 | port_to_owner = _port_to_owner(SAMPLE_LSOF, A, B) 219 | ip, tuples = _block_tuples(SAMPLE_LSOF, port_to_owner, A + B) 220 | self.assertEqual(expect_block, tuples) 221 | self.assertEqual('127.0.0.1', ip) 222 | 223 | if __name__ == '__main__': 224 | if len(sys.argv) > 1 and sys.argv[1] == 'test': 225 | suite = unittest.TestLoader().loadTestsFromTestCase(TestMapping) 226 | unittest.TextTestRunner(verbosity=2).run(suite) 227 | else: 228 | main() 229 | --------------------------------------------------------------------------------