├── .github
└── workflows
│ ├── publish.yaml
│ └── test.yml
├── .gitignore
├── .luacheckrc
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake
├── FindLdoc.cmake
└── FindTarantool.cmake
├── config.ld
├── doc
└── swim-paper.pdf
├── membership-scm-1.rockspec
├── membership.lua
├── membership
├── events.lua
├── members.lua
├── network.lua
├── options.lua
└── stash.lua
└── test
├── helpers
├── cluster.lua
├── instance.lua
└── server.lua
└── integration
├── allowed_members_test.lua
├── broadcast_test.lua
├── dead_myself_test.lua
├── dissemination_test.lua
├── encryption_test.lua
├── false_rumors_test.lua
├── init_test.lua
├── member_clock_diff_test.lua
├── payload_test.lua
├── probe_uri_test.lua
├── quit_test.lua
├── reload_test.lua
├── subscribe_test.lua
└── sync_test.lua
/.github/workflows/publish.yaml:
--------------------------------------------------------------------------------
1 | name: Publish
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | tags: ['*']
7 |
8 | jobs:
9 | publish-scm-1:
10 | if: github.ref == 'refs/heads/master'
11 | runs-on: ubuntu-22.04
12 | steps:
13 | - uses: actions/checkout@v4
14 | - uses: tarantool/rocks.tarantool.org/github-action@master
15 | with:
16 | auth: ${{ secrets.ROCKS_AUTH }}
17 | files: membership-scm-1.rockspec
18 |
19 | publish-tag:
20 | if: startsWith(github.ref, 'refs/tags/')
21 | runs-on: ubuntu-22.04
22 | env:
23 | CMAKE_LDOC_FIND_REQUIRED: 'YES'
24 | steps:
25 | - uses: actions/checkout@v4
26 | - uses: tarantool/setup-tarantool@v2
27 | with:
28 | tarantool-version: '2.11'
29 |
30 | # Setup ldoc
31 | - run: tarantoolctl rocks install ldoc
32 | --server=https://tarantool.github.io/LDoc/
33 | - run: echo $PWD/.rocks/bin >> $GITHUB_PATH
34 |
35 | # Make a release
36 | - run: echo TAG=${GITHUB_REF##*/} >> $GITHUB_ENV
37 | - run: tarantoolctl rocks new_version --tag ${{ env.TAG }}
38 | - run: tarantoolctl rocks install membership-${{ env.TAG }}-1.rockspec
39 | - run: tarantoolctl rocks pack membership ${{ env.TAG }}
40 |
41 | - uses: tarantool/rocks.tarantool.org/github-action@master
42 | with:
43 | auth: ${{ secrets.ROCKS_AUTH }}
44 | files: |
45 | membership-${{ env.TAG }}-1.rockspec
46 | membership-${{ env.TAG }}-1.all.rock
47 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | push:
5 | workflow_dispatch:
6 |
7 | jobs:
8 | test:
9 | strategy:
10 | fail-fast: false
11 | matrix:
12 | tarantool:
13 | - '1.10'
14 | - '2.10'
15 | - '2.11'
16 |
17 | env:
18 | CMAKE_LDOC_FIND_REQUIRED: 'YES'
19 |
20 | runs-on: ubuntu-22.04
21 | steps:
22 | - uses: actions/checkout@v4
23 |
24 | - uses: tarantool/setup-tarantool@v3
25 | with:
26 | tarantool-version: ${{ matrix.tarantool }}
27 |
28 | - name: Cache rocks
29 | uses: actions/cache@v3
30 | id: cache-rocks
31 | with:
32 | path: .rocks/
33 | key: cache-rocks-${{ matrix.tarantool }}-${{ hashFiles('membership-scm-1.rockspec') }}
34 |
35 | - name: Install dependencies
36 | if: steps.cache-rocks.outputs.cache-hit != 'true'
37 | run: |
38 | tarantoolctl rocks install luacheck
39 | tarantoolctl rocks install luatest
40 | tarantoolctl rocks install ldoc --server=https://tarantool.github.io/LDoc/
41 |
42 | - name: Add rocks to PATH
43 | run: echo ".rocks/bin" >> $GITHUB_PATH
44 |
45 | - name: Run luacheck
46 | run: luacheck membership-scm-1.rockspec membership.lua membership/
47 |
48 | - name: Build project
49 | run: tarantoolctl rocks make
50 |
51 | - name: Run tests
52 | run: .rocks/bin/luatest -v
53 |
54 | - name: Cleanup cached paths
55 | run: tarantoolctl rocks remove membership
56 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .rocks
2 | __pycache__
3 | .cache/
4 | build.luarocks
5 | doc
6 | build
7 |
--------------------------------------------------------------------------------
/.luacheckrc:
--------------------------------------------------------------------------------
1 | redefined = false
2 |
3 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | All notable changes to this project will be documented in this file.
3 |
4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6 |
7 | ## [Unreleased]
8 |
9 | ## [2.5.2] - 2025-03-31
10 |
11 | ### Fixed
12 |
13 | - Dead members which weren't allowed now are removed from the list.
14 |
15 | ## [2.5.1] - 2025-03-10
16 |
17 | ### Fixed
18 |
19 | - Protocol step error when the members list was cleared in the process.
20 |
21 | ## [2.5.0] - 2025-03-06
22 |
23 | ### Added
24 |
25 | - `set_allowed_members` function to add only allowed members to membership process.
26 |
27 | ## [2.4.6] - 2025-01-13
28 |
29 | ### Added
30 |
31 | - `mark_left` function to mark removed members as `left`.
32 |
33 | ## [2.4.5] - 2024-06-24
34 |
35 | ### Fixed
36 |
37 | - Invalid events parsing.
38 |
39 | ## [2.4.4] - 2024-04-09
40 |
41 | ### Fixed
42 |
43 | - Invalid payload parsing in anti entropy step.
44 |
45 | ## [2.4.3] - 2024-01-29
46 |
47 | ### Fixed
48 |
49 | - Invalid payload parsing when the message is broken.
50 |
51 | ## [2.4.2] - 2024-01-18
52 |
53 | ### Added
54 |
55 | - IPv6 support.
56 | - `remove_member` function to clean up member data.
57 |
58 |
59 | ## [2.4.1] - 2023-09-28
60 |
61 | ### Fixed
62 |
63 | - Log overflow when inner fibers weren't canceled.
64 |
65 | ## [2.4.0] - 2021-07-12
66 |
67 | ### Added
68 |
69 | - New option `SUSPICIOUSNESS` (default: `true`) allows to
70 | disable generation of rumors about suspected members. Also,
71 | membership won't produce any rumors unless myself is alive.
72 |
73 | - New option `log_debug` which can be easily overridden to
74 | control the verbosity level.
75 |
76 | ### Fixed
77 |
78 | - Uncaught exception which prevented discovering
79 | non-decryptable members.
80 |
81 | - Avoid event duplication due to a bug.
82 |
83 | - Properly handle the internal option `NUM_FAILURE_DETECTION_SUBGROUPS`
84 | which controls the number of indirect pings.
85 |
86 | ## [2.3.2] - 2021-04-22
87 |
88 | ### Fixed
89 |
90 | - Enhance logging of `getaddrinfo` errors when DNS malfunctions.
91 |
92 | ## [2.3.1] - 2020-11-18
93 |
94 | ### Fixed
95 |
96 | - Make the initialization error more informative.
97 |
98 | ## [2.3.0] - 2020-11-17
99 |
100 | ### Added
101 |
102 | - Allow reloading the code on the fly without status intervention.
103 |
104 | ### Fixed
105 |
106 | - Make subscriptions garbage-collectible. Previously, `fiber.cond`
107 | objects obtained from `membership.subscribe` should have been
108 | unsubscribed manually, otherwise, they would never be GC'ed.
109 | And now they are.
110 |
111 | ## [2.2.0] - 2019-10-22
112 |
113 | ### Added
114 |
115 | - New field `member.clock_delta`, which indicates difference between
116 | remote and local clocks.
117 |
118 | ## [2.1.4] - 2019-08-25
119 |
120 | ### Fixed
121 |
122 | - In some cases membership did disseminate invalid (nil) payload.
123 | The bug relates versions 2.1.2, 2.1.3.
124 |
125 | ## [2.1.3] - 2019-08-01
126 |
127 | ### Fixed
128 |
129 | - Leaving membership with encryption enabled.
130 | Due to the bug, other members reported 'dead' status instead of 'left'.
131 |
132 | ## [2.1.2] - 2019-06-02
133 |
134 | ### Added
135 |
136 | - Ldoc api documentation
137 |
138 | ### Fixed
139 |
140 | - Fairly calculate size of UDP packets
141 | - Speed up events dissemination by fully utilizing
142 | PING and ACK packets
143 | - Restrict packet size for anti-entropy sync.
144 | Due to the lack of restriction it used to fail
145 | which plagued members detection
146 |
147 | ### Minor
148 |
149 | - Make tests lighter by using `console` connection instead of `net.box`
150 |
151 | ## [2.1.1] - 2019-01-09
152 |
153 | ### Fixed
154 |
155 | - Obtain UDP broadcast address from `getifaddrs` C call
156 |
157 | ### Updated
158 |
159 | - Module `checks` dependency updated to v3.0.0
160 |
161 | ## [2.1.0] - 2018-09-04
162 |
163 | ### Added
164 |
165 | - API method `probe_uri()`
166 | - API method `get_member()`
167 | - Low-level encryption support
168 | - API methods `set_encryption_key()`, `get_encryption_key()`
169 | - API method `broadcast()`
170 | - API methods `subscribe()`, `unsubscribe()`
171 |
172 | ### Changed
173 |
174 | - API method `set_payload()` now sets only the given key within payload table
175 | - Hide internal numeric `status` from public API
176 |
177 | ## [2.0.0] - 2018-04-03
178 |
179 | ### Changed
180 |
181 | - Rename API method: `quit()` -> `leave()`
182 |
183 | ## [1.0.0] - 2018-04-02
184 |
185 | ### Added
186 |
187 | - Basic functionality
188 | - Integration tests
189 | - Luarock-based packaging
190 | - Gitlab CI integration
191 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
2 |
3 | project(membership C)
4 |
5 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
6 | set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY TRUE)
7 |
8 | # Find Tarantool and Lua dependencies
9 | set(TARANTOOL_FIND_REQUIRED ON)
10 | find_package(Tarantool)
11 | include_directories(${TARANTOOL_INCLUDE_DIRS})
12 |
13 | file(GLOB_RECURSE LUA_FILES
14 | "${CMAKE_CURRENT_SOURCE_DIR}/membership.lua"
15 | "${CMAKE_CURRENT_SOURCE_DIR}/membership/*.lua"
16 | )
17 |
18 | ## API doc ####################################################################
19 | ###############################################################################
20 |
21 | if(DEFINED ENV{CMAKE_LDOC_FIND_REQUIRED})
22 | set(LDOC_FIND_REQUIRED "$ENV{CMAKE_LDOC_FIND_REQUIRED}")
23 | endif()
24 | find_package(Ldoc)
25 |
26 | set(DOC_OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/doc/index.html)
27 |
28 | add_custom_command(
29 | OUTPUT DOC_OUTPUT
30 | COMMAND ${LDOC} --all .
31 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
32 | DEPENDS ${LUA_FILES}
33 | COMMENT "Building API documentation"
34 | VERBATIM
35 | )
36 |
37 | if(LDOC_FOUND)
38 | add_custom_target(doc ALL
39 | DEPENDS DOC_OUTPUT)
40 | else()
41 | add_custom_target(doc
42 | DEPENDS DOC_OUTPUT)
43 | endif()
44 |
45 | ## Testing ####################################################################
46 | ###############################################################################
47 |
48 | enable_testing()
49 |
50 | add_test(
51 | NAME test_integration
52 | COMMAND pytest -v
53 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
54 | )
55 |
56 | ## Install ####################################################################
57 | ###############################################################################
58 |
59 | install(CODE "")
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2019-2024, Tarantool AUTHORS.
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Membership library for Tarantool based on a gossip protocol
6 |
7 | This library builds a mesh from multiple tarantool instances. The
8 | mesh monitors itself, helps members discover everyone else and get
9 | notified about their status changes with low latency.
10 |
11 | It is built upon the ideas from consul, or, more precisely,
12 | the [SWIM](doc/swim-paper.pdf) algorithm.
13 |
14 | Membership module works over UDP protocol and can operate
15 | even before tarantool `box.cfg` was initialized.
16 |
17 | ## Member data structure
18 |
19 | A member is represented by the table with fields:
20 |
21 | * `uri`
22 | * `status` is a string: `alive`, `suspect`, `dead` or `left`
23 | * `incarnation` which is incremented every time the instance is being
24 | suspected or dead or updates its payload
25 | * `payload` is a table with auxiliary data, which can be used by various
26 | modules to do whatever they want
27 | * `timestamp` is a value of `fiber.time64()` (in microseconds),
28 | corresponding to the last update of status or incarnation. `timestamp`
29 | is always local and does not depend on other members' clock settings.
30 | * `clock_delta` is a time drift between member's clock (remote) and the
31 | local one (in microseconds).
32 |
33 | Example:
34 |
35 | ```yaml
36 | ---
37 | uri: "localhost:33001"
38 | status: "alive"
39 | incarnation: 1
40 | payload:
41 | uuid: "2d00c500-2570-4019-bfcc-ab25e5096b73"
42 | timestamp: 1522427330993752
43 | clock_delta: 27810
44 | ...
45 | ```
46 |
47 | ## Reloadability
48 |
49 | Membership module supports hot-reload:
50 |
51 | ```lua
52 | package.loaded['membership'] = nil
53 | require('membership')
54 | ```
55 |
56 | ## Changing options
57 |
58 | You can change membership options directly by using:
59 |
60 | ```lua
61 | require("membership.options")[opt_name] = opt_value
62 | ```
63 |
64 | Available options:
65 | * Period of sending direct PINGs.
66 | `PROTOCOL_PERIOD_SECONDS`, default: 1.0
67 |
68 | * Time to wait for ACK message after PING.
69 | If a member does not reply within this time,
70 | the indirect ping algorithm is invoked.
71 | `ACK_TIMEOUT_SECONDS`, default: 0.2
72 |
73 | * Period to perform anti-entropy sync.
74 | `ANTI_ENTROPY_PERIOD_SECONDS`, default: 10
75 |
76 | * Toggle producing `suspect` rumors when ping fails. Even if disabled,
77 | it affects neither gossip dissemination nor the generation of other
78 | statuses (e.g. `dead` and `non-decryptable`).
79 | `SUSPICIOUSNESS`, default: true
80 |
81 | * Timeout to mark `suspect` members as `dead`.
82 | `SUSPECT_TIMEOUT_SECONDS`, default: 3
83 |
84 | * Number of members to try indirectly pinging a `suspect`.
85 | Denoted as `k` in [SWIM paper](swim-paper.pdf).
86 | `NUM_FAILURE_DETECTION_SUBGROUPS`, default: 3
87 |
88 | * Maximum size of UDP packets to send.
89 | `MAX_PACKET_SIZE`, default: 1472 (`Default-MTU (1500) - IP-Header (20) - UDP-Header (8)`)
90 |
91 | ## Payload
92 |
93 | You can add payload to any member by calling:
94 |
95 | ```lua
96 | membership.set_payload(key, value)
97 | ```
98 |
--------------------------------------------------------------------------------
/cmake/FindLdoc.cmake:
--------------------------------------------------------------------------------
1 | find_program(LDOC ldoc
2 | HINTS .rocks/
3 | PATH_SUFFIXES bin
4 | DOC "Documentation generator tool for Lua source code"
5 | )
6 |
7 | include(FindPackageHandleStandardArgs)
8 | find_package_handle_standard_args(LDOC
9 | REQUIRED_VARS LDOC
10 | )
11 |
12 | mark_as_advanced(LDOC)
13 |
--------------------------------------------------------------------------------
/cmake/FindTarantool.cmake:
--------------------------------------------------------------------------------
1 | # Define GNU standard installation directories
2 | include(GNUInstallDirs)
3 |
4 | macro(extract_definition name output input)
5 | string(REGEX MATCH "#define[\t ]+${name}[\t ]+\"([^\"]*)\""
6 | _t "${input}")
7 | string(REGEX REPLACE "#define[\t ]+${name}[\t ]+\"(.*)\"" "\\1"
8 | ${output} "${_t}")
9 | endmacro()
10 |
11 | find_path(TARANTOOL_INCLUDE_DIR tarantool/module.h
12 | HINTS ${TARANTOOL_DIR} ENV TARANTOOL_DIR
13 | PATH_SUFFIXES include
14 | )
15 |
16 | if(TARANTOOL_INCLUDE_DIR)
17 | set(_config "-")
18 | file(READ "${TARANTOOL_INCLUDE_DIR}/tarantool/module.h" _config0)
19 | string(REPLACE "\\" "\\\\" _config ${_config0})
20 | unset(_config0)
21 | extract_definition(PACKAGE_VERSION TARANTOOL_VERSION ${_config})
22 | extract_definition(INSTALL_PREFIX _install_prefix ${_config})
23 | unset(_config)
24 | endif()
25 |
26 | include(FindPackageHandleStandardArgs)
27 | find_package_handle_standard_args(TARANTOOL
28 | REQUIRED_VARS TARANTOOL_INCLUDE_DIR VERSION_VAR TARANTOOL_VERSION)
29 | if(TARANTOOL_FOUND)
30 | set(TARANTOOL_INCLUDE_DIRS "${TARANTOOL_INCLUDE_DIR}"
31 | "${TARANTOOL_INCLUDE_DIR}/tarantool/"
32 | CACHE PATH "Include directories for Tarantool")
33 | set(TARANTOOL_INSTALL_LIBDIR "${CMAKE_INSTALL_LIBDIR}/tarantool"
34 | CACHE PATH "Directory for storing Lua modules written in Lua")
35 | set(TARANTOOL_INSTALL_LUADIR "${CMAKE_INSTALL_DATADIR}/tarantool"
36 | CACHE PATH "Directory for storing Lua modules written in C")
37 |
38 | if (NOT TARANTOOL_FIND_QUIETLY AND NOT FIND_TARANTOOL_DETAILS)
39 | set(FIND_TARANTOOL_DETAILS ON CACHE INTERNAL "Details about TARANTOOL")
40 | message(STATUS "Tarantool LUADIR is ${TARANTOOL_INSTALL_LUADIR}")
41 | message(STATUS "Tarantool LIBDIR is ${TARANTOOL_INSTALL_LIBDIR}")
42 | endif ()
43 | endif()
44 | mark_as_advanced(TARANTOOL_INCLUDE_DIRS TARANTOOL_INSTALL_LIBDIR
45 | TARANTOOL_INSTALL_LUADIR)
46 |
--------------------------------------------------------------------------------
/config.ld:
--------------------------------------------------------------------------------
1 | project = 'membership'
2 | file = {
3 | 'membership.lua',
4 | 'membership/options.lua',
5 | }
6 | topics = {
7 | 'README.md',
8 | 'CHANGELOG.md',
9 | }
10 | format = 'markdown'
11 |
--------------------------------------------------------------------------------
/doc/swim-paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tarantool/membership/46487f0cfdf35989f141df0816492960fbb6c224/doc/swim-paper.pdf
--------------------------------------------------------------------------------
/membership-scm-1.rockspec:
--------------------------------------------------------------------------------
1 | package = 'membership'
2 | version = 'scm-1'
3 | source = {
4 | url = 'git+https://github.com/tarantool/membership.git',
5 | branch = 'master',
6 | }
7 | dependencies = {
8 | 'lua >= 5.1',
9 | 'checks ~> 3',
10 | }
11 |
12 | external_dependencies = {
13 | TARANTOOL = {
14 | header = 'tarantool/module.h',
15 | },
16 | }
17 |
18 | build = {
19 | type = 'cmake',
20 | variables = {
21 | TARANTOOL_DIR = '$(TARANTOOL_DIR)',
22 | TARANTOOL_INSTALL_LIBDIR = '$(LIBDIR)',
23 | TARANTOOL_INSTALL_LUADIR = '$(LUADIR)',
24 | },
25 | install = {
26 | lua = {
27 | ['membership'] = 'membership.lua',
28 | ['membership.stash'] = 'membership/stash.lua',
29 | ['membership.events'] = 'membership/events.lua',
30 | ['membership.members'] = 'membership/members.lua',
31 | ['membership.options'] = 'membership/options.lua',
32 | ['membership.network'] = 'membership/network.lua',
33 | }
34 | },
35 | copy_directories = {"doc"},
36 | }
37 |
--------------------------------------------------------------------------------
/membership.lua:
--------------------------------------------------------------------------------
1 | --- Membership library for Tarantool based on a gossip protocol.
2 | -- This library builds a mesh from multiple tarantool instances. The
3 | -- mesh monitors itself, helps members discover everyone else and get
4 | -- notified about their status changes with low latency.
5 | --
6 | -- It is built upon the ideas from consul, or, more precisely,
7 | -- the [SWIM](swim-paper.pdf) algorithm.
8 | --
9 | -- Membership module works over UDP protocol and can operate
10 | -- even before tarantool [`box.cfg`](https://tarantool.io/en/doc/latest/book/box/box_cfg/) was initialized.
11 | -- @module membership
12 |
13 | local log = require('log')
14 | local uri_tools = require('uri')
15 | local fiber = require('fiber')
16 | local checks = require('checks')
17 | local socket = require('socket')
18 | local msgpack = require('msgpack')
19 |
-- Drop the cached submodules so that the `require` calls below execute
-- them again. Together with re-requiring `membership` itself this is
-- what lets the whole package be reloaded on the fly.
for _, m in ipairs({
    'membership.stash',
    'membership.events',
    'membership.options',
    'membership.members',
    'membership.network',
}) do
    package.loaded[m] = nil
end

local opts = require('membership.options')
local stash = require('membership.stash')
local events = require('membership.events')
local members = require('membership.members')
local network = require('membership.network')

-- Module state is taken from the stash when a value is already there,
-- otherwise it is created from scratch.
-- `_sync_trigger` is broadcast when a SYNC_ACK message is handled.
local _sync_trigger = stash.get('_sync_trigger') or fiber.cond()
-- `_ack_trigger` is broadcast when an ACK addressed to this instance arrives.
local _ack_trigger = stash.get('_ack_trigger') or fiber.cond()
-- Array of received ACK payloads, scanned by `wait_ack`.
local _ack_cache = stash.get('_ack_cache') or {}
-- Maps uri -> resolved address, or `false` after a failed resolution.
local _resolve_cache = stash.get('_resolve_cache') or {}
-- No default here: stays nil unless a value was stashed.
local _allowed_uri_set = stash.get('_allowed_uri_set')
41 |
--- Store the module state in the stash.
-- The values are read when the function is called, so whatever the
-- module locals hold at that moment is what gets stored.
local function after_reload()
    local preserved = {
        {'_ack_cache', _ack_cache},
        {'_ack_trigger', _ack_trigger},
        {'_sync_trigger', _sync_trigger},
        {'_resolve_cache', _resolve_cache},
        {'_allowed_uri_set', _allowed_uri_set},
    }

    for _, entry in ipairs(preserved) do
        stash.set(entry[1], entry[2])
    end
end
49 |
-- The UDP socket and this instance's own URI are read from the stash too.
-- `_sock` may be nil: the receive loop below checks for that explicitly.
local _sock = stash.get('_sock')
local advertise_uri = stash.get('advertise_uri')
52 |
--- Resolve a member URI into a socket address.
-- A cached address is reused only while the member is known and alive;
-- otherwise the cache entry is dropped and the URI is resolved again.
-- A failure is remembered as `false`, which keeps the warning from being
-- logged again on subsequent attempts.
-- @tparam string uri
-- @return first entry returned by `socket.getaddrinfo`, or nil on failure
local function resolve(uri)
    checks('string')

    local cached = _resolve_cache[uri]
    if cached then
        local known = members.get(uri)
        if known and known.status == opts.ALIVE then
            return cached
        end
        _resolve_cache[uri] = nil
    end

    local parsed = uri_tools.parse(uri)
    if not parsed then
        -- `nil` means "no failure reported yet", `false` means "reported"
        if _resolve_cache[uri] == nil then
            _resolve_cache[uri] = false
            log.warn("parse error (%s)", uri)
        end
        return nil
    end

    local hints = {
        family = parsed.ipv6 and 'AF_INET6' or 'AF_INET',
        type = 'SOCK_DGRAM',
    }
    local found, err = socket.getaddrinfo(parsed.host, parsed.service, hints)
    if found == nil then
        if _resolve_cache[uri] == nil then
            _resolve_cache[uri] = false
            log.warn("%s (%s)", err or 'getaddrinfo: Unknown error', uri)
        end
        return nil
    end

    local addr = found[1]
    _resolve_cache[uri] = addr
    return addr
end
90 |
--- Reverse lookup: find the URI whose cached address has this host and port.
-- Only entries of the resolve cache holding an address are considered;
-- failed resolutions (stored as `false`) are skipped.
-- @tparam string host
-- @tparam number port
-- @return the matching uri, or nil when nothing in the cache matches
local function nslookup(host, port)
    checks('string', 'number')

    for candidate, addr in pairs(_resolve_cache) do
        local matches = addr and addr.host == host and addr.port == port
        if matches then
            return candidate
        end
    end

    return nil
end
105 |
--- Shuffle an array in place and return it.
-- Walks the array from the front, swapping each position with a randomly
-- chosen position at or after it.
-- @tparam table tbl array to shuffle (modified in place)
-- @treturn table the same table
local function random_permutation(tbl)
    local last = #tbl
    local i = 1
    while i < last do
        local j = math.random(i, last)
        tbl[i], tbl[j] = tbl[j], tbl[i]
        i = i + 1
    end
    return tbl
end
116 |
117 | --
118 | -- MESSAGE SENDING
119 | --
120 |
--- Send a message to `uri` with gossip events piggybacked on it.
-- The packet is `{advertise_uri, msg_type, msg_data, events}`. The events
-- are filled in this order:
--
-- 1. what this instance thinks about the recipient (if it is a known member);
-- 2. an event about this instance itself;
-- 3. pending events, as long as the encrypted packet fits into
--    `opts.MAX_PACKET_SIZE`;
-- 4. randomly ordered known members, under the same size limit.
--
-- The first two are added without a size check.
-- @tparam string uri recipient
-- @tparam string msg_type
-- @tparam table msg_data
-- @return false when `uri` can't be resolved, otherwise a truthy value
--   only if `sendto` reported a positive result
local function send_message(uri, msg_type, msg_data)
    checks('string', 'string', 'table')
    local addr = resolve(uri)
    if not addr then
        return false
    end

    local packed_events = {}
    -- URIs which already have an event in `packed_events`
    local included = {}
    local msg_raw = {advertise_uri, msg_type, msg_data, packed_events}
    local msg_size = #msgpack.encode(msg_raw)

    -- Append an event unconditionally.
    local function append(event_uri, event)
        table.insert(packed_events, events.pack(event))
        msg_size = msg_size + events.estimate_msgpacked_size(event)
        included[event_uri] = true
    end

    -- Append an event only if the encrypted packet stays within the limit.
    -- Returns false (and appends nothing) when it doesn't fit.
    local function append_if_fits(event_uri, event)
        local evt_size = events.estimate_msgpacked_size(event)
        if #packed_events + 1 == 16 then
            -- the 16th element switches msgpack from `fixarray`
            -- to `array 16`, which is 2 bytes larger
            evt_size = evt_size + 2
        end
        if opts.encrypted_size(msg_size + evt_size) > opts.MAX_PACKET_SIZE then
            return false
        end
        table.insert(packed_events, events.pack(event))
        included[event_uri] = true
        msg_size = msg_size + evt_size
        return true
    end

    -- Always tell the recipient what current instance thinks about it.
    -- It's necessary to refute rumors faster.
    local recipient = members.get(uri)
    if recipient then
        append(uri, events.get(uri) or {
            uri = uri,
            status = recipient.status,
            incarnation = recipient.incarnation,
            ttl = 1,
        })
    end

    -- And always tell about myself to speed up payload dissemination.
    if not included[advertise_uri] then
        local myself = members.get(advertise_uri)
        append(advertise_uri, events.get(advertise_uri) or {
            uri = advertise_uri,
            status = myself.status,
            incarnation = myself.incarnation,
            payload = myself.payload,
            ttl = 1,
        })
    end

    -- Pending events go next, until the packet is full.
    for event_uri, event in events.pairs() do
        if not included[event_uri] and not append_if_fits(event_uri, event) then
            break
        end
    end

    -- The remaining space is filled with random known members.
    local shuffled = random_permutation(members.filter_excluding(nil))
    for _, member_uri in ipairs(shuffled) do
        if not included[member_uri] then
            local member = members.get(member_uri)
            local fits = append_if_fits(member_uri, {
                uri = member_uri,
                status = member.status,
                incarnation = member.incarnation,
                payload = member.payload,
                ttl = 1,
            })
            if not fits then
                break
            end
        end
    end

    events.gc()

    local datagram = opts.encrypt(msgpack.encode(msg_raw))
    local sent = _sock:sendto(addr.host, addr.port, datagram)
    return sent and sent > 0
end
219 |
--- Send to `uri` the local members which should overwrite the remote view.
-- A member is sent when `events.should_overwrite` prefers the local record
-- over `remote_tbl[member_uri]`. Not necessarily all of them are sent:
-- members are taken in random order until the encrypted packet would
-- exceed `opts.MAX_PACKET_SIZE`.
-- @tparam string uri recipient
-- @tparam string msg_type
-- @tparam table remote_tbl members known to the remote side, keyed by uri
-- @return false when `uri` can't be resolved, otherwise a truthy value
--   only if `sendto` reported a positive result
local function send_anti_entropy(uri, msg_type, remote_tbl)
    checks('string', 'string', 'table')
    local addr = resolve(uri)
    if not addr then
        return false
    end

    local packed_members = {}
    local msg_raw = {advertise_uri, msg_type, packed_members, {}}
    local msg_size = #msgpack.encode(msg_raw)

    local shuffled = random_permutation(members.filter_excluding(nil))
    for _, member_uri in ipairs(shuffled) do
        local member = members.get(member_uri)

        if events.should_overwrite(member, remote_tbl[member_uri]) then
            local member_size = members.estimate_msgpacked_size(member_uri, member)
            if #packed_members == 15 then
                -- msgpack:
                -- `fixarray` stores an array of up to 15 elements,
                -- `array 16` stores an array of up to (2^16)-1 elements
                -- and is 2 bytes larger
                member_size = member_size + 2
            end

            local grown_size = msg_size + member_size
            if opts.encrypted_size(grown_size) > opts.MAX_PACKET_SIZE then
                break
            end

            table.insert(packed_members, members.pack(member_uri, member))
            msg_size = grown_size
        end
    end

    local datagram = opts.encrypt(msgpack.encode(msg_raw))
    local sent = _sock:sendto(addr.host, addr.port, datagram)
    return sent and sent > 0
end
261 |
262 | --
263 | -- MESSAGE RECEIVING
264 | --
265 |
--- Decrypt, decode and process one incoming packet.
-- A valid packet is a 4-element array:
-- `{sender_uri, msg_type, msg_data, events}`.
-- Piggybacked events are handled first, then the message itself is
-- dispatched by its type.
-- @param msg raw (encrypted) packet
-- @treturn boolean false when the packet can't be decrypted or doesn't
--   decode to a 4-element table; true otherwise
-- @raise error on an unknown message type
local function handle_message(msg)
    local ok, decrypted = pcall(opts.decrypt, msg)
    if not ok then
        return false
    end

    local ok, decoded = pcall(msgpack.decode, decrypted)
    if not ok
    or type(decoded) ~= 'table'
    or #decoded ~= 4 then
        -- sometimes misencrypted messages
        -- are successfully decoded
        -- as a valid msgpack with useless data
        return false
    end

    local sender_uri = decoded[1]
    local msg_type = decoded[2]
    local msg_data = decoded[3]
    local new_events = decoded[4]

    for _, event in ipairs(new_events or {}) do
        local event = events.unpack(event)

        if event.uri == advertise_uri then
            -- this is a rumor about ourselves
            local myself = members.get(advertise_uri)

            if event.status ~= opts.ALIVE and event.incarnation >= myself.incarnation then
                -- someone thinks that we are dead
                -- the event is rewritten in place into an `alive` one with
                -- a higher incarnation before it is handled below
                log.info('Refuting the rumor that we are %s', opts.STATUS_NAMES[event.status])
                event.incarnation = event.incarnation + 1
                event.status = opts.ALIVE
                event.payload = myself.payload
                event.ttl = members.count()
            elseif event.incarnation > myself.incarnation then
                -- this branch can be called after quick restart
                -- when the member who PINGs us does not know we were dead
                -- so we increment incarnation and start spreading
                -- the rumor with our current payload

                event.ttl = members.count()
                event.incarnation = event.incarnation + 1
                event.payload = myself.payload
            end
        end

        events.handle(event)
    end

    -- luacheck:ignore 542
    if msg_type == 'PING' then
        if msg_data.dst == advertise_uri then
            -- the ping is for us: reply to the sender with an ACK
            -- set ack timestamp
            msg_data.ats = fiber.time64()
            send_message(sender_uri, 'ACK', msg_data)
        elseif sender_uri == advertise_uri then
            -- seems to be a local loop
            -- drop it
        elseif msg_data.dst ~= nil then
            -- forward
            send_message(msg_data.dst, 'PING', msg_data)
        else
            log.error('Message PING without destination uri')
        end
    elseif msg_type == 'ACK' then
        if msg_data.src == advertise_uri then
            -- the ack is for us: remember it and wake up the waiters
            -- set receive timestamp
            msg_data.rts = fiber.time64()
            table.insert(_ack_cache, msg_data)
            _ack_trigger:broadcast()
        elseif msg_data.src ~= nil then
            -- forward
            send_message(msg_data.src, 'ACK', msg_data)
        else
            log.error('Message ACK without source uri')
        end
    elseif msg_type == 'SYNC_REQ' or msg_type == 'SYNC_ACK' then
        -- `msg_data` is an array of packed members; collect them by uri
        -- and generate events for those which should overwrite local data
        local remote_tbl = {}
        for _, member in ipairs(msg_data) do
            local member_uri, member = members.unpack(member)
            remote_tbl[member_uri] = member

            if events.should_overwrite(member, members.get(member_uri)) then
                events.generate(member_uri, member.status, member.incarnation, member.payload)
            end
        end

        if msg_type == 'SYNC_REQ' then
            -- answer with what we know and the remote side doesn't
            send_anti_entropy(sender_uri, 'SYNC_ACK', remote_tbl)
        else
            _sync_trigger:broadcast()
        end
    elseif msg_type == 'LEAVE' then
        -- just handle the event
        -- do nothing more
    else
        error('Unknown message ' .. tostring(msg_type))
    end

    return true
end
368 |
369 | local function _handle_message_step()
370 |     -- Nothing to read from while there is no socket.
371 |     if _sock == nil then
372 |         return
373 |     end
374 |     -- Wait for a datagram for at most one protocol period.
375 |     if not _sock:readable(opts.PROTOCOL_PERIOD_SECONDS) then
376 |         return
377 |     end
378 |     -- Re-check: `_sock` may have been reset (see leave()) in the meantime.
379 |     if _sock == nil then
380 |         return false
381 |     end
382 |
383 |     local packet, sender = _sock:recvfrom(opts.MAX_PACKET_SIZE)
384 |     if handle_message(packet) or type(sender) ~= 'table' then
385 |         return
386 |     end
387 |
388 |     -- The packet was not handled: look the sender up by its address.
389 |     local sender_uri = nslookup(sender.host, sender.port)
390 |     local known = sender_uri ~= nil and members.get(sender_uri) or nil
391 |     if known and known.status == opts.DEAD then
392 |         log.info('Broken UDP packet from %s - %s',
393 |             sender_uri, opts.STATUS_NAMES[opts.NONDECRYPTABLE]
394 |         )
395 |         events.generate(sender_uri, opts.NONDECRYPTABLE)
396 |     end
397 | end
398 |
399 | local function handle_message_step()
400 |     -- Errors are caught together with a traceback so they can be logged.
401 |     local succeeded, trace = xpcall(_handle_message_step, debug.traceback)
402 |     fiber.testcancel()
403 |     if not succeeded then
404 |         log.error(trace)
405 |     end
406 | end
407 |
408 | --
409 | -- PROTOCOL LOOP
410 | --
411 |
412 | local function wait_ack(uri, ts, timeout)
413 |     -- `ts` and `timeout` share the unit of fiber.time64(); the remaining
414 |     -- time is divided by 1e6 before it is used as the wait timeout.
415 |     local deadline = ts + timeout
416 |     while true do
417 |         fiber.testcancel()
418 |         local now = fiber.time64()
419 |         -- an ACK matches when it echoes both the target and the ping stamp
420 |         for _, ack in ipairs(_ack_cache) do
421 |             if ack.dst == uri and ack.ts == ts then return ack end
422 |         end
423 |         if now >= deadline then return nil end
424 |         local woken = _ack_trigger:wait(tonumber(deadline - now) / 1.0e6)
425 |         if not woken then return nil end
426 |     end
427 | end
428 |
429 | local function _get_clock_delta(ack_data)
430 |     checks('table')
431 |     -- ats: peer's ACK stamp, rts: local receive stamp, ts: local send stamp
432 |     local remote = tonumber(ack_data.ats)
433 |     local received = tonumber(ack_data.rts)
434 |     local sent = tonumber(ack_data.ts)
435 |     if not (remote and received and sent) then
436 |         return nil
437 |     end
438 |     -- peer's stamp minus the midpoint of the local round trip
439 |     return remote - (received + sent) / 2
440 | end
441 |
442 | local _protocol_round_list = {}
443 | local _protocol_round_iter = 1
444 | local function _protocol_step()
445 |     local loop_now = fiber.time64()
446 |
447 |     -- Declare suspects dead once they have stayed suspected for too long.
448 |     local expiry = loop_now - opts.SUSPECT_TIMEOUT_SECONDS * 1.0e6
449 |     for suspect_uri, suspect in members.pairs() do
450 |         if suspect.status == opts.SUSPECT and suspect.timestamp < expiry then
451 |             log.info('Node timed out: %s - %s', suspect_uri, opts.STATUS_NAMES[opts.DEAD])
452 |             events.generate(suspect_uri, opts.DEAD)
453 |         end
454 |     end
455 |
456 |     -- ACKs collected during the previous step are of no use any more.
457 |     table.clear(_ack_cache)
458 |
459 |     -- Pick the next ping target; build a new shuffled round when exhausted.
460 |     _protocol_round_iter = _protocol_round_iter + 1
461 |     if _protocol_round_list[_protocol_round_iter] == nil then
462 |         _protocol_round_iter = 1
463 |         _protocol_round_list = members.filter_excluding('left')
464 |         random_permutation(_protocol_round_list)
465 |     end
466 |
467 |     local uri = _protocol_round_list[_protocol_round_iter]
468 |     if uri == nil then
469 |         return
470 |     end
471 |
472 |     local ping = {
473 |         ts = loop_now,
474 |         src = advertise_uri,
475 |         dst = uri,
476 |     }
477 |
478 |     -- First attempt: ping the target directly.
479 |     if send_message(uri, 'PING', ping) then
480 |         local direct_ack = wait_ack(uri, loop_now, opts.ACK_TIMEOUT_SECONDS * 1.0e6)
481 |         if direct_ack ~= nil then
482 |             local acked = members.get(uri)
483 |             if acked == nil then
484 |                 return
485 |             end
486 |             -- Re-setting the member updates its timestamp and clock delta.
487 |             local delta = _get_clock_delta(direct_ack)
488 |             members.set(uri, acked.status, acked.incarnation, { clock_delta = delta })
489 |             return
490 |         end
491 |     end
492 |
493 |     local target = members.get(uri)
494 |     if target ~= nil and target.status >= opts.DEAD then
495 |         -- still dead, do nothing
496 |         return
497 |     end
498 |
499 |     -- Second attempt: ask healthy members to relay the very same PING.
500 |     local relayed = 0
501 |     local relays = random_permutation(
502 |         members.filter_excluding('unhealthy', advertise_uri, uri)
503 |     )
504 |     for _, relay_uri in ipairs(relays) do
505 |         if relayed >= opts.NUM_FAILURE_DETECTION_SUBGROUPS then
506 |             break
507 |         end
508 |         if send_message(relay_uri, 'PING', ping) then
509 |             relayed = relayed + 1
510 |         end
511 |     end
512 |
513 |     local indirect_ack
514 |     if relayed > 0 then
515 |         indirect_ack = wait_ack(uri, loop_now, opts.PROTOCOL_PERIOD_SECONDS * 1.0e6)
516 |     end
517 |
518 |     -- Look the member up again: the list may have been cleared meanwhile.
519 |     target = members.get(uri)
520 |     if target == nil then
521 |         return
522 |     end
523 |     if indirect_ack ~= nil then
524 |         -- The relayed ACK yields a clock delta just like a direct one.
525 |         local delta = _get_clock_delta(indirect_ack)
526 |         members.set(uri, target.status, target.incarnation, { clock_delta = delta })
527 |         return
528 |     end
529 |     if target.status ~= opts.ALIVE then
530 |         return
531 |     end
532 |
533 |     local myself = members.get(advertise_uri)
534 |     if myself.status ~= opts.ALIVE then
535 |         opts.log_debug('Could not reach node: %s (%s myself)', uri, myself.status)
536 |     elseif opts.SUSPICIOUSNESS == false then
537 |         opts.log_debug('Could not reach node: %s (ignored)', uri)
538 |     else
539 |         log.info('Could not reach node: %s - %s', uri, opts.STATUS_NAMES[opts.SUSPECT])
540 |         events.generate(uri, opts.SUSPECT)
541 |     end
542 | end
543 |
544 | local function protocol_step()
545 |     local started = fiber.clock()
546 |     local succeeded, trace = xpcall(_protocol_step, debug.traceback)
547 |     fiber.testcancel()
548 |     if not succeeded then
549 |         log.error(trace)
550 |     end
551 |
552 |     -- Sleep for whatever remains of the protocol period after this step.
553 |     local finished = fiber.clock()
554 |     fiber.sleep(started + opts.PROTOCOL_PERIOD_SECONDS - finished)
555 | end
556 |
557 | --
558 | -- ANTI ENTROPY SYNC
559 | --
560 |
561 | -- Request the members table (SYNC_REQ) from one random healthy peer.
562 | -- Returns false if there is nobody to ask, else the trigger wait result.
563 | local function _anti_entropy_step()
564 |     -- Skip ourselves by the module-level `advertise_uri`, as other steps do.
565 |     local alive_members = members.filter_excluding('unhealthy', advertise_uri)
566 |     local alive_cnt = #alive_members
567 |     if alive_cnt == 0 then return false end
568 |     local uri = alive_members[math.random(alive_cnt)]
569 |     send_anti_entropy(uri, 'SYNC_REQ', {})
570 |     return _sync_trigger:wait(opts.PROTOCOL_PERIOD_SECONDS)
571 | end
572 |
573 | local function anti_entropy_step()
574 |     local succeeded, synced = xpcall(_anti_entropy_step, debug.traceback)
575 |     fiber.testcancel()
576 |
577 |     if not succeeded then
578 |         -- on failure `synced` holds the traceback
579 |         log.error(synced)
580 |     end
581 |     -- Retry soon unless the sync went through; then wait the long period.
582 |     local pause = opts.PROTOCOL_PERIOD_SECONDS
583 |     if succeeded and synced then pause = opts.ANTI_ENTROPY_PERIOD_SECONDS end
584 |     fiber.sleep(pause)
585 | end
586 |
587 | --
588 | -- PUBLIC API
589 | --
590 |
591 | --- Initialize the membership module.
592 | -- Bind a UDP socket to `0.0.0.0:port` (`[::]:port` for an IPv6 host),
593 | -- set the `advertise_uri` parameter to `advertise_host:port`,
594 | -- and `incarnation` to `1`.
595 | --
596 | -- The `init()` function can be called several times; when the port
597 | -- changes, a new socket is opened and the old one is closed.
598 | --
599 | -- If the `advertise_uri` changes during the next `init()`,
600 | -- the old URI is considered `DEAD`.
601 | -- In order to leave the group gracefully use the @{leave} function.
602 | --
603 | -- @function init
604 | -- @tparam string advertise_host
605 | --   either hostname or IP address being advertised to other members
606 | -- @tparam number port
607 | --   UDP port to bind and advertise
608 | -- @treturn boolean `true`
609 | -- @raise Socket bind error
610 | local function init(advertise_host, port)
611 |     checks('string', 'number')
612 |
613 |     local parts = uri_tools.parse(advertise_host)
614 |     -- (Re)bind only if there is no socket yet or the port has changed.
615 |     if _sock == nil or _sock:name().port ~= port then
616 |         local family = parts.ipv6 and 'AF_INET6' or 'AF_INET'
617 |         local addr = parts.ipv6 and '::' or '0.0.0.0'
618 |         local sock = socket(family, 'SOCK_DGRAM', 'udp')
619 |         if not sock:bind(addr, port) then
620 |             local err = string.format('Socket bind error (%s/udp): %s', port, sock:error())
621 |             -- The error text is taken first; then the useless socket is closed.
622 |             sock:close()
623 |             log.error(err)
624 |             error(err, 2)
625 |         end
626 |         sock:nonblock(true)
627 |         sock:setsockopt('SOL_SOCKET', 'SO_BROADCAST', 1)
628 |
629 |         -- The previous socket (if any) is closed only after a successful bind.
630 |         if _sock then
631 |             _sock:close()
632 |         end
633 |         _sock = sock
634 |     end
635 |
636 |     advertise_uri = uri_tools.format({
637 |         host = advertise_host,
638 |         service = tostring(port)
639 |     })
640 |     events.generate(advertise_uri, opts.ALIVE, 1, {})
641 |
642 |     -- Restart the background fibers and publish the new state in the stash.
643 |     stash.fiber_cancel('protocol_step')
644 |     stash.fiber_cancel('anti_entropy_step')
645 |     stash.fiber_cancel('handle_message_step')
646 |     stash.fiber_new('protocol_step'):name('membership.main')
647 |     stash.fiber_new('anti_entropy_step'):name('membership.entropy')
648 |     stash.fiber_new('handle_message_step'):name('membership.handle')
649 |     stash.set('advertise_uri', advertise_uri)
650 |     stash.set('_sock', _sock)
651 |
652 |     return true
653 | end
654 |
655 |
656 | --- Discover members in local network.
657 | -- Send a PING over UDP to the specified `port` of every
658 | -- address reported by the `getifaddrs()` C call.
659 | -- @function broadcast
660 | -- @tparam number port UDP port of the broadcast
661 | -- @return[1] `true` if broadcast was sent
662 | -- @return[2] `false` if `getifaddrs()` fails or yields no suitable address.
663 | local function broadcast(port)
664 |     checks('number')
665 |
666 |     -- Both `src` and `dst` of the broadcast PING are our own URI.
667 |     local ping = {
668 |         ts = fiber.time64(),
669 |         src = advertise_uri,
670 |         dst = advertise_uri,
671 |     }
672 |
673 |     local ok, netlist = pcall(network.getifaddrs)
674 |     if not ok then
675 |         log.warn('Membership BROADCAST impossible: %s', netlist)
676 |         return false
677 |     end
678 |
679 |     local sent_any = false
680 |     for _, ifaddr in pairs(netlist) do
681 |         -- prefer the `bcast` field, fall back to `inet4`
682 |         local host = ifaddr.bcast or ifaddr.inet4
683 |         if host then
684 |             local target = string.format('%s:%s', host, port)
685 |             send_message(target, 'PING', ping)
686 |             log.info('Membership BROADCAST sent to %s', target)
687 |             sent_any = true
688 |         end
689 |     end
690 |     if sent_any then
691 |         return true
692 |     end
693 |     log.warn('Membership BROADCAST not sent: No suitable ifaddrs found')
694 |     return false
695 | end
696 |
697 | --- Gracefully leave the membership group.
698 | -- The node announces itself with the status `left`,
699 | -- and no other members will ever try to reconnect it.
700 | -- @function leave
701 | -- @treturn boolean
702 | --   `true` if call succeeds,
703 | --   `false` if there is no socket, i.e. the member has already left.
704 | local function leave()
705 |     if _sock == nil then
706 |         return false
707 |     end
708 |
709 |     -- Stop the background fibers first.
710 |     stash.fiber_cancel('protocol_step')
711 |     stash.fiber_cancel('anti_entropy_step')
712 |     stash.fiber_cancel('handle_message_step')
713 |
714 |     -- Build the LEFT event by hand (no events.generate()) and send it at once.
715 |     local me = members.get(advertise_uri)
716 |     local left_event = events.pack({
717 |         uri = advertise_uri,
718 |         status = opts.LEFT,
719 |         incarnation = me.incarnation,
720 |         ttl = members.count(),
721 |     })
722 |     local packed = msgpack.encode({advertise_uri, 'LEAVE', msgpack.NULL, {left_event}})
723 |     local packet = opts.encrypt(packed)
724 |     for _, peer_uri in ipairs(members.filter_excluding('unhealthy', advertise_uri)) do
725 |         local addr = resolve(peer_uri)
726 |         if addr then
727 |             _sock:sendto(addr.host, addr.port, packet)
728 |         end
729 |     end
730 |
731 |     -- Forget the socket and our own URI, both locally and in the stash.
732 |     _sock:close()
733 |     _sock = nil
734 |     stash.set('_sock', nil)
735 |     advertise_uri = nil
736 |     stash.set('advertise_uri', nil)
737 |
738 |     members.clear()
739 |     events.clear()
740 |     table.clear(_protocol_round_list)
741 |     return true
742 | end
743 |
744 | --- Forcefully send leave message about an instance.
745 | -- @function mark_left
746 | -- @tparam string uri_to_leave URI of the member to announce as `left`
747 | -- @treturn boolean `true` if the message was sent out, `false` if there
748 | --   is no socket, the member is unknown or it has already left.
749 | local function mark_left(uri_to_leave)
750 |     if _sock == nil then
751 |         return false
752 |     end
753 |     local target = members.get(uri_to_leave)
754 |     if not target or target.status == opts.LEFT then
755 |         return false
756 |     end
757 |
758 |     -- Hand-made LEFT event; the message names `uri_to_leave` as its sender.
759 |     local left_event = events.pack({
760 |         uri = uri_to_leave,
761 |         status = opts.LEFT,
762 |         incarnation = target.incarnation,
763 |         ttl = members.count(),
764 |     })
765 |     local packed = msgpack.encode({uri_to_leave, 'LEAVE', msgpack.NULL, {left_event}})
766 |     local packet = opts.encrypt(packed)
767 |     for _, peer_uri in ipairs(members.filter_excluding('unhealthy', uri_to_leave)) do
768 |         local addr = resolve(peer_uri)
769 |         if addr then
770 |             _sock:sendto(addr.host, addr.port, packet)
771 |         end
772 |     end
773 |
774 |     return true
775 | end
776 |
777 | --- Member data structure.
778 | -- A member is represented by the table with the following fields:
779 | --
780 | -- @table MemberInfo
781 | -- @tfield string uri `host:port` of a member
782 | --
783 | -- @tfield string status a string that takes one of the values below
784 | --
785 | -- * `alive`: a member that replies to ping-messages is alive and well.
786 | -- * `suspect`: if any member in the group cannot get a reply from any other member, the first member asks
787 | --   three other alive members to send a ping-message to the member in question. If there is no response,
788 | --   the latter becomes a suspect.
789 | -- * `dead`: a `suspect` becomes `dead` after a timeout.
790 | -- * `left`: a member gets the `left` status after executing the @{leave} function.
791 | --
792 | -- @tfield number incarnation a value incremented every time
793 | --   the instance status changes, or its payload is updated
794 | --
795 | -- @tfield table payload an auxiliary data that can be used by various modules
796 | --
797 | -- @tfield number timestamp a value of fiber.time64()
798 | --   which corresponds to the last update of status or incarnation;
799 | --   it is always local and does not depend on other members' clock setting.
800 | --
801 | -- @tfield number clock_delta difference of clocks (fiber.time64) between self and peer
802 | --   calculated during ping/ack protocol step or while probe_uri call
803 | --
804 | -- @usage tarantool> membership.myself()
805 | -- ---
806 | -- uri: "localhost:33001"
807 | -- status: "alive"
808 | -- incarnation: 1
809 | -- payload:
810 | --   uuid: "2d00c500-2570-4019-bfcc-ab25e5096b73"
811 | -- timestamp: 1522427330993752
812 | -- clock_delta: 700
813 | -- ...
814 | local function _member_pack(uri, member)
815 |     checks('string', '?table')
816 |     if not member then
817 |         return nil
818 |     end
819 |
820 |     -- Anything but a table (msgpack.NULL included) is reported as {}.
821 |     local payload = member.payload
822 |     if payload == msgpack.NULL or type(payload) ~= 'table' then
823 |         payload = {}
824 |     end
825 |
826 |     local info = {
827 |         uri = uri,
828 |         status = opts.STATUS_NAMES[member.status] or tostring(member.status),
829 |         payload = payload,
830 |         incarnation = member.incarnation,
831 |         timestamp = member.timestamp,
832 |         clock_delta = member.clock_delta,
833 |     }
834 |     return info
835 | end
836 |
837 | --- Obtain all members known to the current instance.
838 | --
839 | -- A new table is built on every call, so editing it has no effect.
840 | -- @function members
841 | -- @treturn table a table with URIs as keys and corresponding @{MemberInfo} as values.
842 | local function get_members()
843 |     local snapshot = {}
844 |     for uri, member in members.pairs() do
845 |         snapshot[uri] = _member_pack(uri, member)
846 |     end
847 |     return snapshot
848 | end
849 |
850 | --- Iterate over members.
851 | -- A shorthand for `pairs(membership.members())`.
852 | -- @function pairs
853 | -- @return Lua iterator
854 | -- @usage for uri, member in membership.pairs() do end
855 |
856 | --- Get info about member with the given URI.
857 | -- @function get_member
858 | -- @tparam string uri `host:port` of member of interest
859 | -- @treturn MemberInfo the member data structure of the instance with the given URI.
860 | local function get_member(uri)
861 |     -- for an unknown URI the lookup gives nil and so does the packing
862 |     return _member_pack(uri, members.get(uri))
863 | end
864 |
865 | --- Get info about the current instance.
866 | -- @function myself
867 | -- @treturn MemberInfo the member data structure of the current instance.
868 | local function get_myself()
869 |     -- our own record is looked up by the advertised URI
870 |     local uri = advertise_uri
871 |     local me = members.get(uri)
872 |     return _member_pack(uri, me)
873 | end
874 |
875 | --- Add a member to the group.
876 | -- Also propagate this event to other members.
877 | -- Adding a member to a single instance is enough
878 | -- as everybody else in the group will receive the update with time.
879 | -- It does not matter who adds whom.
880 | --
881 | -- **Warning:** The gossip protocol guarantees
882 | -- that every member in the group becomes aware
883 | -- of any status change in two communication cycles.
884 | --
885 | -- @function add_member
886 | -- @tparam string uri `host:port` of member to add
887 | -- @treturn true|nil
888 | -- @treturn ?string Possible errors:
889 | --
890 | -- * `"parse error"` - if the URI can not be parsed
891 | local function add_member(uri)
892 |     checks('string')
893 |     local parts = uri_tools.parse(uri)
894 |     if not parts then
895 |         return nil, 'parse error'
896 |     end
897 |     -- rebuild the URI from host and service only, bracketing IPv6 hosts
898 |     local host = parts.host
899 |     if parts.ipv6 then
900 |         host = '[' .. host .. ']'
901 |     end
902 |     local member_uri = uri_tools.format({host = host, service = parts.service})
903 |
904 |     -- a member known as `left` is re-added with a bumped incarnation
905 |     local known = members.get(member_uri)
906 |     local incarnation
907 |     if known and known.status == opts.LEFT then
908 |         incarnation = known.incarnation + 1
909 |     end
910 |     events.generate(member_uri, opts.ALIVE, incarnation)
911 |     return true
912 | end
913 |
914 | --- Send a ping to a member.
915 | -- Send a ping-message to a member to make sure it is in the group.
916 | --
917 | -- If the member responds but is not in the group, it is added.
918 | --
919 | -- If it already is in the group, nothing happens.
920 | --
921 | -- **Warning:** When destination IP can be resolved in several different
922 | -- ways (by different hostnames) it is possible that `probe_uri()` function returns
923 | -- `"no response"` error, but the member is added to the group with another URI,
924 | -- corresponding to its own `advertise_uri`.
925 | --
926 | -- @function probe_uri
927 | -- @tparam string uri `host:port` of member to ping
928 | -- @treturn true|nil
929 | -- @treturn ?string Possible errors:
930 | --
931 | -- * `"parse error"` - if the URI can not be parsed
932 | -- * `"ping was not sent"` - if hostname could not be resolved
933 | -- * `"no response"` - if member does not respond within `ACK_TIMEOUT_SECONDS`
934 | local function probe_uri(uri)
935 |     checks('string')
936 |     local parts = uri_tools.parse(uri)
937 |     if not parts then
938 |         return nil, 'parse error'
939 |     end
940 |
941 |     local host = parts.host
942 |     if parts.ipv6 then
943 |         host = '[' .. host .. ']'
944 |     end
945 |     local target = uri_tools.format({host = host, service = parts.service})
946 |
947 |     local sent_at = fiber.time64()
948 |     local ping = {
949 |         ts = sent_at,
950 |         src = advertise_uri,
951 |         dst = target,
952 |     }
953 |     if not send_message(target, 'PING', ping) then
954 |         return nil, 'ping was not sent'
955 |     end
956 |
957 |     local ack = wait_ack(target, sent_at, opts.ACK_TIMEOUT_SECONDS * 1.0e6)
958 |     if ack == nil then
959 |         return nil, 'no response'
960 |     end
961 |
962 |     -- A known member is re-set with the measured clock delta,
963 |     -- which updates its timestamp as well.
964 |     local member = members.get(target)
965 |     if member ~= nil then
966 |         local delta = _get_clock_delta(ack)
967 |         members.set(target, member.status, member.incarnation, { clock_delta = delta })
968 |     end
969 |
970 |     return true
971 | end
972 |
973 | --- Update payload and disseminate it along with the member status.
974 | -- Also increments `incarnation`, unless the key already holds the value.
975 | -- @function set_payload
976 | -- @tparam string key a key to set in payload table
977 | -- @param value auxiliary data
978 | local function set_payload(key, value)
979 |     checks('string', '?')
980 |     local me = members.get(advertise_uri)
981 |     local payload = me.payload
982 |     if type(payload) ~= 'table' then
983 |         payload = {}
984 |     end
985 |     -- Spread a new rumor only if the value actually changes.
986 |     if payload[key] ~= value then
987 |         -- NB: an existing payload table is modified in place
988 |         payload[key] = value
989 |         events.generate(
990 |             advertise_uri,
991 |             me.status,
992 |             me.incarnation + 1,
993 |             payload
994 |         )
995 |     end
996 |     return true
997 | end
998 |
999 | --- Remove a member. Don't use it unless you are having trouble with stale members.
1000 | -- The record is removed through `members.remove()`;
1001 | -- no event is generated by this function.
1002 | -- @function remove_member
1003 | -- @tparam string uri URI of the member to remove
1004 | local function remove_member(uri)
1005 |     checks('string')
1006 |     -- an unknown URI is silently ignored
1007 |     if members.get(uri) ~= nil then
1008 |         members.remove(uri)
1009 |     end
1010 | end
1011 |
1012 | --- Filter out members from the list.
1013 | -- If the function wasn't called or the allowed URI list
1014 | -- is empty, all members are allowed.
1015 | -- @function set_allowed_members
1016 | -- @tparam table uris URIs to leave in the list
1017 | local function set_allowed_members(uris)
1018 |     checks('table')
1019 |     events.clear()
1020 |     table.clear(_protocol_round_list)
1021 |     table.clear(_allowed_uri_set)
1022 |     if next(uris) == nil then
1023 |         return -- empty list: the filter stays cleared
1024 |     end
1025 |     for _, allowed_uri in ipairs(uris) do
1026 |         _allowed_uri_set[allowed_uri] = true
1027 |     end
1028 |     -- drop every stashed member which is missing from the new list
1029 |     local known = stash.get('members._all_members') or {}
1030 |     for member_uri in pairs(known) do
1031 |         if not _allowed_uri_set[member_uri] then members.remove(member_uri) end
1032 |     end
1033 | end
1034 |
1035 | do -- finish module loading: run the after_reload hooks, then stash the step functions
1036 | opts.after_reload()
1037 | events.after_reload()
1038 | members.after_reload()
1039 | after_reload()
1040 | stash.set('protocol_step', protocol_step) -- init() starts and cancels fibers by these names
1041 | stash.set('anti_entropy_step', anti_entropy_step)
1042 | stash.set('handle_message_step', handle_message_step)
1043 | end
1044 |
1045 | return {
1046 | init = init,
1047 | leave = leave,
1048 | mark_left = mark_left,
1049 | members = get_members,
1050 | broadcast = broadcast,
1051 | pairs = function() return pairs(get_members()) end,
1052 | myself = get_myself,
1053 | probe_uri = probe_uri,
1054 | add_member = add_member,
1055 | get_member = get_member,
1056 | remove_member = remove_member,
1057 | set_payload = set_payload,
1058 | set_allowed_members = set_allowed_members,
1059 |
1060 | --- Encryption Functions.
1061 | -- The encryption is handled by the
1062 | -- [`crypto.cipher.aes256.cbc`](https://tarantool.io/en/doc/latest/reference/reference_lua/crypto/)
1063 | -- Tarantool module.
1064 | --
1065 | -- For proper communication, all members must be configured
1066 | -- to use the same encryption key. Otherwise, members report
1067 | -- either `dead` or `non-decryptable` in their status.
1068 | -- @section encryption
1069 |
1070 | --- Retrieve the encryption key that is currently in use.
1071 | -- @function get_encryption_key
1072 | -- @treturn string encryption key
1073 | get_encryption_key = assert(opts.get_encryption_key),
1074 |
1075 | --- Set the key used for low-level message encryption.
1076 | -- The key is either trimmed or padded automatically to be exactly 32 bytes.
1077 | -- If the `key` value is `nil`, the encryption is disabled.
1078 | --
1079 | -- @function set_encryption_key
1080 | -- @tparam string key encryption key
1081 | -- @treturn nil
1082 | set_encryption_key = assert(opts.set_encryption_key),
1083 |
1084 | --- Subscription Functions.
1085 | -- A subscription is implemented with Tarantool built-in
1086 | -- [`fiber.cond`](https://tarantool.io/en/doc/latest/reference/reference_lua/fiber/#fiber-cond)
1087 | -- objects.
1088 | -- @section subscription
1089 |
1090 | --- Subscribe for updates in the members table.
1091 | -- @function subscribe
1092 | -- @return `fiber.cond` object which is
1093 | -- broadcasted whenever the members table changes
1094 | subscribe = assert(events.subscribe),
1095 |
1096 | --- Unsubscribe from membership updates.
1097 | -- Remove subscription on `cond` object.
1098 | --
1099 | -- If the passed parameter is already unsubscribed or invalid, nothing happens.
1100 | -- @function unsubscribe
1101 | -- @param cond `fiber.cond` object obtained from `subscribe` function
1102 | -- @treturn nil
1103 | unsubscribe = assert(events.unsubscribe),
1104 | }
1105 |
--------------------------------------------------------------------------------
/membership/events.lua:
--------------------------------------------------------------------------------
1 | local fiber = require('fiber')
2 | local checks = require('checks')
3 | local msgpack = require('msgpack')
4 |
5 | local opts = require('membership.options')
6 | local stash = require('membership.stash')
7 | local members = require('membership.members')
8 |
9 | local events = {}
10 | local _all_events = table.copy(stash.get('events._all_events')) or {
11 | -- [uri] = {
12 | -- uri = string,
13 | -- status = number,
14 | -- incarnation = number,
15 | -- ttl = number,
16 | -- }
17 |
18 | -- uri is a string in format 'host:port'
19 | }
20 | local _expired = table.copy(stash.get('events._expired')) or {
21 | -- [uri] = true
22 | }
23 | local _subscribers = table.copy(stash.get('events._subscribers')) or {
24 | -- [fiber.cond] = true
25 | }
26 | setmetatable(_subscribers, {__mode = 'k'}) -- weak keys: the set itself does not keep a cond alive
27 |
28 | function events.after_reload() -- publish the module tables into the stash
29 |     stash.set('events._all_events', _all_events)
30 |     stash.set('events._expired', _expired)
31 |     stash.set('events._subscribers', _subscribers)
32 | end
33 |
34 | function events.clear() -- forget all events; subscribers are kept
35 |     table.clear(_expired)
36 |     table.clear(_all_events)
37 | end
38 |
39 | function events.get(uri) -- the stored event about `uri`, or nil
40 | checks('string')
41 | return _all_events[uri]
42 | end
43 |
44 | function events.all() -- the internal uri -> event table itself, not a copy
45 | return _all_events
46 | end
47 |
48 | function events.pairs() -- iterate over uri, event pairs
49 | return pairs(_all_events)
50 | end
51 |
52 | function events.estimate_msgpacked_size(event)
53 |     -- encoded sizes of the five packed fields plus one extra byte
54 |     return #msgpack.encode(event.uri)
55 |         + #msgpack.encode(event.status)
56 |         + #msgpack.encode(event.incarnation)
57 |         + #msgpack.encode(event.payload or msgpack.NULL)
58 |         + #msgpack.encode(event.ttl)
59 |         + 1
60 | end
61 |
62 | function events.pack(event)
63 |     checks('table')
64 |     -- Each packing consumes one unit of ttl (the event table is mutated);
65 |     -- an event whose ttl is used up is marked for removal by events.gc().
66 |     event.ttl = event.ttl - 1
67 |     if event.ttl <= 0 then
68 |         _expired[event.uri] = true
69 |     end
70 |     -- positional layout: uri, status, incarnation, payload, ttl
71 |     local payload = event.payload or msgpack.NULL
72 |     return {
73 |         event.uri, event.status, event.incarnation,
74 |         payload, event.ttl,
75 |     }
76 | end
77 |
78 | function events.gc()
79 |     -- forget every event that has been marked as expired
80 |     for expired_uri in pairs(_expired) do
81 |         _all_events[expired_uri], _expired[expired_uri] = nil, nil
82 |     end
83 | end
84 |
85 | function events.unpack(event)
86 |     checks('table')
87 |     -- a payload which is not a table (msgpack.NULL included) is dropped
88 |     local payload = event[4]
89 |     if payload == msgpack.NULL or type(payload) ~= 'table' then
90 |         payload = nil
91 |     end
92 |     -- non-numeric fields fall back to DEAD, incarnation 1 and ttl 0
93 |     local unpacked = {
94 |         uri = tostring(event[1]),
95 |         status = tonumber(event[2]) or opts.DEAD,
96 |         incarnation = tonumber(event[3]) or 1,
97 |         payload = payload,
98 |         ttl = tonumber(event[5]) or 0,
99 |     }
100 |     return unpacked
101 | end
101 |
102 | function events.should_overwrite(first, second)
103 |     checks('table', '?table')
104 |     if not second then
105 |         return true
106 |     end
107 |     -- a higher incarnation wins; on a tie the higher status code does
108 |     if first.incarnation ~= second.incarnation then
109 |         return first.incarnation > second.incarnation
110 |     end
111 |     return first.status > second.status
112 | end
113 |
114 | function events.generate(uri, status, incarnation, payload)
115 |     checks('string', 'number', '?number', '?table')
116 |     local known = members.get(uri) or {}
117 |     -- ttl grows as log2 of the group size; the size is clamped to 1 since
118 |     -- log(0) is -inf, which gave a ttl of -inf while the table was empty.
119 |     local group_size = math.max(members.count(), 1)
120 |     events.handle({
121 |         uri = uri, status = status or opts.ALIVE, payload = payload,
122 |         -- default: the known incarnation, or 1 for a new member
123 |         incarnation = incarnation or known.incarnation or 1,
124 |         ttl = math.floor(math.log(group_size, 2)) + 2,
125 |     })
126 | end
126 |
127 | function events.handle(event)
128 |     local member = members.get(event.uri)
129 |     -- drop outdated events
130 |     if not events.should_overwrite(event, member) then
131 |         return
132 |     end
133 |     _all_events[event.uri] = event
134 |
135 |     -- Pick the log line: a brand-new member, or a known one whose
136 |     -- status or incarnation differs from the event.
137 |     local fmt
138 |     if not member then
139 |         fmt = 'Adding: %s (inc. %d) is %s'
140 |     elseif member.status ~= event.status
141 |         or member.incarnation ~= event.incarnation
142 |     then
143 |         fmt = 'Rumor: %s (inc. %d) is %s'
144 |     end
145 |     if fmt ~= nil then
146 |         opts.log_debug(fmt, event.uri, event.incarnation, opts.STATUS_NAMES[event.status])
147 |     end
148 |
149 |     -- update members list
150 |     members.set(event.uri, event.status, event.incarnation, { payload = event.payload })
151 |
152 |     -- wake up everybody who waits on a subscription
153 |     for cond in pairs(_subscribers) do
154 |         cond:broadcast()
155 |     end
156 | end
157 |
158 | function events.subscribe() -- returns a new fiber.cond registered as a subscriber
159 | local cond = fiber.cond()
160 | _subscribers[cond] = true -- events.handle() broadcasts on every registered cond
161 | return cond
162 | end
163 |
164 | function events.unsubscribe(cond) -- a cond which is not registered is simply ignored
165 | _subscribers[cond] = nil
166 | return nil
167 | end
168 |
169 | return events
170 |
--------------------------------------------------------------------------------
/membership/members.lua:
--------------------------------------------------------------------------------
1 | local fiber = require('fiber')
2 | local checks = require('checks')
3 | local msgpack = require('msgpack')
4 |
5 | local opts = require('membership.options')
6 | local stash = require('membership.stash')
7 |
8 | local members = {}
9 | local _all_members = table.copy(stash.get('members._all_members')) or {
10 | -- [uri] = {
11 | -- status = number,
12 | -- incarnation = number,
13 | -- timestamp = time64,
14 | -- payload = ?table,
15 | -- clock_delta = ?number
16 | -- }
17 |
18 | -- uri is a string in format 'host:port'
19 | }
20 |
21 | local _allowed_uri_set = stash.get('_allowed_uri_set') -- read from the stash as is; members.set() calls next() on it
22 |
23 | function members.after_reload() -- publish the members table into the stash
24 | stash.set('members._all_members', _all_members)
25 | end
26 |
27 | function members.clear() -- empty the table in place
28 | table.clear(_all_members)
29 | end
30 |
31 | function members.pairs() -- iterate over uri, member pairs of the internal table
32 | return pairs(_all_members)
33 | end
34 |
35 | function members.get(uri) -- the internal record for `uri` (not a copy), or nil
36 | return _all_members[uri]
37 | end
38 |
39 | function members.estimate_msgpacked_size(uri, member)
40 |     -- encoded sizes of the four packed fields plus one extra byte
41 |     return #msgpack.encode(uri)
42 |         + #msgpack.encode(member.status)
43 |         + #msgpack.encode(member.incarnation)
44 |         + #msgpack.encode(member.payload or msgpack.NULL)
45 |         + 1
46 | end
47 |
48 | function members.pack(uri, member)
49 |     checks('string', 'table')
50 |     -- positional layout: uri, status, incarnation, payload
51 |     local payload = member.payload or msgpack.NULL
52 |     return {
53 |         uri, member.status, member.incarnation,
54 |         payload,
55 |     }
56 | end
57 |
58 | function members.unpack(member)
59 |     checks('table')
60 |     -- a payload which is not a table (msgpack.NULL included) is dropped
61 |     local payload = member[4]
62 |     if payload == msgpack.NULL or type(payload) ~= 'table' then
63 |         payload = nil
64 |     end
65 |     local unpacked = {
66 |         status = tonumber(member[2]),
67 |         incarnation = tonumber(member[3]),
68 |         payload = payload,
69 |     }
70 |     return member[1], unpacked
71 | end
72 |
73 | function members.filter_excluding(state, uri1, uri2)
74 |     assert(state == nil or state == 'left' or state == 'unhealthy')
75 |     -- `uri1` and `uri2` are always skipped; on top of that 'unhealthy'
76 |     -- keeps ALIVE members only and 'left' drops those with status LEFT.
77 |     local kept = {}
78 |     for uri, member in pairs(_all_members) do
79 |         local wanted = (state == nil)
80 |             or (state == 'unhealthy' and member.status == opts.ALIVE)
81 |             or (state == 'left' and member.status ~= opts.LEFT)
82 |         if uri ~= uri1 and uri ~= uri2 and wanted then
83 |             table.insert(kept, uri)
84 |         end
85 |     end
86 |     return kept
87 | end
88 |
89 | function members.set(uri, status, incarnation, params)
90 |     checks('string', 'number', 'number', { payload = '?table', clock_delta = '?number' })
91 |
92 |     local member = _all_members[uri]
93 |
94 |     -- With a non-empty allow-list, a member outside of it is removed
95 |     -- (rather than stored) when it is set to SUSPECT, LEFT or DEAD.
96 |     local unhealthy = status == opts.SUSPECT or status == opts.LEFT or status == opts.DEAD
97 |     if next(_allowed_uri_set) and not _allowed_uri_set[uri] and unhealthy then
98 |         opts.log_debug('Ignoring member %s with status %s', uri, opts.STATUS_NAMES[status])
99 |         members.remove(uri)
100 |         return
101 |     end
102 |
103 |     -- the incarnation of a known member never goes backwards
104 |     if member and incarnation < member.incarnation then
105 |         error('Can not downgrade incarnation')
106 |     end
107 |
108 |     -- Start from the stored payload and clock delta (if any) and
109 |     -- override them with whatever `params` explicitly provides.
110 |     local payload, clock_delta
111 |     if member ~= nil then
112 |         payload, clock_delta = member.payload, member.clock_delta
113 |     end
114 |     if params ~= nil then
115 |         if params.payload ~= nil then payload = params.payload end
116 |         if params.clock_delta ~= nil then clock_delta = params.clock_delta end
117 |     end
118 |
119 |     -- the record is replaced as a whole, with a fresh timestamp
120 |     _all_members[uri] = {
121 |         status = status,
122 |         incarnation = incarnation,
123 |         payload = payload,
124 |         timestamp = fiber.time64(),
125 |         clock_delta = clock_delta
126 |     }
127 | end
128 |
--- Count the entries in the members table.
-- @treturn number
function members.count()
    local total = 0
    local key = next(_all_members)
    while key ~= nil do
        total = total + 1
        key = next(_all_members, key)
    end
    return total
end
136 |
--- Forget a member.
-- Drops the record from the in-module table and from the tables kept in
-- the global `__membership_stash` under the keys `members._all_members`
-- and `_resolve_cache`.
-- @tparam string uri
function members.remove(uri)
    checks('string')

    _all_members[uri] = nil

    local stash = rawget(_G, '__membership_stash')
    if stash == nil then
        return
    end

    -- Either stashed table may not exist yet; a missing table simply has
    -- nothing to clean up, so don't crash on it.
    for _, key in ipairs({'members._all_members', '_resolve_cache'}) do
        local stashed = stash[key]
        if stashed ~= nil then
            stashed[uri] = nil
        end
    end
end
145 |
146 | return members
147 |
--------------------------------------------------------------------------------
/membership/network.lua:
--------------------------------------------------------------------------------
1 | local ffi = require('ffi')
2 | local bit = require('bit')
3 |
4 | local stash = require('membership.stash')
5 | if not stash.get('network.cdef_getifaddrs') then
6 | ffi.cdef([[
7 | struct ifaddrs {
8 | struct ifaddrs *ifa_next; /* Next item in list */
9 | char *ifa_name; /* Name of interface */
10 | unsigned int ifa_flags; /* Flags from SIOCGIFFLAGS */
11 | struct sockaddr *ifa_addr; /* Address of interface */
12 | struct sockaddr *ifa_netmask; /* Netmask of interface */
13 | union {
14 | struct sockaddr *ifu_broadaddr; /* Broadcast address of interface */
15 | struct sockaddr *ifu_dstaddr; /* Point-to-point destination address */
16 | } ifa_ifu;
17 | void *ifa_data; /* Address-specific data */
18 | };
19 |
20 | struct in_addr {
21 | uint32_t s_addr;
22 | };
23 |
24 | enum {
25 | IFF_UP = 0x1, /* interface is up */
26 | IFF_BROADCAST = 0x2, /* broadcast address valid */
27 | IFF_POINTOPOINT = 0x10 /* interface is has p-p link */
28 | };
29 |
30 | enum {
31 | AF_INET = 2 /* Internet IP Protocol */
32 | };
33 |
34 | const char *strerror(int errno);
35 | int getifaddrs(struct ifaddrs **ifap);
36 | void freeifaddrs(struct ifaddrs *ifa);
37 | const char *inet_ntop(int af, const void *src,
38 | char *dst, socklen_t size);
39 | ]])
40 |
41 | if ffi.os == "Linux" then
42 | ffi.cdef([[
43 | struct sockaddr {
44 | uint16_t sa_family; /* address family, AF_xxx */
45 | char sa_data[14]; /* 14 bytes of protocol address */
46 | };
47 |
48 | /* Structure describing an Internet (IP) socket address. */
49 | struct sockaddr_in {
50 | uint16_t sin_family; /* Address family */
51 | uint16_t sin_port; /* Port number */
52 | struct in_addr sin_addr; /* Internet address */
53 | };
54 | ]])
55 | elseif ffi.os == "OSX" then
56 | ffi.cdef([[
57 | struct sockaddr {
58 | uint8_t sa_len;
59 | uint8_t sa_family; /* address family, AF_xxx */
60 | char sa_data[14]; /* 14 bytes of protocol address */
61 | };
62 |
63 | /* Structure describing an Internet (IP) socket address. */
64 | struct sockaddr_in {
65 | uint8_t sin_len;
66 | uint8_t sin_family; /* Address family */
67 | uint16_t sin_port; /* Port number */
68 | struct in_addr sin_addr; /* Internet address */
69 | };
70 | ]])
71 | end
72 |
73 | stash.set('network.cdef_getifaddrs', true)
74 | end
75 |
76 |
--- List active AF_INET interfaces.
-- Compose a table of the following structure:
--     {
--         [1] = {
--             name = ifa_name,
--             inet4 = "0.0.0.0",
--             bcast = "0.0.0.0", -- if broadcast flag is set
--         },
--     }
-- Only interfaces that are up and have an AF_INET address are listed.
-- @treturn table
-- @raise the `strerror` text when `getifaddrs(3)` fails
local function getifaddrs()
    local ifaddrs_root = ffi.new("struct ifaddrs *[1]")
    local res = ffi.C.getifaddrs(ifaddrs_root)
    if res ~= 0 then
        local errno = ffi.errno()
        local strerr = ffi.C.strerror(errno)
        error(ffi.string(strerr))
    end

    local buf = ffi.new("char[32]")

    -- Render the IPv4 address behind a `struct sockaddr *` as a string.
    local function ntop(sockaddr)
        local sa = ffi.cast("struct sockaddr_in *", sockaddr)
        ffi.C.inet_ntop(sa.sin_family, sa.sin_addr, buf, ffi.sizeof(buf))
        return ffi.string(buf)
    end

    -- Walk the linked list and build the result table.
    local function collect(iap)
        local ret = {}
        while iap ~= nil do
            if bit.band(iap.ifa_flags, ffi.C.IFF_UP) ~= 0
                and iap.ifa_addr ~= nil
                and iap.ifa_addr.sa_family == ffi.C.AF_INET
            then
                local ifa = {
                    name = ffi.string(iap.ifa_name),
                    inet4 = ntop(iap.ifa_addr),
                }

                -- The broadcast pointer may be NULL even when the flag is
                -- set; dereferencing it would crash the process.
                local broadaddr = iap.ifa_ifu.ifu_broadaddr
                if bit.band(iap.ifa_flags, ffi.C.IFF_BROADCAST) ~= 0
                    and broadaddr ~= nil
                then
                    ifa.bcast = ntop(broadaddr)
                end

                table.insert(ret, ifa)
            end
            iap = iap.ifa_next
        end
        return ret
    end

    -- Release the list even if collecting raised an error.
    local ok, ret = pcall(collect, ifaddrs_root[0])
    ffi.C.freeifaddrs(ifaddrs_root[0])
    if not ok then
        error(ret, 0)
    end
    return ret
end
123 |
124 | return {
125 | getifaddrs = getifaddrs,
126 | }
127 |
--------------------------------------------------------------------------------
/membership/options.lua:
--------------------------------------------------------------------------------
1 | --- Tuning options for membership module.
2 | -- This module should normally never be used
3 | --
4 | -- @submodule membership
5 |
6 | local log = require('log')
7 | local cbc = require('crypto').cipher.aes256.cbc
8 |
9 | local stash = require('membership.stash')
10 |
-- Reuse the options stored in the stash, if any. The stored table is
-- copied and its metatable is removed so that the assignments below in
-- this file are not rejected by the readonly guard.
local options = stash.get('options')
if options ~= nil then
    options = setmetatable(table.copy(options), nil)
else
    options = {}
end

--- Store the current options table in the stash.
function options.after_reload()
    local current = options
    stash.set('options', current)
end
21 |
22 | options.STATUS_NAMES = {'alive', 'suspect', 'dead', 'non-decryptable', 'left'}
23 | options.ALIVE = 1
24 | options.SUSPECT = 2
25 | options.DEAD = 3
26 | options.NONDECRYPTABLE = 4
27 | options.LEFT = 5
28 |
29 | --- Period of sending direct PINGs.
30 | -- Denoted as `T'` in [SWIM paper](swim-paper.pdf).
31 | --
32 | -- Default is 1
33 | options.PROTOCOL_PERIOD_SECONDS = 1.0
34 |
35 | --- Time to wait for ACK message after PING.
36 | -- If a member does not reply within this time,
37 | -- the indirect ping algorithm is invoked.
38 | --
39 | -- Default is 0.2
40 | options.ACK_TIMEOUT_SECONDS = 0.200
41 |
42 | --- Period to perform anti-entropy sync.
43 | -- Algorithm is described in [SWIM paper](swim-paper.pdf).
44 | --
45 | -- Default is 10
46 | options.ANTI_ENTROPY_PERIOD_SECONDS = 10.0
47 |
48 | --- Toggle producing `suspect` rumors when ping fails. Even if disabled,
49 | -- it affects neither gossip dissemination nor the generation of other
50 | -- statuses (e.g. `dead` and `non-decryptable`).
51 | --
52 | -- Default is `true`
53 | options.SUSPICIOUSNESS = true
54 |
55 | --- Timeout to mark `suspect` members as `dead`.
56 | --
57 | -- Default is 3
58 | options.SUSPECT_TIMEOUT_SECONDS = 3
59 |
60 | --- Number of members to try indirectly pinging a `suspect`.
61 | -- Denoted as `k` in [SWIM paper](swim-paper.pdf).
62 | --
63 | -- Default is 3
64 | options.NUM_FAILURE_DETECTION_SUBGROUPS = 3
65 |
66 | --- Maximum size of UDP packets to send.
67 | --
68 | -- Default is 1472 (`Default-MTU (1500) - IP-Header (20) - UDP-Header (8)`)
69 | options.MAX_PACKET_SIZE = 1472
70 |
71 | --- Initialization vector for aes256 CBC encryption.
72 | options.ENCRYPTION_INIT = 'init-key-16-byte'
73 |
74 |
75 | options.log_debug = log.debug
76 |
--- Return the current encryption key.
-- @return the stored key, or nil when none is set
function options.get_encryption_key()
    -- The options metatable defines no __index, so a raw read is
    -- equivalent to plain field access.
    return rawget(options, 'encryption_key')
end
80 |
--- Set or clear the encryption key.
-- A key shorter than 32 bytes is padded with `rjust(32)`; a longer one is
-- cut to its first 32 bytes. `rawset` is used because the options table
-- rejects creation of new fields through normal assignment.
-- @param key string, or nil to disable encryption
function options.set_encryption_key(key)
    if key == nil then
        rawset(options, 'encryption_key', nil)
        log.info('Membership encryption disabled')
        return
    end

    local normalized
    if key:len() < 32 then
        normalized = key:rjust(32)
    else
        normalized = key:sub(1, 32)
    end
    rawset(options, 'encryption_key', normalized)
    log.info('Membership encryption enabled')
end
94 |
--- Size of a message after `options.encrypt`.
-- Without a key the size is unchanged. With a key, `len + 1` is rounded
-- up to a multiple of 16.
-- @tparam number len plain message length
-- @treturn number
function options.encrypted_size(len)
    if options.encryption_key then
        local blocks = math.ceil((len + 1) / 16)
        return blocks * 16
    end
    return len
end
102 |
--- Encrypt a message with the configured key.
-- When no key is set the message is returned as is, followed by nil.
-- @param msg
-- @return result of `cbc.encrypt`, or `msg, nil` when encryption is off
function options.encrypt(msg)
    local key = options.encryption_key
    if key then
        return cbc.encrypt(msg, key, options.ENCRYPTION_INIT)
    end
    return msg, nil
end
114 |
--- Decrypt a message with the configured key.
-- When no key is set the message is returned as is, followed by nil.
-- @param msg
-- @return result of `cbc.decrypt`, or `msg, nil` when encryption is off
function options.decrypt(msg)
    local key = options.encryption_key
    if key then
        return cbc.decrypt(msg, key, options.ENCRYPTION_INIT)
    end
    return msg, nil
end
126 |
-- Reject creation of new fields. `__newindex` fires only for keys that
-- are absent from the table, so existing options stay assignable while
-- misspelled option names fail loudly.
setmetatable(options, {
    __newindex = function(_, idx)
        -- Name the offending key in the message and blame the caller
        -- (level 2) instead of printing debug output to stdout.
        error(string.format(
            "options table is readonly (attempt to set %q)", tostring(idx)
        ), 2)
    end
})
133 |
134 | return options
135 |
--------------------------------------------------------------------------------
/membership/stash.lua:
--------------------------------------------------------------------------------
1 | local S = rawget(_G, '__membership_stash') or {}
2 |
3 | S['_allowed_uri_set'] = S['_allowed_uri_set'] or {}
4 |
5 | local log = require('log')
6 |
-- Fiber body: call the stashed function `fn_name` over and over until the
-- fiber is cancelled. The function is looked up in `S` on every
-- iteration, so a new implementation stored under the same name is used
-- on the next iteration.
local function f_body(fn_name, ...)
    local fiber_lib = require('fiber')
    repeat
        S[fn_name](...)
        fiber_lib.testcancel()
    until false
end

-- f_body must capture nothing but `S`.
local f_body_info = debug.getinfo(f_body, 'u')
assert(
    f_body_info.nups == 1,
    'Exceess closure upvalue'
)
19 |
--- Start a fiber that runs the stashed function `fn_name` in a loop.
-- The fiber object is stored in the stash under `'fiber.' .. fn_name`.
-- @tparam string fn_name stash key of the function to run
-- @param ... arguments passed to the function on every iteration
-- @return the created fiber
-- @raise when nothing is stashed under `fn_name`
local function fiber_new(fn_name, ...)
    if not S[fn_name] then
        local message = ('function %s not implemented'):format(fn_name)
        error(message, 2)
    end

    local handle = require('fiber').new(f_body, fn_name, ...)
    S['fiber.' .. fn_name] = handle
    return handle
end
29 |
--- Cancel the fiber started by `fiber_new` for `fn_name`, if any.
-- Cancellation errors are logged, not raised. The stash slot is cleared
-- in every case, including when the fiber is already dead, so no stale
-- handle is left behind.
-- @tparam string fn_name
local function fiber_cancel(fn_name)
    local k = 'fiber.' .. fn_name
    local handle = S[k]
    if handle == nil then
        return
    end

    if handle:status() ~= 'dead' then
        local ok, err = pcall(handle.cancel, handle)
        if not ok then
            log.error('Fiber %s cancel error: %s', fn_name, err)
        end
    end
    S[k] = nil
end
40 |
-- Publish the stash table as a global.
rawset(_G, '__membership_stash', S)

-- Read a value from the stash.
local function get(k)
    return S[k]
end

-- Write a value to the stash.
local function set(k, v)
    S[k] = v
end

return {
    get = get,
    set = set,
    fiber_new = fiber_new,
    fiber_cancel = fiber_cancel,
}
49 |
--------------------------------------------------------------------------------
/test/helpers/cluster.lua:
--------------------------------------------------------------------------------
1 | local fio = require('fio')
2 | local log = require('log')
3 | local socket = require('socket')
4 | local Server = require('test.helpers.server')
5 | local cluster = {}
6 |
--- Start one server per port and wait until all of them are ready.
-- Arguments and port availability are validated first; only then is the
-- `test_cluster_data` directory under the current working directory
-- recreated from scratch.
-- @tparam string hostname passed to instances as `TARANTOOL_HOSTNAME`
-- @tparam table ports non-empty array of ports
-- @return true on success, nothing if the cluster is already running
-- @raise when `ports` is invalid or one of the ports accepts connections
function cluster.start(hostname, ports)
    -- Check this before touching the data directory: wiping it under a
    -- running cluster would destroy that cluster's files.
    if cluster.servers ~= nil then
        log.warn("Cluster is already running")
        return
    end

    if type(ports) ~= 'table' or #ports == 0 then
        error("Ports for cluster servers are not specified")
    end

    -- A successful connect means something already listens on the port.
    for _, port in ipairs(ports) do
        local sock = socket.tcp()
        local is_busy = sock:connect(hostname, port)
        sock:close()
        if is_busy then
            error("Port " .. port .. " is already in use!")
        end
    end

    local datadir = fio.pathjoin(fio.cwd(), 'test_cluster_data')
    if fio.path.exists(datadir) then
        fio.rmtree(datadir)
    end
    fio.mkdir(datadir)

    log.info("Starting a cluster with ports: " .. table.concat(ports, ", "))

    cluster.servers = {}

    local instance_path = fio.pathjoin(fio.cwd(), "test", "helpers", 'instance.lua')

    for i, port in ipairs(ports) do
        local alias = 'server-' .. i
        local workdir = fio.pathjoin(datadir, 'server-' .. i)

        fio.mkdir(workdir)
        fio.mkdir(fio.pathjoin(workdir, 'wal'))
        fio.mkdir(fio.pathjoin(workdir, 'vinyl'))

        local server_config = {
            alias = alias,
            command = instance_path,
            workdir = workdir,
            args = {
                '--wal-dir', fio.pathjoin(workdir, 'wal'),
                '--vinyl-dir', fio.pathjoin(workdir, 'vinyl')
            },
            advertise_port = tonumber(port),
            env = {
                TARANTOOL_LISTEN = tostring(port),
                TARANTOOL_HOSTNAME = hostname,
            },

            net_box_credentials = {
                user = 'guest',
                password = "",
            },
            cluster_cookie = ""
        }

        local server = Server:new(server_config)
        table.insert(cluster.servers, server)

        server:start()

        log.info("Server " .. alias .. " is running on port " .. port)
    end

    for _, server in ipairs(cluster.servers) do
        server:wait_until_ready({ timeout = 10 })
    end

    log.info("The cluster was successfully started, the number of servers: " .. #cluster.servers)
    return true
end
83 |
--- Stop every server of the running cluster and forget them.
-- @return true on success, nothing if the cluster was not started
function cluster.stop()
    local servers = cluster.servers
    if servers == nil then
        log.warn("The cluster was not started")
        return
    end

    log.info("Stopping the cluster...")

    for i = 1, #servers do
        local server = servers[i]
        server:stop()
        log.info("The server " .. server.alias .. " is stopped")
    end

    cluster.servers = nil

    log.info("Cluster has been successfully stopped")
    return true
end
102 |
103 | return cluster
104 |
--------------------------------------------------------------------------------
/test/helpers/instance.lua:
--------------------------------------------------------------------------------
1 | require('strict').on()
2 | local log = require('log')
3 | local fiber = require('fiber')
4 |
-- Replace the `checks` module with a wrapper that becomes a no-op while
-- the global `checks_disabled` is exactly `true`.
local checks = require('checks')
package.loaded['checks'] = function(...)
    if rawget(_G, "checks_disabled") ~= true then
        -- Keep this a tail call, exactly as in the straightforward form.
        return checks(...)
    end
end
12 |
13 | local membership = require('membership')
14 | _G.membership = membership
15 |
16 | if rawget(_G, "is_initialized") == nil then
17 | _G.is_initialized = false
18 | end
19 |
20 | local listen = os.getenv('TARANTOOL_LISTEN') or '13301'
21 | print("Starting Tarantool instance on port:", listen)
22 | local wal_dir = arg[2] or './wal'
23 | local vinyl_dir = arg[4] or './vinyl'
24 |
25 | box.cfg({
26 | listen = listen,
27 | wal_dir = os.getenv('TARANTOOL_WAL_DIR') or wal_dir,
28 | vinyl_dir = os.getenv('TARANTOOL_VINYL_DIR') or vinyl_dir,
29 | work_dir = os.getenv('TARANTOOL_WORKDIR') or '.'
30 | })
31 |
32 | print("Starting server on port:", listen)
33 | local hostname = os.getenv('TARANTOOL_HOSTNAME') or 'localhost'
34 |
35 | box.schema.user.grant('guest', 'execute', 'universe', nil, { if_not_exists = true })
36 |
37 | -- Tune periods to speed up tests
38 | -- Supposing loopback roundtrip is about 0.1ms
39 | local opts = require('membership.options')
40 | opts.PROTOCOL_PERIOD_SECONDS = 0.2
41 | opts.ACK_TIMEOUT_SECONDS = 0.1
42 | opts.ANTI_ENTROPY_PERIOD_SECONDS = 2
43 | opts.SUSPECT_TIMEOUT_SECONDS = 2
44 |
if not _G.is_initialized then
    -- Monkeypatch socket library to validate MAX_PACKET_SIZE:
    -- every socket created afterwards gets a `sendto` that terminates the
    -- process with exit code 220 when a message exceeds the limit.
    local socket_mt = getmetatable(require('socket'))
    local original_call = socket_mt.__call

    socket_mt.__call = function(...)
        log.error('Monkeypatching socket')
        local sock = original_call(...)
        local original_sendto = sock.sendto

        sock.sendto = function(self, host, port, msg)
            local size = #msg
            if size > opts.MAX_PACKET_SIZE then
                log.error('Packet too big, %d > %d', size, opts.MAX_PACKET_SIZE)
                os.exit(220)
            end
            return original_sendto(self, host, port, msg)
        end

        return sock
    end
end
66 |
67 | membership.init(hostname, tonumber(listen))
68 | _G.is_initialized = true
69 |
-- Re-run this script in place after dropping the cached `membership`
-- module, and assert that the current fiber's context-switch counter did
-- not change in the meantime (i.e. no yield happened).
_G.package.reload = function()
    local function context_switches()
        return fiber.info()[fiber.id()].csw
    end

    local csw_before = context_switches()

    package.loaded['membership'] = nil
    log.info('Doing file %s...', arg[0])
    dofile(arg[0])

    local csw_after = context_switches()
    assert(csw_before == csw_after, 'Unexpected yield')

    log.info('Dofile succeeded')
    return true
end
83 |
--------------------------------------------------------------------------------
/test/helpers/server.lua:
--------------------------------------------------------------------------------
1 | --- Extended luatest.Server class to run a cartridge instance.
2 | --
3 | -- @classmod cartridge.test-helpers.server
4 |
5 | local fun = require('fun')
6 | local log = require('log')
7 | local fio = require('fio')
8 | local luatest = require('luatest')
9 | local yaml = require('yaml')
10 | local checks = require('checks')
11 |
12 | --- Build server object.
13 | -- @function new
14 | -- @param object
15 | -- @string object.command Command to start server process.
16 | -- @string object.workdir Value to be passed in `TARANTOOL_WORKDIR`.
17 | -- @bool[opt] object.chdir Path to cwd before starting a process.
18 | -- @tab[opt] object.env Table to pass as env variables to process.
19 | -- @tab[opt] object.args Args to run command with.
20 | -- @int[opt] object.http_port Value to be passed in `TARANTOOL_HTTP_PORT` and used to perform HTTP requests.
21 | -- @int object.advertise_port Value to generate `TARANTOOL_ADVERTISE_URI` and used for net_box connection.
22 | -- @int[opt] object.net_box_port Alias for `advertise_port`.
23 | -- @tab[opt] object.net_box_credentials Override default net_box credentials.
24 | -- @string object.alias Instance alias.
25 | -- @string object.cluster_cookie Value to be passed in `TARANTOOL_CLUSTER_COOKIE` and used as default net_box password.
26 | -- @string[opt] object.instance_uuid Server identifier.
27 | -- @string[opt] object.replicaset_uuid Replicaset identifier.
28 | -- @string[opt] object.zone Vshard zone.
29 | -- @number[opt] object.swim_period SWIM protocol period in seconds.
30 | -- @return input object
31 | local Server = luatest.Server:inherit({})
32 |
33 | Server.constructor_checks = fun.chain(Server.constructor_checks, {
34 | alias = 'string',
35 | cluster_cookie = 'string',
36 |
37 | advertise_port = 'number',
38 | advertise_uri = '?string',
39 |
40 | instance_uuid = '?string',
41 | replicaset_uuid = '?string',
42 | labels = '?table',
43 | zone = '?string',
44 | swim_period = '?number',
45 |
46 | transport = '?string',
47 | ssl_ciphers = '?string',
48 | ssl_server_ca_file = '?string',
49 | ssl_server_cert_file = '?string',
50 | ssl_server_key_file = '?string',
51 | ssl_server_password = '?string',
52 | ssl_client_ca_file = '?string',
53 | ssl_client_cert_file = '?string',
54 | ssl_client_key_file = '?string',
55 | ssl_client_password = '?string',
56 | }):tomap()
57 |
-- Fill in defaults derived from other fields, then run the parent
-- class initializer.
function Server:initialize()
    local port = self.net_box_port or self.advertise_port
    local uri = 'localhost:' .. port

    self.net_box_port = port
    self.net_box_uri = uri
    self.advertise_uri = self.advertise_uri or uri

    if not self.net_box_credentials then
        self.net_box_credentials = {
            user = 'admin',
            password = self.cluster_cookie,
        }
    end

    if self.instance_uuid == nil then
        self.instance_uuid = require('uuid').str()
    end

    local parent = getmetatable(getmetatable(self))
    parent.initialize(self)
end
72 |
--- Generates environment to run process with.
-- The result is merged into os.environ().
-- Fields that are nil on the server simply produce no entry.
-- @return map
function Server:build_env()
    local env = {}

    env.TARANTOOL_ALIAS = self.alias
    env.TARANTOOL_WORKDIR = self.workdir
    env.TARANTOOL_HTTP_PORT = self.http_port
    env.TARANTOOL_ADVERTISE_URI = self.advertise_uri
    env.TARANTOOL_CLUSTER_COOKIE = self.cluster_cookie
    -- speedup tests by amplifying membership message exchange
    env.TARANTOOL_SWIM_PROTOCOL_PERIOD_SECONDS = self.swim_period or 0.2

    env.TARANTOOL_TRANSPORT = self.transport
    env.TARANTOOL_SSL_CIPHERS = self.ssl_ciphers
    env.TARANTOOL_SSL_SERVER_CA_FILE = self.ssl_server_ca_file
    env.TARANTOOL_SSL_SERVER_CERT_FILE = self.ssl_server_cert_file
    env.TARANTOOL_SSL_SERVER_KEY_FILE = self.ssl_server_key_file
    env.TARANTOOL_SSL_SERVER_PASSWORD = self.ssl_server_password
    env.TARANTOOL_SSL_CLIENT_CA_FILE = self.ssl_client_ca_file
    env.TARANTOOL_SSL_CLIENT_CERT_FILE = self.ssl_client_cert_file
    env.TARANTOOL_SSL_CLIENT_KEY_FILE = self.ssl_client_key_file
    env.TARANTOOL_SSL_CLIENT_PASSWORD = self.ssl_client_password

    return env
end
98 |
-- on_disconnect trigger: open a replacement net.box connection in a
-- background fiber and install it as `server.net_box`, unless somebody
-- else has replaced the connection in the meantime.
local function reconnect(connection_old)
    local server = connection_old._server
    log.debug(
        'Netbox %s (%s): connection lost',
        server.alias, server.advertise_uri
    )
    local fiber = require('fiber')
    fiber.new(function()
        if type(server.net_box_uri) == 'string' then
            fiber.name(string.format('reconnect/%s', server.net_box_uri))
        elseif type(server.net_box_uri) == 'table' then
            fiber.name(string.format('reconnect/%s', server.net_box_uri.uri))
        end
        local uri = server.net_box_uri

        local connection_new = require('net.box').connect(
            uri, server.net_box_credentials
        )

        if server.net_box ~= connection_old then
            -- Someone has already assigned `self.net_box`
            -- while this fiber was trying to establish a new one.
            -- Don't interfere in this case, but close the connection
            -- opened here: nobody else holds a reference to it.
            connection_new:close()
            return
        end

        if connection_new.error then
            log.debug(
                'Netbox %s (%s) reconnect failed: %s',
                server.alias, server.advertise_uri, connection_new.error
            )
            return
        else
            log.debug(
                'Netbox %s (%s) reconnected',
                server.alias, server.advertise_uri
            )
        end

        connection_new:on_disconnect(reconnect)
        server.net_box = connection_new
        server.net_box._server = server
    end)
end
143 |
-- Connect net.box to the server and arm automatic reconnection.
-- When the transport is 'ssl' (case-insensitive) and the URI is still a
-- plain string, it is first replaced by a table carrying the client-side
-- SSL parameters.
-- @return the net.box connection
function Server:connect_net_box()
    local transport = self.transport
    if type(transport) == 'string' then
        transport = transport:lower()
    end

    if transport == 'ssl' and type(self.net_box_uri) == 'string' then
        local ssl_params = {
            transport = transport,
            ssl_ciphers = self.ssl_ciphers,
            ssl_cert_file = self.ssl_client_cert_file,
            ssl_key_file = self.ssl_client_key_file,
            ssl_password = self.ssl_client_password,
            ssl_ca_file = self.ssl_client_ca_file,
        }
        self.net_box_uri = {
            uri = self.net_box_uri,
            params = ssl_params,
        }
    end

    local parent = getmetatable(getmetatable(self))
    parent.connect_net_box(self)

    local connection = self.net_box
    connection._server = self
    connection:on_disconnect(reconnect)
    return self.net_box
end
170 |
--- Start the server.
-- Runs the parent `start`, then retries the net.box connection until it
-- succeeds (with the default `retrying` settings).
function Server:start()
    local parent = getmetatable(getmetatable(self))
    parent.start(self)

    local function connect()
        self:connect_net_box()
    end
    luatest.helpers.retrying({}, connect)
end
178 |
--- Stop server process.
-- Does nothing when there is no process. Otherwise removes the reconnect
-- trigger, runs the parent `stop` and waits until the process is gone.
function Server:stop()
    local process = self.process
    if process == nil then
        return
    end

    local connection = self.net_box
    if connection then
        -- Don't try to reconnect anymore
        connection:on_disconnect(nil, reconnect)
    end

    local parent = getmetatable(getmetatable(self))
    parent.stop(self)

    local still_running = string.format('Process %s is still running', self.alias)
    luatest.helpers.retrying({}, function()
        luatest.assert_not(process:is_alive(), still_running)
    end)

    log.warn('Process %s killed', self.alias)
end
198 |
--- Perform GraphQL request.
-- Neither `request` nor `http_options` is modified.
-- @tparam table request
-- @tparam string request.query
--   graphql query
-- @tparam ?table request.variables
--   variables for graphql query
-- @tparam ?boolean request.raise
--   raise if response contains an error
--   (default: **true**)
-- @tparam[opt] table http_options
--   passed to `http_request` options.
-- @treturn table parsed response JSON.
-- @raise
--   * HTTPRequest error
--   * GraphQL error
function Server:graphql(request, http_options)
    checks('table', {
        query = 'string',
        variables = '?table',
        raise = '?boolean'
    }, '?table')

    log.debug('GraphQL request to %s (%s)', self.alias, self.advertise_uri)
    log.debug('Query: %s', request.query)
    if request.variables ~= nil then
        log.debug('Variables:\n%s', yaml.encode(request.variables))
    end

    -- Resolve the default into a local instead of writing it back into
    -- the caller's table.
    local raise = request.raise
    if raise == nil then
        raise = true
    end

    http_options = table.copy(http_options) or {}
    http_options.json = {
        query = request.query,
        variables = request.variables,
    }

    local webui_prefix = self.env and self.env.TARANTOOL_WEBUI_PREFIX or ''
    local api_endpoint = fio.pathjoin('/', webui_prefix, 'admin/api')
    local response = self:http_request('post', api_endpoint, http_options)

    local errors = response.json and response.json.errors
    if errors and raise then
        error(errors[1].message, 2)
    end
    return response.json
end
247 |
--- Wait until the server accepts net.box connections.
-- Polls `is_ready` every 0.1 s.
-- @tparam[opt] table options
-- @tparam[opt] number options.timeout seconds to wait (default: 60)
-- @return true once the server is ready
-- @raise when the timeout expires
function Server:wait_until_ready(options)
    local fiber = require('fiber')
    local timeout = options and options.timeout or 60 -- wait timeout
    local deadline = fiber.clock() + timeout

    -- Always probe at least once, even with a tiny timeout.
    repeat
        if self:is_ready() then
            print("Server " .. self.alias .. " is ready")
            return true
        end
        fiber.sleep(0.1)
    until fiber.clock() >= deadline

    error("Timed out waiting for server " .. self.alias .. " to become ready")
end

--- Check whether a guest net.box connection to `advertise_port` succeeds.
-- The probe connection is closed in either case.
-- @treturn boolean
function Server:is_ready()
    local net_box = require('net.box')
    local conn = net_box.connect(self.advertise_port, { user = 'guest', password = '' })
    local connected = conn:is_connected()
    conn:close()
    return connected
end

--- Advertise this server to the cluster.
-- @param main_server Server to perform GraphQL request on.
-- @param[opt] options
-- @param options.timeout request timeout
function Server:join_cluster(main_server, options)
    log.debug('Adding ' .. self.advertise_uri .. '(' .. self.alias .. '):')
    return main_server:graphql({
        query = [[
            mutation(
                $uri: String!,
                $instance_uuid: String,
                $replicaset_uuid: String,
                $timeout: Float
                $labels: [LabelInput]
            ) {
                join_server(
                    uri: $uri,
                    instance_uuid: $instance_uuid,
                    replicaset_uuid: $replicaset_uuid,
                    timeout: $timeout
                    labels: $labels
                )
            }
        ]],
        variables = {
            uri = self.advertise_uri,
            instance_uuid = self.instance_uuid,
            replicaset_uuid = self.replicaset_uuid,
            timeout = options and options.timeout,
            labels = self.labels,
        }
    })
end
306 |
--- Update server's replicaset config.
-- Sends an `edit_replicaset` GraphQL mutation to this server.
-- @param config
-- @param config.uuid replicaset uuid
-- @param config.alias
-- @param config.roles list of roles
-- @param config.master
-- @param config.weight
-- @param config.vshard_group
function Server:setup_replicaset(config)
    local variables = {
        uuid = config.uuid,
        alias = config.alias,
        roles = config.roles,
        master = config.master,
        weight = config.weight,
        vshard_group = config.vshard_group,
    }

    self:graphql({
        query = [[
            mutation(
                $uuid: String!,
                $alias: String,
                $roles: [String!],
                $master: [String!],
                $weight: Float,
                $vshard_group: String
            ) {
                edit_replicaset(
                    uuid: $uuid,
                    alias: $alias,
                    roles: $roles,
                    master: $master,
                    weight: $weight,
                    vshard_group: $vshard_group
                )
            }
        ]],
        variables = variables,
    })
end
344 |
--- Upload application config.
-- The caller's `opts` table is not modified.
-- @tparam string|table config - table will be encoded as yaml and posted to /admin/config.
-- @tparam[opt] table opts - http request options
-- @return result of `http_request`
function Server:upload_config(config, opts)
    checks('table', 'string|table', 'table|nil')
    if type(config) == 'table' then
        config = yaml.encode(config)
    end

    -- Work on a copy, as `graphql` does with its http options.
    local request_opts = opts ~= nil and table.copy(opts) or {}
    request_opts.body = config
    return self:http_request('put', '/admin/config', request_opts)
end
359 |
--- Download application config.
-- @return body of `GET /admin/config` decoded from YAML
function Server:download_config()
    local response = self:http_request('get', '/admin/config')
    return yaml.decode(response.body)
end
364 |
-- Call `membership.add_member(uri)` on the server.
function Server:add_member(uri)
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_add_member(member_uri)
        return membership.add_member(member_uri)
    end
    return self:exec(remote_add_member, { uri })
end
370 |
-- Call `membership.probe_uri(uri)` on the server.
function Server:probe_uri(uri)
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_probe(target_uri)
        return membership.probe_uri(target_uri)
    end
    return self:exec(remote_probe, { uri })
end
376 |
-- Call `membership.broadcast(port)` on the server.
function Server:broadcast(port)
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_broadcast(target_port)
        return membership.broadcast(target_port)
    end
    return self:exec(remote_broadcast, { port })
end
382 |
-- Return the result of `membership.members()` called on the server.
function Server:members()
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_members()
        return membership.members()
    end
    return self:exec(remote_members)
end
388 |
-- Return the result of `membership.get_member(uri)` called on the server.
function Server:get_member(uri)
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_get_member(member_uri)
        return membership.get_member(member_uri)
    end
    return self:exec(remote_get_member, { uri })
end
394 |
-- Return the result of `membership.myself()` called on the server.
function Server:myself()
    -- Executed remotely; `membership` is a global of the instance.
    local function remote_myself()
        return membership.myself()
    end
    return self:exec(remote_myself)
end
400 |
-- Assert that this server sees member `uri` with the given status.
-- A member unknown to the server yields an assertion failure
-- (nil ~= status) rather than an "attempt to index nil" error.
function Server:check_status(uri, status)
    local member = self:get_member(uri)
    local actual = member and member['status']
    luatest.assert_equals(actual, status)
end
407 |
408 | return Server
409 |
--------------------------------------------------------------------------------
/test/integration/allowed_members_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 | local fiber = require('fiber')
5 |
6 | local SERVER_LIST = { 13301, 13302, 13303, 13304, 13305 }
7 | --[[
8 | 13301: myself -> visible
9 | 13302: alive and allowed -> visible
10 | 13303: alive and not allowed -> visible
11 | 13304: dead and allowed -> visible
12 | 13305: dead and not allowed -> removed
13 | ]]
14 |
-- Bring the cluster up once for the whole group.
local function start_cluster()
    cluster.start('localhost', SERVER_LIST)
end

-- Tear the cluster down after the group has finished.
local function stop_cluster()
    cluster.stop()
end

g.before_all(start_cluster)
g.after_all(stop_cluster)
22 |
g.test_smoke = function()
    local observer = cluster.servers[1]

    -- Introduce every instance (including the observer itself) to the observer.
    for _, port in ipairs(SERVER_LIST) do
        local probed = observer:exec(function(port)
            return membership.probe_uri(string.format('localhost:%d', port))
        end, { port })
        t.assert(probed)
    end

    -- Allow only 13301, 13302 and 13304; 13303 and 13305 are not allowed
    observer:exec(function()
        return membership.set_allowed_members({
            'localhost:13301', 'localhost:13302', 'localhost:13304',
        })
    end)

    -- Wait for the new events
    fiber.sleep(2)

    -- Everyone is visible, because everyone is alive
    for i = 2, 5 do
        local uri = string.format('localhost:%d', SERVER_LIST[i])
        local member = observer:get_member(uri)
        t.assert_equals(member['status'], 'alive')
    end

    cluster.servers[4]:stop()
    cluster.servers[5]:stop()

    t.helpers.retrying({}, function()
        observer:check_status('localhost:13304', 'dead')
    end)

    -- Alive members stay visible whether allowed or not
    local expected_status = {
        { 'localhost:13302', 'alive' },
        { 'localhost:13303', 'alive' },
        { 'localhost:13304', 'dead' },
    }
    for _, pair in ipairs(expected_status) do
        t.assert_equals(observer:get_member(pair[1])['status'], pair[2])
    end

    -- Dead and not allowed: removed from the members table
    t.assert_equals(observer:get_member('localhost:13305'), nil)
end
70 |
--------------------------------------------------------------------------------
/test/integration/broadcast_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 | local socket = require("socket")
5 |
-- Determine the local address used for outgoing traffic by connecting a
-- UDP socket to 8.8.8.8:80 and reading the socket's own name.
-- Falls back to 'localhost' whenever that fails. The socket is closed on
-- every path.
local function get_local_ip()
    local udp_socket = socket('AF_INET', 'SOCK_DGRAM', 'udp')
    if udp_socket == nil then
        return 'localhost'
    end

    local ok, host = pcall(function()
        udp_socket:sysconnect("8.8.8.8", 80)
        return udp_socket:name().host
    end)
    udp_socket:close()

    -- Never hand a nil hostname to the callers: they concatenate it.
    if not ok or host == nil then
        return 'localhost'
    end
    return host
end
22 |
23 | local HOSTNAME = get_local_ip()
24 | local SERVER_LIST = { 33001, 33002 }
25 |
-- Bring the cluster up once for the whole group.
local function start_cluster()
    cluster.start(HOSTNAME, SERVER_LIST)
end

-- Tear the cluster down after the group has finished.
local function stop_cluster()
    cluster.stop()
end

g.before_all(start_cluster)
g.after_all(stop_cluster)
33 |
-- After the second server broadcasts to port 33001, both servers must
-- see each other as alive and be able to probe each other.
g.test_join = function()
    local first, second = cluster.servers[1], cluster.servers[2]
    local first_uri = HOSTNAME .. ':33001'
    local second_uri = HOSTNAME .. ':33002'

    second:broadcast(33001)

    t.helpers.retrying({}, function()
        second:check_status(first_uri, 'alive')
    end)
    t.helpers.retrying({}, function()
        first:check_status(second_uri, 'alive')
    end)

    t.assert(first:probe_uri(second_uri))
    t.assert(second:probe_uri(first_uri))
end
51 |
--------------------------------------------------------------------------------
/test/integration/dead_myself_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Single port handed over to cluster.start().
local SERVER_LIST = { 13301 }

-- The instance is started under the host name 'not-available'.
g.before_all(function()
    cluster.start('not-available', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- The instance is expected to end up reporting its own URI
-- ('not-available:13301') as dead.
g.test_dead = function()
    local server = cluster.servers[1]

    t.helpers.retrying(
        {}, server.check_status, server, 'not-available:13301', 'dead'
    )
end
22 |
--------------------------------------------------------------------------------
/test/integration/dissemination_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 | local fiber = require('fiber')
5 | local log = require('log')
6 |
local FIRST_PORT = 13301
local SERVER_COUNT = 100

-- Consecutive ports FIRST_PORT .. FIRST_PORT + SERVER_COUNT - 1.
local SERVER_LIST = {}
for offset = 0, SERVER_COUNT - 1 do
    SERVER_LIST[offset + 1] = FIRST_PORT + offset
end

g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
21 |
--- Probe every instance from the first one, then wait until each
-- instance counts SERVER_COUNT alive members. Both phases are timed
-- and the durations are logged.
g.test_discover_join = function()
    local seed = cluster.servers[1]

    local started_at = fiber.clock()
    for port = FIRST_PORT, FIRST_PORT + SERVER_COUNT - 1 do
        t.assert(seed:probe_uri(string.format('localhost:%s', port)))
    end
    local duration = fiber.clock() - started_at
    log.info(string.format("Probe all in %.3fs", duration))

    started_at = fiber.clock()
    t.helpers.retrying({}, function()
        for _, server in ipairs(cluster.servers) do
            -- The counting is done by the instance itself via exec().
            local alive = server:exec(function()
                local n = 0
                for _, member in membership.pairs() do
                    if member.status == 'alive' then
                        n = n + 1
                    end
                end
                return n
            end)
            t.assert_equals(alive, SERVER_COUNT)
        end
    end)
    duration = fiber.clock() - started_at
    log.info(string.format('Full mesh in %.3fs', duration))
end
49 |
--- Stop the first instance and wait until no other instance reports
-- it as alive any more.
g.test_discover_kill = function()
    cluster.servers[1]:stop()

    t.helpers.retrying({}, function()
        local stopped_uri = string.format('localhost:%s', FIRST_PORT)

        -- Every remaining instance must still know the stopped member,
        -- but not in the 'alive' status.
        for idx = 2, SERVER_COUNT do
            local member = cluster.servers[idx]:get_member(stopped_uri)
            t.assert_not_equals(member, nil)
            t.assert_not_equals(member['status'], 'alive')
        end
    end)
end
64 |
--------------------------------------------------------------------------------
/test/integration/encryption_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Join the two instances; no encryption key is set at this point.
g.test_join = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert(first:add_member('localhost:13302'))

    local key = first:exec(function()
        return membership.get_encryption_key()
    end)
    t.assert_equals(key, nil)

    t.helpers.retrying(
        {}, second.check_status, second, 'localhost:13301', 'alive'
    )
    t.helpers.retrying(
        {}, first.check_status, first, 'localhost:13302', 'alive'
    )
end
32 |
--- Enable encryption on both instances, one after another.
--
-- While only one side has the key, the instances must see each other
-- as 'non-decryptable'; once the keys match they become 'alive' again.
-- Finally the second instance leaves and re-initializes, and the first
-- one must see it 'left' and then 'alive'.
g.test_enable_encryption = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    -- Poll until `server` reports `uri` in the given status.
    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    -- Read the encryption key currently set on `server`.
    local function get_key(server)
        return server:exec(function()
            return membership.get_encryption_key()
        end)
    end

    second:exec(function()
        return membership.set_encryption_key("XXXXXX")
    end)
    t.assert_equals(get_key(second), string.rjust("XXXXXX", 32))
    wait_status(first, 'localhost:13302', 'non-decryptable')
    wait_status(second, 'localhost:13301', 'non-decryptable')

    first:exec(function()
        return membership.set_encryption_key("XXXXXX")
    end)
    t.assert_equals(get_key(first), string.rjust("XXXXXX", 32))
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')

    second:exec(function()
        return membership.leave()
    end)
    -- The 'left' status may not have reached the first instance yet
    -- when leave() returns, so poll for it like every other status
    -- check here instead of asserting it once.
    wait_status(first, 'localhost:13302', 'left')

    second:exec(function()
        assert(membership.init("localhost", 13302))
        assert(membership.probe_uri("localhost:13301"))
    end)
    wait_status(first, 'localhost:13302', 'alive')
end
89 |
--- Switch both instances to the key "YY", first instance first.
--
-- Between the two switches the instances must see each other as
-- 'non-decryptable', afterwards as 'alive'.
g.test_change_encryption = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    local function get_key(server)
        return server:exec(function()
            return membership.get_encryption_key()
        end)
    end

    first:exec(function()
        return membership.set_encryption_key("YY")
    end)
    t.assert_equals(get_key(first), string.rjust("YY", 32))
    wait_status(first, 'localhost:13302', 'non-decryptable')
    wait_status(second, 'localhost:13301', 'non-decryptable')

    second:exec(function()
        return membership.set_encryption_key("YY")
    end)
    t.assert_equals(get_key(second), string.rjust("YY", 32))
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')
end
131 |
--- Drop the encryption key on both instances, second instance first.
--
-- Between the two steps the instances must see each other as
-- 'non-decryptable', afterwards as 'alive'.
g.test_disable_encryption = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    local function get_key(server)
        return server:exec(function()
            return membership.get_encryption_key()
        end)
    end

    second:exec(function()
        return membership.set_encryption_key(nil)
    end)
    t.assert_equals(get_key(second), nil)
    wait_status(first, 'localhost:13302', 'non-decryptable')
    wait_status(second, 'localhost:13301', 'non-decryptable')

    first:exec(function()
        return membership.set_encryption_key(nil)
    end)
    t.assert_equals(get_key(first), nil)
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')
end
167 |
--- Regression test: a bug in the nslookup function used to prevent
-- discovering non-decryptable members.
g.test_gh36 = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    -- Probe ten URIs of the form "sNNN:oO" from the second instance;
    -- the results of these probes are not checked.
    for i = 1, 10 do
        local uri = string.format("s%03d:oO", i)
        second:exec(function(u)
            membership.probe_uri(u)
        end, { uri })
    end

    first:exec(function()
        return membership.set_encryption_key("ZZ")
    end)
    local key = first:exec(function()
        return membership.get_encryption_key()
    end)
    t.assert_equals(key, string.rjust("ZZ", 32))

    t.helpers.retrying(
        {}, first.check_status, first, 'localhost:13302', 'non-decryptable'
    )
    t.helpers.retrying(
        {}, second.check_status, second, 'localhost:13301', 'non-decryptable'
    )
end
198 |
--------------------------------------------------------------------------------
/test/integration/false_rumors_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 | local fiber = require('fiber')
5 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302, 13303 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
15 |
--- Assert that the global `rumors` table on `server` equals `expected`.
local function check_rumors(server, expected)
    local collected = server:exec(function()
        return _G.rumors
    end)
    t.assert_equals(collected, expected)
end
19 |
--- Install a rumor collector on the first instance and join the others.
g.test_setup = function()
    local observer = cluster.servers[1]

    -- Monkeypatch the instance: a background fiber wakes up on every
    -- membership event and records each non-alive member in the global
    -- `rumors` map (uri -> status name).
    local installed = observer:exec(function()
        rawset(_G, "rumors", setmetatable({}, { __serialize = 'map' }))

        local fiber = require('fiber')
        local members = require('membership.members')
        local opts = require('membership.options')

        local function collector()
            local cond = membership.subscribe()
            while true do
                cond:wait()
                fiber.testcancel()
                for uri, m in members.pairs() do
                    if m.status ~= opts.ALIVE then
                        _G.rumors[uri] = opts.STATUS_NAMES[m.status]
                    end
                end
            end
        end

        rawset(_G, "_collector_fiber", fiber.create(collector))

        return true
    end)
    t.assert(installed)

    t.assert(observer:probe_uri('localhost:13302'))
    t.assert(observer:probe_uri('localhost:13303'))
    check_rumors(observer, {})
end
53 |
--- A zero ack timeout alone shouldn't trigger failure detection,
-- because indirect pings still work.
g.test_indirect_ping = function()
    local observer = cluster.servers[1]

    observer:exec(function()
        require('membership.options').ACK_TIMEOUT_SECONDS = 0
    end)

    -- Let the instance run for a while, then make sure that no rumor
    -- has been collected.
    fiber.sleep(2)
    check_rumors(observer, {})
end
65 |
--- With indirect pings disabled the cluster starts flickering:
-- every member, the observer included, is recorded as 'suspect'.
g.test_flickering = function()
    local observer = cluster.servers[1]

    observer:exec(function()
        require('membership.options').NUM_FAILURE_DETECTION_SUBGROUPS = 0
    end)

    local expected = {
        ['localhost:13301'] = 'suspect',
        ['localhost:13302'] = 'suspect',
        ['localhost:13303'] = 'suspect',
    }
    t.helpers.retrying({}, check_rumors, observer, expected)
end
84 |
--- With suspiciousness disabled the flickering stops again.
g.test_nonsuspiciousness = function()
    local observer = cluster.servers[1]

    observer:exec(function()
        require('membership.options').SUSPICIOUSNESS = false
    end)

    -- Wait until all three members are back to 'alive' ...
    local uris = { 'localhost:13301', 'localhost:13302', 'localhost:13303' }
    for _, uri in ipairs(uris) do
        t.helpers.retrying({}, observer.check_status, observer, uri, 'alive')
    end

    -- ... forget the rumors collected so far and make sure that no new
    -- ones show up within two seconds.
    observer:exec(function() table.clear(rumors) end)

    fiber.sleep(2)
    check_rumors(observer, {})
end
112 |
--------------------------------------------------------------------------------
/test/integration/init_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Add the second instance on the first one and wait until the second
-- instance sees the first one alive.
g.test_join = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert(first:add_member('localhost:13302'))

    t.helpers.retrying(
        {}, second.check_status, second, 'localhost:13301', 'alive'
    )
end
24 |
--- A stopped instance becomes 'suspect', then 'dead'; after a restart
-- both instances see each other alive again.
g.test_death = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    second:stop()
    wait_status(first, 'localhost:13302', 'suspect')
    wait_status(first, 'localhost:13302', 'dead')

    second:start()
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')
end
50 |
--- Re-initialize the first instance under another host name, then
-- another port, then revert; the second instance must see each old
-- URI as 'dead' and the current one as 'alive'.
g.test_reinit = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    t.assert(first:add_member('localhost:13302'))
    wait_status(second, 'localhost:13301', 'alive')

    -- Change hostname
    t.assert(t.helpers.retrying({}, first.exec, first, function()
        return membership.init('127.0.0.1', 13301)
    end))
    wait_status(second, 'localhost:13301', 'dead')
    wait_status(second, '127.0.0.1:13301', 'alive')

    -- Change port
    t.assert(t.helpers.retrying({}, first.exec, first, function()
        return membership.init('127.0.0.1', 13303)
    end))
    wait_status(second, 'localhost:13301', 'dead')
    wait_status(second, '127.0.0.1:13301', 'dead')
    wait_status(second, '127.0.0.1:13303', 'alive')

    -- Revert all changes
    t.assert(t.helpers.retrying({}, first.exec, first, function()
        return membership.init('localhost', 13301)
    end))
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')
end
115 |
--- Initializing on a port that is already in use must raise a bind
-- error, after which both instances must still answer probes.
g.test_error = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert_error_msg_equals(
        'Socket bind error (13302/udp): Address already in use',
        first.exec, first,
        function() return membership.init('localhost', 13302) end
    )

    t.assert(first:probe_uri('localhost:13301'))
    t.assert(first:probe_uri('localhost:13302'))
    t.assert(second:probe_uri('localhost:13301'))
end
127 |
--------------------------------------------------------------------------------
/test/integration/member_clock_diff_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Assert that `server` knows the member `uri` and has a `clock_delta`
-- value for it.
local function check_clock_delta(server, uri)
    local member = server:members()[uri]
    -- Fail with a readable assertion rather than an "attempt to index
    -- a nil value" error when the member isn't in the list yet.
    t.assert_not_equals(member, nil)
    t.assert_not_equals(member['clock_delta'], nil)
end
19 |
--- After the instances have discovered each other, each of them must
-- report a clock delta for its peer.
g.test_clock_diff = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    -- Assert the probe result like the other tests do, so that a failed
    -- probe is reported here and not as a later status timeout.
    t.assert(first:probe_uri('localhost:13302'))

    t.helpers.retrying(
        {}, second.check_status, second, 'localhost:13301', 'alive'
    )
    t.helpers.retrying(
        {}, first.check_status, first, 'localhost:13302', 'alive'
    )

    t.helpers.retrying({}, check_clock_delta, second, 'localhost:13301')
    t.helpers.retrying({}, check_clock_delta, first, 'localhost:13302')
end
43 |
--------------------------------------------------------------------------------
/test/integration/payload_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Assert that `server` sees the member `uri` with the given `status`
-- and `payload`.
local function check_payload(server, uri, payload, status)
    local member = server:members()[uri]
    -- Fail with a readable assertion rather than an "attempt to index
    -- a nil value" error when the member isn't in the list yet.
    t.assert_not_equals(member, nil)
    t.assert_equals(member['status'], status)
    t.assert_equals(member['payload'], payload)
end
20 |
--- Payload set on the first instance must show up on the second one:
-- setting a key, adding another key, and removing a key.
g.test_payload = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    -- Poll until the second instance sees `uri` with the given payload
    -- and status.
    local function wait_payload(uri, payload, status)
        t.helpers.retrying({}, check_payload, second, uri, payload, status)
    end

    t.assert(first:exec(function()
        return membership.set_payload("foo1", { bar = "buzz" })
    end))
    t.assert(first:add_member('localhost:13302'))
    wait_payload(
        'localhost:13301',
        { ['foo1'] = { ['bar'] = 'buzz' } },
        'alive'
    )

    t.assert(first:exec(function()
        return membership.set_payload("foo2", 42)
    end))
    wait_payload(
        'localhost:13301',
        { ['foo1'] = { ['bar'] = 'buzz' }, ['foo2'] = 42 },
        'alive'
    )

    -- Setting a key to nil removes it from the payload.
    t.assert(first:exec(function()
        return membership.set_payload("foo1", nil)
    end))
    wait_payload('localhost:13301', { ['foo2'] = 42 }, 'alive')

    -- Generate a DEAD event for the member '13301' by hand; the global
    -- `checks_disabled` flag is raised around the call.
    t.assert(first:exec(function()
        rawset(_G, "checks_disabled", true)
        local opts = require('membership.options')
        require('membership.events').generate('13301', opts.DEAD, 31, 37)
        rawset(_G, "checks_disabled", false)

        return true
    end))
    wait_payload('13301', {}, 'dead')
end
83 |
--------------------------------------------------------------------------------
/test/integration/probe_uri_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Single port handed over to cluster.start().
local SERVER_LIST = { 13301 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Check the results of probe_uri() for reachable, unreachable and
-- malformed URIs, and the warnings logged for unresolvable ones.
g.test_probe_uri = function()
    local server = cluster.servers[1]

    -- Replace log.warn on the instance so that every warning is
    -- appended to the global `warnings` list.
    t.assert(server:exec(function()
        rawset(_G, "warnings", {})
        require('log').warn = function(...)
            table.insert(warnings, string.format(...))
        end
        return true
    end))

    t.assert(server:probe_uri('localhost:13301'))

    -- { uri, expected error }, probed in this order
    local failing_probes = {
        { 'localhost:13302', 'no response' },
        { '127.0.0.1:13301', 'no response' },
        { ':::', 'parse error' },
        { 'unix/:/dev/null', 'ping was not sent' },
        { 'unknown-host:9', 'ping was not sent' },
        { '-:/', 'ping was not sent' },
    }
    for _, probe in ipairs(failing_probes) do
        t.assert_equals({ server:probe_uri(probe[1]) }, { nil, probe[2] })
    end

    -- https://github.com/tarantool/tarantool/commit/92fe50fa999d6153e8c4d5d43fb0c419ce05350e
    -- Tarantool didn't return error message up to 2.5
    local version = server:exec(function() return _TARANTOOL end)
    local parts = string.split(version, '.')
    local major, minor = tonumber(parts[1]), tonumber(parts[2])

    -- The getaddrinfo messages differ between Linux and other systems.
    local is_linux = false
    local uname = io.popen("uname -s 2>/dev/null", "r")
    if uname then
        local os_name = uname:read("*a"):gsub("%s+", "")
        uname:close()
        is_linux = (os_name == 'Linux')
    end

    t.skip_if(
        major == 2 and minor == 11,
        'Temporarily skipped due to warning capture issues for Tarantool 2.11'
    )

    local before_2_5 = (major < 2) or (major == 2 and minor < 5)
    local is_2_10 = (major == 2 and minor == 10)

    local expected_warnings
    if before_2_5 then
        expected_warnings = {
            'getaddrinfo: Unknown error (unix/:/dev/null)',
            'getaddrinfo: Unknown error (unknown-host:9)',
            'getaddrinfo: Unknown error (-)'
        }
    elseif is_2_10 then
        expected_warnings = {
            'getaddrinfo: Servname not supported for ai_socktype: Input/output error (unix/:/dev/null)',
            'getaddrinfo: Temporary failure in name resolution: Input/output error (unknown-host:9)',
            'getaddrinfo: Name or service not known: Input/output error (-)'
        }
    elseif is_linux then
        expected_warnings = {
            'getaddrinfo: Servname not supported for ai_socktype (unix/:/dev/null)',
            'getaddrinfo: Temporary failure in name resolution (unknown-host:9)',
            'getaddrinfo: Name or service not known (-)'
        }
    else
        expected_warnings = {
            'getaddrinfo: nodename nor servname provided, or not known (unix/:/dev/null)',
            'getaddrinfo: nodename nor servname provided, or not known (unknown-host:9)',
            'getaddrinfo: nodename nor servname provided, or not known (-)'
        }
    end

    local collected = server:exec(function() return warnings end)
    t.assert_equals(collected, expected_warnings)
end
86 |
--------------------------------------------------------------------------------
/test/integration/quit_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Add the second instance on the first one and wait until the second
-- instance sees the first one alive.
g.test_join = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert(first:add_member('localhost:13302'))

    t.helpers.retrying(
        {}, second.check_status, second, 'localhost:13301', 'alive'
    )
end
24 |
--- leave() returns a truthy value the first time and the peer sees
-- the member as 'left'; a second leave() returns a falsy value.
g.test_quit = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local left = second:exec(function() return membership.leave() end)
    t.assert(left)

    t.helpers.retrying(
        {}, first.check_status, first, 'localhost:13302', 'left'
    )

    local left_again = second:exec(function() return membership.leave() end)
    t.assert(not left_again)
end
36 |
--- Re-initialize the second instance and add it on the first one
-- again; it must become 'alive'.
g.test_rejoin = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local initialized = second:exec(function()
        return membership.init("localhost", 13302)
    end)
    t.assert(initialized)
    t.assert(first:add_member('localhost:13302'))

    t.helpers.retrying(
        {}, first.check_status, first, 'localhost:13302', 'alive'
    )
end
49 |
--- mark_left() returns a truthy value for an alive member and a falsy
-- one for a member that has already left or isn't known at all.
g.test_mark_left = function()
    local first = cluster.servers[1]

    local function wait_status(uri, status)
        t.helpers.retrying({}, first.check_status, first, uri, status)
    end

    wait_status('localhost:13302', 'alive')

    t.assert(first:exec(function()
        return membership.mark_left("localhost:13302")
    end))
    wait_status('localhost:13302', 'left')

    -- The member has already left
    t.assert(not first:exec(function()
        return membership.mark_left("localhost:13302")
    end))

    -- There is no such member
    t.assert(not first:exec(function()
        return membership.mark_left("localhost:10000")
    end))
end
77 |
--------------------------------------------------------------------------------
/test/integration/reload_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- Check that hot-reload doesn't affect statuses.
g.test_reload_slow = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert(first:probe_uri('localhost:13302'))

    local peer = second:get_member('localhost:13301')
    t.assert_equals(peer['status'], 'alive')

    -- Guard fiber on the second instance: unless it is cancelled first,
    -- the next membership event makes it log the member list and exit
    -- the instance with code 1.
    second:exec(function()
        local log = require('log')
        local yaml = require('yaml')
        local fiber = require('fiber')

        local function guard()
            membership.subscribe():wait()
            fiber.testcancel()
            log.error('Unexpected event:')
            log.error(yaml.encode(membership.members()))
            os.exit(1)
        end

        rawset(_G, "guard", fiber.new(guard))
    end)

    -- Reload on the first instance in three steps with a one second
    -- pause after each: unload the module, require it again, re-run
    -- the instance's entry script.
    local reloaded = first:exec(function()
        local log = require('log')
        local fiber = require('fiber')

        package.loaded['membership'] = nil
        log.info('Membership unloaded')
        fiber.sleep(1)

        _G.membership = require('membership')
        log.info('Membership reloaded')
        fiber.sleep(1)

        log.info('Doing file %s...', arg[0])
        dofile(arg[0])
        log.info('Dofile succeeded')
        fiber.sleep(1)

        return membership.probe_uri('localhost:13302')
    end)
    t.assert(reloaded)

    second:exec(function() _G.guard:cancel() end)
end
59 |
--- Check that hot-reload doesn't affect other features: payload
-- delivery, subscriptions and encryption.
g.test_reload_fast = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    t.assert(first:probe_uri('localhost:13302'))

    local peer = second:get_member('localhost:13301')
    t.assert_equals(peer['status'], 'alive')

    -- A payload set after the reload must still be delivered
    t.assert(first:exec(function() return package.reload() end))

    t.assert(second:exec(function()
        return membership.set_payload("k", "v1")
    end))
    t.assert(second:probe_uri('localhost:13301'))
    local payload1 = first:members()['localhost:13302']['payload']
    t.assert_equals(payload1, { ['k'] = 'v1' })

    -- A subscription made before the reload must still be signalled
    first:exec(function() rawset(_G, "cond", membership.subscribe()) end)

    t.assert(first:exec(function() return package.reload() end))
    t.assert(second:exec(function()
        return membership.set_payload("k", "v2")
    end))
    t.assert(first:exec(function() return _G.cond:wait(10) end))
    local payload2 = first:members()['localhost:13302']['payload']
    t.assert_equals(payload2, { ['k'] = 'v2' })

    -- An encryption key set before the reload must stay in effect
    second:exec(function()
        return membership.set_encryption_key("YY")
    end)
    t.assert(second:exec(function() return package.reload() end))
    wait_status(first, 'localhost:13302', 'non-decryptable')
    wait_status(second, 'localhost:13301', 'non-decryptable')

    first:exec(function()
        return membership.set_encryption_key("YY")
    end)
    wait_status(first, 'localhost:13302', 'alive')
    wait_status(second, 'localhost:13301', 'alive')
end
116 |
--------------------------------------------------------------------------------
/test/integration/subscribe_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
14 |
--- The subscription's wait(1) must return a falsy value while nothing
-- changes and a truthy one after the peer has changed its payload.
g.test_subscribe = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    t.assert(first:add_member('localhost:13302'))

    first:exec(function()
        rawset(_G, "cond", membership.subscribe())
    end)

    local signalled = first:exec(function()
        return _G.cond:wait(1)
    end)
    t.assert(not signalled)

    t.assert(second:exec(function()
        return membership.set_payload("foo", "bar")
    end))

    signalled = first:exec(function()
        return _G.cond:wait(1)
    end)
    t.assert(signalled)
end
32 |
--------------------------------------------------------------------------------
/test/integration/sync_test.lua:
--------------------------------------------------------------------------------
1 | local t = require('luatest')
2 | local g = t.group()
3 | local cluster = require('test.helpers.cluster')
4 | local fiber = require('fiber')
5 |
-- Ports handed over to cluster.start().
local SERVER_LIST = { 13301, 13302 }

-- One cluster is shared by all tests of the group.
g.before_all(function()
    cluster.start('localhost', SERVER_LIST)
end)

g.after_all(function()
    cluster.stop()
end)
15 |
--- A member that is already dead on the first instance must also
-- become known, as dead, to an instance that joins later.
g.test_sync = function()
    local first, second = cluster.servers[1], cluster.servers[2]

    local function wait_status(server, uri, status)
        t.helpers.retrying({}, server.check_status, server, uri, status)
    end

    -- This test starts nothing on port 33088
    t.assert(first:add_member('localhost:33088'))
    wait_status(first, 'localhost:33088', 'dead')

    -- Wait for dead events to expire
    fiber.sleep(2)

    -- Make sure dead members are synced
    t.assert(second:add_member('localhost:13301'))
    wait_status(second, 'localhost:13301', 'alive')
    wait_status(second, 'localhost:33088', 'dead')
end
43 |
--------------------------------------------------------------------------------