├── .github └── workflows │ ├── publish.yaml │ └── test.yml ├── .gitignore ├── .luacheckrc ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── FindLdoc.cmake └── FindTarantool.cmake ├── config.ld ├── doc └── swim-paper.pdf ├── membership-scm-1.rockspec ├── membership.lua ├── membership ├── events.lua ├── members.lua ├── network.lua ├── options.lua └── stash.lua └── test ├── helpers ├── cluster.lua ├── instance.lua └── server.lua └── integration ├── allowed_members_test.lua ├── broadcast_test.lua ├── dead_myself_test.lua ├── dissemination_test.lua ├── encryption_test.lua ├── false_rumors_test.lua ├── init_test.lua ├── member_clock_diff_test.lua ├── payload_test.lua ├── probe_uri_test.lua ├── quit_test.lua ├── reload_test.lua ├── subscribe_test.lua └── sync_test.lua /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | tags: ['*'] 7 | 8 | jobs: 9 | publish-scm-1: 10 | if: github.ref == 'refs/heads/master' 11 | runs-on: ubuntu-22.04 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: tarantool/rocks.tarantool.org/github-action@master 15 | with: 16 | auth: ${{ secrets.ROCKS_AUTH }} 17 | files: membership-scm-1.rockspec 18 | 19 | publish-tag: 20 | if: startsWith(github.ref, 'refs/tags/') 21 | runs-on: ubuntu-22.04 22 | env: 23 | CMAKE_LDOC_FIND_REQUIRED: 'YES' 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: tarantool/setup-tarantool@v2 27 | with: 28 | tarantool-version: '2.11' 29 | 30 | # Setup ldoc 31 | - run: tarantoolctl rocks install ldoc 32 | --server=https://tarantool.github.io/LDoc/ 33 | - run: echo $PWD/.rocks/bin >> $GITHUB_PATH 34 | 35 | # Make a release 36 | - run: echo TAG=${GITHUB_REF##*/} >> $GITHUB_ENV 37 | - run: tarantoolctl rocks new_version --tag ${{ env.TAG }} 38 | - run: tarantoolctl rocks install membership-${{ env.TAG }}-1.rockspec 39 | - run: tarantoolctl rocks pack membership ${{ env.TAG }} 40 | 41 | - uses: tarantool/rocks.tarantool.org/github-action@master 42 | with: 43 | auth: ${{ secrets.ROCKS_AUTH }} 44 | files: | 45 | membership-${{ env.TAG }}-1.rockspec 46 | membership-${{ env.TAG }}-1.all.rock 47 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | test: 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | tarantool: 13 | - '1.10' 14 | - '2.10' 15 | - '2.11' 16 | 17 | env: 18 | CMAKE_LDOC_FIND_REQUIRED: 'YES' 19 | 20 | runs-on: ubuntu-22.04 21 | steps: 22 | - uses: actions/checkout@v4 23 | 24 | - uses: tarantool/setup-tarantool@v3 25 | with: 26 | tarantool-version: ${{ matrix.tarantool }} 27 | 28 | - name: Cache rocks 29 | uses: actions/cache@v3 30 | id: cache-rocks 31 | with: 32 | path: .rocks/ 33 | key: cache-rocks-${{ matrix.tarantool }}-${{ hashFiles('membership-scm-1.rockspec') }} 34 | 35 | - name: Install dependencies 36 | if: steps.cache-rocks.outputs.cache-hit != 'true' 37 | run: | 38 | tarantoolctl rocks install luacheck 39 | tarantoolctl rocks install luatest 40 | tarantoolctl rocks install ldoc --server=https://tarantool.github.io/LDoc/ 41 | 42 | - name: Add rocks to PATH 43 | run: echo ".rocks/bin" >> $GITHUB_PATH 44 | 45 | - name: Run luacheck 46 | run: luacheck membership-scm-1.rockspec membership.lua membership/ 47 | 48 | - name: Build project 49 | run: tarantoolctl rocks make 50 | 51 | - name: Run tests 52 | run: .rocks/bin/luatest -v 53 | 54 | - name: Cleanup cached paths 55 | run: tarantoolctl rocks remove membership 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .rocks 2 | __pycache__ 3 | .cache/ 4 | build.luarocks 5 | doc 6 | build 7 | -------------------------------------------------------------------------------- /.luacheckrc: -------------------------------------------------------------------------------- 1 | redefined = false 2 | 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | 9 | ## [2.5.2] - 2025-03-31 10 | 11 | ### Fixed 12 | 13 | - Dead members which weren't allowed now are removed from the list. 14 | 15 | ## [2.5.1] - 2025-03-10 16 | 17 | ### Fixed 18 | 19 | - Protocol step error when the members list were cleared in process. 20 | 21 | ## [2.5.0] - 2025-03-06 22 | 23 | ### Added 24 | 25 | - `set_allowed_members` function to add only allowed members to membership process. 26 | 27 | ## [2.4.6] - 2025-01-13 28 | 29 | ### Added 30 | 31 | - `mark_left` function to mark removed members as `left`. 32 | 33 | ## [2.4.5] - 2024-06-24 34 | 35 | ### Fixed 36 | 37 | - Invalid events parsing. 38 | 39 | ## [2.4.4] - 2024-04-09 40 | 41 | ### Fixed 42 | 43 | - Invalid payload parsing in anti entropy step. 44 | 45 | ## [2.4.3] - 2024-01-29 46 | 47 | ### Fixed 48 | 49 | - Invalid payload parsing when the message is broken. 50 | 51 | ## [2.4.2] - 2024-01-18 52 | 53 | ### Added 54 | 55 | - IPv6 support. 56 | - `remove_member` function to clean up member data. 57 | 58 | 59 | ## [2.4.1] - 2023-09-28 60 | 61 | ### Fixed 62 | 63 | - Log overflow when inner fibers weren't canceled. 64 | 65 | ## [2.4.0] - 2021-07-12 66 | 67 | ### Added 68 | 69 | - New option `SUSPICIOUSNESS` (default: `true`) allows to 70 | disable generation of rumors about suspected members. Also, 71 | membership won't produce any rumors unless myself is alive. 72 | 73 | - New option `log_debug` which can be easily overridden to 74 | control the verbosity level. 75 | 76 | ### Fixed 77 | 78 | - Uncaught exception which prevented discovering 79 | non-decryptable members. 80 | 81 | - Avoid event duplication due to a bug. 82 | 83 | - Properly handle the internal option `NUM_FAILURE_DETECTION_SUBGROUPS` 84 | which controls the number of indirect pings. 85 | 86 | ## [2.3.2] - 2021-04-22 87 | 88 | ### Fixed 89 | 90 | - Enhance logging of `getaddrinfo` errors when DNS malfunctions. 91 | 92 | ## [2.3.1] - 2020-11-18 93 | 94 | ### Fixed 95 | 96 | - Make the initialization error more informative. 97 | 98 | ## [2.3.0] - 2020-11-17 99 | 100 | ### Added 101 | 102 | - Allow reloading the code on the fly without status intervention. 103 | 104 | ### Fixed 105 | 106 | - Make subscriptions garbage-collectible. Previously, `fiber.cond` 107 | objects obtained from `membership.subscribe` should have been 108 | unsubscribed manually, otherwise, they would never be GC'ed. 109 | And now they are. 110 | 111 | ## [2.2.0] - 2019-10-22 112 | 113 | ### Added 114 | 115 | - New field `member.clock_delta`, which indicates difference between 116 | remote and local clocks. 117 | 118 | ## [2.1.4] - 2019-08-25 119 | 120 | ### Fixed 121 | 122 | - In some cases membership did disseminate invalid (nil) payload. 123 | The bug relates versions 2.1.2, 2.1.3. 124 | 125 | ## [2.1.3] - 2019-08-01 126 | 127 | ### Fixed 128 | 129 | - Leaving membership with encryption enabled. 130 | Due to the bug, other members reported 'dead' status instead of 'left'. 131 | 132 | ## [2.1.2] - 2019-06-02 133 | 134 | ### Added 135 | 136 | - Ldoc api documentation 137 | 138 | ### Fixed 139 | 140 | - Fairly calculate size of UDP packets 141 | - Speed up events dissemination by fully utilizing 142 | PING and ACK packets 143 | - Restrict packet size for anti-entropy sync. 144 | Due to the lack of restriction it used to fail 145 | which plagued members detection 146 | 147 | ### Minor 148 | 149 | - Make tests lighter by using `console` connection instead of `net.box` 150 | 151 | ## [2.1.1] - 2019-01-09 152 | 153 | ### Fixed 154 | 155 | - Obtain UDP broadcast address from `getifaddrs` C call 156 | 157 | ### Updated 158 | 159 | - Module `checks` dependency updated to v3.0.0 160 | 161 | ## [2.1.0] - 2018-09-04 162 | 163 | ### Added 164 | 165 | - API method `probe_uri()` 166 | - API method `get_member()` 167 | - Low-level encryption support 168 | - API methods `set_encryption_key()`, `get_encryption_key()` 169 | - API method `broadcast()` 170 | - API methods `subscribe()`, `unsubscribe()` 171 | 172 | ### Changed 173 | 174 | - API method `set_payload()` now sets only the given key within payload table 175 | - Hide internal numeric `status` from public API 176 | 177 | ## [2.0.0] - 2018-04-03 178 | 179 | ### Changed 180 | 181 | - Rename API method: `quit()` -> `leave()` 182 | 183 | ## [1.0.0] - 2018-04-02 184 | 185 | ### Added 186 | 187 | - Basic functionality 188 | - Integration tests 189 | - Luarock-based packaging 190 | - Gitlab CI integration 191 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 2 | 3 | project(membership C) 4 | 5 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) 6 | set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY TRUE) 7 | 8 | # Find Tarantool and Lua dependecies 9 | set(TARANTOOL_FIND_REQUIRED ON) 10 | find_package(Tarantool) 11 | include_directories(${TARANTOOL_INCLUDE_DIRS}) 12 | 13 | file(GLOB_RECURSE LUA_FILES 14 | "${CMAKE_CURRENT_SOURCE_DIR}/membership.lua" 15 | "${CMAKE_CURRENT_SOURCE_DIR}/membership/*.lua" 16 | ) 17 | 18 | ## API doc #################################################################### 19 | ############################################################################### 20 | 21 | if(DEFINED ENV{CMAKE_LDOC_FIND_REQUIRED}) 22 | set(LDOC_FIND_REQUIRED "$ENV{CMAKE_LDOC_FIND_REQUIRED}") 23 | endif() 24 | find_package(Ldoc) 25 | 26 | set(DOC_OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/doc/index.html) 27 | 28 | add_custom_command( 29 | OUTPUT DOC_OUTPUT 30 | COMMAND ${LDOC} --all . 31 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 32 | DEPENDS ${LUA_FILES} 33 | COMMENT "Building API documentation" 34 | VERBATIM 35 | ) 36 | 37 | if(LDOC_FOUND) 38 | add_custom_target(doc ALL 39 | DEPENDS DOC_OUTPUT) 40 | else() 41 | add_custom_target(doc 42 | DEPENDS DOC_OUTPUT) 43 | endif() 44 | 45 | ## Testing #################################################################### 46 | ############################################################################### 47 | 48 | enable_testing() 49 | 50 | add_test( 51 | NAME test_integration 52 | COMMAND pytest -v 53 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 54 | ) 55 | 56 | ## Install #################################################################### 57 | ############################################################################### 58 | 59 | install(CODE "") 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019-2024, Tarantool AUTHORS. 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # Membership library for Tarantool based on a gossip protocol 6 | 7 | This library builds a mesh from multiple tarantool instances. The 8 | mesh monitors itself, helps members discover everyone else and get 9 | notified about their status changes with low latency. 10 | 11 | It is built upon the ideas from consul, or, more precisely, 12 | the [SWIM](doc/swim-paper.pdf) algorithm. 13 | 14 | Membership module works over UDP protocol and can operate 15 | even before tarantool `box.cfg` was initialized. 16 | 17 | ## Member data structure 18 | 19 | A member is represented by the table with fields: 20 | 21 | * `uri` 22 | * `status` is a string: `alive`, `suspect`, `dead` or `left` 23 | * `incarnation` which is incremented every time the instance is being 24 | suspected or dead or updates its payload 25 | * `payload` is a table with auxiliary data, which can be used by various 26 | modules to do whatever they want 27 | * `timestamp` is a value of `fiber.time64()` (in microseconds), 28 | corresponding to the last update of status or incarnation. `timestamp` 29 | is always local and does not depent on other members' clock setting. 30 | * `clock_delta` is a time drift between member's clock (remote) and the 31 | local one (in microseconds). 32 | 33 | Example: 34 | 35 | ```yaml 36 | --- 37 | uri: "localhost:33001" 38 | status: "alive" 39 | incarnation: 1 40 | payload: 41 | uuid: "2d00c500-2570-4019-bfcc-ab25e5096b73" 42 | timestamp: 1522427330993752 43 | clock_delta: 27810 44 | ... 45 | ``` 46 | 47 | ## Reloadability 48 | 49 | Membership module supports hot-reload: 50 | 51 | ```lua 52 | package.loaded['membership'] = nil 53 | require('membership') 54 | ``` 55 | 56 | ## Changing options 57 | 58 | You can change membership options directly by using: 59 | 60 | ```lua 61 | require("membership.options")[opt_name] = opt_value 62 | ``` 63 | 64 | Available options: 65 | * Period of sending direct PINGs. 66 | `PROTOCOL_PERIOD_SECONDS`, default: 1.0 67 | 68 | * Time to wait for ACK message after PING. 69 | If a member does not reply within this time, 70 | the indirect ping algorithm is invoked. 71 | `ACK_TIMEOUT_SECONDS`, default: 0.2 72 | 73 | * Period to perform anti-entropy sync. 74 | `ANTI_ENTROPY_PERIOD_SECONDS`, default: 10 75 | 76 | * Toggle producing `suspect` rumors when ping fails. Even if disabled, 77 | it doesn't affect neither gossip dissemination nor other statuses 78 | generation (e.g. `dead` and `non-decryptable`). 79 | `SUSPICIOUSNESS`, default: true 80 | 81 | * Timeout to mark `suspect` members as `dead`. 82 | `SUSPECT_TIMEOUT_SECONDS`, default: 3 83 | 84 | * Number of members to try indirectly pinging a `suspect`. 85 | Denoted as `k` in [SWIM paper](swim-paper.pdf). 86 | `NUM_FAILURE_DETECTION_SUBGROUPS`, default: 3 87 | 88 | * Maximum size of UPD packets to send. 89 | `MAX_PACKET_SIZE`, default: 1472 (`Default-MTU (1500) - IP-Header (20) - UDP-Header (8)`) 90 | 91 | ## Payload 92 | 93 | You can add payload to any member by calling: 94 | 95 | ```lua 96 | membership.set_payload(key, value) 97 | ``` 98 | -------------------------------------------------------------------------------- /cmake/FindLdoc.cmake: -------------------------------------------------------------------------------- 1 | find_program(LDOC ldoc 2 | HINTS .rocks/ 3 | PATH_SUFFIXES bin 4 | DOC "Documentation generator tool for Lua source code" 5 | ) 6 | 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(LDOC 9 | REQUIRED_VARS LDOC 10 | ) 11 | 12 | mark_as_advanced(LDOC) 13 | -------------------------------------------------------------------------------- /cmake/FindTarantool.cmake: -------------------------------------------------------------------------------- 1 | # Define GNU standard installation directories 2 | include(GNUInstallDirs) 3 | 4 | macro(extract_definition name output input) 5 | string(REGEX MATCH "#define[\t ]+${name}[\t ]+\"([^\"]*)\"" 6 | _t "${input}") 7 | string(REGEX REPLACE "#define[\t ]+${name}[\t ]+\"(.*)\"" "\\1" 8 | ${output} "${_t}") 9 | endmacro() 10 | 11 | find_path(TARANTOOL_INCLUDE_DIR tarantool/module.h 12 | HINTS ${TARANTOOL_DIR} ENV TARANTOOL_DIR 13 | PATH_SUFFIXES include 14 | ) 15 | 16 | if(TARANTOOL_INCLUDE_DIR) 17 | set(_config "-") 18 | file(READ "${TARANTOOL_INCLUDE_DIR}/tarantool/module.h" _config0) 19 | string(REPLACE "\\" "\\\\" _config ${_config0}) 20 | unset(_config0) 21 | extract_definition(PACKAGE_VERSION TARANTOOL_VERSION ${_config}) 22 | extract_definition(INSTALL_PREFIX _install_prefix ${_config}) 23 | unset(_config) 24 | endif() 25 | 26 | include(FindPackageHandleStandardArgs) 27 | find_package_handle_standard_args(TARANTOOL 28 | REQUIRED_VARS TARANTOOL_INCLUDE_DIR VERSION_VAR TARANTOOL_VERSION) 29 | if(TARANTOOL_FOUND) 30 | set(TARANTOOL_INCLUDE_DIRS "${TARANTOOL_INCLUDE_DIR}" 31 | "${TARANTOOL_INCLUDE_DIR}/tarantool/" 32 | CACHE PATH "Include directories for Tarantool") 33 | set(TARANTOOL_INSTALL_LIBDIR "${CMAKE_INSTALL_LIBDIR}/tarantool" 34 | CACHE PATH "Directory for storing Lua modules written in Lua") 35 | set(TARANTOOL_INSTALL_LUADIR "${CMAKE_INSTALL_DATADIR}/tarantool" 36 | CACHE PATH "Directory for storing Lua modules written in C") 37 | 38 | if (NOT TARANTOOL_FIND_QUIETLY AND NOT FIND_TARANTOOL_DETAILS) 39 | set(FIND_TARANTOOL_DETAILS ON CACHE INTERNAL "Details about TARANTOOL") 40 | message(STATUS "Tarantool LUADIR is ${TARANTOOL_INSTALL_LUADIR}") 41 | message(STATUS "Tarantool LIBDIR is ${TARANTOOL_INSTALL_LIBDIR}") 42 | endif () 43 | endif() 44 | mark_as_advanced(TARANTOOL_INCLUDE_DIRS TARANTOOL_INSTALL_LIBDIR 45 | TARANTOOL_INSTALL_LUADIR) 46 | -------------------------------------------------------------------------------- /config.ld: -------------------------------------------------------------------------------- 1 | project = 'membership' 2 | file = { 3 | 'membership.lua', 4 | 'membership/options.lua', 5 | } 6 | topics = { 7 | 'README.md', 8 | 'CHANGELOG.md', 9 | } 10 | format = 'markdown' 11 | -------------------------------------------------------------------------------- /doc/swim-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tarantool/membership/46487f0cfdf35989f141df0816492960fbb6c224/doc/swim-paper.pdf -------------------------------------------------------------------------------- /membership-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = 'membership' 2 | version = 'scm-1' 3 | source = { 4 | url = 'git+https://github.com/tarantool/membership.git', 5 | branch = 'master', 6 | } 7 | dependencies = { 8 | 'lua >= 5.1', 9 | 'checks ~> 3', 10 | } 11 | 12 | external_dependencies = { 13 | TARANTOOL = { 14 | header = 'tarantool/module.h', 15 | }, 16 | } 17 | 18 | build = { 19 | type = 'cmake', 20 | variables = { 21 | TARANTOOL_DIR = '$(TARANTOOL_DIR)', 22 | TARANTOOL_INSTALL_LIBDIR = '$(LIBDIR)', 23 | TARANTOOL_INSTALL_LUADIR = '$(LUADIR)', 24 | }, 25 | install = { 26 | lua = { 27 | ['membership'] = 'membership.lua', 28 | ['membership.stash'] = 'membership/stash.lua', 29 | ['membership.events'] = 'membership/events.lua', 30 | ['membership.members'] = 'membership/members.lua', 31 | ['membership.options'] = 'membership/options.lua', 32 | ['membership.network'] = 'membership/network.lua', 33 | } 34 | }, 35 | copy_directories = {"doc"}, 36 | } 37 | -------------------------------------------------------------------------------- /membership.lua: -------------------------------------------------------------------------------- 1 | --- Membership library for Tarantool based on a gossip protocol. 2 | -- This library builds a mesh from multiple tarantool instances. The 3 | -- mesh monitors itself, helps members discover everyone else and get 4 | -- notified about their status changes with low latency. 5 | -- 6 | -- It is built upon the ideas from consul, or, more precisely, 7 | -- the [SWIM](swim-paper.pdf) algorithm. 8 | -- 9 | -- Membership module works over UDP protocol and can operate 10 | -- even before tarantool [`box.cfg`](https://tarantool.io/en/doc/latest/book/box/box_cfg/) was initialized. 11 | -- @module membership 12 | 13 | local log = require('log') 14 | local uri_tools = require('uri') 15 | local fiber = require('fiber') 16 | local checks = require('checks') 17 | local socket = require('socket') 18 | local msgpack = require('msgpack') 19 | 20 | for _, m in ipairs({ 21 | 'membership.stash', 22 | 'membership.events', 23 | 'membership.options', 24 | 'membership.members', 25 | 'membership.network', 26 | }) do 27 | package.loaded[m] = nil 28 | end 29 | 30 | local opts = require('membership.options') 31 | local stash = require('membership.stash') 32 | local events = require('membership.events') 33 | local members = require('membership.members') 34 | local network = require('membership.network') 35 | 36 | local _sync_trigger = stash.get('_sync_trigger') or fiber.cond() 37 | local _ack_trigger = stash.get('_ack_trigger') or fiber.cond() 38 | local _ack_cache = stash.get('_ack_cache') or {} 39 | local _resolve_cache = stash.get('_resolve_cache') or {} 40 | local _allowed_uri_set = stash.get('_allowed_uri_set') 41 | 42 | local function after_reload() 43 | stash.set('_ack_cache', _ack_cache) 44 | stash.set('_ack_trigger', _ack_trigger) 45 | stash.set('_sync_trigger', _sync_trigger) 46 | stash.set('_resolve_cache', _resolve_cache) 47 | stash.set('_allowed_uri_set', _allowed_uri_set) 48 | end 49 | 50 | local _sock = stash.get('_sock') 51 | local advertise_uri = stash.get('advertise_uri') 52 | 53 | local function resolve(uri) 54 | checks('string') 55 | 56 | if _resolve_cache[uri] then 57 | local member = members.get(uri) 58 | if member and member.status == opts.ALIVE then 59 | return _resolve_cache[uri] 60 | else 61 | _resolve_cache[uri] = nil 62 | end 63 | end 64 | 65 | local parts = uri_tools.parse(uri) 66 | if not parts then 67 | if _resolve_cache[uri] == nil then 68 | _resolve_cache[uri] = false 69 | log.warn("parse error (%s)", uri) 70 | end 71 | return nil 72 | end 73 | 74 | local family = parts.ipv6 and 'AF_INET6' or 'AF_INET' 75 | local addrinfo, err = socket.getaddrinfo( 76 | parts.host, parts.service, 77 | {family=family, type='SOCK_DGRAM'} 78 | ) 79 | if addrinfo == nil then 80 | if _resolve_cache[uri] == nil then 81 | _resolve_cache[uri] = false 82 | log.warn("%s (%s)", err or 'getaddrinfo: Unknown error', uri) 83 | end 84 | return nil 85 | end 86 | 87 | _resolve_cache[uri] = addrinfo[1] 88 | return addrinfo[1] 89 | end 90 | 91 | local function nslookup(host, port) 92 | checks('string', 'number') 93 | 94 | for uri, cache in pairs(_resolve_cache) do 95 | if cache 96 | and cache.host == host 97 | and cache.port == port 98 | then 99 | return uri 100 | end 101 | end 102 | 103 | return nil 104 | end 105 | 106 | local function random_permutation(tbl) 107 | local cnt = #tbl 108 | for src = 1, cnt-1 do 109 | local dst = math.random(src, cnt) 110 | local x = tbl[dst] 111 | tbl[dst] = tbl[src] 112 | tbl[src] = x 113 | end 114 | return tbl 115 | end 116 | 117 | -- 118 | -- MESSAGE SENDING 119 | -- 120 | 121 | local function send_message(uri, msg_type, msg_data) 122 | checks('string', 'string', 'table') 123 | local addr = resolve(uri) 124 | if not addr then 125 | return false 126 | end 127 | 128 | local events_to_send = {} 129 | local msg_raw = {advertise_uri, msg_type, msg_data, events_to_send} 130 | local msg_size = #msgpack.encode(msg_raw) 131 | 132 | -- Always tell the recipient what current instance thinks about it. 133 | -- It's necessary to refute rumors faster. 134 | local member = members.get(uri) 135 | if member then 136 | local extra_event = events.get(uri) or { 137 | uri = uri, 138 | status = member.status, 139 | incarnation = member.incarnation, 140 | ttl = 1, 141 | } 142 | table.insert(events_to_send, events.pack(extra_event)) 143 | msg_size = msg_size + events.estimate_msgpacked_size(extra_event) 144 | events_to_send[uri] = true 145 | end 146 | 147 | -- And always tell about myself to speed up payload dissemination. 148 | if not events_to_send[advertise_uri] then 149 | local myself = members.get(advertise_uri) 150 | local extra_event = events.get(advertise_uri) or { 151 | uri = advertise_uri, 152 | status = myself.status, 153 | incarnation = myself.incarnation, 154 | payload = myself.payload, 155 | ttl = 1, 156 | } 157 | table.insert(events_to_send, events.pack(extra_event)) 158 | msg_size = msg_size + events.estimate_msgpacked_size(extra_event) 159 | events_to_send[advertise_uri] = true 160 | end 161 | 162 | for uri, event in events.pairs() do 163 | if not events_to_send[uri] then 164 | local evt_size = events.estimate_msgpacked_size(event) 165 | if #events_to_send+1 == 16 then 166 | evt_size = evt_size + 2 167 | end 168 | local enc_size = opts.encrypted_size(msg_size + evt_size) 169 | if enc_size > opts.MAX_PACKET_SIZE then 170 | break 171 | else 172 | table.insert(events_to_send, events.pack(event)) 173 | events_to_send[uri] = true 174 | msg_size = msg_size + evt_size 175 | end 176 | end 177 | end 178 | 179 | local random_members = random_permutation(members.filter_excluding(nil)) 180 | for _, member_uri in ipairs(random_members) do 181 | if not events_to_send[member_uri] then 182 | local member = members.get(member_uri) 183 | local event = { 184 | uri = member_uri, 185 | status = member.status, 186 | incarnation = member.incarnation, 187 | payload = member.payload, 188 | ttl = 1, 189 | } 190 | 191 | local evt_size = events.estimate_msgpacked_size(event) 192 | if #events_to_send+1 == 16 then 193 | evt_size = evt_size + 2 194 | end 195 | local enc_size = opts.encrypted_size(msg_size + evt_size) 196 | if enc_size > opts.MAX_PACKET_SIZE then 197 | break 198 | else 199 | table.insert(events_to_send, events.pack(event)) 200 | events_to_send[member_uri] = true 201 | msg_size = msg_size + evt_size 202 | end 203 | end 204 | end 205 | 206 | for k, _ in pairs(events_to_send) do 207 | if type(k) == 'string' then 208 | events_to_send[k] = nil 209 | end 210 | end 211 | 212 | events.gc() 213 | 214 | local msg_msgpacked = msgpack.encode(msg_raw) 215 | local msg_encrypted = opts.encrypt(msg_msgpacked) 216 | local ret = _sock:sendto(addr.host, addr.port, msg_encrypted) 217 | return ret and ret > 0 218 | end 219 | 220 | local function send_anti_entropy(uri, msg_type, remote_tbl) 221 | -- send to `uri` all local members that are not in `remote_tbl` 222 | -- well, not all actualy, but all that fits into UDP packet 223 | checks('string', 'string', 'table') 224 | local addr = resolve(uri) 225 | if not addr then 226 | return false 227 | end 228 | 229 | local members_to_send = {} 230 | local msg_raw = {advertise_uri, msg_type, members_to_send, {}} 231 | local msg_size = #msgpack.encode(msg_raw) 232 | 233 | local random_members = random_permutation(members.filter_excluding(nil)) 234 | for _, member_uri in ipairs(random_members) do 235 | local member = members.get(member_uri) 236 | 237 | if events.should_overwrite(member, remote_tbl[member_uri]) then 238 | local member_size = members.estimate_msgpacked_size(member_uri, member) 239 | if #members_to_send+1 == 16 then 240 | -- msgpack: 241 | -- `fixarray` stores an array whose length is upto 15 elements 242 | -- `array 16` stores an array whose length is upto (2^16)-1 elements 243 | -- it's 2 bytes larger 244 | member_size = member_size + 2 245 | end 246 | local enc_size = opts.encrypted_size(msg_size + member_size) 247 | if enc_size > opts.MAX_PACKET_SIZE then 248 | break 249 | else 250 | table.insert(members_to_send, members.pack(member_uri, member)) 251 | msg_size = msg_size + member_size 252 | end 253 | end 254 | end 255 | 256 | local msg_msgpacked = msgpack.encode(msg_raw) 257 | local msg_encrypted = opts.encrypt(msg_msgpacked) 258 | local ret = _sock:sendto(addr.host, addr.port, msg_encrypted) 259 | return ret and ret > 0 260 | end 261 | 262 | -- 263 | -- MESSAGE RECEIVING 264 | -- 265 | 266 | local function handle_message(msg) 267 | local ok, decrypted = pcall(opts.decrypt, msg) 268 | if not ok then 269 | return false 270 | end 271 | 272 | local ok, decoded = pcall(msgpack.decode, decrypted) 273 | if not ok 274 | or type(decoded) ~= 'table' 275 | or #decoded ~= 4 then 276 | -- sometimes misencrypted messages 277 | -- are successfully decodes 278 | -- as a valid msgpack with useless data 279 | return false 280 | end 281 | 282 | local sender_uri = decoded[1] 283 | local msg_type = decoded[2] 284 | local msg_data = decoded[3] 285 | local new_events = decoded[4] 286 | 287 | for _, event in ipairs(new_events or {}) do 288 | local event = events.unpack(event) 289 | 290 | if event.uri == advertise_uri then 291 | -- this is a rumor about ourselves 292 | local myself = members.get(advertise_uri) 293 | 294 | if event.status ~= opts.ALIVE and event.incarnation >= myself.incarnation then 295 | -- someone thinks that we are dead 296 | log.info('Refuting the rumor that we are %s', opts.STATUS_NAMES[event.status]) 297 | event.incarnation = event.incarnation + 1 298 | event.status = opts.ALIVE 299 | event.payload = myself.payload 300 | event.ttl = members.count() 301 | elseif event.incarnation > myself.incarnation then 302 | -- this branch can be called after quick restart 303 | -- when the member who PINGs us does not know we were dead 304 | -- so we increment incarnation and start spreading 305 | -- the rumor with our current payload 306 | 307 | event.ttl = members.count() 308 | event.incarnation = event.incarnation + 1 309 | event.payload = myself.payload 310 | end 311 | end 312 | 313 | events.handle(event) 314 | end 315 | 316 | -- luacheck:ignore 542 317 | if msg_type == 'PING' then 318 | if msg_data.dst == advertise_uri then 319 | -- set ack timestamp 320 | msg_data.ats = fiber.time64() 321 | send_message(sender_uri, 'ACK', msg_data) 322 | elseif sender_uri == advertise_uri then 323 | -- seems to be a local loop 324 | -- drop it 325 | elseif msg_data.dst ~= nil then 326 | -- forward 327 | send_message(msg_data.dst, 'PING', msg_data) 328 | else 329 | log.error('Message PING without destination uri') 330 | end 331 | elseif msg_type == 'ACK' then 332 | if msg_data.src == advertise_uri then 333 | -- set receive timestamp 334 | msg_data.rts = fiber.time64() 335 | table.insert(_ack_cache, msg_data) 336 | _ack_trigger:broadcast() 337 | elseif msg_data.src ~= nil then 338 | -- forward 339 | send_message(msg_data.src, 'ACK', msg_data) 340 | else 341 | log.error('Message ACK without source uri') 342 | end 343 | elseif msg_type == 'SYNC_REQ' or msg_type == 'SYNC_ACK' then 344 | local remote_tbl = {} 345 | for _, member in ipairs(msg_data) do 346 | local member_uri, member = members.unpack(member) 347 | remote_tbl[member_uri] = member 348 | 349 | if events.should_overwrite(member, members.get(member_uri)) then 350 | events.generate(member_uri, member.status, member.incarnation, member.payload) 351 | end 352 | end 353 | 354 | if msg_type == 'SYNC_REQ' then 355 | send_anti_entropy(sender_uri, 'SYNC_ACK', remote_tbl) 356 | else 357 | _sync_trigger:broadcast() 358 | end 359 | elseif msg_type == 'LEAVE' then 360 | -- just handle the event 361 | -- do nothing more 362 | else 363 | error('Unknown message ' .. tostring(msg_type)) 364 | end 365 | 366 | return true 367 | end 368 | 369 | local function _handle_message_step() 370 | if _sock == nil then 371 | return 372 | end 373 | local ok = _sock:readable(opts.PROTOCOL_PERIOD_SECONDS) 374 | if not ok then 375 | return 376 | end 377 | 378 | if _sock == nil then 379 | return false 380 | end 381 | local msg, from = _sock:recvfrom(opts.MAX_PACKET_SIZE) 382 | local ok = handle_message(msg) 383 | 384 | if not ok and type(from) == 'table' then 385 | local uri = nslookup(from.host, from.port) 386 | local member = nil 387 | if uri ~= nil then 388 | member = members.get(uri) 389 | end 390 | if member and member.status == opts.DEAD then 391 | log.info('Broken UDP packet from %s - %s', 392 | uri, opts.STATUS_NAMES[opts.NONDECRYPTABLE] 393 | ) 394 | events.generate(uri, opts.NONDECRYPTABLE) 395 | end 396 | end 397 | end 398 | 399 | local function handle_message_step() 400 | local ok, res = xpcall(_handle_message_step, debug.traceback) 401 | fiber.testcancel() 402 | 403 | if not ok then 404 | log.error(res) 405 | end 406 | end 407 | 408 | -- 409 | -- PROTOCOL LOOP 410 | -- 411 | 412 | local function wait_ack(uri, ts, timeout) 413 | local now 414 | local deadline = ts + timeout 415 | repeat 416 | fiber.testcancel() 417 | now = fiber.time64() 418 | 419 | for _, ack in ipairs(_ack_cache) do 420 | if ack.dst == uri and ack.ts == ts then 421 | return ack 422 | end 423 | end 424 | until (now >= deadline) or not _ack_trigger:wait(tonumber(deadline - now) / 1.0e6) 425 | 426 | return nil 427 | end 428 | 429 | local function _get_clock_delta(ack_data) 430 | checks('table') 431 | local ack_ts = tonumber(ack_data.ats) 432 | local recv_ts = tonumber(ack_data.rts) 433 | local start_ts = tonumber(ack_data.ts) 434 | 435 | if ack_ts == nil or recv_ts == nil or start_ts == nil then 436 | return nil 437 | end 438 | 439 | return ack_ts - (recv_ts + start_ts) / 2 440 | end 441 | 442 | local _protocol_round_list = {} 443 | local _protocol_round_iter = 1 444 | local function _protocol_step() 445 | local loop_now = fiber.time64() 446 | 447 | -- expire suspected members 448 | local expiry = loop_now - opts.SUSPECT_TIMEOUT_SECONDS * 1.0e6 449 | for uri, member in members.pairs() do 450 | if member.status == opts.SUSPECT and member.timestamp < expiry then 451 | log.info('Node timed out: %s - %s', uri, opts.STATUS_NAMES[opts.DEAD]) 452 | events.generate(uri, opts.DEAD) 453 | end 454 | end 455 | 456 | -- cleanup ack cache 457 | table.clear(_ack_cache) 458 | 459 | -- prepare to send ping 460 | _protocol_round_iter = _protocol_round_iter + 1 461 | 462 | if _protocol_round_list[_protocol_round_iter] == nil then 463 | _protocol_round_iter = 1 464 | _protocol_round_list = members.filter_excluding('left') 465 | random_permutation(_protocol_round_list) 466 | end 467 | 468 | local uri = _protocol_round_list[_protocol_round_iter] 469 | if uri == nil then 470 | return 471 | end 472 | 473 | local msg_data = { 474 | ts = loop_now, 475 | src = advertise_uri, 476 | dst = uri, 477 | } 478 | 479 | -- try direct ping 480 | if send_message(uri, 'PING', msg_data) then 481 | local ack_data = wait_ack(uri, loop_now, opts.ACK_TIMEOUT_SECONDS * 1.0e6) 482 | if ack_data ~= nil then 483 | local member = members.get(uri) 484 | if member == nil then 485 | return 486 | end 487 | -- calculate time difference between local time and member time 488 | local delta = _get_clock_delta(ack_data) 489 | members.set(uri, member.status, member.incarnation, { clock_delta = delta }) -- update timstamp 490 | return 491 | end 492 | end 493 | local member = members.get(uri) 494 | if member ~= nil and member.status >= opts.DEAD then 495 | -- still dead, do nothing 496 | return 497 | end 498 | 499 | local sent_indirect = 0 500 | local through_uri_list = random_permutation( 501 | members.filter_excluding('unhealthy', advertise_uri, uri) 502 | ) 503 | for _, through_uri in ipairs(through_uri_list) do 504 | if sent_indirect >= opts.NUM_FAILURE_DETECTION_SUBGROUPS then 505 | break 506 | end 507 | 508 | if send_message(through_uri, 'PING', msg_data) then 509 | sent_indirect = sent_indirect + 1 510 | end 511 | end 512 | 513 | local ack_data 514 | if sent_indirect > 0 then 515 | ack_data = wait_ack(uri, loop_now, opts.PROTOCOL_PERIOD_SECONDS * 1.0e6) 516 | end 517 | 518 | -- check again in case if members list has been cleared 519 | local member = members.get(uri) 520 | if member == nil then 521 | return 522 | end 523 | if sent_indirect > 0 and ack_data ~= nil then 524 | -- calculate time difference between local time and member time 525 | local delta = _get_clock_delta(ack_data) 526 | members.set(uri, member.status, member.incarnation, { clock_delta = delta }) 527 | return 528 | elseif member.status == opts.ALIVE then 529 | local myself = members.get(advertise_uri) 530 | if myself.status ~= opts.ALIVE then 531 | opts.log_debug('Could not reach node: %s (%s myself)', uri, myself.status) 532 | elseif opts.SUSPICIOUSNESS == false then 533 | opts.log_debug('Could not reach node: %s (ignored)', uri) 534 | else 535 | log.info('Could not reach node: %s - %s', uri, 536 | opts.STATUS_NAMES[opts.SUSPECT] 537 | ) 538 | events.generate(uri, opts.SUSPECT) 539 | end 540 | return 541 | end 542 | end 543 | 544 | local function protocol_step() 545 | local t1 = fiber.clock() 546 | local ok, res = xpcall(_protocol_step, debug.traceback) 547 | fiber.testcancel() 548 | 549 | if not ok then 550 | log.error(res) 551 | end 552 | 553 | local t2 = fiber.clock() 554 | fiber.sleep(t1 + opts.PROTOCOL_PERIOD_SECONDS - t2) 555 | end 556 | 557 | -- 558 | -- ANTI ENTROPY SYNC 559 | -- 560 | 561 | local function _anti_entropy_step() 562 | local alive_members = members.filter_excluding('unhealthy', opts.advertise_uri) 563 | local alive_cnt = #alive_members 564 | if alive_cnt == 0 then 565 | return false 566 | end 567 | 568 | local uri = alive_members[math.random(alive_cnt)] 569 | send_anti_entropy(uri, 'SYNC_REQ', {}) 570 | return _sync_trigger:wait(opts.PROTOCOL_PERIOD_SECONDS) 571 | end 572 | 573 | local function anti_entropy_step() 574 | local ok, res = xpcall(_anti_entropy_step, debug.traceback) 575 | fiber.testcancel() 576 | 577 | if not ok then 578 | log.error(res) 579 | fiber.sleep(opts.PROTOCOL_PERIOD_SECONDS) 580 | elseif not res then 581 | fiber.sleep(opts.PROTOCOL_PERIOD_SECONDS) 582 | else 583 | fiber.sleep(opts.ANTI_ENTROPY_PERIOD_SECONDS) 584 | end 585 | end 586 | 587 | -- 588 | -- PUBLIC API 589 | -- 590 | 591 | --- Initialize the membership module. 592 | -- Bind a UDP socket to `0.0.0.0:`, 593 | -- set the `advertise_uri` parameter to `:`, 594 | -- and `incarnation` to `1`. 595 | -- 596 | -- The `init()` function can be called several times, 597 | -- the old socket will be closed and a new one opened. 598 | -- 599 | -- If the `advertise_uri` changes during the next `init()`, 600 | -- the old URI is considered `DEAD`. 601 | -- In order to leave the group gracefully use the @{leave} function. 602 | -- 603 | -- @function init 604 | -- @tparam string advertise_host 605 | -- either hostname or IP address being advertised to other members 606 | -- @tparam number port 607 | -- UDP port to bind and advertise 608 | -- @treturn boolean `true` 609 | -- @raise Socket bind error 610 | local function init(advertise_host, port) 611 | checks('string', 'number') 612 | 613 | local parts = uri_tools.parse(advertise_host) 614 | if _sock == nil or _sock:name().port ~= port then 615 | local family = parts.ipv6 and 'AF_INET6' or 'AF_INET' 616 | local addr = parts.ipv6 and '::' or '0.0.0.0' 617 | local sock = socket(family, 'SOCK_DGRAM', 'udp') 618 | local ok = sock:bind(addr, port) 619 | if not ok then 620 | local err = string.format( 621 | 'Socket bind error (%s/udp): %s', 622 | port, sock:error() 623 | ) 624 | log.error(err) 625 | error(err, 2) 626 | end 627 | sock:nonblock(true) 628 | sock:setsockopt('SOL_SOCKET', 'SO_BROADCAST', 1) 629 | 630 | if _sock then 631 | _sock:close() 632 | end 633 | 634 | _sock = sock 635 | end 636 | 637 | advertise_uri = uri_tools.format({ 638 | host = advertise_host, 639 | service = tostring(port) 640 | }) 641 | events.generate(advertise_uri, opts.ALIVE, 1, {}) 642 | 643 | stash.fiber_cancel('protocol_step') 644 | stash.fiber_cancel('anti_entropy_step') 645 | stash.fiber_cancel('handle_message_step') 646 | stash.fiber_new('protocol_step'):name('membership.main') 647 | stash.fiber_new('anti_entropy_step'):name('membership.entropy') 648 | stash.fiber_new('handle_message_step'):name('membership.handle') 649 | stash.set('advertise_uri', advertise_uri) 650 | stash.set('_sock', _sock) 651 | 652 | return true 653 | end 654 | 655 | 656 | --- Discover members in local network. 657 | -- Send UDP broadcast on the specified `port` 658 | -- to all networks discovered by `getifaddrs()` C call 659 | -- @function broadcast 660 | -- @tparam number port UDP port of the broadcast 661 | -- @return[1] `true` if broadcast was sent 662 | -- @return[2] `false` if `getifaddrs()` fails. 663 | local function broadcast(port) 664 | checks('number') 665 | 666 | local msg_data = { 667 | ts = fiber.time64(), 668 | src = advertise_uri, 669 | dst = advertise_uri, 670 | } 671 | 672 | local ok, netlist = pcall(network.getifaddrs) 673 | if not ok then 674 | log.warn('Membership BROADCAST impossible: %s', netlist) 675 | return false 676 | end 677 | 678 | local bcast_sent = false 679 | 680 | for _, addr in pairs(netlist) do 681 | local uri = addr.bcast or addr.inet4 682 | if uri then 683 | local uri = string.format('%s:%s', uri, port) 684 | send_message(uri, 'PING', msg_data) 685 | log.info('Membership BROADCAST sent to %s', uri) 686 | bcast_sent = true 687 | end 688 | end 689 | 690 | if not bcast_sent then 691 | log.warn('Membership BROADCAST not sent: No suitable ifaddrs found') 692 | return false 693 | end 694 | return true 695 | end 696 | 697 | --- Gracefully leave the membership group. 698 | -- The node will be marked with the status `left` 699 | -- and no other members will ever try to reconnect it. 700 | -- @function leave 701 | -- @treturn boolean 702 | -- `true` if call succeeds, 703 | -- `false` if member has already left. 704 | local function leave() 705 | if _sock == nil then 706 | return false 707 | end 708 | 709 | -- First, we need to stop all fibers 710 | stash.fiber_cancel('protocol_step') 711 | stash.fiber_cancel('anti_entropy_step') 712 | stash.fiber_cancel('handle_message_step') 713 | 714 | -- Perform artificial events.generate() and instantly send it 715 | local myself = members.get(advertise_uri) 716 | local event = events.pack({ 717 | uri = advertise_uri, 718 | status = opts.LEFT, 719 | incarnation = myself.incarnation, 720 | ttl = members.count(), 721 | }) 722 | local msg_msgpacked = msgpack.encode({advertise_uri, 'LEAVE', msgpack.NULL, {event}}) 723 | local msg_encrypted = opts.encrypt(msg_msgpacked) 724 | for _, uri in ipairs(members.filter_excluding('unhealthy', advertise_uri)) do 725 | local addr = resolve(uri) 726 | if addr then 727 | _sock:sendto(addr.host, addr.port, msg_encrypted) 728 | end 729 | end 730 | 731 | _sock:close() 732 | _sock = nil 733 | stash.set('_sock', nil) 734 | 735 | advertise_uri = nil 736 | stash.set('advertise_uri', nil) 737 | 738 | members.clear() 739 | events.clear() 740 | table.clear(_protocol_round_list) 741 | return true 742 | end 743 | 744 | --- Forcefully send leave message about an instance. 745 | -- @function mark_left 746 | -- @treturn boolean 747 | -- `true` if call succeeds, 748 | -- `false` if member has already left. 749 | local function mark_left(uri_to_leave) 750 | if _sock == nil then 751 | return false 752 | end 753 | 754 | -- Perform artificial events.generate() and instantly send it 755 | local myself = members.get(uri_to_leave) 756 | if not myself or myself.status == opts.LEFT then 757 | return false 758 | end 759 | local event = events.pack({ 760 | uri = uri_to_leave, 761 | status = opts.LEFT, 762 | incarnation = myself.incarnation, 763 | ttl = members.count(), 764 | }) 765 | local msg_msgpacked = msgpack.encode({uri_to_leave, 'LEAVE', msgpack.NULL, {event}}) 766 | local msg_encrypted = opts.encrypt(msg_msgpacked) 767 | for _, uri in ipairs(members.filter_excluding('unhealthy', uri_to_leave)) do 768 | local addr = resolve(uri) 769 | if addr then 770 | _sock:sendto(addr.host, addr.port, msg_encrypted) 771 | end 772 | end 773 | 774 | return true 775 | end 776 | 777 | --- Member data structure. 778 | -- A member is represented by the table with the following fields: 779 | -- 780 | -- @table MemberInfo 781 | -- @tfield string uri `` of a member 782 | -- 783 | -- @tfield string status a string that takes one of the values below 784 | -- 785 | -- * `alive`: a member that replies to ping-messages is alive and well. 786 | -- * `suspect`: if any member in the group cannot get a reply from any other member, the first member asks 787 | -- three other alive members to send a ping-message to the member in question. If there is no response, 788 | -- the latter becomes a suspect. 789 | -- * `dead`: a `suspect` becomes `dead` after a timeout. 790 | -- * `left`: a member gets the `left` status after executing the @{leave} function. 791 | -- 792 | -- @tfield number incarnation a value incremented every time 793 | -- the instance status changes, or its payload is updated 794 | -- 795 | -- @tfield table payload an auxiliary data that can be used by various modules 796 | -- 797 | -- @tfield number timestamp a value of fiber.time64() 798 | -- which corresponds to the last update of status or incarnation; 799 | -- it is always local and does not depend on other members’ clock setting. 800 | -- 801 | -- @tfield number clock_delta difference of clocks (fiber.time64) between self and peer 802 | -- calculated during ping/ack protocol step or while probe_uri call 803 | -- 804 | -- @usage tarantool> membership.myself() 805 | -- --- 806 | -- uri: "localhost:33001" 807 | -- status: "alive" 808 | -- incarnation: 1 809 | -- payload: 810 | -- uuid: "2d00c500-2570-4019-bfcc-ab25e5096b73" 811 | -- timestamp: 1522427330993752 812 | -- clock_delta: 700 813 | -- ... 814 | local function _member_pack(uri, member) 815 | checks('string', '?table') 816 | if not member then 817 | return nil 818 | end 819 | 820 | local payload = member.payload 821 | if payload == msgpack.NULL 822 | or type(payload) ~= 'table' 823 | then 824 | payload = {} 825 | end 826 | 827 | return { 828 | uri = uri, 829 | status = opts.STATUS_NAMES[member.status] or tostring(member.status), 830 | payload = payload, 831 | incarnation = member.incarnation, 832 | timestamp = member.timestamp, 833 | clock_delta = member.clock_delta, 834 | } 835 | end 836 | 837 | --- Obtain all members known to the current instance. 838 | -- 839 | -- Editing this table has no effect. 840 | -- @function members 841 | -- @treturn table a table with URIs as keys and corresponding @{MemberInfo} as values. 842 | local function get_members() 843 | local ret = {} 844 | for uri, member in members.pairs() do 845 | ret[uri] = _member_pack(uri, member) 846 | end 847 | return ret 848 | end 849 | 850 | --- Iterate over members. 851 | -- A shorthand for `pairs(membership.members())`. 852 | -- @function pairs 853 | -- @return Lua iterator 854 | -- @usage for uri, member in membership.pairs() do end 855 | 856 | --- Get info about member with the given URI. 857 | -- @function get_member 858 | -- @tparam string uri `` of member of interest 859 | -- @treturn MemberInfo the member data structure of the instance with the given URI. 860 | local function get_member(uri) 861 | local member = members.get(uri) 862 | return _member_pack(uri, member) 863 | end 864 | 865 | --- Get info about the current instance. 866 | -- @function myself 867 | -- @treturn MemberInfo the member data structure of the current instance. 868 | local function get_myself() 869 | return _member_pack( 870 | advertise_uri, 871 | members.get(advertise_uri) 872 | ) 873 | end 874 | 875 | --- Add a member to the group. 876 | -- Also propagate this event to other members. 877 | -- Adding a member to a single instance is enough 878 | -- as everybody else in the group will receive the update with time. 879 | -- It does not matter who adds whom. 880 | -- 881 | -- **Warning:** The gossip protocol guarantees 882 | -- that every member in the group becomes aware 883 | -- of any status change in two communication cycles. 884 | -- 885 | -- @function add_member 886 | -- @tparam string uri `` of member to add 887 | -- @treturn true|nil 888 | -- @treturn ?string Possible errors: 889 | -- 890 | -- * `"parse error"` - if the URI can not be parsed 891 | local function add_member(uri) 892 | checks('string') 893 | local parts = uri_tools.parse(uri) 894 | if not parts then 895 | return nil, 'parse error' 896 | end 897 | 898 | if parts.ipv6 then 899 | parts.host = '[' .. parts.host .. ']' 900 | end 901 | 902 | local uri = uri_tools.format({host = parts.host, service = parts.service}) 903 | local member = members.get(uri) 904 | local incarnation = nil 905 | if member and member.status == opts.LEFT then 906 | incarnation = member.incarnation + 1 907 | end 908 | 909 | events.generate(uri, opts.ALIVE, incarnation) 910 | 911 | return true 912 | end 913 | 914 | --- Send a ping to a member. 915 | -- Send a ping-message to a member to make sure it is in the group. 916 | -- 917 | -- If the member responds but not in the group, it is added. 918 | -- 919 | -- If it already is in the group, nothing happens. 920 | -- 921 | -- **Warning:** When destination IP can be resolved in several diffent 922 | -- ways (by different hostnames) it is possible that `probe_uri()` function returns 923 | -- `"no response"` error, but the member is added to the group with another URI, 924 | -- corresponding to its ``. 925 | -- 926 | -- @function probe_uri 927 | -- @tparam string uri `` of member to ping 928 | -- @treturn true|nil 929 | -- @treturn ?string Possible errors: 930 | -- 931 | -- * `"parse error"` - if the URI can not be parsed 932 | -- * `"ping was not sent"` - if hostname could not be reloved 933 | -- * `"no reponce"` - if member does not responf within 0.2 seconds 934 | local function probe_uri(uri) 935 | checks('string') 936 | local parts = uri_tools.parse(uri) 937 | if not parts then 938 | return nil, 'parse error' 939 | end 940 | 941 | if parts.ipv6 then 942 | parts.host = '[' .. parts.host .. ']' 943 | end 944 | 945 | local uri = uri_tools.format({host = parts.host, service = parts.service}) 946 | 947 | local loop_now = fiber.time64() 948 | local msg_data = { 949 | ts = loop_now, 950 | src = advertise_uri, 951 | dst = uri, 952 | } 953 | 954 | local ok = send_message(uri, 'PING', msg_data) 955 | if not ok then 956 | return nil, 'ping was not sent' 957 | end 958 | 959 | local ack_data = wait_ack(uri, loop_now, opts.ACK_TIMEOUT_SECONDS * 1.0e6) 960 | if ack_data == nil then 961 | return nil, 'no response' 962 | end 963 | 964 | local member = members.get(uri) 965 | if member ~= nil then 966 | local delta = _get_clock_delta(ack_data) 967 | members.set(uri, member.status, member.incarnation, { clock_delta = delta }) -- update timstamp 968 | end 969 | 970 | return true 971 | end 972 | 973 | --- Update payload and disseminate it along with the member status. 974 | -- Also increments `incarnation`. 975 | -- @function set_payload 976 | -- @tparam string key a key to set in payload table 977 | -- @param value auxiliary data 978 | local function set_payload(key, value) 979 | checks('string', '?') 980 | local myself = members.get(advertise_uri) 981 | local payload = myself.payload 982 | if type(payload) ~= 'table' then 983 | payload = {} 984 | end 985 | if payload[key] == value then 986 | return true 987 | end 988 | 989 | payload[key] = value 990 | events.generate( 991 | advertise_uri, 992 | myself.status, 993 | myself.incarnation + 1, 994 | payload 995 | ) 996 | return true 997 | end 998 | 999 | --- Remove a member. Don't use it unless you having a trouble with stale members. 1000 | -- @function remove_member 1001 | -- @tparam uri string 1002 | local function remove_member(uri) 1003 | checks('string') 1004 | local member = members.get(uri) 1005 | if member == nil then 1006 | return 1007 | end 1008 | 1009 | members.remove(uri) 1010 | end 1011 | 1012 | --- Filter out members from the list. 1013 | --- If the function wasn't called or allowed uri list 1014 | --- if empty, all members are allowed. 1015 | -- @function set_allowed_members 1016 | -- @tparam uris table URIs to leave in the list 1017 | local function set_allowed_members(uris) 1018 | checks('table') 1019 | events.clear() 1020 | table.clear(_protocol_round_list) 1021 | table.clear(_allowed_uri_set) 1022 | if next(uris) == nil then 1023 | return 1024 | end 1025 | for _, uri in ipairs(uris) do 1026 | _allowed_uri_set[uri] = true 1027 | end 1028 | for uri in pairs(stash.get('members._all_members') or {}) do 1029 | if not _allowed_uri_set[uri] then 1030 | members.remove(uri) 1031 | end 1032 | end 1033 | end 1034 | 1035 | do -- finish module loading 1036 | opts.after_reload() 1037 | events.after_reload() 1038 | members.after_reload() 1039 | after_reload() 1040 | stash.set('protocol_step', protocol_step) 1041 | stash.set('anti_entropy_step', anti_entropy_step) 1042 | stash.set('handle_message_step', handle_message_step) 1043 | end 1044 | 1045 | return { 1046 | init = init, 1047 | leave = leave, 1048 | mark_left = mark_left, 1049 | members = get_members, 1050 | broadcast = broadcast, 1051 | pairs = function() return pairs(get_members()) end, 1052 | myself = get_myself, 1053 | probe_uri = probe_uri, 1054 | add_member = add_member, 1055 | get_member = get_member, 1056 | remove_member = remove_member, 1057 | set_payload = set_payload, 1058 | set_allowed_members = set_allowed_members, 1059 | 1060 | --- Encryption Functions. 1061 | -- The encryption is handled by the 1062 | -- [`crypto.cipher.aes256.cbc`](https://tarantool.io/en/doc/latest/reference/reference_lua/crypto/) 1063 | -- Tarantool module. 1064 | -- 1065 | -- For proper communication, all members must be configured 1066 | -- to use the same encryption key. Otherwise, members report 1067 | -- either `dead` or `non-decryptable` in their status. 1068 | -- @section encryption 1069 | 1070 | --- Retrieve the encryption key that is currently in use. 1071 | -- @function get_encryption_key 1072 | -- @treturn string encryption key 1073 | get_encryption_key = assert(opts.get_encryption_key), 1074 | 1075 | --- Set the key used for low-level message encryption. 1076 | -- The key is either trimmed or padded automatically to be exactly 32 bytes. 1077 | -- If the `key` value is `nil`, the encryption is disabled. 1078 | -- 1079 | -- @function set_encryption_key 1080 | -- @tparam string key encryption key 1081 | -- @treturn nil 1082 | set_encryption_key = assert(opts.set_encryption_key), 1083 | 1084 | --- Subscription Functions. 1085 | -- A subscription is implemented with Tarantool built-in 1086 | -- [`fiber.cond`](https://tarantool.io/en/doc/latest/reference/reference_lua/fiber/#fiber-cond) 1087 | -- objects. 1088 | -- @section subsrcription 1089 | 1090 | --- Subscribe for updates in the members table. 1091 | -- @function subscribe 1092 | -- @return `fiber.cond` object which is 1093 | -- broadcasted whenever the members table changes 1094 | subscribe = assert(events.subscribe), 1095 | 1096 | --- Unsubscribe from membership updates. 1097 | -- Remove subscription on `cond` object. 1098 | -- 1099 | -- If parameter passed is already unsubscribed o invaled nothing happens. 1100 | -- @function unsubscribe 1101 | -- @param cond `fiber.cond` object obtained from `subscribe` function 1102 | -- @treturn nil 1103 | unsubscribe = assert(events.unsubscribe), 1104 | } 1105 | -------------------------------------------------------------------------------- /membership/events.lua: -------------------------------------------------------------------------------- 1 | local fiber = require('fiber') 2 | local checks = require('checks') 3 | local msgpack = require('msgpack') 4 | 5 | local opts = require('membership.options') 6 | local stash = require('membership.stash') 7 | local members = require('membership.members') 8 | 9 | local events = {} 10 | local _all_events = table.copy(stash.get('events._all_events')) or { 11 | -- [uri] = { 12 | -- uri = string, 13 | -- status = number, 14 | -- incarnation = number, 15 | -- ttl = number, 16 | -- } 17 | 18 | -- uri is a string in format ':' 19 | } 20 | local _expired = table.copy(stash.get('events._expired')) or { 21 | -- [uri] = true 22 | } 23 | local _subscribers = table.copy(stash.get('events._subscribers')) or { 24 | -- [fiber.cond] = true 25 | } 26 | setmetatable(_subscribers, {__mode = 'k'}) 27 | 28 | function events.after_reload() 29 | stash.set('events._expired', _expired) 30 | stash.set('events._all_events', _all_events) 31 | stash.set('events._subscribers', _subscribers) 32 | end 33 | 34 | function events.clear() 35 | table.clear(_all_events) 36 | table.clear(_expired) 37 | end 38 | 39 | function events.get(uri) 40 | checks('string') 41 | return _all_events[uri] 42 | end 43 | 44 | function events.all() 45 | return _all_events 46 | end 47 | 48 | function events.pairs() 49 | return pairs(_all_events) 50 | end 51 | 52 | function events.estimate_msgpacked_size(event) 53 | local sum = 0 54 | sum = sum + #msgpack.encode(event.uri) 55 | sum = sum + #msgpack.encode(event.status) 56 | sum = sum + #msgpack.encode(event.incarnation) 57 | sum = sum + #msgpack.encode(event.payload or msgpack.NULL) 58 | sum = sum + #msgpack.encode(event.ttl) 59 | return sum + 1 60 | end 61 | 62 | function events.pack(event) 63 | checks('table') 64 | event.ttl = event.ttl - 1 65 | if event.ttl <= 0 then 66 | _expired[event.uri] = true 67 | end 68 | 69 | return { 70 | event.uri, 71 | event.status, 72 | event.incarnation, 73 | event.payload or msgpack.NULL, 74 | event.ttl, 75 | } 76 | end 77 | 78 | function events.gc() 79 | for uri, _ in pairs(_expired) do 80 | _all_events[uri] = nil 81 | _expired[uri] = nil 82 | end 83 | end 84 | 85 | function events.unpack(event) 86 | checks('table') 87 | local payload = event[4] 88 | if payload == msgpack.NULL 89 | or type(payload) ~= 'table' 90 | then 91 | payload = nil 92 | end 93 | return { 94 | uri = tostring(event[1]), 95 | status = tonumber(event[2]) or opts.DEAD, 96 | incarnation = tonumber(event[3]) or 1, 97 | payload = payload, 98 | ttl = tonumber(event[5]) or 0, 99 | } 100 | end 101 | 102 | function events.should_overwrite(first, second) 103 | checks('table', '?table') 104 | if not second or first.incarnation > second.incarnation then 105 | return true 106 | elseif first.incarnation == second.incarnation then 107 | if first.status > second.status then 108 | return true 109 | end 110 | end 111 | return false 112 | end 113 | 114 | function events.generate(uri, status, incarnation, payload) 115 | checks('string', 'number', '?number', '?table') 116 | events.handle({ 117 | uri = uri, 118 | status = status or opts.ALIVE, 119 | incarnation = incarnation 120 | or (members.get(uri) or {}).incarnation 121 | or 1, 122 | payload = payload, 123 | ttl = math.floor(math.log(members.count(), 2)) + 2, 124 | }) 125 | end 126 | 127 | function events.handle(event) 128 | -- drop outdated events 129 | local member = members.get(event.uri) 130 | 131 | if events.should_overwrite(event, member) then 132 | _all_events[event.uri] = event 133 | else 134 | return 135 | end 136 | 137 | -- update members list 138 | if not member then 139 | opts.log_debug( 140 | 'Adding: %s (inc. %d) is %s', 141 | event.uri, event.incarnation, 142 | opts.STATUS_NAMES[event.status] 143 | ) 144 | elseif member.status ~= event.status or member.incarnation ~= event.incarnation then 145 | opts.log_debug( 146 | 'Rumor: %s (inc. %d) is %s', 147 | event.uri, event.incarnation, 148 | opts.STATUS_NAMES[event.status] 149 | ) 150 | end 151 | members.set(event.uri, event.status, event.incarnation, { payload = event.payload }) 152 | 153 | for cond, _ in pairs(_subscribers) do 154 | cond:broadcast() 155 | end 156 | end 157 | 158 | function events.subscribe() 159 | local cond = fiber.cond() 160 | _subscribers[cond] = true 161 | return cond 162 | end 163 | 164 | function events.unsubscribe(cond) 165 | _subscribers[cond] = nil 166 | return nil 167 | end 168 | 169 | return events 170 | -------------------------------------------------------------------------------- /membership/members.lua: -------------------------------------------------------------------------------- 1 | local fiber = require('fiber') 2 | local checks = require('checks') 3 | local msgpack = require('msgpack') 4 | 5 | local opts = require('membership.options') 6 | local stash = require('membership.stash') 7 | 8 | local members = {} 9 | local _all_members = table.copy(stash.get('members._all_members')) or { 10 | -- [uri] = { 11 | -- status = number, 12 | -- incarnation = number, 13 | -- timestamp = time64, 14 | -- payload = ?table, 15 | -- clock_delta = ?number 16 | -- } 17 | 18 | -- uri is a string in format ':' 19 | } 20 | 21 | local _allowed_uri_set = stash.get('_allowed_uri_set') 22 | 23 | function members.after_reload() 24 | stash.set('members._all_members', _all_members) 25 | end 26 | 27 | function members.clear() 28 | table.clear(_all_members) 29 | end 30 | 31 | function members.pairs() 32 | return pairs(_all_members) 33 | end 34 | 35 | function members.get(uri) 36 | return _all_members[uri] 37 | end 38 | 39 | function members.estimate_msgpacked_size(uri, member) 40 | local sum = 0 41 | sum = sum + #msgpack.encode(uri) 42 | sum = sum + #msgpack.encode(member.status) 43 | sum = sum + #msgpack.encode(member.incarnation) 44 | sum = sum + #msgpack.encode(member.payload or msgpack.NULL) 45 | return sum + 1 46 | end 47 | 48 | function members.pack(uri, member) 49 | checks('string', 'table') 50 | return { 51 | uri, 52 | member.status, 53 | member.incarnation, 54 | member.payload or msgpack.NULL, 55 | } 56 | end 57 | 58 | function members.unpack(member) 59 | checks('table') 60 | local payload = member[4] 61 | if payload == msgpack.NULL 62 | or type(payload) ~= 'table' 63 | then 64 | payload = nil 65 | end 66 | return member[1], { 67 | status = tonumber(member[2]), 68 | incarnation = tonumber(member[3]), 69 | payload = payload, 70 | } 71 | end 72 | 73 | function members.filter_excluding(state, uri1, uri2) 74 | assert(state == nil or state == 'left' or state == 'unhealthy') 75 | local ret = {} 76 | for uri, member in pairs(_all_members) do 77 | if (uri ~= uri1) and (uri ~= uri2) 78 | and ( 79 | (state == nil) 80 | or (state == 'unhealthy' and member.status == opts.ALIVE) 81 | or (state == 'left' and member.status ~= opts.LEFT) 82 | ) then 83 | table.insert(ret, uri) 84 | end 85 | end 86 | return ret 87 | end 88 | 89 | function members.set(uri, status, incarnation, params) 90 | checks('string', 'number', 'number', { payload = '?table', clock_delta = '?number' }) 91 | 92 | local member = _all_members[uri] 93 | 94 | if next(_allowed_uri_set) and not _allowed_uri_set[uri] 95 | and (status == opts.SUSPECT or status == opts.LEFT or status == opts.DEAD) then 96 | opts.log_debug('Ignoring member %s with status %s', uri, opts.STATUS_NAMES[status]) 97 | -- removes instance if it is not in the allowed list and dead 98 | members.remove(uri) 99 | return 100 | end 101 | 102 | if member and incarnation < member.incarnation then 103 | error('Can not downgrade incarnation') 104 | end 105 | 106 | local payload 107 | if params ~= nil and params.payload ~= nil then 108 | payload = params.payload 109 | elseif member ~= nil then 110 | payload = member.payload 111 | end 112 | 113 | local clock_delta 114 | if params ~= nil and params.clock_delta ~= nil then 115 | clock_delta = params.clock_delta 116 | elseif member ~= nil then 117 | clock_delta = member.clock_delta 118 | end 119 | 120 | _all_members[uri] = { 121 | status = status, 122 | incarnation = incarnation, 123 | payload = payload, 124 | timestamp = fiber.time64(), 125 | clock_delta = clock_delta 126 | } 127 | end 128 | 129 | function members.count() 130 | local count = 0 131 | for _ in pairs(_all_members) do 132 | count = count + 1 133 | end 134 | return count 135 | end 136 | 137 | function members.remove(uri) 138 | checks('string') 139 | 140 | _all_members[uri] = nil 141 | local stash = rawget(_G, '__membership_stash') 142 | stash['members._all_members'][uri] = nil 143 | stash['_resolve_cache'][uri] = nil 144 | end 145 | 146 | return members 147 | -------------------------------------------------------------------------------- /membership/network.lua: -------------------------------------------------------------------------------- 1 | local ffi = require('ffi') 2 | local bit = require('bit') 3 | 4 | local stash = require('membership.stash') 5 | if not stash.get('network.cdef_getifaddrs') then 6 | ffi.cdef([[ 7 | struct ifaddrs { 8 | struct ifaddrs *ifa_next; /* Next item in list */ 9 | char *ifa_name; /* Name of interface */ 10 | unsigned int ifa_flags; /* Flags from SIOCGIFFLAGS */ 11 | struct sockaddr *ifa_addr; /* Address of interface */ 12 | struct sockaddr *ifa_netmask; /* Netmask of interface */ 13 | union { 14 | struct sockaddr *ifu_broadaddr; /* Broadcast address of interface */ 15 | struct sockaddr *ifu_dstaddr; /* Point-to-point destination address */ 16 | } ifa_ifu; 17 | void *ifa_data; /* Address-specific data */ 18 | }; 19 | 20 | struct in_addr { 21 | uint32_t s_addr; 22 | }; 23 | 24 | enum { 25 | IFF_UP = 0x1, /* interface is up */ 26 | IFF_BROADCAST = 0x2, /* broadcast address valid */ 27 | IFF_POINTOPOINT = 0x10 /* interface is has p-p link */ 28 | }; 29 | 30 | enum { 31 | AF_INET = 2 /* Internet IP Protocol */ 32 | }; 33 | 34 | const char *strerror(int errno); 35 | int getifaddrs(struct ifaddrs **ifap); 36 | void freeifaddrs(struct ifaddrs *ifa); 37 | const char *inet_ntop(int af, const void *src, 38 | char *dst, socklen_t size); 39 | ]]) 40 | 41 | if ffi.os == "Linux" then 42 | ffi.cdef([[ 43 | struct sockaddr { 44 | uint16_t sa_family; /* address family, AF_xxx */ 45 | char sa_data[14]; /* 14 bytes of protocol address */ 46 | }; 47 | 48 | /* Structure describing an Internet (IP) socket address. */ 49 | struct sockaddr_in { 50 | uint16_t sin_family; /* Address family */ 51 | uint16_t sin_port; /* Port number */ 52 | struct in_addr sin_addr; /* Internet address */ 53 | }; 54 | ]]) 55 | elseif ffi.os == "OSX" then 56 | ffi.cdef([[ 57 | struct sockaddr { 58 | uint8_t sa_len; 59 | uint8_t sa_family; /* address family, AF_xxx */ 60 | char sa_data[14]; /* 14 bytes of protocol address */ 61 | }; 62 | 63 | /* Structure describing an Internet (IP) socket address. */ 64 | struct sockaddr_in { 65 | uint8_t sin_len; 66 | uint8_t sin_family; /* Address family */ 67 | uint16_t sin_port; /* Port number */ 68 | struct in_addr sin_addr; /* Internet address */ 69 | }; 70 | ]]) 71 | end 72 | 73 | stash.set('network.cdef_getifaddrs', true) 74 | end 75 | 76 | 77 | --- List active AF_INET interfaces. 78 | -- Compose a table of the following structure: 79 | -- { 80 | -- [1] = { 81 | -- name = ifa_name, 82 | -- inet4 = "0.0.0.0", 83 | -- bcast = "0.0.0.0", -- if broadcast flag is set 84 | -- }, 85 | -- } 86 | local function getifaddrs() 87 | local ifaddrs_root = ffi.new("struct ifaddrs *[1]") 88 | local res = ffi.C.getifaddrs(ifaddrs_root) 89 | if res ~= 0 then 90 | local errno = ffi.errno() 91 | local strerr = ffi.C.strerror(errno) 92 | error(ffi.string(strerr)) 93 | end 94 | 95 | local ret = {} 96 | local buf = ffi.new("char[32]") 97 | local iap = ifaddrs_root[0] 98 | while iap ~= nil do 99 | if bit.band(iap.ifa_flags, ffi.C.IFF_UP) ~= 0 then 100 | local ifa = {} 101 | ifa.name = ffi.string(iap.ifa_name) 102 | 103 | if iap.ifa_addr ~= nil and iap.ifa_addr.sa_family == ffi.C.AF_INET then 104 | local sa = ffi.cast("struct sockaddr_in *", iap.ifa_addr) 105 | ffi.C.inet_ntop(sa.sin_family, sa.sin_addr, buf, ffi.sizeof(buf)) 106 | ifa.inet4 = ffi.string(buf) 107 | 108 | if bit.band(iap.ifa_flags, ffi.C.IFF_BROADCAST) ~= 0 then 109 | local sa = ffi.cast("struct sockaddr_in *", iap.ifa_ifu.ifu_broadaddr) 110 | ffi.C.inet_ntop(sa.sin_family, sa.sin_addr, buf, ffi.sizeof(buf)) 111 | ifa.bcast = ffi.string(buf) 112 | end 113 | 114 | table.insert(ret, ifa) 115 | end 116 | end 117 | iap = iap.ifa_next 118 | end 119 | 120 | ffi.C.freeifaddrs(ifaddrs_root[0]) 121 | return ret 122 | end 123 | 124 | return { 125 | getifaddrs = getifaddrs, 126 | } 127 | -------------------------------------------------------------------------------- /membership/options.lua: -------------------------------------------------------------------------------- 1 | --- Tuning options for membership module. 2 | -- This module should normally never be used 3 | -- 4 | -- @submodule membership 5 | 6 | local log = require('log') 7 | local cbc = require('crypto').cipher.aes256.cbc 8 | 9 | local stash = require('membership.stash') 10 | 11 | local options = stash.get('options') 12 | if options == nil then 13 | options = {} 14 | else 15 | options = setmetatable(table.copy(options), nil) 16 | end 17 | 18 | function options.after_reload() 19 | stash.set('options', options) 20 | end 21 | 22 | options.STATUS_NAMES = {'alive', 'suspect', 'dead', 'non-decryptable', 'left'} 23 | options.ALIVE = 1 24 | options.SUSPECT = 2 25 | options.DEAD = 3 26 | options.NONDECRYPTABLE = 4 27 | options.LEFT = 5 28 | 29 | --- Period of sending direct PINGs. 30 | -- Denoted as `T'` in [SWIM paper](swim-paper.pdf). 31 | -- 32 | -- Default is 1 33 | options.PROTOCOL_PERIOD_SECONDS = 1.0 34 | 35 | --- Time to wait for ACK message after PING. 36 | -- If a member does not reply within this time, 37 | -- the indirect ping algorithm is invoked. 38 | -- 39 | -- Default is 0.2 40 | options.ACK_TIMEOUT_SECONDS = 0.200 41 | 42 | --- Period to perform anti-entropy sync. 43 | -- Algorithm is described in [SWIM paper](swim-paper.pdf). 44 | -- 45 | -- Default is 10 46 | options.ANTI_ENTROPY_PERIOD_SECONDS = 10.0 47 | 48 | --- Toggle producing `suspect` rumors when ping fails. Even if disabled, 49 | -- it doesn't affect neither gossip dissemination nor other statuses 50 | -- generation (e.g. `dead` and `non-decryptable`). 51 | -- 52 | -- Default is `true` 53 | options.SUSPICIOUSNESS = true 54 | 55 | --- Timeout to mark `suspect` members as `dead`. 56 | -- 57 | -- Default is 3 58 | options.SUSPECT_TIMEOUT_SECONDS = 3 59 | 60 | --- Number of members to try indirectly pinging a `suspect`. 61 | -- Denoted as `k` in [SWIM paper](swim-paper.pdf). 62 | -- 63 | -- Default is 3 64 | options.NUM_FAILURE_DETECTION_SUBGROUPS = 3 65 | 66 | --- Maximum size of UPD packets to send. 67 | -- 68 | -- Default is 1472 (`Default-MTU (1500) - IP-Header (20) - UDP-Header (8)`) 69 | options.MAX_PACKET_SIZE = 1472 70 | 71 | --- Initialization vector for aes256 CBC encryption. 72 | options.ENCRYPTION_INIT = 'init-key-16-byte' 73 | 74 | 75 | options.log_debug = log.debug 76 | 77 | function options.get_encryption_key() 78 | return options.encryption_key 79 | end 80 | 81 | function options.set_encryption_key(key) 82 | if key == nil then 83 | rawset(options, 'encryption_key', nil) 84 | log.info('Membership encryption disabled') 85 | else 86 | if key:len() < 32 then 87 | rawset(options, 'encryption_key', key:rjust(32)) 88 | else 89 | rawset(options, 'encryption_key', key:sub(1, 32)) 90 | end 91 | log.info('Membership encryption enabled') 92 | end 93 | end 94 | 95 | function options.encrypted_size(len) 96 | if not options.encryption_key then 97 | return len 98 | else 99 | return math.ceil((len+1)/16)*16 100 | end 101 | end 102 | 103 | function options.encrypt(msg) 104 | if not options.encryption_key then 105 | return msg, nil 106 | else 107 | return cbc.encrypt( 108 | msg, 109 | options.encryption_key, 110 | options.ENCRYPTION_INIT 111 | ) 112 | end 113 | end 114 | 115 | function options.decrypt(msg) 116 | if not options.encryption_key then 117 | return msg, nil 118 | else 119 | return cbc.decrypt( 120 | msg, 121 | options.encryption_key, 122 | options.ENCRYPTION_INIT 123 | ) 124 | end 125 | end 126 | 127 | setmetatable(options, { 128 | __newindex = function(_, idx, val) 129 | print(idx, val) 130 | error("options table is readonly") 131 | end 132 | }) 133 | 134 | return options 135 | -------------------------------------------------------------------------------- /membership/stash.lua: -------------------------------------------------------------------------------- 1 | local S = rawget(_G, '__membership_stash') or {} 2 | 3 | S['_allowed_uri_set'] = S['_allowed_uri_set'] or {} 4 | 5 | local log = require('log') 6 | 7 | local function f_body(fn_name, ...) 8 | local fiber = require('fiber') 9 | while true do 10 | S[fn_name](...) 11 | fiber.testcancel() 12 | end 13 | end 14 | 15 | assert( 16 | debug.getinfo(f_body, 'u').nups == 1, 17 | 'Exceess closure upvalue' 18 | ) 19 | 20 | local function fiber_new(fn_name, ...) 21 | if not S[fn_name] then 22 | error(('function %s not implemented'):format(fn_name), 2) 23 | end 24 | 25 | local k = 'fiber.' .. fn_name 26 | S[k] = require('fiber').new(f_body, fn_name, ...) 27 | return S[k] 28 | end 29 | 30 | local function fiber_cancel(fn_name) 31 | local k = 'fiber.' .. fn_name 32 | if S[k] ~= nil and S[k]:status() ~= 'dead' then 33 | local ok, err = pcall(S[k].cancel, S[k]) 34 | if not ok then 35 | log.error('Fiber %s cancel error: %s', fn_name, err) 36 | end 37 | S[k] = nil 38 | end 39 | end 40 | 41 | rawset(_G, '__membership_stash', S) 42 | 43 | return { 44 | get = function(k) return S[k] end, 45 | set = function(k, v) S[k] = v end, 46 | fiber_new = fiber_new, 47 | fiber_cancel = fiber_cancel, 48 | } 49 | -------------------------------------------------------------------------------- /test/helpers/cluster.lua: -------------------------------------------------------------------------------- 1 | local fio = require('fio') 2 | local log = require('log') 3 | local socket = require('socket') 4 | local Server = require('test.helpers.server') 5 | local cluster = {} 6 | 7 | function cluster.start(hostname, ports) 8 | local datadir = fio.pathjoin(fio.cwd(), 'test_cluster_data') 9 | if fio.path.exists(datadir) then 10 | fio.rmtree(datadir) 11 | end 12 | fio.mkdir(datadir) 13 | 14 | if cluster.servers ~= nil then 15 | log.warn("Cluster is already running") 16 | return 17 | end 18 | 19 | if type(ports) ~= 'table' or #ports == 0 then 20 | error("Ports for cluster servers are not specified") 21 | end 22 | 23 | for _, port in ipairs(ports) do 24 | local sock = socket.tcp() 25 | local is_busy = sock:connect(hostname, port) 26 | sock:close() 27 | if is_busy then 28 | error("Port " .. port .. " is already in use!") 29 | end 30 | end 31 | 32 | log.info("Starting a cluster with ports: " .. table.concat(ports, ", ")) 33 | 34 | cluster.servers = {} 35 | 36 | local instance_path = fio.pathjoin(fio.cwd(), "test", "helpers", 'instance.lua') 37 | 38 | for i, port in ipairs(ports) do 39 | local alias = 'server-' .. i 40 | local workdir = fio.pathjoin(datadir, 'server-' .. i) 41 | 42 | fio.mkdir(workdir) 43 | fio.mkdir(fio.pathjoin(workdir, 'wal')) 44 | fio.mkdir(fio.pathjoin(workdir, 'vinyl')) 45 | 46 | local server_config = { 47 | alias = alias, 48 | command = instance_path, 49 | workdir = workdir, 50 | args = { 51 | '--wal-dir', fio.pathjoin(workdir, 'wal'), 52 | '--vinyl-dir', fio.pathjoin(workdir, 'vinyl') 53 | }, 54 | advertise_port = tonumber(port), 55 | env = { 56 | TARANTOOL_LISTEN = tostring(port), 57 | TARANTOOL_HOSTNAME = hostname, 58 | }, 59 | 60 | net_box_credentials = { 61 | user = 'guest', 62 | password = "", 63 | }, 64 | cluster_cookie = "" 65 | 66 | } 67 | 68 | local server = Server:new(server_config) 69 | table.insert(cluster.servers, server) 70 | 71 | server:start() 72 | 73 | log.info("Server " .. alias .. " is running on port " .. port) 74 | end 75 | 76 | for _, server in ipairs(cluster.servers) do 77 | server:wait_until_ready({ timeout = 10 }) 78 | end 79 | 80 | log.info("The cluster was successfully started, the number of servers: " .. #cluster.servers) 81 | return true 82 | end 83 | 84 | function cluster.stop() 85 | if cluster.servers == nil then 86 | log.warn("The cluster was not started") 87 | return 88 | end 89 | 90 | log.info("Stopping the cluster...") 91 | 92 | for _, server in ipairs(cluster.servers) do 93 | server:stop() 94 | log.info("The server " .. server.alias .. " is stopped") 95 | end 96 | 97 | cluster.servers = nil 98 | 99 | log.info("Cluster has been successfully stopped") 100 | return true 101 | end 102 | 103 | return cluster 104 | -------------------------------------------------------------------------------- /test/helpers/instance.lua: -------------------------------------------------------------------------------- 1 | require('strict').on() 2 | local log = require('log') 3 | local fiber = require('fiber') 4 | 5 | local checks = require('checks') 6 | package.loaded['checks'] = function(...) 7 | if rawget(_G, "checks_disabled") == true then 8 | return 9 | end 10 | return checks(...) 11 | end 12 | 13 | local membership = require('membership') 14 | _G.membership = membership 15 | 16 | if rawget(_G, "is_initialized") == nil then 17 | _G.is_initialized = false 18 | end 19 | 20 | local listen = os.getenv('TARANTOOL_LISTEN') or '13301' 21 | print("Starting Tarantool instance on port:", listen) 22 | local wal_dir = arg[2] or './wal' 23 | local vinyl_dir = arg[4] or './vinyl' 24 | 25 | box.cfg({ 26 | listen = listen, 27 | wal_dir = os.getenv('TARANTOOL_WAL_DIR') or wal_dir, 28 | vinyl_dir = os.getenv('TARANTOOL_VINYL_DIR') or vinyl_dir, 29 | work_dir = os.getenv('TARANTOOL_WORKDIR') or '.' 30 | }) 31 | 32 | print("Starting server on port:", listen) 33 | local hostname = os.getenv('TARANTOOL_HOSTNAME') or 'localhost' 34 | 35 | box.schema.user.grant('guest', 'execute', 'universe', nil, { if_not_exists = true }) 36 | 37 | -- Tune periods to speed up tests 38 | -- Supposing loopback roundtrip is about 0.1ms 39 | local opts = require('membership.options') 40 | opts.PROTOCOL_PERIOD_SECONDS = 0.2 41 | opts.ACK_TIMEOUT_SECONDS = 0.1 42 | opts.ANTI_ENTROPY_PERIOD_SECONDS = 2 43 | opts.SUSPECT_TIMEOUT_SECONDS = 2 44 | 45 | if not _G.is_initialized then 46 | -- Monkeypatch socket library to validate MAX_PACKET_SIZE 47 | local socket_lib = require('socket') 48 | 49 | local socket_mt = getmetatable(socket_lib) 50 | local create_socket = socket_mt.__call 51 | socket_mt.__call = function(...) 52 | log.error('Monkeypatching socket') 53 | local sock = create_socket(...) 54 | local sendto = sock.sendto 55 | function sock.sendto(self, host, port, msg) 56 | if #msg > opts.MAX_PACKET_SIZE then 57 | log.error('Packet too big, %d > %d', #msg, opts.MAX_PACKET_SIZE) 58 | os.exit(220) 59 | end 60 | return sendto(self, host, port, msg) 61 | end 62 | 63 | return sock 64 | end 65 | end 66 | 67 | membership.init(hostname, tonumber(listen)) 68 | _G.is_initialized = true 69 | 70 | _G.package.reload = function() 71 | local csw1 = fiber.info()[fiber.id()].csw 72 | 73 | package.loaded['membership'] = nil 74 | log.info('Doing file %s...', arg[0]) 75 | dofile(arg[0]) 76 | 77 | local csw2 = fiber.info()[fiber.id()].csw 78 | assert(csw1 == csw2, 'Unexpected yield') 79 | 80 | log.info('Dofile succeeded') 81 | return true 82 | end 83 | -------------------------------------------------------------------------------- /test/helpers/server.lua: -------------------------------------------------------------------------------- 1 | --- Extended luatest.Server class to run a cartridge instance. 2 | -- 3 | -- @classmod cartridge.test-helpers.server 4 | 5 | local fun = require('fun') 6 | local log = require('log') 7 | local fio = require('fio') 8 | local luatest = require('luatest') 9 | local yaml = require('yaml') 10 | local checks = require('checks') 11 | 12 | --- Build server object. 13 | -- @function new 14 | -- @param object 15 | -- @string object.command Command to start server process. 16 | -- @string object.workdir Value to be passed in `TARANTOOL_WORKDIR`. 17 | -- @bool[opt] object.chdir Path to cwd before starting a process. 18 | -- @tab[opt] object.env Table to pass as env variables to process. 19 | -- @tab[opt] object.args Args to run command with. 20 | -- @int[opt] object.http_port Value to be passed in `TARANTOOL_HTTP_PORT` and used to perform HTTP requests. 21 | -- @int object.advertise_port Value to generate `TARANTOOL_ADVERTISE_URI` and used for net_box connection. 22 | -- @int[opt] object.net_box_port Alias for `advertise_port`. 23 | -- @tab[opt] object.net_box_credentials Override default net_box credentials. 24 | -- @string object.alias Instance alias. 25 | -- @string object.cluster_cookie Value to be passed in `TARANTOOL_CLUSTER_COOKIE` and used as default net_box password. 26 | -- @string[opt] object.instance_uuid Server identifier. 27 | -- @string[opt] object.replicaset_uuid Replicaset identifier. 28 | -- @string[opt] object.zone Vshard zone. 29 | -- @number[opt] object.swim_period SWIM protocol period in seconds. 30 | -- @return input object 31 | local Server = luatest.Server:inherit({}) 32 | 33 | Server.constructor_checks = fun.chain(Server.constructor_checks, { 34 | alias = 'string', 35 | cluster_cookie = 'string', 36 | 37 | advertise_port = 'number', 38 | advertise_uri = '?string', 39 | 40 | instance_uuid = '?string', 41 | replicaset_uuid = '?string', 42 | labels = '?table', 43 | zone = '?string', 44 | swim_period = '?number', 45 | 46 | transport = '?string', 47 | ssl_ciphers = '?string', 48 | ssl_server_ca_file = '?string', 49 | ssl_server_cert_file = '?string', 50 | ssl_server_key_file = '?string', 51 | ssl_server_password = '?string', 52 | ssl_client_ca_file = '?string', 53 | ssl_client_cert_file = '?string', 54 | ssl_client_key_file = '?string', 55 | ssl_client_password = '?string', 56 | }):tomap() 57 | 58 | function Server:initialize() 59 | self.net_box_port = self.net_box_port or self.advertise_port 60 | self.net_box_uri = 'localhost:' .. self.net_box_port 61 | self.advertise_uri = self.advertise_uri or self.net_box_uri 62 | self.net_box_credentials = self.net_box_credentials or { 63 | user = 'admin', 64 | password = self.cluster_cookie, 65 | } 66 | 67 | if self.instance_uuid == nil then 68 | self.instance_uuid = require('uuid').str() 69 | end 70 | getmetatable(getmetatable(self)).initialize(self) 71 | end 72 | 73 | --- Generates environment to run process with. 74 | -- The result is merged into os.environ(). 75 | -- @return map 76 | function Server:build_env() 77 | return { 78 | TARANTOOL_ALIAS = self.alias, 79 | TARANTOOL_WORKDIR = self.workdir, 80 | TARANTOOL_HTTP_PORT = self.http_port, 81 | TARANTOOL_ADVERTISE_URI = self.advertise_uri, 82 | TARANTOOL_CLUSTER_COOKIE = self.cluster_cookie, 83 | -- speedup tests by amplifying membership message exchange 84 | TARANTOOL_SWIM_PROTOCOL_PERIOD_SECONDS = self.swim_period or 0.2, 85 | 86 | TARANTOOL_TRANSPORT = self.transport, 87 | TARANTOOL_SSL_CIPHERS = self.ssl_ciphers, 88 | TARANTOOL_SSL_SERVER_CA_FILE = self.ssl_server_ca_file, 89 | TARANTOOL_SSL_SERVER_CERT_FILE = self.ssl_server_cert_file, 90 | TARANTOOL_SSL_SERVER_KEY_FILE = self.ssl_server_key_file, 91 | TARANTOOL_SSL_SERVER_PASSWORD = self.ssl_server_password, 92 | TARANTOOL_SSL_CLIENT_CA_FILE = self.ssl_client_ca_file, 93 | TARANTOOL_SSL_CLIENT_CERT_FILE = self.ssl_client_cert_file, 94 | TARANTOOL_SSL_CLIENT_KEY_FILE = self.ssl_client_key_file, 95 | TARANTOOL_SSL_CLIENT_PASSWORD = self.ssl_client_password, 96 | } 97 | end 98 | 99 | local function reconnect(connection_old) 100 | local server = connection_old._server 101 | log.debug( 102 | 'Netbox %s (%s): connection lost', 103 | server.alias, server.advertise_uri 104 | ) 105 | local fiber = require('fiber') 106 | fiber.new(function() 107 | if type(server.net_box_uri) == 'string' then 108 | fiber.name(string.format('reconnect/%s', server.net_box_uri)) 109 | elseif type(server.net_box_uri) == 'table' then 110 | fiber.name(string.format('reconnect/%s', server.net_box_uri.uri)) 111 | end 112 | local uri = server.net_box_uri 113 | 114 | local connection_new = require('net.box').connect( 115 | uri, server.net_box_credentials 116 | ) 117 | 118 | if server.net_box ~= connection_old then 119 | -- Someone has already assigned `self.net_box` 120 | -- while this fiber was trying to establish a new one. 121 | -- Don't interfere in this case. 122 | return 123 | end 124 | 125 | if connection_new.error then 126 | log.debug( 127 | 'Netbox %s (%s) reconnect failed: %s', 128 | server.alias, server.advertise_uri, connection_new.error 129 | ) 130 | return 131 | else 132 | log.debug( 133 | 'Netbox %s (%s) reconnected', 134 | server.alias, server.advertise_uri 135 | ) 136 | end 137 | 138 | connection_new:on_disconnect(reconnect) 139 | server.net_box = connection_new 140 | server.net_box._server = server 141 | end) 142 | end 143 | 144 | function Server:connect_net_box() 145 | local transport = self.transport 146 | if transport ~= nil and type(transport) == 'string' then 147 | transport = transport:lower() 148 | end 149 | if transport == 'ssl' then 150 | if type(self.net_box_uri) == 'string' then 151 | self.net_box_uri = { 152 | uri = self.net_box_uri, 153 | params = { 154 | transport = transport, 155 | ssl_ciphers = self.ssl_ciphers, 156 | ssl_cert_file = self.ssl_client_cert_file, 157 | ssl_key_file = self.ssl_client_key_file, 158 | ssl_password = self.ssl_client_password, 159 | ssl_ca_file = self.ssl_client_ca_file, 160 | } 161 | } 162 | end 163 | end 164 | 165 | getmetatable(getmetatable(self)).connect_net_box(self) 166 | self.net_box._server = self 167 | self.net_box:on_disconnect(reconnect) 168 | return self.net_box 169 | end 170 | 171 | --- Start the server. 172 | function Server:start() 173 | getmetatable(getmetatable(self)).start(self) 174 | luatest.helpers.retrying({}, function() 175 | self:connect_net_box() 176 | end) 177 | end 178 | 179 | --- Stop server process. 180 | function Server:stop() 181 | local process = self.process 182 | if process == nil then 183 | return 184 | end 185 | if self.net_box then 186 | -- Don't try to reconnect anymore 187 | self.net_box:on_disconnect(nil, reconnect) 188 | end 189 | getmetatable(getmetatable(self)).stop(self) 190 | luatest.helpers.retrying({}, function() 191 | luatest.assert_not( 192 | process:is_alive(), 193 | string.format('Process %s is still running', self.alias) 194 | ) 195 | end) 196 | log.warn('Process %s killed', self.alias) 197 | end 198 | 199 | --- Perform GraphQL request. 200 | -- @tparam table request 201 | -- @tparam string request.query 202 | -- grapqhl query 203 | -- @tparam ?table request.variables 204 | -- variables for graphql query 205 | -- @tparam ?boolean request.raise 206 | -- raise if response contains an error 207 | -- (default: **true**) 208 | -- @tparam[opt] table http_options 209 | -- passed to `http_request` options. 210 | -- @treturn table parsed response JSON. 211 | -- @raise 212 | -- * HTTPRequest error 213 | -- * GraphQL error 214 | function Server:graphql(request, http_options) 215 | checks('table', { 216 | query = 'string', 217 | variables = '?table', 218 | raise = '?boolean' 219 | }, '?table') 220 | 221 | log.debug('GraphQL request to %s (%s)', self.alias, self.advertise_uri) 222 | log.debug('Query: %s', request.query) 223 | if request.variables ~= nil then 224 | log.debug('Variables:\n%s', yaml.encode(request.variables)) 225 | end 226 | 227 | if request.raise == nil then 228 | request.raise = true 229 | end 230 | 231 | http_options = table.copy(http_options) or {} 232 | http_options.json = { 233 | query = request.query, 234 | variables = request.variables, 235 | } 236 | 237 | local webui_prefix = self.env and self.env.TARANTOOL_WEBUI_PREFIX or '' 238 | local api_endpoint = fio.pathjoin('/', webui_prefix, 'admin/api') 239 | local response = self:http_request('post', api_endpoint, http_options) 240 | 241 | local errors = response.json and response.json.errors 242 | if errors and request.raise then 243 | error(errors[1].message, 2) 244 | end 245 | return response.json 246 | end 247 | 248 | --- Advertise this server to the cluster. 249 | -- @param main_server Server to perform GraphQL request on. 250 | -- @param[opt] options 251 | -- @param options.timeout request timeout 252 | 253 | function Server:wait_until_ready() 254 | local timeout = 60 -- Таймаут ожидания 255 | local start_time = os.time() 256 | while os.time() - start_time < timeout do 257 | if self:is_ready() then 258 | print("Server " .. self.alias .. " is ready") 259 | return true 260 | end 261 | require('fiber').sleep(0.1) 262 | end 263 | error("Timed out waiting for server " .. self.alias .. " to become ready") 264 | end 265 | 266 | function Server:is_ready() 267 | local net_box = require('net.box') 268 | local conn = net_box.connect(self.advertise_port, { user = 'guest', password = '' }) 269 | if conn:is_connected() then 270 | conn:close() 271 | return true 272 | else 273 | return false 274 | end 275 | end 276 | 277 | function Server:join_cluster(main_server, options) 278 | log.debug('Adding ' .. self.advertise_uri .. '(' .. self.alias .. '):') 279 | return main_server:graphql({ 280 | query = [[ 281 | mutation( 282 | $uri: String!, 283 | $instance_uuid: String, 284 | $replicaset_uuid: String, 285 | $timeout: Float 286 | $labels: [LabelInput] 287 | ) { 288 | join_server( 289 | uri: $uri, 290 | instance_uuid: $instance_uuid, 291 | replicaset_uuid: $replicaset_uuid, 292 | timeout: $timeout 293 | labels: $labels 294 | ) 295 | } 296 | ]], 297 | variables = { 298 | uri = self.advertise_uri, 299 | instance_uuid = self.instance_uuid, 300 | replicaset_uuid = self.replicaset_uuid, 301 | timeout = options and options.timeout, 302 | labels = self.labels, 303 | } 304 | }) 305 | end 306 | 307 | --- Update server's replicaset config. 308 | -- @param config 309 | -- @param config.uuid replicaset uuid 310 | -- @param config.roles list of roles 311 | -- @param config.master 312 | -- @param config.weight 313 | function Server:setup_replicaset(config) 314 | self:graphql({ 315 | query = [[ 316 | mutation( 317 | $uuid: String!, 318 | $alias: String, 319 | $roles: [String!], 320 | $master: [String!], 321 | $weight: Float, 322 | $vshard_group: String 323 | ) { 324 | edit_replicaset( 325 | uuid: $uuid, 326 | alias: $alias, 327 | roles: $roles, 328 | master: $master, 329 | weight: $weight, 330 | vshard_group: $vshard_group 331 | ) 332 | } 333 | ]], 334 | variables = { 335 | uuid = config.uuid, 336 | alias = config.alias, 337 | roles = config.roles, 338 | master = config.master, 339 | weight = config.weight, 340 | vshard_group = config.vshard_group, 341 | } 342 | }) 343 | end 344 | 345 | --- Upload application config. 346 | -- @tparam string|table config - table will be encoded as yaml and posted to /admin/config. 347 | -- @param table opts - http request options 348 | function Server:upload_config(config, opts) 349 | checks('table', 'string|table', 'table|nil') 350 | if type(config) == 'table' then 351 | config = yaml.encode(config) 352 | end 353 | if opts == nil then 354 | opts = {} 355 | end 356 | opts.body = config 357 | return self:http_request('put', '/admin/config', opts) 358 | end 359 | 360 | --- Download application config. 361 | function Server:download_config() 362 | return yaml.decode(self:http_request('get', '/admin/config').body) 363 | end 364 | 365 | function Server:add_member(uri) 366 | return self:exec(function(u) 367 | return membership.add_member(u) 368 | end, { uri }) 369 | end 370 | 371 | function Server:probe_uri(uri) 372 | return self:exec(function(u) 373 | return membership.probe_uri(u) 374 | end, { uri }) 375 | end 376 | 377 | function Server:broadcast(port) 378 | return self:exec(function(p) 379 | return membership.broadcast(p) 380 | end, { port }) 381 | end 382 | 383 | function Server:members() 384 | return self:exec(function() 385 | return membership.members() 386 | end) 387 | end 388 | 389 | function Server:get_member(uri) 390 | return self:exec(function(u) 391 | return membership.get_member(u) 392 | end, { uri }) 393 | end 394 | 395 | function Server:myself() 396 | return self:exec(function() 397 | return membership.myself() 398 | end) 399 | end 400 | 401 | function Server:check_status(uri, status) 402 | local exec_status = self:exec(function(u) 403 | return membership.get_member(u) 404 | end, { uri })['status'] 405 | luatest.assert_equals(exec_status, status) 406 | end 407 | 408 | return Server 409 | -------------------------------------------------------------------------------- /test/integration/allowed_members_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | local fiber = require('fiber') 5 | 6 | local SERVER_LIST = { 13301, 13302, 13303, 13304, 13305 } 7 | --[[ 8 | 13301: myself -> visible 9 | 13302: alive and allowed -> visible 10 | 13303: alive and not allowed -> visible 11 | 13304: dead and allowed -> visible 12 | 13305: dead and not allowed -> removed 13 | ]] 14 | 15 | g.before_all(function() 16 | cluster.start('localhost', SERVER_LIST) 17 | end) 18 | 19 | g.after_all(function() 20 | cluster.stop() 21 | end) 22 | 23 | g.test_smoke = function() 24 | for i = 1, 5 do 25 | t.assert(cluster.servers[1]:exec(function(port) 26 | return membership.probe_uri(string.format('localhost:%d', port)) 27 | end, { SERVER_LIST[i] })) 28 | end 29 | 30 | -- Everyone is allowed 31 | cluster.servers[1]:exec(function() 32 | return membership.set_allowed_members({ 33 | 'localhost:13301', 'localhost:13302', 'localhost:13304', 34 | }) 35 | end) 36 | 37 | -- Wait for the new events 38 | fiber.sleep(2) 39 | 40 | -- Everyone is visible, because everyone is alive 41 | for i = 2, 5 do 42 | t.assert_equals(cluster.servers[1]:get_member( 43 | string.format('localhost:%d', SERVER_LIST[i]) 44 | )['status'], 'alive') 45 | end 46 | 47 | cluster.servers[4]:stop() 48 | cluster.servers[5]:stop() 49 | 50 | t.helpers.retrying( 51 | {}, 52 | cluster.servers[1].check_status, 53 | cluster.servers[1], 'localhost:13304', 'dead' 54 | ) 55 | 56 | t.assert_equals( 57 | cluster.servers[1]:get_member('localhost:13302')['status'], 58 | 'alive' 59 | ) 60 | t.assert_equals( 61 | cluster.servers[1]:get_member('localhost:13303')['status'], 62 | 'alive' 63 | ) 64 | t.assert_equals( 65 | cluster.servers[1]:get_member('localhost:13304')['status'], 66 | 'dead' 67 | ) 68 | t.assert_equals(cluster.servers[1]:get_member('localhost:13305'), nil) 69 | end 70 | -------------------------------------------------------------------------------- /test/integration/broadcast_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | local socket = require("socket") 5 | 6 | local function get_local_ip() 7 | local hostname = nil 8 | 9 | local udp_socket = socket('AF_INET', 'SOCK_DGRAM', 'udp') 10 | local ok, _ = pcall(function() 11 | udp_socket:sysconnect("8.8.8.8", 80) 12 | hostname = udp_socket:name().host 13 | udp_socket:close() 14 | end) 15 | 16 | if not ok then 17 | hostname = 'localhost' 18 | end 19 | 20 | return hostname 21 | end 22 | 23 | local HOSTNAME = get_local_ip() 24 | local SERVER_LIST = { 33001, 33002 } 25 | 26 | g.before_all(function() 27 | cluster.start(HOSTNAME, SERVER_LIST) 28 | end) 29 | 30 | g.after_all(function() 31 | cluster.stop() 32 | end) 33 | 34 | g.test_join = function() 35 | cluster.servers[2]:broadcast(33001) 36 | 37 | t.helpers.retrying( 38 | {}, 39 | cluster.servers[2].check_status, 40 | cluster.servers[2], HOSTNAME .. ':33001', 'alive' 41 | ) 42 | t.helpers.retrying( 43 | {}, 44 | cluster.servers[1].check_status, 45 | cluster.servers[1], HOSTNAME .. ':33002', 'alive' 46 | ) 47 | 48 | t.assert(cluster.servers[1]:probe_uri(HOSTNAME .. ':33002')) 49 | t.assert(cluster.servers[2]:probe_uri(HOSTNAME .. ':33001')) 50 | end 51 | -------------------------------------------------------------------------------- /test/integration/dead_myself_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301 } 6 | 7 | g.before_all(function() 8 | cluster.start('not-available', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_dead = function() 16 | t.helpers.retrying( 17 | {}, 18 | cluster.servers[1].check_status, 19 | cluster.servers[1], 'not-available:13301', 'dead' 20 | ) 21 | end 22 | -------------------------------------------------------------------------------- /test/integration/dissemination_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | local fiber = require('fiber') 5 | local log = require('log') 6 | 7 | local FIRST_PORT = 13301 8 | local SERVER_COUNT = 100 9 | local SERVER_LIST = {} 10 | for i = 1, SERVER_COUNT do 11 | SERVER_LIST[i] = FIRST_PORT + i - 1 12 | end 13 | 14 | g.before_all(function() 15 | cluster.start('localhost', SERVER_LIST) 16 | end) 17 | 18 | g.after_all(function() 19 | cluster.stop() 20 | end) 21 | 22 | g.test_discover_join = function() 23 | local start = fiber.clock() 24 | for i = 1, SERVER_COUNT do 25 | t.assert(cluster.servers[1]:probe_uri( 26 | string.format('localhost:%s', FIRST_PORT + i - 1))) 27 | end 28 | local duration = fiber.clock() - start 29 | log.info(string.format("Probe all in %.3fs", duration)) 30 | 31 | start = fiber.clock() 32 | t.helpers.retrying({}, function() 33 | for _, server in ipairs(cluster.servers) do 34 | local alive_count = server:exec(function() 35 | local alive_count = 0 36 | for _, m in membership.pairs() do 37 | if m.status == 'alive' then 38 | alive_count = alive_count + 1 39 | end 40 | end 41 | return alive_count 42 | end) 43 | t.assert_equals(alive_count, SERVER_COUNT) 44 | end 45 | end) 46 | duration = fiber.clock() - start 47 | log.info(string.format('Full mesh in %.3fs', duration)) 48 | end 49 | 50 | g.test_discover_kill = function() 51 | cluster.servers[1]:stop() 52 | 53 | t.helpers.retrying({}, function() 54 | -- Check that all members consider URI has given STATUS 55 | 56 | local uri = string.format('localhost:%s', FIRST_PORT) 57 | for i = 2, SERVER_COUNT do 58 | local member = cluster.servers[i]:get_member(uri) 59 | t.assert_not_equals(member, nil) 60 | t.assert_not_equals(member['status'], 'alive') 61 | end 62 | end) 63 | end 64 | -------------------------------------------------------------------------------- /test/integration/encryption_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_join = function() 16 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 17 | t.assert_equals(cluster.servers[1]:exec(function() 18 | return membership.get_encryption_key() 19 | end), nil) 20 | 21 | t.helpers.retrying( 22 | {}, 23 | cluster.servers[2].check_status, 24 | cluster.servers[2], 'localhost:13301', 'alive' 25 | ) 26 | t.helpers.retrying( 27 | {}, 28 | cluster.servers[1].check_status, 29 | cluster.servers[1], 'localhost:13302', 'alive' 30 | ) 31 | end 32 | 33 | g.test_enable_encryption = function() 34 | cluster.servers[2]:exec(function() 35 | return membership.set_encryption_key("XXXXXX") 36 | end) 37 | t.assert_equals( 38 | cluster.servers[2]:exec(function() 39 | return membership.get_encryption_key() 40 | end), 41 | string.rjust("XXXXXX", 32) 42 | ) 43 | t.helpers.retrying( 44 | {}, 45 | cluster.servers[1].check_status, 46 | cluster.servers[1], 'localhost:13302', 'non-decryptable' 47 | ) 48 | t.helpers.retrying( 49 | {}, 50 | cluster.servers[2].check_status, 51 | cluster.servers[2], 'localhost:13301', 'non-decryptable' 52 | ) 53 | 54 | cluster.servers[1]:exec(function() 55 | return membership.set_encryption_key("XXXXXX") 56 | end) 57 | t.assert_equals( 58 | cluster.servers[1]:exec(function() 59 | return membership.get_encryption_key() 60 | end), 61 | string.rjust("XXXXXX", 32) 62 | ) 63 | t.helpers.retrying( 64 | {}, 65 | cluster.servers[1].check_status, 66 | cluster.servers[1], 'localhost:13302', 'alive' 67 | ) 68 | t.helpers.retrying( 69 | {}, 70 | cluster.servers[2].check_status, 71 | cluster.servers[2], 'localhost:13301', 'alive' 72 | ) 73 | 74 | cluster.servers[2]:exec(function() 75 | return membership.leave() 76 | end) 77 | cluster.servers[1]:check_status('localhost:13302', 'left') 78 | 79 | cluster.servers[2]:exec(function() 80 | assert(membership.init("localhost", 13302)) 81 | assert(membership.probe_uri("localhost:13301")) 82 | end) 83 | t.helpers.retrying( 84 | {}, 85 | cluster.servers[1].check_status, 86 | cluster.servers[1], 'localhost:13302', 'alive' 87 | ) 88 | end 89 | 90 | g.test_change_encryption = function() 91 | cluster.servers[1]:exec(function() 92 | return membership.set_encryption_key("YY") 93 | end) 94 | t.assert_equals( 95 | cluster.servers[1]:exec(function() 96 | return membership.get_encryption_key() 97 | end), 98 | string.rjust("YY", 32) 99 | ) 100 | t.helpers.retrying( 101 | {}, 102 | cluster.servers[1].check_status, 103 | cluster.servers[1], 'localhost:13302', 'non-decryptable' 104 | ) 105 | t.helpers.retrying( 106 | {}, 107 | cluster.servers[2].check_status, 108 | cluster.servers[2], 'localhost:13301', 'non-decryptable' 109 | ) 110 | 111 | cluster.servers[2]:exec(function() 112 | return membership.set_encryption_key("YY") 113 | end) 114 | t.assert_equals( 115 | cluster.servers[2]:exec(function() 116 | return membership.get_encryption_key() 117 | end), 118 | string.rjust("YY", 32) 119 | ) 120 | t.helpers.retrying( 121 | {}, 122 | cluster.servers[1].check_status, 123 | cluster.servers[1], 'localhost:13302', 'alive' 124 | ) 125 | t.helpers.retrying( 126 | {}, 127 | cluster.servers[2].check_status, 128 | cluster.servers[2], 'localhost:13301', 'alive' 129 | ) 130 | end 131 | 132 | g.test_disable_encryption = function() 133 | cluster.servers[2]:exec(function() 134 | return membership.set_encryption_key(nil) 135 | end) 136 | t.assert_equals(cluster.servers[2]:exec(function() 137 | return membership.get_encryption_key() 138 | end), nil) 139 | t.helpers.retrying( 140 | {}, 141 | cluster.servers[1].check_status, 142 | cluster.servers[1], 'localhost:13302', 'non-decryptable' 143 | ) 144 | t.helpers.retrying( 145 | {}, 146 | cluster.servers[2].check_status, 147 | cluster.servers[2], 'localhost:13301', 'non-decryptable' 148 | ) 149 | 150 | cluster.servers[1]:exec(function() 151 | return membership.set_encryption_key(nil) 152 | end) 153 | t.assert_equals(cluster.servers[1]:exec(function() 154 | return membership.get_encryption_key() 155 | end), nil) 156 | t.helpers.retrying( 157 | {}, 158 | cluster.servers[1].check_status, 159 | cluster.servers[1], 'localhost:13302', 'alive' 160 | ) 161 | t.helpers.retrying( 162 | {}, 163 | cluster.servers[2].check_status, 164 | cluster.servers[2], 'localhost:13301', 'alive' 165 | ) 166 | end 167 | 168 | g.test_gh36 = function() 169 | -- There was a bug in nslookup function which prevented 170 | -- discovering non-decryptable members 171 | for i = 1, 10 do 172 | local uri = string.format("s%03d:oO", i) 173 | cluster.servers[2]:exec(function(u) 174 | membership.probe_uri(u) 175 | end, { uri }) 176 | end 177 | 178 | cluster.servers[1]:exec(function() 179 | return membership.set_encryption_key("ZZ") 180 | end) 181 | t.assert_equals( 182 | cluster.servers[1]:exec(function() 183 | return membership.get_encryption_key() 184 | end), 185 | string.rjust("ZZ", 32) 186 | ) 187 | t.helpers.retrying( 188 | {}, 189 | cluster.servers[1].check_status, 190 | cluster.servers[1], 'localhost:13302', 'non-decryptable' 191 | ) 192 | t.helpers.retrying( 193 | {}, 194 | cluster.servers[2].check_status, 195 | cluster.servers[2], 'localhost:13301', 'non-decryptable' 196 | ) 197 | end 198 | -------------------------------------------------------------------------------- /test/integration/false_rumors_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | local fiber = require('fiber') 5 | 6 | local SERVER_LIST = { 13301, 13302, 13303 } 7 | 8 | g.before_all(function() 9 | cluster.start('localhost', SERVER_LIST) 10 | end) 11 | 12 | g.after_all(function() 13 | cluster.stop() 14 | end) 15 | 16 | local function check_rumors(server, expected) 17 | t.assert_equals(server:exec(function() return _G.rumors end), expected) 18 | end 19 | 20 | g.test_setup = function() 21 | -- Monkeypatch the instance to collect all rumors 22 | t.assert(cluster.servers[1]:exec(function() 23 | rawset(_G, "rumors", setmetatable({}, { __serialize = 'map' })) 24 | 25 | local fiber = require('fiber') 26 | local members = require('membership.members') 27 | local opts = require('membership.options') 28 | 29 | local function collect_rumors() 30 | for uri, m in members.pairs() do 31 | if m.status ~= opts.ALIVE then 32 | _G.rumors[uri] = opts.STATUS_NAMES[m.status] 33 | end 34 | end 35 | end 36 | 37 | rawset(_G, "_collector_fiber", fiber.create(function() 38 | local cond = membership.subscribe() 39 | while true do 40 | cond:wait() 41 | fiber.testcancel() 42 | collect_rumors() 43 | end 44 | end)) 45 | 46 | return true 47 | end)) 48 | 49 | t.assert(cluster.servers[1]:probe_uri('localhost:13302')) 50 | t.assert(cluster.servers[1]:probe_uri('localhost:13303')) 51 | check_rumors(cluster.servers[1], {}) 52 | end 53 | 54 | g.test_indirect_ping = function() 55 | -- Ack timeout shouldn't trigger failure detection 56 | -- because indirect pings still work 57 | cluster.servers[1]:exec(function() 58 | local opts = require('membership.options') 59 | opts.ACK_TIMEOUT_SECONDS = 0 60 | end) 61 | 62 | fiber.sleep(2) 63 | check_rumors(cluster.servers[1], {}) 64 | end 65 | 66 | g.test_flickering = function() 67 | -- Cluster starts flickering if indirect pings are disabled 68 | cluster.servers[1]:exec(function() 69 | local opts = require('membership.options') 70 | opts.NUM_FAILURE_DETECTION_SUBGROUPS = 0 71 | end) 72 | 73 | t.helpers.retrying( 74 | {}, 75 | check_rumors, 76 | cluster.servers[1], 77 | { 78 | ['localhost:13301'] = 'suspect', 79 | ['localhost:13302'] = 'suspect', 80 | ['localhost:13303'] = 'suspect', 81 | } 82 | ) 83 | end 84 | 85 | g.test_nonsuspiciousness = function() 86 | -- With disabled suspiciousness it stops flickering again 87 | cluster.servers[1]:exec(function() 88 | local opts = require('membership.options') 89 | opts.SUSPICIOUSNESS = false 90 | end) 91 | 92 | t.helpers.retrying( 93 | {}, 94 | cluster.servers[1].check_status, 95 | cluster.servers[1], 'localhost:13301', 'alive' 96 | ) 97 | t.helpers.retrying( 98 | {}, 99 | cluster.servers[1].check_status, 100 | cluster.servers[1], 'localhost:13302', 'alive' 101 | ) 102 | t.helpers.retrying( 103 | {}, 104 | cluster.servers[1].check_status, 105 | cluster.servers[1], 'localhost:13303', 'alive' 106 | ) 107 | cluster.servers[1]:exec(function() table.clear(rumors) end) 108 | 109 | fiber.sleep(2) 110 | check_rumors(cluster.servers[1], {}) 111 | end 112 | -------------------------------------------------------------------------------- /test/integration/init_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_join = function() 16 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 17 | 18 | t.helpers.retrying( 19 | {}, 20 | cluster.servers[2].check_status, 21 | cluster.servers[2], 'localhost:13301', 'alive' 22 | ) 23 | end 24 | 25 | g.test_death = function() 26 | cluster.servers[2]:stop() 27 | t.helpers.retrying( 28 | {}, 29 | cluster.servers[1].check_status, 30 | cluster.servers[1], 'localhost:13302', 'suspect' 31 | ) 32 | t.helpers.retrying( 33 | {}, 34 | cluster.servers[1].check_status, 35 | cluster.servers[1], 'localhost:13302', 'dead' 36 | ) 37 | 38 | cluster.servers[2]:start() 39 | t.helpers.retrying( 40 | {}, 41 | cluster.servers[1].check_status, 42 | cluster.servers[1], 'localhost:13302', 'alive' 43 | ) 44 | t.helpers.retrying( 45 | {}, 46 | cluster.servers[2].check_status, 47 | cluster.servers[2], 'localhost:13301', 'alive' 48 | ) 49 | end 50 | 51 | g.test_reinit = function() 52 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 53 | t.helpers.retrying( 54 | {}, 55 | cluster.servers[2].check_status, 56 | cluster.servers[2], 'localhost:13301', 'alive' 57 | ) 58 | 59 | -- Change hostname 60 | t.assert(t.helpers.retrying( 61 | {}, 62 | cluster.servers[1].exec, 63 | cluster.servers[1], function() return membership.init('127.0.0.1', 13301) end 64 | )) 65 | t.helpers.retrying( 66 | {}, 67 | cluster.servers[2].check_status, 68 | cluster.servers[2], 'localhost:13301', 'dead' 69 | ) 70 | t.helpers.retrying( 71 | {}, 72 | cluster.servers[2].check_status, 73 | cluster.servers[2], '127.0.0.1:13301', 'alive' 74 | ) 75 | 76 | -- Change port 77 | t.assert(t.helpers.retrying( 78 | {}, 79 | cluster.servers[1].exec, 80 | cluster.servers[1], function() return membership.init('127.0.0.1', 13303) end 81 | )) 82 | t.helpers.retrying( 83 | {}, 84 | cluster.servers[2].check_status, 85 | cluster.servers[2], 'localhost:13301', 'dead' 86 | ) 87 | t.helpers.retrying( 88 | {}, 89 | cluster.servers[2].check_status, 90 | cluster.servers[2], '127.0.0.1:13301', 'dead' 91 | ) 92 | t.helpers.retrying( 93 | {}, 94 | cluster.servers[2].check_status, 95 | cluster.servers[2], '127.0.0.1:13303', 'alive' 96 | ) 97 | 98 | -- Revert all changes 99 | t.assert(t.helpers.retrying( 100 | {}, 101 | cluster.servers[1].exec, 102 | cluster.servers[1], function() return membership.init('localhost', 13301) end 103 | )) 104 | t.helpers.retrying( 105 | {}, 106 | cluster.servers[1].check_status, 107 | cluster.servers[1], 'localhost:13302', 'alive' 108 | ) 109 | t.helpers.retrying( 110 | {}, 111 | cluster.servers[2].check_status, 112 | cluster.servers[2], 'localhost:13301', 'alive' 113 | ) 114 | end 115 | 116 | g.test_error = function() 117 | t.assert_error_msg_equals( 118 | 'Socket bind error (13302/udp): Address already in use', 119 | cluster.servers[1].exec, 120 | cluster.servers[1], function() return membership.init('localhost', 13302) end 121 | ) 122 | 123 | t.assert(cluster.servers[1]:probe_uri('localhost:13301')) 124 | t.assert(cluster.servers[1]:probe_uri('localhost:13302')) 125 | t.assert(cluster.servers[2]:probe_uri('localhost:13301')) 126 | end 127 | -------------------------------------------------------------------------------- /test/integration/member_clock_diff_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | local function check_clock_delta(server, uri) 16 | local member = server:members()[uri] 17 | t.assert(member['clock_delta'] ~= nil) 18 | end 19 | 20 | g.test_clock_diff = function() 21 | cluster.servers[1]:probe_uri('localhost:13302') 22 | 23 | t.helpers.retrying( 24 | {}, 25 | cluster.servers[2].check_status, 26 | cluster.servers[2], 'localhost:13301', 'alive' 27 | ) 28 | t.helpers.retrying( 29 | {}, 30 | cluster.servers[1].check_status, 31 | cluster.servers[1], 'localhost:13302', 'alive' 32 | ) 33 | 34 | t.helpers.retrying( 35 | {}, 36 | check_clock_delta, cluster.servers[2], 'localhost:13301' 37 | ) 38 | t.helpers.retrying( 39 | {}, 40 | check_clock_delta, cluster.servers[1], 'localhost:13302' 41 | ) 42 | end 43 | -------------------------------------------------------------------------------- /test/integration/payload_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | local function check_payload(server, uri, payload, status) 16 | local member = server:members()[uri] 17 | t.assert_equals(member['status'], status) 18 | t.assert_equals(member['payload'], payload) 19 | end 20 | 21 | g.test_payload = function() 22 | t.assert(cluster.servers[1]:exec(function() 23 | return membership.set_payload("foo1", { bar = "buzz" }) 24 | end)) 25 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 26 | t.helpers.retrying( 27 | {}, 28 | check_payload, 29 | cluster.servers[2], 'localhost:13301', 30 | { 31 | ['foo1'] = { 32 | ['bar'] = 'buzz' 33 | } 34 | }, 35 | 'alive' 36 | ) 37 | 38 | t.assert(cluster.servers[1]:exec(function() 39 | return membership.set_payload("foo2", 42) 40 | end)) 41 | t.helpers.retrying( 42 | {}, 43 | check_payload, 44 | cluster.servers[2], 'localhost:13301', 45 | { 46 | ['foo1'] = { 47 | ['bar'] = 'buzz' 48 | }, 49 | ['foo2'] = 42 50 | }, 51 | 'alive' 52 | ) 53 | 54 | t.assert(cluster.servers[1]:exec(function() 55 | return membership.set_payload("foo1", nil) 56 | end)) 57 | t.helpers.retrying( 58 | {}, 59 | check_payload, 60 | cluster.servers[2], 'localhost:13301', 61 | { 62 | ['foo2'] = 42 63 | }, 64 | 'alive' 65 | ) 66 | 67 | t.assert(cluster.servers[1]:exec(function() 68 | rawset(_G, "checks_disabled", true) 69 | local opts = require('membership.options') 70 | require('membership.events').generate('13301', opts.DEAD, 31, 37) 71 | rawset(_G, "checks_disabled", false) 72 | 73 | return true 74 | end)) 75 | t.helpers.retrying( 76 | {}, 77 | check_payload, 78 | cluster.servers[2], '13301', 79 | {}, 80 | 'dead' 81 | ) 82 | end 83 | -------------------------------------------------------------------------------- /test/integration/probe_uri_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_probe_uri = function() 16 | t.assert(cluster.servers[1]:exec(function() 17 | rawset(_G, "warnings", {}) 18 | require('log').warn = function(...) 19 | table.insert(warnings, string.format(...)) 20 | end 21 | return true 22 | end)) 23 | 24 | t.assert(cluster.servers[1]:probe_uri('localhost:13301')) 25 | t.assert_equals({ cluster.servers[1]:probe_uri('localhost:13302') }, { nil, 'no response' }) 26 | t.assert_equals({ cluster.servers[1]:probe_uri('127.0.0.1:13301') }, { nil, 'no response' }) 27 | t.assert_equals({ cluster.servers[1]:probe_uri(':::') }, { nil, 'parse error' }) 28 | 29 | t.assert_equals({ cluster.servers[1]:probe_uri('unix/:/dev/null') }, { nil, 'ping was not sent' }) 30 | t.assert_equals({ cluster.servers[1]:probe_uri('unknown-host:9') }, { nil, 'ping was not sent' }) 31 | t.assert_equals({ cluster.servers[1]:probe_uri('-:/') }, { nil, 'ping was not sent' }) 32 | 33 | -- https://github.com/tarantool/tarantool/commit/92fe50fa999d6153e8c4d5d43fb0c419ce05350e 34 | -- Tarantool didn't return error message up to 2.5 35 | local version = cluster.servers[1]:exec(function() return _TARANTOOL end) 36 | 37 | local version_parts = string.split(version, '.') 38 | local major = tonumber(version_parts[1]) 39 | local minor = tonumber(version_parts[2]) 40 | 41 | local is_linux = false 42 | local handle = io.popen("uname -s 2>/dev/null", "r") 43 | if handle then 44 | local os_name = handle:read("*a"):gsub("%s+", "") 45 | handle:close() 46 | is_linux = (os_name == 'Linux') 47 | end 48 | 49 | t.skip_if( 50 | major == 2 and minor == 11, 51 | 'Temporarily skipped due to warning capture issues for Tarantool 2.11' 52 | ) 53 | 54 | local expected_warnings 55 | if (major < 2) or (major == 2 and minor < 5) then 56 | expected_warnings = { 57 | 'getaddrinfo: Unknown error (unix/:/dev/null)', 58 | 'getaddrinfo: Unknown error (unknown-host:9)', 59 | 'getaddrinfo: Unknown error (-)' 60 | } 61 | elseif major == 2 and minor == 10 then 62 | expected_warnings = { 63 | 'getaddrinfo: Servname not supported for ai_socktype: Input/output error (unix/:/dev/null)', 64 | 'getaddrinfo: Temporary failure in name resolution: Input/output error (unknown-host:9)', 65 | 'getaddrinfo: Name or service not known: Input/output error (-)' 66 | } 67 | elseif is_linux then 68 | expected_warnings = { 69 | 'getaddrinfo: Servname not supported for ai_socktype (unix/:/dev/null)', 70 | 'getaddrinfo: Temporary failure in name resolution (unknown-host:9)', 71 | 'getaddrinfo: Name or service not known (-)' 72 | } 73 | else 74 | expected_warnings = { 75 | 'getaddrinfo: nodename nor servname provided, or not known (unix/:/dev/null)', 76 | 'getaddrinfo: nodename nor servname provided, or not known (unknown-host:9)', 77 | 'getaddrinfo: nodename nor servname provided, or not known (-)' 78 | } 79 | end 80 | 81 | t.assert_equals( 82 | cluster.servers[1]:exec(function() return warnings end), 83 | expected_warnings 84 | ) 85 | end 86 | -------------------------------------------------------------------------------- /test/integration/quit_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_join = function() 16 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 17 | 18 | t.helpers.retrying( 19 | {}, 20 | cluster.servers[2].check_status, 21 | cluster.servers[2], 'localhost:13301', 'alive' 22 | ) 23 | end 24 | 25 | g.test_quit = function() 26 | t.assert(cluster.servers[2]:exec(function() return membership.leave() end)) 27 | 28 | t.helpers.retrying( 29 | {}, 30 | cluster.servers[1].check_status, 31 | cluster.servers[1], 'localhost:13302', 'left' 32 | ) 33 | 34 | t.assert(not cluster.servers[2]:exec(function() return membership.leave() end)) 35 | end 36 | 37 | g.test_rejoin = function() 38 | t.assert(cluster.servers[2]:exec(function() 39 | return membership.init("localhost", 13302) 40 | end)) 41 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 42 | 43 | t.helpers.retrying( 44 | {}, 45 | cluster.servers[1].check_status, 46 | cluster.servers[1], 'localhost:13302', 'alive' 47 | ) 48 | end 49 | 50 | g.test_mark_left = function() 51 | t.helpers.retrying( 52 | {}, 53 | cluster.servers[1].check_status, 54 | cluster.servers[1], 'localhost:13302', 'alive' 55 | ) 56 | 57 | t.assert(cluster.servers[1]:exec(function() 58 | return membership.mark_left("localhost:13302") 59 | end)) 60 | 61 | t.helpers.retrying( 62 | {}, 63 | cluster.servers[1].check_status, 64 | cluster.servers[1], 'localhost:13302', 'left' 65 | ) 66 | 67 | -- already has left 68 | t.assert(not cluster.servers[1]:exec(function() 69 | return membership.mark_left("localhost:13302") 70 | end)) 71 | 72 | -- there are no such member 73 | t.assert(not cluster.servers[1]:exec(function() 74 | return membership.mark_left("localhost:10000") 75 | end)) 76 | end 77 | -------------------------------------------------------------------------------- /test/integration/reload_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_reload_slow = function() 16 | -- Check that hot-reload doesn't affect statuses 17 | 18 | t.assert(cluster.servers[1]:probe_uri('localhost:13302')) 19 | 20 | local member = cluster.servers[2]:get_member('localhost:13301') 21 | t.assert_equals(member['status'], 'alive') 22 | 23 | cluster.servers[2]:exec(function() 24 | local log = require('log') 25 | local yaml = require('yaml') 26 | local fiber = require('fiber') 27 | 28 | rawset(_G, "guard", fiber.new(function() 29 | membership.subscribe():wait() 30 | fiber.testcancel() 31 | log.error('Unexpected event:') 32 | log.error(yaml.encode(membership.members())) 33 | os.exit(1) 34 | end)) 35 | end) 36 | 37 | t.assert(cluster.servers[1]:exec(function() 38 | local log = require('log') 39 | local fiber = require('fiber') 40 | 41 | package.loaded['membership'] = nil 42 | log.info('Membership unloaded') 43 | fiber.sleep(1) 44 | 45 | _G.membership = require('membership') 46 | log.info('Membership reloaded') 47 | fiber.sleep(1) 48 | 49 | log.info('Doing file %s...', arg[0]) 50 | dofile(arg[0]) 51 | log.info('Dofile succeeded') 52 | fiber.sleep(1) 53 | 54 | return membership.probe_uri('localhost:13302') 55 | end)) 56 | 57 | cluster.servers[2]:exec(function() _G.guard:cancel() end) 58 | end 59 | 60 | g.test_reload_fast = function() 61 | -- Check that hot-reload doesn't affect other features 62 | 63 | t.assert(cluster.servers[1]:probe_uri('localhost:13302')) 64 | 65 | local member = cluster.servers[2]:get_member('localhost:13301') 66 | t.assert_equals(member['status'], 'alive') 67 | 68 | t.assert(cluster.servers[1]:exec(function() return package.reload() end)) 69 | 70 | t.assert(cluster.servers[2]:exec(function() 71 | return membership.set_payload("k", "v1") 72 | end)) 73 | t.assert(cluster.servers[2]:probe_uri('localhost:13301')) 74 | local payload1 = cluster.servers[1]:members()['localhost:13302']['payload'] 75 | t.assert_equals(payload1, { ['k'] = 'v1' }) 76 | 77 | cluster.servers[1]:exec(function() rawset(_G, "cond", membership.subscribe()) end) 78 | 79 | t.assert(cluster.servers[1]:exec(function() return package.reload() end)) 80 | t.assert(cluster.servers[2]:exec(function() 81 | return membership.set_payload("k", "v2") 82 | end)) 83 | t.assert(cluster.servers[1]:exec(function() return _G.cond:wait(10) end)) 84 | local payload2 = cluster.servers[1]:members()['localhost:13302']['payload'] 85 | t.assert_equals(payload2, { ['k'] = 'v2'} ) 86 | 87 | cluster.servers[2]:exec(function() 88 | return membership.set_encryption_key("YY") 89 | end) 90 | t.assert(cluster.servers[2]:exec(function() return package.reload() end)) 91 | t.helpers.retrying( 92 | {}, 93 | cluster.servers[1].check_status, 94 | cluster.servers[1], 'localhost:13302', 'non-decryptable' 95 | ) 96 | t.helpers.retrying( 97 | {}, 98 | cluster.servers[2].check_status, 99 | cluster.servers[2], 'localhost:13301', 'non-decryptable' 100 | ) 101 | 102 | cluster.servers[1]:exec(function() 103 | return membership.set_encryption_key("YY") 104 | end) 105 | t.helpers.retrying( 106 | {}, 107 | cluster.servers[1].check_status, 108 | cluster.servers[1], 'localhost:13302', 'alive' 109 | ) 110 | t.helpers.retrying( 111 | {}, 112 | cluster.servers[2].check_status, 113 | cluster.servers[2], 'localhost:13301', 'alive' 114 | ) 115 | end 116 | -------------------------------------------------------------------------------- /test/integration/subscribe_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | 5 | local SERVER_LIST = { 13301, 13302 } 6 | 7 | g.before_all(function() 8 | cluster.start('localhost', SERVER_LIST) 9 | end) 10 | 11 | g.after_all(function() 12 | cluster.stop() 13 | end) 14 | 15 | g.test_subscribe = function() 16 | t.assert(cluster.servers[1]:add_member('localhost:13302')) 17 | 18 | cluster.servers[1]:exec(function() 19 | rawset(_G, "cond", membership.subscribe()) 20 | end) 21 | 22 | t.assert(not cluster.servers[1]:exec(function() 23 | return _G.cond:wait(1) 24 | end)) 25 | t.assert(cluster.servers[2]:exec(function() 26 | return membership.set_payload("foo", "bar") 27 | end)) 28 | t.assert(cluster.servers[1]:exec(function() 29 | return _G.cond:wait(1) 30 | end)) 31 | end 32 | -------------------------------------------------------------------------------- /test/integration/sync_test.lua: -------------------------------------------------------------------------------- 1 | local t = require('luatest') 2 | local g = t.group() 3 | local cluster = require('test.helpers.cluster') 4 | local fiber = require('fiber') 5 | 6 | local SERVER_LIST = { 13301, 13302 } 7 | 8 | g.before_all(function() 9 | cluster.start('localhost', SERVER_LIST) 10 | end) 11 | 12 | g.after_all(function() 13 | cluster.stop() 14 | end) 15 | 16 | g.test_sync = function() 17 | t.assert(cluster.servers[1]:add_member('localhost:33088')) 18 | 19 | t.helpers.retrying( 20 | {}, 21 | cluster.servers[1].check_status, cluster.servers[1], 22 | 'localhost:33088', 'dead' 23 | ) 24 | 25 | -- Wait for dead events to expire 26 | fiber.sleep(2) 27 | 28 | -- Make sure dead members are synced 29 | t.assert(cluster.servers[2]:add_member('localhost:13301')) 30 | 31 | t.helpers.retrying( 32 | {}, 33 | cluster.servers[2].check_status, 34 | cluster.servers[2], 'localhost:13301', 'alive' 35 | ) 36 | 37 | t.helpers.retrying( 38 | {}, 39 | cluster.servers[2].check_status, 40 | cluster.servers[2], 'localhost:33088', 'dead' 41 | ) 42 | end 43 | --------------------------------------------------------------------------------