├── .bumpversion.cfg
├── .dockerignore
├── .gitignore
├── .luacheckrc
├── .luacov
├── .travis.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── Pipfile
├── Pipfile.lock
├── README.md
├── autovshard-scm-1.rockspec
├── autovshard
│   ├── config.lua
│   ├── consul.lua
│   ├── init.lua
│   ├── util.lua
│   └── wlock.lua
├── docker-compose.yaml
├── examples
│   ├── consul_kv.sh
│   ├── router.lua
│   └── storage.lua
├── output
│   └── .gitkeep
├── pytest.ini
├── rockspecs
│   ├── autovshard-0.0.3-1.rockspec
│   ├── autovshard-0.1.0-1.rockspec
│   ├── autovshard-0.2.0-1.rockspec
│   ├── autovshard-0.2.1-1.rockspec
│   ├── autovshard-1.0.0-1.rockspec
│   ├── autovshard-1.0.1-1.rockspec
│   └── autovshard-1.0.2-1.rockspec
├── scripts
│   ├── run_tests.lua
│   └── test.sh
└── tests
    ├── bdd
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── features
    │   │   ├── become_ro.feature
    │   │   ├── config_application.feature
    │   │   ├── failover.feature
    │   │   ├── master_election.feature
    │   │   └── switchover_delay.feature
    │   ├── steps
    │   │   ├── templates
    │   │   │   └── init.lua.tpl
    │   │   └── test_autovshard.py
    │   └── util.py
    ├── test_config.lua
    ├── test_consul.lua
    ├── test_util.lua
    ├── test_wlock.lua
    └── tnt
        └── init.lua
/.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.0.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:autovshard/init.lua] 7 | search = _VERSION = "{current_version}" 8 | replace = _VERSION = "{new_version}" 9 | 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .* 2 | Makefile 3 | README.md 4 | *.lua 5 | scripts 6 | tests 7 | rockspecs 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Lua sources 2 | luac.out 3 | 4 | # luarocks build files 5 | *.src.rock 6 | *.zip 7 | *.tar.gz 8 | 9 | # Object files 10 | *.o 11 | *.os 12 | *.ko 13 | *.obj 14 | *.elf 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Libraries 21 | *.lib 22 | *.a 23 | *.la 24 | *.lo 25 | *.def 26 | *.exp 27 | 28 | # Shared objects (inc. 
Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | -------------------------------------------------------------------------------- /.luacheckrc: -------------------------------------------------------------------------------- 1 | allow_defined = true 2 | max_line_length = 99 3 | read_globals = {"box", "table.copy", "table.clear", "table.foreach", "package.reload"} 4 | exclude_files = {".luacheckrc"} 5 | ignore = {"631"} 6 | -------------------------------------------------------------------------------- /.luacov: -------------------------------------------------------------------------------- 1 | tick = true 2 | modules = { 3 | ["autovshard.*"] = "autovshard" 4 | } 5 | statsfile = "output/luacov.stats.out" 6 | reportfile = "output/luacov.report.out" 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.7' 4 | install: 5 | - python -m pip install pipenv 6 | - pipenv install --dev --deploy 7 | 8 | dist: bionic 9 | 10 | services: 11 | - docker 12 | 13 | script: 14 | - make test-ci 15 | - make test-e2e-ci 16 | 17 | after_success: 18 | - make coverage-ci 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:buster-slim 2 | 3 | RUN apt-get update \ 4 | && apt-get install --no-install-recommends -y \ 5 | debian-archive-keyring \ 6 | curl \ 7 | gnupg \ 8 | apt-transport-https \ 9 | ca-certificates \ 10 | # https://packagecloud.io/tarantool/1_10/install#manual 11 | && curl -s -L https://packagecloud.io/tarantool/1_10/gpgkey | apt-key add - \ 12 | && list=/etc/apt/sources.list.d/tarantool_1_10.list \ 13 | && echo 'deb https://packagecloud.io/tarantool/1_10/debian/ buster main' > $list \ 14 | && echo 'deb-src https://packagecloud.io/tarantool/1_10/debian/ buster main' >> $list \ 15 | && apt-get update \ 16 | && apt-get install --no-install-recommends -y \ 17 | tarantool \ 18 | lua5.1-dev \ 19 | luarocks \ 20 | build-essential \ 21 | git \ 22 | && rm -rf /var/lib/apt/lists/* 23 | 24 | RUN luarocks install busted 2.0.rc12-1 \ 25 | && luarocks install luacov 0.13.0 \ 26 | && luarocks install luacov-coveralls 27 | 28 | RUN cd /tmp/ \ 29 | && set -ex \ 30 | && mkdir -p /usr/share/tarantool \ 31 | && echo "---------- vshard --------------" \ 32 | && git clone https://github.com/tarantool/vshard.git \ 33 | && cd vshard \ 34 | && git checkout -q d5faa9c \ 35 | && mv vshard /usr/share/tarantool/vshard \ 36 | && cd /tmp \ 37 | && rm -rf vshard \ 38 | && echo "---------- package.reload --------------" \ 39 | && git clone https://github.com/moonlibs/package-reload.git \ 40 | && cd package-reload \ 41 | && git checkout -q 870a2e3 \ 42 | && mv package /usr/share/tarantool/package \ 43 | && cd /tmp \ 44 | && rm -rf package-reload 45 | 46 | COPY . 
/tmp/autovshard 47 | 48 | RUN cd /tmp/autovshard \ 49 | && luarocks make \ 50 | && cd /tmp \ 51 | && rm -rf autovshard 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 bofm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_IMAGE=tnt-autovshard 2 | 3 | .PHONY: docker clean test test-ci run kill run-bash docker-image-name 4 | 5 | .docker: $(shell find autovshard scripts tests/*.lua Dockerfile docker-compose.yaml *.rockspec .dockerignore -print) 6 | docker build --pull -t "$(DOCKER_IMAGE)" . 
7 | docker-compose build --pull 8 | docker pull consul:1.5.3 9 | @echo "$(DOCKER_IMAGE)" > .docker 10 | 11 | docker: .docker 12 | 13 | build: docker 14 | 15 | clean: 16 | docker-compose kill 17 | docker-compose rm -fv 18 | docker rmi "$(DOCKER_IMAGE)" || : 19 | rm -rf \ 20 | ./.docker \ 21 | ./output/* \ 22 | ./.pipenv \ 23 | ./tmp 24 | 25 | test: 26 | docker-compose run --rm a1 ./scripts/test.sh --verbose 27 | 28 | .pipenv: Pipfile.lock 29 | which pipenv || python -m pip install pipenv 30 | pipenv install --dev --deploy 31 | @echo "" > .pipenv 32 | 33 | test-e2e-ci: .pipenv docker 34 | pipenv run pytest -s 35 | 36 | test-coverage: 37 | docker-compose run --rm a1 ./scripts/test.sh --verbose --coverage 38 | 39 | test-ci: build test-coverage 40 | 41 | coverage-ci: 42 | docker-compose run --rm \ 43 | -e TRAVIS=true \ 44 | -e CI=true \ 45 | -e COVERALLS_REPO_TOKEN=${COVERALLS_REPO_TOKEN} \ 46 | -e TRAVIS_JOB_ID=${TRAVIS_JOB_ID} \ 47 | -e TRAVIS_BRANCH=${TRAVIS_BRANCH} \ 48 | -e TRAVIS_REPO_SLUG=${TRAVIS_REPO_SLUG} \ 49 | a1 \ 50 | sh -c "cd output && exec luacov-coveralls -v --root=/usr/share/tarantool/" 51 | 52 | run: 53 | docker-compose run --rm a1 tarantool 54 | 55 | kill: 56 | docker-compose kill 57 | 58 | run-bash: build 59 | docker-compose run --rm a1 bash 60 | 61 | docker-image-name: 62 | @echo $(DOCKER_IMAGE) 63 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pytest = "*" 8 | pytest-bdd = "*" 9 | docker = "*" 10 | python-consul = "*" 11 | pyyaml = "*" 12 | funcy = "*" 13 | "fn.py" = "*" 14 | pytest-parallel = "*" 15 | 16 | [packages] 17 | 18 | [requires] 19 | python_version = "3.7" 20 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "7496fa5c9944cfdd469c3a3ec84b23daa6395d9aaa5b0ad3dc1dcd33a262625c" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.7" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | 18 | }, 19 | "default": {}, 20 | "develop": { 21 | "atomicwrites": { 22 | "hashes": [ 23 | "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", 24 | "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" 25 | ], 26 | "version": "==1.3.0" 27 | }, 28 | "attrs": { 29 | "hashes": [ 30 | "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", 31 | "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" 32 | ], 33 | "version": "==19.1.0" 34 | }, 35 | "certifi": { 36 | "hashes": [ 37 | "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", 38 | "sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695" 39 | ], 40 | "version": "==2019.6.16" 41 | }, 42 | "chardet": { 43 | "hashes": [ 44 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 45 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 46 | ], 47 | "version": "==3.0.4" 48 | }, 49 | "docker": { 50 | "hashes": [ 51 | "sha256:acf51b5e3e0d056925c3b780067a6f753c915fffaa46c5f2d79eb0fc1cbe6a01", 52 | 
"sha256:cc5b2e94af6a2b1e1ed9d7dcbdc77eff56c36081757baf9ada6e878ea0213164" 53 | ], 54 | "index": "pypi", 55 | "version": "==4.0.2" 56 | }, 57 | "fn.py": { 58 | "hashes": [ 59 | "sha256:3191abd12577c983cedca38ede1ab48a790c2456cba690e7be6b3439da33d427" 60 | ], 61 | "index": "pypi", 62 | "version": "==0.5.2" 63 | }, 64 | "funcy": { 65 | "hashes": [ 66 | "sha256:141950038e72bdc2d56fa82468586a1d1291b9cc9346daaaa322dffed1d1da6e", 67 | "sha256:918f333f675d9841ec7d77b9f0d5a272ed290393a33c8ef20e605847de89b1c3" 68 | ], 69 | "index": "pypi", 70 | "version": "==1.13" 71 | }, 72 | "glob2": { 73 | "hashes": [ 74 | "sha256:85c3dbd07c8aa26d63d7aacee34fa86e9a91a3873bc30bf62ec46e531f92ab8c" 75 | ], 76 | "version": "==0.7" 77 | }, 78 | "idna": { 79 | "hashes": [ 80 | "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", 81 | "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" 82 | ], 83 | "version": "==2.8" 84 | }, 85 | "importlib-metadata": { 86 | "hashes": [ 87 | "sha256:23d3d873e008a513952355379d93cbcab874c58f4f034ff657c7a87422fa64e8", 88 | "sha256:80d2de76188eabfbfcf27e6a37342c2827801e59c4cc14b0371c56fed43820e3" 89 | ], 90 | "markers": "python_version < '3.8'", 91 | "version": "==0.19" 92 | }, 93 | "mako": { 94 | "hashes": [ 95 | "sha256:a36919599a9b7dc5d86a7a8988f23a9a3a3d083070023bab23d64f7f1d1e0a4b" 96 | ], 97 | "version": "==1.1.0" 98 | }, 99 | "markupsafe": { 100 | "hashes": [ 101 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 102 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 103 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 104 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 105 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 106 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 107 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 108 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 109 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 110 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 111 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 112 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 113 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 114 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 115 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 116 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 117 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 118 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 119 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 120 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 121 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 122 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 123 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 124 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 125 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 126 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 127 | 
"sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 128 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7" 129 | ], 130 | "version": "==1.1.1" 131 | }, 132 | "more-itertools": { 133 | "hashes": [ 134 | "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", 135 | "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4" 136 | ], 137 | "version": "==7.2.0" 138 | }, 139 | "packaging": { 140 | "hashes": [ 141 | "sha256:a7ac867b97fdc07ee80a8058fe4435ccd274ecc3b0ed61d852d7d53055528cf9", 142 | "sha256:c491ca87294da7cc01902edbe30a5bc6c4c28172b5138ab4e4aa1b9d7bfaeafe" 143 | ], 144 | "version": "==19.1" 145 | }, 146 | "parse": { 147 | "hashes": [ 148 | "sha256:a5fca7000c6588d77bc65c28f3f21bfce03b5e44daa8f9f07c17fe364990d717" 149 | ], 150 | "version": "==1.12.1" 151 | }, 152 | "parse-type": { 153 | "hashes": [ 154 | "sha256:089a471b06327103865dfec2dd844230c3c658a4a1b5b4c8b6c16c8f77577f9e", 155 | "sha256:7f690b18d35048c15438d6d0571f9045cffbec5907e0b1ccf006f889e3a38c0b" 156 | ], 157 | "version": "==0.5.2" 158 | }, 159 | "pluggy": { 160 | "hashes": [ 161 | "sha256:0825a152ac059776623854c1543d65a4ad408eb3d33ee114dff91e57ec6ae6fc", 162 | "sha256:b9817417e95936bf75d85d3f8767f7df6cdde751fc40aed3bb3074cbcb77757c" 163 | ], 164 | "version": "==0.12.0" 165 | }, 166 | "py": { 167 | "hashes": [ 168 | "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", 169 | "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" 170 | ], 171 | "version": "==1.8.0" 172 | }, 173 | "pyparsing": { 174 | "hashes": [ 175 | "sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80", 176 | "sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4" 177 | ], 178 | "version": "==2.4.2" 179 | }, 180 | "pytest": { 181 | "hashes": [ 182 | "sha256:95b1f6db806e5b1b5b443efeb58984c24945508f93a866c1719e1a507a957d7c", 183 | "sha256:c3d5020755f70c82eceda3feaf556af9a341334414a8eca521a18f463bcead88" 184 | ], 185 | "index": "pypi", 186 | "version": "==5.1.1" 187 | }, 188 | "pytest-bdd": { 189 | "hashes": [ 190 | "sha256:17e73d2fe119de15bfc7fc1fe639fa4df9ab931e5aa552435fdddcf100c97ec5" 191 | ], 192 | "index": "pypi", 193 | "version": "==3.2.1" 194 | }, 195 | "pytest-parallel": { 196 | "hashes": [ 197 | "sha256:94df6b0ebf48e03cbce66622d55ba6bd2cb116b06cfb39cc5fc4d2d310df25ca", 198 | "sha256:c4226352462ae32ff01d47a642d087005c3b9bf995cea61fc37a4becb146278d" 199 | ], 200 | "index": "pypi", 201 | "version": "==0.0.9" 202 | }, 203 | "python-consul": { 204 | "hashes": [ 205 | "sha256:168f1fa53948047effe4f14d53fc1dab50192e2a2cf7855703f126f469ea11f4", 206 | "sha256:eeaaeeae87807ad1bc0d476ca3a9c53823ed5d514832951acebeca671eb54b20" 207 | ], 208 | "index": "pypi", 209 | "version": "==1.1.0" 210 | }, 211 | "pyyaml": { 212 | "hashes": [ 213 | "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9", 214 | "sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4", 215 | "sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8", 216 | "sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696", 217 | "sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34", 218 | "sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9", 219 | "sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73", 220 | "sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299", 221 | 
"sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b", 222 | "sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae", 223 | "sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681", 224 | "sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41", 225 | "sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8" 226 | ], 227 | "index": "pypi", 228 | "version": "==5.1.2" 229 | }, 230 | "requests": { 231 | "hashes": [ 232 | "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", 233 | "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" 234 | ], 235 | "version": "==2.22.0" 236 | }, 237 | "six": { 238 | "hashes": [ 239 | "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", 240 | "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" 241 | ], 242 | "version": "==1.12.0" 243 | }, 244 | "urllib3": { 245 | "hashes": [ 246 | "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", 247 | "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" 248 | ], 249 | "version": "==1.25.3" 250 | }, 251 | "wcwidth": { 252 | "hashes": [ 253 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", 254 | "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" 255 | ], 256 | "version": "==0.1.7" 257 | }, 258 | "websocket-client": { 259 | "hashes": [ 260 | "sha256:1151d5fb3a62dc129164292e1227655e4bbc5dd5340a5165dfae61128ec50aa9", 261 | "sha256:1fd5520878b68b84b5748bb30e592b10d0a91529d5383f74f4964e72b297fd3a" 262 | ], 263 | "version": "==0.56.0" 264 | }, 265 | "zipp": { 266 | "hashes": [ 267 | "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", 268 | "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" 269 | ], 270 | "version": "==0.6.0" 271 | } 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Autovshard 2 | 3 | [![Build Status](https://travis-ci.org/bofm/tarantool-autovshard.svg?branch=master)](https://travis-ci.org/bofm/tarantool-autovshard) [![Coverage Status](https://coveralls.io/repos/github/bofm/tarantool-autovshard/badge.svg?branch=coverage)](https://coveralls.io/github/bofm/tarantool-autovshard?branch=coverage) 4 | 5 | A wrapper around Tarantool [Vshard](https://github.com/tarantool/vshard) with automatic master election, failover and 6 | centralized configuration storage in Consul. 7 | 8 | [![Sponsored by Avito](https://cdn.rawgit.com/css/csso/8d1b89211ac425909f735e7d5df87ee16c2feec6/docs/avito.svg)](https://www.avito.ru/) 9 | 10 | ## Features 11 | 12 | * Centralized config storage with [Consul](https://www.consul.io). 13 | * Automatic Vsahrd reconfiguration (both storage and router) when the config 14 | changes in Consul. 15 | * Automatic master election for each replicaset with a distributed lock with Consul. 16 | * Automatic failover when a master instance becomes unavailable. 17 | * Master weight to set the preferred master instance. 18 | * Switchover delay. 19 | 20 | ## Status 21 | 22 | * Works in [Avito](https://www.avito.ru) in production enviromnent. 23 | * Extensively tested. See [tests/bdd/features](tests/bdd/features) and [CI build logs](https://travis-ci.org/bofm/tarantool-autovshard/builds). 24 | * Use at your own risk. No guarantees. 25 | 26 | ## Usage 27 | 28 | 1. 
Put the Autovshard config to Consul KV under `<consul_kv_prefix>/<cluster_name>/autovshard_cfg_yaml`. 29 | 30 | ```yaml 31 | # autovshard_cfg.yaml 32 | rebalancer_max_receiving: 10 33 | bucket_count: 100 34 | rebalancer_disbalance_threshold: 10 35 | sharding: 36 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 37 | weight: 10 38 | replicas: 39 | aaaaaaaa-aaaa-aaaa-aaaa-000000000001: 40 | master_weight: 99 41 | switchover_delay: 10 42 | address: a1:3301 43 | name: a1 44 | master: false 45 | aaaaaaaa-aaaa-aaaa-aaaa-000000000002: 46 | master_weight: 20 47 | switchover_delay: 10 48 | address: a2:3301 49 | name: a2 50 | master: false 51 | bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb: 52 | weight: 10 53 | replicas: 54 | bbbbbbbb-bbbb-bbbb-bbbb-000000000001: 55 | master_weight: 10 56 | switchover_delay: 10 57 | address: b1:3301 58 | name: b1 59 | master: false 60 | bbbbbbbb-bbbb-bbbb-bbbb-000000000002: 61 | master_weight: 55 62 | switchover_delay: 10 63 | address: b2:3301 64 | name: b2 65 | master: false 66 | ``` 67 | 68 | ```sh 69 | #!/usr/bin/env sh 70 | 71 | cat autovshard_cfg.yaml | consul kv put "autovshard/mycluster/autovshard_cfg_yaml" - 72 | ``` 73 | 74 | ### Autovshard Consul config parameters 75 | 76 | The config is similar to the Vshard config, but it has some extra fields 77 | and an `address` field instead of `uri` because we don't want to 78 | mix the config with passwords. 79 | 80 | * `master_weight` - the instance with the highest weight in a replica set eventually gets the master role. This parameter is dynamic and can be changed by the administrator at any time. The number is used only for comparison with the *master_weight* values of the other members of a replica set. 81 | * `switchover_delay` - a delay in seconds to wait before taking the master role away from another running instance with a lower *master_weight*. This parameter is dynamic and can be changed by the administrator at any time. It is useful when the instance with the highest *master_weight* is restarted several times within a short period: as long as the instance stays up for less than the *switchover_delay*, there is no master switch (switchover) on every restart. Once the instance stays up for longer than the *switchover_delay*, it finally gets promoted to master. 82 | * `address` - TCP address of the Tarantool instance in this format: `<host>:<port>`. It is passed through to Vshard as part of the `uri` parameter. 83 | * `name` - same as *name* in Vshard. 84 | * `master` - same as *master* in Vshard. The role of the instance. **DO NOT set *master=true* for multiple instances in one replica set**. This parameter is changed dynamically during the lifecycle of Autovshard. It can also be changed by the administrator at any time. It is safe to set `master=false` for all instances. 85 | 86 | 2. Put this into your Tarantool `init.lua`. 87 | 88 | ```lua 89 | 90 | local box_cfg = { 91 | listen = 3301, -- required 92 | instance_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-000000000001", -- required for storage instances, prefer lowercase 93 | replicaset_uuid = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", -- required for storage instances, prefer lowercase 94 | replication_connect_quorum = 0, -- recommended, search Tarantool issue tracker for "quorum" and "bootstrap" 95 | replication_connect_timeout=5, -- to start faster when some replicas are unavailable 96 | -- ! 
DO NOT set the `replication` parameter, Vshard will take care of it 97 | -- specify any other box.cfg options 98 | } 99 | 100 | autovshard = require("autovshard").Autovshard.new{ 101 | box_cfg = box_cfg, -- Tarantool instance config 102 | cluster_name = "mycluster", -- the name of your sharding cluster 103 | login = "storage", -- login for Vshard 104 | password = "storage", -- password for Vshard 105 | consul_http_address = "http://127.0.0.1:8500", -- assuming Consul agent is running on localhost 106 | consul_token = nil, 107 | consul_kv_prefix = "autovshard", 108 | -- consul_session_ttl = 60 -- optional, not recommended to change, default is 15 seconds 109 | router = true, -- true for Vshard router instance 110 | storage = true, -- true for Vshard storage instance 111 | automaster = true, -- enables automatic master election and auto-failover 112 | } 113 | 114 | autovshard:start() -- autovshard will run in the background 115 | -- to stop it call autovshard:stop() 116 | 117 | -- This might be helpful (Tarantool >= 2.0) 118 | -- box.ctl.on_shutdown(function() autovshard:stop(); require("fiber").sleep(2) end) 119 | 120 | -- If you use package.reload (https://github.com/moonlibs/package-reload) 121 | -- package.reload:register(autovshard, autovshard.stop) 122 | 123 | ``` 124 | 125 | **Important:** If Consul is unreachable, the Tarantool instance is set to **read-only** mode. 126 | 127 | ### Autovshard Tarantool config parameters 128 | 129 | * `box_cfg` - table, parameters for the `box.cfg` call 130 | * `cluster_name` - string, the name of your sharding cluster 131 | * `login` - string, login for Vshard 132 | * `password` - string, password for Vshard 133 | * `consul_http_address` - a string with a Consul address or a table of multiple Consul addresses. 134 | Examples: `"http://127.0.0.1:8500"`, `{"https://consul1.example.com:8501", "https://consul2.example.com:8501"}` 135 | If multiple Consul addresses are set and Consul is unreachable at an address, Autovshard will use the 136 | next address from the array for the subsequent requests to Consul. 137 | **Note**: All addresses must point to the instances of the same Consul cluster in the 138 | same [Consul datacenter](https://www.consul.io/docs/commands/catalog/datacenters.html). 139 | * `consul_token` - optional string, Consul token (if you use ACLs) 140 | * `consul_kv_prefix` - string, a prefix in Consul KV storage. Must be the same on all instances in a Tarantool cluster. 141 | * `consul_session_ttl` - optional number, Consul session TTL. Not recommended to change, default is 15 seconds. Must be 142 | between 10 and 86400. 143 | * `router` - boolean, true for Vshard router instances 144 | * `storage` - boolean, true for Vshard storage instances 145 | * `automaster` - boolean, enables automatic master election and auto-failover 146 | 147 | ### See also 148 | 149 | * [examples](examples) 150 | * [docker-compose.yaml](docker-compose.yaml) 151 | 152 | ## Installation 153 | 154 | Luarocks sucks at pinning dependencies, and Vshard does not support (as of 2019-07-01) painless 155 | installation without Tarantool sources. Therefore Vshard is not mentioned in the rockspec. 156 | 157 | 1. Install [Vshard](https://github.com/tarantool/vshard) first. 158 | 2. Install Autovshard. Autovshard depends only on Vshard. 
Replace `<version>` with the [version](rockspecs) you want to install: 159 | ``` 160 | luarocks install "https://raw.githubusercontent.com/bofm/tarantool-autovshard/master/rockspecs/autovshard-<version>-1.rockspec" 161 | ``` 162 | or 163 | ``` 164 | tarantoolctl rocks install "https://raw.githubusercontent.com/bofm/tarantool-autovshard/master/rockspecs/autovshard-<version>-1.rockspec" 165 | ``` 166 | 167 | ## How it works 168 | 169 | Internally Autovshard does two things (which are almost independent of each other): 170 | * Watch the config in Consul and apply it as soon as it changes. Whatever the config is, it is converted to a Vshard config and passed to `vshard.storage.cfg()` and `vshard.router.cfg()` according to the parameters of the Autovshard Tarantool config. If Consul is unreachable, Autovshard sets the Tarantool instance to read-only mode to avoid having multiple master instances in a replicaset (this feature is called *fencing*). 171 | * Maintain master election with a distributed lock and change the config in Consul when the lock is acquired. This is done only on Vshard storage instances when `automaster` is enabled. **Autovshard only changes the `master` field** of the Autovshard Consul config. 172 | 173 | You can check out the [CI e2e test logs](https://travis-ci.org/bofm/tarantool-autovshard/builds) to get familiar with what Autovshard prints to the Tarantool log in different situations. 174 | 175 | ## Notes on Consul 176 | 177 | It is recommended to run a Consul agent on each server with Tarantool instances and set `consul_http_address` to the address of the agent on localhost. 178 | 179 | ## TODO 180 | 181 | - [x] More testing 182 | - [x] Integration testing and CI 183 | - [x] e2e tests with Gherkin and BDD 184 | - [ ] Improve logging 185 | - [ ] See todo's in the sources 186 | -------------------------------------------------------------------------------- /autovshard-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "scm-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | } 6 | description = { 7 | summary = "autovshard", 8 | detailed = [[ 9 | Vshard wrapper with automatic master election, failover and centralized 10 | configuration storage in Consul. 
11 | ]], 12 | homepage = "https://github.com/bofm/tarantool-autovshard", 13 | } 14 | dependencies = { 15 | "lua >= 5.1", 16 | } 17 | build = { 18 | type = "builtin", 19 | modules = { 20 | ["autovshard"] = "autovshard/init.lua", 21 | ["autovshard.util"] = "autovshard/util.lua", 22 | ["autovshard.consul"] = "autovshard/consul.lua", 23 | ["autovshard.wlock"] = "autovshard/wlock.lua", 24 | ["autovshard.config"] = "autovshard/config.lua", 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /autovshard/config.lua: -------------------------------------------------------------------------------- 1 | local yaml = require("yaml") 2 | 3 | local util = require("autovshard.util") 4 | 5 | local M = {} 6 | 7 | function M.get_replicaset_params(autovshard_cfg, replicaset_uuid) 8 | return util.table_get(autovshard_cfg, {"sharding", replicaset_uuid}) 9 | end 10 | 11 | -- sets master=true in autovshard config in Consul for a given instance_uuid 12 | -- and master=false for all other instance_uuids 13 | function M.promote_to_master(autovshard_cfg, replicaset_uuid, instance_uuid) 14 | local new_cfg = table.deepcopy(autovshard_cfg) 15 | -- config yaml: 16 | -- 17 | -- sharding: 18 | -- cb0e44ec-a468-4bcb-b6ff-341899c87d7c: 19 | -- replicas: 20 | -- 6dee1389-3984-4744-8ae1-6be55a92f66f: 21 | -- master_weight: 10 22 | -- switchover_delay: 10 23 | -- # master: true 24 | -- address: 127.0.0.1:3303 25 | -- name: t1 26 | -- 27 | local replicaset_params = M.get_replicaset_params(autovshard_cfg, replicaset_uuid) 28 | for replica_uuid, _ in pairs(replicaset_params.replicas) do 29 | new_cfg["sharding"][replicaset_uuid]["replicas"][replica_uuid]["master"] = 30 | replica_uuid == instance_uuid 31 | end 32 | return new_cfg 33 | end 34 | 35 | function M.set_instance_read_only(cfg, instance_uuid) 36 | local new_cfg = table.deepcopy(cfg) 37 | local changed = false 38 | for rs_uuid, rs in pairs(cfg.sharding) do 39 | for rs_param, rs_param_value in pairs(rs) do 40 | if rs_param == "replicas" then 41 | for replica_uuid, replica_params in pairs(rs_param_value) do 42 | if replica_uuid == instance_uuid and replica_params.master then 43 | new_cfg["sharding"][rs_uuid]["replicas"][replica_uuid]["master"] = false 44 | changed = true 45 | end 46 | end 47 | end 48 | end 49 | end 50 | return changed, new_cfg 51 | end 52 | 53 | function M.get_instance_params(autovshard_cfg, instance_uuid) 54 | for _, rs in pairs(autovshard_cfg.sharding) do 55 | for rs_param, rs_param_value in pairs(rs) do 56 | if rs_param == "replicas" then 57 | for replica_uuid, instance_params in pairs(rs_param_value) do 58 | if replica_uuid == instance_uuid then return instance_params end 59 | end 60 | end 61 | end 62 | end 63 | end 64 | 65 | function M.get_master_weight(autovshard_cfg, instance_uuid) 66 | local params = M.get_instance_params(autovshard_cfg, instance_uuid) 67 | return params and params.master_weight or 0 68 | end 69 | 70 | function M.get_switchover_delay(autovshard_cfg, instance_uuid) 71 | local params = M.get_instance_params(autovshard_cfg, instance_uuid) 72 | return params and params.switchover_delay 73 | end 74 | 75 | function M.is_master(autovshard_cfg, instance_uuid) 76 | local params = M.get_instance_params(autovshard_cfg, instance_uuid) 77 | return params ~= nil and params.master == true 78 | end 79 | 80 | function M.master_count(autovshard_cfg, replicaset_uuid) 81 | local rs = autovshard_cfg.sharding[replicaset_uuid] 82 | if not rs or not rs.replicas then return 0 end 83 | local master_count = 0 84 | for _, 
replica_params in pairs(rs.replicas) do 85 | if replica_params.master == true then -- 86 | master_count = master_count + 1 87 | end 88 | end 89 | return master_count 90 | end 91 | 92 | ---make_vshard_config 93 | ---@param autovshard_cfg table 94 | ---@param login string 95 | ---@param password string 96 | ---@param box_cfg table 97 | ---@return table 98 | function M.make_vshard_config(autovshard_cfg, login, password, box_cfg) 99 | local cfg = table.deepcopy(box_cfg) 100 | autovshard_cfg = table.deepcopy(autovshard_cfg) 101 | local sharding = autovshard_cfg.sharding 102 | -- sharding: 103 | -- cb0e44ec-a468-4bcb-b6ff-341899c87d7c: 104 | -- replicas: 105 | -- 6dee1389-3984-4744-8ae1-6be55a92f66f: 106 | -- master_weight: 10 107 | -- switchover_delay: 10 108 | -- # master: true 109 | -- address: 127.0.0.1:3303 110 | -- name: t1 111 | autovshard_cfg.sharding = nil 112 | util.table_update(cfg, autovshard_cfg) 113 | cfg.sharding = {} 114 | for rs_uuid, rs in pairs(sharding) do 115 | for rs_param, rs_param_value in pairs(rs) do 116 | if rs_param == "replicas" then 117 | for replica_uuid, replica in pairs(rs_param_value) do 118 | for replica_param, replica_param_value in pairs(replica) do 119 | if replica_param == "address" then 120 | util.table_set(cfg.sharding, {rs_uuid, rs_param, replica_uuid, "uri"}, 121 | string.format("%s:%s@%s", login, password, 122 | replica_param_value)) 123 | elseif replica_param == "switchover_delay" then 124 | -- Skip. This is an autovshard parameter. Not relevant for vshard. 125 | elseif replica_param == "master_weight" then 126 | -- Skip. This is an autovshard parameter. Not relevant for vshard. 127 | else 128 | util.table_set(cfg.sharding, 129 | {rs_uuid, rs_param, replica_uuid, replica_param}, 130 | replica_param_value) 131 | end 132 | end 133 | if replica_uuid == cfg.instance_uuid then 134 | cfg.replicaset_uuid = rs_uuid 135 | end 136 | end 137 | else 138 | util.table_set(cfg.sharding, {rs_uuid, rs_param}, rs_param_value) 139 | end 140 | end 141 | end 142 | return cfg 143 | end 144 | 145 | M.decode = yaml.decode 146 | M.encode = util.yaml_encode_pretty_mapping 147 | 148 | return M 149 | -------------------------------------------------------------------------------- /autovshard/consul.lua: -------------------------------------------------------------------------------- 1 | local json = require("json") 2 | local yaml = require("yaml") 3 | local digest = require("digest") 4 | local http = require("http.client") 5 | local fiber = require("fiber") 6 | local log = require("log") 7 | 8 | local util = require("autovshard.util") 9 | 10 | local _ 11 | 12 | -- in seconds 13 | local DEFAULT_WAIT = 20 14 | local HTTP_TIMEOUT = 2 15 | local RETRY_TIMEOUT = 2 16 | local DEFAULT_WATCH_RATE_LIMIT = 1 17 | local DEFAULT_WATCH_RATE_LIMIT_BURST = 10 18 | local DEFAULT_WATCH_RATE_LIMIT_INIT_BURST = 5 19 | 20 | local KV = {} 21 | KV.__index = KV 22 | 23 | local kv_valid_keys = { 24 | create_index = 0, 25 | modify_index = 0, 26 | lock_index = 0, 27 | key = 0, 28 | flags = 0, 29 | value = 0, 30 | session = 0, 31 | } 32 | 33 | function KV.__eq(self, other) 34 | for k, _ in pairs(kv_valid_keys) do if self[k] ~= other[k] then return false end end 35 | return true 36 | end 37 | 38 | function KV.new(kv) 39 | assert(type(kv) == "table", "KV.new parameter must be a table") 40 | assert(kv.key, "missing 'key' field") 41 | for k, _ in pairs(kv) do assert(kv_valid_keys[k], string.format("unexpected key: %s", k)) end 42 | return setmetatable(table.copy(kv), KV) 43 | end 44 | 45 | function 
KV.from_consul_response(kv) 46 | --[[ 47 | https://www.consul.io/api/kv.html 48 | Sample response: 49 | { 50 | "CreateIndex": 100, 51 | "ModifyIndex": 200, 52 | "LockIndex": 200, 53 | "Key": "zip", 54 | "Flags": 0, 55 | "Value": "dGVzdA==", 56 | "Session": "adf4238a-882b-9ddc-4a9d-5b6758e4159e" 57 | } 58 | ]] -- 59 | 60 | local value 61 | if kv.Value ~= nil then -- 62 | value = digest.base64_decode(kv.Value) 63 | end 64 | 65 | return KV.new{ 66 | create_index = kv.CreateIndex, 67 | modify_index = kv.ModifyIndex, 68 | lock_index = kv.LockIndex, 69 | key = kv.Key, 70 | flags = kv.Flags, 71 | value = value, 72 | session = kv.Session, 73 | } 74 | end 75 | 76 | local function url_params(params) 77 | local res = {} 78 | local sep = "?" 79 | for k, v in pairs(params) do 80 | table.insert(res, sep) 81 | table.insert(res, k) 82 | table.insert(res, "=") 83 | table.insert(res, v) 84 | sep = "&" 85 | end 86 | return table.concat(res) 87 | end 88 | 89 | local ConsulClient = {} 90 | ConsulClient.__index = ConsulClient 91 | 92 | function ConsulClient:put(key, value, cas, acquire) 93 | local response = self.request{ 94 | method = "PUT", 95 | url_path = {"kv", key}, 96 | body = value, 97 | params = {cas = cas or nil, acquire = acquire or nil}, 98 | } 99 | if response.status == 200 then 100 | local body = json.decode(response.body) 101 | return body == true 102 | else 103 | error(string.format("consul kv put error: %s", yaml.encode(response))) 104 | end 105 | end 106 | 107 | function ConsulClient:delete(key, cas) 108 | local response = self.request{ 109 | method = "DELETE", 110 | url_path = {"kv", key}, 111 | params = {cas = cas or nil}, 112 | } 113 | if response.status == 200 then 114 | local body = json.decode(response.body) 115 | assert(type(body) == "boolean") 116 | return body == true 117 | else 118 | error(string.format("consul kv delete error: %s", yaml.encode(response))) 119 | end 120 | end 121 | 122 | function ConsulClient:get(key, wait_seconds, index, prefix, consistent) 123 | wait_seconds = (wait_seconds or 0) 124 | 125 | -- https://www.consul.io/api/features/blocking.html 126 | -- A small random amount of additional wait time is added to the supplied 127 | -- maximum wait time to spread out the wake up time of any concurrent 128 | -- requests. This adds up to wait / 16 additional time to the maximum 129 | -- duration. 130 | wait_seconds = wait_seconds + wait_seconds / 16 131 | 132 | local response = self.request{ 133 | method = "GET", 134 | url_path = {"kv", key}, 135 | params = { 136 | wait = wait_seconds and (wait_seconds .. 
"s") or nil, 137 | index = index or nil, 138 | recurse = prefix and "" or nil, 139 | consistent = consistent and "" or nil, 140 | }, 141 | timeout = (wait_seconds or 0) + HTTP_TIMEOUT, 142 | } 143 | 144 | local new_index 145 | if response and response.headers and response.headers["x-consul-index"] then 146 | new_index = tonumber(response.headers["x-consul-index"]) 147 | end 148 | -- log.info("new_index %s", new_index) 149 | -- if response and response.headers then 150 | -- for k, v in pairs(response.headers) do 151 | -- log.info("response.headers %s %s", k, v) 152 | -- end 153 | -- end 154 | 155 | if new_index then 156 | if new_index <= 0 then 157 | -- https://www.consul.io/api/features/blocking.html 158 | -- Sanity check index is greater than zero 159 | error(string.format('Consul kv "%s" modify index=%d <= 0', key, new_index)) 160 | elseif index and new_index < index then 161 | -- https://www.consul.io/api/features/blocking.html 162 | -- Implementations must check to see if a returned index is lower than 163 | -- the previous value, and if it is, should reset index to 0 164 | new_index = 0 165 | end 166 | end 167 | 168 | if response.status == 200 then 169 | local body = json.decode(response.body) 170 | assert(type(body) == "table") 171 | assert(body[1], string.format("empty array in the response, %s", body)) 172 | assert(body[1].Value, string.format("missing Value field in the response, %s", body)) 173 | local result 174 | if prefix then 175 | result = {} 176 | for _, kv in ipairs(body) do 177 | table.insert(result, KV.from_consul_response(kv)) 178 | end 179 | else 180 | result = KV.from_consul_response(body[1]) 181 | end 182 | return result, new_index 183 | elseif response.status == 404 then 184 | return nil, new_index 185 | else 186 | error(string.format("consul kv get error: %s", yaml.encode(response))) 187 | end 188 | end 189 | 190 | local function watch_error(err) log.error("error in Consul watch: " .. 
tostring(err)) end 191 | 192 | --- Watch consul key 193 | -- @tparam table opts available options are: 194 | -- @tparam string opts.key key 195 | -- @tparam bool opts.prefix watch key prefix 196 | -- @tparam number opts.wait_seconds long polling wait time 197 | -- @tparam function opts.on_change 198 | -- @tparam ?function opts.on_error 199 | -- @tparam ?number opts.rate_limit 200 | -- @tparam ?number opts.rate_limit_burst 201 | -- @tparam ?number opts.index index for CAS operation 202 | -- @return[1] fiber 203 | -- @treturn[2] function a function to stop the watch fiber 204 | function ConsulClient:watch(opts) 205 | if type(opts.key) ~= "string" then error("bad or missing key") end 206 | if type(opts.on_change) ~= "function" then error("on_change missing or not a function") end 207 | if opts.on_error and type(opts.on_error) ~= "function" then 208 | error("on_error is not a function") 209 | end 210 | local wait_seconds = opts.wait_seconds or DEFAULT_WAIT 211 | 212 | -- see Rate Limit section on https://www.consul.io/api/features/blocking.html 213 | local rate_limit = opts and opts.rate_limit or DEFAULT_WATCH_RATE_LIMIT 214 | local rate_limit_burst = opts and opts.rate_limit_burst or DEFAULT_WATCH_RATE_LIMIT_BURST 215 | local rate_limit_init_burst = opts and opts.rate_limit_init_burst or 216 | DEFAULT_WATCH_RATE_LIMIT_INIT_BURST 217 | 218 | local key = opts.key 219 | local on_change = opts.on_change 220 | local on_error = opts.on_error or watch_error 221 | 222 | local prev_kv = {} 223 | local prev_index = opts.index 224 | local done_ch = fiber.channel() 225 | 226 | local got_error 227 | 228 | local function error_handler(err) 229 | got_error = true 230 | on_error(err) 231 | end 232 | 233 | local function get() 234 | if got_error then 235 | prev_index = 0 236 | done_ch:get(RETRY_TIMEOUT) 237 | if done_ch:is_closed() then return end 238 | end 239 | local kv, index = self:get(key, wait_seconds, prev_index, opts.prefix, opts.consistent) 240 | if done_ch:is_closed() then return end 241 | local changed = index ~= prev_index 242 | prev_kv, prev_index = kv, index 243 | if changed then on_change(kv, index) end 244 | got_error = false 245 | end 246 | 247 | get = util.rate_limited(get, rate_limit, rate_limit_burst, rate_limit_init_burst) 248 | 249 | local watcher = fiber.create(function() 250 | repeat xpcall(get, error_handler) until done_ch:is_closed() 251 | end) 252 | watcher:name("consul_watch_" .. key, {truncate = true}) 253 | 254 | return watcher, util.partial(done_ch.close, done_ch) 255 | end 256 | 
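-- A minimal usage sketch of ConsulClient:watch (the key and callback body are
-- illustrative only, they are not defined elsewhere in this repo):
--
--   local client = ConsulClient.new("http://127.0.0.1:8500")
--   local watcher, stop = client:watch{
--       key = "autovshard/mycluster/autovshard_cfg_yaml",
--       on_change = function(kv, index)
--           -- kv is nil when the key is absent (HTTP 404)
--           log.info("kv changed, index=%s value=%s", index, kv and kv.value)
--       end,
--   }
--   -- ... later, to cancel the watch fiber:
--   stop()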
257 | local function make_request(http_client, http_addresses, default_headers) 258 | assert(type(http_addresses) == "table", "http_addresses must be a table") 259 | 260 | local function is_bad_response(response) 261 | return not response or not response.headers or 262 | (response.status and response.status >= 500 and response.status < 600) 263 | end 264 | 265 | local address_count = #http_addresses 266 | local next_address = util.cycle(http_addresses) 267 | local address = next_address() 268 | 269 | return function(options) 270 | assert(options.method, "method must be set") 271 | local url 272 | if options.url then 273 | url = util.urljoin(address, "v1", options.url) 274 | elseif options.url_path then 275 | assert(type(options.url_path) == "table" and #options.url_path > 0, 276 | "url_path must be a non-empty array") 277 | url = util.urljoin(address, "v1", unpack(options.url_path)) 278 | else 279 | error("url or url_path must be set") 280 | end 281 | local headers = table.copy(default_headers) 282 | if options.headers then headers = util.table_update(headers, options.headers) end 283 | if options.params then url = url .. url_params(options.params) end 284 | local body = options.body 285 | if options.json then 286 | body = json.encode(options.json) 287 | headers["Content-Type"] = "application/json" 288 | end 289 | local opts = {timeout = options.timeout or HTTP_TIMEOUT, headers = headers} 290 | if options.opts then util.table_update(opts, options.opts) end 291 | -- log.info(require("yaml").encode({"request", options.method, url, {body = body}, opts})) 292 | local response = util.ok_or_log_error(http_client.request, http_client, options.method, 293 | url, body, opts) 294 | if address_count > 1 and is_bad_response(response) then -- 295 | log.error("Got bad Consul HTTP response from %q. Will try another address.", address) 296 | address = next_address() 297 | end 298 | return response 299 | end 300 | end 301 | 302 | --- Create Consul client 303 | -- @tparam ?string|table consul_http_address 304 | -- @tparam table opts available options are: 305 | -- @tparam string opts.token 306 | -- @return consul client 307 | function ConsulClient.new(consul_http_address, opts) 308 | if consul_http_address == nil then 309 | consul_http_address = {"http://localhost:8500"} 310 | elseif type(consul_http_address) == "table" then 311 | -- ok 312 | elseif type(consul_http_address) == "string" then 313 | consul_http_address = {consul_http_address} 314 | else 315 | error("bad consul_http_address: " .. 
tostring(consul_http_address)) 316 | end 317 | if opts and type(opts) ~= "table" then error("opts must be a table or nil") end 318 | 319 | local c = {} 320 | 321 | c.token = opts and opts.token 322 | local default_headers = {["X-Consul-Token"] = opts and opts.token or nil} 323 | c.http_client = http.new({1}) 324 | c.http_address = consul_http_address 325 | c.request = make_request(c.http_client, consul_http_address, default_headers) 326 | assert(c.request) 327 | return setmetatable(c, ConsulClient) 328 | end 329 | 330 | local Session = {} 331 | Session.__index = Session 332 | 333 | function ConsulClient:session(ttl, behavior) 334 | local session = setmetatable({}, Session) 335 | session.consul = assert(self) 336 | session.behavior = behavior or "delete" 337 | session.ttl = assert(ttl, "bad session ttl") 338 | 339 | local response = self.request{ 340 | method = "PUT", 341 | url = "session/create", 342 | json = {["TTL"] = session.ttl .. "s", ["Behavior"] = session.behavior}, 343 | } 344 | if response.status == 200 then 345 | local body = json.decode(response.body) 346 | assert(type(body.ID) == "string", string.format( 347 | "could not create Consul session, missing or bad ID field in the response, %s", 348 | body)) 349 | session.id = body.ID 350 | else 351 | error(string.format("could not create Consul session, unknown response: %s %s", 352 | response.status, response.reason)) 353 | end 354 | 355 | return session 356 | end 357 | 358 | local function rstrip_s(str) 359 | if string.sub(str, string.len(str)) == "s" then -- 360 | return string.sub(str, 1, string.len(str) - 1) 361 | end 362 | return str 363 | end 364 | 365 | function Session:renew() 366 | local response = self.consul.request{method = "PUT", url_path = {"session/renew", self.id}} 367 | if response.status == 200 then 368 | local session_json = json.decode(response.body)[1] 369 | assert(session_json.ID == self.id) 370 | -- Note: Consul may return a TTL value higher than the one specified during session 371 | -- creation. This indicates the server is under high load and is requesting clients 372 | -- renew less often. 373 | self.ttl = tonumber( 374 | rstrip_s(assert(session_json.TTL, "missing TTL in session renew response"))) 375 | return true, session_json 376 | elseif response.status == 404 then 377 | return false, json.decode(response.body)[1] 378 | else 379 | error(string.format("could not renew Consul session %s, unknown response: %s %s", 380 | self.id, response.status, response.reason)) 381 | end 382 | end 383 | 384 | function Session:delete() 385 | local response = self.consul.request{method = "PUT", url_path = {"session/destroy", self.id}} 386 | if response.status ~= 200 then 387 | error(string.format("could not delete Consul session %s, unknown response: %s %s", self.id, 388 | response.status, response.reason)) 389 | end 390 | local ok, response_json = pcall(json.decode, response.body) 391 | return ok and response_json == true 392 | end 393 | 394 | return {ConsulClient = ConsulClient, Session = Session, KV = KV} 395 | -------------------------------------------------------------------------------- /autovshard/init.lua: -------------------------------------------------------------------------------- 1 | local yaml = require("yaml") 2 | local log = require("log") 3 | local fiber = require("fiber") 4 | 5 | local vshard = require("vshard") 6 | local consul = require("autovshard.consul") 7 | local util = require("autovshard.util") 8 | local wlock = require("autovshard.wlock") 9 | local config = require("autovshard.config") 10 | 11 | local CONSUL_CONFIG_KEY = "autovshard_cfg_yaml" 12 | local ERR_FIBER_CANCELLED = "fiber is cancelled" 13 | 14 | -- events 15 | local EVENT_STOP = "STOP" 16 | local EVENT_LOCK_LOCKED = "LOCK_LOCKED" 17 | local EVENT_LOCK_RELEASED = "LOCK_RELEASED" 18 | local EVENT_NEW_CONFIG = "NEW_CONFIG" 19 | local EVENT_CONFIG_REMOVED = "CONFIG_REMOVED" 20 | local EVENT_CONSUL_ERROR = "CONSUL_ERROR" 21 | 22 | local function lock_manager(events, lock) 23 | local done 24 | local ok, err = pcall(function() 25 | log.info("autovshard: lock manager started") 26 | while true do 27 | local locked 28 | done = fiber.channel() 29 | locked = lock:acquire(done) 30 | if locked then 31 | log.info("autovshard: lock acquired") 32 | events:put{EVENT_LOCK_LOCKED} 33 | done:get() 34 | log.info("autovshard: lock released") 35 | events:put{EVENT_LOCK_RELEASED} 36 | end 37 | end 38 | end) 39 | if done ~= nil then done:close() end 40 | if not ok and tostring(err) ~= ERR_FIBER_CANCELLED then 41 | log.error("autovshard: lock manager error: %s", err) 42 | end 43 | log.info("autovshard: lock manager stopped") 44 | end 45 | 46 | local function watch_config(events, consul_client, consul_kv_config_path) 47 | local _, stop_watching = consul_client:watch{ 48 | key = consul_kv_config_path, 49 | on_change = function(kv) 50 | if kv and kv.value then 51 | local autovshard_cfg = util.ok_or_log_error(config.decode, kv.value) 52 | if autovshard_cfg then 53 | 54 | events:put{EVENT_NEW_CONFIG, {autovshard_cfg, kv.modify_index}} 55 | end 56 | else 57 | -- config removed 58 | log.error("autovshard: config not found in Consul") 59 | events:put{EVENT_CONFIG_REMOVED} 60 | end 61 | end, 62 | on_error = function(err) 63 | log.error("autovshard: config watch error: %s", err) 64 | events:put{EVENT_CONSUL_ERROR, err} 65 | end, 66 | consistent = true, 67 | } 68 | return stop_watching 69 | end 70 | 71 | local Autovshard = {} 72 | Autovshard.__index = Autovshard 73 | 74 | function Autovshard:_validate_opts(opts) 75 | assert(type(opts) == "table", "opts must be a table") 76 | 77 | assert(type(opts.router) == "boolean", "missing or bad router parameter") 78 | assert(type(opts.storage) == "boolean", "missing or bad storage parameter") 79 | assert(opts.router or opts.storage, "at least one of [router, storage] must be true") 80 | 81 | -- validate box_cfg
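-- box_cfg.instance_uuid and box_cfg.replicaset_uuid are how autovshard locates
-- this instance in the Consul config and in its replica set (see
-- config.get_instance_params and _promote_to_master), hence the strict checks
-- for storage instances below.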
82 | assert(type(opts.box_cfg) == "table", "missing or bad box_cfg parameter") 83 | if opts.storage then 84 | assert(type(opts.box_cfg.instance_uuid) == "string", 85 | "missing or bad box_cfg.instance_uuid parameter") 86 | assert(type(opts.box_cfg.replicaset_uuid) == "string", 87 | "missing or bad box_cfg.replicaset_uuid parameter") 88 | end 89 | 90 | assert(type(opts.cluster_name) == "string", "missing or bad cluster_name parameter") 91 | assert(type(opts.password) == "string", "missing or bad password parameter") 92 | assert(type(opts.login) == "string", "missing or bad login parameter") 93 | 94 | if type(opts.consul_http_address) == "table" then 95 | for _, a in pairs(opts.consul_http_address) do 96 | assert(type(a) == "string", "missing or bad consul_http_address parameter") 97 | end 98 | else 99 | assert(type(opts.consul_http_address) == "string", 100 | "missing or bad consul_http_address parameter") 101 | end 102 | assert(type(opts.consul_token) == "string" or opts.consul_token == nil, 103 | "bad consul_token parameter") 104 | assert(type(opts.consul_kv_prefix) == "string", "missing or bad consul_kv_prefix parameter") 105 | 106 | assert(opts.consul_session_ttl == nil or 107 | (type(opts.consul_session_ttl) == "number" and opts.consul_session_ttl >= 10 and 108 | opts.consul_session_ttl <= 86400), 109 | "consul_session_ttl must be a number between 10 and 86400") 110 | end 111 | 112 | ---@tparam table opts available options are 113 | ---@tparam table opts.box_cfg 114 | ---@tparam string opts.cluster_name 115 | ---@tparam string opts.login 116 | ---@tparam string opts.password 117 | ---@tparam string|table opts.consul_http_address 118 | ---@tparam string opts.consul_token 119 | ---@tparam string opts.consul_kv_prefix 120 | ---@tparam boolean opts.router 121 | ---@tparam boolean opts.storage 122 | ---@tparam boolean opts.automaster 123 | ---@tparam number opts.consul_session_ttl 124 | --- 125 | function Autovshard.new(opts) 126 | local self = setmetatable({}, Autovshard) 127 | self:_validate_opts(opts) 128 | 129 | -- immutable attributes 130 | self.box_cfg = opts.box_cfg 131 | self.cluster_name = opts.cluster_name 132 | self.password = opts.password 133 | self.login = opts.login 134 | self.consul_http_address = opts.consul_http_address 135 | self.consul_token = opts.consul_token 136 | self.consul_kv_prefix = opts.consul_kv_prefix 137 | self.router = opts.router and true or false 138 | self.storage = opts.storage and true or false 139 | self.automaster = opts.automaster and true or false 140 | self.consul_session_ttl = opts.consul_session_ttl or 15 141 | 142 | self.consul_kv_config_path = util.urljoin(self.consul_kv_prefix, self.cluster_name, 143 | CONSUL_CONFIG_KEY) 144 | 145 | self.consul_client = consul.ConsulClient.new(self.consul_http_address, 146 | {token = self.consul_token}) 147 | return self 148 | end 149 | 150 | function Autovshard:_vshard_apply_config(vshard_cfg) 151 | local config_yaml = yaml.encode{cfg = vshard_cfg} 152 | 153 | -- sanitize config, replace passwords 154 | -- uri: username:password@host:3301 155 | config_yaml = config_yaml:gsub("(uri%s*:[^:]+:)[^@]+(@)", "%1%2") 156 | 157 | log.info("autovshard: applying vshard config:\n" .. 
config_yaml) 158 | if self.storage then 159 | util.ok_or_log_error(vshard.storage.cfg, vshard_cfg, self.box_cfg.instance_uuid) 160 | util.ok_or_log_error(vshard.storage.rebalancer_wakeup) 161 | end 162 | if self.router then 163 | util.ok_or_log_error(vshard.router.cfg, vshard_cfg) 164 | util.ok_or_log_error(vshard.router.bootstrap) 165 | util.ok_or_log_error(vshard.router.discovery_wakeup) 166 | end 167 | end 168 | 169 | function Autovshard:_set_instance_read_only(autovshard_cfg) 170 | local _, already_read_only = pcall(function() return box.info().ro end) 171 | if already_read_only then return end 172 | 173 | local vshard_cfg = config.make_vshard_config(autovshard_cfg, self.login, self.password, 174 | self.box_cfg) 175 | local changed, new_vshard_cfg = config.set_instance_read_only(vshard_cfg, 176 | self.box_cfg.instance_uuid) 177 | if changed then 178 | log.info("autovshard: setting instance to read-only...") 179 | self:_vshard_apply_config(new_vshard_cfg) 180 | end 181 | end 182 | 183 | function Autovshard:_promote_to_master(autovshard_cfg, cfg_modify_index) 184 | log.info("autovshard: promoting this Tarantool instance_uuid=%q to master", 185 | self.box_cfg.instance_uuid) 186 | local new_cfg = config.promote_to_master(autovshard_cfg, self.box_cfg.replicaset_uuid, 187 | self.box_cfg.instance_uuid) 188 | 189 | -- update autovshard config in Consul 190 | local ok = util.ok_or_log_error(self.consul_client.put, self.consul_client, 191 | self.consul_kv_config_path, config.encode(new_cfg), 192 | cfg_modify_index) 193 | if not ok then 194 | log.error("autovshard: failed promoting this Tarantool " .. 195 | "instance_uuid=%q to master, will retry later", self.box_cfg.instance_uuid) 196 | end 197 | return ok 198 | end 199 | 200 | function Autovshard:_mainloop() 201 | self.events = fiber.channel() 202 | 203 | local cfg 204 | local cfg_modify_index 205 | 206 | local lock 207 | local lock_fiber 208 | 209 | local locked = false 210 | local bootstrap_done = false 211 | 212 | local stop_watch_config = watch_config(self.events, self.consul_client, 213 | self.consul_kv_config_path) 214 | 215 | while true do 216 | -- ! To avoid deadlock DO NOT put into `self.events` channel IN THIS FIBER. 217 | -- * Only get events from `self.events` channel and react to the events. 
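-- Each message is an array of {event_name, payload}: EVENT_NEW_CONFIG carries
-- {autovshard_cfg, consul_modify_index}, EVENT_CONSUL_ERROR carries the error,
-- and the lock/stop events carry no payload.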
218 | local msg = self.events:get() 219 | local event, data = unpack(msg) 220 | log.info("autovshard: got event: %s", event) 221 | 222 | if event == EVENT_STOP then 223 | self.events:close() 224 | stop_watch_config() 225 | if lock_fiber then pcall(lock_fiber.cancel, lock_fiber) end 226 | break 227 | elseif event == EVENT_LOCK_LOCKED then 228 | locked = true 229 | self:_promote_to_master(cfg, cfg_modify_index) 230 | elseif event == EVENT_LOCK_RELEASED then 231 | locked = false 232 | elseif event == EVENT_CONSUL_ERROR then 233 | if self.storage and cfg and bootstrap_done then 234 | self:_set_instance_read_only(cfg) 235 | end 236 | elseif event == EVENT_NEW_CONFIG then 237 | cfg, cfg_modify_index = unpack(data) 238 | assert(cfg, "autovshard: missing cfg in EVENT_NEW_CONFIG") 239 | assert(cfg_modify_index, "autovshard: missing cfg_modify_index in EVENT_NEW_CONFIG") 240 | 241 | -- reconfigure vshard 242 | 243 | local allow_apply_config = true 244 | 245 | if self.storage then 246 | 247 | if config.get_instance_params(cfg, self.box_cfg.instance_uuid) == nil then 248 | -- For storage instances we need to verify that the instance 249 | -- is present in the cluster config, otherwise the instance 250 | -- may boot as a separate cluster 251 | 252 | allow_apply_config = false 253 | 254 | log.info("autovshard: won't apply the config, instance_uuid=%q ".. 255 | "not found in consul config", self.box_cfg.instance_uuid) 256 | elseif not bootstrap_done and config.master_count(cfg, self.box_cfg.replicaset_uuid) ~= 1 then 257 | -- For storage instances we need to check for bootstrap_done to 258 | -- handle the case when this is the first ever call to 259 | -- vshard.storage.cfg(cfg) on the current instance. 260 | -- If there is no master defined in cfg for the current 261 | -- replica set, then the vshard.storage.cfg call will block forever 262 | -- with these messages in the log: 263 | -- 264 | -- E> ER_LOADING: Instance bootstrap hasn't finished yet 265 | -- I> will retry every 1.00 second 266 | -- 267 | -- During bootstrap we should first elect a master, so we ignore 268 | -- the config when no master is set for the current replica set. 269 | 270 | allow_apply_config = false 271 | 272 | log.info("autovshard: won't apply the config, master_count != 1, " .. 
273 | "cannot bootstrap with this config.") 274 | end 275 | 276 | end 277 | 278 | -- [TODO] do not apply new config if it is the same as the current one 279 | if allow_apply_config then 280 | self:_vshard_apply_config(config.make_vshard_config(cfg, self.login, self.password, 281 | self.box_cfg)) 282 | 283 | bootstrap_done = true 284 | end 285 | 286 | 287 | if self.storage and self.automaster then 288 | -- maybe update lock weight 289 | local lock_weight = assert( 290 | config.get_master_weight(cfg, self.box_cfg.instance_uuid), 291 | "cannot get master weight") 292 | if lock and lock_weight ~= lock.weight then 293 | util.ok_or_log_error(lock.set_weight, lock, lock_weight) 294 | end 295 | 296 | -- maybe update lock delay 297 | local lock_delay = config.get_switchover_delay(cfg, self.box_cfg.instance_uuid) or 298 | 0 299 | if lock and lock_delay ~= lock.delay then 300 | util.ok_or_log_error(lock.set_delay, lock, lock_delay) 301 | end 302 | 303 | -- start lock manager 304 | if not lock_fiber then 305 | local lock_prefix = util.urljoin(self.consul_kv_prefix, self.cluster_name, 306 | self.box_cfg.replicaset_uuid) 307 | lock = wlock.WLock.new(self.consul_client, lock_prefix, lock_weight, 308 | lock_delay, 309 | {instance_uuid = self.box_cfg.instance_uuid}, 310 | self.consul_session_ttl) 311 | 312 | lock_fiber = fiber.new(util.ok_or_log_error, lock_manager, self.events, lock) 313 | lock_fiber:name("autovshard_lock_manager", {truncate = true}) 314 | end 315 | 316 | log.debug("autovshard: locked: %s, is_master: %s", locked, 317 | config.is_master(cfg, self.box_cfg.instance_uuid)) 318 | if locked and (not config.is_master(cfg, self.box_cfg.instance_uuid) or 319 | not config.master_count(cfg, self.box_cfg.replicaset_uuid) == 1) then 320 | self:_promote_to_master(cfg, cfg_modify_index) 321 | end 322 | end 323 | elseif event == EVENT_CONFIG_REMOVED then 324 | if self.storage and cfg and bootstrap_done then 325 | self:_set_instance_read_only(cfg) 326 | end 327 | end 328 | end 329 | end 330 | 331 | function Autovshard:stop() 332 | if not self.started then return false end 333 | self.events:put{EVENT_STOP} 334 | self.started = false 335 | return true 336 | end 337 | 338 | function Autovshard:start() 339 | if self.started then return false end 340 | self.started = true 341 | fiber.create(self._mainloop, self) 342 | return true 343 | end 344 | 345 | return {Autovshard = Autovshard, _VERSION = "1.0.2"} 346 | -------------------------------------------------------------------------------- /autovshard/util.lua: -------------------------------------------------------------------------------- 1 | local yaml = require("yaml") 2 | local fiber = require("fiber") 3 | local log = require("log") 4 | 5 | local _ 6 | local util = {} 7 | 8 | -- token bucket rate limiter 9 | function util.rate_limited(fn, rate, burst, initial_burst) 10 | assert(rate > 0, "rate must be > 0") 11 | assert(rate < 1000, "too high rate, this will not work well") 12 | assert(burst >= 0 and burst <= 1000, "burst must be between 0 and 1000") 13 | initial_burst = initial_burst or 0 14 | assert(initial_burst <= burst, "initial_burst must be <= burst") 15 | 16 | local throttle = fiber.channel(burst) 17 | for _ = 1, initial_burst do throttle:put(1) end 18 | 19 | fiber.create(function() 20 | local sleep_time = 1 / rate 21 | while throttle:put(1) do fiber.sleep(sleep_time) end 22 | end) 23 | 24 | local weakrefs = setmetatable({}, {__mode = "v"}) 25 | 26 | fiber.new(function() 27 | while weakrefs.f do fiber.sleep(1) end 28 | throttle:close() 29 | end) 30 
| 31 | local function wrapper(...) 32 | throttle:get() 33 | return fn(...) 34 | end 35 | 36 | weakrefs.f = wrapper 37 | 38 | return wrapper 39 | end 40 | 41 | local function loop(stopped, fn, ...) repeat fn(...) until stopped() end 42 | 43 | util.loop = loop 44 | 45 | function util.fiber_loop(id, fn, ...) 46 | local stop_flag = false 47 | local function stopped() return stop_flag end 48 | 49 | local function stop() stop_flag = true end 50 | 51 | local f = fiber.create(util.loop, stopped, fn, ...) 52 | f:name("loop_" .. id, {truncate = true}) 53 | return f, stop 54 | end 55 | 56 | function util.table_update(t1, t2) for k, v in pairs(t2) do t1[k] = v end end 57 | 58 | function util.urljoin(...) 59 | local parts = {...} 60 | local new_parts = {} 61 | for _, part in ipairs(parts) do 62 | part = part:gsub("/*$", ""):gsub("^/*", "") 63 | table.insert(new_parts, part) 64 | end 65 | return table.concat(new_parts, "/") 66 | end 67 | 68 | function util.pluck(records, key) 69 | local res = {} 70 | for _, v in pairs(records) do table.insert(res, v[key]) end 71 | return res 72 | end 73 | 74 | function util.has(t, member) 75 | for _, m in pairs(t) do if m == member then return true end end 76 | return false 77 | end 78 | 79 | function util.keys(t) 80 | local keys = {} 81 | for key, _ in pairs(t) do table.insert(keys, key) end 82 | return keys 83 | end 84 | 85 | function util.table_set(t, path, value) 86 | for i, k in ipairs(path) do 87 | if i == #path then 88 | t[k] = value 89 | else 90 | local v = t[k] 91 | if v == nil then 92 | v = {} 93 | t[k] = v 94 | end 95 | t = v 96 | end 97 | end 98 | end 99 | 100 | function util.table_get(t, path, default) 101 | for _, k in ipairs(path) do 102 | t = t[k] 103 | if t == nil then return default end 104 | end 105 | return t 106 | end 107 | 108 | local function partial(f, first_arg, ...) 109 | if first_arg == nil then return f end 110 | return partial(function(...) return f(first_arg, ...) end, ...) 111 | end 112 | 113 | util.partial = partial 114 | 115 | function util.compose(...) 116 | local fs = {...} 117 | local function recurse(i, ...) 118 | if i == 1 then return fs[i](...) end 119 | return recurse(i - 1, fs[i](...)) 120 | end 121 | return function(...) return recurse(#fs, ...) end 122 | end 123 | 124 | function util.ok_or_log_error(fn, ...) return select(2, xpcall(fn, log.error, ...)) end 125 | 126 | function util.select(channels_or_conds, timeout) 127 | local first_channel, first_message 128 | local cond = fiber.cond() 129 | local fibers = {} 130 | for _, c in ipairs(channels_or_conds) do 131 | table.insert(fibers, fiber.new(function() 132 | local msg 133 | if c.get then 134 | -- c is a fiber.channel 135 | msg = c:get() 136 | else 137 | -- c is a fiber.cond 138 | msg = c:wait() 139 | end 140 | if not first_channel then first_message, first_channel = msg, c end 141 | cond:signal() 142 | end)) 143 | end 144 | local ok, got_signal = pcall(cond.wait, cond, timeout) 145 | for _, f in ipairs(fibers) do pcall(f.cancel, f) end 146 | if ok and got_signal then return first_channel, first_message end 147 | return nil, "timeout" 148 | end 149 | 150 | function util.string_replace(s, what, with, n) 151 | local i, pos = 0, 1 152 | while n == nil or i < n do 153 | local b, e = s:find(what, pos, true) 154 | if b == nil then break end 155 | i, pos = i + 1, b + #with 156 | s = s:sub(1, b - 1) .. with .. 
s:sub(e + 1) 157 | end 158 | return s 159 | end 160 | 161 | local function with_mt_serialize(t, serialize, recursion_lvl) 162 | recursion_lvl = recursion_lvl or 0 163 | assert(recursion_lvl <= 20, "table is nested too deeply") 164 | if recursion_lvl == 0 then t = table.deepcopy(t) end 165 | for k, v in pairs(t) do 166 | if type(v) == "table" then -- 167 | with_mt_serialize(v, serialize, recursion_lvl + 1) 168 | end 169 | end 170 | return setmetatable(t, {__serialize = serialize}) 171 | end 172 | 173 | function util.yaml_encode_pretty_mapping(t) -- 174 | return yaml.encode(with_mt_serialize(t, "mapping")) 175 | end 176 | 177 | local function finalize(finalizer, ...) 178 | finalizer() 179 | return ... 180 | end 181 | 182 | function util.finally(finalizer, fn, ...) -- 183 | return finalize(finalizer, util.ok_or_log_error(fn, ...)) 184 | end 185 | 186 | function util.deepcompare(a, b) 187 | if type(b) == "number" or type(a) == "number" then 188 | if a ~= a and b ~= b then 189 | return true -- nan 190 | end 191 | return a == b 192 | end 193 | 194 | -- booleans and other non-table scalars are handled by the direct comparison below 195 | 196 | 197 | if type(a) ~= "table" or type(b) ~= "table" then return a == b end 198 | 199 | local visited_keys = {} 200 | 201 | for i, v in pairs(a) do 202 | visited_keys[i] = true 203 | if not util.deepcompare(v, b[i]) then return false end 204 | end 205 | 206 | -- check if expected contains more keys than got 207 | for i, _ in pairs(b) do if visited_keys[i] ~= true then return false end end 208 | 209 | return true 210 | end 211 | 212 | function util.cycle(t) 213 | local n = #t 214 | if n == 1 then 215 | local res = t[1] 216 | return function() return res end 217 | end 218 | local i = 0 219 | return function() 220 | i = i >= n and 1 or i + 1 221 | return t[i] 222 | end 223 | end 224 | 225 | return util 226 | -------------------------------------------------------------------------------- /autovshard/wlock.lua: -------------------------------------------------------------------------------- 1 | -- Wlock 2 | -- `W` stands for 2 things: 3 | -- 1. Weight. A Wlock has a weight. A contender with a higher weight gets the lock. 4 | -- 2. Wait. A Wlock can be configured to wait for `delay` before acquiring the 5 | -- lock which is already held by another contender with a lower weight. 
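-- A minimal usage sketch (hypothetical client and values; see WLock.new() and
-- WLock:acquire() below for the actual contracts):
--
--   local fiber = require("fiber")
--   local wlock = require("autovshard.wlock")
--   local lock = wlock.WLock.new(consul_client, "autovshard/mycluster/<rs-uuid>",
--                                10, 30, {instance_uuid = "..."}, 15)
--   local done_ch = fiber.channel()
--   if lock:acquire(done_ch) then
--       -- the lock is held until done_ch is closed (by us or on session loss)
--   end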
6 | -- 7 | -- Inspired by: 8 | -- 9 | -- Consul KV store structure: 10 | -- /session1: value={weight:10}, session=session1 11 | -- /session2: value={weight:20}, session=session2 12 | -- /lock: value={holder:session2} 13 | -- 14 | -- The lock is considered held if there are: 15 | -- * a `session key` with an alive Consul session associated with the key 16 | -- * a `lock key` with the session (a Wlock holder) as the `holder` field value 17 | -- 18 | local json = require("json") 19 | local fiber = require("fiber") 20 | local log = require("log") 21 | local uuid = require("uuid") 22 | 23 | local util = require("autovshard.util") 24 | local _ 25 | 26 | local M = {} 27 | 28 | local CONSUL_LOCK_KEY = "lock" 29 | local CONSUL_DEFAULT_SESSION_TTL = 15 30 | local RETRY_TIMEOUT = 10 31 | 32 | local WLock = {} 33 | M.WLock = WLock 34 | WLock.__index = WLock 35 | 36 | ---@param consul_client any 37 | ---@param kv_prefix string 38 | ---@param weight number lock weight 39 | ---@param delay number delay in seconds to wait before taking the lock away from another contender 40 | ---@param info any a json/yaml serializable object to attach to the lock for information purposes 41 | ---@param session_ttl number Consul session TTL in seconds 42 | function WLock.new(consul_client, kv_prefix, weight, delay, info, session_ttl) 43 | local self = setmetatable({}, WLock) 44 | self._consul_client = consul_client 45 | self._prefix = kv_prefix 46 | self._weight = weight 47 | self._delay = delay or 0 48 | self._info = info 49 | self._weight_updated = fiber.cond() -- for updating weight at runtime 50 | self._delay_updated = fiber.cond() -- for updating delay at runtime 51 | self._session_ttl = session_ttl or CONSUL_DEFAULT_SESSION_TTL 52 | return self 53 | end 54 | 55 | function WLock:_get_lock_kv(kvs) 56 | if kvs == nil then return end 57 | local LOCK_KEY = util.urljoin(self._prefix, CONSUL_LOCK_KEY) 58 | for _, kv in ipairs(kvs) do if kv.key == LOCK_KEY then return kv end end 59 | end 60 | 61 | -- @param kvs consul KV's 62 | -- @param prefix consul kv prefix 63 | -- @treturn[1] table contender_weights - a mapping of weights by contender 64 | -- @treturn[2] ?string holder session id of the current lock holder 65 | -- @treturn[3] number max_weight maximum weight of all the contenders 66 | function M.parse_kvs(kvs, prefix) 67 | local LOCK_KEY = util.urljoin(prefix, CONSUL_LOCK_KEY) 68 | 69 | local max_weight = 0 70 | local holder 71 | 72 | -- a map of session_id to weight 73 | local contender_weights = {} 74 | 75 | if kvs == nil then return contender_weights, holder, max_weight end 76 | 77 | local lock_value 78 | 79 | for _, kv in ipairs(kvs) do 80 | if kv.key == LOCK_KEY then 81 | -- the kv's value must be LockValue 82 | local ok, value = pcall(json.decode, kv.value) 83 | if ok then 84 | lock_value = value 85 | else 86 | log.error("cannot decode Consul lock key %q: value=%q, error=%q", kv.key, kv.value, 87 | value) 88 | end 89 | else 90 | -- need to check if the last key part is a valid UUID and if the key is 91 | -- locked with the session with the same id as the last key part 92 | local session_id = string.sub(kv.key, string.len(prefix) + 2) 93 | if pcall(uuid.fromstr, session_id) and kv.session == session_id then 94 | local ok, value = pcall(json.decode, kv.value) 95 | if not ok then 96 | log.error("cannot decode Consul key %q: value=%q, error=%q", kv.key, kv.value, 97 | value) 98 | end 99 | if ok and (type(value) ~= "table" or type(value.weight) ~= "number") then 100 | log.error("missing weight in Consul contender key %q: value=%q", kv.key, 101 | 
kv.value) 102 | end 103 | if ok and type(value) == "table" and type(value.weight) == "number" then 104 | contender_weights[session_id] = value.weight 105 | max_weight = math.max(max_weight, value.weight) 106 | end 107 | end 108 | end 109 | end 110 | 111 | if lock_value and lock_value.holder and contender_weights[lock_value.holder] then 112 | holder = lock_value.holder 113 | end 114 | 115 | return contender_weights, holder, max_weight 116 | end 117 | 118 | function WLock:_create_session(done_ch, info) 119 | info = info or self._info 120 | local session 121 | while not done_ch:is_closed() do 122 | session = util.ok_or_log_error(self._consul_client.session, self._consul_client, 123 | self._session_ttl, "delete") 124 | if session then 125 | log.info("created Consul session %q", session.id) 126 | -- put contender key with acquire 127 | if self:_put_contender_key(session.id) then break end 128 | end 129 | done_ch:get(RETRY_TIMEOUT) 130 | end 131 | 132 | -- renew session in the background 133 | fiber.create(util.ok_or_log_error, self._renew_session_periodically, self, done_ch, session) 134 | return session 135 | end 136 | 137 | function WLock:_renew_session_periodically(done_ch, session) 138 | local weight = self._weight 139 | 140 | local tick = fiber.cond() 141 | 142 | local weight_change_waiter = fiber.new(function() 143 | while true do 144 | self._weight_updated:wait() 145 | tick:broadcast() 146 | end 147 | end) 148 | local done_waiter = fiber.new(function() 149 | done_ch:get() 150 | tick:broadcast() 151 | end) 152 | 153 | util.ok_or_log_error(function() 154 | while true do 155 | tick:wait(0.66 * session.ttl) 156 | if done_ch:is_closed() then break end 157 | if not util.ok_or_log_error(session.renew, session) then 158 | log.error("could not renew Consul session %q", session.id) 159 | -- if renew fails then we release the lock and return 160 | done_ch:close() 161 | end 162 | 163 | if self._weight ~= weight then 164 | if self:_put_contender_key(session.id) then 165 | weight = self._weight 166 | else 167 | log.error("could not put contender key for Consul session %q", session.id) 168 | done_ch:close() 169 | end 170 | end 171 | end 172 | if util.ok_or_log_error(session.delete, session) then 173 | log.info("released lock and deleted Consul session %q", session.id) 174 | end 175 | end) 176 | pcall(done_waiter.cancel, done_waiter) 177 | pcall(weight_change_waiter.cancel, weight_change_waiter) 178 | end 179 | 180 | function WLock:_wait_ready_to_lock(done_ch, session_id) 181 | -- watch the kv prefix and check whether we should attempt to acquire the lock 182 | local ready_to_lock = fiber.channel() 183 | 184 | local delay_f 185 | local can_lock_since 186 | local function ensure_delay_f_stopped() 187 | if delay_f ~= nil then 188 | pcall(delay_f.cancel, delay_f) 189 | delay_f = nil 190 | end 191 | end 192 | 193 | local function start_delay_f(kvs) 194 | delay_f = fiber.new(function() 195 | while true do 196 | local remaining_delay = math.max(0, can_lock_since + self._delay - fiber.time()) 197 | log.info( 198 | "(re)started lock delay: waiting %s seconds before acquiring lock with Consul session %q", 199 | math.floor(remaining_delay), session_id) 200 | local c, _ = util.select({done_ch, self._delay_updated}, remaining_delay) 201 | if c == done_ch then 202 | break 203 | elseif c == self._delay_updated then 204 | -- continue 205 | else 206 | ready_to_lock:put(kvs) 207 | break 208 | end 209 | end 210 | end) 211 | delay_f:name("lock_delay", {truncate = true}) 212 | end 213 | 214 | local function on_change(kvs) 215 
| -- need to restart the delay fiber on kvs change 216 | ensure_delay_f_stopped() 217 | 218 | local contender_weights, holder, max_weight = M.parse_kvs(kvs, self._prefix) 219 | 220 | -- check if we should proceed with the lock 221 | local can_lock = (contender_weights[session_id] or 0) >= max_weight and 222 | (not holder or (contender_weights[holder] or 0) < max_weight) 223 | 224 | if can_lock and holder ~= nil and self._delay > 0 then 225 | can_lock_since = can_lock_since or fiber.time() 226 | start_delay_f(kvs) 227 | elseif can_lock then 228 | ready_to_lock:put(kvs) 229 | else 230 | can_lock_since = nil 231 | end 232 | end 233 | 234 | local _, stop_watching = self._consul_client:watch{ 235 | key = self._prefix, 236 | prefix = true, 237 | on_change = on_change, 238 | consistent = true, 239 | } 240 | 241 | -- stop watching when we are done 242 | local watchdog = fiber.create(function() 243 | done_ch:get() 244 | stop_watching() 245 | end) 246 | 247 | -- wait until we are ready to lock or until done_ch is closed 248 | local ch, kvs = util.select({done_ch, ready_to_lock}) 249 | 250 | -- cleanup 251 | stop_watching() 252 | pcall(watchdog.cancel, watchdog) 253 | ready_to_lock:close() 254 | ensure_delay_f_stopped() 255 | ----------- 256 | 257 | if ch == ready_to_lock then -- 258 | log.info("ready to lock with Consul session %q", session_id) 259 | end 260 | return kvs 261 | end 262 | 263 | function WLock:_put_lock_key(session_id, kvs) 264 | local lock_key = util.urljoin(self._prefix, CONSUL_LOCK_KEY) 265 | local value = json.encode({holder = session_id, info = self._info}) 266 | local lock_kv = self:_get_lock_kv(kvs) 267 | local cas = 0 268 | if lock_kv then cas = lock_kv.modify_index end 269 | local put_ok = util.ok_or_log_error(self._consul_client.put, self._consul_client, lock_key, 270 | value, cas) 271 | if put_ok then 272 | log.info("acquired lock for Consul session %q", session_id) 273 | return true 274 | else 275 | return false 276 | end 277 | end 278 | 279 | function WLock:_put_contender_key(session_id) 280 | local key = util.urljoin(self._prefix, session_id) 281 | local value = json.encode({weight = self._weight, info = self._info}) 282 | local acquire = session_id 283 | local put_ok = util.ok_or_log_error(self._consul_client.put, self._consul_client, key, value, 284 | nil, acquire) 285 | if put_ok then 286 | log.info("put Consul contender key: session=%q weight=%q", session_id, self._weight) 287 | end 288 | return put_ok 289 | end 290 | 291 | function WLock:_hold_lock(done_ch, session_id) 292 | -- ? 
maybe merge this watch section into the previous one 293 | -- [todo]: delete lock key on cleanup 294 | -- [todo]: retry when monitoring lock 295 | 296 | -- watch lock key prefix 297 | local _, stop_watching = self._consul_client:watch{ 298 | key = self._prefix, 299 | prefix = true, 300 | on_change = function(kvs) 301 | -- wait until the lock session is invalidated or the lock key is changed 302 | local _, holder, _ = M.parse_kvs(kvs, self._prefix) 303 | -- check if we are still the holder 304 | if holder ~= session_id then 305 | log.info("lost lock for Consul session %q: holder changed", session_id) 306 | done_ch:close() 307 | end 308 | end, 309 | on_error = function(err) 310 | log.error("lock watch error for Consul session %q: %s", session_id, err) 311 | done_ch:close() 312 | end, 313 | consistent = true, 314 | } 315 | 316 | -- stop watching when we are done 317 | fiber.create(function() 318 | done_ch:get() 319 | stop_watching() 320 | end) 321 | end 322 | 323 | function WLock:set_weight(weight) 324 | self._weight = weight 325 | self._weight_updated:broadcast() 326 | end 327 | 328 | function WLock:set_delay(delay) 329 | self._delay = delay 330 | self._delay_updated:broadcast() 331 | end 332 | 333 | -- @param done_ch "done channel". It will be closed when the lock is released or invalidated. 334 | -- And vice-versa, if `done_ch` is closed, the lock gets released. 335 | -- @param info any info to attach to the contender key in Consul 336 | -- @treturn boolean whether the lock has been acquired 337 | function WLock:acquire(done_ch, info) 338 | -- [todo] delete lock key if there are no session keys 339 | -- "Done channel" must be closed if the lock is released or lost (probably due 340 | -- to Consul session invalidation or a network error). 341 | -- "Done channel" can also be closed by user. The function should return immediately 342 | -- in this case. 343 | assert(done_ch, "'done channel' must be passed as 1st parameter") 344 | local locked = false 345 | while not done_ch:is_closed() do 346 | -- We need to create session so that other contenders could take into 347 | -- account our weight. 348 | local session = self:_create_session(done_ch, info) 349 | if done_ch:is_closed() then break end 350 | local kvs = self:_wait_ready_to_lock(done_ch, session.id) 351 | if done_ch:is_closed() then break end 352 | if self:_put_lock_key(session.id, kvs) then 353 | self:_hold_lock(done_ch, session.id) 354 | locked = true 355 | break 356 | end 357 | end 358 | return locked 359 | end 360 | 361 | return M 362 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | 3 | x-common: &common 4 | depends_on: 5 | - consul 6 | volumes: 7 | - ./examples/storage.lua:/opt/tarantool/storage.lua:ro 8 | - ./examples/router.lua:/opt/tarantool/router.lua:ro 9 | - ./autovshard:/usr/share/tarantool/autovshard:ro 10 | - ./tests:/opt/tarantool/tests:ro 11 | - ./scripts:/opt/tarantool/scripts:ro 12 | - ./.luacov:/opt/tarantool/.luacov:ro 13 | - ./output:/opt/tarantool/output 14 | build: . 
15 | image: tnt-autovshard 16 | entrypoint: '' 17 | working_dir: /opt/tarantool 18 | command: 19 | - tarantool 20 | - storage.lua 21 | 22 | services: 23 | a1: 24 | <<: *common 25 | ports: 26 | - 3301:3301 27 | hostname: a1 28 | environment: 29 | CONSUL_HTTP_ADDR: "http://consul:8500" 30 | TARANTOOL_INSTANCE_UUID: aaaaaaaa-aaaa-aaaa-aaaa-000000000001 31 | TARANTOOL_REPLICASET_UUID: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa 32 | 33 | a2: 34 | <<: *common 35 | ports: 36 | - 3302:3301 37 | hostname: a2 38 | environment: 39 | CONSUL_HTTP_ADDR: "http://consul:8500" 40 | TARANTOOL_INSTANCE_UUID: aaaaaaaa-aaaa-aaaa-aaaa-000000000002 41 | TARANTOOL_REPLICASET_UUID: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa 42 | 43 | b1: 44 | <<: *common 45 | ports: 46 | - 3303:3301 47 | hostname: b1 48 | environment: 49 | CONSUL_HTTP_ADDR: "http://consul:8500" 50 | TARANTOOL_INSTANCE_UUID: bbbbbbbb-bbbb-bbbb-bbbb-000000000001 51 | TARANTOOL_REPLICASET_UUID: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb 52 | 53 | b2: 54 | <<: *common 55 | ports: 56 | - 3304:3301 57 | hostname: b2 58 | environment: 59 | CONSUL_HTTP_ADDR: "http://consul:8500" 60 | TARANTOOL_INSTANCE_UUID: bbbbbbbb-bbbb-bbbb-bbbb-000000000002 61 | TARANTOOL_REPLICASET_UUID: bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb 62 | 63 | router: 64 | <<: *common 65 | hostname: router 66 | ports: 67 | - 3300:3301 68 | command: 69 | - tarantool 70 | - router.lua 71 | 72 | consul: 73 | image: consul:1.5.2 74 | volumes: 75 | - "./examples/consul_kv.sh:/consul_kv.sh:ro" 76 | ports: 77 | - 8500:8500 78 | command: 79 | - sh 80 | - -c 81 | - | 82 | 83 | ( 84 | sleep 2 85 | sh /consul_kv.sh 86 | )& 87 | 88 | consul agent -dev -client 0.0.0.0 89 | -------------------------------------------------------------------------------- /examples/consul_kv.sh: -------------------------------------------------------------------------------- 1 | consul kv put "autovshard/mycluster/autovshard_cfg_yaml" ' 2 | --- 3 | rebalancer_max_receiving: 10 4 | bucket_count: 100 5 | rebalancer_disbalance_threshold: 10 6 | sharding: 7 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 8 | weight: 10 9 | replicas: 10 | aaaaaaaa-aaaa-aaaa-aaaa-000000000001: 11 | master_weight: 99 12 | switchover_delay: 10 13 | address: a1:3301 14 | name: a1 15 | master: false 16 | aaaaaaaa-aaaa-aaaa-aaaa-000000000002: 17 | master_weight: 20 18 | switchover_delay: 10 19 | address: a2:3301 20 | name: a2 21 | master: false 22 | bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb: 23 | weight: 10 24 | replicas: 25 | bbbbbbbb-bbbb-bbbb-bbbb-000000000001: 26 | master_weight: 10 27 | switchover_delay: 10 28 | address: b1:3301 29 | name: b1 30 | master: false 31 | bbbbbbbb-bbbb-bbbb-bbbb-000000000002: 32 | master_weight: 55 33 | switchover_delay: 10 34 | address: b2:3301 35 | name: b2 36 | master: false 37 | 38 | ' 39 | #consul kv delete "autovshard/mycluster/autovshard_cfg_yaml" 40 | -------------------------------------------------------------------------------- /examples/router.lua: -------------------------------------------------------------------------------- 1 | local fiber = require("fiber") 2 | 3 | require("package.reload") 4 | 5 | vshard = require("vshard") 6 | 7 | local box_cfg = { 8 | listen = 3301, 9 | wal_mode = "none", 10 | feedback_enabled = false, 11 | replication_connect_quorum = 0, 12 | replication_connect_timeout=1, 13 | } 14 | 15 | autovshard = require("autovshard").Autovshard.new{ 16 | box_cfg = box_cfg, 17 | cluster_name = "mycluster", 18 | login = "storage", 19 | password = "storage", 20 | consul_http_address = "http://consul:8500", 21 | consul_token = 
nil, 22 | consul_kv_prefix = "autovshard", 23 | router = true, 24 | storage = false, 25 | } 26 | autovshard:start() 27 | package.reload:register(autovshard, autovshard.stop) 28 | 29 | function test(x) 30 | local bucket_id = vshard.router.bucket_id(x) 31 | return vshard.router.callrw(bucket_id, "tostring", {"test ok"}) 32 | end 33 | 34 | function get(x) 35 | local bucket_id = vshard.router.bucket_id(x) 36 | return vshard.router.callrw(bucket_id, "get", {x}) 37 | end 38 | 39 | function put(x, ...) 40 | local bucket_id = vshard.router.bucket_id(x) 41 | return vshard.router.callrw(bucket_id, "put", {x, bucket_id, ...}) 42 | end 43 | 44 | function delete(x, ...) 45 | local bucket_id = vshard.router.bucket_id(x) 46 | return vshard.router.callrw(bucket_id, "delete", {x, bucket_id, ...}) 47 | end 48 | 49 | local function err_if_not_started() 50 | -- check if tarantool instance bootstrap is done 51 | box.info() 52 | -- check if vshard cfg is applied 53 | vshard.router.bucket_count() 54 | end 55 | repeat fiber.sleep(0.1) until pcall(err_if_not_started) 56 | 57 | if not box.info().ro then -- 58 | box.schema.user.grant('guest', 'super', nil, nil, {if_not_exists = true}) 59 | end 60 | -------------------------------------------------------------------------------- /examples/storage.lua: -------------------------------------------------------------------------------- 1 | local fiber = require("fiber") 2 | 3 | require("package.reload") 4 | 5 | vshard = require("vshard") 6 | 7 | local box_cfg = { 8 | listen = 3301, 9 | feedback_enabled = false, 10 | replication_connect_quorum = 0, 11 | replication_connect_timeout=1, 12 | instance_uuid = assert(os.getenv("TARANTOOL_INSTANCE_UUID"), 13 | "TARANTOOL_INSTANCE_UUID env variable must be set"), 14 | replicaset_uuid = assert(os.getenv("TARANTOOL_REPLICASET_UUID"), 15 | "TARANTOOL_REPLICASET_UUID env variable must be set"), 16 | } 17 | 18 | autovshard = require("autovshard").Autovshard.new{ 19 | box_cfg = box_cfg, 20 | cluster_name = "mycluster", 21 | login = "storage", 22 | password = "storage", 23 | consul_http_address = "http://consul:8500", 24 | consul_token = nil, 25 | consul_kv_prefix = "autovshard", 26 | router = true, 27 | storage = true, 28 | automaster = true, 29 | } 30 | 31 | autovshard:start() 32 | package.reload:register(autovshard, autovshard.stop) 33 | -- box.ctl.on_shutdown(function() autovshard:stop() end) -- tarantool 2.x only 34 | 35 | -- public storage API 36 | function put(x, bucket_id, ...) 37 | -- 38 | return box.space.test:put(box.tuple.new(x, bucket_id, ...)) 39 | end 40 | 41 | function get(x) -- 42 | return box.space.test:get(x) 43 | end 44 | 45 | function delete(x) -- 46 | return box.space.test:delete(x) 47 | end 48 | 49 | local function err_if_not_started() 50 | -- check if tarantool instance bootstrap is done 51 | box.info() 52 | -- check if vshard cfg is applied 53 | vshard.storage.buckets_count() 54 | end 55 | repeat fiber.sleep(0.1) until pcall(err_if_not_started) 56 | 57 | if not box.info().ro then 58 | -- perform write operation 59 | 60 | -- Not using box.once because it is not compatible with package.reload. 61 | -- box.once calls box.ctl.wait_rw() internally. 62 | -- And box.ctl.wait_rw blocks forever on subsequent calls on a RW instance. 
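-- For reference, the box.once-based equivalent that is deliberately avoided
-- here would look roughly like this (sketch only):
--
--   box.once("schema", function()
--       box.schema.user.grant('guest', 'super', nil, nil, {if_not_exists = true})
--   end)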
63 | 64 | box.schema.user.grant('guest', 'super', nil, nil, {if_not_exists = true}) 65 | 66 | local s = box.schema.space.create("test", { 67 | format = { -- 68 | {'id', 'unsigned'}, -- 69 | {'bucket_id', 'unsigned'}, -- 70 | {'data', 'scalar'}, -- 71 | }, 72 | if_not_exists = true, 73 | }) 74 | s:create_index("pk", {parts = {'id'}, if_not_exists = true}) 75 | s:create_index("bucket_id", {parts = {'bucket_id'}, unique = false, if_not_exists = true}) 76 | end 77 | -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bofm/tarantool-autovshard/a04a455cb86545072c948ce50d2c06a6d5f5cabc/output/.gitkeep -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -vv --gherkin-terminal-reporter --gherkin-terminal-reporter-expanded --tests-per-worker 10 --workers 1 --basetemp=./tmp 3 | log_format = %(asctime)s %(name)20s %(levelname)s %(message)s 4 | log_date_format = %Y-%m-%d %H:%M:%S 5 | log_level = debug 6 | -------------------------------------------------------------------------------- /rockspecs/autovshard-0.0.3-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "0.0.3-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v0.0.3", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-0.1.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "0.1.0-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v0.1.0", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 
12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-0.2.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "0.2.0-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v0.2.0", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-0.2.1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "0.2.1-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v0.2.1", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-1.0.0-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "1.0.0-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v1.0.0", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 
12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-1.0.1-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "1.0.1-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v1.0.1", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /rockspecs/autovshard-1.0.2-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "autovshard" 2 | version = "1.0.2-1" 3 | source = { 4 | url = "git://github.com/bofm/tarantool-autovshard.git", 5 | tag = "v1.0.2", 6 | } 7 | description = { 8 | summary = "autovshard", 9 | detailed = [[ 10 | Vshard wrapper with automatic master election, failover and centralized 11 | configuration storage in Consul. 12 | ]], 13 | homepage = "https://github.com/bofm/tarantool-autovshard", 14 | } 15 | dependencies = { 16 | "lua >= 5.1", 17 | } 18 | build = { 19 | type = "builtin", 20 | modules = { 21 | ["autovshard"] = "autovshard/init.lua", 22 | ["autovshard.util"] = "autovshard/util.lua", 23 | ["autovshard.consul"] = "autovshard/consul.lua", 24 | ["autovshard.wlock"] = "autovshard/wlock.lua", 25 | ["autovshard.config"] = "autovshard/config.lua", 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /scripts/run_tests.lua: -------------------------------------------------------------------------------- 1 | local fio = require("fio") 2 | local script_dir = debug.getinfo(1, "S").source:sub(2):match(".*/") 3 | if script_dir == nil then error("cannot determine script dir") end 4 | local tnt = require "tests.tnt" 5 | local root = fio.abspath(fio.pathjoin(script_dir, '..')) 6 | 7 | local paths = { 8 | root .. "/app/?.lua", 9 | root .. "/?.lua", 10 | root .. "/?/init.lua", 11 | root .. "/app/?/init.lua", 12 | root .. "/.rocks/share/lua/5.1/?.lua", 13 | root .. "/.rocks/share/lua/5.1/?/init.lua", 14 | } 15 | 16 | local cpaths = { 17 | root .. "/.rocks/lib/lua/5.1/?.so;", 18 | root .. "/.rocks/lib/lua/?.so;", 19 | root .. "/.rocks/lib64/lua/5.1/?.so;", 20 | } 21 | package.path = table.concat(paths, ';') .. ';' .. package.path 22 | package.cpath = table.concat(cpaths, ';') .. ';' .. 
package.cpath 23 | 24 | tnt.cfg{wal_mode = 'none', checkpoint_interval = 0} 25 | 26 | require("package.reload") 27 | 28 | local code, err = pcall(function() return require "busted.runner"{standalone = false} end) 29 | 30 | -- [todo] coverage 31 | 32 | if not code then 33 | print("\n========== Tarantool logs =================") 34 | print(tnt.log()) 35 | end 36 | 37 | tnt.finish() 38 | 39 | os.exit(code) 40 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )" 6 | cd "$ROOT" 7 | 8 | tarantool "scripts/run_tests.lua" --pattern "^test_.*%.lua$" "${ROOT}/tests" "$@" 9 | -------------------------------------------------------------------------------- /tests/bdd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bofm/tarantool-autovshard/a04a455cb86545072c948ce50d2c06a6d5f5cabc/tests/bdd/__init__.py -------------------------------------------------------------------------------- /tests/bdd/conftest.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import logging 3 | from os import getenv 4 | from random import randint 5 | 6 | import docker as docker 7 | import pytest 8 | from docker.errors import APIError, NotFound 9 | from funcy import ignore, retry 10 | 11 | from .util import get_port_on_host, wait_tcp_port 12 | 13 | counter = itertools.count() 14 | 15 | 16 | @pytest.fixture 17 | def run_id(): 18 | return f"{next(counter):03d}" 19 | 20 | 21 | @pytest.fixture(scope="session") 22 | def docker_client(): 23 | if getenv("TRAVIS") == "true": 24 | return docker.DockerClient( 25 | base_url="unix://var/run/docker.sock", version="1.38", 26 | ) 27 | return docker.from_env() 28 | 29 | 30 | @pytest.fixture(scope="module") 31 | def docker_network(docker_client): 32 | network_name = f"test_network_{randint(1, 99999)}" 33 | 34 | @ignore(NotFound) 35 | @retry(3, APIError, 0.2) 36 | def cleanup(): 37 | for c in docker_client.containers.list(all=True): 38 | if c.attrs.get("HostConfig", {}).get("NetworkMode") == network_name: 39 | c.remove(force=True, v=True) 40 | for n in docker_client.networks.list(): 41 | if n.name == network_name: 42 | n.remove() 43 | 44 | cleanup() 45 | network = docker_client.networks.create(network_name) 46 | yield network.name 47 | cleanup() 48 | 49 | 50 | @pytest.fixture(autouse=True) 51 | def setup_logging(caplog): 52 | caplog.set_level(logging.DEBUG) 53 | logging.getLogger("urllib3.connectionpool").disabled = True 54 | 55 | 56 | @pytest.fixture 57 | def create_container(run_id, docker_client, docker_network): 58 | containers = [] 59 | 60 | @retry(3, APIError, 0.2) 61 | def really_create_container(image, expose_port, **kwargs): 62 | nonlocal containers 63 | kw = {**kwargs} 64 | if expose_port: 65 | kw.setdefault("ports", {})[f"{expose_port}/tcp"] = None 66 | logging.debug( 67 | f"create container: %s", 68 | dict( 69 | image=image, detach=True, mem_limit="256m", network=docker_network, **kw, 70 | ), 71 | ) 72 | c = docker_client.containers.create( 73 | image, detach=True, mem_limit="256m", network=docker_network, **kw, 74 | ) 75 | c.reload() 76 | containers.append(c) 77 | return c 78 | 79 | yield really_create_container 80 | for c in containers: 81 | c.remove(force=True, v=True) 82 | 83 | 84 | @pytest.fixture 85 | 
def run_container(create_container): 86 | def really_run_container(image, expose_port, **kwargs): 87 | c = create_container(image, expose_port, **kwargs) 88 | c.start() 89 | c.reload() 90 | if expose_port: 91 | wait_tcp_port("localhost", get_port_on_host(c, expose_port)) 92 | return c 93 | 94 | return really_run_container 95 | -------------------------------------------------------------------------------- /tests/bdd/features/become_ro.feature: -------------------------------------------------------------------------------- 1 | Feature: Fencing. If Consul is unavailable, Tarantool instances should become RO. 2 | 3 | Background: 4 | Given autovshard consul config: 5 | rs1: 6 | _default: 7 | switchover_delay: 0 8 | master: false 9 | t1: 10 | master_weight: 1 11 | t2: 12 | master_weight: 2 13 | t3: 14 | master_weight: 1 15 | master: true 16 | And Tarantool autovshard cluster: 17 | rs1: 18 | _default: 19 | automaster: true 20 | router: true 21 | storage: true 22 | t1: {} 23 | t2: {} 24 | t3: {} 25 | 26 | Scenario: Become RO if Consul is unavailable. 27 | When all instances in rs1 are started 28 | And after 5 seconds have passed 29 | And t1 should be RO 30 | And t2 should be RW 31 | And t3 should be RO 32 | And vshard router API should work on all instances 33 | And consul becomes unreachable 34 | Then t1 should become RO in less than 2 seconds 35 | Then t2 should become RO in less than 2 seconds 36 | Then t3 should become RO in less than 2 seconds 37 | -------------------------------------------------------------------------------- /tests/bdd/features/config_application.feature: -------------------------------------------------------------------------------- 1 | Feature: Config application 2 | 3 | Background: 4 | Given autovshard consul config: 5 | rs1: 6 | t1: 7 | switchover_delay: 0 8 | master_weight: 10 9 | master: true 10 | t2: 11 | switchover_delay: 0 12 | master_weight: 5 13 | master: false 14 | 15 | And Tarantool autovshard cluster: 16 | rs1: 17 | _default: 18 | automaster: false 19 | router: true 20 | storage: true 21 | t1: {} 22 | t2: {} 23 | 24 | 25 | Scenario: Config is applied automatically 26 | When all instances in rs1 are started 27 | Then t1 should become RW in less than 5 seconds 28 | And t2 should become RO in less than 2 seconds 29 | And vshard router API should work on all instances 30 | And autovshard consul config is changed: 31 | rs1: 32 | t1: 33 | switchover_delay: 0 34 | master_weight: 10 35 | master: false 36 | t2: 37 | switchover_delay: 0 38 | master_weight: 5 39 | master: true 40 | Then t2 should become RW in less than 5 seconds 41 | Then t1 should become RO in less than 2 seconds 42 | And vshard router API should work on all instances 43 | 44 | 45 | -------------------------------------------------------------------------------- /tests/bdd/features/failover.feature: -------------------------------------------------------------------------------- 1 | Feature: Failover 2 | 3 | Background: 4 | Given autovshard consul config: 5 | rs1: 6 | _default: 7 | switchover_delay: 0 8 | master: false 9 | t1: 10 | master_weight: 13 11 | t2: 12 | master_weight: 11 13 | t3: 14 | master_weight: 12 15 | 16 | And Tarantool autovshard cluster: 17 | rs1: 18 | _default: 19 | automaster: true 20 | router: true 21 | storage: true 22 | consul_session_ttl: 10 23 | t1: {} 24 | t2: {} 25 | t3: {} 26 | 27 | Scenario: A new master is elected if the old master crashes 28 | When all instances in rs1 are started 29 | Then t1 should become RW in less than 10 seconds 30 | And after 1 seconds have 
passed 31 | And t2 should be RO 32 | And t3 should be RO 33 | And vshard router API should work on all instances 34 | # Crash master instance 35 | And t1 is crashed 36 | And t2 should be RO 37 | And t3 should be RO 38 | And t1 should be down 39 | # Wait for consul_session_ttl + Consul lock delay (15s by default) + 10 sec 40 | And t3 should become RW in less than 35 seconds 41 | And t2 should be RO 42 | And t1 should be down 43 | And t2 vshard router API should work 44 | And t3 vshard router API should work 45 | # Master with highest master_weight comes back 46 | And t1 is started 47 | Then t1 should become RW in less than 10 seconds 48 | And t2 should be RO 49 | And t3 should be RO 50 | And vshard router API should work on all instances 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /tests/bdd/features/master_election.feature: -------------------------------------------------------------------------------- 1 | Feature: Master election 2 | 3 | Background: 4 | Given autovshard consul config: 5 | rs1: 6 | _default: 7 | switchover_delay: 0 8 | master: false 9 | t1: 10 | master_weight: 1 11 | t2: 12 | master_weight: 2 13 | t3: 14 | master_weight: 1 15 | master: true 16 | 17 | Scenario: Master is elected according to master_weight after the replicaset is started 18 | Given Tarantool autovshard cluster: 19 | rs1: 20 | _default: 21 | automaster: true 22 | router: true 23 | storage: true 24 | t1: {} 25 | t2: {} 26 | t3: {} 27 | When all instances in rs1 are started 28 | Then t2 should become RW in less than 10 seconds 29 | And after 1 seconds have passed 30 | And t1 should be RO 31 | And t3 should be RO 32 | And vshard router API should work on all instances 33 | 34 | Scenario: Master is elected after master_weight is changed in Consul 35 | Given Tarantool autovshard cluster: 36 | rs1: 37 | _default: 38 | automaster: true 39 | router: true 40 | storage: true 41 | t1: {} 42 | t2: {} 43 | t3: {} 44 | When all instances in rs1 are started 45 | Then t2 should become RW in less than 10 seconds 46 | And after 1 seconds have passed 47 | And t1 should be RO 48 | And t3 should be RO 49 | And vshard router API should work on all instances 50 | And autovshard consul config is changed: 51 | rs1: 52 | _default: 53 | switchover_delay: 0 54 | master: false 55 | t1: 56 | master_weight: 1 57 | t2: 58 | master_weight: 1 59 | master: true 60 | t3: 61 | master_weight: 2 62 | Then after 3 seconds have passed 63 | And t1 should be RO 64 | And t2 should be RO 65 | And t3 should be RW 66 | And vshard router API should work on all instances 67 | 68 | Scenario: Master should not be elected if automaster is false 69 | Given Tarantool autovshard cluster: 70 | rs1: 71 | _default: 72 | automaster: false 73 | router: true 74 | storage: true 75 | t1: {} 76 | t2: {} 77 | t3: {} 78 | When all instances in rs1 are started 79 | Then after 5 seconds have passed 80 | And t1 should be RO 81 | # t2 has the highest weight 82 | And t2 should be RO 83 | And t3 should be RW 84 | And vshard router API should work on all instances 85 | -------------------------------------------------------------------------------- /tests/bdd/features/switchover_delay.feature: -------------------------------------------------------------------------------- 1 | Feature: Switchover_delay 2 | 3 | Background: 4 | Given autovshard consul config: 5 | rs1: 6 | _default: 7 | switchover_delay: 20 8 | master: false 9 | t1: 10 | master_weight: 1 11 | t2: 12 | master_weight: 2 13 | 14 | And Tarantool autovshard cluster: 15 | 
rs1: 16 | _default: 17 | automaster: true 18 | router: true 19 | storage: true 20 | consul_session_ttl: 10 21 | t1: {} 22 | t2: {} 23 | 24 | Scenario: Switchover is delayed. 25 | When t1 is started 26 | And after 5 seconds have passed 27 | And t2 is started 28 | And after 5 seconds have passed 29 | And t1 should be RW 30 | And t2 should be RO 31 | And after 20 seconds have passed 32 | And t1 should be RO 33 | And t2 should be RW 34 | -------------------------------------------------------------------------------- /tests/bdd/steps/templates/init.lua.tpl: -------------------------------------------------------------------------------- 1 | fiber = require("fiber") 2 | 3 | require("package.reload") 4 | 5 | vshard = require("vshard") 6 | autovshard = require("autovshard") 7 | 8 | local box_cfg = { 9 | listen = 3301, 10 | feedback_enabled = false, 11 | instance_uuid = "$instance_uuid", 12 | replicaset_uuid = "$replicaset_uuid", 13 | replication_connect_quorum = 0, 14 | replication_connect_timeout = 20, 15 | } 16 | 17 | autovshard = require("autovshard").Autovshard.new{ 18 | box_cfg = box_cfg, 19 | cluster_name = "$cluster_name", 20 | login = "storage", 21 | password = "storage", 22 | consul_http_address = "$consul_http_address", 23 | consul_token = nil, 24 | consul_kv_prefix = "autovshard", 25 | consul_session_ttl = $consul_session_ttl, 26 | router = $router, 27 | storage = $storage, 28 | automaster = $automaster, 29 | } 30 | 31 | autovshard:start() 32 | package.reload:register(autovshard, autovshard.stop) 33 | -- box.ctl.on_shutdown(function() autovshard:stop() end) -- tarantool 2.x only 34 | 35 | -- public storage API 36 | if $storage then 37 | storage = {} 38 | 39 | function storage.put(x, bucket_id, ...) 40 | -- 41 | return box.space.test:put(box.tuple.new(x, bucket_id, ...)) 42 | end 43 | 44 | function storage.get(x) 45 | return box.space.test:get(x) 46 | end 47 | 48 | function storage.delete(x) 49 | return box.space.test:delete(x) 50 | end 51 | end 52 | 53 | if $router then 54 | router = {} 55 | 56 | function router.test(x) 57 | local bucket_id = vshard.router.bucket_id(x) 58 | return vshard.router.callrw(bucket_id, "tostring", {"test ok"}) 59 | end 60 | 61 | function router.get(x) 62 | local bucket_id = vshard.router.bucket_id(x) 63 | return vshard.router.callrw(bucket_id, "get", {x}) 64 | end 65 | 66 | function router.put(x, ...) 67 | local bucket_id = vshard.router.bucket_id(x) 68 | return vshard.router.callrw(bucket_id, "put", {x, bucket_id, ...}) 69 | end 70 | 71 | function router.delete(x, ...) 72 | local bucket_id = vshard.router.bucket_id(x) 73 | return vshard.router.callrw(bucket_id, "delete", {x, bucket_id, ...}) 74 | end 75 | end 76 | 77 | local function err_if_not_started() 78 | -- check if tarantool instance bootstrap is done 79 | box.info() 80 | -- check if vshard cfg is applied 81 | vshard.storage.buckets_count() 82 | end 83 | repeat fiber.sleep(0.1) until pcall(err_if_not_started) 84 | 85 | if not box.info().ro then 86 | -- perform write operation 87 | 88 | -- Not using box.once because it is not compatible with package.reload. 89 | -- box.once calls box.ctl.wait_rw() internally. 90 | -- And box.ctl.wait_rw blocks forever on subsequent calls on a RW instance. 
91 | 92 | box.schema.user.grant('guest', 'super', nil, nil, {if_not_exists = true}) 93 | 94 | local s = box.schema.space.create("test", { 95 | format = { -- 96 | {'id', 'unsigned'}, -- 97 | {'bucket_id', 'unsigned'}, -- 98 | {'data', 'scalar'}, -- 99 | }, 100 | if_not_exists = true, 101 | }) 102 | s:create_index("pk", {parts = {'id'}, if_not_exists = true}) 103 | s:create_index("bucket_id", {parts = {'bucket_id'}, unique = false, if_not_exists = true}) 104 | end 105 | 106 | -------------------------------------------------------------------------------- /tests/bdd/steps/test_autovshard.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import subprocess 3 | from dataclasses import dataclass, field 4 | from itertools import count 5 | from pathlib import Path 6 | from string import Template 7 | from textwrap import dedent 8 | from time import sleep 9 | from typing import Dict, Tuple 10 | 11 | import pytest 12 | import yaml 13 | 14 | # noinspection PyPackageRequirements 15 | from consul import Consul 16 | from docker.errors import APIError 17 | from docker.models.containers import Container 18 | from funcy import ( 19 | compose, 20 | get_in, 21 | lcat, 22 | lfilter, 23 | lmap, 24 | log_calls, 25 | log_durations, 26 | memoize, 27 | none_fn, 28 | partial, 29 | pluck_attr, 30 | re_test, 31 | retry, 32 | ) 33 | from pytest_bdd import given, parsers, scenarios, then, when 34 | from requests import RequestException 35 | 36 | from bdd.util import get_port_on_host, process_defaults 37 | 38 | CONSUL_KV_PREFIX = "autovshard" 39 | CONSUL_CONFIG_KEY = "autovshard_cfg_yaml" 40 | 41 | scenarios(Path(__file__).parent / "../features") 42 | 43 | 44 | @dataclass(frozen=True) 45 | class ConsulBag: 46 | container: Container 47 | 48 | @property 49 | def client(self): 50 | self.container.reload() 51 | return Consul(port=get_port_on_host(self.container, 8500)) 52 | 53 | 54 | @pytest.fixture 55 | def consul(run_container, run_id): 56 | """runs Consul in Docker and returns ConsulClient""" 57 | container = run_container( 58 | "consul:1.5.3", 59 | 8500, 60 | command="consul agent -dev -client 0.0.0.0", 61 | name=f"{run_id}_consul", 62 | hostname="consul", 63 | ) 64 | # wait for Consul to start 65 | bag = ConsulBag(container=container) 66 | retry(20, RequestException, 0.1)(lambda: bag.client.kv.get("1"))() 67 | return bag 68 | 69 | 70 | @dataclass(frozen=True) 71 | class AutovshardReplicaConfig: 72 | name: str 73 | uuid: str 74 | address: str 75 | master: bool = False 76 | master_weight: int = 10 77 | switchover_delay: int = 0 78 | 79 | 80 | @dataclass(frozen=True) 81 | class AutovshardRSConfig: 82 | name: str 83 | uuid: str 84 | replicas: Dict[str, AutovshardReplicaConfig] = field(default_factory=dict) 85 | weight: int = 10 86 | 87 | 88 | @dataclass(frozen=True) 89 | class AutovshardConfig: 90 | cluster_name: str 91 | sharding: Dict[str, AutovshardRSConfig] 92 | rebalancer_max_receiving: int = 10 93 | bucket_count: int = 100 94 | rebalancer_disbalance_threshold: int = 10 95 | 96 | def toyaml(self) -> str: 97 | return yaml.safe_dump( 98 | { 99 | "bucket_count": self.bucket_count, 100 | "rebalancer_disbalance_threshold": self.rebalancer_disbalance_threshold, 101 | "rebalancer_max_receiving": self.rebalancer_max_receiving, 102 | "sharding": { 103 | rs.uuid: { 104 | "weight": rs.weight, 105 | "replicas": { 106 | replica.uuid: { 107 | "name": replica.name, 108 | "address": replica.address, 109 | "master": replica.master, 110 | "master_weight": replica.master_weight, 
111 | "switchover_delay": replica.switchover_delay, 112 | } 113 | for replica_name, replica in rs.replicas.items() 114 | }, 115 | } 116 | for rs_name, rs in self.sharding.items() 117 | }, 118 | }, 119 | ) 120 | 121 | 122 | @pytest.fixture(scope="session") 123 | def uuids(): 124 | """ 125 | Monotonically increasing uuids for idempotent tests because Tarantool 126 | has bugs dependent on the uuid comparison. 127 | """ 128 | return (f"aaaaaaaa-aaaa-aaaa-aaaa-{f'{i:x}'.zfill(12)}" for i in count()) 129 | 130 | 131 | @pytest.fixture() 132 | def uuid(run_id, uuids): 133 | uuid_by_param = memoize(lambda *a: next(uuids)) 134 | return partial(uuid_by_param, run_id) 135 | 136 | 137 | parse_config = compose(process_defaults, yaml.safe_load, dedent) 138 | 139 | 140 | def parse_autovshard_feature_consul_config(config, run_id, uuid): 141 | config = parse_config(config) 142 | 143 | return AutovshardConfig( 144 | cluster_name=f"cluster_{run_id}", 145 | sharding={ 146 | rs: AutovshardRSConfig( 147 | name=rs, 148 | uuid=uuid(rs), 149 | replicas={ 150 | replica_name: AutovshardReplicaConfig( 151 | name=replica_name, 152 | uuid=uuid(replica_name), 153 | address=f"{run_id}_{replica_name}:3301", 154 | **config[rs][replica_name], 155 | ) 156 | for replica_name in config[rs].keys() 157 | }, 158 | ) 159 | for rs in config.keys() 160 | }, 161 | ) 162 | 163 | 164 | def autovshard_consul_config_key(cluster_name): 165 | return f"{CONSUL_KV_PREFIX}/{cluster_name}/{CONSUL_CONFIG_KEY}" 166 | 167 | 168 | def write_consul_config(consul_config: str, consul: Consul, run_id: str, uuid): 169 | """puts autovshard config to Consul""" 170 | cfg = parse_autovshard_feature_consul_config(consul_config, run_id, uuid) 171 | consul.kv.put( 172 | key=autovshard_consul_config_key(cfg.cluster_name), value=cfg.toyaml(), 173 | ) 174 | return cfg 175 | 176 | 177 | @given(parsers.parse("autovshard consul config:\n{consul_config}")) 178 | def autovshard_config_in_consul(consul_config: str, consul: ConsulBag, run_id, uuid): 179 | return write_consul_config(consul_config, consul.client, run_id, uuid) 180 | 181 | 182 | @then(parsers.parse("autovshard consul config is changed:\n{consul_config}")) 183 | def autovshard_consul_config(consul_config: str, consul: ConsulBag, run_id, uuid): 184 | return write_consul_config(consul_config, consul.client, run_id, uuid) 185 | 186 | 187 | def render_template(name, **kwargs): 188 | template = Template( 189 | Path(__file__).parent.joinpath("templates").joinpath(name).read_text(), 190 | ) 191 | return template.substitute(**kwargs) 192 | 193 | 194 | generate_storage_init = partial(render_template, "init.lua.tpl") 195 | 196 | 197 | @pytest.fixture(scope="session") 198 | def tarantool_docker_image(): 199 | subprocess.check_call(["make", "-s", "docker"], shell=False) 200 | image_name = ( 201 | subprocess.check_output(["make", "-s", "docker-image-name"], shell=False) 202 | .decode() 203 | .strip() 204 | ) 205 | assert re_test(r"^\S+$", image_name), f"bad image name: {image_name!r}" 206 | return image_name 207 | 208 | 209 | @dataclass(frozen=True) 210 | class DockerTarantoolReplicaset: 211 | name: str 212 | replicas: Dict[str, Container] 213 | 214 | @property 215 | def containers(self): 216 | return list(self.replicas.values()) 217 | 218 | 219 | @dataclass(frozen=True) 220 | class DockerTarantoolCluster: 221 | name: str 222 | replicasets: Dict[str, DockerTarantoolReplicaset] 223 | 224 | @property 225 | def containers(self): 226 | return lcat(pluck_attr("containers", self.replicasets.values())) 227 | 228 | def 
find_container(self, name) -> Container: 229 | return next( 230 | container 231 | for rs in self.replicasets.values() 232 | for replica_name, container in rs.replicas.items() 233 | if replica_name == name 234 | ) 235 | 236 | 237 | @pytest.fixture() 238 | def ok_if_errors_in_logs(): 239 | return set() 240 | 241 | 242 | def check_tarantool_logs(logs: str): 243 | lines = logs.splitlines() 244 | is_ok = [ 245 | # errors that are not actually errors and are OK to occur 246 | lambda l: "E> ER_LOADING: Instance bootstrap hasn't finished yet" in l, 247 | lambda l: "E> Cleanup" in l and "reload.lua" in l, 248 | lambda l: "SystemError unexpected EOF when reading from socket" in l, 249 | lambda l: "SystemError getaddrinfo: Name does not resolve" in l, 250 | lambda l: "ER_NO_SUCH_USER: User 'storage' is not found" in l, 251 | lambda l: "ER_ACCESS_DENIED: Session access to universe '' is denied for user 'storage'" 252 | in l, 253 | lambda l: "ER_ACCESS_DENIED: Read access to universe '' is denied for user 'storage'" 254 | in l, 255 | lambda l: "Exception during calling 'vshard.storage.buckets_count' on " in l 256 | and ": Invalid argument" in l, 257 | lambda l: "Connection refused" in l, 258 | ] 259 | all_errors = lfilter(lambda l: "E>" in l, lines) 260 | is_real_error = none_fn(*is_ok) 261 | real_errors = lfilter(is_real_error, all_errors) 262 | assert not real_errors, f"found errors in Tarantool logs: {real_errors}" 263 | 264 | 265 | @given(parsers.parse("Tarantool autovshard cluster:\n{docker_config}")) 266 | def autovshard_docker_cluster( 267 | docker_config: str, 268 | autovshard_config_in_consul: AutovshardConfig, 269 | tmp_path: Path, 270 | create_container, 271 | run_id, 272 | tarantool_docker_image, 273 | ok_if_errors_in_logs, 274 | ) -> DockerTarantoolCluster: 275 | """deploys tarantool cluster""" 276 | 277 | 278 | cluster = DockerTarantoolCluster( 279 | name=autovshard_config_in_consul.cluster_name, replicasets={}, 280 | ) 281 | 282 | def to_lua_bool_str(b: bool): 283 | return "true" if b else "false" 284 | 285 | for rs_name, rs in parse_config(docker_config).items(): 286 | for replica_name, replica in rs.items(): 287 | rs_cfg = autovshard_config_in_consul.sharding[rs_name] 288 | tarantool_init_script = generate_storage_init( 289 | replicaset_uuid=rs_cfg.uuid, 290 | instance_uuid=rs_cfg.replicas[replica_name].uuid, 291 | cluster_name=autovshard_config_in_consul.cluster_name, 292 | router=to_lua_bool_str(replica["router"]), 293 | storage=to_lua_bool_str(replica["storage"]), 294 | automaster=to_lua_bool_str(replica["automaster"]), 295 | consul_session_ttl=replica.get("consul_session_ttl", "nil"), 296 | consul_http_address=f"http://{run_id}_consul:8500", 297 | ) 298 | init_file: Path = tmp_path.joinpath(f"storage_{replica_name}.lua") 299 | init_file.write_text(tarantool_init_script) 300 | c = create_container( 301 | tarantool_docker_image, 302 | None, 303 | command=["tarantool", f"/{init_file.name}"], 304 | entrypoint="", 305 | name=f"{run_id}_{replica_name}", 306 | hostname=replica_name, 307 | volumes={ 308 | str(init_file.resolve()): { 309 | "bind": f"/{init_file.name}", 310 | "mode": "ro", 311 | }, 312 | }, 313 | ) 314 | cluster.replicasets.setdefault( 315 | rs_name, DockerTarantoolReplicaset(name=rs_name, replicas={}), 316 | ).replicas[replica_name] = c 317 | 318 | yield cluster 319 | 320 | for rs in cluster.replicasets.values(): 321 | for name, container in rs.replicas.items(): 322 | container.reload() 323 | logs = container.logs().decode() 324 | print(f"=================
{container.name} logs ======================") 325 | print(logs) 326 | if container not in ok_if_errors_in_logs: 327 | check_tarantool_logs(logs) 328 | 329 | 330 | def start_container(c: Container): 331 | c.reload() 332 | c.start() 333 | c.reload() 334 | return c 335 | 336 | 337 | @when(parsers.parse("all instances in {rs} are started")) 338 | def start_rs_containers(rs, autovshard_docker_cluster): 339 | return lmap(start_container, autovshard_docker_cluster.containers) 340 | 341 | 342 | @when(parsers.parse("{t} is started")) 343 | @then(parsers.parse("{t} is started")) 344 | @log_calls(logging.debug) 345 | def start_container_by_name(t, autovshard_docker_cluster): 346 | c = autovshard_docker_cluster.find_container(t) 347 | start_container(c) 348 | 349 | 350 | @then(parsers.parse("{replica_name} autovshard consul config role should be {role}")) 351 | def check_t_consul_config_role( 352 | replica_name, role, consul, autovshard_config_in_consul, uuid, 353 | ): 354 | key = autovshard_consul_config_key(autovshard_config_in_consul.cluster_name) 355 | _, kv = consul.client.kv.get(key) 356 | assert kv["Value"] 357 | cfg = yaml.safe_load(kv["Value"]) 358 | 359 | def getmaster(rs_uuid): 360 | return get_in( 361 | cfg, ["sharding", rs_uuid, "replicas", uuid(replica_name), "master"], 362 | ) 363 | 364 | master = next((m for m in map(getmaster, cfg["sharding"]) if m is not None), None) 365 | expected_master = role == "master" 366 | assert master == expected_master, ( 367 | f"unexpected master parameter in consul config for {replica_name}" 368 | ) 369 | 370 | 371 | @when(parsers.parse("after {seconds:g} seconds have passed")) 372 | @then(parsers.parse("after {seconds:g} seconds have passed")) 373 | def step_sleep(seconds): 374 | sleep(seconds) 375 | 376 | 377 | def container_tnt_eval( 378 | c: Container, lua_script, suppress_errors=False, 379 | ) -> bytes: 380 | code, output = c.exec_run( 381 | ["bash", "-ec", f'cat <<"EOF" | tarantoolctl connect 3301\n{lua_script}\nEOF\n'], 382 | ) 383 | if not suppress_errors: 384 | assert ( 385 | code == 0 386 | ), f"{c.name} exec returned status {code} and output: {output.decode()}" 387 | return output 388 | 389 | 390 | def do_check_t_actual_role(t, mode, autovshard_docker_cluster): 391 | container = autovshard_docker_cluster.find_container(t) 392 | if mode == "down": 393 | container.reload() 394 | assert container.status == "exited", f"unexpected mode for {t}" 395 | return 396 | 397 | output = container_tnt_eval( 398 | container, 399 | "local i=box.info() return i.ro and 'I_AM_' .. 'RO' or 'I_AM_' .. 
'RW'", 400 | ) 401 | 402 | actual_mode = "unknown" 403 | if b"I_AM_RO" in output: 404 | actual_mode = "RO" 405 | elif b"I_AM_RW" in output: 406 | actual_mode = "RW" 407 | 408 | assert actual_mode == mode, f"unexpected mode for {t}" 409 | 410 | 411 | @when(parsers.parse("{t} should become {mode} in less than {seconds:d} seconds")) 412 | @then(parsers.parse("{t} should become {mode} in less than {seconds:d} seconds")) 413 | @log_durations(logging.debug) 414 | def wait_for_t_role(t, mode, seconds, autovshard_docker_cluster): 415 | check = retry(seconds, errors=(AssertionError, APIError), timeout=1)( 416 | do_check_t_actual_role, 417 | ) 418 | check(t, mode, autovshard_docker_cluster) 419 | 420 | 421 | @when(parsers.parse("{t} should be {mode}")) 422 | @then(parsers.parse("{t} should be {mode}")) 423 | def check_t_actual_role(t, mode, autovshard_docker_cluster): 424 | do_check_t_actual_role(t, mode, autovshard_docker_cluster) 425 | 426 | 427 | @when("vshard router API should work on all instances") 428 | @then("vshard router API should work on all instances") 429 | def check_public_router_api(autovshard_docker_cluster): 430 | for c in autovshard_docker_cluster.containers: 431 | check_router_api(c) 432 | 433 | 434 | @when(parsers.parse("{t} vshard router API should work")) 435 | @then(parsers.parse("{t} vshard router API should work")) 436 | @log_calls(logging.debug) 437 | def check_public_router_api(t, autovshard_docker_cluster): 438 | c = autovshard_docker_cluster.find_container(t) 439 | check_router_api(c) 440 | 441 | 442 | @log_calls(logging.debug) 443 | def check_router_api(c: Container): 444 | c.reload() 445 | # fmt: off 446 | script = dedent(''' 447 | do 448 | assert(router.test(1) == "test ok") 449 | assert(router.test(2) == "test ok") 450 | assert(router.test(3) == "test ok") 451 | return "CHECK" .. 
"OK" 452 | end 453 | ''') 454 | # fmt: on 455 | output = container_tnt_eval(c, script) 456 | assert b"CHECKOK" in output 457 | 458 | 459 | @when(parsers.parse("{t} is crashed")) 460 | @then(parsers.parse("{t} is crashed")) 461 | @log_calls(logging.debug) 462 | def crash_tarantool(t, autovshard_docker_cluster, ok_if_errors_in_logs): 463 | container = autovshard_docker_cluster.find_container(t) 464 | container_tnt_eval(container, "require('ffi').cast('char *', 0)[0] = 48", True) 465 | ok_if_errors_in_logs.add(container) 466 | 467 | 468 | @when("consul becomes unreachable") 469 | def kill_consul(consul, autovshard_docker_cluster, ok_if_errors_in_logs): 470 | c = consul.container 471 | c.kill() 472 | lmap(ok_if_errors_in_logs.add, autovshard_docker_cluster.containers) 473 | -------------------------------------------------------------------------------- /tests/bdd/util.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | 4 | from docker.models.containers import Container 5 | 6 | 7 | def get_port_on_host(container: Container, port_in_container: int): 8 | return int( 9 | container.attrs['NetworkSettings']['Ports'][f'{port_in_container}/tcp'][0][ 10 | 'HostPort' 11 | ], 12 | ) 13 | 14 | 15 | def wait_tcp_port(address, port, timeout=10): 16 | t = time.monotonic() 17 | while time.monotonic() - t < timeout: 18 | try: 19 | s = socket.create_connection((address, port), timeout=timeout) 20 | s.close() 21 | return 22 | except socket.error: 23 | time.sleep(0.02) 24 | raise TimeoutError('Port is closed %s:%i' % (address, port)) 25 | 26 | 27 | def process_defaults(d: dict, defaults2=None): 28 | if not isinstance(d, dict): 29 | return d 30 | defaults2 = defaults2 or {} 31 | new_d = defaults2.copy() 32 | defaults1 = d.get("_default", {}) 33 | for k, v in filter(lambda item: item[0] != "_default", d.items()): 34 | if isinstance(v, dict): 35 | new_d[k] = process_defaults(v, {**defaults2.get(k, {}), **defaults1}) 36 | else: 37 | new_d[k] = v 38 | return new_d 39 | -------------------------------------------------------------------------------- /tests/test_config.lua: -------------------------------------------------------------------------------- 1 | local yaml = require("yaml") 2 | 3 | describe("autovshard.config", function() 4 | local config = require("autovshard.config") 5 | 6 | it("promote_to_master", function() -- 7 | local autovshard_cfg = yaml.decode([[ 8 | rebalancer_max_receiving: 10 9 | bucket_count: 100 10 | rebalancer_disbalance_threshold: 10 11 | sharding: 12 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 13 | weight: 10 14 | replicas: 15 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 16 | master_weight: 10 17 | switchover_delay: 10 18 | address: a1:3301 19 | name: a1 20 | master: false 21 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 22 | master_weight: 20 23 | switchover_delay: 10 24 | address: a2:3301 25 | name: a2 26 | master: false 27 | ]]) 28 | local expected_new_cfg = yaml.decode([[ 29 | rebalancer_max_receiving: 10 30 | bucket_count: 100 31 | rebalancer_disbalance_threshold: 10 32 | sharding: 33 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 34 | weight: 10 35 | replicas: 36 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 37 | master_weight: 10 38 | switchover_delay: 10 39 | address: a1:3301 40 | name: a1 41 | master: true 42 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 43 | master_weight: 20 44 | switchover_delay: 10 45 | address: a2:3301 46 | name: a2 47 | master: false 48 | ]]) 49 | 50 | end) 51 | 52 | describe("set_instance_read_only", function() -- 53 | 54 
| it("changed", function() 55 | local autovshard_cfg = yaml.decode([[ 56 | rebalancer_max_receiving: 10 57 | bucket_count: 100 58 | rebalancer_disbalance_threshold: 10 59 | sharding: 60 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 61 | weight: 10 62 | replicas: 63 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 64 | master_weight: 10 65 | switchover_delay: 10 66 | address: a1:3301 67 | name: a1 68 | master: true 69 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 70 | master_weight: 20 71 | switchover_delay: 10 72 | address: a2:3301 73 | name: a2 74 | master: false 75 | ]]) 76 | local expected_new_cfg = yaml.decode([[ 77 | rebalancer_max_receiving: 10 78 | bucket_count: 100 79 | rebalancer_disbalance_threshold: 10 80 | sharding: 81 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 82 | weight: 10 83 | replicas: 84 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 85 | master_weight: 10 86 | switchover_delay: 10 87 | address: a1:3301 88 | name: a1 89 | master: false 90 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 91 | master_weight: 20 92 | switchover_delay: 10 93 | address: a2:3301 94 | name: a2 95 | master: false 96 | ]]) 97 | local changed, new_cfg = config.set_instance_read_only(autovshard_cfg, 98 | "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1") 99 | assert.are_same(expected_new_cfg, new_cfg) 100 | assert.is_true(changed) 101 | end) 102 | 103 | it("not changed", function() 104 | local autovshard_cfg = yaml.decode([[ 105 | rebalancer_max_receiving: 10 106 | bucket_count: 100 107 | rebalancer_disbalance_threshold: 10 108 | sharding: 109 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 110 | weight: 10 111 | replicas: 112 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 113 | master_weight: 10 114 | switchover_delay: 10 115 | address: a1:3301 116 | name: a1 117 | master: false 118 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 119 | master_weight: 20 120 | switchover_delay: 10 121 | address: a2:3301 122 | name: a2 123 | master: false 124 | ]]) 125 | 126 | local changed, new_cfg = config.set_instance_read_only(autovshard_cfg, 127 | "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1") 128 | assert.are_same(autovshard_cfg, new_cfg) 129 | assert.is_false(changed, new_cfg) 130 | end) 131 | end) 132 | 133 | it("master_count", function() 134 | local cfg = yaml.decode([[ 135 | rebalancer_max_receiving: 10 136 | bucket_count: 100 137 | rebalancer_disbalance_threshold: 10 138 | sharding: 139 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 140 | weight: 10 141 | replicas: 142 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 143 | master_weight: 10 144 | switchover_delay: 10 145 | address: a1:3301 146 | name: a1 147 | master: false 148 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 149 | master_weight: 20 150 | switchover_delay: 10 151 | address: a2:3301 152 | name: a2 153 | master: false 154 | ]]) 155 | assert.are.equal(0, config.master_count(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")) 156 | 157 | local cfg = yaml.decode([[ 158 | rebalancer_max_receiving: 10 159 | bucket_count: 100 160 | rebalancer_disbalance_threshold: 10 161 | sharding: 162 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 163 | weight: 10 164 | replicas: 165 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 166 | master_weight: 10 167 | switchover_delay: 10 168 | address: a1:3301 169 | name: a1 170 | master: false 171 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 172 | master_weight: 20 173 | switchover_delay: 10 174 | address: a2:3301 175 | name: a2 176 | master: true 177 | ]]) 178 | assert.are.equal(1, config.master_count(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")) 179 | 180 | local cfg = yaml.decode([[ 181 | rebalancer_max_receiving: 10 182 | bucket_count: 100 
183 | rebalancer_disbalance_threshold: 10 184 | sharding: 185 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 186 | weight: 10 187 | replicas: 188 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 189 | master_weight: 10 190 | switchover_delay: 10 191 | address: a1:3301 192 | name: a1 193 | master: true 194 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 195 | master_weight: 20 196 | switchover_delay: 10 197 | address: a2:3301 198 | name: a2 199 | master: true 200 | ]]) 201 | assert.are.equal(2, config.master_count(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")) 202 | assert.are.equal(0, config.master_count(cfg, "0aaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa")) 203 | 204 | end) 205 | 206 | it("is_master", function() 207 | local cfg = yaml.decode([[ 208 | rebalancer_max_receiving: 10 209 | bucket_count: 100 210 | rebalancer_disbalance_threshold: 10 211 | sharding: 212 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 213 | weight: 10 214 | replicas: 215 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 216 | master_weight: 10 217 | switchover_delay: 10 218 | address: a1:3301 219 | name: a1 220 | master: true 221 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 222 | master_weight: 20 223 | switchover_delay: 10 224 | address: a2:3301 225 | name: a2 226 | master: false 227 | ]]) 228 | assert.is_true(config.is_master(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1")) 229 | assert.is_false(config.is_master(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2")) 230 | assert.is_false(config.is_master(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaa99")) 231 | end) 232 | 233 | it("get_switchover_delay", function() 234 | local cfg = yaml.decode([[ 235 | rebalancer_max_receiving: 10 236 | bucket_count: 100 237 | rebalancer_disbalance_threshold: 10 238 | sharding: 239 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 240 | weight: 10 241 | replicas: 242 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 243 | master_weight: 10 244 | switchover_delay: 11 245 | address: a1:3301 246 | name: a1 247 | master: true 248 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 249 | master_weight: 20 250 | switchover_delay: 99 251 | address: a2:3301 252 | name: a2 253 | master: false 254 | ]]) 255 | assert.are.equal(11, 256 | config.get_switchover_delay(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1")) 257 | assert.are.equal(99, 258 | config.get_switchover_delay(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2")) 259 | assert.is_nil(config.get_switchover_delay(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaa99")) 260 | end) 261 | 262 | it("get_master_weight", function() 263 | local cfg = yaml.decode([[ 264 | rebalancer_max_receiving: 10 265 | bucket_count: 100 266 | rebalancer_disbalance_threshold: 10 267 | sharding: 268 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 269 | weight: 10 270 | replicas: 271 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1: 272 | master_weight: 10 273 | switchover_delay: 11 274 | address: a1:3301 275 | name: a1 276 | master: true 277 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2: 278 | master_weight: 20 279 | switchover_delay: 99 280 | address: a2:3301 281 | name: a2 282 | master: false 283 | ]]) 284 | assert.are.equal(10, config.get_master_weight(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1")) 285 | assert.are.equal(20, config.get_master_weight(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2")) 286 | assert.are.equal(0, config.get_master_weight(cfg, "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaa99")) 287 | end) 288 | 289 | it("make_vshard_config", function() 290 | local autovshard_cfg = yaml.decode([[ 291 | rebalancer_max_receiving: 3 292 | bucket_count: 4 293 | rebalancer_disbalance_threshold: 5 294 | sharding: 295 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 296 | 
weight: 44 297 | replicas: 298 | aaaaaaaa-aaaa-aaaa-aaaa-000000000001: 299 | master_weight: 2 300 | switchover_delay: 5 301 | address: a1:3301 302 | name: a1 303 | master: false 304 | aaaaaaaa-aaaa-aaaa-aaaa-000000000002: 305 | master_weight: 3 306 | switchover_delay: 10 307 | address: a2:3301 308 | name: a2 309 | master: true 310 | bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb: 311 | weight: 55 312 | replicas: 313 | bbbbbbbb-bbbb-bbbb-bbbb-000000000001: 314 | master_weight: 6 315 | switchover_delay: 20 316 | address: b1:3301 317 | name: b1 318 | master: true 319 | bbbbbbbb-bbbb-bbbb-bbbb-000000000002: 320 | master_weight: 8 321 | switchover_delay: 30 322 | address: b2:3301 323 | name: b2 324 | master: false 325 | ]]) 326 | 327 | local box_cfg = yaml.decode([[ 328 | listen: 9999 329 | replicaset_uuid: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa 330 | instance_uuid: aaaaaaaa-aaaa-aaaa-aaaa-000000000001 331 | wal_dir: /tmp 332 | ]]) 333 | 334 | local login = "storage" 335 | local password = "secret" 336 | 337 | local expected_vshard_cfg = yaml.decode([[ 338 | # box cfg 339 | listen: 9999 340 | replicaset_uuid: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa 341 | instance_uuid: aaaaaaaa-aaaa-aaaa-aaaa-000000000001 342 | wal_dir: /tmp 343 | 344 | # vshard cfg 345 | rebalancer_max_receiving: 3 346 | rebalancer_disbalance_threshold: 5 347 | bucket_count: 4 348 | sharding: 349 | aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa: 350 | weight: 44 351 | replicas: 352 | aaaaaaaa-aaaa-aaaa-aaaa-000000000001: 353 | name: a1 354 | uri: storage:secret@a1:3301 355 | master: false 356 | aaaaaaaa-aaaa-aaaa-aaaa-000000000002: 357 | name: a2 358 | uri: storage:secret@a2:3301 359 | master: true 360 | bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb: 361 | weight: 55 362 | replicas: 363 | bbbbbbbb-bbbb-bbbb-bbbb-000000000001: 364 | name: b1 365 | uri: storage:secret@b1:3301 366 | master: true 367 | bbbbbbbb-bbbb-bbbb-bbbb-000000000002: 368 | name: b2 369 | uri: storage:secret@b2:3301 370 | master: false 371 | ]]) 372 | assert.are.same(expected_vshard_cfg, 373 | config.make_vshard_config(autovshard_cfg, login, password, box_cfg)) 374 | end) 375 | end) 376 | -------------------------------------------------------------------------------- /tests/test_consul.lua: -------------------------------------------------------------------------------- 1 | describe("test consul", function() 2 | local fiber = require("fiber") 3 | local log = require("log") 4 | local clock = require("clock") 5 | 6 | local consul = require("autovshard.consul") 7 | local consul_client 8 | 9 | local function wait_consul() 10 | local http_client = require("http.client").new() 11 | local response 12 | local t = fiber.time() 13 | while fiber.time() - t < 10 do 14 | fiber.sleep(0.1) 15 | response = http_client:get(os.getenv("CONSUL_HTTP_ADDR") .. "/v1/status/leader", 16 | {timeout = 0.2}) 17 | if response.status == 200 then 18 | log.info("Consul is up") 19 | return 20 | end 21 | log.error("Consul is DOWN") 22 | end 23 | error("Consul did not start") 24 | end 25 | 26 | setup(function() 27 | assert(os.getenv("CONSUL_HTTP_ADDR"), "CONSUL_HTTP_ADDR env variable is not set") 28 | end) 29 | 30 | before_each(function() 31 | local c = require("http.client").new() 32 | local resp = c:delete(os.getenv("CONSUL_HTTP_ADDR") .. 
"/v1/kv/test?recurse=") 33 | consul_client = consul.ConsulClient.new(os.getenv("CONSUL_HTTP_ADDR")) 34 | assert(resp.status == 200, resp) 35 | end) 36 | 37 | it("request 1", function() 38 | local r = consul_client.request{method = "GET", url = "status/leader"} 39 | assert.are.equal(200, r.status, r) 40 | end) 41 | 42 | it("request 2", function() 43 | local r = consul_client.request{ 44 | method = "GET", 45 | url_path = {"status/leader"}, 46 | body = "", 47 | params = {pretty = ""}, 48 | headers = {test = "x"}, 49 | } 50 | assert.are.equal(200, r.status, r) 51 | end) 52 | 53 | it("session", function() 54 | local s = consul_client:session(15) 55 | assert.truthy(s.id) 56 | assert.truthy(type(s.id) == "string") 57 | assert.truthy(s.ttl > 0) 58 | assert.are.equal(s.behavior, "delete") 59 | local ok, session_json = s:renew() 60 | assert.is_true(ok) 61 | assert.are.equal(session_json.ID, s.id) 62 | assert.is_true(s:delete()) 63 | assert.truthy(type(s.id) == "string") 64 | end) 65 | 66 | describe("kv", function() 67 | it("tests put get delete", function() 68 | assert.truthy(consul_client.put) 69 | assert.truthy(consul_client.get) 70 | 71 | assert.truthy(consul_client:put("test/put_get_delete_key", "test_put_get_delete_value")) 72 | 73 | -- cas = 0 74 | assert.is_true(consul_client:put("test/put_get_delete_key2", 75 | "test_put_get_delete_value", 0)) 76 | 77 | -- should fail to put with cas=0 if the key exists 78 | assert.is_false(consul_client:put("test/put_get_delete_key2", 79 | "test_put_get_delete_value", 0)) 80 | 81 | local kv, index = consul_client:get("test/put_get_delete_key") 82 | assert.are.equal("test_put_get_delete_value", kv.value) 83 | assert.are.equal("test/put_get_delete_key", kv.key) 84 | assert.is_true(index > 0) 85 | 86 | assert.is_false(consul_client:delete("test/put_get_delete_key", 999)) 87 | assert.is_true(consul_client:delete("test/put_get_delete_key")) 88 | assert.is_true(consul_client:delete("test/put_get_delete_key")) 89 | assert.is_true(consul_client:delete("non_existent_key")) 90 | assert.is_true(consul_client:delete("non_existent_key", 999)) 91 | end) 92 | 93 | it("tests get blocking", function() 94 | assert.is_nil(consul_client:get("test/blocking_key")) 95 | local kv, index1 = consul_client:get("test/blocking_key") 96 | assert.is_nil(kv) 97 | assert.is_true(index1 > 0) 98 | local t = clock.monotonic() 99 | fiber.create(function() 100 | fiber.sleep(0.2) 101 | consul_client:put("test/blocking_key", "test_blocking_value") 102 | end) 103 | local kv, index2 = consul_client:get("test/blocking_key", 2, index1) 104 | local elapsed = clock.monotonic() - t 105 | assert.truthy(elapsed >= 0.2, elapsed) 106 | assert.truthy(elapsed < 1, elapsed) 107 | assert.are.equal("test_blocking_value", kv.value) 108 | assert.is_true(index2 > index1, string.format("index1=%s, index2=%s", index1, index2)) 109 | end) 110 | 111 | it("watch", function() 112 | fiber.create(function() 113 | fiber.sleep(0.2) 114 | consul_client:put("test/watch_key", "test_watch_value") 115 | end) 116 | 117 | local changes = {} 118 | 119 | local expected_changes = {"no_key", "test_watch_value1", "test_watch_value2", "no_key"} 120 | 121 | local function on_change(kv) 122 | if kv == nil then 123 | table.insert(changes, "no_key") 124 | else 125 | table.insert(changes, kv.value) 126 | end 127 | end 128 | 129 | assert.is_nil(consul_client:get("test/watch_key")) 130 | local fib, stop_watch = consul_client:watch{ 131 | key = "test/watch_key", 132 | on_change = on_change, 133 | index = 1, 134 | rate_limit = 100, 135 | 
rate_limit_burst = 100, 136 | rate_limit_init_burst = 100, 137 | } 138 | assert.truthy(fib) 139 | assert.truthy(stop_watch) 140 | fiber.sleep(0.01) 141 | assert.are.equal("suspended", fib:status()) 142 | consul_client:put("test/watch_key", "test_watch_value1") 143 | fiber.sleep(0.01) 144 | consul_client:put("test/watch_key", "test_watch_value2") 145 | fiber.sleep(0.01) 146 | consul_client:delete("test/watch_key") 147 | fiber.sleep(0.01) 148 | assert.are.equal("suspended", fib:status()) 149 | stop_watch() 150 | fiber.sleep(0.01) 151 | consul_client:put("test/watch_key", "test_watch_value3") 152 | fiber.sleep(0.01) 153 | assert.are.equal("dead", fib:status()) 154 | assert.are.same(expected_changes, changes) 155 | end) 156 | end) 157 | 158 | it("multiple addresses", function() 159 | 160 | local addresses = {"http://localhost:60666", assert(os.getenv("CONSUL_HTTP_ADDR"))} 161 | local client = consul.ConsulClient.new(addresses) 162 | local put = function() return client:put("a", "1") end 163 | assert.has_error(put) 164 | -- should switch to the next address if a request fails 165 | assert.is_true(put()) 166 | -- should not switch to the next address if there are no errors 167 | assert.is_true(put()) 168 | assert.is_nil(client:get("no-such-key")) 169 | -- should not switch to the next address if a key is not found 170 | assert.is_true(put()) 171 | end) 172 | end) 173 | -------------------------------------------------------------------------------- /tests/test_util.lua: -------------------------------------------------------------------------------- 1 | local fiber = require("fiber") 2 | local clock = require("clock") 3 | 4 | describe("test util", function() 5 | local util = require("autovshard.util") 6 | 7 | it("urljoin", function() assert.are.equal("aa/b/c", util.urljoin("/aa/", "b/", "/c/")) end) 8 | 9 | it("ok_or_log_error", function() 10 | assert.are.same({8, 9}, {util.ok_or_log_error(function() return 8, 9 end)}) 11 | assert.is_nil(util.ok_or_log_error(function() 12 | error("oh") 13 | return 8 14 | end)) 15 | end) 16 | 17 | it("rate limiter", function() 18 | local n = 0 19 | local function incr() n = n + 1 end 20 | 21 | local incr1 = util.rate_limited(incr, 100, 10) 22 | local f = fiber.create(function() while not fiber.testcancel() do incr1() end end) 23 | fiber.sleep(1) 24 | f:cancel() 25 | assert.truthy(n >= 90, "cnt=" .. n .. " must be >= 90") 26 | assert.truthy(n <= 110, "cnt=" .. n .. 
" must be < 110") 27 | 28 | n = 0 29 | local incr2 = util.rate_limited(incr, 10, 0) 30 | incr2() 31 | local t = clock.monotonic() 32 | incr2() 33 | incr2() 34 | local ela = clock.monotonic() - t 35 | assert.truthy(ela > 0.2, ela) 36 | assert.truthy(ela < 0.3, ela) 37 | 38 | -- test initial burst 39 | n = 0 40 | local incr3 = util.rate_limited(incr, 1, 100, 100) 41 | t = clock.monotonic() 42 | for _ = 1, 100 do incr3() end 43 | ela = clock.monotonic() - t 44 | assert.truthy(ela < 0.1, ela) 45 | end) 46 | 47 | it("table_set simple", function() 48 | local t = {} 49 | util.table_set(t, {"a", "b", "c"}, 2) 50 | util.table_set(t, {"a", "b", "z"}, 5) 51 | assert.are.same({a = {b = {c = 2, z = 5}}}, t) 52 | end) 53 | 54 | it("deepcompare", function() 55 | local a 56 | local b 57 | a = {1, 2, a = 1, b = 2, c = {d = 4}} 58 | b = {b = 2, a = 1, c = {d = 4}, 1, 2} 59 | assert.is_true(util.deepcompare(a, b)) 60 | 61 | a = 1 62 | b = 1 63 | assert.is_true(util.deepcompare(a, b)) 64 | 65 | a = "a" 66 | b = "a" 67 | assert.is_true(util.deepcompare(a, b)) 68 | 69 | a = {1, 2, a = 1, b = 2, c = {d = 4}} 70 | b = {1, 2, a = 1, b = 2, c = {d = 99}} 71 | assert.is_false(util.deepcompare(a, b)) 72 | 73 | a = {1, 2, a = 1, b = 2, c = {d = 4}} 74 | b = {1, 2, a = 1, b = 2, c = {d = ""}} 75 | assert.is_false(util.deepcompare(a, b)) 76 | 77 | a = {1, 2, a = 1, b = 2, c = {d = 4}} 78 | b = {1, 2, a = 1, b = 2, c = {9, 8}} 79 | assert.is_false(util.deepcompare(a, b)) 80 | end) 81 | 82 | it("cycle", function() 83 | local xs = {1,2,3} 84 | local getnext= util.cycle(xs) 85 | local t = {} 86 | for i=1,10 do 87 | table.insert(t, getnext()) 88 | end 89 | local expected_t = {1,2,3,1,2,3,1,2,3,1} 90 | assert.are.same(expected_t, t) 91 | end) 92 | end) 93 | -------------------------------------------------------------------------------- /tests/test_wlock.lua: -------------------------------------------------------------------------------- 1 | local clock = require("clock") 2 | local util = require("autovshard.util") 3 | local yaml = require("yaml") 4 | 5 | describe("test wlock", function() 6 | local wlock = require("autovshard.wlock") 7 | local consul = require("autovshard.consul") 8 | 9 | local fiber = require("fiber") 10 | local log = require("log") 11 | 12 | local consul_client 13 | 14 | local function wait_consul() 15 | local http_client = require("http.client").new() 16 | local response 17 | local t = fiber.time() 18 | while fiber.time() - t < 10 do 19 | fiber.sleep(0.1) 20 | response = http_client:get(os.getenv("CONSUL_HTTP_ADDR") .. "/v1/status/leader", 21 | {timeout = 0.2}) 22 | if response.status == 200 then 23 | log.info("Consul is up") 24 | return 25 | end 26 | log.error("Consul is DOWN") 27 | end 28 | error("Consul did not start") 29 | end 30 | 31 | setup(function() 32 | assert(os.getenv("CONSUL_HTTP_ADDR"), "CONSUL_HTTP_ADDR env variable is not set") 33 | consul_client = require("autovshard.consul").ConsulClient.new( 34 | assert(os.getenv("CONSUL_HTTP_ADDR"))) 35 | end) 36 | 37 | before_each(function() 38 | local c = require("http.client").new() 39 | local resp = c:delete(os.getenv("CONSUL_HTTP_ADDR") .. 
"/v1/kv/test?recurse=") 40 | assert(resp.status == 200, resp) 41 | assert(consul_client) 42 | end) 43 | 44 | it("parse_kvs", function() 45 | local kvs = { 46 | consul.KV.new{ 47 | create_index = 0, 48 | modify_index = 0, 49 | lock_index = 0, 50 | key = "test/aaaaaaaa-aaaa-aaaa-aaaa-000000000001", 51 | flags = 0, 52 | value = '{"weight": 10}', 53 | session = "aaaaaaaa-aaaa-aaaa-aaaa-000000000001", 54 | }, consul.KV.new{ 55 | create_index = 0, 56 | modify_index = 0, 57 | lock_index = 0, 58 | key = "test/aaaaaaaa-aaaa-aaaa-aaaa-000000000002", 59 | flags = 0, 60 | value = '{"weight": 20}', 61 | session = "aaaaaaaa-aaaa-aaaa-aaaa-000000000002", 62 | }, consul.KV.new{ 63 | create_index = 0, 64 | modify_index = 0, 65 | lock_index = 0, 66 | key = "test/lock", 67 | flags = 0, 68 | value = '{"holder": "aaaaaaaa-aaaa-aaaa-aaaa-000000000002"}', 69 | session = nil, 70 | }, 71 | } 72 | local contender_weights, holder, max_weight = wlock.parse_kvs(kvs, "test") 73 | assert.are.same({ 74 | ["aaaaaaaa-aaaa-aaaa-aaaa-000000000001"] = 10, 75 | ["aaaaaaaa-aaaa-aaaa-aaaa-000000000002"] = 20, 76 | }, contender_weights, "parsed contender_weights are wrong") 77 | 78 | assert.are.same("aaaaaaaa-aaaa-aaaa-aaaa-000000000002", holder, "bad holder") 79 | assert.are.equal(20, max_weight, "bad max_weight") 80 | 81 | end) 82 | 83 | it("lock-unlock", function() 84 | local l1 = wlock.WLock.new(consul_client, "test/wlock", 10, 0) 85 | local l1_locked = fiber.cond() 86 | local l1_acquire_ok 87 | 88 | local done = fiber.channel() 89 | 90 | fiber.new(util.ok_or_log_error, function() 91 | l1_acquire_ok = l1:acquire(done) 92 | l1_locked:broadcast() 93 | end) 94 | l1_locked:wait(3) 95 | assert.is_true(l1_acquire_ok, "l1 should have acquired the lock") 96 | 97 | local l1_released = false 98 | fiber.new(util.ok_or_log_error, function() 99 | done:get() 100 | l1_released = true 101 | end) 102 | 103 | fiber.sleep(0.01) 104 | assert.is_false(l1_released, "l1 should not be released") 105 | 106 | -- create another lock with higher weight 107 | local l2 = wlock.WLock.new(consul_client, "test/wlock", 20, 0) 108 | local l2_locked = fiber.cond() 109 | local l2_acquire_ok 110 | 111 | local done2 = fiber.channel() 112 | fiber.new(function() 113 | l2_acquire_ok = l2:acquire(done2) 114 | l2_locked:broadcast() 115 | end) 116 | l2_locked:wait(3) 117 | assert.is_true(l2_acquire_ok, "l2 lock did not lock") 118 | -- make sure the lock with lower weight released 119 | fiber.sleep(0.1) 120 | assert.is_true(l1_released, "l1 should be released") 121 | done:close() 122 | done2:close() 123 | end) 124 | 125 | describe("lock-weight", function() 126 | local l1, l2, done1, done2, c 127 | 128 | setup(function() 129 | l1 = wlock.WLock.new(consul_client, "test/wlock", 10) 130 | l2 = wlock.WLock.new(consul_client, "test/wlock", 20) 131 | done1 = fiber.channel() 132 | done2 = fiber.channel() 133 | c = fiber.channel() 134 | end) 135 | 136 | teardown(function() 137 | done1:close() 138 | done2:close() 139 | c:close() 140 | l1, l2, done1, done2, c = nil, nil, nil, nil, nil 141 | end) 142 | 143 | it("weight", function() 144 | assert.truthy(c) 145 | local l2_locked = fiber.channel() 146 | local f1 = fiber.new(function(c, done1) 147 | assert.is_nil(l2_locked:get(10)) 148 | assert(l1:acquire(done1)) 149 | c:put{"l1", "locked"} 150 | done1:get() 151 | c:put{"l1", "released"} 152 | end, c, done1) 153 | 154 | local f2 = fiber.new(function(c, done2) 155 | l2:acquire(done2) 156 | l2_locked:close() 157 | c:put{"l2", "locked"} 158 | done2:get() 159 | c:put{"l2", "released"} 160 | 
end, c, done2) 161 | 162 | local l, event = unpack(c:get(2)) 163 | assert.is_true(l2_locked:is_closed()) 164 | assert.is_nil(c:get(1)) 165 | 166 | l1:set_weight(30) 167 | local msg = c:get(2) 168 | assert.truthy(msg) 169 | l, event = unpack(msg) 170 | 171 | -- lock and release events order is arbitrary 172 | if l == "l1" and event == "locked" then 173 | l, event = unpack(c:get(2)) 174 | assert.are.equal(l, "l2") 175 | assert.are.equal(event, "released") 176 | else 177 | assert.are.equal(l, "l2") 178 | assert.are.equal(event, "released") 179 | l, event = unpack(c:get(2)) 180 | assert.are.equal(l, "l1") 181 | assert.are.equal(event, "locked") 182 | end 183 | end) 184 | 185 | end) 186 | describe("lock delay", function() 187 | local l1, l2, done1, done2, c, events 188 | 189 | setup(function() 190 | l1 = wlock.WLock.new(consul_client, "test/wlock", 10, 1) 191 | l2 = wlock.WLock.new(consul_client, "test/wlock", 20, 999) 192 | done1 = fiber.channel() 193 | done2 = fiber.channel() 194 | c = fiber.channel(999) 195 | end) 196 | 197 | teardown(function() 198 | done1:close() 199 | done2:close() 200 | c:close() 201 | l1, l2, done1, done2, c = nil, nil, nil, nil, nil 202 | end) 203 | 204 | it("delay and set_delay", function() 205 | assert.truthy(c) 206 | local l1_locked 207 | local f1 = fiber.new(function(c, done1) 208 | l1_locked = l1:acquire(done1) 209 | c:put "l1 locked" 210 | done1:get() 211 | c:put "l1 released" 212 | end, c, done1) 213 | 214 | -- let l1 lock 215 | fiber.sleep(0.1) 216 | 217 | local l2_locked 218 | local f2 = fiber.new(function(c, done2) 219 | local t = clock.monotonic() 220 | l2_locked = l2:acquire(done2) 221 | local elapsed = clock.monotonic() - t 222 | c:put{"l2 locked", elapsed > 2 and elapsed < 3} 223 | done2:get() 224 | c:put "l2 released" 225 | end, c, done2) 226 | 227 | fiber.new(function() 228 | fiber.sleep(1) 229 | l2:set_delay(2) 230 | c:put("l2:set_delay") 231 | end) 232 | 233 | local expected_events = { -- 234 | "l1 locked", -- 235 | "l2:set_delay", -- 236 | {"l2 locked", true}, -- 237 | "l1 released", -- 238 | } 239 | 240 | fiber.sleep(3) 241 | local events = {} 242 | repeat 243 | local msg = c:get(0) 244 | if msg then table.insert(events, msg) end 245 | until msg == nil 246 | 247 | assert.equal(#expected_events, #events) 248 | -- lock and release events order is arbitrary 249 | if events[3] == "l1 released" then 250 | expected_events[3], expected_events[4] = expected_events[4], expected_events[3] 251 | end 252 | assert.are.same(expected_events, events) 253 | end) 254 | end) 255 | end) 256 | -------------------------------------------------------------------------------- /tests/tnt/init.lua: -------------------------------------------------------------------------------- 1 | -- src: https://github.com/moonlibs/tarantoolapp/blob/d97e1226e9d448ad2e2f3dfd9ec2f18f0dead662/templates/basic/template/t/tnt/init.lua 2 | local fio = require 'fio' 3 | local errno = require 'errno' 4 | local yaml = require 'yaml' 5 | local log = require 'log' 6 | 7 | local dir = os.getenv('TNT_FOLDER') 8 | local cleanup = false 9 | 10 | local _ 11 | 12 | if dir == nil then 13 | dir = fio.tempdir() 14 | cleanup = true 15 | end 16 | 17 | local function compare_versions(expected, version) 18 | -- from tarantool/queue compat.lua 19 | local fun = require 'fun' 20 | local iter, op = fun.iter, fun.operator 21 | 22 | local function split(self, sep) 23 | local sep, fields = sep or ":", {} 24 | local pattern = string.format("([^%s]+)", sep) 25 | self:gsub(pattern, function(c) table.insert(fields, c) end) 
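-- e.g. split("1.10.4-13-g1e9f912", ".") -> {"1", "10", "4-13-g1e9f912"}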
26 | return fields 27 | end 28 | 29 | local function reducer(res, l, r) 30 | if res ~= nil then return res end 31 | if tonumber(l) == tonumber(r) then return nil end 32 | return tonumber(l) > tonumber(r) 33 | end 34 | 35 | local function split_version(version_string) 36 | local vtable = split(version_string, '.') 37 | local vtable2 = split(vtable[3], '-') 38 | vtable[3], vtable[4] = vtable2[1], vtable2[2] 39 | return vtable 40 | end 41 | 42 | local function check_version(expected, version) 43 | version = version or _TARANTOOL 44 | if type(version) == 'string' then version = split_version(version) end 45 | local res = iter(version):zip(expected):reduce(reducer, nil) 46 | 47 | if res or res == nil then res = true end 48 | return res 49 | end 50 | 51 | return check_version(expected, version) 52 | end 53 | 54 | local function tnt_prepare(cfg_args) 55 | cfg_args = cfg_args or {} 56 | local files = fio.glob(fio.pathjoin(dir, '*')) 57 | for _, file in pairs(files) do 58 | if fio.basename(file) ~= 'tarantool.log' then 59 | log.info("remove %s", file) 60 | fio.unlink(file) 61 | end 62 | end 63 | 64 | if compare_versions({1, 7, 3}, _TARANTOOL) then 65 | cfg_args['memtx_dir'] = dir 66 | cfg_args['vinyl_dir'] = dir 67 | cfg_args['log'] = "file:" .. fio.pathjoin(dir, 'tarantool.log') 68 | else 69 | cfg_args['snap_dir'] = dir 70 | cfg_args['vinyl'] = {} 71 | cfg_args['logger'] = fio.pathjoin(dir, 'tarantool.log') 72 | end 73 | cfg_args['wal_dir'] = dir 74 | 75 | box.cfg(cfg_args) 76 | end 77 | 78 | return { 79 | finish = function(code) 80 | local files = fio.glob(fio.pathjoin(dir, '*')) 81 | for _, file in pairs(files) do 82 | if fio.basename(file) == 'tarantool.log' and not cleanup then 83 | log.info("skip removing %s", file) 84 | else 85 | log.info("remove %s", file) 86 | fio.unlink(file) 87 | end 88 | end 89 | if cleanup then 90 | log.info("rmdir %s", dir) 91 | fio.rmdir(dir) 92 | end 93 | end, 94 | 95 | dir = function() return dir end, 96 | 97 | cleanup = function() return cleanup end, 98 | 99 | logfile = function() return fio.pathjoin(dir, 'tarantool.log') end, 100 | 101 | log = function() 102 | local fh = fio.open(fio.pathjoin(dir, 'tarantool.log'), 'O_RDONLY') 103 | if fh == nil then box.error(box.error.PROC_LUA, errno.strerror()) end 104 | 105 | local data = fh:read(16384) 106 | fh:close() 107 | return data 108 | end, 109 | 110 | cfg = tnt_prepare, 111 | } --------------------------------------------------------------------------------