├── .gitignore ├── tests ├── images │ ├── zookeeper │ │ ├── Dockerfile │ │ ├── start.sh │ │ ├── supervisor_zookeeper.conf │ │ ├── setup_zk.sh │ │ ├── setup.sh │ │ ├── zoo.cfg │ │ └── retriable_path_create.sh │ ├── jepsen │ │ ├── Dockerfile │ │ ├── ssh_config │ │ ├── jepsen │ │ │ ├── run.sh │ │ │ ├── test │ │ │ │ └── jepsen │ │ │ │ │ └── rdsync_test.clj │ │ │ ├── java │ │ │ │ ├── WaitQuorumJedis.java │ │ │ │ └── WaitQuorumCommand.java │ │ │ └── project.clj │ │ ├── setup.sh │ │ └── save_logs.sh │ ├── valkey │ │ ├── senticache.conf │ │ ├── supervisor_rdsync.conf │ │ ├── Dockerfile │ │ ├── supervisor_valkey.conf │ │ ├── supervisor_senticache.conf │ │ ├── setup.sh │ │ ├── default.conf │ │ ├── rdsync_cluster.yaml │ │ ├── rdsync_sentinel.yaml │ │ ├── setup_sentinel.sh │ │ └── setup_cluster.sh │ ├── base │ │ ├── sshd_config │ │ ├── supervisor_ssh.conf │ │ ├── Dockerfile │ │ ├── supervisor.conf │ │ ├── setup.sh │ │ └── generate_certs.sh │ ├── docker-compose.yaml │ └── jepsen-compose.yaml ├── testutil │ ├── retry.go │ └── matchers │ │ ├── matchers_test.go │ │ └── matchers.go └── features │ ├── 00_cluster_smoke.feature │ ├── 00_sentinel_smoke.feature │ ├── 03_cluster_switchover_to.feature │ ├── 07_cluster_local_repair.feature │ ├── 06_cluster_lost.feature │ ├── 06_sentinel_lost.feature │ └── 07_sentinel_local_repair.feature ├── AUTHORS ├── .github ├── dependabot.yml └── workflows │ ├── golangci-lint.yml │ ├── unit-tests.yml │ ├── jepsen-cluster-tests.yml │ ├── jepsen-sentinel-tests.yml │ ├── codeql.yml │ └── func-tests.yml ├── valkey_patches ├── build.sh ├── 0002_Allow_explicit_cluster_replication_cascades.patch ├── 0003_Add_offline_mode.patch └── 0001_Add_replication_pause.patch ├── internal ├── app │ ├── init.go │ ├── critical.go │ ├── pprof.go │ ├── aof.go │ ├── info_file.go │ ├── local.go │ ├── checks.go │ ├── parallel.go │ ├── candidate.go │ ├── poison_pill.go │ ├── failover.go │ ├── lag.go │ ├── lost.go │ ├── maintenance.go │ ├── cache.go │ ├── replication.go │ ├── 
active_nodes.go │ ├── app.go │ ├── state.go │ ├── master.go │ └── manager.go ├── valkey │ ├── tls.go │ └── shard.go └── dcs │ ├── zk_test.go │ ├── zk_tls.go │ ├── dcs.go │ ├── config.go │ └── zk_host_provider.go ├── cmd └── rdsync │ ├── info.go │ ├── state.go │ ├── abort.go │ ├── main.go │ ├── switch.go │ ├── maintenance.go │ └── hosts.go ├── README.md ├── LICENSE ├── CONTRIBUTING.md ├── .golangci.yml ├── go.mod └── Makefile /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/rdsync/rdsync 2 | tests/logs/ 3 | .idea 4 | -------------------------------------------------------------------------------- /tests/images/zookeeper/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rdsync-base:latest 2 | COPY . /var/lib/dist/zookeeper 3 | RUN bash /var/lib/dist/zookeeper/setup.sh 4 | -------------------------------------------------------------------------------- /tests/images/jepsen/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rdsync-base:latest 2 | ENV LEIN_ROOT=1 3 | COPY . 
/var/lib/dist/jepsen 4 | RUN bash /var/lib/dist/jepsen/setup.sh 5 | -------------------------------------------------------------------------------- /tests/images/jepsen/ssh_config: -------------------------------------------------------------------------------- 1 | Host * 2 | StrictHostKeyChecking no 3 | SendEnv LANG LC_* 4 | HashKnownHosts yes 5 | GSSAPIAuthentication yes 6 | -------------------------------------------------------------------------------- /tests/images/jepsen/jepsen/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | ssh-keygen -p -f /root/.ssh/id_rsa -m pem -P "" -N "" 6 | touch /root/.ssh/known_hosts 7 | cd "$(dirname "$0")" 8 | lein test 9 | -------------------------------------------------------------------------------- /tests/images/valkey/senticache.conf: -------------------------------------------------------------------------------- 1 | daemonize no 2 | protected-mode no 3 | dir "/tmp" 4 | sentinel set-cache-update-secret functestsecret 5 | sentinel resolve-hostnames yes 6 | sentinel announce-hostnames yes 7 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The following authors have created the source code of "rdsync" published and distributed by YANDEX LLC as the owner: 2 | 3 | Svyatoslav Ermilin munakoiso@yandex-team.ru 4 | Evgeny Dyukov secwall@yandex-team.ru 5 | -------------------------------------------------------------------------------- /tests/images/base/sshd_config: -------------------------------------------------------------------------------- 1 | PubkeyAuthentication yes 2 | PubkeyAcceptedAlgorithms +ssh-rsa 3 | ChallengeResponseAuthentication no 4 | UsePAM yes 5 | X11Forwarding yes 6 | PrintMotd no 7 | AcceptEnv LANG LC_* 8 | Subsystem sftp /usr/lib/openssh/sftp-server 9 | 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: / 5 | schedule: 6 | interval: daily 7 | - package-ecosystem: github-actions 8 | directory: / 9 | schedule: 10 | interval: daily 11 | -------------------------------------------------------------------------------- /tests/images/base/supervisor_ssh.conf: -------------------------------------------------------------------------------- 1 | [program:sshd] 2 | user=root 3 | command=/usr/sbin/sshd -D 4 | autostart=true 5 | autorestart=true 6 | stdout_logfile=/proc/self/fd/1 7 | stdout_logfile_maxbytes=0 8 | stderr_logfile=/proc/self/fd/2 9 | stderr_logfile_maxbytes=0 10 | priority=10 11 | -------------------------------------------------------------------------------- /tests/images/valkey/supervisor_rdsync.conf: -------------------------------------------------------------------------------- 1 | [program:rdsync] 2 | command=/usr/bin/rdsync 3 | process_name=%(program_name)s 4 | autostart=true 5 | startretries=100000 6 | autorestart=true 7 | stopsignal=TERM 8 | priority=3 9 | redirect_stderr=true 10 | stdout_logfile=/var/log/rdsync.log 11 | -------------------------------------------------------------------------------- /tests/images/valkey/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rdsync-base:latest 2 | COPY . 
/var/lib/dist/valkey 3 | COPY ./rdsync /usr/bin/rdsync 4 | COPY ./valkey-server /usr/bin/valkey-server 5 | COPY ./valkey-cli /usr/bin/valkey-cli 6 | COPY ./valkey-senticache /usr/bin/valkey-senticache 7 | RUN bash /var/lib/dist/valkey/setup.sh 8 | -------------------------------------------------------------------------------- /tests/images/valkey/supervisor_valkey.conf: -------------------------------------------------------------------------------- 1 | [program:valkey] 2 | command=/usr/bin/valkey-server /etc/valkey/valkey.conf 3 | process_name=%(program_name)s 4 | autostart=true 5 | autorestart=false 6 | stopsignal=TERM 7 | priority=5 8 | redirect_stderr=true 9 | stdout_logfile=/var/log/valkey/server.log 10 | -------------------------------------------------------------------------------- /tests/images/zookeeper/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /tmp/zookeeper 4 | 5 | cp /var/lib/dist/zookeeper/zoo.cfg /opt/zookeeper/conf/zoo.cfg 6 | 7 | echo $ZK_MYID > /tmp/zookeeper/myid 8 | 9 | /var/lib/dist/base/generate_certs.sh; 10 | 11 | /opt/zookeeper/bin/zkServer.sh start-foreground 12 | -------------------------------------------------------------------------------- /tests/images/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:noble 2 | ENV container=docker 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | COPY . 
/var/lib/dist/base 5 | RUN bash /var/lib/dist/base/setup.sh 6 | RUN chmod 755 /var/lib/dist/base/generate_certs.sh 7 | CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] 8 | -------------------------------------------------------------------------------- /tests/images/valkey/supervisor_senticache.conf: -------------------------------------------------------------------------------- 1 | [program:senticache] 2 | command=/usr/bin/valkey-senticache /etc/valkey/senticache.conf 3 | process_name=%(program_name)s 4 | autostart=true 5 | autorestart=true 6 | stopsignal=TERM 7 | priority=5 8 | redirect_stderr=true 9 | stdout_logfile=/var/log/valkey/senticache.log 10 | -------------------------------------------------------------------------------- /tests/images/zookeeper/supervisor_zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | command=bash /var/lib/dist/zookeeper/start.sh 3 | process_name=%(program_name)s 4 | stopasgroup=true 5 | autostart=true 6 | autorestart=false 7 | stopsignal=TERM 8 | priority=5 9 | redirect_stderr=true 10 | stdout_logfile=/var/log/zookeeper/zookeeper.log 11 | -------------------------------------------------------------------------------- /tests/images/zookeeper/setup_zk.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | retriable_path_create.sh /test 6 | retriable_path_create.sh /test/ha_nodes 7 | retriable_path_create.sh /test/ha_nodes/valkey1 set_priority 8 | retriable_path_create.sh /test/ha_nodes/valkey2 set_priority 9 | retriable_path_create.sh /test/ha_nodes/valkey3 set_priority 10 | -------------------------------------------------------------------------------- /tests/images/valkey/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | mkdir -p /var/lib/valkey /var/log/valkey /etc/valkey 6 | touch 
/var/log/valkey/senticache.log 7 | cp /var/lib/dist/valkey/default.conf /etc/valkey/valkey.conf 8 | cp /var/lib/dist/valkey/supervisor_valkey.conf /etc/supervisor/conf.d 9 | 10 | cp /var/lib/dist/valkey/setup_*.sh /usr/local/bin 11 | -------------------------------------------------------------------------------- /valkey_patches/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | apt update 6 | DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt -y install build-essential git 7 | cd /app 8 | git clone https://github.com/valkey-io/valkey.git 9 | cd valkey 10 | git checkout 9.0.1 11 | 12 | for i in ../valkey_patches/*.patch 13 | do 14 | git apply "${i}" 15 | done 16 | 17 | make -j 18 | -------------------------------------------------------------------------------- /internal/app/init.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | func (app *App) stateInit() appState { 4 | if !app.dcs.WaitConnected(app.config.DcsWaitTimeout) { 5 | if app.doesMaintenanceFileExist() { 6 | return stateMaintenance 7 | } 8 | return stateInit 9 | } 10 | app.dcs.Initialize() 11 | if app.dcs.AcquireLock(pathManagerLock) { 12 | return stateManager 13 | } 14 | return stateCandidate 15 | } 16 | -------------------------------------------------------------------------------- /tests/testutil/retry.go: -------------------------------------------------------------------------------- 1 | package testutil 2 | 3 | import "time" 4 | 5 | // nolint: unparam 6 | func Retry(code func() bool, timeout, sleep time.Duration) { 7 | if code() { 8 | return 9 | } 10 | timer := time.NewTimer(timeout) 11 | ticker := time.NewTicker(sleep) 12 | for { 13 | select { 14 | case <-ticker.C: 15 | if code() { 16 | return 17 | } 18 | case <-timer.C: 19 | return 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- 
/tests/images/jepsen/jepsen/test/jepsen/rdsync_test.clj: -------------------------------------------------------------------------------- 1 | (ns jepsen.rdsync-test 2 | (:require [clojure.test :refer :all] 3 | [jepsen.core :as jepsen] 4 | [jepsen.rdsync :as rdsync])) 5 | 6 | (def valkey_nodes ["valkey1" "valkey2" "valkey3"]) 7 | 8 | (def zk_nodes ["zoo1" "zoo2" "zoo3"]) 9 | 10 | (deftest rdsync-test 11 | (is (:valid? (:results (jepsen/run! (rdsync/rdsync-test valkey_nodes zk_nodes)))))) 12 | -------------------------------------------------------------------------------- /tests/images/jepsen/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | apt update 6 | apt install openjdk-17-jre-headless libjna-java gnuplot wget 7 | chmod 600 /root/.ssh/id_rsa 8 | wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein -O /usr/bin/lein 9 | chmod +x /usr/bin/lein 10 | cp /var/lib/dist/jepsen/ssh_config /etc/ssh/ssh_config 11 | cp -r /var/lib/dist/jepsen/jepsen /root/ 12 | cd /root/jepsen 13 | lein install 14 | lein deps 15 | -------------------------------------------------------------------------------- /tests/images/jepsen/jepsen/java/WaitQuorumJedis.java: -------------------------------------------------------------------------------- 1 | package jepsen.rdsync.waitquorum; 2 | 3 | import redis.clients.jedis.Jedis; 4 | 5 | public class WaitQuorumJedis extends Jedis { 6 | public WaitQuorumJedis(String url) { 7 | super(url); 8 | } 9 | 10 | public long waitQuorum() { 11 | checkIsInMultiOrPipeline(); 12 | connection.sendCommand(WaitQuorumCommand.WAITQUORUM); 13 | return connection.getIntegerReply(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /tests/images/jepsen/jepsen/project.clj: -------------------------------------------------------------------------------- 1 | (defproject jepsen.rdsync "0.1.0-SNAPSHOT" 2 | :description 
"rdsync tests" 3 | :url "https://github.com/yandex/rdsync" 4 | :java-source-paths ["java"] 5 | :dependencies [[org.clojure/clojure "1.10.3"] 6 | [org.clojure/tools.nrepl "0.2.13"] 7 | [clojure-complete "0.2.5"] 8 | [jepsen "0.2.6"] 9 | [zookeeper-clj "0.9.4"] 10 | [redis.clients/jedis "5.0.0"]]) 11 | -------------------------------------------------------------------------------- /internal/app/critical.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | func (app *App) handleCritical() error { 8 | if app.critical.Load().(bool) { 9 | app.logger.Error("Lost dcs connection in critical section") 10 | os.Exit(1) 11 | } else { 12 | app.logger.Info("Lost dcs connection in non-critical section") 13 | } 14 | return nil 15 | } 16 | 17 | func (app *App) enterCritical() { 18 | app.critical.Store(true) 19 | } 20 | 21 | func (app *App) exitCritical() { 22 | app.critical.Store(false) 23 | } 24 | -------------------------------------------------------------------------------- /tests/images/base/supervisor.conf: -------------------------------------------------------------------------------- 1 | [unix_http_server] 2 | file=/var/run/supervisor.sock 3 | chmod=0777 4 | 5 | [supervisord] 6 | logfile=/var/log/supervisor.log 7 | logfile_maxbytes=0 8 | log_level=debug 9 | pidfile=/var/run/supervisord.pid 10 | minfds=1024 11 | nodaemon=true 12 | 13 | [rpcinterface:supervisor] 14 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 15 | 16 | [supervisorctl] 17 | serverurl=unix:///var/run/supervisor.sock 18 | 19 | [include] 20 | files = /etc/supervisor/conf.d/*.conf 21 | -------------------------------------------------------------------------------- /tests/images/zookeeper/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | apt update 6 | 7 | apt install openjdk-17-jre-headless 8 | 9 | tar -xzf 
/var/lib/dist/zookeeper/zookeeper.tar.gz -C /opt 10 | mv /opt/apache-zookeeper* /opt/zookeeper 11 | 12 | mkdir /var/log/zookeeper 13 | cp /var/lib/dist/zookeeper/supervisor_zookeeper.conf /etc/supervisor/conf.d/zookeeper.conf 14 | cp /var/lib/dist/zookeeper/retriable_path_create.sh /usr/local/bin/retriable_path_create.sh 15 | cp /var/lib/dist/zookeeper/setup_zk.sh /usr/local/bin/setup_zk.sh 16 | -------------------------------------------------------------------------------- /tests/images/jepsen/jepsen/java/WaitQuorumCommand.java: -------------------------------------------------------------------------------- 1 | package jepsen.rdsync.waitquorum; 2 | 3 | import redis.clients.jedis.commands.ProtocolCommand; 4 | import redis.clients.jedis.util.SafeEncoder; 5 | 6 | public enum WaitQuorumCommand implements ProtocolCommand { 7 | WAITQUORUM("WAITQUORUM"); 8 | 9 | private final byte[] raw; 10 | 11 | private WaitQuorumCommand(String name) { 12 | raw = SafeEncoder.encode(name); 13 | } 14 | 15 | @Override 16 | public byte[] getRaw() { 17 | return raw; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /cmd/rdsync/info.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/rdsync/internal/app" 10 | ) 11 | 12 | var infoCmd = &cobra.Command{ 13 | Use: "info", 14 | Short: "Print information from DCS", 15 | Run: func(cmd *cobra.Command, args []string) { 16 | app, err := app.NewApp(configFile, logLevel) 17 | if err != nil { 18 | fmt.Println(err) 19 | os.Exit(1) 20 | } 21 | os.Exit(app.CliInfo(verbose)) 22 | }, 23 | } 24 | 25 | func init() { 26 | rootCmd.AddCommand(infoCmd) 27 | } 28 | -------------------------------------------------------------------------------- /cmd/rdsync/state.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | 
import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/rdsync/internal/app" 10 | ) 11 | 12 | var stateCmd = &cobra.Command{ 13 | Use: "state", 14 | Short: "Print information from valkey hosts", 15 | Run: func(cmd *cobra.Command, args []string) { 16 | app, err := app.NewApp(configFile, logLevel) 17 | if err != nil { 18 | fmt.Println(err) 19 | os.Exit(1) 20 | } 21 | os.Exit(app.CliState(verbose)) 22 | }, 23 | } 24 | 25 | func init() { 26 | rootCmd.AddCommand(stateCmd) 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yml: -------------------------------------------------------------------------------- 1 | name: Linters 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | GO_VERSION: 1.25.5 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | golangci: 17 | name: lint 18 | runs-on: ubuntu-24.04 19 | steps: 20 | - uses: actions/setup-go@v6 21 | with: 22 | go-version: ${{ env.GO_VERSION }} 23 | - uses: actions/checkout@v6 24 | - name: golangci-lint 25 | uses: golangci/golangci-lint-action@v9.2.0 26 | with: 27 | version: v2.6 28 | -------------------------------------------------------------------------------- /cmd/rdsync/abort.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/rdsync/internal/app" 10 | ) 11 | 12 | var abortCmd = &cobra.Command{ 13 | Use: "abort", 14 | Short: "Clear switchover command from DCS", 15 | Long: "It does NOT rollback performed actions. 
You should manually repair cluster after it.", 16 | Run: func(cmd *cobra.Command, args []string) { 17 | app, err := app.NewApp(configFile, logLevel) 18 | if err != nil { 19 | fmt.Println(err) 20 | os.Exit(1) 21 | } 22 | os.Exit(app.CliAbort()) 23 | }, 24 | } 25 | 26 | func init() { 27 | rootCmd.AddCommand(abortCmd) 28 | } 29 | -------------------------------------------------------------------------------- /tests/images/valkey/default.conf: -------------------------------------------------------------------------------- 1 | daemonize no 2 | protected-mode no 3 | offline yes 4 | repl-disable-tcp-nodelay yes 5 | repl-diskless-sync yes 6 | repl-diskless-sync-delay 8 7 | no-appendfsync-on-rewrite yes 8 | appendonly yes 9 | masterauth "functestpassword" 10 | requirepass "functestpassword" 11 | tcp-keepalive 60 12 | repl-backlog-size 128mb 13 | maxmemory 1gb 14 | save "" 15 | dir "/var/lib/valkey" 16 | client-output-buffer-limit normal 2mb 1mb 60 17 | client-output-buffer-limit replica 16mb 1mb 60 18 | client-output-buffer-limit pubsub 2mb 1mb 60 19 | # This is only for tests (don't do this on production) 20 | maxclients 1000 21 | enable-debug-command yes 22 | -------------------------------------------------------------------------------- /tests/images/jepsen/save_logs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 1 2 3 4 | do 5 | mkdir -p tests/logs/valkey${i} 6 | mkdir -p tests/logs/zookeeper${i} 7 | 8 | for logfile in /var/log/rdsync.log /var/log/valkey/server.log /var/log/valkey/senticache.log /var/log/supervisor.log 9 | do 10 | logname=$(echo "${logfile}" | rev | cut -d/ -f1 | rev) 11 | docker exec rdsync-valkey${i}-1 cat "${logfile}" > "tests/logs/valkey${i}/${logname}" 12 | done 13 | 14 | docker exec rdsync-zoo${i}-1 cat /var/log/zookeeper/zookeeper.log > tests/logs/zookeeper${i}/zookeeper.log 2>&1 15 | done 16 | 17 | tail -n 18 tests/logs/jepsen.log 18 | # Explicitly fail here 19 | exit 1 20 
| -------------------------------------------------------------------------------- /internal/valkey/tls.go: -------------------------------------------------------------------------------- 1 | package valkey 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "fmt" 7 | "os" 8 | 9 | "github.com/yandex/rdsync/internal/config" 10 | ) 11 | 12 | func getTLSConfig(config *config.Config, CAPath, host string) (*tls.Config, error) { 13 | c := &tls.Config{} 14 | if host == localhost { 15 | c.ServerName = config.Hostname 16 | } 17 | if CAPath != "" { 18 | cert, err := os.ReadFile(CAPath) 19 | if err != nil { 20 | return nil, err 21 | } 22 | pool := x509.NewCertPool() 23 | ok := pool.AppendCertsFromPEM(cert) 24 | if !ok { 25 | return nil, fmt.Errorf("unable to build cert pool from pem at %s", CAPath) 26 | } 27 | c.RootCAs = pool 28 | } 29 | return c, nil 30 | } 31 | -------------------------------------------------------------------------------- /tests/images/zookeeper/zoo.cfg: -------------------------------------------------------------------------------- 1 | tickTime=1000 2 | initLimit=20 3 | syncLimit=10 4 | dataDir=/tmp/zookeeper 5 | clientPort=2181 6 | maxSessionTimeout=60000 7 | snapCount=1000000 8 | forceSync=no 9 | cnxTimeout=3000 10 | autopurge.snapRetainCount=3 11 | autopurge.purgeInterval=0 12 | leaderServes=yes 13 | quorumListenOnAllIPs=true 14 | jute.maxbuffer=16777216 15 | secureClientPort=2281 16 | serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory 17 | portUnification=false 18 | skipACL=no 19 | ssl.trustStore.password=testpassword123 20 | ssl.trustStore.location=/etc/zk-ssl/truststore.jks 21 | ssl.keyStore.password=testpassword321 22 | ssl.keyStore.location=/etc/zk-ssl/server.jks 23 | 24 | server.1=zoo1:2888:3888 25 | server.2=zoo2:2888:3888 26 | server.3=zoo3:2888:3888 27 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: 
-------------------------------------------------------------------------------- 1 | name: Unit-tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | branches: [ master ] 9 | 10 | env: 11 | GO_VERSION: 1.25.5 12 | 13 | jobs: 14 | unittest: 15 | name: all_unittests 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - name: Set up Go 1.x 19 | uses: actions/setup-go@v6 20 | with: 21 | go-version: ${{ env.GO_VERSION }} 22 | id: go 23 | 24 | - name: Check out code into the Go module directory 25 | uses: actions/checkout@v6 26 | 27 | - name: Get dependencies 28 | run: | 29 | go get -v -t -d ./... 30 | 31 | - name: Test 32 | run: make unittests 33 | env: 34 | TEST_MODIFIER: -race 35 | -------------------------------------------------------------------------------- /internal/dcs/zk_test.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestBuildFullPath(t *testing.T) { 10 | z := &zkDCS{config: &ZookeeperConfig{Namespace: "//abc//def"}} 11 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("/xyz/")) 12 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("xyz")) 13 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("////xyz////")) 14 | require.Equal(t, "/abc/def", z.buildFullPath("")) 15 | z = &zkDCS{config: &ZookeeperConfig{Namespace: "//abc//def/"}} 16 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("/xyz/")) 17 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("xyz")) 18 | require.Equal(t, "/abc/def/xyz", z.buildFullPath("////xyz////")) 19 | require.Equal(t, "/abc/def", z.buildFullPath("")) 20 | } 21 | -------------------------------------------------------------------------------- /.github/workflows/jepsen-cluster-tests.yml: -------------------------------------------------------------------------------- 1 | name: Jepsen-cluster-tests 2 | 3 | on: 4 | schedule: 5 | - cron: '30 06 * * *' 6 
| 7 | env: 8 | GO_VERSION: 1.25.5 9 | 10 | jobs: 11 | test: 12 | name: Test 13 | runs-on: ubuntu-24.04 14 | steps: 15 | - name: Set up Go 1.x 16 | uses: actions/setup-go@v6 17 | with: 18 | go-version: ${{ env.GO_VERSION }} 19 | 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v6 22 | 23 | - name: Get dependencies 24 | run: go get -v -t -d ./... 25 | 26 | - name: Run test 27 | run: make jepsen_cluster_test 28 | 29 | - uses: actions/upload-artifact@v6 30 | if: failure() 31 | with: 32 | name: logs 33 | path: tests/logs 34 | -------------------------------------------------------------------------------- /.github/workflows/jepsen-sentinel-tests.yml: -------------------------------------------------------------------------------- 1 | name: Jepsen-sentinel-tests 2 | 3 | on: 4 | schedule: 5 | - cron: '30 06 * * *' 6 | 7 | env: 8 | GO_VERSION: 1.25.5 9 | 10 | jobs: 11 | test: 12 | name: Test 13 | runs-on: ubuntu-24.04 14 | steps: 15 | - name: Set up Go 1.x 16 | uses: actions/setup-go@v6 17 | with: 18 | go-version: ${{ env.GO_VERSION }} 19 | 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v6 22 | 23 | - name: Get dependencies 24 | run: go get -v -t -d ./... 
25 | 26 | - name: Run test 27 | run: make jepsen_sentinel_test 28 | 29 | - uses: actions/upload-artifact@v6 30 | if: failure() 31 | with: 32 | name: logs 33 | path: tests/logs 34 | -------------------------------------------------------------------------------- /tests/images/valkey/rdsync_cluster.yaml: -------------------------------------------------------------------------------- 1 | mode: Cluster 2 | aof_mode: OnReplicas 3 | loglevel: Debug 4 | pprof_addr: ":8081" 5 | info_file: /var/run/rdsync.info 6 | maintenance_file: /var/run/rdsync.maintenance 7 | daemon_lock_file: /var/run/rdsync.lock 8 | valkey: 9 | auth_password: functestpassword 10 | restart_command: supervisorctl restart valkey 11 | aof_path: /var/lib/valkey/appendonlydir 12 | destructive_replication_repair_timeout: 5m 13 | destructive_replication_repair_command: chattr -i /var/lib/valkey/dump.rdb 14 | zookeeper: 15 | session_timeout: 3s 16 | namespace: /test 17 | hosts: [ zoo1:2281, zoo2:2281, zoo3:2281 ] 18 | auth: true 19 | username: testuser 20 | password: testpassword123 21 | use_ssl: true 22 | keyfile: /etc/zk-ssl/server.key 23 | certfile: /etc/zk-ssl/server.crt 24 | ca_cert: /etc/zk-ssl/ca.cert.pem 25 | verify_certs: true 26 | -------------------------------------------------------------------------------- /internal/app/pprof.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "log/slog" 5 | "net/http" 6 | "net/http/pprof" 7 | "os" 8 | ) 9 | 10 | func (app *App) pprofHandler() { 11 | if app.config.PprofAddr == "" { 12 | return 13 | } 14 | serverMux := http.NewServeMux() 15 | serverMux.HandleFunc("/pprof/", pprof.Index) 16 | serverMux.HandleFunc("/pprof/cmdline", pprof.Cmdline) 17 | serverMux.HandleFunc("/pprof/profile", pprof.Profile) 18 | serverMux.HandleFunc("/pprof/symbol", pprof.Symbol) 19 | serverMux.HandleFunc("/pprof/trace", pprof.Trace) 20 | serverMux.HandleFunc("/pprof/heap", pprof.Handler("heap").ServeHTTP) 21 | 
serverMux.HandleFunc("/pprof/goroutine", pprof.Handler("goroutine").ServeHTTP) 22 | 23 | err := http.ListenAndServe(app.config.PprofAddr, serverMux) 24 | if err != nil { 25 | app.logger.Error("Unable to init pprof handler", slog.Any("error", err)) 26 | os.Exit(1) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /internal/app/aof.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | func (app *App) adjustAofMode(master string) error { 8 | if app.aofMode == modeUnspecified { 9 | return nil 10 | } 11 | local := app.shard.Local() 12 | targetMode := true 13 | if app.aofMode == modeOff { 14 | targetMode = false 15 | } else if app.aofMode == modeOnReplicas && local.FQDN() == master && app.checkHAReplicasRunning() { 16 | targetMode = false 17 | } 18 | currentMode, err := local.GetAppendonly(app.ctx) 19 | if err != nil { 20 | return err 21 | } 22 | if currentMode != targetMode { 23 | err = local.SetAppendonly(app.ctx, targetMode) 24 | if err != nil { 25 | return err 26 | } 27 | } 28 | if app.config.Valkey.AofPath != "" && !targetMode { 29 | if _, err := os.Stat(app.config.Valkey.AofPath); err == nil { 30 | return os.RemoveAll(app.config.Valkey.AofPath) 31 | } 32 | } 33 | return nil 34 | } 35 | -------------------------------------------------------------------------------- /tests/images/zookeeper/retriable_path_create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$1" == "" ] 4 | then 5 | echo "Usage $(basename "${0}") [set_priority flag]" 6 | exit 1 7 | fi 8 | 9 | retry_create() { 10 | echo "addauth digest testuser:testpassword123" > /tmp/zk_commands 11 | echo "create ${1}" >> /tmp/zk_commands 12 | if [ "$2" != "" ] 13 | then 14 | echo "set ${1} '{\"priority\": 100}'" >> /tmp/zk_commands 15 | fi 16 | echo "setAcl ${1} auth:testuser:testpassword123:crwad" >> 
/tmp/zk_commands 17 | 18 | tries=0 19 | ret=1 20 | while [ ${tries} -le 60 ] 21 | do 22 | if cat /tmp/zk_commands | /opt/zookeeper/bin/zkCli.sh 23 | then 24 | ret=0 25 | break 26 | else 27 | tries=$(( tries + 1 )) 28 | sleep 1 29 | fi 30 | done 31 | return ${ret} 32 | } 33 | 34 | retry_create "${1}" "${2}" 35 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: CodeQL 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | schedule: 9 | - cron: '30 06 * * 6' 10 | 11 | env: 12 | GO_VERSION: 1.25.5 13 | 14 | jobs: 15 | analyze: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | security-events: write 19 | steps: 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v6 22 | - name: Set up Go 1.x 23 | uses: actions/setup-go@v6 24 | with: 25 | go-version: ${{ env.GO_VERSION }} 26 | - name: Initialize CodeQL 27 | uses: github/codeql-action/init@v4 28 | with: 29 | languages: go 30 | build-mode: manual 31 | - name: Build 32 | run: make cmd/rdsync/rdsync 33 | - name: Perform CodeQL Analysis 34 | uses: github/codeql-action/analyze@v4 35 | with: 36 | category: "/language:go" 37 | -------------------------------------------------------------------------------- /internal/dcs/zk_tls.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "net" 7 | "os" 8 | "time" 9 | 10 | "github.com/go-zookeeper/zk" 11 | ) 12 | 13 | func CreateTLSConfig(rootCAFile, certFile, keyFile string) (*tls.Config, error) { 14 | rootCABytes, err := os.ReadFile(rootCAFile) 15 | if err != nil { 16 | return nil, err 17 | } 18 | 19 | rootCA := x509.NewCertPool() 20 | ok := rootCA.AppendCertsFromPEM(rootCABytes) 21 | if !ok { 22 | return nil, err 23 | } 24 | 25 | cert, err := 
tls.LoadX509KeyPair(certFile, keyFile) 26 | if err != nil { 27 | return nil, err 28 | } 29 | 30 | return &tls.Config{ 31 | Certificates: []tls.Certificate{cert}, 32 | RootCAs: rootCA, 33 | }, nil 34 | } 35 | 36 | func GetTLSDialer(dialer *net.Dialer, tlsConfig *tls.Config) (zk.Dialer, error) { 37 | return func(network, address string, _ time.Duration) (net.Conn, error) { 38 | return tls.DialWithDialer(dialer, network, address, tlsConfig) 39 | }, nil 40 | } 41 | -------------------------------------------------------------------------------- /tests/images/valkey/rdsync_sentinel.yaml: -------------------------------------------------------------------------------- 1 | mode: Sentinel 2 | aof_mode: OnReplicas 3 | loglevel: Debug 4 | pprof_addr: ":8081" 5 | info_file: /var/run/rdsync.info 6 | maintenance_file: /var/run/rdsync.maintenance 7 | daemon_lock_file: /var/run/rdsync.lock 8 | valkey: 9 | auth_password: functestpassword 10 | restart_command: supervisorctl restart valkey 11 | aof_path: /var/lib/valkey/appendonlydir 12 | destructive_replication_repair_timeout: 5m 13 | destructive_replication_repair_command: chattr -i /var/lib/valkey/dump.rdb 14 | sentinel_mode: 15 | announce_hostname: true 16 | cluster_name: functest 17 | cache_update_secret: functestsecret 18 | cache_restart_command: supervisorctl restart senticache 19 | zookeeper: 20 | session_timeout: 3s 21 | namespace: /test 22 | hosts: [ zoo1:2281, zoo2:2281, zoo3:2281 ] 23 | auth: true 24 | username: testuser 25 | password: testpassword123 26 | use_ssl: true 27 | keyfile: /etc/zk-ssl/server.key 28 | certfile: /etc/zk-ssl/server.crt 29 | ca_cert: /etc/zk-ssl/ca.cert.pem 30 | verify_certs: true 31 | -------------------------------------------------------------------------------- /internal/app/info_file.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "encoding/json" 5 | "log/slog" 6 | "os" 7 | "time" 8 | ) 9 | 10 | func (app *App) 
stateFileHandler() { 11 | ticker := time.NewTicker(app.config.InfoFileHandlerInterval) 12 | for { 13 | select { 14 | case <-ticker.C: 15 | tree, err := app.dcs.GetTree("") 16 | if err != nil { 17 | app.logger.Error("StateFileHandler: failed to get current zk tree", slog.Any("error", err)) 18 | _ = os.Remove(app.config.InfoFile) 19 | continue 20 | } 21 | data, err := json.Marshal(tree) 22 | if err != nil { 23 | app.logger.Error("StateFileHandler: failed to marshal zk node data", slog.Any("error", err)) 24 | _ = os.Remove(app.config.InfoFile) 25 | continue 26 | } 27 | err = os.WriteFile(app.config.InfoFile, data, 0o640) 28 | if err != nil { 29 | app.logger.Error("StateFileHandler: failed to write info file", slog.Any("error", err)) 30 | _ = os.Remove(app.config.InfoFile) 31 | continue 32 | } 33 | 34 | case <-app.ctx.Done(): 35 | return 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /cmd/rdsync/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | 9 | "github.com/yandex/rdsync/internal/app" 10 | ) 11 | 12 | var configFile string 13 | var logLevel string 14 | var verbose bool 15 | 16 | var rootCmd = &cobra.Command{ 17 | Use: "rdsync", 18 | Short: "Rdsync is a Valkey HA cluster coordination tool", 19 | Long: `Running without additional arguments will start rdsync service for current node.`, 20 | Run: func(cmd *cobra.Command, args []string) { 21 | app, err := app.NewApp(configFile, "") 22 | if err != nil { 23 | fmt.Println(err) 24 | os.Exit(1) 25 | } 26 | os.Exit(app.Run()) 27 | }, 28 | } 29 | 30 | func init() { 31 | rootCmd.PersistentFlags().StringVarP(&configFile, "config", "c", "/etc/rdsync.yaml", "config file") 32 | rootCmd.PersistentFlags().StringVarP(&logLevel, "loglevel", "l", "Warn", "logging level (Debug|Info|Warn|Error)") 33 | rootCmd.PersistentFlags().BoolVarP(&verbose, 
* In clustered setup each shard must have its own DCS prefix
21 | * Use `make start_sentinel_env` to start an environment with senticache 22 | * Or `make start_cluster_env` to start an environment with single shard of clustered setup 23 | * Run `make clean` to drop containers and network 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2023 YANDEX LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
#!/bin/bash

set -xe

# Write apt configuration for the test images: allow insecure/unauthenticated
# repositories and make apt fully non-interactive.
# FIX: the source read `cat < /etc/apt/apt.conf.d/01buildconfig`, which only
# prints the (nonexistent) file and then executes the config lines as shell
# commands; a heredoc redirected into the file is the clearly intended form.
cat > /etc/apt/apt.conf.d/01buildconfig <<EOF
Acquire::AllowInsecureRepositories "true";
Acquire::AllowDowngradeToInsecureRepositories "true";
APT::Install-Recommends "0";
APT::Get::Assume-Yes "true";
APT::Get::AllowUnauthenticated "true";
APT::Install-Suggests "0";
EOF

apt update

# Base tooling shared by all test images.
apt install less \
    bind9-host \
    net-tools \
    iputils-ping \
    sudo \
    telnet \
    git \
    supervisor \
    openssh-server \
    faketime \
    iptables \
    openssl \
    netcat-traditional

# Point /var/run at tmpfs so runtime state does not survive container restarts.
rm -rf /var/run
ln -s /dev/shm /var/run

# Force legacy iptables backend (nf_tables is not usable in these containers).
ln -sf /usr/sbin/ip6tables-legacy /usr/sbin/ip6tables
ln -sf /usr/sbin/iptables-legacy /usr/sbin/iptables

# Passwordless root SSH between containers (used by the test harness).
mkdir -p /run/sshd
cp /var/lib/dist/base/sshd_config /etc/ssh/sshd_config
mkdir /root/.ssh
chmod 0700 /root/.ssh
yes | ssh-keygen -t rsa -N '' -f /root/.ssh/id_rsa
cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
chmod 0600 /root/.ssh/*

# Supervisor configuration; image-specific services add their own conf.d files.
mkdir -p /etc/supervisor/conf.d
cp /var/lib/dist/base/supervisor.conf /etc/supervisor/supervisord.conf
cp /var/lib/dist/base/supervisor_ssh.conf /etc/supervisor/conf.d/ssh.conf
specified host", 21 | Long: "If master is already on (not on) specified host it will be ignored", 22 | Run: func(cmd *cobra.Command, args []string) { 23 | app, err := app.NewApp(configFile, logLevel) 24 | if err != nil { 25 | fmt.Println(err) 26 | os.Exit(1) 27 | } 28 | os.Exit(app.CliSwitch(switchFrom, switchTo, switchWait, switchForce)) 29 | }, 30 | } 31 | 32 | func init() { 33 | rootCmd.AddCommand(switchCmd) 34 | switchCmd.Flags().StringVar(&switchFrom, "from", "", "switch master from specific (or current master if empty) host") 35 | switchCmd.Flags().StringVar(&switchTo, "to", "", "switch master to specific (or most up-to-date if empty) host") 36 | switchCmd.Flags().BoolVar(&switchForce, "force", false, "make switchover preapproved") 37 | switchCmd.Flags().DurationVarP(&switchWait, "wait", "w", 5*time.Minute, 38 | "how long wait for switchover to complete, 0s to return immediately") 39 | } 40 | -------------------------------------------------------------------------------- /tests/images/valkey/setup_sentinel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | MASTER=${1} 6 | 7 | if [ "${MASTER}" != "" ] 8 | then 9 | valkey-cli -e -a functestpassword -p 6379 config set offline no 10 | master_addr=$(host ${MASTER} | awk '{print $NF}') 11 | valkey-cli -e -a functestpassword -p 6379 replicaof ${master_addr} 6379 12 | valkey-cli -e -a functestpassword -p 6379 config rewrite 13 | tries=0 14 | ok=0 15 | while [ ${tries} -le 60 ] 16 | do 17 | if valkey-cli -e -a functestpassword -p 6379 info replication | grep -q master_link_status:up 18 | then 19 | ok=1 20 | break 21 | else 22 | tries=$(( tries + 1 )) 23 | sleep 1 24 | fi 25 | done 26 | if [ "${ok}" != "1" ] 27 | then 28 | echo "Cluster meet failed" 29 | exit 1 30 | fi 31 | else 32 | valkey-cli -e -a functestpassword -p 6379 config set offline no 33 | fi 34 | 35 | cp /var/lib/dist/valkey/supervisor_rdsync.conf /etc/supervisor/conf.d/rdsync.conf 
// getLocalState returns the current state of the local valkey node,
// or nil when the local node is not present in the shard.
func (app *App) getLocalState() *HostState {
	node := app.shard.Local()
	if node == nil {
		return nil
	}
	return app.getHostState(node.FQDN())
}

// healthChecker periodically publishes the local node state to an ephemeral
// node in DCS so other rdsync instances can see this host as alive.
// If the local state cannot be obtained for more than 5 health-check
// intervals, the health node is deleted from DCS so this host stops counting
// as healthy. Runs until the application context is cancelled.
func (app *App) healthChecker() {
	ticker := time.NewTicker(app.config.HealthCheckInterval)
	path := dcs.JoinPath(pathHealthPrefix, app.config.Hostname)
	// Timestamp of the last successfully obtained local state
	// (zero value means no state has been published yet).
	hcCheckTime := time.Time{}
	for {
		select {
		case <-ticker.C:
			hc := app.getLocalState()
			app.logger.Info(fmt.Sprintf("healthcheck: %v", hc))
			if hc != nil {
				hcCheckTime = hc.CheckAt
				err := app.dcs.SetEphemeral(path, hc)
				if err != nil {
					app.logger.Error("Failed to set healthcheck status to dcs", slog.Any("error", err))
				}
			} else if !hcCheckTime.IsZero() {
				// Local state unavailable: keep the DCS node for a grace
				// period of 5 intervals, then drop it so that failover
				// logic can react to the dead local node.
				if time.Since(hcCheckTime) < 5*app.config.HealthCheckInterval {
					app.logger.Warn("Unable to get local node state, leaving health node in dcs intact")
				} else {
					app.logger.Warn("Unable to get local node state, dropping health node from dcs")
					err := app.dcs.Delete(path)
					if err != nil {
						app.logger.Error("Failed to drop healthcheck status from dcs on dead local node", slog.Any("error", err))
					}
					hcCheckTime = time.Time{}
				}
			}
		case <-app.ctx.Done():
			return
		}
	}
}
-------------------------------------------------------------------------------- /internal/app/checks.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | ) 7 | 8 | func (app *App) checkHAReplicasRunning() bool { 9 | hosts := len(app.shard.Hosts()) 10 | if hosts == 1 { 11 | app.logger.Info("Check HA replicas ok: single node mode") 12 | return true 13 | } 14 | state, err := app.getShardStateFromDB() 15 | if err != nil { 16 | app.logger.Error("Check HA replicas failed", slog.Any("error", err)) 17 | return false 18 | } 19 | 20 | local := app.shard.Local() 21 | localState, ok := state[local.FQDN()] 22 | if !ok { 23 | app.logger.Error("Unable to find local node in state", slog.String("fqdn", local.FQDN())) 24 | return false 25 | } 26 | 27 | baseOffset := getOffset(localState) 28 | 29 | aheadHosts := 0 30 | availableReplicas := 0 31 | for host, hostState := range state { 32 | if getOffset(hostState) > baseOffset { 33 | app.logger.Warn("Host is ahead in replication history", slog.String("fqdn", host)) 34 | aheadHosts++ 35 | } 36 | if hostState.PingOk && !hostState.IsMaster { 37 | if replicates(localState, hostState.ReplicaState, host, local, false) { 38 | availableReplicas++ 39 | } 40 | } 41 | } 42 | 43 | if aheadHosts > 0 { 44 | app.logger.Error(fmt.Sprintf("Not making local node online: %d nodes are ahead in replication history", aheadHosts)) 45 | } 46 | 47 | if availableReplicas >= hosts/2 { 48 | app.logger.Info(fmt.Sprintf("Check HA replicas ok: %d replicas available", availableReplicas)) 49 | return true 50 | } 51 | app.logger.Error(fmt.Sprintf("Check HA replicas failed: %d replicas available", availableReplicas)) 52 | return false 53 | } 54 | -------------------------------------------------------------------------------- /internal/app/parallel.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "errors" 
5 | ) 6 | 7 | func getHostStatesInParallel(hosts []string, getter func(string) (*HostState, error)) (map[string]*HostState, error) { 8 | type result struct { 9 | err error 10 | state *HostState 11 | name string 12 | } 13 | results := make(chan result, len(hosts)) 14 | for _, host := range hosts { 15 | go func(host string) { 16 | state, err := getter(host) 17 | results <- result{err, state, host} 18 | }(host) 19 | } 20 | shardState := make(map[string]*HostState) 21 | var err error 22 | for range hosts { 23 | result := <-results 24 | if result.err != nil { 25 | err = result.err 26 | } else { 27 | shardState[result.name] = result.state 28 | } 29 | } 30 | if err != nil { 31 | return nil, err 32 | } 33 | return shardState, nil 34 | } 35 | 36 | func runParallel(f func(string) error, arguments []string) map[string]error { 37 | type pair struct { 38 | err error 39 | key string 40 | } 41 | errs := make(chan pair, len(arguments)) 42 | for _, argValue := range arguments { 43 | go func(host string) { 44 | errs <- pair{f(host), host} 45 | }(argValue) 46 | } 47 | result := make(map[string]error) 48 | for range arguments { 49 | pairValue := <-errs 50 | result[pairValue.key] = pairValue.err 51 | } 52 | return result 53 | } 54 | 55 | func combineErrors(allErrors map[string]error) error { 56 | var errStr string 57 | for _, err := range allErrors { 58 | if err != nil { 59 | errStr += err.Error() + ";" 60 | } 61 | } 62 | if errStr != "" { 63 | return errors.New(errStr) 64 | } 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /cmd/rdsync/maintenance.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/spf13/cobra" 9 | 10 | "github.com/yandex/rdsync/internal/app" 11 | ) 12 | 13 | var maintWait time.Duration 14 | 15 | var maintCmd = &cobra.Command{ 16 | Use: "maintenance", 17 | Aliases: []string{"maint", "mnt"}, 18 
| Short: "Enables or disables maintenance mode", 19 | Long: "When maintenance is enabled RdSync manager/candidates will not perform any actions.", 20 | Run: func(cmd *cobra.Command, args []string) { 21 | app, err := app.NewApp(configFile, logLevel) 22 | if err != nil { 23 | fmt.Println(err) 24 | os.Exit(1) 25 | } 26 | os.Exit(app.CliGetMaintenance()) 27 | }, 28 | } 29 | 30 | var maintOnCmd = &cobra.Command{ 31 | Use: "on", 32 | Aliases: []string{"enable"}, 33 | Run: func(cmd *cobra.Command, args []string) { 34 | app, err := app.NewApp(configFile, logLevel) 35 | if err != nil { 36 | fmt.Println(err) 37 | os.Exit(1) 38 | } 39 | os.Exit(app.CliEnableMaintenance(maintWait)) 40 | }, 41 | } 42 | 43 | var maintOffCmd = &cobra.Command{ 44 | Use: "off", 45 | Aliases: []string{"disable"}, 46 | Run: func(cmd *cobra.Command, args []string) { 47 | app, err := app.NewApp(configFile, logLevel) 48 | if err != nil { 49 | fmt.Println(err) 50 | os.Exit(1) 51 | } 52 | os.Exit(app.CliDisableMaintenance(maintWait)) 53 | }, 54 | } 55 | 56 | func init() { 57 | maintCmd.AddCommand(maintOnCmd) 58 | maintCmd.AddCommand(maintOffCmd) 59 | maintCmd.PersistentFlags().DurationVarP(&maintWait, "wait", "w", 30*time.Second, 60 | "how long to wait for maintenance activation, 0s to return immediately") 61 | rootCmd.AddCommand(maintCmd) 62 | } 63 | -------------------------------------------------------------------------------- /valkey_patches/0002_Allow_explicit_cluster_replication_cascades.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c 2 | index ed4768bfe..cc6fb7d1d 100644 3 | --- a/src/cluster_legacy.c 4 | +++ b/src/cluster_legacy.c 5 | @@ -7733,7 +7733,7 @@ int clusterCommandSpecial(client *c) { 6 | } 7 | 8 | /* Can't replicate a replica. 
package dcs

import (
	"errors"
	"strings"
	"time"
)

/*
DCS is the main interface representing data store
DCS implementation should maintain connection to a server,
track connection status changes (connected/disconnected)
and perform basic operations
*/
type DCS interface {
	// IsConnected reports whether the connection to the store is alive.
	IsConnected() bool
	// WaitConnected blocks until connected or timeout elapses;
	// returns false on timeout.
	WaitConnected(timeout time.Duration) bool
	Initialize() // Create initial data structure if not exists
	// SetDisconnectCallback registers a callback invoked on disconnect.
	SetDisconnectCallback(callback func() error)
	// AcquireLock attempts to take the lock at path; returns true on success.
	AcquireLock(path string) bool
	ReleaseLock(path string)
	Create(path string, value any) error
	CreateEphemeral(path string, value any) error
	Set(path string, value any) error
	SetEphemeral(path string, value any) error
	Get(path string, dest any) error
	Delete(path string) error
	GetTree(path string) (any, error)
	GetChildren(path string) ([]string, error)
	Close()
}

// ExtendedLockDCS extends DCS with a lock release that reports failure
// instead of swallowing it.
type ExtendedLockDCS interface {
	DCS
	ReleaseLockOrError(path string) error
}

var (
	// ErrExists means that node being created already exists
	ErrExists = errors.New("key already exists")
	// ErrNotFound means that the requested node does not exist
	ErrNotFound = errors.New("key was not found in DCS")
	// ErrMalformed means that we failed to unmarshall received data
	ErrMalformed = errors.New("failed to parse DCS value, possibly data format changed")
)

// sep is a path separator for most common DCS
// Zookeeper, etcd and consul use slash
const sep = "/"

// LockOwner contains info about the process holding the lock
type LockOwner struct {
	Hostname string `json:"hostname"`
	Pid      int    `json:"pid"`
}

// JoinPath build node path from chunks
func JoinPath(parts ...string) string {
	return strings.Join(parts, sep)
}
app.shard.UpdateHostsInfo() 15 | if err != nil { 16 | app.logger.Error("Candidate: failed to update host info from DCS", slog.Any("error", err)) 17 | return stateCandidate 18 | } 19 | shardState, err := app.getShardStateFromDB() 20 | if err != nil { 21 | app.logger.Error("Failed to get shard state from DB", slog.Any("error", err)) 22 | } else { 23 | app.logger.Info(fmt.Sprintf("Shard state: %v", shardState)) 24 | } 25 | maintenance, err := app.GetMaintenance() 26 | if err != nil && err != dcs.ErrNotFound { 27 | app.logger.Error("Candidate: failed to get maintenance from DCS", slog.Any("error", err)) 28 | return stateCandidate 29 | } 30 | if maintenance != nil && maintenance.RdSyncPaused { 31 | return stateMaintenance 32 | } 33 | 34 | poisonPill, err := app.getPoisonPill() 35 | if err != nil && err != dcs.ErrNotFound { 36 | app.logger.Error("Candidate: failed to get poison pill from DCS", slog.Any("error", err)) 37 | return stateCandidate 38 | } 39 | if poisonPill != nil { 40 | err = app.applyPoisonPill(poisonPill) 41 | if err != nil { 42 | app.logger.Error("Candidate: failed to apply poison pill", slog.Any("error", err)) 43 | return stateCandidate 44 | } 45 | if poisonPill.TargetHost == app.config.Hostname { 46 | return stateCandidate 47 | } 48 | } 49 | 50 | var master string 51 | err = app.dcs.Get(pathMasterNode, &master) 52 | if err != nil && err != dcs.ErrNotFound { 53 | app.logger.Error("Candidate: failed to get current master from DCS", slog.Any("error", err)) 54 | return stateCandidate 55 | } 56 | app.repairLocalNode(master) 57 | 58 | if app.dcs.AcquireLock(pathManagerLock) { 59 | return stateManager 60 | } 61 | return stateCandidate 62 | } 63 | -------------------------------------------------------------------------------- /tests/images/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '2.3' 2 | 3 | services: 4 | zoo1: 5 | build: 6 | context: ./zookeeper 7 | hostname: zoo1 8 | ports: 9 | - 
2181 10 | - 2281 11 | - 2888 12 | - 3888 13 | privileged: true 14 | environment: 15 | ZK_MYID: 1 16 | networks: 17 | rdsync_net: 18 | ipv4_address: 192.168.234.10 19 | 20 | zoo2: 21 | build: 22 | context: ./zookeeper 23 | hostname: zoo2 24 | ports: 25 | - 2181 26 | - 2281 27 | - 2888 28 | - 3888 29 | privileged: true 30 | environment: 31 | ZK_MYID: 2 32 | networks: 33 | rdsync_net: 34 | ipv4_address: 192.168.234.11 35 | 36 | zoo3: 37 | build: 38 | context: ./zookeeper 39 | hostname: zoo3 40 | ports: 41 | - 2181 42 | - 2281 43 | - 2888 44 | - 3888 45 | privileged: true 46 | environment: 47 | ZK_MYID: 3 48 | networks: 49 | rdsync_net: 50 | ipv4_address: 192.168.234.12 51 | 52 | valkey1: 53 | build: 54 | context: ./valkey 55 | hostname: valkey1 56 | ports: 57 | - 6379 58 | - 26379 59 | privileged: true 60 | networks: 61 | rdsync_net: 62 | ipv4_address: 192.168.234.13 63 | 64 | valkey2: 65 | build: 66 | context: ./valkey 67 | hostname: valkey2 68 | ports: 69 | - 6379 70 | - 26379 71 | privileged: true 72 | networks: 73 | rdsync_net: 74 | ipv4_address: 192.168.234.14 75 | 76 | valkey3: 77 | build: 78 | context: ./valkey 79 | hostname: valkey3 80 | ports: 81 | - 6379 82 | - 26379 83 | privileged: true 84 | networks: 85 | rdsync_net: 86 | ipv4_address: 192.168.234.15 87 | 88 | networks: 89 | rdsync_net: 90 | driver: bridge 91 | ipam: 92 | driver: default 93 | config: 94 | - subnet: 192.168.234.0/24 95 | gateway: 192.168.234.1 96 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Notice to external contributors 2 | 3 | 4 | ## General info 5 | 6 | Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**”). 
If you have already adopted terms and conditions of the CLA, you are able to provide your contributions. When you submit your pull request, please add the following information into it — we may then freely merge your contributions with our corresponding open source project and use them further in accordance with the terms and conditions of the CLA.
36 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | go: "1.24" 4 | modules-download-mode: mod 5 | linters: 6 | default: none 7 | enable: 8 | - bodyclose 9 | - copyloopvar 10 | - dupl 11 | - errcheck 12 | - funlen 13 | - gocritic 14 | - govet 15 | - ineffassign 16 | - misspell 17 | - nakedret 18 | - revive 19 | - sloglint 20 | - staticcheck 21 | - unconvert 22 | - unparam 23 | - unused 24 | - whitespace 25 | settings: 26 | dupl: 27 | threshold: 400 28 | funlen: 29 | lines: 400 30 | statements: 200 31 | gocritic: 32 | enabled-tags: 33 | - performance 34 | disabled-tags: 35 | - diagnostic 36 | - experimental 37 | - opinionated 38 | - style 39 | govet: 40 | enable: 41 | - fieldalignment 42 | misspell: 43 | locale: US 44 | sloglint: 45 | attr-only: true 46 | revive: 47 | rules: 48 | - name: blank-imports 49 | - name: context-as-argument 50 | - name: context-keys-type 51 | - name: dot-imports 52 | - name: error-return 53 | - name: error-naming 54 | - name: exported 55 | - name: var-naming 56 | - name: var-declaration 57 | - name: package-comments 58 | - name: range 59 | - name: receiver-naming 60 | - name: time-naming 61 | - name: unexported-return 62 | - name: errorf 63 | - name: empty-block 64 | - name: unreachable-code 65 | - name: redefines-builtin-id 66 | exclusions: 67 | generated: lax 68 | presets: 69 | - comments 70 | - common-false-positives 71 | - legacy 72 | - std-error-handling 73 | paths: 74 | - third_party$ 75 | - builtin$ 76 | - examples$ 77 | severity: 78 | default: error 79 | formatters: 80 | enable: 81 | - gofmt 82 | - goimports 83 | exclusions: 84 | generated: lax 85 | paths: 86 | - third_party$ 87 | - builtin$ 88 | - examples$ 89 | -------------------------------------------------------------------------------- /tests/images/valkey/setup_cluster.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | MASTER=${1} 6 | 7 | supervisorctl stop valkey 8 | 9 | cat >>/etc/valkey/valkey.conf < 0 { 40 | failedTime := time.Since(app.nodeFailTime[master]) 41 | if failedTime < app.config.Valkey.FailoverTimeout { 42 | return fmt.Errorf("failover timeout is not yet elapsed: remaining %v", 43 | app.config.Valkey.FailoverTimeout-failedTime) 44 | } 45 | } 46 | if countRunningHAReplicas(shardState) == len(shardState)-1 { 47 | return fmt.Errorf("all replicas are alive and running replication, seems dcs problems") 48 | } 49 | 50 | app.logger.Info(fmt.Sprintf("Approve failover: active nodes are %v", activeNodes)) 51 | permissibleReplicas := countAliveHAReplicasWithinNodes(activeNodes, shardState) 52 | failoverQuorum := app.getFailoverQuorum(activeNodes) 53 | if permissibleReplicas < failoverQuorum { 54 | return fmt.Errorf("no quorum, have %d replicas while %d is required", permissibleReplicas, failoverQuorum) 55 | } 56 | 57 | var lastSwitchover Switchover 58 | err := app.dcs.Get(pathLastSwitch, &lastSwitchover) 59 | if err != dcs.ErrNotFound { 60 | if err != nil { 61 | return err 62 | } 63 | if lastSwitchover.Result == nil { 64 | return fmt.Errorf("another switchover with cause %s is in progress", lastSwitchover.Cause) 65 | } 66 | timeAfterLastSwitchover := time.Since(lastSwitchover.Result.FinishedAt) 67 | if timeAfterLastSwitchover < app.config.Valkey.FailoverCooldown && lastSwitchover.Cause == CauseAuto { 68 | return fmt.Errorf("not enough time from last failover %s (cooldown %s)", 69 | lastSwitchover.Result.FinishedAt, app.config.Valkey.FailoverCooldown) 70 | } 71 | } 72 | return nil 73 | } 74 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/yandex/rdsync 2 | 3 | go 1.25.0 4 | 5 | toolchain go1.25.5 6 | 7 | require ( 
8 | github.com/cenkalti/backoff/v4 v4.3.0 9 | github.com/cucumber/godog v0.15.1 10 | github.com/docker/docker v28.5.2+incompatible 11 | github.com/go-zookeeper/zk v1.0.4 12 | github.com/gofrs/flock v0.13.0 13 | github.com/heetch/confita v0.11.0 14 | github.com/spf13/cobra v1.10.2 15 | github.com/spf13/pflag v1.0.10 16 | github.com/stretchr/testify v1.11.1 17 | github.com/valkey-io/valkey-go v1.0.69 18 | gopkg.in/yaml.v2 v2.4.0 19 | ) 20 | 21 | require ( 22 | github.com/BurntSushi/toml v1.5.0 // indirect 23 | github.com/Microsoft/go-winio v0.6.2 // indirect 24 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 25 | github.com/containerd/errdefs v1.0.0 // indirect 26 | github.com/containerd/errdefs/pkg v0.3.0 // indirect 27 | github.com/containerd/log v0.1.0 // indirect 28 | github.com/cucumber/gherkin/go/v26 v26.2.0 // indirect 29 | github.com/cucumber/messages/go/v21 v21.0.1 // indirect 30 | github.com/davecgh/go-spew v1.1.1 // indirect 31 | github.com/distribution/reference v0.6.0 // indirect 32 | github.com/docker/go-connections v0.6.0 // indirect 33 | github.com/docker/go-units v0.5.0 // indirect 34 | github.com/felixge/httpsnoop v1.0.4 // indirect 35 | github.com/go-logr/logr v1.4.3 // indirect 36 | github.com/go-logr/stdr v1.2.2 // indirect 37 | github.com/goccy/go-yaml v1.19.0 // indirect 38 | github.com/gofrs/uuid v4.3.1+incompatible // indirect 39 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 40 | github.com/hashicorp/go-memdb v1.3.4 // indirect 41 | github.com/hashicorp/golang-lru v0.5.4 // indirect 42 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 43 | github.com/moby/docker-image-spec v1.3.1 // indirect 44 | github.com/moby/sys/atomicwriter v0.1.0 // indirect 45 | github.com/moby/term v0.5.0 // indirect 46 | github.com/morikuni/aec v1.0.0 // indirect 47 | github.com/opencontainers/go-digest v1.0.0 // indirect 48 | github.com/opencontainers/image-spec v1.1.1 // indirect 49 | github.com/pkg/errors v0.9.1 // indirect 50 | 
github.com/pmezard/go-difflib v1.0.0 // indirect 51 | go.opentelemetry.io/auto/sdk v1.2.1 // indirect 52 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect 53 | go.opentelemetry.io/otel v1.39.0 // indirect 54 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 // indirect 55 | go.opentelemetry.io/otel/metric v1.39.0 // indirect 56 | go.opentelemetry.io/otel/trace v1.39.0 // indirect 57 | golang.org/x/sys v0.39.0 // indirect 58 | google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect 59 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect 60 | gopkg.in/yaml.v3 v3.0.1 // indirect 61 | gotest.tools/v3 v3.5.1 // indirect 62 | ) 63 | -------------------------------------------------------------------------------- /tests/testutil/matchers/matchers_test.go: -------------------------------------------------------------------------------- 1 | package matchers 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestRegexpMatcher(t *testing.T) { 10 | assert.NoError(t, RegexpMatcher("qwe asd er", "a.d"), "regexp matcher should find match at any position") 11 | assert.NoError(t, RegexpMatcher("qwe", "qwe"), "regexp matcher should also match full string") 12 | assert.NoError(t, RegexpMatcher("qwe asd er", "^.*qwe.*$"), "regexp matcher should also match full string with patterns") 13 | assert.Error(t, RegexpMatcher("qwe asd er", "boo"), "regexp matcher should match not match anything") 14 | assert.NoError(t, RegexpMatcher("1 1", "^1[[:space:]]+1$"), "regexp matcher should match [[:space:]]") 15 | } 16 | 17 | func TestJsonExactlyMatcher(t *testing.T) { 18 | var a, e string 19 | a = `{"a":1, "b":0.2, "c": [1,2], "z": null, "e": {"x":"y"}}` 20 | e = `{"a":1, "c": [1,2], "b":0.2, "z": null, "e": {"x": "y"}}` 21 | assert.NoError(t, JSONExactlyMatcher(a, e), "exact json matcher should match jsons despite key orders and 
spaces") 22 | a = `{"a":2, "b":0.2, "c": [1,2], "d": null, "e": {"x":"y"}}` 23 | assert.Error(t, JSONExactlyMatcher(a, e), "exact json matcher should not match if value changes") 24 | } 25 | 26 | func TestJSONMatcher(t *testing.T) { 27 | var a, e string 28 | a = `{"a":1, "b":0.2, "c": [1,2], "d": null, "e": {"x":"y"}}` 29 | e = `{"a":1, "c": [1,2], "b":0.2, "d": null, "e": {"x": "y"}}` 30 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match jsons despite key orders and spaces") 31 | e = `{"a":1, "d": null}` 32 | assert.NoError(t, JSONMatcher(a, e), "json matcher should ignore extra keys") 33 | a = ` 34 | { 35 | "a":1, 36 | "e": { 37 | "a": 1, 38 | "e": { 39 | "a": 1, 40 | "ok": true 41 | } 42 | } 43 | } 44 | ` 45 | e = `{"e":{"e":{"ok":true}}}` 46 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match deep nested jsons") 47 | e = `{"e":{"e":{"res":"ok"}}}` 48 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field is missing") 49 | e = `{"e":{"e":{"ok":1}}}` 50 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field type is different") 51 | e = `{"e":{"e":{"ok":false}}}` 52 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match if field value is different") 53 | a = ` 54 | { 55 | "e": [ 56 | {"a": 1}, 57 | {"b": 2}, 58 | {"c": 3}, 59 | {"d": 4}, 60 | {"e": 5} 61 | ] 62 | } 63 | ` 64 | e = ` 65 | { 66 | "e": [ 67 | {"b": 2}, 68 | {"d": 4} 69 | ] 70 | } 71 | ` 72 | assert.NoError(t, JSONMatcher(a, e), "json matcher should match parts of arrays, preserving order") 73 | e = ` 74 | { 75 | "e": [ 76 | {"d": 4}, 77 | {"b": 2} 78 | ] 79 | } 80 | ` 81 | assert.Error(t, JSONMatcher(a, e), "json matcher should not match parts of arrays, if order differs") 82 | } 83 | -------------------------------------------------------------------------------- /internal/app/lag.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | 
"strings" 6 | ) 7 | 8 | func getOffset(state *HostState) int64 { 9 | if state == nil || !state.PingOk || !state.PingStable { 10 | return 0 11 | } 12 | if state.IsMaster { 13 | return state.MasterReplicationOffset 14 | } else if state.ReplicaState != nil { 15 | return state.ReplicaState.ReplicationOffset 16 | } 17 | return 0 18 | } 19 | 20 | func isPartialSyncPossible(replica *HostState, master *HostState) bool { 21 | if replica == nil || master == nil { 22 | return false 23 | } 24 | rs := replica.ReplicaState 25 | if rs == nil { 26 | return false 27 | } 28 | psyncOffset := rs.ReplicationOffset + 1 29 | if (master.ReplicationID != replica.ReplicationID) && (master.ReplicationID2 != replica.ReplicationID || 30 | psyncOffset > master.SecondReplicationOffset) { 31 | return false 32 | } 33 | if psyncOffset < master.ReplicationBacklogStart || 34 | psyncOffset > (master.ReplicationBacklogStart+master.ReplicationBacklogSize) { 35 | return false 36 | } 37 | return true 38 | } 39 | 40 | func (app *App) findMostRecentNode(shardState map[string]*HostState) string { 41 | var recentHost string 42 | var recentOffset int64 43 | for host, state := range shardState { 44 | offset := getOffset(state) 45 | if offset > recentOffset { 46 | recentHost = host 47 | recentOffset = offset 48 | } 49 | } 50 | return recentHost 51 | } 52 | 53 | func (app *App) getMostDesirableNode(shardState map[string]*HostState, switchoverFrom string) (string, error) { 54 | recent := app.findMostRecentNode(shardState) 55 | recentState := shardState[recent] 56 | 57 | var recentNodes []string 58 | 59 | for host, state := range shardState { 60 | if strings.HasPrefix(host, switchoverFrom) { 61 | continue 62 | } 63 | if host == recent { 64 | recentNodes = append(recentNodes, host) 65 | continue 66 | } 67 | if isPartialSyncPossible(state, recentState) { 68 | recentNodes = append(recentNodes, host) 69 | } 70 | } 71 | 72 | if len(recentNodes) < 1 { 73 | return "", fmt.Errorf("no hosts with psync possible from most 
recent one: %s", recent) 74 | } 75 | 76 | app.logger.Info(fmt.Sprintf("Selecting most desirable within %s", recentNodes)) 77 | 78 | var priorityHost string 79 | var maxPriority int 80 | var maxOffset int64 81 | 82 | for _, host := range recentNodes { 83 | nc, err := app.shard.GetNodeConfiguration(host) 84 | if err != nil { 85 | return "", err 86 | } 87 | offset := getOffset(shardState[host]) 88 | if nc.Priority > maxPriority { 89 | priorityHost = host 90 | maxPriority = nc.Priority 91 | maxOffset = offset 92 | } else if nc.Priority == maxPriority && offset > maxOffset { 93 | priorityHost = host 94 | maxPriority = nc.Priority 95 | maxOffset = offset 96 | } 97 | } 98 | 99 | return priorityHost, nil 100 | } 101 | -------------------------------------------------------------------------------- /internal/app/lost.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | ) 7 | 8 | func (app *App) stateLost() appState { 9 | if app.dcs.IsConnected() { 10 | return stateCandidate 11 | } 12 | if len(app.shard.Hosts()) == 1 { 13 | return stateLost 14 | } 15 | 16 | localNodeState := app.getLocalState() 17 | node := app.shard.Local() 18 | if localNodeState.IsMaster { 19 | if app.checkHAReplicasRunning() { 20 | offline, err := node.IsOffline(app.ctx) 21 | if err != nil { 22 | app.logger.Error("Failed to get node offline state", slog.String("fqdn", node.FQDN()), slog.Any("error", err)) 23 | return stateLost 24 | } 25 | if offline { 26 | app.logger.Info("Rdsync have lost connection to ZK. However HA cluster is alive. Setting local node online") 27 | err = node.SetOnline(app.ctx) 28 | if err != nil { 29 | app.logger.Error("Unable to set local node online", slog.Any("error", err)) 30 | } 31 | return stateLost 32 | } 33 | app.logger.Info("Rdsync have lost connection to ZK. However HA cluster is alive. 
Do nothing") 34 | return stateLost 35 | } 36 | } else { 37 | shardState, err := app.getShardStateFromDB() 38 | if err != nil { 39 | app.logger.Error("Failed to get shard state from DB", slog.Any("error", err)) 40 | return stateLost 41 | } 42 | 43 | app.logger.Info(fmt.Sprintf("Shard state: %v", shardState)) 44 | master, err := app.getMasterHost(shardState) 45 | if err != nil || master == "" { 46 | app.logger.Error("Failed to get master from shard state", slog.Any("error", err)) 47 | } else { 48 | local := app.shard.Local() 49 | offline, err := local.IsOffline(app.ctx) 50 | if err != nil { 51 | app.logger.Error("Failed to get node offline state", slog.String("fqdn", local.FQDN()), slog.Any("error", err)) 52 | return stateLost 53 | } 54 | if shardState[master].PingOk && shardState[master].PingStable && replicates(shardState[master], shardState[local.FQDN()].ReplicaState, local.FQDN(), app.shard.Get(master), false) && !app.isReplicaStale(shardState[local.FQDN()].ReplicaState, false) { 55 | if offline { 56 | app.logger.Info("Rdsync have lost connection to ZK. However our replication connection is alive. Setting local node online") 57 | err = node.SetOnline(app.ctx) 58 | if err != nil { 59 | app.logger.Error("Unable to set local node online", slog.Any("error", err)) 60 | } 61 | return stateLost 62 | } 63 | app.logger.Info("Rdsync have lost connection to ZK. However our replication connection is alive. Do nothing") 64 | return stateLost 65 | } 66 | } 67 | } 68 | 69 | offline, err := node.IsOffline(app.ctx) 70 | if err != nil { 71 | app.logger.Error("Failed to get node offline state", slog.String("fqdn", node.FQDN()), slog.Any("error", err)) 72 | return stateLost 73 | } 74 | if offline { 75 | return stateLost 76 | } 77 | if err := node.SetOffline(app.ctx); err != nil { 78 | app.logger.Error("Failed to set node offline", slog.String("fqdn", node.FQDN()), slog.Any("error", err)) 79 | return stateLost 80 | } 81 | app.logger.Info("Rdsync have lost connection to ZK. 
Node is now offline", slog.String("fqdn", node.FQDN())) 82 | return stateLost 83 | } 84 | -------------------------------------------------------------------------------- /internal/app/maintenance.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "os" 7 | 8 | "github.com/yandex/rdsync/internal/dcs" 9 | ) 10 | 11 | func (app *App) enterMaintenance(maintenance *Maintenance, master string) error { 12 | node := app.shard.Get(master) 13 | err, rewriteErr := node.SetNumQuorumReplicas(app.ctx, 0) 14 | if err != nil { 15 | return err 16 | } 17 | if rewriteErr != nil { 18 | return rewriteErr 19 | } 20 | err = app.dcs.Delete(pathActiveNodes) 21 | if err != nil { 22 | return err 23 | } 24 | maintenance.RdSyncPaused = true 25 | return app.dcs.Set(pathMaintenance, maintenance) 26 | } 27 | 28 | func (app *App) leaveMaintenance() error { 29 | err := app.shard.UpdateHostsInfo() 30 | if err != nil { 31 | return err 32 | } 33 | state, err := app.getShardStateFromDB() 34 | if err != nil { 35 | return err 36 | } 37 | master, err := app.ensureCurrentMaster(state) 38 | if err != nil { 39 | return err 40 | } 41 | stateDcs, err := app.getShardStateFromDcs() 42 | if err != nil { 43 | return err 44 | } 45 | state, err = app.getShardStateFromDB() 46 | if err != nil { 47 | return err 48 | } 49 | err = app.updateActiveNodes(state, stateDcs, []string{}, master) 50 | if err != nil { 51 | return err 52 | } 53 | activeNodes, err := app.GetActiveNodes() 54 | if err != nil { 55 | return err 56 | } 57 | if len(activeNodes) == 0 { 58 | return fmt.Errorf("no active nodes") 59 | } 60 | app.repairShard(state, activeNodes, master) 61 | return app.dcs.Delete(pathMaintenance) 62 | } 63 | 64 | func (app *App) createMaintenanceFile() { 65 | err := os.WriteFile(app.config.MaintenanceFile, []byte(""), 0o640) 66 | if err != nil { 67 | app.logger.Error("Failed to write maintenance file", slog.Any("error", err)) 68 | } 
69 | } 70 | 71 | func (app *App) doesMaintenanceFileExist() bool { 72 | _, err := os.Stat(app.config.MaintenanceFile) 73 | return err == nil 74 | } 75 | 76 | func (app *App) removeMaintenanceFile() { 77 | err := os.Remove(app.config.MaintenanceFile) 78 | if err != nil && !os.IsNotExist(err) { 79 | app.logger.Error("Failed to remove maintenance file", slog.Any("error", err)) 80 | } 81 | } 82 | 83 | // GetMaintenance returns current maintenance status from dcs 84 | func (app *App) GetMaintenance() (*Maintenance, error) { 85 | var maintenance Maintenance 86 | err := app.dcs.Get(pathMaintenance, &maintenance) 87 | if err != nil { 88 | return nil, err 89 | } 90 | return &maintenance, err 91 | } 92 | 93 | func (app *App) stateMaintenance() appState { 94 | if !app.doesMaintenanceFileExist() { 95 | app.createMaintenanceFile() 96 | } 97 | maintenance, err := app.GetMaintenance() 98 | if err != nil && err != dcs.ErrNotFound { 99 | return stateMaintenance 100 | } 101 | if err == dcs.ErrNotFound || maintenance.ShouldLeave { 102 | if app.dcs.AcquireLock(pathManagerLock) { 103 | app.logger.Info("Leaving maintenance") 104 | err := app.leaveMaintenance() 105 | if err != nil { 106 | app.logger.Error("Failed to leave maintenance", slog.Any("error", err)) 107 | return stateMaintenance 108 | } 109 | app.removeMaintenanceFile() 110 | return stateManager 111 | } 112 | app.removeMaintenanceFile() 113 | return stateCandidate 114 | } 115 | return stateMaintenance 116 | } 117 | -------------------------------------------------------------------------------- /internal/app/cache.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "time" 7 | 8 | "github.com/yandex/rdsync/internal/valkey" 9 | ) 10 | 11 | func (app *App) updateCache(refState map[string]*HostState, cache *valkey.SentiCacheNode) error { 12 | var state valkey.SentiCacheState 13 | masterReadOnly := false 14 | for fqdn, hostState := range 
refState { 15 | if hostState == nil || !hostState.PingOk || hostState.Error != "" { 16 | continue 17 | } 18 | 19 | if hostState.SentiCacheState != nil && fqdn != app.config.Hostname { 20 | var sentinel valkey.SentiCacheSentinel 21 | sentinel.Name = hostState.SentiCacheState.Name 22 | sentinel.RunID = hostState.SentiCacheState.RunID 23 | if app.config.SentinelMode.AnnounceHostname { 24 | sentinel.IP = fqdn 25 | } else { 26 | sentinel.IP = hostState.IP 27 | } 28 | sentinel.Port = app.config.SentinelMode.CachePort 29 | state.Sentinels = append(state.Sentinels, sentinel) 30 | } 31 | 32 | if hostState.IsOffline { 33 | continue 34 | } 35 | 36 | if hostState.IsMaster { 37 | if state.Master.IP != "" && !masterReadOnly && !hostState.IsReadOnly { 38 | return fmt.Errorf("2 open masters: %s and %s", hostState.IP, state.Master.IP) 39 | } 40 | if hostState.IsReadOnly && !masterReadOnly { 41 | continue 42 | } 43 | masterReadOnly = hostState.IsReadOnly 44 | state.Master.Name = app.config.SentinelMode.ClusterName 45 | state.Master.IP = hostState.IP 46 | if app.config.SentinelMode.AnnounceHostname { 47 | state.Master.IP = fqdn 48 | } else { 49 | state.Master.IP = hostState.IP 50 | } 51 | state.Master.Port = app.config.Valkey.Port 52 | state.Master.RunID = hostState.RunID 53 | state.Master.Quorum = len(refState)/2 + 1 54 | state.Master.ParallelSyncs = app.config.Valkey.MaxParallelSyncs 55 | state.Master.ConfigEpoch = 0 56 | } else { 57 | nc, err := app.shard.GetNodeConfiguration(fqdn) 58 | if err != nil { 59 | return err 60 | } 61 | var replica valkey.SentiCacheReplica 62 | if app.config.SentinelMode.AnnounceHostname { 63 | replica.IP = fqdn 64 | } else { 65 | replica.IP = hostState.IP 66 | } 67 | replica.Port = app.config.Valkey.Port 68 | replica.RunID = hostState.RunID 69 | replica.MasterLinkDownTime = hostState.ReplicaState.MasterLinkDownTime 70 | replica.SlavePriority = nc.Priority 71 | replica.ReplicaAnnounced = 1 72 | replica.MasterHost = hostState.ReplicaState.MasterHost 73 | 
replica.MasterPort = app.config.Valkey.Port 74 | if hostState.ReplicaState.MasterLinkState { 75 | replica.SlaveMasterLinkStatus = 0 76 | } else { 77 | replica.SlaveMasterLinkStatus = 1 78 | } 79 | replica.SlaveReplOffset = hostState.ReplicaState.ReplicationOffset 80 | state.Replicas = append(state.Replicas, replica) 81 | } 82 | } 83 | if state.Master.IP == "" { 84 | return fmt.Errorf("0 open masters within %d hosts", len(refState)) 85 | } 86 | return cache.Update(app.ctx, &state) 87 | } 88 | 89 | func (app *App) cacheUpdater() { 90 | ticker := time.NewTicker(app.config.TickInterval) 91 | for { 92 | select { 93 | case <-ticker.C: 94 | dcsState, err := app.getShardStateFromDcs() 95 | if err == nil { 96 | err = app.updateCache(dcsState, app.cache) 97 | } 98 | if err != nil { 99 | app.logger.Error("CacheUpdater: failed to update cache", slog.Any("error", err)) 100 | } 101 | 102 | case <-app.ctx.Done(): 103 | return 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /internal/dcs/config.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "os" 5 | "time" 6 | 7 | "github.com/cenkalti/backoff/v4" 8 | ) 9 | 10 | // ZookeeperConfig contains Zookeeper connection info 11 | type ZookeeperConfig struct { 12 | CACert string `config:"ca_cert" yaml:"ca_cert"` 13 | Namespace string `config:"namespace,required"` 14 | Hostname string `config:"hostname" yaml:"hostname"` 15 | CertFile string `config:"certfile" yaml:"certfile"` 16 | KeyFile string `config:"keyfile" yaml:"keyfile"` 17 | Password string `config:"password" yaml:"password"` 18 | Username string `config:"username" yaml:"username"` 19 | Hosts []string `config:"hosts,required"` 20 | RandomHostProvider RandomHostProviderConfig `config:"random_host_provider" yaml:"random_host_provider"` 21 | BackoffInterval time.Duration `config:"backoff_interval" yaml:"backoff_interval"` 22 | BackoffMaxRetries uint64 
`config:"backoff_max_retries" yaml:"backoff_max_retries"` 23 | BackoffMaxElapsedTime time.Duration `config:"backoff_max_elapsed_time" yaml:"backoff_max_elapsed_time"` 24 | BackoffMaxInterval time.Duration `config:"backoff_max_interval" yaml:"backoff_max_interval"` 25 | BackoffMultiplier float64 `config:"backoff_multiplier" yaml:"backoff_multiplier"` 26 | BackoffRandFactor float64 `config:"backoff_rand_factor" yaml:"backoff_rand_factor"` 27 | SessionTimeout time.Duration `config:"session_timeout" yaml:"session_timeout"` 28 | Auth bool `config:"auth" yaml:"auth"` 29 | UseSSL bool `config:"use_ssl" yaml:"use_ssl"` 30 | VerifyCerts bool `config:"verify_certs" yaml:"verify_certs"` 31 | } 32 | 33 | type RandomHostProviderConfig struct { 34 | LookupTimeout time.Duration `config:"lookup_timeout" yaml:"lookup_timeout"` 35 | LookupTTL time.Duration `config:"lookup_ttl" yaml:"lookup_ttl"` 36 | LookupTickInterval time.Duration `config:"lookup_tick_interval" yaml:"lookup_tick_interval"` 37 | ConnectivityCheckTimeout time.Duration `config:"connectivity_check_timeout" yaml:"connectivity_check_timeout"` 38 | RetryJitter time.Duration `config:"retry_jitter" yaml:"retry_jitter"` 39 | } 40 | 41 | func DefaultRandomHostProviderConfig() RandomHostProviderConfig { 42 | return RandomHostProviderConfig{ 43 | LookupTimeout: 3 * time.Second, 44 | LookupTTL: 300 * time.Second, 45 | LookupTickInterval: 60 * time.Second, 46 | ConnectivityCheckTimeout: 3 * time.Second, 47 | RetryJitter: 30 * time.Second, 48 | } 49 | } 50 | 51 | // DefaultZookeeperConfig returns default Zookeeper connection configuration 52 | func DefaultZookeeperConfig() (ZookeeperConfig, error) { 53 | hostname, err := os.Hostname() 54 | if err != nil { 55 | return ZookeeperConfig{}, err 56 | } 57 | config := ZookeeperConfig{ 58 | Hostname: hostname, 59 | SessionTimeout: 2 * time.Second, 60 | BackoffInterval: backoff.DefaultInitialInterval, 61 | BackoffRandFactor: backoff.DefaultRandomizationFactor, 62 | BackoffMultiplier: 
backoff.DefaultMultiplier, 63 | BackoffMaxInterval: backoff.DefaultMaxInterval, 64 | BackoffMaxElapsedTime: backoff.DefaultMaxElapsedTime, 65 | BackoffMaxRetries: 10, 66 | RandomHostProvider: DefaultRandomHostProviderConfig(), 67 | } 68 | return config, nil 69 | } 70 | -------------------------------------------------------------------------------- /tests/features/00_cluster_smoke.feature: -------------------------------------------------------------------------------- 1 | Feature: Cluster mode smoke tests 2 | 3 | Scenario: Cluster mode initially works 4 | Given clustered shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | And path "/var/lib/valkey/appendonlydir" does not exist on "valkey1" 15 | And path "/var/lib/valkey/appendonlydir" exists on "valkey2" 16 | And path "/var/lib/valkey/appendonlydir" exists on "valkey3" 17 | 18 | Scenario: Cluster mode duplicate ip resolve does not break rdsync 19 | Given clustered shard is up and running 20 | When I run command on host "valkey1" 21 | """ 22 | echo '192.168.234.14 valkey2 test1' >> /etc/hosts 23 | echo '192.168.234.14 valkey2 test2' >> /etc/hosts 24 | echo '192.168.234.15 valkey3 test3' >> /etc/hosts 25 | echo '192.168.234.15 valkey3 test4' >> /etc/hosts 26 | """ 27 | Then valkey host "valkey1" should be master 28 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 29 | And replication on valkey host "valkey2" should run fine within "15" seconds 30 | And valkey host "valkey3" should become 
replica of "valkey1" within "15" seconds 31 | And replication on valkey host "valkey3" should run fine within "15" seconds 32 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 33 | """ 34 | ["valkey1","valkey2","valkey3"] 35 | """ 36 | When I run command on host "valkey3" 37 | """ 38 | supervisorctl stop rdsync 39 | """ 40 | And I run command on host "valkey2" 41 | """ 42 | supervisorctl stop rdsync 43 | """ 44 | And I run command on host "valkey1" 45 | """ 46 | supervisorctl stop rdsync 47 | """ 48 | And I run command on valkey host "valkey1" 49 | """ 50 | CONFIG SET quorum-replicas valkey2:6379 51 | """ 52 | And I run command on host "valkey1" 53 | """ 54 | supervisorctl start rdsync 55 | """ 56 | And I run command on host "valkey2" 57 | """ 58 | supervisorctl start rdsync 59 | """ 60 | And I run command on host "valkey3" 61 | """ 62 | supervisorctl start rdsync 63 | """ 64 | When I set zookeeper node "/test/active_nodes" to 65 | """ 66 | [] 67 | """ 68 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 69 | """ 70 | ["valkey1","valkey2","valkey3"] 71 | """ 72 | When I run command on valkey host "valkey1" 73 | """ 74 | CONFIG GET quorum-replicas 75 | """ 76 | Then valkey cmd result should match regexp 77 | """ 78 | .*valkey2.* 79 | """ 80 | And valkey cmd result should match regexp 81 | """ 82 | .*valkey3.* 83 | """ 84 | And valkey cmd result should match regexp 85 | """ 86 | .*192.168.234.14.* 87 | """ 88 | And valkey cmd result should match regexp 89 | """ 90 | .*192.168.234.15.* 91 | """ 92 | -------------------------------------------------------------------------------- /internal/valkey/shard.go: -------------------------------------------------------------------------------- 1 | package valkey 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "sort" 7 | "sync" 8 | 9 | "github.com/yandex/rdsync/internal/config" 10 | "github.com/yandex/rdsync/internal/dcs" 11 | ) 12 | 13 | // Shard 
contains a set of valkey nodes 14 | type Shard struct { 15 | dcs dcs.DCS 16 | config *config.Config 17 | logger *slog.Logger 18 | nodes map[string]*Node 19 | local *Node 20 | sync.Mutex 21 | } 22 | 23 | // NodeConfiguration is a dcs node configuration for valkey replica 24 | type NodeConfiguration struct { 25 | // Priority - is a host priority to become master. Can be changed via CLI. 26 | Priority int `json:"priority"` 27 | } 28 | 29 | // NewShard is a Shard constructor 30 | func NewShard(config *config.Config, logger *slog.Logger, dcs dcs.DCS) *Shard { 31 | s := &Shard{ 32 | config: config, 33 | logger: logger.With(slog.String("module", "shard")), 34 | nodes: make(map[string]*Node), 35 | local: nil, 36 | dcs: dcs, 37 | } 38 | return s 39 | } 40 | 41 | // GetShardHostsFromDcs returns current shard hosts from dcs state 42 | func (s *Shard) GetShardHostsFromDcs() ([]string, error) { 43 | fqdns, err := s.dcs.GetChildren(dcs.PathHANodesPrefix) 44 | if err == dcs.ErrNotFound { 45 | return make([]string, 0), nil 46 | } 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | return fqdns, nil 52 | } 53 | 54 | // UpdateHostsInfo reads host names from DCS and updates shard state 55 | func (s *Shard) UpdateHostsInfo() error { 56 | s.Lock() 57 | defer s.Unlock() 58 | 59 | hosts, err := s.GetShardHostsFromDcs() 60 | if err != nil { 61 | return err 62 | } 63 | s.logger.Info(fmt.Sprintf("Nodes from DCS: %s", hosts)) 64 | set := make(map[string]int, len(hosts)) 65 | for _, host := range hosts { 66 | set[host]++ 67 | } 68 | 69 | for host := range set { 70 | if _, found := s.nodes[host]; !found { 71 | var node *Node 72 | if node, err = NewNode(s.config, s.logger, host); err != nil { 73 | return err 74 | } 75 | s.nodes[host] = node 76 | if s.local == nil && node.IsLocal() { 77 | s.local = node 78 | } 79 | } 80 | } 81 | // we delete hosts which are no longer in dcs 82 | for hostname := range s.nodes { 83 | if _, found := set[hostname]; !found { 84 | if s.local == nil || hostname 
!= s.local.FQDN() { 85 | s.nodes[hostname].Close() 86 | } 87 | delete(s.nodes, hostname) 88 | } 89 | } 90 | 91 | return nil 92 | } 93 | 94 | // Get returns Valkey Node by host name 95 | func (s *Shard) Get(host string) *Node { 96 | s.Lock() 97 | defer s.Unlock() 98 | 99 | return s.nodes[host] 100 | } 101 | 102 | // Local returns Valkey Node running on the same not as current rdsync process 103 | func (s *Shard) Local() *Node { 104 | return s.local 105 | } 106 | 107 | // Close closes all established connections to nodes 108 | func (s *Shard) Close() { 109 | s.Lock() 110 | defer s.Unlock() 111 | 112 | for _, node := range s.nodes { 113 | node.Close() 114 | } 115 | } 116 | 117 | // Hosts returns all nodes from local state 118 | func (s *Shard) Hosts() []string { 119 | s.Lock() 120 | defer s.Unlock() 121 | 122 | var hosts []string 123 | for host := range s.nodes { 124 | hosts = append(hosts, host) 125 | } 126 | sort.Strings(hosts) 127 | 128 | return hosts 129 | } 130 | 131 | // GetNodeConfiguration returns current node configuration from dcs 132 | func (s *Shard) GetNodeConfiguration(host string) (*NodeConfiguration, error) { 133 | var nc NodeConfiguration 134 | err := s.dcs.Get(dcs.JoinPath(dcs.PathHANodesPrefix, host), &nc) 135 | if err != nil { 136 | if err != dcs.ErrNotFound && err != dcs.ErrMalformed { 137 | return nil, fmt.Errorf("failed to get Priority for host %s: %s", host, err) 138 | } 139 | return DefaultNodeConfiguration(), nil 140 | } 141 | 142 | return &nc, nil 143 | } 144 | 145 | // DefaultNodeConfiguration returns default node configuration (matches upstream sentinel settings) 146 | func DefaultNodeConfiguration() *NodeConfiguration { 147 | return &NodeConfiguration{Priority: 100} 148 | } 149 | -------------------------------------------------------------------------------- /internal/app/replication.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "slices" 6 | "time" 7 | 
8 | "github.com/yandex/rdsync/internal/dcs" 9 | "github.com/yandex/rdsync/internal/valkey" 10 | ) 11 | 12 | func replicates(masterState *HostState, replicaState *ReplicaState, replicaFQDN string, masterNode *valkey.Node, allowSync bool) bool { 13 | if replicaState == nil || (!replicaState.MasterLinkState && !allowSync) { 14 | return false 15 | } 16 | if masterState != nil && slices.Contains(masterState.ConnectedReplicas, replicaFQDN) { 17 | return true 18 | } 19 | return masterNode != nil && masterNode.MatchHost(replicaState.MasterHost) 20 | } 21 | 22 | func (app *App) isReplicaStale(replicaState *ReplicaState, checkOpenLag bool) bool { 23 | targetLag := app.config.Valkey.StaleReplicaLagClose 24 | if checkOpenLag { 25 | targetLag = app.config.Valkey.StaleReplicaLagOpen 26 | } 27 | if replicaState == nil { 28 | if app.dcsDivergeTime.IsZero() { 29 | app.dcsDivergeTime = time.Now() 30 | } 31 | result := time.Since(app.dcsDivergeTime) > targetLag 32 | if !result { 33 | app.logger.Info(fmt.Sprintf("Local node is primary and we got a dcs info divergence at %v. Waiting for %v to make decision.", app.dcsDivergeTime, time.Since(app.dcsDivergeTime)-targetLag)) 34 | } else { 35 | app.logger.Info(fmt.Sprintf("Local node is primary and we got a dcs info divergence at %v. 
Marking local node as stale.", app.dcsDivergeTime)) 36 | } 37 | return result 38 | } 39 | if !replicaState.MasterLinkState { 40 | if replicaState.MasterSyncInProgress || checkOpenLag { 41 | return true 42 | } 43 | return replicaState.MasterLinkDownTime < 0 || time.Duration(replicaState.MasterLinkDownTime)*time.Millisecond > targetLag 44 | } else { 45 | return replicaState.MasterLastIOSeconds < 0 || time.Duration(replicaState.MasterLastIOSeconds)*time.Second > targetLag 46 | } 47 | } 48 | 49 | func (app *App) closeStaleReplica(master string) error { 50 | local := app.shard.Local() 51 | if local.FQDN() == master { 52 | if !app.dcsDivergeTime.IsZero() { 53 | app.logger.Info("Clearing DCS divergence time state") 54 | app.dcsDivergeTime = time.Time{} 55 | } 56 | return nil 57 | } 58 | if app.mode == modeCluster { 59 | hasSlots, err := local.HasClusterSlots(app.ctx) 60 | if err != nil { 61 | return err 62 | } 63 | if hasSlots { 64 | return nil 65 | } 66 | } 67 | paused, err := local.IsReplPaused(app.ctx) 68 | if err != nil { 69 | return err 70 | } 71 | if paused { 72 | return nil 73 | } 74 | localState := app.getHostState(local.FQDN()) 75 | if app.isReplicaStale(localState.ReplicaState, false) { 76 | app.logger.Debug("Local node seems stale. 
Checking if we could close.") 77 | var switchover Switchover 78 | err := app.dcs.Get(pathCurrentSwitch, &switchover) 79 | if err == nil { 80 | app.logger.Debug(fmt.Sprintf("Skipping staleness close due to switchover in progress: %v.", switchover)) 81 | return nil 82 | } 83 | if err != dcs.ErrNotFound { 84 | return err 85 | } 86 | shardState, err := app.getShardStateFromDcs() 87 | if err != nil { 88 | return err 89 | } 90 | if shardState[master].PingOk && shardState[master].PingStable && time.Since(shardState[master].CheckAt) < 3*app.config.HealthCheckInterval { 91 | okReplicas := 0 92 | staleReplicas := 0 93 | for host, state := range shardState { 94 | if host == master { 95 | continue 96 | } 97 | if !state.IsReplPaused && app.isReplicaStale(state.ReplicaState, false) { 98 | staleReplicas++ 99 | } else if host != local.FQDN() { 100 | okReplicas++ 101 | } 102 | } 103 | if okReplicas >= staleReplicas { 104 | offline, err := local.IsOffline(app.ctx) 105 | if err != nil { 106 | return err 107 | } 108 | if offline { 109 | return nil 110 | } 111 | app.logger.Error(fmt.Sprintf("Local node is stale. Alive replicas: %d, stale replicas: %d. 
Making local node offline.", okReplicas, staleReplicas)) 112 | return local.SetOffline(app.ctx) 113 | } 114 | } 115 | } else if !app.replFailTime.IsZero() { 116 | app.logger.Debug("Clearing local node replication fail time") 117 | app.replFailTime = time.Time{} 118 | } 119 | return nil 120 | } 121 | -------------------------------------------------------------------------------- /tests/features/00_sentinel_smoke.feature: -------------------------------------------------------------------------------- 1 | Feature: Sentinel mode smoke tests 2 | 3 | Scenario: Sentinel mode initially works 4 | Given sentinel shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | And senticache host "valkey1" should have master "valkey1" within "30" seconds 15 | And senticache host "valkey2" should have master "valkey1" within "30" seconds 16 | And senticache host "valkey3" should have master "valkey1" within "30" seconds 17 | And path "/var/lib/valkey/appendonlydir" does not exist on "valkey1" 18 | And path "/var/lib/valkey/appendonlydir" exists on "valkey2" 19 | And path "/var/lib/valkey/appendonlydir" exists on "valkey3" 20 | 21 | Scenario: Sentinel mode duplicate ip resolve does not break rdsync 22 | Given sentinel shard is up and running 23 | When I run command on host "valkey1" 24 | """ 25 | echo '192.168.234.14 valkey2 test1' >> /etc/hosts 26 | echo '192.168.234.14 valkey2 test2' >> /etc/hosts 27 | echo '192.168.234.15 valkey3 test3' >> /etc/hosts 28 | echo '192.168.234.15 valkey3 test4' 
>> /etc/hosts 29 | """ 30 | Then valkey host "valkey1" should be master 31 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 32 | And replication on valkey host "valkey2" should run fine within "15" seconds 33 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 34 | And replication on valkey host "valkey3" should run fine within "15" seconds 35 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 36 | """ 37 | ["valkey1","valkey2","valkey3"] 38 | """ 39 | And senticache host "valkey1" should have master "valkey1" within "30" seconds 40 | And senticache host "valkey2" should have master "valkey1" within "30" seconds 41 | And senticache host "valkey3" should have master "valkey1" within "30" seconds 42 | When I run command on host "valkey3" 43 | """ 44 | supervisorctl stop rdsync 45 | """ 46 | And I run command on host "valkey2" 47 | """ 48 | supervisorctl stop rdsync 49 | """ 50 | And I run command on host "valkey1" 51 | """ 52 | supervisorctl stop rdsync 53 | """ 54 | And I run command on valkey host "valkey1" 55 | """ 56 | CONFIG SET quorum-replicas valkey2:6379 57 | """ 58 | And I run command on host "valkey1" 59 | """ 60 | supervisorctl start rdsync 61 | """ 62 | And I run command on host "valkey2" 63 | """ 64 | supervisorctl start rdsync 65 | """ 66 | And I run command on host "valkey3" 67 | """ 68 | supervisorctl start rdsync 69 | """ 70 | When I set zookeeper node "/test/active_nodes" to 71 | """ 72 | [] 73 | """ 74 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 75 | """ 76 | ["valkey1","valkey2","valkey3"] 77 | """ 78 | When I run command on valkey host "valkey1" 79 | """ 80 | CONFIG GET quorum-replicas 81 | """ 82 | Then valkey cmd result should match regexp 83 | """ 84 | .*valkey2.* 85 | """ 86 | And valkey cmd result should match regexp 87 | """ 88 | .*valkey3.* 89 | """ 90 | And valkey cmd result should match regexp 
91 | """ 92 | .*192.168.234.14.* 93 | """ 94 | And valkey cmd result should match regexp 95 | """ 96 | .*192.168.234.15.* 97 | """ 98 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: format lint unittests recreate_logs test start_sentinel_env run_jepsen_sentinel_test jepsen_sentinel_test start_cluster_env run_jepsen_cluster_test jepsen_cluster_test clean 2 | PROJECT=rdsync 3 | ZK_VERSION=3.9.4 4 | 5 | cmd/rdsync/rdsync: 6 | GOOS=linux go build -tags netgo,osusergo -o ./cmd/rdsync/rdsync ./cmd/rdsync/... 7 | 8 | format: 9 | gofmt -s -w `find . -name '*.go'` 10 | goimports -w `find . -name '*.go'` 11 | 12 | lint: 13 | docker run --rm -v ${CURDIR}:/app -w /app golangci/golangci-lint:v2.6-alpine golangci-lint run -v 14 | 15 | unittests: 16 | go test ./cmd/... ./internal/... 17 | go test ./cmd/... ./tests/testutil/matchers/ 18 | 19 | valkey/src/valkey-server: 20 | docker run --rm -v ${CURDIR}:/app -w /app ubuntu:noble /app/valkey_patches/build.sh 21 | 22 | test: base_image valkey/src/valkey-server cmd/rdsync/rdsync recreate_logs 23 | rm -rf ./tests/images/valkey/rdsync && cp cmd/rdsync/rdsync ./tests/images/valkey/rdsync 24 | rm -rf ./tests/images/valkey/valkey-server && cp valkey/src/valkey-server ./tests/images/valkey/valkey-server 25 | rm -rf ./tests/images/valkey/valkey-senticache && cp valkey/src/valkey-senticache ./tests/images/valkey/valkey-senticache 26 | rm -rf ./tests/images/valkey/valkey-cli && cp valkey/src/valkey-cli ./tests/images/valkey/valkey-cli 27 | go build ./tests/... 
28 | (cd tests; go test -timeout 180m) 29 | 30 | recreate_logs: 31 | @if [ "$(shell ls tests/logs 2>/dev/null | wc -l)" != "0" ]; then\ 32 | rm -rf ./tests/logs;\ 33 | fi 34 | mkdir -p ./tests/logs 35 | 36 | tests/images/zookeeper/zookeeper.tar.gz: 37 | wget https://archive.apache.org/dist/zookeeper/zookeeper-$(ZK_VERSION)/apache-zookeeper-$(ZK_VERSION)-bin.tar.gz -nc -O tests/images/zookeeper/zookeeper.tar.gz 38 | 39 | base_image: tests/images/zookeeper/zookeeper.tar.gz 40 | @if [ "$(shell docker images | grep -c rdsync-base)" != "1" ]; then\ 41 | docker build tests/images/base -t rdsync-base:latest;\ 42 | fi 43 | 44 | start_sentinel_env: base_image valkey/src/valkey-server cmd/rdsync/rdsync recreate_logs 45 | rm -rf ./tests/images/valkey/rdsync && cp cmd/rdsync/rdsync ./tests/images/valkey/rdsync 46 | rm -rf ./tests/images/valkey/valkey-server && cp valkey/src/valkey-server ./tests/images/valkey/valkey-server 47 | rm -rf ./tests/images/valkey/valkey-senticache && cp valkey/src/valkey-senticache ./tests/images/valkey/valkey-senticache 48 | rm -rf ./tests/images/valkey/valkey-cli && cp valkey/src/valkey-cli ./tests/images/valkey/valkey-cli 49 | docker compose -p $(PROJECT) -f ./tests/images/jepsen-compose.yaml up -d --force-recreate --build 50 | timeout 600 docker exec rdsync-zoo1-1 setup_zk.sh 51 | timeout 600 docker exec rdsync-valkey1-1 setup_sentinel.sh 52 | timeout 600 docker exec rdsync-valkey2-1 setup_sentinel.sh valkey1 53 | timeout 600 docker exec rdsync-valkey3-1 setup_sentinel.sh valkey1 54 | 55 | run_jepsen_sentinel_test: recreate_logs start_sentinel_env 56 | (docker exec rdsync-jepsen-1 /root/jepsen/run.sh >tests/logs/jepsen.log 2>&1 && tail -n 4 tests/logs/jepsen.log) || ./tests/images/jepsen/save_logs.sh 57 | 58 | jepsen_sentinel_test: run_jepsen_sentinel_test clean 59 | 60 | start_cluster_env: base_image valkey/src/valkey-server cmd/rdsync/rdsync recreate_logs 61 | rm -rf ./tests/images/valkey/rdsync && cp cmd/rdsync/rdsync 
./tests/images/valkey/rdsync 62 | rm -rf ./tests/images/valkey/valkey-server && cp valkey/src/valkey-server ./tests/images/valkey/valkey-server 63 | rm -rf ./tests/images/valkey/valkey-senticache && cp valkey/src/valkey-senticache ./tests/images/valkey/valkey-senticache 64 | rm -rf ./tests/images/valkey/valkey-cli && cp valkey/src/valkey-cli ./tests/images/valkey/valkey-cli 65 | docker compose -p $(PROJECT) -f ./tests/images/jepsen-compose.yaml up -d --force-recreate --build 66 | timeout 600 docker exec rdsync-zoo1-1 setup_zk.sh 67 | timeout 600 docker exec rdsync-valkey1-1 setup_cluster.sh 68 | timeout 600 docker exec rdsync-valkey2-1 setup_cluster.sh valkey1 69 | timeout 600 docker exec rdsync-valkey3-1 setup_cluster.sh valkey1 70 | 71 | run_jepsen_cluster_test: recreate_logs start_cluster_env 72 | (docker exec rdsync-jepsen-1 /root/jepsen/run.sh >tests/logs/jepsen.log 2>&1 && tail -n 4 tests/logs/jepsen.log) || ./tests/images/jepsen/save_logs.sh 73 | 74 | jepsen_cluster_test: run_jepsen_cluster_test clean 75 | 76 | clean: 77 | docker ps | grep rdsync | awk '{print $$1}' | xargs -r docker rm -f || true 78 | docker network ls | grep rdsync | awk '{print $$1}' | xargs -r docker network rm || true 79 | rm -rf ./tests/logs 80 | -------------------------------------------------------------------------------- /internal/app/active_nodes.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "slices" 7 | "sort" 8 | "strings" 9 | "time" 10 | 11 | "github.com/yandex/rdsync/internal/dcs" 12 | ) 13 | 14 | // GetActiveNodes returns a list of active nodes from DCS 15 | func (app *App) GetActiveNodes() ([]string, error) { 16 | var activeNodes []string 17 | err := app.dcs.Get(pathActiveNodes, &activeNodes) 18 | if err != nil { 19 | if err == dcs.ErrNotFound { 20 | return nil, nil 21 | } 22 | return nil, fmt.Errorf("get active nodes from dcs: %s", err.Error()) 23 | } 24 | return 
activeNodes, nil 25 | } 26 | 27 | func (app *App) actualizeQuorumReplicas(master string, activeNodes []string) error { 28 | node := app.shard.Get(master) 29 | var expected []string 30 | 31 | for _, host := range activeNodes { 32 | if host == master { 33 | continue 34 | } 35 | activeNode := app.shard.Get(host) 36 | expected = append(expected, fmt.Sprintf("%s:%d", host, app.config.Valkey.Port)) 37 | for _, ip := range activeNode.GetIPs() { 38 | expected = append(expected, fmt.Sprintf("%s:%d", ip, app.config.Valkey.Port)) 39 | } 40 | } 41 | 42 | sort.Strings(expected) 43 | 44 | expectedValue := strings.Join(expected, " ") 45 | currentValue, err := node.GetQuorumReplicas(app.ctx) 46 | if err != nil { 47 | return err 48 | } 49 | 50 | if currentValue != expectedValue { 51 | app.logger.Debug(fmt.Sprintf("Setting quorum replicas to %s on %s", expectedValue, master)) 52 | err, rewriteErr := node.SetQuorumReplicas(app.ctx, expectedValue) 53 | if err != nil { 54 | return err 55 | } 56 | if rewriteErr != nil { 57 | app.logger.Error("Unable to rewrite config", slog.String("fqdn", master), slog.Any("error", rewriteErr)) 58 | } 59 | } 60 | 61 | return nil 62 | } 63 | 64 | func (app *App) updateActiveNodes(state, stateDcs map[string]*HostState, oldActiveNodes []string, master string) error { 65 | activeNodes := app.calcActiveNodes(state, stateDcs, oldActiveNodes, master) 66 | 67 | var addNodes []string 68 | 69 | for _, node := range activeNodes { 70 | if !slices.Contains(oldActiveNodes, node) { 71 | addNodes = append(addNodes, node) 72 | } 73 | } 74 | 75 | if len(addNodes) > 0 { 76 | addNodes = append(addNodes, oldActiveNodes...) 
77 | err := app.dcs.Set(pathActiveNodes, addNodes) 78 | if err != nil { 79 | app.logger.Error("Update active nodes: failed to update active nodes in dcs", slog.Any("error", err)) 80 | return err 81 | } 82 | } 83 | 84 | err := app.actualizeQuorumReplicas(master, activeNodes) 85 | if err != nil { 86 | app.logger.Error("Update active nodes: failed to actualize quorum replicas", slog.Any("error", err)) 87 | return err 88 | } 89 | 90 | err = app.dcs.Set(pathActiveNodes, activeNodes) 91 | if err != nil { 92 | app.logger.Error("Update active nodes: failed to update active nodes in dcs", slog.Any("error", err)) 93 | return err 94 | } 95 | return nil 96 | } 97 | 98 | func (app *App) calcActiveNodes(state, stateDcs map[string]*HostState, oldActiveNodes []string, master string) []string { 99 | var activeNodes []string 100 | masterNode := app.shard.Get(master) 101 | var masterState HostState 102 | for host, node := range state { 103 | if host == master { 104 | activeNodes = append(activeNodes, master) 105 | if node != nil { 106 | masterState = *node 107 | } 108 | continue 109 | } 110 | } 111 | for host, node := range state { 112 | if host == master { 113 | continue 114 | } 115 | if !node.PingOk { 116 | if stateDcs[host].PingOk { 117 | if slices.Contains(oldActiveNodes, host) { 118 | app.logger.Warn(fmt.Sprintf("Calc active nodes: %s keeps health lock in dcs, keeping active...", host)) 119 | activeNodes = append(activeNodes, host) 120 | } 121 | continue 122 | } 123 | if app.nodeFailTime[host].IsZero() { 124 | app.nodeFailTime[host] = time.Now() 125 | } 126 | failTime := time.Since(app.nodeFailTime[host]) 127 | if failTime < app.config.InactivationDelay { 128 | if slices.Contains(oldActiveNodes, host) { 129 | app.logger.Warn(fmt.Sprintf("Calc active nodes: %s is failing, remaining %v", host, app.config.InactivationDelay-failTime)) 130 | activeNodes = append(activeNodes, host) 131 | } 132 | continue 133 | } 134 | app.logger.Error(fmt.Sprintf("Calc active nodes: %s is down, 
deleting from active...", host)) 135 | continue 136 | } else if !stateDcs[host].IsOffline { 137 | delete(app.nodeFailTime, host) 138 | } 139 | replicaState := node.ReplicaState 140 | if replicaState == nil { 141 | app.logger.Warn(fmt.Sprintf("Calc active nodes: lost master %s", host)) 142 | continue 143 | } 144 | if (masterState.PingOk && masterState.PingStable) && !replicates(&masterState, replicaState, host, masterNode, false) { 145 | app.logger.Error(fmt.Sprintf("Calc active nodes: %s is not replicating from alive master, deleting from active...", host)) 146 | continue 147 | } 148 | activeNodes = append(activeNodes, host) 149 | } 150 | 151 | sort.Strings(activeNodes) 152 | return activeNodes 153 | } 154 | -------------------------------------------------------------------------------- /internal/app/app.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "os" 8 | "os/signal" 9 | "sync/atomic" 10 | "syscall" 11 | "time" 12 | 13 | "github.com/gofrs/flock" 14 | 15 | "github.com/yandex/rdsync/internal/config" 16 | "github.com/yandex/rdsync/internal/dcs" 17 | "github.com/yandex/rdsync/internal/valkey" 18 | ) 19 | 20 | // App is main application structure 21 | type App struct { 22 | dcs dcs.DCS 23 | critical atomic.Value 24 | ctx context.Context 25 | nodeFailTime map[string]time.Time 26 | splitTime map[string]time.Time 27 | dcsDivergeTime time.Time 28 | replFailTime time.Time 29 | logger *slog.Logger 30 | config *config.Config 31 | shard *valkey.Shard 32 | cache *valkey.SentiCacheNode 33 | daemonLock *flock.Flock 34 | mode appMode 35 | aofMode aofMode 36 | state appState 37 | } 38 | 39 | func baseContext() context.Context { 40 | ctx, cancel := context.WithCancel(context.Background()) 41 | signals := make(chan os.Signal, 1) 42 | signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 43 | go func() { 44 | <-signals 45 | cancel() 46 | }() 47 | return ctx 48 | } 49 
| 50 | func parseLevel(level string) (slog.Level, error) { 51 | switch level { 52 | case "Debug": 53 | return slog.LevelDebug, nil 54 | case "Info": 55 | return slog.LevelInfo, nil 56 | case "Warn": 57 | return slog.LevelWarn, nil 58 | case "Error": 59 | return slog.LevelError, nil 60 | } 61 | // The argument is a log level, so report it as such (was: "unknown error level"). 62 | return slog.LevelInfo, fmt.Errorf("unknown log level: %s", level) 63 | } 64 | 65 | // NewApp is an App constructor 66 | func NewApp(configFile, logLevel string) (*App, error) { 67 | conf, err := config.ReadFromFile(configFile) 68 | if err != nil { 69 | return nil, err 70 | } 71 | effectiveLevel := logLevel 72 | if effectiveLevel == "" { 73 | effectiveLevel = conf.LogLevel 74 | } 75 | logLevelN, err := parseLevel(effectiveLevel) 76 | if err != nil { 77 | return nil, err 78 | } 79 | logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: logLevelN})) 80 | mode, err := parseMode(conf.Mode) 81 | if err != nil { 82 | return nil, err 83 | } 84 | aofMode, err := parseAofMode(conf.AofMode) 85 | if err != nil { 86 | return nil, err 87 | } 88 | app := &App{ 89 | ctx: baseContext(), 90 | mode: mode, 91 | aofMode: aofMode, 92 | nodeFailTime: make(map[string]time.Time), 93 | splitTime: make(map[string]time.Time), 94 | state: stateInit, 95 | logger: logger, 96 | config: conf, 97 | } 98 | app.critical.Store(false) 99 | return app, nil 100 | } 101 | 102 | func (app *App) connectDCS() error { 103 | var err error 104 | app.dcs, err = dcs.NewZookeeper(app.ctx, &app.config.Zookeeper, app.logger) 105 | if err != nil { 106 | return fmt.Errorf("failed to connect to zkDCS: %s", err.Error()) 107 | } 108 | return nil 109 | } 110 | 111 | func (app *App) lockDaemonFile() { 112 | app.daemonLock = flock.New(app.config.DaemonLockFile) 113 | if locked, err := app.daemonLock.TryLock(); !locked { 114 | msg := "another instance is running."
114 | if err != nil { 115 | msg = err.Error() 116 | } 117 | app.logger.Error(fmt.Sprintf("Unable to acquire daemon lock on %s", app.config.DaemonLockFile), slog.Any("error", msg)) 118 | os.Exit(1) 119 | } 120 | } 121 | 122 | func (app *App) unlockDaemonFile() { 123 | err := app.daemonLock.Unlock() 124 | if err != nil { 125 | app.logger.Error(fmt.Sprintf("Unable to unlock daemon lock %s", app.config.DaemonLockFile), slog.Any("error", err)) 126 | } 127 | } 128 | 129 | // Run enters the main application loop 130 | func (app *App) Run() int { 131 | app.lockDaemonFile() 132 | defer app.unlockDaemonFile() 133 | err := app.connectDCS() 134 | if err != nil { 135 | app.logger.Error("Unable to connect to dcs", slog.Any("error", err)) 136 | return 1 137 | } 138 | defer app.dcs.Close() 139 | app.dcs.SetDisconnectCallback(func() error { return app.handleCritical() }) 140 | 141 | app.shard = valkey.NewShard(app.config, app.logger, app.dcs) 142 | defer app.shard.Close() 143 | if app.mode == modeSentinel { 144 | app.cache, err = valkey.NewSentiCacheNode(app.config, app.logger) 145 | if err != nil { 146 | app.logger.Error("Unable to init senticache node", slog.Any("error", err)) 147 | return 1 148 | } 149 | defer app.cache.Close() 150 | go app.cacheUpdater() 151 | } 152 | 153 | go app.pprofHandler() 154 | go app.healthChecker() 155 | go app.stateFileHandler() 156 | 157 | ticker := time.NewTicker(app.config.TickInterval) 158 | for { 159 | select { 160 | case <-ticker.C: 161 | for { 162 | app.logger.Info(fmt.Sprintf("Rdsync state: %s", app.state)) 163 | stateHandler := map[appState](func() appState){ 164 | stateInit: app.stateInit, 165 | stateManager: app.stateManager, 166 | stateCandidate: app.stateCandidate, 167 | stateLost: app.stateLost, 168 | stateMaintenance: app.stateMaintenance, 169 | }[app.state] 170 | if stateHandler == nil { 171 | panic(fmt.Sprintf("Unknown state: %s", app.state)) 172 | } 173 | nextState := stateHandler() 174 | if nextState == app.state { 175 | break 176 | 
} 177 | app.state = nextState 178 | } 179 | case <-app.ctx.Done(): 180 | return 0 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /.github/workflows/func-tests.yml: -------------------------------------------------------------------------------- 1 | name: Func-tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | schedule: 9 | - cron: '30 06 * * *' 10 | 11 | env: 12 | GO_VERSION: 1.25.5 13 | DOCKER_API_VERSION: 1.45 14 | 15 | jobs: 16 | prepare: 17 | name: Prepare 18 | runs-on: ubuntu-24.04 19 | steps: 20 | - name: Check out code into the Go module directory 21 | uses: actions/checkout@v6 22 | 23 | - name: Patched valkey cache 24 | id: patched-valkey-cache 25 | uses: actions/cache@v5 26 | with: 27 | path: | 28 | valkey/src/valkey-server 29 | valkey/src/valkey-senticache 30 | valkey/src/valkey-cli 31 | key: rdsync-valkey-${{ hashFiles('valkey_patches/*') }} 32 | 33 | - name: Build patched valkey 34 | if: steps.patched-valkey-cache.outputs.cache-hit != 'true' 35 | run: make valkey/src/valkey-server 36 | 37 | - name: Zookeeper cache 38 | id: zookeeper-cache 39 | uses: actions/cache@v5 40 | with: 41 | path: tests/images/zookeeper/zookeeper.tar.gz 42 | key: rdsync-zookeeper-${{ hashFiles('tests/images/zookeeper/zookeeper.tar.gz') }}-${{ hashFiles('Makefile') }} 43 | 44 | - name: Download zookeeper binary 45 | if: steps.zookeeper-cache.outputs.cache-hit != 'true' 46 | run: make tests/images/zookeeper/zookeeper.tar.gz 47 | 48 | - name: Base image cache 49 | id: cache-base-image 50 | uses: actions/cache@v5 51 | with: 52 | path: ~/rdsync-base-img.tgz 53 | key: rdsync-base-img-${{ hashFiles('tests/images/base/*') }} 54 | 55 | - name: Build base image 56 | if: steps.cache-base-image.outputs.cache-hit != 'true' 57 | run: make base_image 58 | 59 | - name: Export base image 60 | if: steps.cache-base-image.outputs.cache-hit != 'true' 61 | run: docker save rdsync-base | 
gzip -c > ~/rdsync-base-img.tgz 62 | 63 | test: 64 | name: Test 65 | runs-on: ubuntu-24.04 66 | needs: [ prepare ] 67 | strategy: 68 | matrix: 69 | command: 70 | - 'GODOG_FEATURE=00_cluster_smoke make test' 71 | - 'GODOG_FEATURE=00_sentinel_smoke make test' 72 | - 'GODOG_FEATURE=01_cluster_maintenance make test' 73 | - 'GODOG_FEATURE=01_sentinel_maintenance make test' 74 | - 'GODOG_FEATURE=02_cluster_switchover_from make test' 75 | - 'GODOG_FEATURE=02_sentinel_switchover_from make test' 76 | - 'GODOG_FEATURE=03_cluster_switchover_to make test' 77 | - 'GODOG_FEATURE=03_sentinel_switchover_to make test' 78 | - 'GODOG_FEATURE=04_cluster_failover make test' 79 | - 'GODOG_FEATURE=04_sentinel_failover make test' 80 | - 'GODOG_FEATURE=05_cluster_replication_fix make test' 81 | - 'GODOG_FEATURE=05_sentinel_replication_fix make test' 82 | - 'GODOG_FEATURE=06_cluster_lost make test' 83 | - 'GODOG_FEATURE=06_sentinel_lost make test' 84 | - 'GODOG_FEATURE=07_cluster_local_repair make test' 85 | - 'GODOG_FEATURE=07_sentinel_local_repair make test' 86 | fail-fast: false 87 | 88 | steps: 89 | - name: Set up Go 1.x 90 | uses: actions/setup-go@v6 91 | with: 92 | go-version: ${{ env.GO_VERSION }} 93 | 94 | - name: Check out code into the Go module directory 95 | uses: actions/checkout@v6 96 | 97 | - name: Get dependencies 98 | run: go get -v -t -d ./... 99 | 100 | - name: Load patched valkey 101 | id: patched-valkey-cache 102 | uses: actions/cache@v5 103 | with: 104 | path: | 105 | valkey/src/valkey-server 106 | valkey/src/valkey-senticache 107 | valkey/src/valkey-cli 108 | key: rdsync-valkey-${{ hashFiles('valkey_patches/*') }} 109 | 110 | - name: Fail if no cached patched valkey 111 | if: steps.patched-valkey-cache.outputs.cache-hit != 'true' 112 | run: | 113 | echo "Failed to fetch cached patched valkey. Will now exit..." 
114 | exit 1 115 | 116 | - name: Load zookeeper 117 | id: zookeeper-cache 118 | uses: actions/cache@v5 119 | with: 120 | path: tests/images/zookeeper/zookeeper.tar.gz 121 | key: rdsync-zookeeper-${{ hashFiles('tests/images/zookeeper/zookeeper.tar.gz') }}-${{ hashFiles('Makefile') }} 122 | 123 | - name: Fail if no cached zookeeper 124 | if: steps.zookeeper-cache.outputs.cache-hit != 'true' 125 | run: | 126 | echo "Failed to fetch cached zookeeper. Will now exit..." 127 | exit 1 128 | 129 | - name: Load docker images 130 | id: cache-base-image 131 | uses: actions/cache@v5 132 | with: 133 | path: ~/rdsync-base-img.tgz 134 | key: rdsync-base-img-${{ hashFiles('tests/images/base/*') }} 135 | 136 | - name: Fail if no cached base image 137 | if: steps.cache-base-image.outputs.cache-hit != 'true' 138 | run: | 139 | echo "Failed to fetch cached base image. Will now exit..." 140 | exit 1 141 | 142 | - name: Import image 143 | run: docker load -i ~/rdsync-base-img.tgz 144 | 145 | - name: Run test 146 | run: ${{ matrix.command }} 147 | 148 | - uses: actions/upload-artifact@v6 149 | if: failure() 150 | with: 151 | name: logs 152 | path: tests/logs 153 | -------------------------------------------------------------------------------- /tests/testutil/matchers/matchers.go: -------------------------------------------------------------------------------- 1 | package matchers 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "reflect" 7 | "regexp" 8 | "strconv" 9 | "strings" 10 | ) 11 | 12 | var registry map[string]Matcher 13 | 14 | // Matcher is function that matches two string values in some way 15 | // It returns nil if arguments match or error with mismatch description otherwise 16 | type Matcher func(actual string, expected string) error 17 | 18 | // MatcherError is an Error instance describing mismatch 19 | type MatcherError struct { 20 | actual string 21 | expected string 22 | } 23 | 24 | func (me MatcherError) Error() string { 25 | if strings.Contains(me.actual, "\n") || 
strings.Contains(me.expected, "\n") { 26 | return fmt.Sprintf("actual value doesn't match expected\nExpected:\n%s\nBut was:\n%s\n", me.expected, me.actual) 27 | } 28 | return fmt.Sprintf("actual value '%s' doesn't match expected '%s'", me.actual, me.expected) 29 | } 30 | 31 | // JSONMatcherError is MatchError with mismatch path details 32 | type JSONMatcherError struct { 33 | MatcherError 34 | path []string 35 | } 36 | 37 | func (jme JSONMatcherError) Error() string { 38 | return fmt.Sprintf("actual value doesn't match expected at path '%s'\nExpected:\n%s\nBut was:\n%s\n", strings.Join(jme.path, "."), jme.expected, jme.actual) 39 | } 40 | 41 | // RegexpMatcher matches actual value against regular expression 42 | func RegexpMatcher(actual string, expected string) error { 43 | if regexp.MustCompile(expected).Find([]byte(actual)) == nil { 44 | return &MatcherError{actual, expected} 45 | } 46 | return nil 47 | } 48 | 49 | // nolint: gocyclo 50 | func jsonContains(a, e any, path []string) []string { 51 | av := reflect.ValueOf(a) 52 | ev := reflect.ValueOf(e) 53 | if (a == nil && e != nil) || (a != nil && e == nil) { 54 | return path 55 | } 56 | if a == nil && e == nil { 57 | return nil 58 | } 59 | if av.Kind() != ev.Kind() { 60 | return path 61 | } 62 | switch ev.Kind() { 63 | case reflect.Map: 64 | em, ok := e.(map[string]any) 65 | if !ok { 66 | panic(fmt.Sprintf("unexpected expected JSON datatype %s at path %s", reflect.TypeOf(e), path)) 67 | } 68 | am, ok := a.(map[string]any) 69 | if !ok { 70 | panic(fmt.Sprintf("unexpected actual JSON datatype %s at path %s", reflect.TypeOf(a), path)) 71 | } 72 | for k, v1 := range em { 73 | v2, ok := am[k] 74 | if !ok { 75 | return append(path, k) 76 | } 77 | res := jsonContains(v2, v1, append(path, k)) 78 | if len(res) != 0 { 79 | return res 80 | } 81 | } 82 | case reflect.Slice: 83 | es, ok := e.([]any) 84 | if !ok { 85 | panic(fmt.Sprintf("unexpected expected JSON datatype %s at path %s", reflect.TypeOf(e), path)) 86 | } 87 | 
as, ok := a.([]any) 88 | if !ok { 89 | panic(fmt.Sprintf("unexpected actual JSON datatype %s at path %s", reflect.TypeOf(a), path)) 90 | } 91 | j := 0 92 | for i, v1 := range es { 93 | for ; j < len(as); j++ { 94 | v2 := as[j] 95 | res := jsonContains(v2, v1, append(path, strconv.Itoa(j))) 96 | if len(res) == 0 { 97 | break 98 | } 99 | } 100 | if j == len(as) { 101 | return append(path, strconv.Itoa(i)) 102 | } 103 | } 104 | case reflect.Bool: 105 | if a.(bool) != e.(bool) { 106 | return path 107 | } 108 | case reflect.Float64: 109 | // assume int math 110 | if a.(float64) != e.(float64) { 111 | return path 112 | } 113 | case reflect.String: 114 | if a.(string) != e.(string) { 115 | return path 116 | } 117 | default: 118 | panic(fmt.Sprintf("unexpected JSON datatype %s at path %s", reflect.TypeOf(e), path)) 119 | } 120 | return nil 121 | } 122 | 123 | // JSONMatcher checks that actual JSON value deeply contains expected value 124 | // In other words actual value should be superset of expected 125 | // See tests for more examples 126 | func JSONMatcher(actual string, expected string) error { 127 | var a, e any 128 | if err := json.Unmarshal([]byte(actual), &a); err != nil { 129 | return fmt.Errorf("actual value is not valid json: %s", err) 130 | } 131 | if err := json.Unmarshal([]byte(expected), &e); err != nil { 132 | panic(fmt.Errorf("expected value is not valid json: %s", err)) 133 | } 134 | res := jsonContains(a, e, []string{""}) 135 | if len(res) > 0 { 136 | return &JSONMatcherError{MatcherError{actual, expected}, res} 137 | } 138 | return nil 139 | } 140 | 141 | // JSONExactlyMatcher checks that actual and expected JSON represents 142 | // exactly the same data structure 143 | func JSONExactlyMatcher(actual string, expected string) error { 144 | var a, e any 145 | if err := json.Unmarshal([]byte(actual), &a); err != nil { 146 | return fmt.Errorf("actual value is not valid json: %s", err) 147 | } 148 | if err := json.Unmarshal([]byte(expected), &e); err != nil 
{ 149 | panic(fmt.Errorf("expected value is not valid json: %s", err)) 150 | } 151 | if !reflect.DeepEqual(a, e) { 152 | return &MatcherError{actual, expected} 153 | } 154 | return nil 155 | } 156 | 157 | // GetMatcher returns registered matcher by name 158 | func GetMatcher(name string) (Matcher, error) { 159 | if matcher, ok := registry[name]; ok { 160 | return matcher, nil 161 | } 162 | return nil, fmt.Errorf("no such matcher: %s", name) 163 | } 164 | 165 | // RegisterMatcher registers new matcher with given name. 166 | // Should be typically called in init() function. 167 | func RegisterMatcher(name string, matcher Matcher) { 168 | if _, ok := registry[name]; ok { 169 | panic(fmt.Sprintf("matcher %s already exists", name)) 170 | } 171 | registry[name] = matcher 172 | } 173 | 174 | func init() { 175 | registry = make(map[string]Matcher) 176 | RegisterMatcher("regexp", RegexpMatcher) 177 | RegisterMatcher("json", JSONMatcher) 178 | RegisterMatcher("json_exactly", JSONExactlyMatcher) 179 | } 180 | -------------------------------------------------------------------------------- /internal/dcs/zk_host_provider.go: -------------------------------------------------------------------------------- 1 | package dcs 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "math/rand" 8 | "net" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | type zkhost struct { 14 | lastLookup time.Time 15 | resolved []string 16 | } 17 | 18 | type RandomHostProvider struct { 19 | ctx context.Context 20 | logger *slog.Logger 21 | resolver *net.Resolver 22 | tried map[string]struct{} 23 | hosts sync.Map 24 | hostsKeys []string 25 | lookupTTL time.Duration 26 | lookupTimeout time.Duration 27 | lookupTickInterval time.Duration 28 | connectivityCheckTimeout time.Duration 29 | retryJitter time.Duration 30 | useAddrs bool 31 | isRetry bool 32 | } 33 | 34 | func NewRandomHostProvider(ctx context.Context, config *RandomHostProviderConfig, useAddrs bool, logger *slog.Logger) *RandomHostProvider { 35 | 
return &RandomHostProvider{ 36 | ctx: ctx, 37 | lookupTTL: config.LookupTTL, 38 | lookupTimeout: config.LookupTimeout, 39 | lookupTickInterval: config.LookupTickInterval, 40 | connectivityCheckTimeout: config.ConnectivityCheckTimeout, 41 | retryJitter: config.RetryJitter, 42 | logger: logger, 43 | tried: make(map[string]struct{}), 44 | hosts: sync.Map{}, 45 | resolver: &net.Resolver{}, 46 | useAddrs: useAddrs, 47 | } 48 | } 49 | 50 | func (rhp *RandomHostProvider) Init(servers []string) error { 51 | var allResolvedServers []string 52 | 53 | for _, host := range servers { 54 | resolved, err := rhp.resolveHost(host) 55 | if err != nil { 56 | rhp.logger.Error(fmt.Sprintf("host definition %s is invalid", host), slog.Any("error", err)) 57 | continue 58 | } 59 | allResolvedServers = append(allResolvedServers, resolved...) 60 | rhp.hosts.Store(host, zkhost{ 61 | resolved: resolved, 62 | lastLookup: time.Now(), 63 | }) 64 | rhp.hostsKeys = append(rhp.hostsKeys, host) 65 | } 66 | 67 | if len(allResolvedServers) == 0 { 68 | return fmt.Errorf("unable to resolve any host from %v", servers) 69 | } 70 | 71 | if err := rhp.checkZKConnectivity(allResolvedServers); err != nil { 72 | return err 73 | } 74 | 75 | go rhp.resolveHosts() 76 | 77 | return nil 78 | } 79 | 80 | func (rhp *RandomHostProvider) checkZKConnectivity(servers []string) error { 81 | if len(servers) == 0 { 82 | return fmt.Errorf("no servers available for connectivity check") 83 | } 84 | 85 | for _, server := range servers { 86 | conn, err := net.DialTimeout("tcp", server, rhp.connectivityCheckTimeout) 87 | if err == nil { 88 | conn.Close() 89 | rhp.logger.Info("zk connectivity check succeeded", slog.String("server", server)) 90 | return nil 91 | } 92 | rhp.logger.Error("connectivity check failed", slog.String("server", server), slog.Any("error", err)) 93 | } 94 | 95 | return fmt.Errorf("failed to connect to any zk server: all attempts timed out or refused") 96 | } 97 | 98 | func (rhp *RandomHostProvider) 
resolveHosts() { 99 | ticker := time.NewTicker(rhp.lookupTickInterval) 100 | for { 101 | select { 102 | case <-ticker.C: 103 | for _, pair := range rhp.hostsKeys { 104 | host, _ := rhp.hosts.Load(pair) 105 | zhost := host.(zkhost) 106 | 107 | if len(zhost.resolved) == 0 || time.Since(zhost.lastLookup) > rhp.lookupTTL { 108 | resolved, err := rhp.resolveHost(pair) 109 | if err != nil || len(resolved) == 0 { 110 | rhp.logger.Error(fmt.Sprintf("background resolve for %s failed", pair), slog.Any("error", err)) 111 | continue 112 | } 113 | rhp.hosts.Store(pair, zkhost{ 114 | resolved: resolved, 115 | lastLookup: time.Now(), 116 | }) 117 | } 118 | } 119 | case <-rhp.ctx.Done(): 120 | return 121 | } 122 | } 123 | } 124 | 125 | func (rhp *RandomHostProvider) resolveHost(pair string) ([]string, error) { 126 | var res []string 127 | host, port, err := net.SplitHostPort(pair) 128 | if err != nil { 129 | return res, err 130 | } 131 | ctx, cancel := context.WithTimeout(rhp.ctx, rhp.lookupTimeout) 132 | defer cancel() 133 | addrs, err := rhp.resolver.LookupHost(ctx, host) 134 | if err != nil { 135 | rhp.logger.Error(fmt.Sprintf("unable to resolve %s", host), slog.Any("error", err)) 136 | } 137 | for _, addr := range addrs { 138 | res = append(res, net.JoinHostPort(addr, port)) 139 | } 140 | 141 | return res, nil 142 | } 143 | 144 | func (rhp *RandomHostProvider) Len() int { 145 | return len(rhp.hostsKeys) 146 | } 147 | 148 | func (rhp *RandomHostProvider) Next() (server string, retryStart bool) { 149 | if rhp.isRetry { 150 | v := time.Duration(rand.Float64() * float64(rhp.retryJitter)) 151 | rhp.logger.Info("Triggering connection retry jitter", slog.Duration("duration", v)) 152 | time.Sleep(v) 153 | rhp.isRetry = false 154 | } 155 | 156 | needRetry := false 157 | 158 | var ret string 159 | 160 | for len(ret) == 0 { 161 | notTried := []string{} 162 | 163 | for _, host := range rhp.hostsKeys { 164 | if _, ok := rhp.tried[host]; !ok { 165 | notTried = append(notTried, host) 166 | } 
167 | } 168 | 169 | var selected string 170 | if len(notTried) == 0 { 171 | needRetry = true 172 | for k := range rhp.tried { 173 | delete(rhp.tried, k) 174 | } 175 | selected = rhp.hostsKeys[rand.Intn(len(rhp.hostsKeys))] 176 | } else { 177 | selected = notTried[rand.Intn(len(notTried))] 178 | } 179 | rhp.tried[selected] = struct{}{} 180 | 181 | host, _ := rhp.hosts.Load(selected) 182 | zhost := host.(zkhost) 183 | 184 | if len(zhost.resolved) > 0 { 185 | if rhp.useAddrs { 186 | ret = zhost.resolved[rand.Intn(len(zhost.resolved))] 187 | } else { 188 | ret = selected 189 | } 190 | } 191 | } 192 | 193 | if needRetry { 194 | rhp.isRetry = true 195 | } 196 | 197 | return ret, needRetry 198 | } 199 | 200 | func (rhp *RandomHostProvider) Connected() { 201 | for k := range rhp.tried { 202 | delete(rhp.tried, k) 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /valkey_patches/0003_Add_offline_mode.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/config.c b/src/config.c 2 | index f788aef6f..aafa0616a 100644 3 | --- a/src/config.c 4 | +++ b/src/config.c 5 | @@ -623,6 +623,8 @@ void loadServerConfigFromString(sds config) { 6 | if (server.hz < CONFIG_MIN_HZ) server.hz = CONFIG_MIN_HZ; 7 | if (server.hz > CONFIG_MAX_HZ) server.hz = CONFIG_MAX_HZ; 8 | 9 | + server.offline_initial = server.offline; 10 | + 11 | sdsfreesplitres(lines, totlines); 12 | reading_config_file = 0; 13 | return; 14 | @@ -3206,6 +3208,31 @@ static int applyClientMaxMemoryUsage(const char **err) { 15 | return 1; 16 | } 17 | 18 | +static int setOfflineMode(standardConfig *config, sds *argv, int argc, const char **err) { 19 | + UNUSED(config); 20 | + if (argc != 1) { 21 | + *err = "wrong number of arguments"; 22 | + return 0; 23 | + } 24 | + int offline = yesnotoi(argv[0]); 25 | + if (offline == -1) { 26 | + *err = "argument must be 'yes' or 'no'"; 27 | + return 0; 28 | + } 29 | + server.offline = 
offline; 30 | + return 1; 31 | +} 32 | + 33 | +static sds getOfflineMode(standardConfig *config) { 34 | + UNUSED(config); 35 | + return sdsnew(server.offline ? "yes" : "no"); 36 | +} 37 | + 38 | +static void rewriteConfigOfflineMode(standardConfig *config, const char *name, struct rewriteConfigState *state) { 39 | + UNUSED(config); 40 | + rewriteConfigYesNoOption(state, name, server.offline_initial, 0); 41 | +} 42 | + 43 | standardConfig static_configs[] = { 44 | /* Bool configs */ 45 | createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL), 46 | @@ -3461,6 +3488,7 @@ standardConfig static_configs[] = { 47 | createSpecialConfig("rdma-bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigRdmaBindOption, getConfigRdmaBindOption, rewriteConfigRdmaBindOption, applyRdmaBind), 48 | createSpecialConfig("replicaof", "slaveof", IMMUTABLE_CONFIG | MULTI_ARG_CONFIG, setConfigReplicaOfOption, getConfigReplicaOfOption, rewriteConfigReplicaOfOption, NULL), 49 | createSpecialConfig("latency-tracking-info-percentiles", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigLatencyTrackingInfoPercentilesOutputOption, getConfigLatencyTrackingInfoPercentilesOutputOption, rewriteConfigLatencyTrackingInfoPercentilesOutputOption, NULL), 50 | + createSpecialConfig("offline", NULL, MODIFIABLE_CONFIG, setOfflineMode, getOfflineMode, rewriteConfigOfflineMode, applyBind), 51 | 52 | /* NULL Terminator, this is dropped when we convert to the runtime array. 
*/ 53 | {NULL}, 54 | diff --git a/src/server.c b/src/server.c 55 | index 46a20d1ae..860a3cb41 100644 56 | --- a/src/server.c 57 | +++ b/src/server.c 58 | @@ -2664,9 +2664,15 @@ int listenToPort(connListener *sfd) { 59 | int j; 60 | int port = sfd->port; 61 | char **bindaddr = sfd->bindaddr; 62 | + int bindaddr_count = sfd->bindaddr_count; 63 | + char *offline_bindaddr[2] = {"127.0.0.1", "-::1"}; 64 | 65 | + if (server.offline && (port == server.port || port == server.tls_port)) { 66 | + bindaddr_count = 2; 67 | + bindaddr = offline_bindaddr; 68 | + } 69 | /* If we have no bind address, we don't listen on a TCP socket */ 70 | - if (sfd->bindaddr_count == 0) return C_OK; 71 | + if (bindaddr_count == 0) return C_OK; 72 | 73 | - for (j = 0; j < sfd->bindaddr_count; j++) { 73 | + for (j = 0; j < bindaddr_count; j++) { 74 | char *addr = bindaddr[j]; 75 | @@ -6865,8 +6871,8 @@ void dismissMemoryInChild(void) { 76 | /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */ 77 | if (server.thp_enabled) return; 78 | 79 | - /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux. 80 | - * so we avoid these pointless loops when they're not going to do anything. */ 81 | + /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux. 82 | + * so we avoid these pointless loops when they're not going to do anything. */ 83 | #if defined(USE_JEMALLOC) && defined(__linux__) 84 | listIter li; 85 | listNode *ln; 86 | @@ -7311,7 +7317,7 @@ __attribute__((weak)) int main(int argc, char **argv) { 87 | } 88 | if (server.sentinel_mode) sentinelCheckConfigFile(); 89 | 90 | - /* Do system checks */ 91 | + /* Do system checks */ 92 | #ifdef __linux__ 93 | linuxMemoryWarnings(); 94 | sds err_msg = NULL; 95 | diff --git a/src/server.h b/src/server.h 96 | index 95d31758e..a82eabd26 100644 97 | --- a/src/server.h 98 | +++ b/src/server.h 99 | @@ -2118,6 +2118,9 @@ struct valkeyServer { 100 | list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF.
*/ 101 | int get_ack_from_replicas; /* If true we send REPLCONF GETACK. */ 102 | int repl_paused; /* If true we don't try to connect to master */ 103 | + /* Offline mode */ 104 | + int offline; /* If true only localhost connections are accepted */ 105 | + int offline_initial; /* Initial state of offline mode (from config) */ 106 | /* Limits */ 107 | unsigned int maxclients; /* Max number of simultaneous clients */ 108 | unsigned long long maxmemory; /* Max number of memory bytes to use */ 109 | diff --git a/tests/unit/yandex-cloud-patches.tcl b/tests/unit/yandex-cloud-patches.tcl 110 | new file mode 100644 111 | index 000000000..b8c3ba453 112 | --- /dev/null 113 | +++ b/tests/unit/yandex-cloud-patches.tcl 114 | @@ -0,0 +1,23 @@ 115 | +start_server {config "minimal.conf" tags {"external:skip"}} { 116 | + test {Offline mode works as expected} { 117 | + # Get a non-loopback address of this instance for this test. 118 | + set myaddr [get_nonloopback_addr] 119 | + if {$myaddr != "" && ![string match {127.*} $myaddr]} { 120 | + # Disable protected mode 121 | + assert_equal {OK} [r config set protected-mode no] 122 | + # Enable offline mode 123 | + assert_equal {OK} [r config set offline yes] 124 | + catch {set r2 [get_nonloopback_client]} err 125 | + assert_match {*connection refused*} $err 126 | + 127 | + # CONFIG REWRITE does not persist runtime change of offline mode 128 | + r CONFIG REWRITE 129 | + assert_equal 0 [count_message_lines [srv 0 config_file] offline] 130 | + 131 | + # Disable offline mode 132 | + assert_equal {OK} [r config set offline no] 133 | + set r2 [get_nonloopback_client] 134 | + assert_equal {PONG} [$r2 ping] 135 | + } 136 | + } 137 | +} 138 | -- 139 | 2.51.1 140 | 141 | -------------------------------------------------------------------------------- /tests/images/base/generate_certs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | FQDN=$(hostname) 4 | 5 | ls 
/etc/zk-ssl/server.crt && exit 0 || true 6 | mkdir /etc/zk-ssl 7 | 8 | echo "-----BEGIN CERTIFICATE----- 9 | MIIE/TCCAuWgAwIBAgIUU9e6chP84r3iZk3JtvnWb1V2N1YwDQYJKoZIhvcNAQEL 10 | BQAwDTELMAkGA1UEBhMCUlUwIBcNMjMwMzEwMDgzNTUzWhgPMzAyMjA3MTEwODM1 11 | NTNaMA0xCzAJBgNVBAYTAlJVMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC 12 | AgEAwJuy394cK127yT8nGHVPKF6TG6xL0WpxahyaKwIYp5lbv9wDvzjMPE7KmONU 13 | 8GhCFUdEJTRqBkaRdZNYxnOUxufU3+jIf1hq1Csg8q1NXICVWVwfFL2F5mKHgeHQ 14 | n3FaJM2pZQ5iIWFY1c18MgV8qqNWbtyLeppcyZOL9duLM9A8XpYb0JOZis82d+lh 15 | kcxzE1XM+MZEgZfHImh0zod9OMtSAOwQzVXpiA3JO/eHkLQGYcy6KNTm42mubVlX 16 | kBcu/BplnP7gXGOYDt/JyRhGSLAfn762+jRbAlAvbPzOy67hc4pW7aloU5zPBhYf 17 | BaTxM9UPqPtyp7Lxkp9HL68QXtm5MobDuDtZ6ePQtHgHrl7P7PXvEUPwK7BZzgZy 18 | MerVhxIssutA2yBCuu5T7dMSwIsUdvXtgdHRdHDwn1D/V1CxnujDv9l6/T3sCmRv 19 | tWPwTOCUf5BLLw6N6TnSsVR5I9NALKCLYE8LsfCuLdyi363JZqubkdJr1Ro8yI5J 20 | m0GX5pypwZJPV2Ivt6kKVTQiN2hoWNe+3TNPS+7ysqit37s71YRDajZaZ55DopmF 21 | +oIYdA3MqUZEVZyKFifWvo/l2gYarlEtcEJl++OwydirWLAjCPHh9UvDhjKS43bQ 22 | zSlRC+d4CfRqXftmETHVAxMokai3WvAdUpJrW2RrjiuR0MkCAwEAAaNTMFEwHQYD 23 | VR0OBBYEFJGDr6xmoKJFU6cgS90aFg6lUGbhMB8GA1UdIwQYMBaAFJGDr6xmoKJF 24 | U6cgS90aFg6lUGbhMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB 25 | ACj87ymjBlgY9UZTUbudHREPPXfqMi2TgWt5hygQSiTrNeQOodnq+Swp86qX/y8w 26 | xtnvc+iILfFnh9ZevHKmLx+JziN4kD4ywEpHW7zS7c3+2QEjIZUwj5qlIg0ByOBd 27 | 0M/kpimmuTwlDylBaY12GcFlZcsbuezzm4hU+0qoCV/zi2DvSdAPKXMAeZ3lOkde 28 | PUYJUpRz/QkkxEhSdM3BQYI51mUiltCHMhe6COoN4MHV7tix0Pj9vPjhAVN/4sot 29 | 2PgUiCwY8eNQugZhpTosMTSBLZvg/EKG+4slY75/voNTIxWHAHmnPMOAzVgNTya0 30 | /eP6NB3MCjFuY2E+fGox9YTomjI5oxBr+1LlwVy7wbwXTrgBz9Z4izScAsVbPrk6 31 | jSrqNeNWK1f+JVnYZkjgPGgPaQVCJ22vdLmkW7U/ATdeedQS3RCApMnb9VCRTUaO 32 | eY4ccuEvj0huhdcUguw6fBjrhPjoPxKMn6S93ginW8Wz9vo8qLkEg2NtQDFu1Omb 33 | cJM5F8uLRr8NotPV5QPg1koHeBv/N2WTRZiUoavAogR9XdyOtrB8+MBu1nsp4Goi 34 | 7/suv9XzMJ7IpgXiQfCM++1x7oooyWWdeFTCzqNDJ1IbQDeOCc9cQgeOAPWcIqWO 35 | nAWt08+eToI1YUvjl6UT0bpVaJEACv+/HfBr1T26u4Jh 36 | -----END 
CERTIFICATE-----" > /etc/zk-ssl/ca.cert.pem 37 | 38 | echo "-----BEGIN RSA PRIVATE KEY----- 39 | MIIJKAIBAAKCAgEAwJuy394cK127yT8nGHVPKF6TG6xL0WpxahyaKwIYp5lbv9wD 40 | vzjMPE7KmONU8GhCFUdEJTRqBkaRdZNYxnOUxufU3+jIf1hq1Csg8q1NXICVWVwf 41 | FL2F5mKHgeHQn3FaJM2pZQ5iIWFY1c18MgV8qqNWbtyLeppcyZOL9duLM9A8XpYb 42 | 0JOZis82d+lhkcxzE1XM+MZEgZfHImh0zod9OMtSAOwQzVXpiA3JO/eHkLQGYcy6 43 | KNTm42mubVlXkBcu/BplnP7gXGOYDt/JyRhGSLAfn762+jRbAlAvbPzOy67hc4pW 44 | 7aloU5zPBhYfBaTxM9UPqPtyp7Lxkp9HL68QXtm5MobDuDtZ6ePQtHgHrl7P7PXv 45 | EUPwK7BZzgZyMerVhxIssutA2yBCuu5T7dMSwIsUdvXtgdHRdHDwn1D/V1CxnujD 46 | v9l6/T3sCmRvtWPwTOCUf5BLLw6N6TnSsVR5I9NALKCLYE8LsfCuLdyi363JZqub 47 | kdJr1Ro8yI5Jm0GX5pypwZJPV2Ivt6kKVTQiN2hoWNe+3TNPS+7ysqit37s71YRD 48 | ajZaZ55DopmF+oIYdA3MqUZEVZyKFifWvo/l2gYarlEtcEJl++OwydirWLAjCPHh 49 | 9UvDhjKS43bQzSlRC+d4CfRqXftmETHVAxMokai3WvAdUpJrW2RrjiuR0MkCAwEA 50 | AQKCAgAgemC4RTDE00J2FfMWublGWmQ991i1kFhdh0Mr22ei40ZIXOY42W/+/15E 51 | V5kcDMiP4/uGtobmVgHzLIx8skK1I6SOuScN6i/hZQBiS3zPC1OjxNfs3GR2y8iD 52 | yzstl6SWriNRShKcBFlBfCvkF27FK1PIz+GpI9xflUS1iXa4nvV/EZrRGgJ7GKPb 53 | pnvwZORGr2In1O76V0iZ8bk4ljo0WHyUcToIFeOSMJjtRrkSWnj1BtuhRP1F/a0O 54 | /VC5mF8w3Zai2YulqJmccHoLMc+wNBqxCiy6lhd+lVzZ6OtKB0w2+m3cF4PjDX8P 55 | TK2gewa9McE5QmU8B/2aNsd/L+r3eGEvWAF/1vRq6NcrFwigq8uCTtgw9edRlDnm 56 | RvICkfAbrwhNaixWwqBVQHoy53H29TohxGNNKa6TTKeJvYEdYKgHx55TxkB9X9jc 57 | iSisqb3fgEl4Yh1Izpu+6nULOqdlldfkKPgKJqVB1AT/avR8J09zmMvW5fPa6fFx 58 | alZ1iVahR5bIFEu1lXygsrBP6N+K/ogyztg7ZKLTIN/FguwMKnXMaUbN/Y/ZZXV1 59 | oGil9vHKnDrRnUGfcm9tyH2Ddcy6RDoDz+O4cYgMGxDhHran2cicVY1q+Yi08q5h 60 | Napk1phNra5HIHnNHwMxQ75ZKZZ3TOGJL+HMF4yRDj19C/6sAQKCAQEA8a9ZQhWw 61 | 0vhZENmSYZgGZLa7RZLSbBzQOX/cetdI6/kvmZVcMvNz4q0/UI9XLkqokL1wJiku 62 | O0zXkaVrBVAsgozp4I3oFqwtcAAGw0KwF4FDAS36k4gkE4SmIUl2eI0XMZCPQIKp 63 | 3TB81+XdBITtwfPl5yG+IZDkXNu16qUHEhnhvs/kKhMr8flhFC1J4gdrrQhfuRHY 64 | Jv8e1RLJzMhu/ErRjh82LkzB6m3jp0YxBeIA+9Kkw+OX6SlzRbJPirKxJTaZnB8o 65 | wQmzOy1kTRG4qjKswjdTbzf6549721i8QHwSpwPI3NZQhlSkfsvZ5QL4qPW0nRta 66 | 
m76YeLlS12yQSQKCAQEAzAQz6OcE6yS2q5UfTZluGaU54Zkm0YSnS394pitJpHoh 67 | JSZlvkL1DzpacquDxa3uQLDikai5TqpNnkuufeMJf7I2ygg4n/v4OFaE+/qj5uNA 68 | 3QnL3BVT9DCJ0JvQ1qA5Q/6P5WpUHYB7JHBM9BpaE8e4xocJyWSdcSJDaEXns4Hx 69 | WzhpBdVpPSamqB0VHYg1bv6OGFPfwUaRafWhNzljtxbY8RYcz7IfPmnLImFePTtZ 70 | AjzIoAwUIRFzvmoduda0kQKogRVoEeaW1q6ebPUjYjIZvohnpe27EvgCiTNkcaSf 71 | C96uIxHrSvI8114z9CBXer60xQ0Kz+ds18LtY6w8gQKCAQEAkP/JxlsrHje/f9t4 72 | 9jJ2S4BSNLiUpCZZStYKWmzFJEX5J+SzTyI+uZWFcfi9rlk+brApE8wLH6rHfmtH 73 | HQXv3ldajc21m7yq+hIZ/JYK/d8gaxnBxzebpVYlMb1YZZUIgEUhnOuHq9vGWuVe 74 | x7JUztNccGIPJyY9y/RJXUCrUFHU3Vzun8umxuL+OlO9iu02zbZDb85j52mSfvVp 75 | uwHZjGX6+ZCCOh71DIfnWFlFWikwu+Sx05C9eDbVINCM5kK1AwWR/Ve4ZLBEJtHh 76 | 5lcmen4ypcb5uLVWRA0SmxPOxcVqj2c24D94Sk+H7UayMLKqqvvW45cgsmYUJgHR 77 | 0MsieQKCAQB9goBk4erWtmliuYTeemuPf2RSc6O79b3t5mfU4oCVnUTS1AJ3wD1+ 78 | tsl6DiYs8MnIJoncTk5iJMdHgQvCCnCHjJ3EQLaFRb/4+NErK5C1tEztLt+pb72M 79 | VmgSXCloQH26ZNslqfpBhA895ZCSA7wyuwXjrKPKsAlj1k5d0dOvTVusYNHLcvUh 80 | V6vjdLDO0EL/G79THBZlkwJWi3Q4wyejNX0VJCNpaw1pmjAL4JbXWLFzfO13+LZR 81 | eakZFbNf5sSDCX2cnAzAJnnZbOet5El2WZgY7VXGcLBMBSOaQHGksD/gT4gVrypv 82 | mwLvA9c2cscejkArkdB7AsalHhho30cBAoIBAFJBO0RU7o0S+F6KHIP5aFbItcUd 83 | NfUgoJTAFUD3EnBirvDv0pu8T8zkgKf7PRFkZQIOXocvpX0Zy6N7fiPbvzTA/vH3 84 | mFqias89pTUAgv43R8ZsAC/qlozUuByegigEz2zeVd34w7MdkgGo1jnqmijAIXZE 85 | INBo0swkxAbix+W1Pur/yvGUpC6xu3ISmdrn0p20B7QhyuoqC3ea/az7ePwx+Pu9 86 | Jl8tzMujbHNHhw+OQAQOPHi6EUPs/H37euj3G7oBaVUwXJq3Tbwg95W5Jih+CgTB 87 | Sbe6eYpR/j/SYGwbS6/DbHi3IjvblN+2pSPI05JvXMhLC/lAeqcdVJAgTvw= 88 | -----END RSA PRIVATE KEY-----" > /etc/zk-ssl/ca.key 89 | 90 | openssl genrsa -out /etc/zk-ssl/server.key -passout pass:testpassword123 4096 91 | openssl req -new -key /etc/zk-ssl/server.key -out /etc/zk-ssl/server.csr -passin pass:testpassword123 -subj "/C=RU/ST=Test/L=Test/O=Test/OU=Test/CN=${FQDN}" 92 | echo "[SAN] 93 | subjectAltName = @alt_names 94 | [alt_names] 95 | DNS.1 = ${FQDN}" > /etc/zk-ssl/openssl.cnf 96 | openssl x509 -req -days 365 -in 
/etc/zk-ssl/server.csr -CA /etc/zk-ssl/ca.cert.pem -CAkey /etc/zk-ssl/ca.key -CAcreateserial -out /etc/zk-ssl/server.crt -passin pass:testpassword123 -extensions SAN -extfile /etc/zk-ssl/openssl.cnf 97 | 98 | if [[ "${FQDN}" == *"zoo"* ]]; 99 | then 100 | keytool -import -trustcacerts -alias yandex -file /etc/zk-ssl/ca.cert.pem -keystore /etc/zk-ssl/truststore.jks -storepass testpassword123 -noprompt && \ 101 | openssl pkcs12 -export -in /etc/zk-ssl/server.crt -inkey /etc/zk-ssl/server.key -out /etc/zk-ssl/server.p12 -passout pass:testpassword321 -name ${FQDN} && \ 102 | keytool -importkeystore -destkeystore /etc/zk-ssl/server.jks -srckeystore /etc/zk-ssl/server.p12 -deststorepass testpassword321 -srcstoretype PKCS12 -srcstorepass testpassword321 -alias ${FQDN} && \ 103 | rm -f /etc/zk-ssl/server.p12 104 | fi 105 | 106 | chmod 755 /etc/zk-ssl/* 107 | -------------------------------------------------------------------------------- /internal/app/state.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "strconv" 7 | "strings" 8 | "time" 9 | 10 | "github.com/yandex/rdsync/internal/dcs" 11 | ) 12 | 13 | func (app *App) setStateError(state *HostState, fqdn, message string) { 14 | app.logger.Error("GetHostState error", slog.String("fqdn", fqdn), slog.String("error", message)) 15 | state.Error = message 16 | } 17 | 18 | func (app *App) getHostState(fqdn string) *HostState { 19 | node := app.shard.Get(fqdn) 20 | var state HostState 21 | state.CheckAt = time.Now() 22 | state.CheckBy = app.config.Hostname 23 | if app.mode == modeSentinel && fqdn == app.config.Hostname { 24 | state.SentiCacheState = &SentiCacheState{ 25 | Name: app.config.SentinelMode.Name, 26 | RunID: app.config.SentinelMode.RunID, 27 | } 28 | } 29 | info, minReplicasToWrite, isReadOnly, isOffline, isReplPaused, err := node.GetState(app.ctx) 30 | if err != nil { 31 | app.setStateError(&state, fqdn, err.Error()) 32 
| if len(info) == 0 { 33 | state.PingOk = false 34 | state.PingStable = false 35 | return &state 36 | } 37 | } 38 | state.PingOk, state.PingStable = node.EvaluatePing() 39 | var ok bool 40 | state.RunID, ok = info["run_id"] 41 | if !ok { 42 | app.setStateError(&state, fqdn, "No run_id in info") 43 | return &state 44 | } 45 | state.ReplicationID, ok = info["master_replid"] 46 | if !ok { 47 | app.setStateError(&state, fqdn, "No master_replid in info") 48 | return &state 49 | } 50 | state.ReplicationID2, ok = info["master_replid2"] 51 | if !ok { 52 | app.setStateError(&state, fqdn, "No master_replid2 in info") 53 | return &state 54 | } 55 | masterOffset, ok := info["master_repl_offset"] 56 | if !ok { 57 | app.setStateError(&state, fqdn, "No master_repl_offset in info") 58 | return &state 59 | } 60 | state.MasterReplicationOffset, err = strconv.ParseInt(masterOffset, 10, 64) 61 | if err != nil { 62 | app.setStateError(&state, fqdn, err.Error()) 63 | return &state 64 | } 65 | secondOffset, ok := info["second_repl_offset"] 66 | if !ok { 67 | app.setStateError(&state, fqdn, "No second_repl_offset in info") 68 | return &state 69 | } 70 | state.SecondReplicationOffset, err = strconv.ParseInt(secondOffset, 10, 64) 71 | if err != nil { 72 | app.setStateError(&state, fqdn, err.Error()) 73 | return &state 74 | } 75 | replBacklogFirstByte, ok := info["repl_backlog_first_byte_offset"] 76 | if !ok { 77 | app.setStateError(&state, fqdn, "No repl_backlog_first_byte_offset in info") 78 | return &state 79 | } 80 | state.ReplicationBacklogStart, err = strconv.ParseInt(replBacklogFirstByte, 10, 64) 81 | if err != nil { 82 | app.setStateError(&state, fqdn, err.Error()) 83 | return &state 84 | } 85 | replBacklogHistlen, ok := info["repl_backlog_histlen"] 86 | if !ok { 87 | app.setStateError(&state, fqdn, "No repl_backlog_histlen in info") 88 | return &state 89 | } 90 | state.ReplicationBacklogSize, err = strconv.ParseInt(replBacklogHistlen, 10, 64) 91 | if err != nil { 92 | 
app.setStateError(&state, fqdn, err.Error()) 93 | return &state 94 | } 95 | role, ok := info["role"] 96 | if !ok { 97 | app.setStateError(&state, fqdn, "No role in info") 98 | return &state 99 | } 100 | if role == "master" { 101 | state.IsMaster = true 102 | numReplicasStr, ok := info["connected_slaves"] 103 | if !ok { 104 | app.setStateError(&state, fqdn, "Master has no connected_slaves in info") 105 | return &state 106 | } 107 | numReplicas, err := strconv.ParseInt(numReplicasStr, 10, 64) 108 | if err != nil { 109 | app.setStateError(&state, fqdn, err.Error()) 110 | return &state 111 | } 112 | var i int64 113 | for i < numReplicas { 114 | replicaID := fmt.Sprintf("slave%d", i) 115 | replicaValue, ok := info[replicaID] 116 | if !ok { 117 | app.logger.Warn(fmt.Sprintf("Master has no %s in info but connected_slaves is %d", replicaID, numReplicas), slog.String("fqdn", fqdn)) 118 | i++ 119 | continue 120 | } 121 | // ip is first value in slaveN info 122 | start := strings.Index(replicaValue, "=") 123 | end := strings.Index(replicaValue, ",") 124 | state.ConnectedReplicas = append(state.ConnectedReplicas, replicaValue[start+1:end]) 125 | i++ 126 | } 127 | } else { 128 | state.IsMaster = false 129 | rs := ReplicaState{} 130 | rs.MasterHost, ok = info["master_host"] 131 | if !ok { 132 | app.setStateError(&state, fqdn, "Replica but no master_host in info") 133 | return &state 134 | } 135 | linkState, ok := info["master_link_status"] 136 | if !ok { 137 | app.setStateError(&state, fqdn, "Replica but no master_link_status in info") 138 | return &state 139 | } 140 | rs.MasterLinkState = (linkState == "up") 141 | syncInProgress, ok := info["master_sync_in_progress"] 142 | if !ok { 143 | app.setStateError(&state, fqdn, "Replica but no master_sync_in_progress in info") 144 | return &state 145 | } 146 | rs.MasterSyncInProgress = (syncInProgress != "0") 147 | if !rs.MasterLinkState && !rs.MasterSyncInProgress { 148 | downSeconds, ok := info["master_link_down_since_seconds"] 149 | 
if !ok { 150 | app.setStateError(&state, fqdn, "Replica with link down but no master_link_down_since_seconds in info") 151 | return &state 152 | } 153 | rs.MasterLinkDownTime, err = strconv.ParseInt(downSeconds, 10, 64) 154 | rs.MasterLinkDownTime *= 1000 155 | if err != nil { 156 | app.setStateError(&state, fqdn, err.Error()) 157 | return &state 158 | } 159 | } else if rs.MasterLinkState && !rs.MasterSyncInProgress { 160 | lastIOSeconds, ok := info["master_last_io_seconds_ago"] 161 | if !ok { 162 | app.setStateError(&state, fqdn, "Replica with link up but no master_last_io_seconds_ago in info") 163 | return &state 164 | } 165 | rs.MasterLastIOSeconds, err = strconv.ParseInt(lastIOSeconds, 10, 64) 166 | if err != nil { 167 | app.setStateError(&state, fqdn, err.Error()) 168 | return &state 169 | } 170 | } 171 | replicaOffset, ok := info["slave_repl_offset"] 172 | if !ok { 173 | app.setStateError(&state, fqdn, "Replica but no slave_repl_offset in info") 174 | return &state 175 | } 176 | rs.ReplicationOffset, err = strconv.ParseInt(replicaOffset, 10, 64) 177 | if err != nil { 178 | app.setStateError(&state, fqdn, err.Error()) 179 | return &state 180 | } 181 | state.ReplicaState = &rs 182 | } 183 | state.MinReplicasToWrite = minReplicasToWrite 184 | state.IsReadOnly = isReadOnly 185 | state.IsOffline = isOffline 186 | state.IsReplPaused = isReplPaused 187 | err = node.RefreshAddrs() 188 | if err != nil { 189 | app.setStateError(&state, fqdn, err.Error()) 190 | return &state 191 | } 192 | state.IP, err = node.GetIP() 193 | if err != nil { 194 | app.setStateError(&state, fqdn, err.Error()) 195 | return &state 196 | } 197 | return &state 198 | } 199 | 200 | func (app *App) getShardStateFromDcs() (map[string]*HostState, error) { 201 | hosts := app.shard.Hosts() 202 | getter := func(host string) (*HostState, error) { 203 | var state HostState 204 | err := app.dcs.Get(dcs.JoinPath(pathHealthPrefix, host), &state) 205 | if err != nil && err != dcs.ErrNotFound { 206 | return 
nil, err 207 | } 208 | return &state, nil 209 | } 210 | return getHostStatesInParallel(hosts, getter) 211 | } 212 | 213 | func (app *App) getShardStateFromDB() (map[string]*HostState, error) { 214 | hosts := app.shard.Hosts() 215 | getter := func(host string) (*HostState, error) { 216 | return app.getHostState(host), nil 217 | } 218 | return getHostStatesInParallel(hosts, getter) 219 | } 220 | -------------------------------------------------------------------------------- /internal/app/master.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "time" 7 | 8 | "github.com/yandex/rdsync/internal/dcs" 9 | ) 10 | 11 | func (app *App) getNumReplicasToWrite(activeNodes []string) int { 12 | return len(activeNodes) / 2 13 | } 14 | 15 | func (app *App) getCurrentMaster(shardState map[string]*HostState) (string, error) { 16 | var master string 17 | err := app.dcs.Get(pathMasterNode, &master) 18 | if err != nil && err != dcs.ErrNotFound { 19 | return "", fmt.Errorf("failed to get current master from dcs: %s", err) 20 | } 21 | if master != "" { 22 | stateMaster, err := app.getMasterHost(shardState) 23 | if err != nil { 24 | app.logger.Warn("Have master in DCS but unable to validate", slog.Any("error", err)) 25 | return master, nil 26 | } 27 | if stateMaster != "" && stateMaster != master { 28 | app.logger.Warn(fmt.Sprintf("DCS and valkey master state diverged: %s and %s", master, stateMaster)) 29 | allStable := true 30 | for host, state := range shardState { 31 | if !state.PingStable || state.IsOffline { 32 | allStable = false 33 | app.logger.Warn(fmt.Sprintf("%s is dead skipping divergence fix", host)) 34 | break 35 | } 36 | } 37 | if allStable { 38 | return app.ensureCurrentMaster(shardState) 39 | } 40 | } 41 | return master, nil 42 | } 43 | return app.ensureCurrentMaster(shardState) 44 | } 45 | 46 | func (app *App) getMasterHost(shardState map[string]*HostState) (string, error) { 
47 | masters := make([]string, 0) 48 | for host, state := range shardState { 49 | if state.PingOk && state.IsMaster { 50 | masters = append(masters, host) 51 | } 52 | } 53 | if len(masters) > 1 { 54 | if app.mode == modeCluster { 55 | mastersWithSlots := make([]string, 0) 56 | for _, master := range masters { 57 | node := app.shard.Get(master) 58 | hasSlots, err := node.HasClusterSlots(app.ctx) 59 | if err != nil { 60 | return "", fmt.Errorf("unable to check slots on %s", master) 61 | } 62 | if hasSlots { 63 | mastersWithSlots = append(mastersWithSlots, master) 64 | } 65 | } 66 | if len(mastersWithSlots) == 1 { 67 | return mastersWithSlots[0], nil 68 | } 69 | } 70 | return "", fmt.Errorf("got more than 1 master: %s", masters) 71 | } 72 | if len(masters) == 0 { 73 | return "", nil 74 | } 75 | return masters[0], nil 76 | } 77 | 78 | func (app *App) ensureCurrentMaster(shardState map[string]*HostState) (string, error) { 79 | master, err := app.getMasterHost(shardState) 80 | if err != nil { 81 | return "", err 82 | } 83 | if master == "" { 84 | return "", fmt.Errorf("no master in shard of %d nodes", len(shardState)) 85 | } 86 | err = app.dcs.Set(pathMasterNode, master) 87 | if err != nil { 88 | return "", fmt.Errorf("failed to set current master in dcs: %s", err) 89 | } 90 | return master, nil 91 | } 92 | 93 | func (app *App) changeMaster(host, master string) error { 94 | if host == master { 95 | return fmt.Errorf("changing %s replication source to itself", host) 96 | } 97 | 98 | node := app.shard.Get(host) 99 | masterState := app.getHostState(master) 100 | masterNode := app.shard.Get(master) 101 | state := app.getHostState(host) 102 | 103 | if !state.PingOk { 104 | return fmt.Errorf("changeMaster: replica %s is dead - unable to init repair", host) 105 | } 106 | 107 | app.repairReplica(node, masterState, state, master, host) 108 | 109 | deadline := time.Now().Add(app.config.Valkey.WaitReplicationTimeout) 110 | for time.Now().Before(deadline) { 111 | state = 
app.getHostState(host) 112 | rs := state.ReplicaState 113 | if rs != nil && replicates(masterState, rs, host, masterNode, false) { 114 | break 115 | } 116 | if !state.PingOk { 117 | return fmt.Errorf("changeMaster: replica %s died while waiting to start replication from %s", host, master) 118 | } 119 | masterState = app.getHostState(master) 120 | if !masterState.PingOk { 121 | return fmt.Errorf("changeMaster: %s died while waiting to start replication to %s", master, host) 122 | } 123 | app.logger.Info(fmt.Sprintf("ChangeMaster: waiting for %s to start replication from %s", host, master)) 124 | app.repairReplica(node, masterState, state, master, host) 125 | time.Sleep(time.Second) 126 | } 127 | rs := state.ReplicaState 128 | if rs != nil && replicates(masterState, rs, host, masterNode, false) { 129 | app.logger.Info(fmt.Sprintf("ChangeMaster: %s started replication from %s", host, master)) 130 | } else { 131 | return fmt.Errorf("%s was unable to start replication from %s", host, master) 132 | } 133 | return nil 134 | } 135 | 136 | func (app *App) waitForCatchup(host, master string) error { 137 | if host == master { 138 | return fmt.Errorf("waiting for %s to catchup with itself", host) 139 | } 140 | 141 | deadline := time.Now().Add(app.config.Valkey.WaitCatchupTimeout) 142 | for time.Now().Before(deadline) { 143 | masterState := app.getHostState(master) 144 | if !masterState.PingOk { 145 | return fmt.Errorf("waitForCatchup: %s died while waiting for catchup on %s", master, host) 146 | } 147 | state := app.getHostState(host) 148 | if !state.PingOk { 149 | return fmt.Errorf("waitForCatchup: replica %s died while waiting for catchup from %s", host, master) 150 | } 151 | if state.ReplicaState == nil { 152 | app.logger.Warn(fmt.Sprintf("WaitForCatchup: %s has invalid replica state", host)) 153 | time.Sleep(time.Second) 154 | continue 155 | } 156 | var masterOffset int64 157 | if masterState.IsMaster { 158 | masterOffset = masterState.MasterReplicationOffset 159 | } else 
if masterState.ReplicaState == nil { 160 | app.logger.Warn(fmt.Sprintf("WaitForCatchup: %s has invalid replica state", master)) 161 | time.Sleep(time.Second) 162 | continue 163 | } else { 164 | masterOffset = masterState.ReplicaState.ReplicationOffset 165 | } 166 | if masterOffset <= state.ReplicaState.ReplicationOffset { 167 | return nil 168 | } 169 | app.logger.Info(fmt.Sprintf("WaitForCatchup: waiting for %s (offset=%d) to catchup with %s (offset=%d)", host, state.ReplicaState.ReplicationOffset, master, masterOffset)) 170 | time.Sleep(time.Second) 171 | } 172 | 173 | return fmt.Errorf("timeout waiting for %s to catchup with %s", host, master) 174 | } 175 | 176 | func (app *App) promote(master, oldMaster string, shardState map[string]*HostState, forceDeadline time.Time) error { 177 | node := app.shard.Get(master) 178 | 179 | if shardState[master].IsMaster { 180 | app.logger.Info(fmt.Sprintf("%s is already master", master)) 181 | return nil 182 | } 183 | 184 | switch app.mode { 185 | case modeSentinel: 186 | return node.SentinelPromote(app.ctx) 187 | case modeCluster: 188 | if shardState[oldMaster].PingOk { 189 | if time.Now().Before(forceDeadline) { 190 | app.logger.Info("Old master alive. Using FORCE to promote") 191 | return node.ClusterPromoteForce(app.ctx) 192 | } 193 | } 194 | majorityAlive, err := node.IsClusterMajorityAlive(app.ctx) 195 | if err != nil { 196 | app.logger.Error("New master is not able to check cluster majority state. Assuming that majority is alive.", slog.Any("error", err)) 197 | majorityAlive = true 198 | } 199 | if majorityAlive { 200 | app.logger.Info("Majority of master nodes in cluster alive. Using FORCE to promote") 201 | return node.ClusterPromoteForce(app.ctx) 202 | } 203 | app.logger.Info("Old master is dead and majority of master nodes in cluster dead. 
Using TAKEOVER to promote") 204 | return node.ClusterPromoteTakeover(app.ctx) 205 | } 206 | 207 | return fmt.Errorf("running promote with unsupported mode: %s", app.mode) 208 | } 209 | -------------------------------------------------------------------------------- /tests/features/03_cluster_switchover_to.feature: -------------------------------------------------------------------------------- 1 | Feature: Cluster mode switchover to specified host 2 | 3 | Scenario: Cluster mode switchover (to) with healthy master works 4 | Given clustered shard is up and running 5 | Then zookeeper node "/test/health/valkey1" should match json within "30" seconds 6 | """ 7 | { 8 | "ping_ok": true, 9 | "is_master": true, 10 | "is_read_only": false 11 | } 12 | """ 13 | And zookeeper node "/test/health/valkey2" should match json within "30" seconds 14 | """ 15 | { 16 | "ping_ok": true, 17 | "is_master": false 18 | } 19 | """ 20 | And zookeeper node "/test/health/valkey3" should match json within "30" seconds 21 | """ 22 | { 23 | "ping_ok": true, 24 | "is_master": false 25 | } 26 | """ 27 | When I run command on host "valkey1" 28 | """ 29 | rdsync switch --to valkey2 30 | """ 31 | Then command return code should be "0" 32 | And command output should match regexp 33 | """ 34 | switchover done 35 | """ 36 | And zookeeper node "/test/last_switch" should match json within "30" seconds 37 | """ 38 | { 39 | "to": "valkey2", 40 | "result": { 41 | "ok": true 42 | } 43 | } 44 | """ 45 | And zookeeper node "/test/master" should match regexp within "30" seconds 46 | """ 47 | valkey2 48 | """ 49 | And valkey host "valkey2" should be master 50 | 51 | Scenario: Cluster mode switchover (to) works with dead replica 52 | Given clustered shard is up and running 53 | Then zookeeper node "/test/health/valkey1" should match json within "30" seconds 54 | """ 55 | { 56 | "ping_ok": true, 57 | "is_master": true, 58 | "is_read_only": false 59 | } 60 | """ 61 | And zookeeper node "/test/health/valkey2" should 
match json within "30" seconds 62 | """ 63 | { 64 | "ping_ok": true, 65 | "is_master": false 66 | } 67 | """ 68 | And zookeeper node "/test/health/valkey3" should match json within "30" seconds 69 | """ 70 | { 71 | "ping_ok": true, 72 | "is_master": false 73 | } 74 | """ 75 | When valkey on host "valkey3" is stopped 76 | Then zookeeper node "/test/health/valkey3" should match json within "30" seconds 77 | """ 78 | { 79 | "ping_ok": false, 80 | "is_master": false 81 | } 82 | """ 83 | And zookeeper node "/test/active_nodes" should match json_exactly within "60" seconds 84 | """ 85 | ["valkey1","valkey2"] 86 | """ 87 | When I run command on host "valkey1" 88 | """ 89 | rdsync switch --to valkey2 --wait=0s 90 | """ 91 | Then command return code should be "0" 92 | And command output should match regexp 93 | """ 94 | switchover scheduled 95 | """ 96 | And zookeeper node "/test/last_switch" should match json within "30" seconds 97 | """ 98 | { 99 | "to": "valkey2", 100 | "result": { 101 | "ok": true 102 | } 103 | } 104 | """ 105 | And zookeeper node "/test/master" should match regexp within "30" seconds 106 | """ 107 | valkey2 108 | """ 109 | And valkey host "valkey2" should be master 110 | 111 | Scenario: Cluster mode switchover to non-active host fails 112 | Given clustered shard is up and running 113 | Then zookeeper node "/test/health/valkey1" should match json within "30" seconds 114 | """ 115 | { 116 | "ping_ok": true, 117 | "is_master": true, 118 | "is_read_only": false 119 | } 120 | """ 121 | And zookeeper node "/test/health/valkey2" should match json within "30" seconds 122 | """ 123 | { 124 | "ping_ok": true, 125 | "is_master": false 126 | } 127 | """ 128 | And zookeeper node "/test/health/valkey3" should match json within "30" seconds 129 | """ 130 | { 131 | "ping_ok": true, 132 | "is_master": false 133 | } 134 | """ 135 | When valkey on host "valkey3" is stopped 136 | Then zookeeper node "/test/health/valkey3" should match json within "30" seconds 137 | """ 
138 | { 139 | "ping_ok": false, 140 | "is_master": false 141 | } 142 | """ 143 | And zookeeper node "/test/active_nodes" should match json_exactly within "60" seconds 144 | """ 145 | ["valkey1","valkey2"] 146 | """ 147 | When I run command on host "valkey1" 148 | """ 149 | rdsync switch --to valkey3 --wait=0s 150 | """ 151 | Then command return code should be "1" 152 | And command output should match regexp 153 | """ 154 | valkey3 is not active 155 | """ 156 | 157 | Scenario: Cluster mode switchover with force works 158 | Given clustered shard is up and running 159 | Then zookeeper node "/test/health/valkey1" should match json within "30" seconds 160 | """ 161 | { 162 | "ping_ok": true, 163 | "is_master": true, 164 | "is_read_only": false 165 | } 166 | """ 167 | And zookeeper node "/test/health/valkey2" should match json within "30" seconds 168 | """ 169 | { 170 | "ping_ok": true, 171 | "is_master": false 172 | } 173 | """ 174 | And zookeeper node "/test/health/valkey3" should match json within "30" seconds 175 | """ 176 | { 177 | "ping_ok": true, 178 | "is_master": false 179 | } 180 | """ 181 | When host "valkey1" is detached from the network 182 | And host "valkey3" is detached from the network 183 | And I run command on host "valkey2" with timeout "180" seconds 184 | """ 185 | rdsync switch --to valkey2 --force 186 | """ 187 | Then command return code should be "0" 188 | And command output should match regexp 189 | """ 190 | switchover done 191 | """ 192 | Then zookeeper node "/test/last_switch" should match json within "60" seconds 193 | """ 194 | { 195 | "from": "", 196 | "to": "valkey2", 197 | "cause": "manual", 198 | "initiated_by": "valkey2", 199 | "result": { 200 | "ok": true 201 | } 202 | } 203 | """ 204 | When host "valkey3" is attached to the network 205 | And host "valkey1" is attached to the network 206 | Then zookeeper node "/test/health/valkey3" should match json within "60" seconds 207 | """ 208 | { 209 | "ping_ok": true, 210 | "is_master": false 
211 | } 212 | """ 213 | And replication on valkey host "valkey3" should run fine within "60" seconds 214 | And zookeeper node "/test/health/valkey1" should match json within "60" seconds 215 | """ 216 | { 217 | "ping_ok": true, 218 | "is_master": false 219 | } 220 | """ 221 | And replication on valkey host "valkey1" should run fine within "60" seconds 222 | -------------------------------------------------------------------------------- /tests/features/07_cluster_local_repair.feature: -------------------------------------------------------------------------------- 1 | Feature: Cluster mode local node repair 2 | 3 | Scenario: Cluster mode node overflowed with connections gets reserved connections watchdog triggered 4 | Given clustered shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | When I run command on host "valkey1" 15 | """ 16 | supervisorctl pid valkey 17 | """ 18 | And I save command output as "pid_before_conn_exhaustion" 19 | And I run command on host "valkey1" 20 | """ 21 | supervisorctl signal STOP rdsync 22 | """ 23 | And I run command on host "valkey2" 24 | """ 25 | supervisorctl signal STOP rdsync 26 | """ 27 | And I run command on host "valkey3" 28 | """ 29 | supervisorctl signal STOP rdsync 30 | """ 31 | And I run async command on host "valkey1" 32 | """ 33 | bash -c 'for _ in {0..1100}; do nc localhost 6379 & done; sleep infinity' 34 | """ 35 | Then valkey host "valkey1" should become unavailable within "30" seconds 36 | When I run command on host 
"valkey1" 37 | """ 38 | supervisorctl signal CONT rdsync 39 | """ 40 | And I run command on host "valkey2" 41 | """ 42 | supervisorctl signal CONT rdsync 43 | """ 44 | And I run command on host "valkey3" 45 | """ 46 | supervisorctl signal CONT rdsync 47 | """ 48 | Then valkey host "valkey1" should become available within "60" seconds 49 | When I run command on host "valkey1" 50 | """ 51 | supervisorctl pid valkey 52 | """ 53 | Then command output should match regexp 54 | """ 55 | {{.pid_before_conn_exhaustion}} 56 | """ 57 | When I run command on host "valkey1" 58 | """ 59 | grep Killing /var/log/rdsync.log 60 | """ 61 | Then command output should match regexp 62 | """ 63 | .*Local node has .* free connections left. Killing all client connections.* 64 | """ 65 | 66 | Scenario: Busy cluster node gets a SCRIPT KILL 67 | Given clustered shard is up and running 68 | Then valkey host "valkey1" should be master 69 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 70 | And replication on valkey host "valkey2" should run fine within "15" seconds 71 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 72 | And replication on valkey host "valkey3" should run fine within "15" seconds 73 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 74 | """ 75 | ["valkey1","valkey2","valkey3"] 76 | """ 77 | When I run async command on host "valkey1" 78 | """ 79 | valkey-cli -a functestpassword eval 'while true do end' 0 80 | """ 81 | Then valkey host "valkey1" should become available within "60" seconds 82 | 83 | Scenario: Cluster mode replica is restarted after OOM 84 | Given clustered shard is up and running 85 | Then valkey host "valkey1" should be master 86 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 87 | And replication on valkey host "valkey2" should run fine within "15" seconds 88 | And valkey host "valkey3" should become replica of "valkey1" 
within "15" seconds 89 | And replication on valkey host "valkey3" should run fine within "15" seconds 90 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 91 | """ 92 | ["valkey1","valkey2","valkey3"] 93 | """ 94 | When valkey on host "valkey2" is killed 95 | And I wait for "300" seconds 96 | Then valkey host "valkey2" should become available within "120" seconds 97 | And valkey host "valkey2" should become replica of "valkey1" within "60" seconds 98 | And replication on valkey host "valkey2" should run fine within "60" seconds 99 | 100 | Scenario: Cluster mode loading replica is not restarted 101 | Given clustered shard is up and running 102 | Then valkey host "valkey1" should be master 103 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 104 | And replication on valkey host "valkey2" should run fine within "15" seconds 105 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 106 | And replication on valkey host "valkey3" should run fine within "15" seconds 107 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 108 | """ 109 | ["valkey1","valkey2","valkey3"] 110 | """ 111 | When I run command on host "valkey1" with timeout "180" seconds 112 | """ 113 | valkey-cli -a functestpassword DEBUG populate 10000000 key 100 114 | """ 115 | And I run command on valkey host "valkey2" 116 | """ 117 | CONFIG SET key-load-delay 50 118 | """ 119 | And I run command on valkey host "valkey2" 120 | """ 121 | CONFIG SET loading-process-events-interval-bytes 1024 122 | """ 123 | And I run command on valkey host "valkey2" 124 | """ 125 | CONFIG REWRITE 126 | """ 127 | And I run async command on host "valkey2" 128 | """ 129 | supervisorctl restart valkey 130 | """ 131 | Then valkey host "valkey2" should become unavailable within "30" seconds 132 | When I run command on host "valkey2" 133 | """ 134 | supervisorctl pid valkey 135 | """ 136 | And I save 
command output as "pid_right_after_restart" 137 | And I wait for "360" seconds 138 | And I run command on host "valkey2" 139 | """ 140 | supervisorctl pid valkey 141 | """ 142 | Then command output should match regexp 143 | """ 144 | {{.pid_right_after_restart}} 145 | """ 146 | 147 | Scenario: Cluster mode master is restarted after hanging 148 | Given clustered shard is up and running 149 | Then valkey host "valkey1" should be master 150 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 151 | And replication on valkey host "valkey2" should run fine within "15" seconds 152 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 153 | And replication on valkey host "valkey3" should run fine within "15" seconds 154 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 155 | """ 156 | ["valkey1","valkey2","valkey3"] 157 | """ 158 | When I run async command on host "valkey1" 159 | """ 160 | valkey-cli -a functestpassword DEBUG SLEEP 600 161 | """ 162 | And I wait for "420" seconds 163 | Then valkey host "valkey1" should become available within "60" seconds 164 | -------------------------------------------------------------------------------- /internal/app/manager.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "time" 8 | 9 | "github.com/yandex/rdsync/internal/dcs" 10 | ) 11 | 12 | func (app *App) stateManager() appState { 13 | if !app.dcs.IsConnected() { 14 | return stateLost 15 | } 16 | if !app.dcs.AcquireLock(pathManagerLock) { 17 | return stateCandidate 18 | } 19 | 20 | err := app.shard.UpdateHostsInfo() 21 | if err != nil { 22 | app.logger.Error("Updating hosts info failed", slog.Any("error", err)) 23 | } 24 | 25 | shardState, err := app.getShardStateFromDB() 26 | if err != nil { 27 | app.logger.Error("Failed to get shard state from DB", slog.Any("error", err)) 
28 | return stateManager 29 | } 30 | 31 | shardStateDcs, err := app.getShardStateFromDcs() 32 | if err != nil { 33 | app.logger.Error("Failed to get shard state from DCS", slog.Any("error", err)) 34 | return stateManager 35 | } 36 | 37 | master, err := app.getCurrentMaster(shardState) 38 | if err != nil { 39 | app.logger.Error("Failed to get or identify master", slog.Any("error", err)) 40 | return stateManager 41 | } 42 | 43 | activeNodes, err := app.GetActiveNodes() 44 | if err != nil { 45 | app.logger.Error("Failed to get active nodes", slog.Any("error", err)) 46 | return stateManager 47 | } 48 | app.logger.Info(fmt.Sprintf("Active nodes: %v", activeNodes)) 49 | app.logger.Info(fmt.Sprintf("Master: %s", master)) 50 | app.logger.Info(fmt.Sprintf("Shard state: %v", shardState)) 51 | app.logger.Info(fmt.Sprintf("DCS shard state: %v", shardStateDcs)) 52 | // A maintenance record in DCS suspends normal management; enter maintenance unless it is already marked RdSyncPaused. 53 | maintenance, err := app.GetMaintenance() 54 | if err != nil && err != dcs.ErrNotFound { 55 | app.logger.Error("Failed to get maintenance from dcs", slog.Any("error", err)) 56 | return stateManager 57 | } 58 | if maintenance != nil { 59 | if !maintenance.RdSyncPaused { 60 | app.logger.Info("Entering maintenance") 61 | err := app.enterMaintenance(maintenance, master) 62 | if err != nil { 63 | app.logger.Error("Unable to enter maintenance", slog.Any("error", err)) 64 | return stateManager 65 | } 66 | } 67 | return stateMaintenance 68 | } 69 | 70 | updateActive := app.repairLocalNode(master) 71 | // updateActive reports whether the active-nodes list in DCS should be refreshed later this cycle. 72 | var switchover Switchover 73 | if err := app.dcs.Get(pathCurrentSwitch, &switchover); err == nil { 74 | err = app.approveSwitchover(&switchover, activeNodes, shardState) 75 | if err != nil { 76 | app.logger.Error("Unable to perform switchover", slog.Any("error", err)) 77 | err = app.finishSwitchover(&switchover, err) 78 | if err != nil { 79 | app.logger.Error("Failed to reject switchover", slog.Any("error", err)) 80 | } 81 | return stateManager 82 | } 83 | // Switchover approved: mark it started, perform it, then record success, failure, or abort. 84 | err = app.startSwitchover(&switchover) 85 | if err != nil 
{ 86 | app.logger.Error("Unable to start switchover", slog.Any("error", err)) 87 | return stateManager 88 | } 89 | err = app.performSwitchover(shardState, activeNodes, &switchover, master) 90 | if app.dcs.Get(pathCurrentSwitch, new(Switchover)) == dcs.ErrNotFound { 91 | app.logger.Error("Switchover was aborted") 92 | } else { 93 | if err != nil { 94 | err = app.failSwitchover(&switchover, err) 95 | if err != nil { 96 | app.logger.Error("Failed to report switchover failure", slog.Any("error", err)) 97 | } 98 | } else { 99 | err = app.finishSwitchover(&switchover, nil) 100 | if err != nil { 101 | app.logger.Error("Failed to report switchover finish", slog.Any("error", err)) 102 | } 103 | } 104 | } 105 | return stateManager 106 | } else if err != dcs.ErrNotFound { 107 | app.logger.Error("Getting current switchover failed", slog.Any("error", err)) 108 | return stateManager 109 | } 110 | poisonPill, err := app.getPoisonPill() 111 | if err != nil && err != dcs.ErrNotFound { 112 | app.logger.Error("Manager: failed to get poison pill from DCS", slog.Any("error", err)) 113 | return stateManager 114 | } 115 | if poisonPill != nil { 116 | err = app.clearPoisonPill() 117 | if err != nil { 118 | app.logger.Error("Manager: failed to remove poison pill from DCS", slog.Any("error", err)) 119 | return stateManager 120 | } 121 | } 122 | hosts := len(app.shard.Hosts()) 123 | masterFailed := false 124 | if shardStateDcs[master].PingOk && !shardState[master].PingOk { 125 | availableReplicas := 0 126 | for host, state := range shardState { 127 | if state.PingOk { 128 | availableReplicas++ 129 | } else { 130 | app.logger.Warn("Host seems down", slog.String("fqdn", host)) 131 | } 132 | } 133 | if availableReplicas > hosts/2 { 134 | app.logger.Error("We see that majority of shard is still alive, but master is not. 
So it probably failed.") 135 | masterFailed = true 136 | } 137 | } 138 | if (!shardStateDcs[master].PingOk && !shardState[master].PingOk) || shardStateDcs[master].IsOffline { 139 | masterFailed = true 140 | } 141 | if masterFailed { 142 | app.logger.Error(fmt.Sprintf("Master %s failure", master)) 143 | if app.nodeFailTime[master].IsZero() { 144 | app.nodeFailTime[master] = time.Now() 145 | } 146 | err = app.approveFailover(shardState, activeNodes, master) 147 | if err == nil { 148 | app.logger.Info("Failover approved") 149 | err = app.performFailover(master) 150 | if err != nil { 151 | app.logger.Error("Unable to perform failover", slog.Any("error", err)) 152 | } 153 | } else { 154 | app.logger.Error("Failover was not approved", slog.Any("error", err)) 155 | } 156 | return stateManager 157 | } 158 | needGiveUp := false 159 | if hosts > 2 { 160 | availableReplicas := 0 161 | availableReplicasDcs := 0 162 | for host, state := range shardState { 163 | if state.PingOk { 164 | availableReplicas++ 165 | } else { 166 | app.logger.Warn("Host seems down", slog.String("fqdn", host)) 167 | } 168 | } 169 | for host, state := range shardStateDcs { 170 | if state.PingOk { 171 | availableReplicasDcs++ 172 | } else { 173 | app.logger.Warn("Host seems down in DCS", slog.String("fqdn", host)) 174 | } 175 | } 176 | if availableReplicas <= hosts/2 && availableReplicasDcs > hosts/2 { 177 | if app.splitTime[master].IsZero() { 178 | app.splitTime[master] = time.Now() 179 | } 180 | if app.config.Valkey.FailoverTimeout > 0 { 181 | failedTime := time.Since(app.splitTime[master]) 182 | if failedTime < app.config.Valkey.FailoverTimeout { 183 | app.logger.Error( 184 | fmt.Sprintf("According to DCS majority of shard is still alive, but we don't see that from here, will wait for %v before giving up on manager role", 185 | app.config.Valkey.FailoverTimeout-failedTime)) 186 | return stateManager 187 | } 188 | } 189 | needGiveUp = true 190 | } 191 | } else if master != app.config.Hostname && 
!shardState[master].PingOk { 192 | app.logger.Error(fmt.Sprintf("Master %s probably failed, do not perform any kind of repair", master)) 193 | return stateManager 194 | } 195 | if needGiveUp { 196 | app.logger.Error("According to DCS majority of shard is still alive, but we don't see that from here. Giving up on manager role") 197 | delete(app.splitTime, master) 198 | app.dcs.ReleaseLock(pathManagerLock) 199 | waitCtx, cancel := context.WithTimeout(app.ctx, app.config.Valkey.FailoverTimeout) 200 | defer cancel() 201 | ticker := time.NewTicker(app.config.TickInterval) 202 | var manager dcs.LockOwner 203 | Out: 204 | for { 205 | select { 206 | case <-ticker.C: 207 | err = app.dcs.Get(pathManagerLock, &manager) 208 | if err != nil { 209 | app.logger.Error(fmt.Sprintf("Failed to get %s", pathManagerLock), slog.Any("error", err)) 210 | } else if manager.Hostname != app.config.Hostname { 211 | app.logger.Info(fmt.Sprintf("New manager: %s", manager.Hostname)) 212 | break Out 213 | } 214 | case <-waitCtx.Done(): 215 | app.logger.Error("No node took manager lock for failover timeout") 216 | break Out 217 | } 218 | } 219 | return stateCandidate 220 | } 221 | delete(app.nodeFailTime, master) 222 | delete(app.splitTime, master) 223 | app.repairShard(shardState, activeNodes, master) 224 | 225 | if updateActive { 226 | err = app.updateActiveNodes(shardState, shardStateDcs, activeNodes, master) 227 | if err != nil { 228 | app.logger.Error("Failed to update active nodes in dcs", slog.Any("error", err)) 229 | } 230 | } 231 | 232 | return stateManager 233 | } 234 | -------------------------------------------------------------------------------- /valkey_patches/0001_Add_replication_pause.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/config.c b/src/config.c 2 | index d0158b2c4..f788aef6f 100644 3 | --- a/src/config.c 4 | +++ b/src/config.c 5 | @@ -2588,6 +2588,25 @@ static int updateExtendedRedisCompat(const char **err) { 
6 | return 1; 7 | } 8 | 9 | +static int updateReplPaused(const char **err) { 10 | + UNUSED(err); 11 | + 12 | + if (server.repl_paused) { 13 | + if (server.primary_host) { 14 | + if (server.repl_state == REPL_STATE_CONNECTING || replicaIsInHandshakeState() || 15 | + server.repl_state == REPL_STATE_TRANSFER) { 16 | + cancelReplicationHandshake(0); 17 | + } 18 | + if (server.primary) { 19 | + freeClient(server.primary); 20 | + } 21 | + server.repl_state = REPL_STATE_CONNECT; 22 | + } 23 | + } 24 | + 25 | + return 1; 26 | +} 27 | + 28 | static int updateSighandlerEnabled(const char **err) { 29 | UNUSED(err); 30 | if (server.crashlog_enabled) 31 | @@ -3241,6 +3260,7 @@ standardConfig static_configs[] = { 32 | createBoolConfig("hide-user-data-from-log", NULL, MODIFIABLE_CONFIG, server.hide_user_data_from_log, 1, NULL, NULL), 33 | createBoolConfig("lua-enable-insecure-api", "lua-enable-deprecated-api", MODIFIABLE_CONFIG | HIDDEN_CONFIG | PROTECTED_CONFIG, server.lua_enable_insecure_api, 0, NULL, updateLuaEnableInsecureApi), 34 | createBoolConfig("import-mode", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.import_mode, 0, NULL, NULL), 35 | + createBoolConfig("repl-paused", NULL, MODIFIABLE_CONFIG, server.repl_paused, 0, NULL, updateReplPaused), 36 | 37 | /* String Configs */ 38 | createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), 39 | diff --git a/src/replication.c b/src/replication.c 40 | index 82ee9450f..25726c542 100644 41 | --- a/src/replication.c 42 | +++ b/src/replication.c 43 | @@ -59,7 +59,6 @@ void replicationResurrectProvisionalPrimary(void); 44 | void replicationSendAck(void); 45 | int replicaPutOnline(client *replica); 46 | void replicaStartCommandStream(client *replica); 47 | -int cancelReplicationHandshake(int reconnect); 48 | void replicationSteadyStateInit(void); 49 | void dualChannelSetupMainConnForPsync(connection *conn); 50 | void dualChannelSyncHandleRdbLoadCompletion(void); 51 | @@ -1069,7 
+1068,7 @@ void syncCommand(client *c) { 52 | 53 | /* Refuse SYNC requests if we are a replica but the link with our primary 54 | * is not ok... */ 55 | - if (server.primary_host && server.repl_state != REPL_STATE_CONNECTED) { 56 | + if (server.primary_host && server.repl_state != REPL_STATE_CONNECTED && !server.repl_paused) { 57 | addReplyError(c, "-NOMASTERLINK Can't SYNC while not connected with my master"); 58 | return; 59 | } 60 | @@ -5150,7 +5149,7 @@ void replicationCron(void) { 61 | } 62 | 63 | /* Check if we should connect to a PRIMARY */ 64 | - if (server.repl_state == REPL_STATE_CONNECT) { 65 | + if (server.repl_state == REPL_STATE_CONNECT && !server.repl_paused) { 66 | serverLog(LL_NOTICE, "Connecting to PRIMARY %s:%d", server.primary_host, server.primary_port); 67 | connectWithPrimary(); 68 | } 69 | diff --git a/src/server.h b/src/server.h 70 | index 25f01a31e..95d31758e 100644 71 | --- a/src/server.h 72 | +++ b/src/server.h 73 | @@ -2117,6 +2117,7 @@ struct valkeyServer { 74 | /* Synchronous replication. */ 75 | list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF. */ 76 | int get_ack_from_replicas; /* If true we send REPLCONF GETACK. 
*/ 77 | + int repl_paused; /* If true we don't try to connect to master */ 78 | /* Limits */ 79 | unsigned int maxclients; /* Max number of simultaneous clients */ 80 | unsigned long long maxmemory; /* Max number of memory bytes to use */ 81 | @@ -3116,6 +3117,8 @@ void updateFailoverStatus(void); 82 | void abortFailover(const char *err); 83 | const char *getFailoverStateString(void); 84 | sds getReplicaPortString(void); 85 | +int cancelReplicationHandshake(int reconnect); 86 | +int replicaIsInHandshakeState(void); 87 | int sendCurrentOffsetToReplica(client *replica); 88 | void addRdbReplicaToPsyncWait(client *replica); 89 | void initClientReplicationData(client *c); 90 | diff --git a/tests/cluster/tests/99-yandex-cloud-patches.tcl b/tests/cluster/tests/99-yandex-cloud-patches.tcl 91 | new file mode 100644 92 | index 000000000..6d0c1007b 93 | --- /dev/null 94 | +++ b/tests/cluster/tests/99-yandex-cloud-patches.tcl 95 | @@ -0,0 +1,48 @@ 96 | +# Test Yandex Cloud patches on cluster 97 | + 98 | +source "../tests/includes/init-tests.tcl" 99 | + 100 | +proc kill_clustered_redis {id} { 101 | + set pid [get_instance_attrib redis $id pid] 102 | + 103 | + stop_instance $pid 104 | + set_instance_attrib redis $id pid -1 105 | + set_instance_attrib redis $id link you_tried_to_talk_with_killed_instance 106 | + 107 | + # Remove the PID from the list of pids to kill at exit. 
108 | + set ::pids [lsearch -all -inline -not -exact $::pids $pid] 109 | +} 110 | + 111 | +test "Create a 2 node cluster (1 master and 1 replica)" { 112 | + create_cluster 1 1 113 | +} 114 | + 115 | +test "Cluster is up" { 116 | + assert_cluster_state ok 117 | +} 118 | + 119 | +test "Instance #1 synced with the master" { 120 | + wait_for_condition 1000 50 { 121 | + [RI 1 master_link_status] eq {up} 122 | + } else { 123 | + fail "Instance #1 master link status is not up" 124 | + } 125 | +} 126 | + 127 | +test "Replication pause on instance #1 works" { 128 | + assert {[R 1 config set repl-paused yes] eq {OK}} 129 | + wait_for_condition 1000 50 { 130 | + [RI 1 master_link_status] eq {down} 131 | + } else { 132 | + fail "Instance #1 master link status is not down" 133 | + } 134 | +} 135 | + 136 | +test "Replication resume on instance #1 works" { 137 | + assert {[R 1 config set repl-paused no] eq {OK}} 138 | + wait_for_condition 1000 50 { 139 | + [RI 1 master_link_status] eq {up} 140 | + } else { 141 | + fail "Instance #1 master link status is not up" 142 | + } 143 | +} 144 | diff --git a/tests/integration/yandex-cloud-patches.tcl b/tests/integration/yandex-cloud-patches.tcl 145 | new file mode 100644 146 | index 000000000..a2c9bb949 147 | --- /dev/null 148 | +++ b/tests/integration/yandex-cloud-patches.tcl 149 | @@ -0,0 +1,56 @@ 150 | +start_server {tags {"repl network external:skip"}} { 151 | + set slave [srv 0 client] 152 | + set slave_host [srv 0 host] 153 | + set slave_port [srv 0 port] 154 | + start_server {} { 155 | + set master_host [srv 0 host] 156 | + set master_port [srv 0 port] 157 | + 158 | + # Start the replication process... 
159 | + $slave replicaof $master_host $master_port 160 | + 161 | + test {Replication pause works} { 162 | + wait_for_condition 50 1000 { 163 | + [lindex [$slave role] 0] eq {slave} && 164 | + [string match {*master_link_status:up*} [$slave info replication]] 165 | + } else { 166 | + fail "Replication did not start" 167 | + } 168 | + 169 | + $slave config set repl-paused yes 170 | + 171 | + wait_for_condition 50 1000 { 172 | + [lindex [$slave role] 0] eq {slave} && 173 | + [string match {*master_link_status:down*} [$slave info replication]] 174 | + } else { 175 | + fail "Replication did not enter paused state" 176 | + } 177 | + } 178 | + 179 | + start_server {} { 180 | + set cascade_slave [srv 0 client] 181 | + 182 | + $cascade_slave replicaof $slave_host $slave_port 183 | + 184 | + test {Cascade replication from paused slave works} { 185 | + wait_for_condition 50 1000 { 186 | + [lindex [$cascade_slave role] 0] eq {slave} && 187 | + [string match {*master_link_status:up*} [$cascade_slave info replication]] 188 | + } else { 189 | + fail "Replication did not start" 190 | + } 191 | + } 192 | + } 193 | + 194 | + test {Replication resume works} { 195 | + $slave config set repl-paused no 196 | + 197 | + wait_for_condition 50 1000 { 198 | + [lindex [$slave role] 0] eq {slave} && 199 | + [string match {*master_link_status:up*} [$slave info replication]] 200 | + } else { 201 | + fail "Replication did not start after pause" 202 | + } 203 | + } 204 | + } 205 | +} 206 | -- 207 | 2.51.1 208 | 209 | -------------------------------------------------------------------------------- /tests/features/06_cluster_lost.feature: -------------------------------------------------------------------------------- 1 | Feature: Cluster mode survives dcs conn loss 2 | 3 | Scenario: Cluster mode survives dcs conn loss 4 | Given clustered shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | 
And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | When host "zoo3" is detached from the network 15 | And host "zoo2" is detached from the network 16 | And host "zoo1" is detached from the network 17 | Then valkey host "valkey1" should be master 18 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 19 | And replication on valkey host "valkey2" should run fine within "15" seconds 20 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 21 | And replication on valkey host "valkey3" should run fine within "15" seconds 22 | When I run command on valkey host "valkey1" 23 | """ 24 | SET MYKEY TESTVALUE 25 | """ 26 | Then valkey cmd result should match regexp 27 | """ 28 | OK 29 | """ 30 | 31 | Scenario: Cluster mode partitioned master goes offline 32 | Given clustered shard is up and running 33 | Then valkey host "valkey1" should be master 34 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 35 | And replication on valkey host "valkey2" should run fine within "15" seconds 36 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 37 | And replication on valkey host "valkey3" should run fine within "15" seconds 38 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 39 | """ 40 | ["valkey1","valkey2","valkey3"] 41 | """ 42 | When host "zoo3" is detached from the network 43 | And host "zoo2" is detached from the network 44 | And host "zoo1" is detached from the network 45 | And host "valkey2" is detached from the network 46 | And host "valkey3" is detached from the network 47 | Then 
valkey host "valkey1" should become unavailable within "60" seconds 48 | When host "zoo3" is attached to the network 49 | And host "zoo2" is attached to the network 50 | And host "zoo1" is attached to the network 51 | And host "valkey2" is attached to the network 52 | And host "valkey3" is attached to the network 53 | Then valkey host "valkey1" should become available within "60" seconds 54 | 55 | Scenario: Cluster mode partitioned replica goes offline 56 | Given clustered shard is up and running 57 | Then valkey host "valkey1" should be master 58 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 59 | And replication on valkey host "valkey2" should run fine within "15" seconds 60 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 61 | And replication on valkey host "valkey3" should run fine within "15" seconds 62 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 63 | """ 64 | ["valkey1","valkey2","valkey3"] 65 | """ 66 | When host "zoo3" is detached from the network 67 | And host "zoo2" is detached from the network 68 | And host "zoo1" is detached from the network 69 | And host "valkey1" is detached from the network 70 | And host "valkey3" is detached from the network 71 | Then valkey host "valkey2" should become unavailable within "60" seconds 72 | When host "zoo3" is attached to the network 73 | And host "zoo2" is attached to the network 74 | And host "zoo1" is attached to the network 75 | And host "valkey1" is attached to the network 76 | And host "valkey3" is attached to the network 77 | Then valkey host "valkey2" should become available within "60" seconds 78 | 79 | Scenario: Cluster mode partially partitioned manager gives up on manager role 80 | Given clustered shard is up and running 81 | Then valkey host "valkey1" should be master 82 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 83 | And replication on valkey host 
"valkey2" should run fine within "15" seconds 84 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 85 | And replication on valkey host "valkey3" should run fine within "15" seconds 86 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 87 | """ 88 | ["valkey1","valkey2","valkey3"] 89 | """ 90 | When I run command on host "valkey1" with timeout "20" seconds 91 | """ 92 | supervisorctl stop rdsync 93 | """ 94 | Then command return code should be "0" 95 | And zookeeper node "/test/manager" should match regexp within "30" seconds 96 | """ 97 | .*valkey[23].* 98 | """ 99 | When I run command on host "valkey1" with timeout "20" seconds 100 | """ 101 | supervisorctl start rdsync 102 | """ 103 | When I get zookeeper node "/test/manager" 104 | And I save zookeeper query result as "new_manager" 105 | And port "6379" on host "{{.new_manager.hostname}}" is blocked 106 | And I wait for "120" seconds 107 | Then valkey host "valkey1" should be master 108 | When I run command on host "{{.new_manager.hostname}}" 109 | """ 110 | grep ERROR /var/log/rdsync.log 111 | """ 112 | Then command output should match regexp 113 | """ 114 | .*Giving up on manager role.* 115 | """ 116 | When I run command on host "{{.new_manager.hostname}}" 117 | """ 118 | grep INFO /var/log/rdsync.log 119 | """ 120 | Then command output should match regexp 121 | """ 122 | .*New manager.* 123 | """ 124 | When port "6379" on host "{{.new_manager.hostname}}" is unblocked 125 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 126 | """ 127 | ["valkey1","valkey2","valkey3"] 128 | """ 129 | 130 | Scenario: Cluster mode partially partitioned manager gives up on manager role and triggers failover on master 131 | Given clustered shard is up and running 132 | Then valkey host "valkey1" should be master 133 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 134 | And replication on 
valkey host "valkey2" should run fine within "15" seconds 135 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 136 | And replication on valkey host "valkey3" should run fine within "15" seconds 137 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 138 | """ 139 | ["valkey1","valkey2","valkey3"] 140 | """ 141 | When port "6379" on host "valkey1" is blocked 142 | And I wait for "240" seconds 143 | And I run command on host "valkey1" 144 | """ 145 | grep ERROR /var/log/rdsync.log 146 | """ 147 | Then command output should match regexp 148 | """ 149 | .*Giving up on manager role.* 150 | """ 151 | And zookeeper node "/test/last_switch" should match json within "60" seconds 152 | """ 153 | { 154 | "cause": "auto", 155 | "from": "valkey1", 156 | "result": { 157 | "ok": true 158 | } 159 | } 160 | """ 161 | When I get zookeeper node "/test/master" 162 | And I save zookeeper query result as "new_master" 163 | Then valkey host "{{.new_master}}" should be master 164 | When port "6379" on host "valkey1" is unblocked 165 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 166 | """ 167 | ["valkey1","valkey2","valkey3"] 168 | """ 169 | -------------------------------------------------------------------------------- /tests/features/06_sentinel_lost.feature: -------------------------------------------------------------------------------- 1 | Feature: Sentinel mode survives dcs conn loss 2 | 3 | Scenario: Sentinel mode survives dcs conn loss 4 | Given sentinel shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper 
node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | When host "zoo3" is detached from the network 15 | And host "zoo2" is detached from the network 16 | And host "zoo1" is detached from the network 17 | Then valkey host "valkey1" should be master 18 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 19 | And replication on valkey host "valkey2" should run fine within "15" seconds 20 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 21 | And replication on valkey host "valkey3" should run fine within "15" seconds 22 | When I run command on valkey host "valkey1" 23 | """ 24 | SET MYKEY TESTVALUE 25 | """ 26 | Then valkey cmd result should match regexp 27 | """ 28 | OK 29 | """ 30 | 31 | Scenario: Sentinel mode partitioned master goes offline 32 | Given sentinel shard is up and running 33 | Then valkey host "valkey1" should be master 34 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 35 | And replication on valkey host "valkey2" should run fine within "15" seconds 36 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 37 | And replication on valkey host "valkey3" should run fine within "15" seconds 38 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 39 | """ 40 | ["valkey1","valkey2","valkey3"] 41 | """ 42 | When host "zoo3" is detached from the network 43 | And host "zoo2" is detached from the network 44 | And host "zoo1" is detached from the network 45 | And host "valkey2" is detached from the network 46 | And host "valkey3" is detached from the network 47 | Then valkey host "valkey1" should become unavailable within "60" seconds 48 | When host "zoo3" is attached to the network 49 | And host "zoo2" is attached to the network 50 | And host "zoo1" is attached to the network 51 | And host "valkey2" is attached to the 
network 52 | And host "valkey3" is attached to the network 53 | Then valkey host "valkey1" should become available within "60" seconds 54 | 55 | Scenario: Sentinel mode partitioned replica goes offline 56 | Given sentinel shard is up and running 57 | Then valkey host "valkey1" should be master 58 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 59 | And replication on valkey host "valkey2" should run fine within "15" seconds 60 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 61 | And replication on valkey host "valkey3" should run fine within "15" seconds 62 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 63 | """ 64 | ["valkey1","valkey2","valkey3"] 65 | """ 66 | When host "zoo3" is detached from the network 67 | And host "zoo2" is detached from the network 68 | And host "zoo1" is detached from the network 69 | And host "valkey1" is detached from the network 70 | And host "valkey3" is detached from the network 71 | Then valkey host "valkey2" should become unavailable within "60" seconds 72 | When host "zoo3" is attached to the network 73 | And host "zoo2" is attached to the network 74 | And host "zoo1" is attached to the network 75 | And host "valkey1" is attached to the network 76 | And host "valkey3" is attached to the network 77 | Then valkey host "valkey2" should become available within "60" seconds 78 | 79 | Scenario: Sentinel mode partially partitioned manager gives up on manager role 80 | Given sentinel shard is up and running 81 | Then valkey host "valkey1" should be master 82 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 83 | And replication on valkey host "valkey2" should run fine within "15" seconds 84 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 85 | And replication on valkey host "valkey3" should run fine within "15" seconds 86 | And zookeeper node "/test/active_nodes" 
should match json_exactly within "30" seconds 87 | """ 88 | ["valkey1","valkey2","valkey3"] 89 | """ 90 | When I run command on host "valkey1" with timeout "20" seconds 91 | """ 92 | supervisorctl stop rdsync 93 | """ 94 | Then command return code should be "0" 95 | And zookeeper node "/test/manager" should match regexp within "30" seconds 96 | """ 97 | .*valkey[23].* 98 | """ 99 | When I run command on host "valkey1" with timeout "20" seconds 100 | """ 101 | supervisorctl start rdsync 102 | """ 103 | When I get zookeeper node "/test/manager" 104 | And I save zookeeper query result as "new_manager" 105 | And port "6379" on host "{{.new_manager.hostname}}" is blocked 106 | And I wait for "120" seconds 107 | Then valkey host "valkey1" should be master 108 | When I run command on host "{{.new_manager.hostname}}" 109 | """ 110 | grep ERROR /var/log/rdsync.log 111 | """ 112 | Then command output should match regexp 113 | """ 114 | .*Giving up on manager role.* 115 | """ 116 | When I run command on host "{{.new_manager.hostname}}" 117 | """ 118 | grep INFO /var/log/rdsync.log 119 | """ 120 | Then command output should match regexp 121 | """ 122 | .*New manager.* 123 | """ 124 | When port "6379" on host "{{.new_manager.hostname}}" is unblocked 125 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 126 | """ 127 | ["valkey1","valkey2","valkey3"] 128 | """ 129 | 130 | Scenario: Sentinel mode partially partitioned manager gives up on manager role and triggers failover on master 131 | Given sentinel shard is up and running 132 | Then valkey host "valkey1" should be master 133 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 134 | And replication on valkey host "valkey2" should run fine within "15" seconds 135 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 136 | And replication on valkey host "valkey3" should run fine within "15" seconds 137 | And zookeeper node 
"/test/active_nodes" should match json_exactly within "30" seconds 138 | """ 139 | ["valkey1","valkey2","valkey3"] 140 | """ 141 | When port "6379" on host "valkey1" is blocked 142 | And I wait for "240" seconds 143 | And I run command on host "valkey1" 144 | """ 145 | grep ERROR /var/log/rdsync.log 146 | """ 147 | Then command output should match regexp 148 | """ 149 | .*Giving up on manager role.* 150 | """ 151 | And zookeeper node "/test/last_switch" should match json within "60" seconds 152 | """ 153 | { 154 | "cause": "auto", 155 | "from": "valkey1", 156 | "result": { 157 | "ok": true 158 | } 159 | } 160 | """ 161 | When I get zookeeper node "/test/master" 162 | And I save zookeeper query result as "new_master" 163 | Then valkey host "{{.new_master}}" should be master 164 | When port "6379" on host "valkey1" is unblocked 165 | Then zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 166 | """ 167 | ["valkey1","valkey2","valkey3"] 168 | """ 169 | -------------------------------------------------------------------------------- /tests/features/07_sentinel_local_repair.feature: -------------------------------------------------------------------------------- 1 | Feature: Sentinel mode local node repair 2 | 3 | Scenario: Sentinel mode node overflowed with connections gets reserved connections watchdog triggered 4 | Given sentinel shard is up and running 5 | Then valkey host "valkey1" should be master 6 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 7 | And replication on valkey host "valkey2" should run fine within "15" seconds 8 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 9 | And replication on valkey host "valkey3" should run fine within "15" seconds 10 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 11 | """ 12 | ["valkey1","valkey2","valkey3"] 13 | """ 14 | When I run command on host "valkey1" 15 | """ 16 | 
supervisorctl pid valkey 17 | """ 18 | And I save command output as "pid_before_conn_exhaustion" 19 | And I run command on host "valkey1" 20 | """ 21 | supervisorctl signal STOP rdsync 22 | """ 23 | And I run command on host "valkey2" 24 | """ 25 | supervisorctl signal STOP rdsync 26 | """ 27 | And I run command on host "valkey3" 28 | """ 29 | supervisorctl signal STOP rdsync 30 | """ 31 | And I run async command on host "valkey1" 32 | """ 33 | bash -c 'for _ in {0..1100}; do nc localhost 6379 & done; sleep infinity' 34 | """ 35 | Then valkey host "valkey1" should become unavailable within "30" seconds 36 | When I run command on host "valkey1" 37 | """ 38 | supervisorctl signal CONT rdsync 39 | """ 40 | And I run command on host "valkey2" 41 | """ 42 | supervisorctl signal CONT rdsync 43 | """ 44 | And I run command on host "valkey3" 45 | """ 46 | supervisorctl signal CONT rdsync 47 | """ 48 | Then valkey host "valkey1" should become available within "60" seconds 49 | When I run command on host "valkey1" 50 | """ 51 | supervisorctl pid valkey 52 | """ 53 | Then command output should match regexp 54 | """ 55 | {{.pid_before_conn_exhaustion}} 56 | """ 57 | When I run command on host "valkey1" 58 | """ 59 | grep Killing /var/log/rdsync.log 60 | """ 61 | Then command output should match regexp 62 | """ 63 | .*Local node has .* free connections left. 
Killing all client connections.* 64 | """ 65 | 66 | Scenario: Sentinel mode senticache is restarted after OOM 67 | Given sentinel shard is up and running 68 | Then valkey host "valkey1" should be master 69 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 70 | And replication on valkey host "valkey2" should run fine within "15" seconds 71 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 72 | And replication on valkey host "valkey3" should run fine within "15" seconds 73 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 74 | """ 75 | ["valkey1","valkey2","valkey3"] 76 | """ 77 | When I run command on host "valkey1" 78 | """ 79 | supervisorctl stop senticache 80 | """ 81 | Then senticache host "valkey1" should have master "valkey1" within "30" seconds 82 | 83 | Scenario: Busy sentinel mode node gets a SCRIPT KILL 84 | Given sentinel shard is up and running 85 | Then valkey host "valkey1" should be master 86 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 87 | And replication on valkey host "valkey2" should run fine within "15" seconds 88 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 89 | And replication on valkey host "valkey3" should run fine within "15" seconds 90 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 91 | """ 92 | ["valkey1","valkey2","valkey3"] 93 | """ 94 | When I run async command on host "valkey1" 95 | """ 96 | valkey-cli -a functestpassword eval 'while true do end' 0 97 | """ 98 | Then valkey host "valkey1" should become available within "60" seconds 99 | 100 | Scenario: Sentinel mode replica is restarted after OOM 101 | Given sentinel shard is up and running 102 | Then valkey host "valkey1" should be master 103 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 104 | And replication on valkey host 
"valkey2" should run fine within "15" seconds 105 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 106 | And replication on valkey host "valkey3" should run fine within "15" seconds 107 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 108 | """ 109 | ["valkey1","valkey2","valkey3"] 110 | """ 111 | When valkey on host "valkey2" is killed 112 | And I wait for "300" seconds 113 | Then valkey host "valkey2" should become available within "120" seconds 114 | And valkey host "valkey2" should become replica of "valkey1" within "60" seconds 115 | And replication on valkey host "valkey2" should run fine within "60" seconds 116 | 117 | Scenario: Sentinel mode loading replica is not restarted 118 | Given sentinel shard is up and running 119 | Then valkey host "valkey1" should be master 120 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 121 | And replication on valkey host "valkey2" should run fine within "15" seconds 122 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 123 | And replication on valkey host "valkey3" should run fine within "15" seconds 124 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 125 | """ 126 | ["valkey1","valkey2","valkey3"] 127 | """ 128 | When I run command on host "valkey1" with timeout "180" seconds 129 | """ 130 | valkey-cli -a functestpassword DEBUG populate 10000000 key 100 131 | """ 132 | And I run command on valkey host "valkey2" 133 | """ 134 | CONFIG SET key-load-delay 50 135 | """ 136 | And I run command on valkey host "valkey2" 137 | """ 138 | CONFIG SET loading-process-events-interval-bytes 1024 139 | """ 140 | And I run command on valkey host "valkey2" 141 | """ 142 | CONFIG REWRITE 143 | """ 144 | And I run async command on host "valkey2" 145 | """ 146 | supervisorctl restart valkey 147 | """ 148 | Then valkey host "valkey2" should become unavailable within 
"30" seconds 149 | When I run command on host "valkey2" 150 | """ 151 | supervisorctl pid valkey 152 | """ 153 | And I save command output as "pid_right_after_restart" 154 | And I wait for "360" seconds 155 | And I run command on host "valkey2" 156 | """ 157 | supervisorctl pid valkey 158 | """ 159 | Then command output should match regexp 160 | """ 161 | {{.pid_right_after_restart}} 162 | """ 163 | 164 | Scenario: Sentinel mode master is restarted after hanging 165 | Given sentinel shard is up and running 166 | Then valkey host "valkey1" should be master 167 | And valkey host "valkey2" should become replica of "valkey1" within "15" seconds 168 | And replication on valkey host "valkey2" should run fine within "15" seconds 169 | And valkey host "valkey3" should become replica of "valkey1" within "15" seconds 170 | And replication on valkey host "valkey3" should run fine within "15" seconds 171 | And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds 172 | """ 173 | ["valkey1","valkey2","valkey3"] 174 | """ 175 | When I run async command on host "valkey1" 176 | """ 177 | valkey-cli -a functestpassword DEBUG SLEEP 600 178 | """ 179 | And I wait for "420" seconds 180 | Then valkey host "valkey1" should become available within "60" seconds 181 | --------------------------------------------------------------------------------