├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .golangci.yml ├── LICENSE.txt ├── Makefile ├── README.md ├── change.go ├── ci └── docker-compose.yml ├── consumer_test.go ├── doc.go ├── examples ├── printer │ └── main.go ├── replicator-aws-sns │ ├── go.mod │ ├── go.sum │ ├── main.go │ ├── replicator_test.go │ └── utils.go ├── replicator-gcp-pub │ ├── go.mod │ ├── go.sum │ ├── main.go │ ├── replicator_test.go │ └── utils.go ├── replicator │ ├── .gitignore │ ├── main.go │ ├── replicator_test.go │ └── utils.go └── simple-printer │ └── main.go ├── go.mod ├── go.sum ├── logger.go ├── progress.go ├── reader.go ├── stream_batch.go ├── testutils └── utils.go ├── topology.go ├── types_test.go ├── utils.go └── utils_test.go /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build-and-test: 11 | name: Build and test 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: git checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: set up Go 18 | uses: actions/setup-go@v5 19 | with: 20 | go-version: ^1.18 21 | cache-dependency-path: | 22 | go.sum 23 | 24 | - name: Build 25 | run: make build 26 | 27 | - name: Run linter 28 | run: make lint 29 | 30 | - name: Start test environment 31 | run: make link-existing-docker-compose && make start-docker-environment 32 | 33 | - name: Run tests 34 | run: make test 35 | 36 | - name: Stop test environment 37 | run: make stop-docker-environment -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | bin/* -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | issues: 2 | exclude-rules: 3 | - path: 
examples/* 4 | linters: 5 | - forbidigo 6 | linters: 7 | disable-all: true 8 | enable: 9 | - errcheck 10 | - gocritic 11 | - gofumpt 12 | - goheader 13 | - goimports 14 | - gosimple 15 | - govet 16 | - ineffassign 17 | #- lll 18 | - misspell 19 | - predeclared 20 | - staticcheck 21 | - thelper 22 | - tparallel 23 | - typecheck 24 | - unused 25 | - forbidigo 26 | run: 27 | allow-parallel-runners: true 28 | modules-download-mode: readonly 29 | tests: true 30 | go: '1.14' 31 | linters-settings: 32 | govet: 33 | enable-all: true 34 | disable: 35 | - shadow 36 | - fieldalignment 37 | gofumpt: 38 | extra-rules: true 39 | goimports: 40 | local-prefixes: github.com/scylladb/scylla-cdc-go 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOVERSION ?= 1.22.5 2 | GOOS := $(shell uname | tr '[:upper:]' '[:lower:]') 3 | GOARCH := $(shell go env GOARCH) 4 | GOPACKAGES := $(shell go list -tags="e2e,integration" ./...) 5 | GOBUILDPACKAGES := $(shell go list ./...) 6 | 7 | GIT_COMMIT := $(shell git rev-list -1 HEAD) 8 | TAG_VERSION := $(shell git describe --tags --abbrev=0) 9 | DATE := $(shell date -u) 10 | MAKEFILE_PATH := $(abspath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) 11 | 12 | ifndef SCYLLA_SUBNET 13 | export SCYLLA_SUBNET := 10.254.254.0/24 14 | endif 15 | ifndef SCYLLA_SRC_URI 16 | export SCYLLA_SRC_URI := 10.254.254.100 17 | endif 18 | ifndef SCYLLA_DST_URI 19 | export SCYLLA_DST_URI := 10.254.254.200 20 | endif 21 | ifndef SCYLLA_IMAGE 22 | export SCYLLA_IMAGE := scylladb/scylla:6.0.2 23 | endif 24 | 25 | ifndef GOBIN 26 | export GOBIN := $(MAKEFILE_PATH)/bin 27 | endif 28 | 29 | define dl_bin 30 | @[ -d "$(GOBIN)" ] || mkdir -p "$(GOBIN)"; \ 31 | if [ -L "$(GOBIN)/$(1)" ] && [ -e "$(GOBIN)/$(1)" ]; then \ 32 | echo "$(GOBIN)/$(1) is already installed."; \ 33 | exit 0; \ 34 | fi; \ 35 | if $(GOBIN)/$(1) --version 2>/dev/null | grep "$(2)" >/dev/null; then \ 36 | echo "$(GOBIN)/$(1) is already installed."; \ 37 | exit 0; \ 38 | fi; \ 39 | echo "$(GOBIN)/$(1) is not found, downloading."; \ 40 | rm -f "$(GOBIN)/$(1)" >/dev/null 2>&1; \ 41 | echo "Downloading $(GOBIN)/$(1)"; \ 42 | curl --progress-bar -L $(3) --output "$(GOBIN)/$(1)"; \ 43 | chmod +x "$(GOBIN)/$(1)"; 44 | endef 45 | 46 | define dl_tgz 47 | @if [ ! 
-f "$(GOBIN)/$(1)" ]; then \ 48 | echo "Downloading $(GOBIN)/$(1)"; \ 49 | curl --progress-bar -L $(2) | tar zxf - --wildcards --strip 1 -C $(GOBIN) '*/$(1)'; \ 50 | chmod +x "$(GOBIN)/$(1)"; \ 51 | fi 52 | endef 53 | 54 | define dl_tgz 55 | @[ -d "$(GOBIN)" ] || mkdir -p "$(GOBIN)"; \ 56 | if [ -L "$(GOBIN)/$(1)" ] && [ -e "$(GOBIN)/$(1)" ]; then \ 57 | echo "$(GOBIN)/$(1) is already installed."; \ 58 | exit 0; \ 59 | fi; \ 60 | if $(GOBIN)/$(1) --version 2>/dev/null | grep "$(2)" >/dev/null; then \ 61 | echo "$(GOBIN)/$(1) is already installed."; \ 62 | exit 0; \ 63 | fi; \ 64 | echo "$(GOBIN)/$(1) is not found, downloading."; \ 65 | rm -f "$(GOBIN)/$(1)" >/dev/null 2>&1; \ 66 | echo "Downloading $(GOBIN)/$(1)"; \ 67 | curl --progress-bar -L $(3) | tar zxf - --wildcards --strip 1 -C $(GOBIN) '*/$(1)'; \ 68 | chmod +x "$(GOBIN)/$(1)"; 69 | endef 70 | 71 | .PHONY: tune-aio-max-nr 72 | tune-aio-max-nr: 73 | @bash -c '[[ "2097152" -ge "$$(cat /proc/sys/fs/aio-max-nr)" ]] && sudo sh -c "echo 2097152 >> /proc/sys/fs/aio-max-nr"' 74 | 75 | .PHONY: start-docker-environment 76 | start-docker-environment: install-docker-compose tune-aio-max-nr 77 | $(GOBIN)/docker-compose -f ./ci/docker-compose.yml up -d 78 | until $(GOBIN)/docker-compose -f ./ci/docker-compose.yml exec -T source_node cqlsh -e "select * from system.local" ; do sleep 1; done 79 | until $(GOBIN)/docker-compose -f ./ci/docker-compose.yml exec -T destination_node cqlsh -e "select * from system.local" ; do sleep 1; done 80 | 81 | .PHONY: stop-docker-environment 82 | stop-docker-environment: install-docker-compose 83 | $(GOBIN)/docker-compose -f ./ci/docker-compose.yml kill 84 | 85 | .PHONY: link-existing-docker-compose 86 | link-existing-docker-compose: 87 | [ -d "$(GOBIN)" ] || mkdir -p "$(GOBIN)" 88 | whereis docker-compose | awk '{print $$2}' | xargs -I {} ln -s {} $(GOBIN)/docker-compose 2>/dev/null || true 89 | 90 | .PHONY: install-docker-compose 91 | install-docker-compose: DOCKER_COMPOSE_VERSION = 2.29.2 
92 | install-docker-compose: Makefile 93 | ifeq ($(GOARCH),arm64) 94 | $(call dl_bin,docker-compose,${DOCKER_COMPOSE_VERSION},https://github.com/docker/compose/releases/download/v$(DOCKER_COMPOSE_VERSION)/docker-compose-$(GOOS)-aarch64) 95 | else ifeq ($(GOARCH),amd64) 96 | $(call dl_bin,docker-compose,${DOCKER_COMPOSE_VERSION},https://github.com/docker/compose/releases/download/v$(DOCKER_COMPOSE_VERSION)/docker-compose-$(GOOS)-x86_64) 97 | else 98 | @printf 'Unknown architecture "%s"\n' "$(GOARCH)"; \ 99 | exit 69 100 | endif 101 | 102 | 103 | install-golangci-lint: GOLANGCI_VERSION = 1.60.1 104 | install-golangci-lint: Makefile 105 | ifeq ($(GOARCH),arm64) 106 | $(call dl_tgz,golangci-lint,${GOLANGCI_VERSION},https://github.com/golangci/golangci-lint/releases/download/v$(GOLANGCI_VERSION)/golangci-lint-$(GOLANGCI_VERSION)-$(GOOS)-arm64.tar.gz) 107 | else ifeq ($(GOARCH),amd64) 108 | $(call dl_tgz,golangci-lint,${GOLANGCI_VERSION},https://github.com/golangci/golangci-lint/releases/download/v$(GOLANGCI_VERSION)/golangci-lint-$(GOLANGCI_VERSION)-$(GOOS)-amd64.tar.gz) 109 | else 110 | @printf 'Unknown architecture "%s"\n' "$(GOARCH)"; \ 111 | exit 69 112 | endif 113 | 114 | .PHONY: test 115 | test: install-docker-compose start-docker-environment 116 | @echo "Running tests" 117 | @go test -v ./... 118 | 119 | .PHONY: build 120 | build: 121 | @echo "Building" 122 | @go build -v ./... 123 | 124 | .PHONY: lint 125 | lint: install-golangci-lint 126 | go list ./... | sed -e 's/github.com\/scylladb\/scylla-cdc-go/./g' | \ 127 | xargs $(GOBIN)/golangci-lint run --timeout=5m 128 | 129 | .PHONY: fix-lint 130 | fix-lint: install-golangci-lint 131 | go list ./... 
| sed -e 's/github.com\/scylladb\/scylla-cdc-go/./g' | \ 132 | xargs $(GOBIN)/golangci-lint run --timeout=5m --fix 133 | 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scylla-cdc-go 2 | 3 | Package scyllacdc is a library that helps develop applications that react 4 | to changes from Scylla's CDC. 5 | 6 | It is recommended to get familiar with the Scylla CDC documentation first 7 | in order to understand the concepts used in the documentation of scyllacdc: 8 | https://docs.scylladb.com/using-scylla/cdc/ 9 | 10 | ## Documentation 11 | 12 | For an explanation how to use the library, please look at the [godoc documentation](https://godoc.org/github.com/scylladb/scylla-cdc-go). 13 | 14 | This repository also includes two [example programs](examples). 15 | -------------------------------------------------------------------------------- /ci/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | services: 4 | source_node: 5 | image: scylladb/scylla 6 | command: --seeds ${SCYLLA_SRC_URI} --skip-wait-for-gossip-to-settle 0 7 | networks: 8 | public: 9 | ipv4_address: ${SCYLLA_SRC_URI} 10 | destination_node: 11 | image: scylladb/scylla 12 | command: --seeds ${SCYLLA_DST_URI} --skip-wait-for-gossip-to-settle 0 13 | networks: 14 | public: 15 | ipv4_address: ${SCYLLA_DST_URI} 16 | 17 | networks: 18 | public: 19 | driver: bridge 20 | ipam: 21 | driver: default 22 | config: 23 | - subnet: ${SCYLLA_SUBNET} 24 | -------------------------------------------------------------------------------- /consumer_test.go: -------------------------------------------------------------------------------- 1 | package scyllacdc_test 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "sync" 8 | "testing" 9 | "time" 10 | 11 | "github.com/gocql/gocql" 12 | 13 | 
"github.com/scylladb/scylla-cdc-go/testutils" 14 | 15 | scyllacdc "github.com/scylladb/scylla-cdc-go" 16 | ) 17 | 18 | type recordingConsumer struct { 19 | mu *sync.Mutex 20 | emptyTimestamps []gocql.UUID 21 | } 22 | 23 | func (rc *recordingConsumer) CreateChangeConsumer(_ context.Context, _ scyllacdc.CreateChangeConsumerInput) (scyllacdc.ChangeConsumer, error) { 24 | return rc, nil 25 | } 26 | 27 | func (rc *recordingConsumer) Consume(ctx context.Context, change scyllacdc.Change) error { 28 | return nil 29 | } 30 | 31 | func (rc *recordingConsumer) End() error { 32 | return nil 33 | } 34 | 35 | func (rc *recordingConsumer) Empty(ctx context.Context, ackTime gocql.UUID) error { 36 | rc.mu.Lock() 37 | rc.emptyTimestamps = append(rc.emptyTimestamps, ackTime) 38 | rc.mu.Unlock() 39 | return nil 40 | } 41 | 42 | func (rc *recordingConsumer) GetTimestamps() []gocql.UUID { 43 | rc.mu.Lock() 44 | ret := append([]gocql.UUID{}, rc.emptyTimestamps...) 45 | rc.mu.Unlock() 46 | return ret 47 | } 48 | 49 | func TestConsumerCallsEmptyCallback(t *testing.T) { 50 | consumer := &recordingConsumer{mu: &sync.Mutex{}} 51 | 52 | adv := scyllacdc.AdvancedReaderConfig{ 53 | ChangeAgeLimit: -time.Millisecond, 54 | PostNonEmptyQueryDelay: 100 * time.Millisecond, 55 | PostEmptyQueryDelay: 100 * time.Millisecond, 56 | PostFailedQueryDelay: 100 * time.Millisecond, 57 | QueryTimeWindowSize: 100 * time.Millisecond, 58 | ConfidenceWindowSize: time.Millisecond, 59 | } 60 | 61 | // Configure a session 62 | address := testutils.GetSourceClusterContactPoint() 63 | keyspaceName := testutils.CreateUniqueKeyspace(t, address) 64 | cluster := gocql.NewCluster(address) 65 | cluster.Keyspace = keyspaceName 66 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 67 | session, err := cluster.CreateSession() 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | defer session.Close() 72 | 73 | execQuery(t, session, "CREATE TABLE tbl (pk int PRIMARY KEY, v int) WITH 
cdc = {'enabled': true}") 74 | 75 | cfg := &scyllacdc.ReaderConfig{ 76 | Session: session, 77 | ChangeConsumerFactory: consumer, 78 | TableNames: []string{keyspaceName + ".tbl"}, 79 | Advanced: adv, 80 | Logger: log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile), 81 | } 82 | 83 | startTime := time.Now() 84 | 85 | reader, err := scyllacdc.NewReader(context.Background(), cfg) 86 | if err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | errC := make(chan error) 91 | go func() { errC <- reader.Run(context.Background()) }() 92 | 93 | time.Sleep(time.Second) 94 | 95 | endTime := startTime.Add(5 * time.Second) 96 | reader.StopAt(endTime) 97 | if err := <-errC; err != nil { 98 | t.Fatal(err) 99 | } 100 | 101 | // All timestamps should be roughly between startTime and endTime 102 | // To adjust for different clock on the scylla node, allow the time 103 | // to exceed one second 104 | acceptableStart := startTime.Add(-time.Second) 105 | acceptableEnd := endTime.Add(time.Second) 106 | 107 | timestamps := consumer.GetTimestamps() 108 | 109 | if len(timestamps) == 0 { 110 | t.Fatal("no empty event timestamps recorded") 111 | } 112 | 113 | for _, tstp := range timestamps { 114 | early := !acceptableStart.Before(tstp.Time()) 115 | late := !tstp.Time().Before(acceptableEnd) 116 | if early || late { 117 | t.Errorf("timestamp of empty event %s not in expected range %s, %s", 118 | tstp.Time(), acceptableStart, acceptableEnd) 119 | } 120 | } 121 | } 122 | 123 | func TestConsumerResumesWithTableBackedProgressReporter(t *testing.T) { 124 | // Makes sure that the table backed progress consumer is able to resume correctly 125 | // when StartGeneration was called, but no SaveProgress has been called 126 | // so far. 
127 | 128 | // Configure a session 129 | address := testutils.GetSourceClusterContactPoint() 130 | keyspaceName := testutils.CreateUniqueKeyspace(t, address) 131 | cluster := gocql.NewCluster(address) 132 | cluster.Keyspace = keyspaceName 133 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 134 | session, err := cluster.CreateSession() 135 | if err != nil { 136 | t.Fatal(err) 137 | } 138 | defer session.Close() 139 | 140 | execQuery(t, session, "CREATE TABLE tbl (pk int PRIMARY KEY, v int) WITH cdc = {'enabled': true}") 141 | 142 | runWithProgressReporter := func(consumerFactory scyllacdc.ChangeConsumerFactory, endTime time.Time, adv scyllacdc.AdvancedReaderConfig) { 143 | progressManager, err := scyllacdc.NewTableBackedProgressManager(session, "progress", "test") 144 | if err != nil { 145 | t.Fatalf("failed to create progress manager: %v", err) 146 | } 147 | 148 | cfg := &scyllacdc.ReaderConfig{ 149 | Session: session, 150 | ChangeConsumerFactory: consumerFactory, 151 | TableNames: []string{keyspaceName + ".tbl"}, 152 | ProgressManager: progressManager, 153 | Advanced: adv, 154 | Logger: log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile), 155 | } 156 | 157 | reader, err := scyllacdc.NewReader(context.Background(), cfg) 158 | if err != nil { 159 | t.Fatal(err) 160 | } 161 | 162 | errC := make(chan error) 163 | go func() { errC <- reader.Run(context.Background()) }() 164 | 165 | time.Sleep(500 * time.Millisecond) 166 | 167 | reader.StopAt(endTime) 168 | if err := <-errC; err != nil { 169 | t.Fatal(err) 170 | } 171 | } 172 | 173 | startTime := time.Now() 174 | 175 | adv := scyllacdc.AdvancedReaderConfig{ 176 | PostNonEmptyQueryDelay: 100 * time.Millisecond, 177 | PostEmptyQueryDelay: 100 * time.Millisecond, 178 | PostFailedQueryDelay: 100 * time.Millisecond, 179 | QueryTimeWindowSize: 100 * time.Millisecond, 180 | ConfidenceWindowSize: time.Millisecond, 181 | } 182 | 183 | // Create and start the 
first consumer which will not call SaveProgress 184 | // Start reading from ~now and stop after two seconds 185 | // We should record that we started now but recorded no progress for 186 | // any stream 187 | adv.ChangeAgeLimit = -time.Millisecond 188 | consumer := &recordingConsumer{mu: &sync.Mutex{}} 189 | runWithProgressReporter(consumer, startTime.Add(2*time.Second), adv) 190 | 191 | // Create and start the second consumer 192 | // The progress manager should resume reading from the time 193 | // when the previous run was started, not 1 minute ago 194 | adv.ChangeAgeLimit = 10 * time.Second 195 | consumer = &recordingConsumer{mu: &sync.Mutex{}} 196 | runWithProgressReporter(consumer, startTime.Add(4*time.Second), adv) 197 | 198 | // All timestamps should be roughly between startTime and endTime 199 | // To adjust for different clock on the scylla node, allow the time 200 | // to exceed one second 201 | acceptableStart := startTime.Add(-time.Second) 202 | acceptableEnd := startTime.Add(4 * time.Second).Add(time.Second) 203 | 204 | timestamps := consumer.GetTimestamps() 205 | 206 | if len(timestamps) == 0 { 207 | t.Fatal("no empty event timestamps recorded") 208 | } 209 | 210 | for _, tstp := range timestamps { 211 | early := !acceptableStart.Before(tstp.Time()) 212 | late := !tstp.Time().Before(acceptableEnd) 213 | if early || late { 214 | t.Errorf("timestamp of empty event %s not in expected range %s, %s", 215 | tstp.Time(), acceptableStart, acceptableEnd) 216 | } 217 | } 218 | } 219 | 220 | func TestConsumerHonorsTableTTL(t *testing.T) { 221 | // Make sure that the library doesn't attempt to read earlier than 222 | // the table TTL 223 | 224 | // Configure a session 225 | address := testutils.GetSourceClusterContactPoint() 226 | keyspaceName := testutils.CreateUniqueKeyspace(t, address) 227 | cluster := gocql.NewCluster(address) 228 | cluster.Keyspace = keyspaceName 229 | cluster.PoolConfig.HostSelectionPolicy = 
gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 230 | session, err := cluster.CreateSession() 231 | if err != nil { 232 | t.Fatal(err) 233 | } 234 | defer session.Close() 235 | 236 | // Create a table with a very short TTL 237 | execQuery(t, session, "CREATE TABLE tbl (pk int PRIMARY KEY, v int) WITH cdc = {'enabled': true, 'ttl': 2}") 238 | 239 | startTime := time.Now() 240 | endTime := startTime.Add(2 * time.Second) 241 | 242 | adv := scyllacdc.AdvancedReaderConfig{ 243 | PostNonEmptyQueryDelay: 100 * time.Millisecond, 244 | PostEmptyQueryDelay: 100 * time.Millisecond, 245 | PostFailedQueryDelay: 100 * time.Millisecond, 246 | QueryTimeWindowSize: 500 * time.Millisecond, 247 | ConfidenceWindowSize: time.Millisecond, 248 | ChangeAgeLimit: time.Minute, // should be overridden by the TTL 249 | } 250 | 251 | consumer := &recordingConsumer{mu: &sync.Mutex{}} 252 | 253 | cfg := &scyllacdc.ReaderConfig{ 254 | Session: session, 255 | ChangeConsumerFactory: consumer, 256 | TableNames: []string{keyspaceName + ".tbl"}, 257 | Advanced: adv, 258 | Logger: log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile), 259 | } 260 | 261 | reader, err := scyllacdc.NewReader(context.Background(), cfg) 262 | if err != nil { 263 | t.Fatal(err) 264 | } 265 | 266 | errC := make(chan error) 267 | go func() { errC <- reader.Run(context.Background()) }() 268 | 269 | time.Sleep(500 * time.Millisecond) 270 | 271 | reader.StopAt(endTime) 272 | if err := <-errC; err != nil { 273 | t.Fatal(err) 274 | } 275 | 276 | // All timestamps should be roughly between startTime-TTL and endTime 277 | // To adjust for different clock on the scylla node, allow the time 278 | // to exceed one second 279 | acceptableStart := startTime.Add(-time.Second).Add(-2 * time.Second) 280 | acceptableEnd := startTime.Add(2 * time.Second).Add(time.Second) 281 | 282 | timestamps := consumer.GetTimestamps() 283 | 284 | if len(timestamps) == 0 { 285 | t.Fatal("no empty event timestamps recorded") 286 | } 287 
| 288 | for _, tstp := range timestamps { 289 | early := !acceptableStart.Before(tstp.Time()) 290 | late := !tstp.Time().Before(acceptableEnd) 291 | if early || late { 292 | t.Errorf("timestamp of empty event %s not in expected range %s, %s", 293 | tstp.Time(), acceptableStart, acceptableEnd) 294 | } 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package scyllacdc is a library that helps develop applications that react 3 | to changes from Scylla's CDC. 4 | 5 | It is recommended to get familiar with the Scylla CDC documentation first 6 | in order to understand the concepts used in the documentation of scyllacdc: 7 | https://docs.scylladb.com/using-scylla/cdc/ 8 | 9 | # Overview 10 | 11 | The library hides the complexity of reading from CDC log stemming from 12 | the need for polling for changes and handling topology changes. It reads 13 | changes from CDC logs of selected tables and propagates them to instances 14 | of ChangeConsumer - which is an interface that is meant to be implemented 15 | by the user. 16 | 17 | # Getting started 18 | 19 | To start working with the library, you first need to implement your own 20 | logic for consuming changes. The simplest way to do it is to define 21 | a ChangeConsumerFunc which will be called for each change from the CDC log. 
22 | For example: 23 | 24 | func printerConsumer(ctx context.Context, tableName string, c scyllacdc.Change) error { 25 | fmt.Printf("[%s] %#v\n", tableName, c); return nil 26 | } 27 | 28 | For any use case more complicated than above, you will need to define 29 | a ChangeConsumer and a ChangeConsumerFactory: 30 | 31 | type myConsumer struct { 32 | id int 33 | tableName string 34 | } 35 | 36 | func (mc *myConsumer) Consume(ctx context.Context, change scyllacdc.Change) error { 37 | fmt.Printf("[%d] [%s] %#v\n", mc.id, mc.tableName, change) 38 | return nil 39 | } 40 | 41 | func (mc *myConsumer) End() error { 42 | return nil 43 | } 44 | 45 | type myFactory struct { 46 | nextID int 47 | } 48 | 49 | func (f *myFactory) CreateChangeConsumer(ctx context.Context, input scyllacdc.CreateChangeConsumerInput) (scyllacdc.ChangeConsumer, error) { 50 | f.nextID++ 51 | return &myConsumer{ 52 | id: f.nextID-1, 53 | tableName: input.TableName, 54 | }, nil 55 | } 56 | 57 | Next, you need to create and run a scyllacdc.Reader object: 58 | 59 | func main() { 60 | cluster := gocql.NewCluster("127.0.0.1") 61 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 62 | session, err := cluster.CreateSession() 63 | if err != nil { 64 | log.Fatal(err) 65 | } 66 | defer session.Close() 67 | 68 | cfg := &scyllacdc.ReaderConfig{ 69 | Session: session, 70 | TableNames: []string{"my_keyspace.my_table"}, 71 | ChangeConsumerFactory: scyllacdc.MakeChangeConsumerFactoryFromFunc(printerConsumer), 72 | // The above can be changed to: 73 | // ChangeConsumerFactory: &myFactory{}, 74 | } 75 | 76 | reader, err := scyllacdc.NewReader(context.Background(), cfg) 77 | if err != nil { 78 | log.Fatal(err) 79 | } 80 | 81 | // React to Ctrl+C signal, and stop gracefully after the first signal 82 | // Second signal exits the process 83 | signalC := make(chan os.Signal, 1) 84 | go func() { 85 | <-signalC 86 | reader.Stop() 87 | 88 | <-signalC 89 | os.Exit(1) 90 | }() 91 | 
signal.Notify(signalC, os.Interrupt) 92 | 93 | if err := reader.Run(context.Background()); err != nil { 94 | log.Fatal(err) 95 | } 96 | } 97 | 98 | # Saving progress 99 | 100 | The library supports saving progress and restoring from the last saved position. 101 | To enable it, you need to do two things: 102 | 103 | First, you need to modify your consumer to regularly save progress. The consumer 104 | receives a *scyllacdc.ProgressReporter object which can be used to save progress 105 | at any point in the lifetime of the consumer. 106 | 107 | The library itself doesn't regularly save progress - it only does it by itself 108 | when switching to the next CDC generation. Therefore, the consumer is 109 | responsible for saving the progress regularly. 110 | 111 | Example: 112 | 113 | type myConsumer struct { 114 | // PeriodicProgressReporter is a wrapper around ProgressReporter 115 | // which rate-limits saving the progress 116 | reporter *scyllacdc.PeriodicProgressReporter 117 | } 118 | 119 | func (mc *myConsumer) Consume(ctx context.Context, change scyllacdc.Change) error { 120 | // ... do work ... 121 | 122 | mc.reporter.Update(change.Time) 123 | return nil 124 | } 125 | 126 | func (mc *myConsumer) End() error { 127 | _ = mc.reporter.SaveAndStop(context.Background()) 128 | return nil 129 | } 130 | 131 | type myFactory struct { 132 | session *gocql.Session 133 | } 134 | 135 | func (f *myFactory) CreateChangeConsumer(ctx context.Context, input scyllacdc.CreateChangeConsumerInput) (scyllacdc.ChangeConsumer, error) { 136 | reporter := scyllacdc.NewPeriodicProgressReporter(f.session, time.Minute, input.ProgressReporter) 137 | reporter.Start(ctx) 138 | return &myConsumer{reporter: reporter}, nil 139 | } 140 | 141 | Then, you need to specify an appropriate ProgressManager in the configuration. 142 | ProgressManager represents a mechanism of saving and restoring progress. You can 143 | use the provided implementations (TableBackedProgressManager), or implement 144 | it yourself. 
145 | 146 | In the main function: 147 | 148 | cfg.ProgressManager, err = scyllacdc.NewTableBackedProgressManager(session, "my_keyspace.progress_table", "my_application_name") 149 | 150 | # Processing changes 151 | 152 | Data from the CDC log is supplied to the ChangeConsumer through Change objects, 153 | which can contain multiple ChangeRow objects. A single ChangeRow corresponds 154 | to a single, full (all columns included) row from the CDC log. 155 | 156 | func (mc *myConsumer) Consume(ctx context.Context, change scyllacdc.Change) error { 157 | for _, changeRow := range change.Delta { 158 | // You can access CDC columns directly via 159 | // GetValue, IsDeleted, GetDeletedElements 160 | rawValue, _ := changeRow.GetValue("col_int") 161 | intValue := rawValue.(*int) 162 | isDeleted, _ := changeRow.IsDeleted("col_int") 163 | if isDeleted { 164 | fmt.Println("Column col_int was set to null") 165 | } else if intValue != nil { 166 | fmt.Printf("Column col_int was set to %d\n", *intValue) 167 | } 168 | 169 | // You can also use convenience functions: 170 | // GetAtomicChange, GetListChange, GetUDTChange, etc. 
171 | atomicChange := changeRow.GetAtomicChange("col_text") 172 | strValue := atomicChange.Value.(*string) 173 | if atomicChange.IsDeleted { 174 | fmt.Println("Column col_text was deleted") 175 | } else if strValue != nil { 176 | fmt.Printf("Column col_text was set to %s\n", *strValue) 177 | } 178 | } 179 | 180 | return nil 181 | } 182 | */ 183 | package scyllacdc 184 | -------------------------------------------------------------------------------- /examples/printer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | 11 | "github.com/gocql/gocql" 12 | 13 | scyllacdc "github.com/scylladb/scylla-cdc-go" 14 | ) 15 | 16 | func main() { 17 | var ( 18 | keyspace string 19 | table string 20 | source string 21 | datacenter string 22 | ) 23 | 24 | flag.StringVar(&keyspace, "keyspace", "", "keyspace name") 25 | flag.StringVar(&table, "table", "", "table name") 26 | flag.StringVar(&source, "source", "127.0.0.1", "address of a node in the cluster") 27 | flag.StringVar(&datacenter, "datacenter", "", "target datacenter") 28 | flag.Parse() 29 | 30 | if err := run(context.Background(), source, datacenter, keyspace, table); err != nil { 31 | log.Fatal(err) 32 | } 33 | } 34 | 35 | func run(ctx context.Context, source, datacenter, keyspace, table string) error { 36 | cluster := gocql.NewCluster(source) 37 | if datacenter == "" { 38 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 39 | } else { 40 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.DCAwareRoundRobinPolicy(datacenter)) 41 | } 42 | session, err := cluster.CreateSession() 43 | if err != nil { 44 | return err 45 | } 46 | defer session.Close() 47 | 48 | cfg := &scyllacdc.ReaderConfig{ 49 | Session: session, 50 | ChangeConsumerFactory: changeConsumerFactory, 51 | TableNames: []string{keyspace + "." 
+ table}, 52 | Logger: log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile), 53 | } 54 | 55 | reader, err := scyllacdc.NewReader(ctx, cfg) 56 | if err != nil { 57 | return err 58 | } 59 | 60 | // React to Ctrl+C signal, and stop gracefully after the first signal 61 | // Second signal exits the process 62 | signalC := make(chan os.Signal, 2) 63 | go func() { 64 | <-signalC 65 | reader.Stop() 66 | 67 | <-signalC 68 | os.Exit(1) 69 | }() 70 | signal.Notify(signalC, os.Interrupt) 71 | 72 | return reader.Run(ctx) 73 | } 74 | 75 | func printerConsumer(ctx context.Context, tableName string, c scyllacdc.Change) error { 76 | fmt.Printf("[%s %s]:\n", c.StreamID, c.Time.String()) 77 | if len(c.PreImage) > 0 { 78 | fmt.Println(" PREIMAGE:") 79 | for _, r := range c.PreImage { 80 | fmt.Printf(" %s\n", r) 81 | } 82 | } 83 | if len(c.Delta) > 0 { 84 | fmt.Println(" DELTA:") 85 | for _, r := range c.Delta { 86 | fmt.Printf(" %s\n", r) 87 | } 88 | } 89 | if len(c.PostImage) > 0 { 90 | fmt.Println(" POSTIMAGE:") 91 | for _, r := range c.PostImage { 92 | fmt.Printf(" %s\n", r) 93 | } 94 | } 95 | fmt.Println() 96 | 97 | return nil 98 | } 99 | 100 | var changeConsumerFactory = scyllacdc.MakeChangeConsumerFactoryFromFunc(printerConsumer) 101 | -------------------------------------------------------------------------------- /examples/replicator-aws-sns/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/scylladb/replicator-was-sns 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go-v2 v1.32.6 7 | github.com/aws/aws-sdk-go-v2/config v1.28.6 8 | github.com/aws/aws-sdk-go-v2/service/sns v1.33.7 9 | github.com/gocql/gocql v0.0.0-20201215165327-e49edf966d90 10 | github.com/scylladb/scylla-cdc-go v0.0.0-20201215165327-e49edf966d90 11 | ) 12 | 13 | require ( 14 | github.com/aws/aws-sdk-go-v2/credentials v1.17.47 // indirect 15 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect 16 | 
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 // indirect 17 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 // indirect 18 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect 19 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect 20 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 // indirect 21 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 // indirect 22 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 // indirect 23 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 // indirect 24 | github.com/aws/smithy-go v1.22.1 // indirect 25 | github.com/golang/snappy v0.0.3 // indirect 26 | github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect 27 | golang.org/x/sync v0.8.0 // indirect 28 | gopkg.in/inf.v0 v0.9.1 // indirect 29 | ) 30 | 31 | replace ( 32 | github.com/gocql/gocql => github.com/scylladb/gocql v1.14.4 33 | github.com/scylladb/scylla-cdc-go => ../../ 34 | ) 35 | -------------------------------------------------------------------------------- /examples/replicator-aws-sns/go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-sdk-go-v2 v1.32.6 h1:7BokKRgRPuGmKkFMhEg/jSul+tB9VvXhcViILtfG8b4= 2 | github.com/aws/aws-sdk-go-v2 v1.32.6/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U= 3 | github.com/aws/aws-sdk-go-v2/config v1.28.6 h1:D89IKtGrs/I3QXOLNTH93NJYtDhm8SYa9Q5CsPShmyo= 4 | github.com/aws/aws-sdk-go-v2/config v1.28.6/go.mod h1:GDzxJ5wyyFSCoLkS+UhGB0dArhb9mI+Co4dHtoTxbko= 5 | github.com/aws/aws-sdk-go-v2/credentials v1.17.47 h1:48bA+3/fCdi2yAwVt+3COvmatZ6jUDNkDTIsqDiMUdw= 6 | github.com/aws/aws-sdk-go-v2/credentials v1.17.47/go.mod h1:+KdckOejLW3Ks3b0E3b5rHsr2f9yuORBum0WPnE5o5w= 7 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 h1:AmoU1pziydclFT/xRV+xXE/Vb8fttJCLRPv8oAkprc0= 8 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21/go.mod 
h1:AjUdLYe4Tgs6kpH4Bv7uMZo7pottoyHMn4eTcIcneaY= 9 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 h1:s/fF4+yDQDoElYhfIVvSNyeCydfbuTKzhxSXDXCPasU= 10 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25/go.mod h1:IgPfDv5jqFIzQSNbUEMoitNooSMXjRSDkhXv8jiROvU= 11 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 h1:ZntTCl5EsYnhN/IygQEUugpdwbhdkom9uHcbCftiGgA= 12 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25/go.mod h1:DBdPrgeocww+CSl1C8cEV8PN1mHMBhuCDLpXezyvWkE= 13 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= 14 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= 15 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 h1:iXtILhvDxB6kPvEXgsDhGaZCSC6LQET5ZHSdJozeI0Y= 16 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1/go.mod h1:9nu0fVANtYiAePIBh2/pFUSwtJ402hLnp854CNoDOeE= 17 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 h1:50+XsN70RS7dwJ2CkVNXzj7U2L1HKP8nqTd3XWEXBN4= 18 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6/go.mod h1:WqgLmwY7so32kG01zD8CPTJWVWM+TzJoOVHwTg4aPug= 19 | github.com/aws/aws-sdk-go-v2/service/sns v1.33.7 h1:N3o8mXK6/MP24BtD9sb51omEO9J9cgPM3Ughc293dZc= 20 | github.com/aws/aws-sdk-go-v2/service/sns v1.33.7/go.mod h1:AAHZydTB8/V2zn3WNwjLXBK1RAcSEpDNmFfrmjvrJQg= 21 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 h1:rLnYAfXQ3YAccocshIH5mzNNwZBkBo+bP6EhIxak6Hw= 22 | github.com/aws/aws-sdk-go-v2/service/sso v1.24.7/go.mod h1:ZHtuQJ6t9A/+YDuxOLnbryAmITtr8UysSny3qcyvJTc= 23 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 h1:JnhTZR3PiYDNKlXy50/pNeix9aGMo6lLpXwJ1mw8MD4= 24 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6/go.mod h1:URronUEGfXZN1VpdktPSD1EkAL9mfrV+2F4sjH38qOY= 25 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 h1:s4074ZO1Hk8qv65GqNXqDjmkf4HSQqJukaLuuW0TpDA= 26 | 
github.com/aws/aws-sdk-go-v2/service/sts v1.33.2/go.mod h1:mVggCnIWoM09jP71Wh+ea7+5gAp53q+49wDFs1SW5z8= 27 | github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= 28 | github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= 29 | github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= 30 | github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= 31 | github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= 32 | github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= 33 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 34 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 35 | github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= 36 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 37 | github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= 38 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 39 | github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= 40 | github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= 41 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 42 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 43 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 44 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 45 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 46 | 
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 47 | github.com/scylladb/gocql v1.14.4 h1:MhevwCfyAraQ6RvZYFO3pF4Lt0YhvQlfg8Eo2HEqVQA= 48 | github.com/scylladb/gocql v1.14.4/go.mod h1:ZLEJ0EVE5JhmtxIW2stgHq/v1P4fWap0qyyXSKyV8K0= 49 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 51 | golang.org/x/net v0.0.0-20220526153639-5463443f8c37 h1:lUkvobShwKsOesNfWWlCS5q7fnbG1MEliIzwu886fn8= 52 | golang.org/x/net v0.0.0-20220526153639-5463443f8c37/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 53 | golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= 54 | golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 55 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 56 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 57 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 58 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 59 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 60 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 61 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 62 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 63 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 64 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 65 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 66 | sigs.k8s.io/yaml v1.3.0 
h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 67 | sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= 68 | -------------------------------------------------------------------------------- /examples/replicator-aws-sns/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "os/signal" 11 | "strings" 12 | "sync/atomic" 13 | "time" 14 | 15 | "github.com/gocql/gocql" 16 | scyllacdc "github.com/scylladb/scylla-cdc-go" 17 | 18 | "github.com/aws/aws-sdk-go-v2/aws" 19 | "github.com/aws/aws-sdk-go-v2/config" 20 | "github.com/aws/aws-sdk-go-v2/service/sns" 21 | ) 22 | 23 | // TODO: Escape field names? 24 | var showTimestamps = false 25 | 26 | var reportPeriod = 1 * time.Minute 27 | 28 | func main() { 29 | var ( 30 | keyspace string 31 | table string 32 | source string 33 | progressNode string 34 | readConsistency string 35 | writeConsistency string 36 | 37 | snsTopic string 38 | snsRegion string 39 | snsSubject string 40 | 41 | progressTable string 42 | ) 43 | 44 | flag.StringVar(&keyspace, "keyspace", "", "keyspace name") 45 | flag.StringVar(&table, "table", "", "table name; you can specify multiple table by separating them with a comma") 46 | flag.StringVar(&source, "source", "", "address of a node in source cluster") 47 | flag.StringVar(&progressNode, "progress-node", "", "address of a node in progress cluster") 48 | flag.StringVar(&readConsistency, "read-consistency", "", "consistency level used to read from cdc log (one, quorum, all)") 49 | flag.StringVar(&writeConsistency, "write-consistency", "", "consistency level used to write to the destination cluster (one, quorum, all)") 50 | flag.StringVar(&progressTable, "progress-table", "", "fully-qualified name of the table in the destination cluster to use for saving progress; if omitted, the progress won't be saved") 51 | 52 | 
flag.StringVar(&snsTopic, "sns-topic", "", "SNS Topic ARN") 53 | flag.StringVar(&snsSubject, "sns-subject", "", "SNS Subject") 54 | flag.StringVar(&snsRegion, "sns-region", "", "AWS region where SNS topic is deployed") 55 | 56 | flag.String("mode", "", "mode (ignored)") 57 | 58 | adv := scyllacdc.AdvancedReaderConfig{} 59 | flag.DurationVar(&adv.ConfidenceWindowSize, "polling-confidence-window-size", 30*time.Second, "defines a minimal age a change must have in order to be read.") 60 | flag.DurationVar(&adv.ChangeAgeLimit, "polling-change-age-limit", 10*time.Minute, "When the library starts for the first time it has to start consuming\nchanges from some point in time. This parameter defines how far in the\npast it needs to look. If the value of the parameter is set to an hour,\nthen the library will only read historical changes that are no older than\nan hour.") 61 | flag.DurationVar(&adv.QueryTimeWindowSize, "pooling-query-time-window-size", 1*time.Minute, "Changes are queried using select statements with restriction on the time\nthose changes appeared. The restriction is bounding the time from both\nlower and upper bounds. This parameter defines the width of the time\nwindow used for the restriction.") 62 | flag.DurationVar(&adv.PostEmptyQueryDelay, "polling-post-empty-query-delay", 30*time.Second, "The library uses select statements to fetch changes from CDC Log tables.\nEach select fetches changes from a single table and fetches only changes\nfrom a limited set of CDC streams. If such select returns no changes then\nnext select to this table and set of CDC streams will be issued after\na delay. This parameter specifies the length of the delay") 63 | flag.DurationVar(&adv.PostNonEmptyQueryDelay, "polling-post-non-empty-query-delay", 10*time.Second, "The library uses select statements to fetch changes from CDC Log tables.\nEach select fetches changes from a single table and fetches only changes\nfrom a limited set of CDC streams. 
If such select returns one or more\nchanges then next select to this table and set of CDC streams will be\nissued after a delay. This parameter specifies the length of the delay") 64 | flag.DurationVar(&adv.PostFailedQueryDelay, "pooling-post-failed-query-delay", 1*time.Second, "If the library tries to read from the CDC log and the read operation\nfails, it will wait some time before attempting to read again. This\nparameter specifies the length of the delay.") 65 | 66 | flag.Parse() 67 | 68 | clRead := parseConsistency(readConsistency) 69 | clWrite := parseConsistency(writeConsistency) 70 | 71 | fmt.Println("Parameters:") 72 | fmt.Printf(" Keyspace: %s\n", keyspace) 73 | fmt.Printf(" Table: %s\n", table) 74 | fmt.Printf(" Source cluster IP: %s\n", source) 75 | fmt.Printf(" Destination cluster IP: %s\n", progressNode) 76 | fmt.Printf(" Consistency for reads: %s\n", clRead) 77 | fmt.Printf(" Consistency for writes: %s\n", clWrite) 78 | fmt.Printf(" Table to use for saving progress: %s\n", progressTable) 79 | fmt.Println("Advanced reader parameters:") 80 | fmt.Printf(" Confidence window size: %s\n", adv.ConfidenceWindowSize) 81 | fmt.Printf(" Change age limit: %s\n", adv.ChangeAgeLimit) 82 | fmt.Printf(" Query window size: %s\n", adv.QueryTimeWindowSize) 83 | fmt.Printf(" Delay after poll with empty results: %s\n", adv.PostEmptyQueryDelay) 84 | fmt.Printf(" Delay after poll with non-empty results: %s\n", adv.PostNonEmptyQueryDelay) 85 | fmt.Printf(" Delay after failed poll: %s\n", adv.PostFailedQueryDelay) 86 | 87 | var fullyQualifiedTables []string 88 | 89 | for _, t := range strings.Split(table, ",") { 90 | fullyQualifiedTables = append(fullyQualifiedTables, keyspace+"."+t) 91 | } 92 | 93 | logger := log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile) 94 | repl, err := newReplicator( 95 | context.Background(), 96 | source, progressNode, 97 | fullyQualifiedTables, 98 | snsTopic, snsSubject, snsRegion, 99 | &adv, 100 | clRead, 101 | clWrite, 102 | 
progressTable, 103 | logger, 104 | ) 105 | if err != nil { 106 | log.Fatalln(err) 107 | } 108 | 109 | ctx, cancel := context.WithCancel(context.Background()) 110 | 111 | // React to Ctrl+C signal. 112 | // 113 | // 1st signal will cause the replicator to read changes up until 114 | // the moment the signal was received, and then it will stop the replicator. 115 | // This is the "most graceful" way of stopping the replicator. 116 | // 117 | // 2nd signal will cancel the context. This should stop all operations 118 | // done by the replicator ASAP and stop it. 119 | // 120 | // 3rd signal will exit the process immediately with error code 1. 121 | signalC := make(chan os.Signal, 3) 122 | go func() { 123 | <-signalC 124 | now := time.Now() 125 | log.Printf("stopping at %v", now) 126 | repl.StopAt(now) 127 | 128 | <-signalC 129 | log.Printf("stopping now") 130 | cancel() 131 | 132 | <-signalC 133 | log.Printf("killing") 134 | os.Exit(1) 135 | }() 136 | signal.Notify(signalC, os.Interrupt) 137 | 138 | if err := repl.Run(ctx); err != nil { 139 | log.Fatalln(err) 140 | } 141 | 142 | log.Printf("quitting, rows read: %d", repl.GetReadRowsCount()) 143 | } 144 | 145 | func parseConsistency(s string) gocql.Consistency { 146 | switch strings.ToLower(s) { 147 | case "one": 148 | return gocql.One 149 | case "quorum": 150 | return gocql.Quorum 151 | case "all": 152 | return gocql.All 153 | default: 154 | log.Printf("warning: got unsupported consistency level \"%s\", will use \"one\" instead", s) 155 | return gocql.One 156 | } 157 | } 158 | 159 | type replicator struct { 160 | reader *scyllacdc.Reader 161 | 162 | readerSession *gocql.Session 163 | progressSession *gocql.Session 164 | 165 | rowsRead *int64 166 | } 167 | 168 | func newReplicator( 169 | ctx context.Context, 170 | source, destination string, 171 | tableNames []string, 172 | topic, subject, region string, 173 | advancedParams *scyllacdc.AdvancedReaderConfig, 174 | readConsistency gocql.Consistency, 175 | 
progressConsistency gocql.Consistency, 176 | progressTable string, 177 | logger scyllacdc.Logger, 178 | ) (*replicator, error) { 179 | ptCluster := gocql.NewCluster(destination) 180 | ptCluster.Timeout = 10 * time.Second 181 | ptCluster.Consistency = progressConsistency 182 | progressSession, err := ptCluster.CreateSession() 183 | if err != nil { 184 | return nil, err 185 | } 186 | 187 | // Configure a session 188 | readerCluster := gocql.NewCluster(source) 189 | readerCluster.Timeout = 10 * time.Second 190 | readerCluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 191 | readerSession, err := readerCluster.CreateSession() 192 | if err != nil { 193 | progressSession.Close() 194 | return nil, err 195 | } 196 | 197 | rowsRead := new(int64) 198 | 199 | factory := &replicatorFactory{ 200 | rowsRead: rowsRead, 201 | topic: topic, 202 | subject: subject, 203 | region: region, 204 | logger: logger, 205 | } 206 | 207 | var progressManager scyllacdc.ProgressManager 208 | if progressTable != "" { 209 | progressManager, err = scyllacdc.NewTableBackedProgressManager(progressSession, progressTable, "cdc-replicator") 210 | if err != nil { 211 | progressSession.Close() 212 | return nil, err 213 | } 214 | } 215 | 216 | cfg := &scyllacdc.ReaderConfig{ 217 | Session: readerSession, 218 | ChangeConsumerFactory: factory, 219 | ProgressManager: progressManager, 220 | TableNames: tableNames, 221 | Consistency: readConsistency, 222 | } 223 | 224 | if advancedParams != nil { 225 | cfg.Advanced = *advancedParams 226 | } 227 | cfg.Consistency = readConsistency 228 | cfg.Logger = logger 229 | 230 | reader, err := scyllacdc.NewReader(ctx, cfg) 231 | if err != nil { 232 | readerSession.Close() 233 | progressSession.Close() 234 | return nil, err 235 | } 236 | 237 | repl := &replicator{ 238 | reader: reader, 239 | 240 | readerSession: readerSession, 241 | progressSession: progressSession, 242 | 243 | rowsRead: rowsRead, 244 | } 245 | 246 | return 
repl, nil 247 | } 248 | 249 | func (repl *replicator) Run(ctx context.Context) error { 250 | defer repl.progressSession.Close() 251 | defer repl.readerSession.Close() 252 | return repl.reader.Run(ctx) 253 | } 254 | 255 | func (repl *replicator) StopAt(at time.Time) { 256 | repl.reader.StopAt(at) 257 | } 258 | 259 | func (repl *replicator) Stop() { 260 | repl.reader.Stop() 261 | } 262 | 263 | func (repl *replicator) GetReadRowsCount() int64 { 264 | return atomic.LoadInt64(repl.rowsRead) 265 | } 266 | 267 | type replicatorFactory struct { 268 | rowsRead *int64 269 | topic string 270 | subject string 271 | region string 272 | logger scyllacdc.Logger 273 | } 274 | 275 | func (rf *replicatorFactory) CreateChangeConsumer( 276 | ctx context.Context, 277 | input scyllacdc.CreateChangeConsumerInput, 278 | ) (scyllacdc.ChangeConsumer, error) { 279 | splitTableName := strings.SplitN(input.TableName, ".", 2) 280 | if len(splitTableName) < 2 { 281 | return nil, fmt.Errorf("table name is not fully qualified: %s", input.TableName) 282 | } 283 | return NewSNSReplicator(ctx, rf.topic, rf.subject, rf.region, rf.rowsRead, input.StreamID, input.ProgressReporter, rf.logger) 284 | } 285 | 286 | type SNSReplicator struct { 287 | snsClient *sns.Client 288 | snsTopic string 289 | snsSubject string 290 | consistency gocql.Consistency 291 | 292 | pkColumns []string 293 | ckColumns []string 294 | otherColumns []string 295 | columnTypes map[string]TypeInfo 296 | allColumns []string 297 | 298 | insertStr string 299 | rowDeleteQueryStr string 300 | partitionDeleteQueryStr string 301 | 302 | localCount int64 303 | totalCount *int64 304 | 305 | streamID scyllacdc.StreamID 306 | reporter *scyllacdc.PeriodicProgressReporter 307 | } 308 | 309 | func NewSNSReplicator( 310 | ctx context.Context, 311 | topic, subject, region string, 312 | count *int64, 313 | streamID scyllacdc.StreamID, 314 | reporter *scyllacdc.ProgressReporter, 315 | logger scyllacdc.Logger, 316 | ) (*SNSReplicator, error) { 317 | var 
opts [](func(*config.LoadOptions) error) 318 | if region != "" { 319 | opts = append(opts, config.WithRegion(region)) 320 | } 321 | 322 | awsCfg, err := config.LoadDefaultConfig(ctx, opts...) 323 | if err != nil { 324 | return nil, err 325 | } 326 | 327 | dr := &SNSReplicator{ 328 | snsClient: sns.NewFromConfig(awsCfg), 329 | snsTopic: topic, 330 | snsSubject: subject, 331 | totalCount: count, 332 | streamID: streamID, 333 | reporter: scyllacdc.NewPeriodicProgressReporter(logger, reportPeriod, reporter), 334 | } 335 | 336 | dr.reporter.Start(ctx) 337 | return dr, nil 338 | } 339 | 340 | func (r *SNSReplicator) Consume(ctx context.Context, c scyllacdc.Change) error { 341 | timestamp := c.GetCassandraTimestamp() 342 | if showTimestamps { 343 | log.Printf("[%s] Processing timestamp: %s (%s)\n", c.StreamID, c.Time, c.Time.Time()) 344 | } 345 | 346 | for _, change := range c.Delta { 347 | if err := r.sendChangeToSNS(ctx, change, timestamp, "Delta"); err != nil { 348 | return err 349 | } 350 | } 351 | 352 | for _, change := range c.PreImage { 353 | if err := r.sendChangeToSNS(ctx, change, timestamp, "PreImage"); err != nil { 354 | return err 355 | } 356 | } 357 | 358 | for _, change := range c.PostImage { 359 | if err := r.sendChangeToSNS(ctx, change, timestamp, "PostImage"); err != nil { 360 | return err 361 | } 362 | } 363 | 364 | r.reporter.Update(c.Time) 365 | r.localCount += int64(len(c.Delta)) 366 | 367 | return nil 368 | } 369 | 370 | func (r *SNSReplicator) sendChangeToSNS(ctx context.Context, change *scyllacdc.ChangeRow, timestamp int64, recType string) error { 371 | change.GetRawData() 372 | change.GetOperation() 373 | 374 | msg, err := json.Marshal(map[string]interface{}{ 375 | "type": recType, 376 | "operation": change.GetOperation(), 377 | "timestamp": timestamp, 378 | "ttl": change.GetTTL(), 379 | "seq_no": change.GetSeqNo(), 380 | "end_of_batch": change.GetEndOfBatch(), 381 | "data": change.GetRawData(), 382 | }) 383 | if err != nil { 384 | return 
fmt.Errorf("failed to serialize message: %w", err) 385 | } 386 | 387 | _, err = r.snsClient.Publish(ctx, &sns.PublishInput{ 388 | TopicArn: aws.String(r.snsTopic), 389 | Message: aws.String(string(msg)), 390 | Subject: aws.String(r.snsSubject), 391 | }) 392 | 393 | if err != nil { 394 | return fmt.Errorf("failed to send message to SNS: %w", err) 395 | } 396 | return nil 397 | } 398 | 399 | func (r *SNSReplicator) End() error { 400 | log.Printf("Streams [%s]: processed %d changes in total", r.streamID, r.localCount) 401 | atomic.AddInt64(r.totalCount, r.localCount) 402 | _ = r.reporter.SaveAndStop(context.Background()) 403 | return nil 404 | } 405 | 406 | func (r *SNSReplicator) Empty(ctx context.Context, ackTime gocql.UUID) error { 407 | log.Printf("Streams [%s]: saw no changes up to %s", r.streamID, ackTime.Time()) 408 | r.reporter.Update(ackTime) 409 | return nil 410 | } 411 | 412 | // Make sure that SNSReplicator supports the ChangeOrEmptyNotificationConsumer interface 413 | var _ scyllacdc.ChangeOrEmptyNotificationConsumer = (*SNSReplicator)(nil) 414 | -------------------------------------------------------------------------------- /examples/replicator-aws-sns/replicator_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "reflect" 9 | "regexp" 10 | "testing" 11 | "time" 12 | 13 | "github.com/gocql/gocql" 14 | scyllacdc "github.com/scylladb/scylla-cdc-go" 15 | "github.com/scylladb/scylla-cdc-go/testutils" 16 | ) 17 | 18 | type schema struct { 19 | tableName string 20 | createQuery string 21 | } 22 | 23 | var udts = []string{ 24 | "CREATE TYPE udt_simple (a int, b int, c text)", 25 | } 26 | 27 | var ( 28 | schemaSimple = schema{ 29 | "tbl_simple", 30 | "CREATE TABLE tbl_simple (pk text, ck int, v1 int, v2 text, PRIMARY KEY (pk, ck))", 31 | } 32 | schemaMultipleClusteringKeys = schema{ 33 | "tbl_multiple_clustering_keys", 34 | "CREATE TABLE 
tbl_multiple_clustering_keys (pk text, ck1 int, ck2 int, v int, PRIMARY KEY (pk, ck1, ck2))", 35 | } 36 | schemaBlobs = schema{ 37 | "tbl_blobs", 38 | "CREATE TABLE tbl_blobs (pk text, ck int, v blob, PRIMARY KEY (pk, ck))", 39 | } 40 | schemaLists = schema{ 41 | "tbl_lists", 42 | "CREATE TABLE tbl_lists (pk text, ck int, v list, PRIMARY KEY(pk, ck))", 43 | } 44 | schemaSets = schema{ 45 | "tbl_sets", 46 | "CREATE TABLE tbl_sets (pk text, ck int, v set, PRIMARY KEY (pk, ck))", 47 | } 48 | schemaMaps = schema{ 49 | "tbl_maps", 50 | "CREATE TABLE tbl_maps (pk text, ck int, v map, PRIMARY KEY (pk, ck))", 51 | } 52 | schemaTuples = schema{ 53 | "tbl_tuples", 54 | "CREATE TABLE tbl_tuples (pk text, ck int, v tuple, PRIMARY KEY (pk, ck))", 55 | } 56 | schemaTuplesInTuples = schema{ 57 | "tbl_tuples_in_tuples", 58 | "CREATE TABLE tbl_tuples_in_tuples (pk text, ck int, v tuple, int>, PRIMARY KEY (pk, ck))", 59 | } 60 | schemaTuplesInTuplesInTuples = schema{ 61 | "tbl_tuples_in_tuples_in_tuples", 62 | "CREATE TABLE tbl_tuples_in_tuples_in_tuples (pk text, ck int, v tuple, text>, int>, PRIMARY KEY (pk, ck))", 63 | } 64 | schemaUDTs = schema{ 65 | "tbl_udts", 66 | "CREATE TABLE tbl_udts (pk text, ck int, v udt_simple, PRIMARY KEY (pk, ck))", 67 | } 68 | ) 69 | 70 | var testCases = []struct { 71 | schema schema 72 | pk string 73 | queries []string 74 | }{ 75 | // Operations test cases 76 | { 77 | schemaSimple, 78 | "simpleInserts", 79 | []string{ 80 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('simpleInserts', 1, 2, 'abc')", 81 | "INSERT INTO %s (pk, ck, v1) VALUES ('simpleInserts', 2, 3)", 82 | "INSERT INTO %s (pk, ck, v2) VALUES ('simpleInserts', 2, 'def')", 83 | }, 84 | }, 85 | { 86 | schemaSimple, 87 | "simpleUpdates", 88 | []string{ 89 | "UPDATE %s SET v1 = 1 WHERE pk = 'simpleUpdates' AND ck = 1", 90 | "UPDATE %s SET v2 = 'abc' WHERE pk = 'simpleUpdates' AND ck = 2", 91 | "UPDATE %s SET v1 = 5, v2 = 'def' WHERE pk = 'simpleUpdates' AND ck = 3", 92 | }, 93 | }, 94 | { 95 | 
schemaSimple, 96 | "rowDeletes", 97 | []string{ 98 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('rowDeletes', 1, 2, 'abc')", 99 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('rowDeletes', 2, 3, 'def')", 100 | "DELETE FROM %s WHERE pk = 'rowDeletes' AND ck = 1", 101 | }, 102 | }, 103 | { 104 | schemaSimple, 105 | "partitionDeletes", 106 | []string{ 107 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 1, 2, 'abc')", 108 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 2, 3, 'def')", 109 | "DELETE FROM %s WHERE pk = 'partitionDeletes'", 110 | // Insert one more row, just to check if replication works at all 111 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 4, 5, 'def')", 112 | }, 113 | }, 114 | { 115 | schemaMultipleClusteringKeys, 116 | "rangeDeletes", 117 | []string{ 118 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 1, 0)", 119 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 2, 0)", 120 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 3, 0)", 121 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 4, 0)", 122 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 1, 0)", 123 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 2, 0)", 124 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 3, 0)", 125 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 4, 0)", 126 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 1, 0)", 127 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 2, 0)", 128 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 3, 0)", 129 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 4, 0)", 130 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 1, 0)", 131 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 2, 0)", 132 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 3, 0)", 133 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES 
('rangeDeletes', 4, 4, 0)", 134 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 > 3", 135 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 <= 1", 136 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 = 2 AND ck2 > 1 AND ck2 < 4", 137 | }, 138 | }, 139 | 140 | // Blob test cases 141 | { 142 | schemaBlobs, 143 | "blobs", 144 | []string{ 145 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 1, 0x1234)", 146 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 2, 0x)", 147 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 3, null)", 148 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 4, 0x4321)", 149 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 5, 0x00)", 150 | "UPDATE %s SET v = null WHERE pk = 'blobs' AND ck = 4", 151 | "UPDATE %s SET v = 0x WHERE pk = 'blobs' AND ck = 5", 152 | }, 153 | }, 154 | 155 | // Lists test cases 156 | { 157 | schemaLists, 158 | "listOverwrites", 159 | []string{ 160 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [1, 2, 3])", 161 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [4, 5, 6, 7])", 162 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, [6, 5, 4, 3, 2, 1])", 163 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, null)", 164 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 3, [1, 11, 111])", 165 | "UPDATE %s SET v = [2, 22, 222] WHERE pk = 'listOverwrites' AND ck = 3", 166 | }, 167 | }, 168 | { 169 | schemaLists, 170 | "listAppends", 171 | []string{ 172 | "INSERT INTO %s (pk, ck, v) VALUES ('listAppends', 1, [1, 2, 3])", 173 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listAppends' AND ck = 1", 174 | "UPDATE %s SET v = [-2, -1, 0] + v WHERE pk = 'listAppends' AND ck = 1", 175 | }, 176 | }, 177 | { 178 | schemaLists, 179 | "listRemoves", 180 | []string{ 181 | "INSERT INTO %s (pk, ck, v) VALUES ('listRemoves', 1, [1, 2, 3])", 182 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listRemoves' AND ck = 1", 183 | "UPDATE %s SET v = v - [1, 2, 3] WHERE pk = 'listRemoves' AND ck = 1", 184 | }, 185 
| }, 186 | 187 | // Set test cases 188 | { 189 | schemaSets, 190 | "setOverwrites", 191 | []string{ 192 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 1, {1, 2, 3, 4})", 193 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 1, {4, 5, 6, 7})", 194 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, {8, 9, 10, 11})", 195 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, null)", 196 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 3, {12, 13, 14, 15})", 197 | "UPDATE %s SET v = null WHERE pk = 'setOverwrites' AND ck = 3", 198 | }, 199 | }, 200 | { 201 | schemaSets, 202 | "setAppends", 203 | []string{ 204 | "INSERT INTO %s (pk, ck, v) VALUES ('setAppends', 1, {1, 2, 3, 4})", 205 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 1", 206 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 2", 207 | }, 208 | }, 209 | { 210 | schemaSets, 211 | "setRemovals", 212 | []string{ 213 | "INSERT INTO %s (pk, ck, v) VALUES ('setRemovals', 1, {1, 2, 3, 4})", 214 | "UPDATE %s SET v = v - {1, 3} WHERE pk = 'setRemovals' AND ck = 1", 215 | "UPDATE %s SET v = v - {1138} WHERE pk = 'setRemovals' AND ck = 2", 216 | }, 217 | }, 218 | 219 | // Map test cases 220 | { 221 | schemaMaps, 222 | "mapOverwrites", 223 | []string{ 224 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {1: 2, 3: 4})", 225 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {5: 6, 7: 8})", 226 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, {9: 10, 11: 12})", 227 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, null)", 228 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 3, {13: 14, 15: 16})", 229 | "UPDATE %s SET v = null WHERE pk = 'mapOverwrites' AND ck = 3", 230 | }, 231 | }, 232 | { 233 | schemaMaps, 234 | "mapSets", 235 | []string{ 236 | "INSERT INTO %s (pk, ck, v) VALUES ('mapSets', 1, {1: 2, 3: 4, 5: 6})", 237 | "UPDATE %s SET v[1] = 42 WHERE pk = 'mapSets' AND ck = 1", 238 | "UPDATE %s SET v[3] = null WHERE pk = 
'mapSets' AND ck = 1", 239 | "UPDATE %s SET v[3] = 123 WHERE pk = 'mapSets' AND ck = 1", 240 | "UPDATE %s SET v[5] = 321 WHERE pk = 'mapSets' AND ck = 2", 241 | }, 242 | }, 243 | { 244 | schemaMaps, 245 | "mapAppends", 246 | []string{ 247 | "INSERT INTO %s (pk, ck, v) VALUES ('mapAppends', 1, {1: 2, 3: 4})", 248 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 1", 249 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 2", 250 | }, 251 | }, 252 | { 253 | schemaMaps, 254 | "mapRemovals", 255 | []string{ 256 | "INSERT INTO %s (pk, ck, v) VALUES ('mapRemovals', 1, {1: 2, 3: 4})", 257 | "UPDATE %s SET v = v - {1} WHERE pk = 'mapRemovals' AND ck = 1", 258 | "UPDATE %s SET v = v - {1138} WHERE pk = 'mapRemovals' AND ck = 2", 259 | }, 260 | }, 261 | 262 | // Tuple test cases 263 | { 264 | schemaTuples, 265 | "tupleInserts", 266 | []string{ 267 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 1, (7, 'abc'))", 268 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, (9, 'def'))", 269 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, null)", 270 | }, 271 | }, 272 | { 273 | schemaTuples, 274 | "tupleUpdates", 275 | []string{ 276 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 1, (7, 'abc'))", 277 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 2, (9, 'def'))", 278 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 3, (11, 'ghi'))", 279 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 4, (13, 'jkl'))", 280 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 5, (15, 'mno'))", 281 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 6, (17, 'pqr'))", 282 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 7, (19, 'stu'))", 283 | "UPDATE %s SET v = (111, 'zyx') WHERE pk = 'tupleUpdates' AND ck = 1", 284 | "UPDATE %s SET v = null WHERE pk = 'tupleUpdates' AND ck = 2", 285 | "INSERT INTO %s (pk, ck) VALUES ('tupleUpdates', 3)", 286 | "UPDATE %s SET v = (null, null) WHERE pk = 'tupleUpdates' AND ck = 4", 287 | 
"UPDATE %s SET v = (null, 'asdf') WHERE pk = 'tupleUpdates' AND ck = 5", 288 | "UPDATE %s SET v = (123, null) WHERE pk = 'tupleUpdates' AND ck = 6", 289 | "UPDATE %s SET v = (null, '') WHERE pk = 'tupleUpdates' AND ck = 7", 290 | }, 291 | }, 292 | { 293 | schemaTuplesInTuples, 294 | "tuplesInTuples", 295 | []string{ 296 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 1, ((1, 'abc'), 7))", 297 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 2, ((3, 'def'), 9))", 298 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 3, ((3, 'ghi'), 9))", 299 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 4, ((3, 'jkl'), 9))", 300 | "UPDATE %s SET v = ((100, 'zyx'), 111) WHERE pk = 'tuplesInTuples' AND ck = 1", 301 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuples' AND ck = 2", 302 | "UPDATE %s SET v = ((200, null), 999) WHERE pk = 'tuplesInTuples' AND ck = 3", 303 | "UPDATE %s SET v = ((300, ''), 333) WHERE pk = 'tuplesInTuples' AND ck = 4", 304 | }, 305 | }, 306 | { 307 | schemaTuplesInTuplesInTuples, 308 | "tuplesInTuplesInTuples", 309 | []string{ 310 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 1, (((1, 9), 'abc'), 7))", 311 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 2, (((3, 8), 'def'), 9))", 312 | "UPDATE %s SET v = (((100, 200), 'zyx'), 111) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 1", 313 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuplesInTuples' AND ck = 2", 314 | "UPDATE %s SET v = (null, 123) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 3", 315 | "UPDATE %s SET v = ((null, 'xyz'), 321) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 4", 316 | }, 317 | }, 318 | 319 | // UDT test cases 320 | { 321 | schemaUDTs, 322 | "udt", 323 | []string{ 324 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 1, (2, 3, 'abc'))", 325 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 2, {a: 6, c: 'zxcv'})", 326 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 3, (9, 4, 'def'))", 327 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 4, 
(123, 321, 'ghi'))",
			"INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (333, 222, 'jkl'))",
			"INSERT INTO %s (pk, ck, v) VALUES ('udt', 6, (432, 678, 'mno'))",
			"INSERT INTO %s (pk, ck, v) VALUES ('udt', 7, (765, 345, 'pqr'))",
			"UPDATE %s SET v.b = 41414 WHERE pk = 'udt' AND ck = 2",
			"UPDATE %s SET v = null WHERE pk = 'udt' AND ck = 3",
			"UPDATE %s SET v = {b: 123456, c: 'tyu'} WHERE pk = 'udt' AND ck = 4",
			"INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (999, 888, 'zxc'))",
			"UPDATE %s SET v.c = null WHERE pk = 'udt' AND ck = 6",
			"UPDATE %s SET v = {a: 923, b: 123456, c: ''} WHERE pk = 'udt' AND ck = 7",
		},
	},
}

// TestReplicator runs every test-case workload against the source cluster,
// replicates the CDC log to the destination cluster and then verifies that
// both clusters contain identical data.
func TestReplicator(t *testing.T) {
	filter := os.Getenv("REPLICATOR_TEST_FILTER")
	if filter == "" {
		filter = ".*"
	}
	filterRe := regexp.MustCompile(filter)

	topic := os.Getenv("SNS_TEST_TOPIC")
	subject := os.Getenv("SNS_TEST_SUBJECT")
	if topic == "" || subject == "" {
		t.Fatal("SNS_TEST_TOPIC and SNS_TEST_SUBJECT can't be empty")
	}

	// Collect the CREATE TABLE statement of every table the test cases touch.
	schemas := make(map[string]string)
	for _, tc := range testCases {
		schemas[tc.schema.tableName] = tc.schema.createQuery
	}

	sourceAddress := testutils.GetSourceClusterContactPoint()
	destinationAddress := testutils.GetDestinationClusterContactPoint()
	keyspaceName := testutils.GetUniqueName("test_keyspace")

	// CDC is only enabled on the source side; the destination gets plain tables.
	sourceSession := createSessionAndSetupSchema(t, sourceAddress, keyspaceName, true, schemas)
	defer sourceSession.Close()

	destinationSession := createSessionAndSetupSchema(t, destinationAddress, keyspaceName, false, schemas)
	defer destinationSession.Close()

	// Run the workload of every test case whose pk matches the filter.
	for _, tc := range testCases {
		if !filterRe.MatchString(tc.pk) {
			continue
		}
		for _, q := range tc.queries {
			execQuery(t, sourceSession, fmt.Sprintf(q, tc.schema.tableName))
		}
	}

	t.Log("running replicators")

	adv := scyllacdc.AdvancedReaderConfig{
		ChangeAgeLimit:         time.Minute,
		PostNonEmptyQueryDelay: 3 * time.Second,
		PostEmptyQueryDelay:    3 * time.Second,
		PostFailedQueryDelay:   3 * time.Second,
		QueryTimeWindowSize:    5 * time.Minute,
		ConfidenceWindowSize:   time.Millisecond,
	}

	schemaNames := make([]string, 0, len(schemas))
	for tbl := range schemas {
		schemaNames = append(schemaNames, fmt.Sprintf("%s.%s", keyspaceName, tbl))
	}

	logger := log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile)
	replicator, err := newReplicator(
		context.Background(),
		sourceAddress,
		destinationAddress,
		schemaNames,
		topic, subject, "",
		&adv, gocql.Quorum,
		gocql.Quorum,
		"",
		logger,
	)
	if err != nil {
		t.Fatal(err)
	}

	ctx := context.Background()

	errC := make(chan error)
	go func() { errC <- replicator.Run(ctx) }()

	// Give the replicator a moment to start, then ask it to stop at a fixed
	// point in time and wait for it to wind down.
	time.Sleep(time.Second)
	replicator.StopAt(time.Now().Add(time.Second))
	if err := <-errC; err != nil {
		t.Fatal(err)
	}

	t.Log("validating results")

	// Compare the full contents of both clusters, test case by test case.
	sourceSet := fetchFullSet(t, sourceSession, schemas)
	destinationSet := fetchFullSet(t, destinationSession, schemas)

	failedCount := 0
	for _, tc := range testCases {
		sourceData := sourceSet[tc.pk]
		destinationData := destinationSet[tc.pk]

		if len(sourceData) != len(destinationData) {
			t.Logf(
				"%s: source len %d, destination len %d\n",
				tc.pk,
				len(sourceData),
				len(destinationData),
			)
			t.Log("  source:")
			for _, row := range sourceData {
				t.Logf("    %v", row)
			}
			t.Log("  dest:")
			for _, row := range destinationData {
				t.Logf("    %v", row)
			}
			t.Fail()
			failedCount++
			continue
		}

		mismatch := false
		for i := range sourceData {
			if !reflect.DeepEqual(sourceData[i], destinationData[i]) {
				t.Logf("%s: mismatch", tc.pk)
				t.Logf("  source: %v", sourceData[i])
				t.Logf("  dest: %v", destinationData[i])
				mismatch = true
			}
		}

		if mismatch {
			t.Fail()
			failedCount++
		} else {
			t.Logf("%s: OK", tc.pk)
		}
	}

	if failedCount > 0 {
		t.Logf("failed %d/%d test cases", failedCount, len(testCases))
	}
}

// createSessionAndSetupSchema creates the keyspace (via testutils), opens a
// session to it, creates all UDTs and tables and waits for schema agreement.
// When withCdc is set the tables are created with the CDC log enabled
// (including pre- and postimages).
func createSessionAndSetupSchema(t *testing.T, addr string, keyspaceName string, withCdc bool, schemas map[string]string) *gocql.Session {
	testutils.CreateKeyspace(t, addr, keyspaceName)

	cfg := gocql.NewCluster(addr)
	cfg.Keyspace = keyspaceName
	session, err := cfg.CreateSession()
	if err != nil {
		t.Fatal(err)
	}

	for _, udt := range udts {
		execQuery(t, session, udt)
	}

	for _, createStmt := range schemas {
		if withCdc {
			createStmt += " WITH cdc = {'enabled': true, 'preimage': true, 'postimage': true}"
		}
		execQuery(t, session, createStmt)
	}

	if err := session.AwaitSchemaAgreement(context.Background()); err != nil {
		t.Fatal(err)
	}

	return session
}

// execQuery runs a single CQL statement and fails the test on error.
func execQuery(t *testing.T, session *gocql.Session, query string) {
	t.Logf("executing query %s", query)
	if err := session.Query(query).Exec(); err != nil {
		t.Fatal(err)
	}
}

// fetchFullSet reads back the full contents of every test table and groups
// the returned rows by their partition key.
func fetchFullSet(t *testing.T, session *gocql.Session, schemas map[string]string) map[string][]map[string]interface{} {
	groups := make(map[string][]map[string]interface{})

	for tbl := range schemas {
		data, err := session.Query("SELECT * FROM " + tbl).Iter().SliceMap()
		if err != nil {
			t.Fatal(err)
		}
		for _, row := range data {
			pk := row["pk"].(string)
			groups[pk] = append(groups[pk], row)
		}
	}

	return groups
}
// Re-implementation of the type parsing logic from the driver.
// Unlike the driver, this implementation differentiates frozen types
// from non-frozen ones.

// Type identifies a CQL type using the same numeric codes as the driver.
type Type int

const (
	TypeCustom    Type = 0x0000
	TypeAscii     Type = 0x0001
	TypeBigInt    Type = 0x0002
	TypeBlob      Type = 0x0003
	TypeBoolean   Type = 0x0004
	TypeCounter   Type = 0x0005
	TypeDecimal   Type = 0x0006
	TypeDouble    Type = 0x0007
	TypeFloat     Type = 0x0008
	TypeInt       Type = 0x0009
	TypeText      Type = 0x000A
	TypeTimestamp Type = 0x000B
	TypeUUID      Type = 0x000C
	TypeVarchar   Type = 0x000D
	TypeVarint    Type = 0x000E
	TypeTimeUUID  Type = 0x000F
	TypeInet      Type = 0x0010
	TypeDate      Type = 0x0011
	TypeTime      Type = 0x0012
	TypeSmallInt  Type = 0x0013
	TypeTinyInt   Type = 0x0014
	TypeDuration  Type = 0x0015
	TypeList      Type = 0x0020
	TypeMap       Type = 0x0021
	TypeSet       Type = 0x0022
	TypeUDT       Type = 0x0030
	TypeTuple     Type = 0x0031
)

// IsCollection reports whether the type is treated as a multi-cell value
// here: lists, maps, sets and UDTs.
func (t Type) IsCollection() bool {
	return t == TypeList || t == TypeMap || t == TypeSet || t == TypeUDT
}

// TypeInfo describes a parsed CQL type declaration, including whether it
// was wrapped in frozen<...>.
type TypeInfo interface {
	// Type returns the numeric type code of the (possibly wrapped) type.
	Type() Type
	// IsFrozen reports whether the declaration was frozen<...>.
	IsFrozen() bool
	// Unfrozen strips a frozen<...> wrapper, if any, and returns the inner type.
	Unfrozen() TypeInfo
}

// FrozenType wraps a type that was declared as frozen<...>.
type FrozenType struct {
	Inner TypeInfo
}

func (ft *FrozenType) Type() Type {
	return ft.Inner.Type()
}

func (ft *FrozenType) IsFrozen() bool {
	return true
}

func (ft *FrozenType) Unfrozen() TypeInfo {
	return ft.Inner
}

// MapType represents map<K, V>.
type MapType struct {
	Key   TypeInfo
	Value TypeInfo
}

func (mt *MapType) Type() Type {
	return TypeMap
}

func (mt *MapType) IsFrozen() bool {
	return false
}

func (mt *MapType) Unfrozen() TypeInfo {
	return mt
}

// ListType represents list<E>.
type ListType struct {
	Element TypeInfo
}

func (lt *ListType) Type() Type {
	return TypeList
}

func (lt *ListType) IsFrozen() bool {
	return false
}

func (lt *ListType) Unfrozen() TypeInfo {
	return lt
}

// SetType represents set<E>.
type SetType struct {
	Element TypeInfo
}

func (st *SetType) Type() Type {
	return TypeSet
}

func (st *SetType) IsFrozen() bool {
	return false
}

func (st *SetType) Unfrozen() TypeInfo {
	return st
}

// TupleType represents tuple<E1, E2, ...>.
type TupleType struct {
	Elements []TypeInfo
}

func (tt *TupleType) Type() Type {
	return TypeTuple
}

func (tt *TupleType) IsFrozen() bool {
	return false
}

func (tt *TupleType) Unfrozen() TypeInfo {
	return tt
}

// NativeType represents a non-parameterized, built-in CQL type.
type NativeType struct {
	RealType Type
}

func (nt *NativeType) Type() Type {
	return nt.RealType
}

func (nt *NativeType) IsFrozen() bool {
	return false
}

func (nt *NativeType) Unfrozen() TypeInfo {
	return nt
}

// UDTType represents a user-defined type, identified by its name only.
type UDTType struct {
	Name string
}

func (ut *UDTType) Type() Type {
	return TypeUDT
}

func (ut *UDTType) IsFrozen() bool {
	return false
}

func (ut *UDTType) Unfrozen() TypeInfo {
	return ut
}

// parseType parses a CQL type declaration such as "frozen<map<int, text>>"
// into a TypeInfo tree. Names that are not recognized as native types are
// assumed to be UDTs.
func parseType(str string) TypeInfo {
	switch {
	case strings.HasPrefix(str, "frozen<"):
		return &FrozenType{parseType(stripTypeBrackets(str, "frozen<"))}
	case strings.HasPrefix(str, "list<"):
		return &ListType{parseType(stripTypeBrackets(str, "list<"))}
	case strings.HasPrefix(str, "set<"):
		return &SetType{parseType(stripTypeBrackets(str, "set<"))}
	case strings.HasPrefix(str, "map<"):
		kv := parseTypeList(stripTypeBrackets(str, "map<"))
		return &MapType{Key: kv[0], Value: kv[1]}
	case strings.HasPrefix(str, "tuple<"):
		return &TupleType{Elements: parseTypeList(stripTypeBrackets(str, "tuple<"))}
	}
	if typ := parseNativeType(str); typ != TypeUDT {
		return &NativeType{RealType: typ}
	}
	return &UDTType{Name: str}
}

// stripTypeBrackets removes the given opening bracket prefix and the single
// trailing '>' from a declaration, e.g. stripTypeBrackets("list<int>", "list<") == "int".
func stripTypeBrackets(str, prefix string) string {
	return strings.TrimSuffix(strings.TrimPrefix(str, prefix), ">")
}

// parseTypeList splits a comma-separated list of type declarations at the
// top nesting level (commas inside <...> are kept) and parses each element.
func parseTypeList(str string) []TypeInfo {
	var (
		parsed []TypeInfo
		depth  int
		cur    strings.Builder
	)
	flush := func() {
		parsed = append(parsed, parseType(strings.TrimSpace(cur.String())))
		cur.Reset()
	}
	for _, r := range str {
		if r == ',' && depth == 0 {
			flush()
			continue
		}
		switch r {
		case '<':
			depth++
		case '>':
			depth--
		}
		cur.WriteRune(r)
	}
	if cur.Len() != 0 {
		flush()
	}
	return parsed
}

// nativeTypeNames maps the textual name of each native CQL type to its code.
var nativeTypeNames = map[string]Type{
	"ascii":     TypeAscii,
	"bigint":    TypeBigInt,
	"blob":      TypeBlob,
	"boolean":   TypeBoolean,
	"counter":   TypeCounter,
	"date":      TypeDate,
	"decimal":   TypeDecimal,
	"double":    TypeDouble,
	"duration":  TypeDuration,
	"float":     TypeFloat,
	"int":       TypeInt,
	"smallint":  TypeSmallInt,
	"tinyint":   TypeTinyInt,
	"time":      TypeTime,
	"timestamp": TypeTimestamp,
	"uuid":      TypeUUID,
	"varchar":   TypeVarchar,
	"text":      TypeText,
	"varint":    TypeVarint,
	"timeuuid":  TypeTimeUUID,
	"inet":      TypeInet,
}

// parseNativeType maps a native CQL type name to its Type code; any other
// name is assumed to be a UDT.
func parseNativeType(str string) Type {
	if typ, ok := nativeTypeNames[str]; ok {
		return typ
	}
	return TypeUDT
}
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect 29 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect 30 | go.opentelemetry.io/otel v1.29.0 // indirect 31 | go.opentelemetry.io/otel/metric v1.29.0 // indirect 32 | go.opentelemetry.io/otel/trace v1.29.0 // indirect 33 | golang.org/x/crypto v0.31.0 // indirect 34 | golang.org/x/net v0.31.0 // indirect 35 | golang.org/x/oauth2 v0.24.0 // indirect 36 | golang.org/x/sync v0.10.0 // indirect 37 | golang.org/x/sys v0.28.0 // indirect 38 | golang.org/x/text v0.21.0 // indirect 39 | golang.org/x/time v0.8.0 // indirect 40 | google.golang.org/api v0.210.0 // indirect 41 | google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 // indirect 42 | google.golang.org/genproto/googleapis/api v0.0.0-20241113202542-65e8d215514f // indirect 43 | google.golang.org/genproto/googleapis/rpc v0.0.0-20241118233622-e639e219e697 // indirect 44 | google.golang.org/grpc v1.67.1 // indirect 45 | google.golang.org/protobuf v1.35.2 // indirect 46 | gopkg.in/inf.v0 v0.9.1 // indirect 47 | ) 48 | 49 | replace ( 50 | github.com/gocql/gocql => github.com/scylladb/gocql v1.14.4 51 | github.com/scylladb/scylla-cdc-go => ../../ 52 | ) 53 | -------------------------------------------------------------------------------- /examples/replicator-gcp-pub/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "os/signal" 11 | "strings" 12 | "sync" 13 | "sync/atomic" 14 | "time" 15 | 16 | "cloud.google.com/go/pubsub" 17 | "github.com/gocql/gocql" 18 | 19 | scyllacdc "github.com/scylladb/scylla-cdc-go" 20 | ) 21 | 22 | // TODO: Escape field names? 
23 | var showTimestamps = false 24 | 25 | var reportPeriod = 1 * time.Minute 26 | 27 | func main() { 28 | var ( 29 | keyspace string 30 | table string 31 | source string 32 | progressNode string 33 | readConsistency string 34 | writeConsistency string 35 | 36 | pubTopic string 37 | 38 | progressTable string 39 | ) 40 | 41 | flag.StringVar(&keyspace, "keyspace", "", "keyspace name") 42 | flag.StringVar(&table, "table", "", "table name; you can specify multiple table by separating them with a comma") 43 | flag.StringVar(&source, "source", "", "address of a node in source cluster") 44 | flag.StringVar(&progressNode, "progress-node", "", "address of a node in progress cluster") 45 | flag.StringVar(&readConsistency, "read-consistency", "", "consistency level used to read from cdc log (one, quorum, all)") 46 | flag.StringVar(&writeConsistency, "write-consistency", "", "consistency level used to write to the destination cluster (one, quorum, all)") 47 | flag.StringVar(&progressTable, "progress-table", "", "fully-qualified name of the table in the destination cluster to use for saving progress; if omitted, the progress won't be saved") 48 | 49 | flag.StringVar(&pubTopic, "topic", "", "GCP PUB/SUB Topic") 50 | 51 | flag.String("mode", "", "mode (ignored)") 52 | 53 | adv := scyllacdc.AdvancedReaderConfig{} 54 | flag.DurationVar(&adv.ConfidenceWindowSize, "polling-confidence-window-size", 30*time.Second, "defines a minimal age a change must have in order to be read.") 55 | flag.DurationVar(&adv.ChangeAgeLimit, "polling-change-age-limit", 10*time.Minute, "When the library starts for the first time it has to start consuming\nchanges from some point in time. This parameter defines how far in the\npast it needs to look. 
If the value of the parameter is set to an hour,\nthen the library will only read historical changes that are no older than\nan hour.") 56 | flag.DurationVar(&adv.QueryTimeWindowSize, "pooling-query-time-window-size", 1*time.Minute, "Changes are queried using select statements with restriction on the time\nthose changes appeared. The restriction is bounding the time from both\nlower and upper bounds. This parameter defines the width of the time\nwindow used for the restriction.") 57 | flag.DurationVar(&adv.PostEmptyQueryDelay, "polling-post-empty-query-delay", 30*time.Second, "The library uses select statements to fetch changes from CDC Log tables.\nEach select fetches changes from a single table and fetches only changes\nfrom a limited set of CDC streams. If such select returns no changes then\nnext select to this table and set of CDC streams will be issued after\na delay. This parameter specifies the length of the delay") 58 | flag.DurationVar(&adv.PostNonEmptyQueryDelay, "polling-post-non-empty-query-delay", 10*time.Second, "The library uses select statements to fetch changes from CDC Log tables.\nEach select fetches changes from a single table and fetches only changes\nfrom a limited set of CDC streams. If such select returns one or more\nchanges then next select to this table and set of CDC streams will be\nissued after a delay. This parameter specifies the length of the delay") 59 | flag.DurationVar(&adv.PostFailedQueryDelay, "pooling-post-failed-query-delay", 1*time.Second, "If the library tries to read from the CDC log and the read operation\nfails, it will wait some time before attempting to read again. 
This\nparameter specifies the length of the delay.") 60 | 61 | flag.Parse() 62 | 63 | clRead := parseConsistency(readConsistency) 64 | clWrite := parseConsistency(writeConsistency) 65 | 66 | fmt.Println("Parameters:") 67 | fmt.Printf(" Keyspace: %s\n", keyspace) 68 | fmt.Printf(" Table: %s\n", table) 69 | fmt.Printf(" Source cluster IP: %s\n", source) 70 | fmt.Printf(" Destination cluster IP: %s\n", progressNode) 71 | fmt.Printf(" Consistency for reads: %s\n", clRead) 72 | fmt.Printf(" Consistency for writes: %s\n", clWrite) 73 | fmt.Printf(" Table to use for saving progress: %s\n", progressTable) 74 | fmt.Println("Advanced reader parameters:") 75 | fmt.Printf(" Confidence window size: %s\n", adv.ConfidenceWindowSize) 76 | fmt.Printf(" Change age limit: %s\n", adv.ChangeAgeLimit) 77 | fmt.Printf(" Query window size: %s\n", adv.QueryTimeWindowSize) 78 | fmt.Printf(" Delay after poll with empty results: %s\n", adv.PostEmptyQueryDelay) 79 | fmt.Printf(" Delay after poll with non-empty results: %s\n", adv.PostNonEmptyQueryDelay) 80 | fmt.Printf(" Delay after failed poll: %s\n", adv.PostFailedQueryDelay) 81 | 82 | var fullyQualifiedTables []string 83 | 84 | for _, t := range strings.Split(table, ",") { 85 | fullyQualifiedTables = append(fullyQualifiedTables, keyspace+"."+t) 86 | } 87 | 88 | logger := log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile) 89 | repl, err := newReplicator( 90 | context.Background(), 91 | source, progressNode, 92 | fullyQualifiedTables, 93 | pubTopic, 94 | &adv, 95 | clRead, 96 | clWrite, 97 | progressTable, 98 | logger, 99 | ) 100 | if err != nil { 101 | log.Fatalln(err) 102 | } 103 | 104 | ctx, cancel := context.WithCancel(context.Background()) 105 | 106 | // React to Ctrl+C signal. 107 | // 108 | // 1st signal will cause the replicator to read changes up until 109 | // the moment the signal was received, and then it will stop the replicator. 110 | // This is the "most graceful" way of stopping the replicator. 
111 | // 112 | // 2nd signal will cancel the context. This should stop all operations 113 | // done by the replicator ASAP and stop it. 114 | // 115 | // 3rd signal will exit the process immediately with error code 1. 116 | signalC := make(chan os.Signal, 3) 117 | go func() { 118 | <-signalC 119 | now := time.Now() 120 | log.Printf("stopping at %v", now) 121 | repl.StopAt(now) 122 | 123 | <-signalC 124 | log.Printf("stopping now") 125 | cancel() 126 | 127 | <-signalC 128 | log.Printf("killing") 129 | os.Exit(1) 130 | }() 131 | signal.Notify(signalC, os.Interrupt) 132 | 133 | if err := repl.Run(ctx); err != nil { 134 | log.Fatalln(err) 135 | } 136 | 137 | log.Printf("quitting, rows read: %d", repl.GetReadRowsCount()) 138 | } 139 | 140 | func parseConsistency(s string) gocql.Consistency { 141 | switch strings.ToLower(s) { 142 | case "one": 143 | return gocql.One 144 | case "quorum": 145 | return gocql.Quorum 146 | case "all": 147 | return gocql.All 148 | default: 149 | log.Printf("warning: got unsupported consistency level \"%s\", will use \"one\" instead", s) 150 | return gocql.One 151 | } 152 | } 153 | 154 | type replicator struct { 155 | reader *scyllacdc.Reader 156 | 157 | readerSession *gocql.Session 158 | progressSession *gocql.Session 159 | 160 | rowsRead *int64 161 | } 162 | 163 | func newReplicator( 164 | ctx context.Context, 165 | source, destination string, 166 | tableNames []string, 167 | topic string, 168 | advancedParams *scyllacdc.AdvancedReaderConfig, 169 | readConsistency gocql.Consistency, 170 | progressConsistency gocql.Consistency, 171 | progressTable string, 172 | logger scyllacdc.Logger, 173 | ) (*replicator, error) { 174 | ptCluster := gocql.NewCluster(destination) 175 | ptCluster.Timeout = 10 * time.Second 176 | ptCluster.Consistency = progressConsistency 177 | progressSession, err := ptCluster.CreateSession() 178 | if err != nil { 179 | return nil, err 180 | } 181 | 182 | // Configure a session 183 | readerCluster := gocql.NewCluster(source) 
184 | readerCluster.Timeout = 10 * time.Second 185 | readerCluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.RoundRobinHostPolicy()) 186 | readerSession, err := readerCluster.CreateSession() 187 | if err != nil { 188 | progressSession.Close() 189 | return nil, err 190 | } 191 | 192 | rowsRead := new(int64) 193 | 194 | factory := &replicatorFactory{ 195 | rowsRead: rowsRead, 196 | topic: topic, 197 | logger: logger, 198 | } 199 | 200 | var progressManager scyllacdc.ProgressManager 201 | if progressTable != "" { 202 | progressManager, err = scyllacdc.NewTableBackedProgressManager(progressSession, progressTable, "cdc-replicator") 203 | if err != nil { 204 | progressSession.Close() 205 | return nil, err 206 | } 207 | } 208 | 209 | cfg := &scyllacdc.ReaderConfig{ 210 | Session: readerSession, 211 | ChangeConsumerFactory: factory, 212 | ProgressManager: progressManager, 213 | TableNames: tableNames, 214 | Consistency: readConsistency, 215 | } 216 | 217 | if advancedParams != nil { 218 | cfg.Advanced = *advancedParams 219 | } 220 | cfg.Consistency = readConsistency 221 | cfg.Logger = logger 222 | 223 | reader, err := scyllacdc.NewReader(ctx, cfg) 224 | if err != nil { 225 | readerSession.Close() 226 | progressSession.Close() 227 | return nil, err 228 | } 229 | 230 | repl := &replicator{ 231 | reader: reader, 232 | 233 | readerSession: readerSession, 234 | progressSession: progressSession, 235 | 236 | rowsRead: rowsRead, 237 | } 238 | 239 | return repl, nil 240 | } 241 | 242 | func (repl *replicator) Run(ctx context.Context) error { 243 | defer repl.progressSession.Close() 244 | defer repl.readerSession.Close() 245 | return repl.reader.Run(ctx) 246 | } 247 | 248 | func (repl *replicator) StopAt(at time.Time) { 249 | repl.reader.StopAt(at) 250 | } 251 | 252 | func (repl *replicator) Stop() { 253 | repl.reader.Stop() 254 | } 255 | 256 | func (repl *replicator) GetReadRowsCount() int64 { 257 | return atomic.LoadInt64(repl.rowsRead) 258 | } 259 | 260 | 
type replicatorFactory struct { 261 | rowsRead *int64 262 | projectID string 263 | topic string 264 | logger scyllacdc.Logger 265 | } 266 | 267 | func (rf *replicatorFactory) CreateChangeConsumer( 268 | ctx context.Context, 269 | input scyllacdc.CreateChangeConsumerInput, 270 | ) (scyllacdc.ChangeConsumer, error) { 271 | splitTableName := strings.SplitN(input.TableName, ".", 2) 272 | if len(splitTableName) < 2 { 273 | return nil, fmt.Errorf("table name is not fully qualified: %s", input.TableName) 274 | } 275 | return NewPUBReplicator(ctx, rf.projectID, rf.topic, rf.rowsRead, input.StreamID, input.ProgressReporter, rf.logger) 276 | } 277 | 278 | type PUBReplicator struct { 279 | topic *pubsub.Topic 280 | pubTopic string 281 | consistency gocql.Consistency 282 | 283 | pkColumns []string 284 | ckColumns []string 285 | otherColumns []string 286 | columnTypes map[string]TypeInfo 287 | allColumns []string 288 | 289 | insertStr string 290 | rowDeleteQueryStr string 291 | partitionDeleteQueryStr string 292 | 293 | localCount int64 294 | totalCount *int64 295 | 296 | streamID scyllacdc.StreamID 297 | reporter *scyllacdc.PeriodicProgressReporter 298 | } 299 | 300 | func NewPUBReplicator( 301 | ctx context.Context, 302 | projectID, topic string, 303 | count *int64, 304 | streamID scyllacdc.StreamID, 305 | reporter *scyllacdc.ProgressReporter, 306 | logger scyllacdc.Logger, 307 | ) (*PUBReplicator, error) { 308 | cl, err := pubsub.NewClient(ctx, projectID) 309 | if err != nil { 310 | return nil, err 311 | } 312 | 313 | dr := &PUBReplicator{ 314 | topic: cl.Topic(topic), 315 | pubTopic: topic, 316 | totalCount: count, 317 | streamID: streamID, 318 | reporter: scyllacdc.NewPeriodicProgressReporter(logger, reportPeriod, reporter), 319 | } 320 | 321 | dr.reporter.Start(ctx) 322 | return dr, nil 323 | } 324 | 325 | func (r *PUBReplicator) Consume(ctx context.Context, c scyllacdc.Change) error { 326 | timestamp := c.GetCassandraTimestamp() 327 | if showTimestamps { 328 | 
log.Printf("[%s] Processing timestamp: %s (%s)\n", c.StreamID, c.Time, c.Time.Time()) 329 | } 330 | wg := &sync.WaitGroup{} 331 | errs := &writeSafeList[error]{} 332 | for _, change := range c.Delta { 333 | wg.Add(1) 334 | if err := r.sendChangeToPUB(ctx, change, timestamp, "Delta", wg, errs); err != nil { 335 | return err 336 | } 337 | } 338 | 339 | for _, change := range c.PreImage { 340 | wg.Add(1) 341 | if err := r.sendChangeToPUB(ctx, change, timestamp, "PreImage", wg, errs); err != nil { 342 | return err 343 | } 344 | } 345 | 346 | for _, change := range c.PostImage { 347 | wg.Add(1) 348 | if err := r.sendChangeToPUB(ctx, change, timestamp, "PostImage", wg, errs); err != nil { 349 | return err 350 | } 351 | } 352 | 353 | wg.Wait() 354 | for _, err := range errs.list { 355 | if err != nil { 356 | return err 357 | } 358 | } 359 | 360 | r.reporter.Update(c.Time) 361 | r.localCount += int64(len(c.Delta)) 362 | 363 | return nil 364 | } 365 | 366 | func (r *PUBReplicator) sendChangeToPUB(ctx context.Context, change *scyllacdc.ChangeRow, timestamp int64, recType string, wg *sync.WaitGroup, errs *writeSafeList[error]) error { 367 | change.GetRawData() 368 | change.GetOperation() 369 | 370 | msg, err := json.Marshal(map[string]interface{}{ 371 | "type": recType, 372 | "operation": change.GetOperation(), 373 | "timestamp": timestamp, 374 | "ttl": change.GetTTL(), 375 | "seq_no": change.GetSeqNo(), 376 | "end_of_batch": change.GetEndOfBatch(), 377 | "data": change.GetRawData(), 378 | }) 379 | if err != nil { 380 | return fmt.Errorf("failed to serialize message: %w", err) 381 | } 382 | 383 | resp := r.topic.Publish(ctx, &pubsub.Message{ 384 | Data: msg, 385 | }) 386 | 387 | go func() { 388 | defer wg.Done() 389 | _, err := resp.Get(ctx) 390 | errs.Add(err) 391 | }() 392 | 393 | return nil 394 | } 395 | 396 | func (r *PUBReplicator) End() error { 397 | log.Printf("Streams [%s]: processed %d changes in total", r.streamID, r.localCount) 398 | atomic.AddInt64(r.totalCount, 
r.localCount) 399 | _ = r.reporter.SaveAndStop(context.Background()) 400 | r.topic.Flush() 401 | r.topic.Stop() 402 | return nil 403 | } 404 | 405 | func (r *PUBReplicator) Empty(ctx context.Context, ackTime gocql.UUID) error { 406 | log.Printf("Streams [%s]: saw no changes up to %s", r.streamID, ackTime.Time()) 407 | r.reporter.Update(ackTime) 408 | return nil 409 | } 410 | 411 | // Make sure that PUBReplicator supports the ChangeOrEmptyNotificationConsumer interface 412 | var _ scyllacdc.ChangeOrEmptyNotificationConsumer = (*PUBReplicator)(nil) 413 | 414 | type writeSafeList[V any] struct { 415 | list []V 416 | mutex sync.Mutex 417 | } 418 | 419 | func (w *writeSafeList[V]) Add(item V) { 420 | w.mutex.Lock() 421 | w.list = append(w.list, item) 422 | w.mutex.Unlock() 423 | } 424 | 425 | func (w *writeSafeList[V]) Items() []V { 426 | w.mutex.Lock() 427 | defer w.mutex.Unlock() 428 | return w.list 429 | } 430 | -------------------------------------------------------------------------------- /examples/replicator-gcp-pub/replicator_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | "reflect" 9 | "regexp" 10 | "testing" 11 | "time" 12 | 13 | "github.com/gocql/gocql" 14 | scyllacdc "github.com/scylladb/scylla-cdc-go" 15 | "github.com/scylladb/scylla-cdc-go/testutils" 16 | ) 17 | 18 | type schema struct { 19 | tableName string 20 | createQuery string 21 | } 22 | 23 | var udts = []string{ 24 | "CREATE TYPE udt_simple (a int, b int, c text)", 25 | } 26 | 27 | var ( 28 | schemaSimple = schema{ 29 | "tbl_simple", 30 | "CREATE TABLE tbl_simple (pk text, ck int, v1 int, v2 text, PRIMARY KEY (pk, ck))", 31 | } 32 | schemaMultipleClusteringKeys = schema{ 33 | "tbl_multiple_clustering_keys", 34 | "CREATE TABLE tbl_multiple_clustering_keys (pk text, ck1 int, ck2 int, v int, PRIMARY KEY (pk, ck1, ck2))", 35 | } 36 | schemaBlobs = schema{ 37 | "tbl_blobs", 38 | 
"CREATE TABLE tbl_blobs (pk text, ck int, v blob, PRIMARY KEY (pk, ck))", 39 | } 40 | schemaLists = schema{ 41 | "tbl_lists", 42 | "CREATE TABLE tbl_lists (pk text, ck int, v list, PRIMARY KEY(pk, ck))", 43 | } 44 | schemaSets = schema{ 45 | "tbl_sets", 46 | "CREATE TABLE tbl_sets (pk text, ck int, v set, PRIMARY KEY (pk, ck))", 47 | } 48 | schemaMaps = schema{ 49 | "tbl_maps", 50 | "CREATE TABLE tbl_maps (pk text, ck int, v map, PRIMARY KEY (pk, ck))", 51 | } 52 | schemaTuples = schema{ 53 | "tbl_tuples", 54 | "CREATE TABLE tbl_tuples (pk text, ck int, v tuple, PRIMARY KEY (pk, ck))", 55 | } 56 | schemaTuplesInTuples = schema{ 57 | "tbl_tuples_in_tuples", 58 | "CREATE TABLE tbl_tuples_in_tuples (pk text, ck int, v tuple, int>, PRIMARY KEY (pk, ck))", 59 | } 60 | schemaTuplesInTuplesInTuples = schema{ 61 | "tbl_tuples_in_tuples_in_tuples", 62 | "CREATE TABLE tbl_tuples_in_tuples_in_tuples (pk text, ck int, v tuple, text>, int>, PRIMARY KEY (pk, ck))", 63 | } 64 | schemaUDTs = schema{ 65 | "tbl_udts", 66 | "CREATE TABLE tbl_udts (pk text, ck int, v udt_simple, PRIMARY KEY (pk, ck))", 67 | } 68 | ) 69 | 70 | var testCases = []struct { 71 | schema schema 72 | pk string 73 | queries []string 74 | }{ 75 | // Operations test cases 76 | { 77 | schemaSimple, 78 | "simpleInserts", 79 | []string{ 80 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('simpleInserts', 1, 2, 'abc')", 81 | "INSERT INTO %s (pk, ck, v1) VALUES ('simpleInserts', 2, 3)", 82 | "INSERT INTO %s (pk, ck, v2) VALUES ('simpleInserts', 2, 'def')", 83 | }, 84 | }, 85 | { 86 | schemaSimple, 87 | "simpleUpdates", 88 | []string{ 89 | "UPDATE %s SET v1 = 1 WHERE pk = 'simpleUpdates' AND ck = 1", 90 | "UPDATE %s SET v2 = 'abc' WHERE pk = 'simpleUpdates' AND ck = 2", 91 | "UPDATE %s SET v1 = 5, v2 = 'def' WHERE pk = 'simpleUpdates' AND ck = 3", 92 | }, 93 | }, 94 | { 95 | schemaSimple, 96 | "rowDeletes", 97 | []string{ 98 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('rowDeletes', 1, 2, 'abc')", 99 | "INSERT INTO %s (pk, 
ck, v1, v2) VALUES ('rowDeletes', 2, 3, 'def')", 100 | "DELETE FROM %s WHERE pk = 'rowDeletes' AND ck = 1", 101 | }, 102 | }, 103 | { 104 | schemaSimple, 105 | "partitionDeletes", 106 | []string{ 107 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 1, 2, 'abc')", 108 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 2, 3, 'def')", 109 | "DELETE FROM %s WHERE pk = 'partitionDeletes'", 110 | // Insert one more row, just to check if replication works at all 111 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 4, 5, 'def')", 112 | }, 113 | }, 114 | { 115 | schemaMultipleClusteringKeys, 116 | "rangeDeletes", 117 | []string{ 118 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 1, 0)", 119 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 2, 0)", 120 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 3, 0)", 121 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 4, 0)", 122 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 1, 0)", 123 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 2, 0)", 124 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 3, 0)", 125 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 4, 0)", 126 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 1, 0)", 127 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 2, 0)", 128 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 3, 0)", 129 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 4, 0)", 130 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 1, 0)", 131 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 2, 0)", 132 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 3, 0)", 133 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 4, 0)", 134 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 > 3", 135 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 <= 1", 136 | 
"DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 = 2 AND ck2 > 1 AND ck2 < 4", 137 | }, 138 | }, 139 | 140 | // Blob test cases 141 | { 142 | schemaBlobs, 143 | "blobs", 144 | []string{ 145 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 1, 0x1234)", 146 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 2, 0x)", 147 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 3, null)", 148 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 4, 0x4321)", 149 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 5, 0x00)", 150 | "UPDATE %s SET v = null WHERE pk = 'blobs' AND ck = 4", 151 | "UPDATE %s SET v = 0x WHERE pk = 'blobs' AND ck = 5", 152 | }, 153 | }, 154 | 155 | // Lists test cases 156 | { 157 | schemaLists, 158 | "listOverwrites", 159 | []string{ 160 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [1, 2, 3])", 161 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [4, 5, 6, 7])", 162 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, [6, 5, 4, 3, 2, 1])", 163 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, null)", 164 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 3, [1, 11, 111])", 165 | "UPDATE %s SET v = [2, 22, 222] WHERE pk = 'listOverwrites' AND ck = 3", 166 | }, 167 | }, 168 | { 169 | schemaLists, 170 | "listAppends", 171 | []string{ 172 | "INSERT INTO %s (pk, ck, v) VALUES ('listAppends', 1, [1, 2, 3])", 173 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listAppends' AND ck = 1", 174 | "UPDATE %s SET v = [-2, -1, 0] + v WHERE pk = 'listAppends' AND ck = 1", 175 | }, 176 | }, 177 | { 178 | schemaLists, 179 | "listRemoves", 180 | []string{ 181 | "INSERT INTO %s (pk, ck, v) VALUES ('listRemoves', 1, [1, 2, 3])", 182 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listRemoves' AND ck = 1", 183 | "UPDATE %s SET v = v - [1, 2, 3] WHERE pk = 'listRemoves' AND ck = 1", 184 | }, 185 | }, 186 | 187 | // Set test cases 188 | { 189 | schemaSets, 190 | "setOverwrites", 191 | []string{ 192 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 
1, {1, 2, 3, 4})", 193 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 1, {4, 5, 6, 7})", 194 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, {8, 9, 10, 11})", 195 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, null)", 196 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 3, {12, 13, 14, 15})", 197 | "UPDATE %s SET v = null WHERE pk = 'setOverwrites' AND ck = 3", 198 | }, 199 | }, 200 | { 201 | schemaSets, 202 | "setAppends", 203 | []string{ 204 | "INSERT INTO %s (pk, ck, v) VALUES ('setAppends', 1, {1, 2, 3, 4})", 205 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 1", 206 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 2", 207 | }, 208 | }, 209 | { 210 | schemaSets, 211 | "setRemovals", 212 | []string{ 213 | "INSERT INTO %s (pk, ck, v) VALUES ('setRemovals', 1, {1, 2, 3, 4})", 214 | "UPDATE %s SET v = v - {1, 3} WHERE pk = 'setRemovals' AND ck = 1", 215 | "UPDATE %s SET v = v - {1138} WHERE pk = 'setRemovals' AND ck = 2", 216 | }, 217 | }, 218 | 219 | // Map test cases 220 | { 221 | schemaMaps, 222 | "mapOverwrites", 223 | []string{ 224 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {1: 2, 3: 4})", 225 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {5: 6, 7: 8})", 226 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, {9: 10, 11: 12})", 227 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, null)", 228 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 3, {13: 14, 15: 16})", 229 | "UPDATE %s SET v = null WHERE pk = 'mapOverwrites' AND ck = 3", 230 | }, 231 | }, 232 | { 233 | schemaMaps, 234 | "mapSets", 235 | []string{ 236 | "INSERT INTO %s (pk, ck, v) VALUES ('mapSets', 1, {1: 2, 3: 4, 5: 6})", 237 | "UPDATE %s SET v[1] = 42 WHERE pk = 'mapSets' AND ck = 1", 238 | "UPDATE %s SET v[3] = null WHERE pk = 'mapSets' AND ck = 1", 239 | "UPDATE %s SET v[3] = 123 WHERE pk = 'mapSets' AND ck = 1", 240 | "UPDATE %s SET v[5] = 321 WHERE pk = 'mapSets' AND ck = 2", 241 
| }, 242 | }, 243 | { 244 | schemaMaps, 245 | "mapAppends", 246 | []string{ 247 | "INSERT INTO %s (pk, ck, v) VALUES ('mapAppends', 1, {1: 2, 3: 4})", 248 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 1", 249 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 2", 250 | }, 251 | }, 252 | { 253 | schemaMaps, 254 | "mapRemovals", 255 | []string{ 256 | "INSERT INTO %s (pk, ck, v) VALUES ('mapRemovals', 1, {1: 2, 3: 4})", 257 | "UPDATE %s SET v = v - {1} WHERE pk = 'mapRemovals' AND ck = 1", 258 | "UPDATE %s SET v = v - {1138} WHERE pk = 'mapRemovals' AND ck = 2", 259 | }, 260 | }, 261 | 262 | // Tuple test cases 263 | { 264 | schemaTuples, 265 | "tupleInserts", 266 | []string{ 267 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 1, (7, 'abc'))", 268 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, (9, 'def'))", 269 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, null)", 270 | }, 271 | }, 272 | { 273 | schemaTuples, 274 | "tupleUpdates", 275 | []string{ 276 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 1, (7, 'abc'))", 277 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 2, (9, 'def'))", 278 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 3, (11, 'ghi'))", 279 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 4, (13, 'jkl'))", 280 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 5, (15, 'mno'))", 281 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 6, (17, 'pqr'))", 282 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 7, (19, 'stu'))", 283 | "UPDATE %s SET v = (111, 'zyx') WHERE pk = 'tupleUpdates' AND ck = 1", 284 | "UPDATE %s SET v = null WHERE pk = 'tupleUpdates' AND ck = 2", 285 | "INSERT INTO %s (pk, ck) VALUES ('tupleUpdates', 3)", 286 | "UPDATE %s SET v = (null, null) WHERE pk = 'tupleUpdates' AND ck = 4", 287 | "UPDATE %s SET v = (null, 'asdf') WHERE pk = 'tupleUpdates' AND ck = 5", 288 | "UPDATE %s SET v = (123, null) WHERE pk = 'tupleUpdates' AND ck = 6", 289 | "UPDATE 
%s SET v = (null, '') WHERE pk = 'tupleUpdates' AND ck = 7", 290 | }, 291 | }, 292 | { 293 | schemaTuplesInTuples, 294 | "tuplesInTuples", 295 | []string{ 296 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 1, ((1, 'abc'), 7))", 297 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 2, ((3, 'def'), 9))", 298 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 3, ((3, 'ghi'), 9))", 299 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 4, ((3, 'jkl'), 9))", 300 | "UPDATE %s SET v = ((100, 'zyx'), 111) WHERE pk = 'tuplesInTuples' AND ck = 1", 301 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuples' AND ck = 2", 302 | "UPDATE %s SET v = ((200, null), 999) WHERE pk = 'tuplesInTuples' AND ck = 3", 303 | "UPDATE %s SET v = ((300, ''), 333) WHERE pk = 'tuplesInTuples' AND ck = 4", 304 | }, 305 | }, 306 | { 307 | schemaTuplesInTuplesInTuples, 308 | "tuplesInTuplesInTuples", 309 | []string{ 310 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 1, (((1, 9), 'abc'), 7))", 311 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 2, (((3, 8), 'def'), 9))", 312 | "UPDATE %s SET v = (((100, 200), 'zyx'), 111) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 1", 313 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuplesInTuples' AND ck = 2", 314 | "UPDATE %s SET v = (null, 123) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 3", 315 | "UPDATE %s SET v = ((null, 'xyz'), 321) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 4", 316 | }, 317 | }, 318 | 319 | // UDT test cases 320 | { 321 | schemaUDTs, 322 | "udt", 323 | []string{ 324 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 1, (2, 3, 'abc'))", 325 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 2, {a: 6, c: 'zxcv'})", 326 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 3, (9, 4, 'def'))", 327 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 4, (123, 321, 'ghi'))", 328 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (333, 222, 'jkl'))", 329 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 6, (432, 678, 
'mno'))", 330 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 7, (765, 345, 'pqr'))", 331 | "UPDATE %s SET v.b = 41414 WHERE pk = 'udt' AND ck = 2", 332 | "UPDATE %s SET v = null WHERE pk = 'udt' AND ck = 3", 333 | "UPDATE %s SET v = {b: 123456, c: 'tyu'} WHERE pk = 'udt' AND ck = 4", 334 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (999, 888, 'zxc'))", 335 | "UPDATE %s SET v.c = null WHERE pk = 'udt' AND ck = 6", 336 | "UPDATE %s SET v = {a: 923, b: 123456, c: ''} WHERE pk = 'udt' AND ck = 7", 337 | }, 338 | }, 339 | } 340 | 341 | func TestReplicator(t *testing.T) { 342 | filter := os.Getenv("REPLICATOR_TEST_FILTER") 343 | if filter == "" { 344 | filter = ".*" 345 | } 346 | re := regexp.MustCompile(filter) 347 | 348 | topic := os.Getenv("TEST_TOPIC") 349 | 350 | if topic == "" { 351 | t.Fatal("TEST_TOPIC can't be empty") 352 | } 353 | 354 | // Collect all schemas 355 | schemas := make(map[string]string) 356 | for _, tc := range testCases { 357 | schemas[tc.schema.tableName] = tc.schema.createQuery 358 | } 359 | 360 | sourceAddress := testutils.GetSourceClusterContactPoint() 361 | destinationAddress := testutils.GetDestinationClusterContactPoint() 362 | keyspaceName := testutils.GetUniqueName("test_keyspace") 363 | 364 | sourceSession := createSessionAndSetupSchema(t, sourceAddress, keyspaceName, true, schemas) 365 | defer sourceSession.Close() 366 | 367 | destinationSession := createSessionAndSetupSchema(t, destinationAddress, keyspaceName, false, schemas) 368 | defer destinationSession.Close() 369 | 370 | // Execute all of the queries 371 | for _, tc := range testCases { 372 | if !re.MatchString(tc.pk) { 373 | continue 374 | } 375 | for _, qStr := range tc.queries { 376 | execQuery(t, sourceSession, fmt.Sprintf(qStr, tc.schema.tableName)) 377 | } 378 | } 379 | 380 | t.Log("running replicators") 381 | 382 | adv := scyllacdc.AdvancedReaderConfig{ 383 | ChangeAgeLimit: time.Minute, 384 | PostNonEmptyQueryDelay: 3 * time.Second, 385 | PostEmptyQueryDelay: 3 * 
time.Second, 386 | PostFailedQueryDelay: 3 * time.Second, 387 | QueryTimeWindowSize: 5 * time.Minute, 388 | ConfidenceWindowSize: time.Millisecond, 389 | } 390 | 391 | schemaNames := make([]string, 0) 392 | for tbl := range schemas { 393 | schemaNames = append(schemaNames, fmt.Sprintf("%s.%s", keyspaceName, tbl)) 394 | } 395 | 396 | logger := log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile) 397 | replicator, err := newReplicator( 398 | context.Background(), 399 | sourceAddress, 400 | destinationAddress, 401 | schemaNames, 402 | "", 403 | &adv, gocql.Quorum, 404 | gocql.Quorum, 405 | "", 406 | logger, 407 | ) 408 | 409 | if err != nil { 410 | t.Fatal(err) 411 | } 412 | 413 | ctx := context.Background() 414 | 415 | errC := make(chan error) 416 | go func() { errC <- replicator.Run(ctx) }() 417 | 418 | time.Sleep(time.Second) 419 | 420 | replicator.StopAt(time.Now().Add(time.Second)) 421 | if err := <-errC; err != nil { 422 | t.Fatal(err) 423 | } 424 | 425 | t.Log("validating results") 426 | 427 | // Compare 428 | sourceSet := fetchFullSet(t, sourceSession, schemas) 429 | destinationSet := fetchFullSet(t, destinationSession, schemas) 430 | 431 | failedCount := 0 432 | 433 | for _, tc := range testCases { 434 | sourceData := sourceSet[tc.pk] 435 | destinationData := destinationSet[tc.pk] 436 | 437 | if len(sourceData) != len(destinationData) { 438 | t.Logf( 439 | "%s: source len %d, destination len %d\n", 440 | tc.pk, 441 | len(sourceData), 442 | len(destinationData), 443 | ) 444 | t.Log(" source:") 445 | for _, row := range sourceData { 446 | t.Logf(" %v", row) 447 | } 448 | t.Log(" dest:") 449 | for _, row := range destinationData { 450 | t.Logf(" %v", row) 451 | } 452 | t.Fail() 453 | failedCount++ 454 | continue 455 | } 456 | 457 | failed := false 458 | for i := 0; i < len(sourceData); i++ { 459 | if !reflect.DeepEqual(sourceData[i], destinationData[i]) { 460 | t.Logf("%s: mismatch", tc.pk) 461 | t.Logf(" source: %v", sourceData[i]) 462 | t.Logf(" 
dest: %v", destinationData[i]) 463 | failed = true 464 | } 465 | } 466 | 467 | if failed { 468 | t.Fail() 469 | failedCount++ 470 | } else { 471 | t.Logf("%s: OK", tc.pk) 472 | } 473 | } 474 | 475 | if failedCount > 0 { 476 | t.Logf("failed %d/%d test cases", failedCount, len(testCases)) 477 | } 478 | } 479 | 480 | func createSessionAndSetupSchema(t *testing.T, addr string, keyspaceName string, withCdc bool, schemas map[string]string) *gocql.Session { 481 | testutils.CreateKeyspace(t, addr, keyspaceName) 482 | 483 | cfg := gocql.NewCluster(addr) 484 | cfg.Keyspace = keyspaceName 485 | session, err := cfg.CreateSession() 486 | if err != nil { 487 | t.Fatal(err) 488 | } 489 | 490 | for _, udt := range udts { 491 | execQuery(t, session, udt) 492 | } 493 | 494 | for _, tbl := range schemas { 495 | tblQuery := tbl 496 | if withCdc { 497 | tblQuery += " WITH cdc = {'enabled': true, 'preimage': true, 'postimage': true}" 498 | } 499 | execQuery(t, session, tblQuery) 500 | } 501 | 502 | err = session.AwaitSchemaAgreement(context.Background()) 503 | if err != nil { 504 | t.Fatal(err) 505 | } 506 | 507 | return session 508 | } 509 | 510 | func execQuery(t *testing.T, session *gocql.Session, query string) { 511 | t.Logf("executing query %s", query) 512 | err := session.Query(query).Exec() 513 | if err != nil { 514 | t.Fatal(err) 515 | } 516 | } 517 | 518 | func fetchFullSet(t *testing.T, session *gocql.Session, schemas map[string]string) map[string][]map[string]interface{} { 519 | groups := make(map[string][]map[string]interface{}) 520 | 521 | for tbl := range schemas { 522 | data, err := session.Query("SELECT * FROM " + tbl).Iter().SliceMap() 523 | if err != nil { 524 | t.Fatal(err) 525 | } 526 | 527 | for _, row := range data { 528 | pk := row["pk"].(string) 529 | groups[pk] = append(groups[pk], row) 530 | } 531 | } 532 | 533 | return groups 534 | } 535 | -------------------------------------------------------------------------------- /examples/replicator-gcp-pub/utils.go: 
// -----------------------------------------------------------------------------
// examples/replicator-gcp-pub/utils.go
// -----------------------------------------------------------------------------
package main

import "strings"

// Re-implementation of the type parsing logic from the driver.
// Unlike the driver, this implementation differentiates frozen types
// from non-frozen ones.

// Type identifies a CQL type; the numeric values match the driver's
// protocol-level type IDs.
type Type int

const (
	TypeCustom    Type = 0x0000
	TypeAscii     Type = 0x0001
	TypeBigInt    Type = 0x0002
	TypeBlob      Type = 0x0003
	TypeBoolean   Type = 0x0004
	TypeCounter   Type = 0x0005
	TypeDecimal   Type = 0x0006
	TypeDouble    Type = 0x0007
	TypeFloat     Type = 0x0008
	TypeInt       Type = 0x0009
	TypeText      Type = 0x000A
	TypeTimestamp Type = 0x000B
	TypeUUID      Type = 0x000C
	TypeVarchar   Type = 0x000D
	TypeVarint    Type = 0x000E
	TypeTimeUUID  Type = 0x000F
	TypeInet      Type = 0x0010
	TypeDate      Type = 0x0011
	TypeTime      Type = 0x0012
	TypeSmallInt  Type = 0x0013
	TypeTinyInt   Type = 0x0014
	TypeDuration  Type = 0x0015
	TypeList      Type = 0x0020
	TypeMap       Type = 0x0021
	TypeSet       Type = 0x0022
	TypeUDT       Type = 0x0030
	TypeTuple     Type = 0x0031
)

// IsCollection reports whether the type is a multi-cell type
// (list, map, set or UDT; tuples are deliberately excluded).
func (t Type) IsCollection() bool {
	switch t {
	case TypeList, TypeMap, TypeSet, TypeUDT:
		return true
	default:
		return false
	}
}

// TypeInfo describes a parsed CQL type, including frozenness.
type TypeInfo interface {
	Type() Type
	IsFrozen() bool
	Unfrozen() TypeInfo
}

// FrozenType wraps another type parsed from a "frozen<...>" spelling.
type FrozenType struct {
	Inner TypeInfo
}

func (ft *FrozenType) Type() Type {
	return ft.Inner.Type()
}

func (ft *FrozenType) IsFrozen() bool {
	return true
}

func (ft *FrozenType) Unfrozen() TypeInfo {
	return ft.Inner
}

// MapType represents "map<Key, Value>".
type MapType struct {
	Key   TypeInfo
	Value TypeInfo
}

func (mt *MapType) Type() Type {
	return TypeMap
}

func (mt *MapType) IsFrozen() bool {
	return false
}

func (mt *MapType) Unfrozen() TypeInfo {
	return mt
}

// ListType represents "list<Element>".
type ListType struct {
	Element TypeInfo
}

func (lt *ListType) Type() Type {
	return TypeList
}

func (lt *ListType) IsFrozen() bool {
	return false
}

func (lt *ListType) Unfrozen() TypeInfo {
	return lt
}

// SetType represents "set<Element>".
type SetType struct {
	Element TypeInfo
}

func (st *SetType) Type() Type {
	return TypeSet
}

func (st *SetType) IsFrozen() bool {
	return false
}

func (st *SetType) Unfrozen() TypeInfo {
	return st
}

// TupleType represents "tuple<E1, E2, ...>".
type TupleType struct {
	Elements []TypeInfo
}

func (tt *TupleType) Type() Type {
	return TypeTuple
}

func (tt *TupleType) IsFrozen() bool {
	return false
}

func (tt *TupleType) Unfrozen() TypeInfo {
	return tt
}

// NativeType represents a non-parameterized CQL type such as int or text.
type NativeType struct {
	RealType Type
}

func (nt *NativeType) Type() Type {
	return nt.RealType
}

func (nt *NativeType) IsFrozen() bool {
	return false
}

func (nt *NativeType) Unfrozen() TypeInfo {
	return nt
}

// UDTType represents a user-defined type, identified by its name.
type UDTType struct {
	Name string
}

func (ut *UDTType) Type() Type {
	return TypeUDT
}

func (ut *UDTType) IsFrozen() bool {
	return false
}

func (ut *UDTType) Unfrozen() TypeInfo {
	return ut
}

// parseType parses a CQL type string such as "map<int, frozen<set<text>>>"
// into a TypeInfo tree. Unknown names are assumed to be UDTs.
func parseType(str string) TypeInfo {
	if strings.HasPrefix(str, "frozen<") {
		innerStr := strings.TrimSuffix(strings.TrimPrefix(str, "frozen<"), ">")
		return &FrozenType{parseType(innerStr)}
	}
	if strings.HasPrefix(str, "list<") {
		innerStr := strings.TrimSuffix(strings.TrimPrefix(str, "list<"), ">")
		return &ListType{parseType(innerStr)}
	}
	if strings.HasPrefix(str, "set<") {
		innerStr := strings.TrimSuffix(strings.TrimPrefix(str, "set<"), ">")
		return &SetType{parseType(innerStr)}
	}
	if strings.HasPrefix(str, "map<") {
		innerStr := strings.TrimSuffix(strings.TrimPrefix(str, "map<"), ">")
		list := parseTypeList(innerStr)
		return &MapType{Key: list[0], Value: list[1]}
	}
	if strings.HasPrefix(str, "tuple<") {
		innerStr := strings.TrimSuffix(strings.TrimPrefix(str, "tuple<"), ">")
		list := parseTypeList(innerStr)
		return &TupleType{Elements: list}
	}
	typ := parseNativeType(str)
	if typ == TypeUDT {
		return &UDTType{Name: str}
	}
	return &NativeType{RealType: typ}
}

// parseTypeList splits a comma-separated list of type strings, respecting
// nested angle brackets, and parses each element.
func parseTypeList(str string) []TypeInfo {
	var ret []TypeInfo
	var level int
	var builder strings.Builder
	for _, r := range str {
		if r == ',' && level == 0 {
			s := strings.TrimSpace(builder.String())
			ret = append(ret, parseType(s))
			builder.Reset()
			continue
		}

		if r == '<' {
			level++
		} else if r == '>' {
			level--
		}
		builder.WriteRune(r)
	}
	if builder.Len() != 0 {
		s := strings.TrimSpace(builder.String())
		ret = append(ret, parseType(s))
	}
	return ret
}

// parseNativeType maps a plain CQL type name to its Type constant;
// anything unrecognized is reported as TypeUDT.
func parseNativeType(str string) Type {
	switch str {
	case "ascii":
		return TypeAscii
	case "bigint":
		return TypeBigInt
	case "blob":
		return TypeBlob
	case "boolean":
		return TypeBoolean
	case "counter":
		return TypeCounter
	case "date":
		return TypeDate
	case "decimal":
		return TypeDecimal
	case "double":
		return TypeDouble
	case "duration":
		return TypeDuration
	case "float":
		return TypeFloat
	case "int":
		return TypeInt
	case "smallint":
		return TypeSmallInt
	case "tinyint":
		return TypeTinyInt
	case "time":
		return TypeTime
	case "timestamp":
		return TypeTimestamp
	case "uuid":
		return TypeUUID
	case "varchar":
		return TypeVarchar
	case "text":
		return TypeText
	case "varint":
		return TypeVarint
	case "timeuuid":
		return TypeTimeUUID
	case "inet":
		return TypeInet
	default:
		// Assume it's a UDT
		return TypeUDT
	}
}

// -----------------------------------------------------------------------------
// examples/replicator/.gitignore:
//     replicator
//
// examples/replicator/replicator_test.go (head): package clause, imports,
// the `schema` type, `udts`, and the schema definitions up to schemaMaps —
// a near-verbatim duplicate of examples/replicator-gcp-pub/replicator_test.go
// above. The remainder of that file continues on the following lines of this
// dump; converted to comments here so this section stands alone as valid Go.
// -----------------------------------------------------------------------------
"CREATE TABLE tbl_maps (pk text, ck int, v map, PRIMARY KEY (pk, ck))", 53 | } 54 | schemaTuples = schema{ 55 | "tbl_tuples", 56 | "CREATE TABLE tbl_tuples (pk text, ck int, v tuple, PRIMARY KEY (pk, ck))", 57 | } 58 | schemaTuplesInTuples = schema{ 59 | "tbl_tuples_in_tuples", 60 | "CREATE TABLE tbl_tuples_in_tuples (pk text, ck int, v tuple, int>, PRIMARY KEY (pk, ck))", 61 | } 62 | schemaTuplesInTuplesInTuples = schema{ 63 | "tbl_tuples_in_tuples_in_tuples", 64 | "CREATE TABLE tbl_tuples_in_tuples_in_tuples (pk text, ck int, v tuple, text>, int>, PRIMARY KEY (pk, ck))", 65 | } 66 | schemaUDTs = schema{ 67 | "tbl_udts", 68 | "CREATE TABLE tbl_udts (pk text, ck int, v udt_simple, PRIMARY KEY (pk, ck))", 69 | } 70 | ) 71 | 72 | var testCases = []struct { 73 | schema schema 74 | pk string 75 | queries []string 76 | }{ 77 | // Operations test cases 78 | { 79 | schemaSimple, 80 | "simpleInserts", 81 | []string{ 82 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('simpleInserts', 1, 2, 'abc')", 83 | "INSERT INTO %s (pk, ck, v1) VALUES ('simpleInserts', 2, 3)", 84 | "INSERT INTO %s (pk, ck, v2) VALUES ('simpleInserts', 2, 'def')", 85 | }, 86 | }, 87 | { 88 | schemaSimple, 89 | "simpleUpdates", 90 | []string{ 91 | "UPDATE %s SET v1 = 1 WHERE pk = 'simpleUpdates' AND ck = 1", 92 | "UPDATE %s SET v2 = 'abc' WHERE pk = 'simpleUpdates' AND ck = 2", 93 | "UPDATE %s SET v1 = 5, v2 = 'def' WHERE pk = 'simpleUpdates' AND ck = 3", 94 | }, 95 | }, 96 | { 97 | schemaSimple, 98 | "rowDeletes", 99 | []string{ 100 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('rowDeletes', 1, 2, 'abc')", 101 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('rowDeletes', 2, 3, 'def')", 102 | "DELETE FROM %s WHERE pk = 'rowDeletes' AND ck = 1", 103 | }, 104 | }, 105 | { 106 | schemaSimple, 107 | "partitionDeletes", 108 | []string{ 109 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 1, 2, 'abc')", 110 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 2, 3, 'def')", 111 | "DELETE FROM %s 
WHERE pk = 'partitionDeletes'", 112 | // Insert one more row, just to check if replication works at all 113 | "INSERT INTO %s (pk, ck, v1, v2) VALUES ('partitionDeletes', 4, 5, 'def')", 114 | }, 115 | }, 116 | { 117 | schemaMultipleClusteringKeys, 118 | "rangeDeletes", 119 | []string{ 120 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 1, 0)", 121 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 2, 0)", 122 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 3, 0)", 123 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 1, 4, 0)", 124 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 1, 0)", 125 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 2, 0)", 126 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 3, 0)", 127 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 2, 4, 0)", 128 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 1, 0)", 129 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 2, 0)", 130 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 3, 0)", 131 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 3, 4, 0)", 132 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 1, 0)", 133 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 2, 0)", 134 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 3, 0)", 135 | "INSERT INTO %s (pk, ck1, ck2, v) VALUES ('rangeDeletes', 4, 4, 0)", 136 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 > 3", 137 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 <= 1", 138 | "DELETE FROM %s WHERE pk = 'rangeDeletes' AND ck1 = 2 AND ck2 > 1 AND ck2 < 4", 139 | }, 140 | }, 141 | 142 | // Blob test cases 143 | { 144 | schemaBlobs, 145 | "blobs", 146 | []string{ 147 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 1, 0x1234)", 148 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 2, 0x)", 149 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 3, null)", 150 | 
"INSERT INTO %s (pk, ck, v) VALUES ('blobs', 4, 0x4321)", 151 | "INSERT INTO %s (pk, ck, v) VALUES ('blobs', 5, 0x00)", 152 | "UPDATE %s SET v = null WHERE pk = 'blobs' AND ck = 4", 153 | "UPDATE %s SET v = 0x WHERE pk = 'blobs' AND ck = 5", 154 | }, 155 | }, 156 | 157 | // Lists test cases 158 | { 159 | schemaLists, 160 | "listOverwrites", 161 | []string{ 162 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [1, 2, 3])", 163 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 1, [4, 5, 6, 7])", 164 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, [6, 5, 4, 3, 2, 1])", 165 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 2, null)", 166 | "INSERT INTO %s (pk, ck, v) VALUES ('listOverwrites', 3, [1, 11, 111])", 167 | "UPDATE %s SET v = [2, 22, 222] WHERE pk = 'listOverwrites' AND ck = 3", 168 | }, 169 | }, 170 | { 171 | schemaLists, 172 | "listAppends", 173 | []string{ 174 | "INSERT INTO %s (pk, ck, v) VALUES ('listAppends', 1, [1, 2, 3])", 175 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listAppends' AND ck = 1", 176 | "UPDATE %s SET v = [-2, -1, 0] + v WHERE pk = 'listAppends' AND ck = 1", 177 | }, 178 | }, 179 | { 180 | schemaLists, 181 | "listRemoves", 182 | []string{ 183 | "INSERT INTO %s (pk, ck, v) VALUES ('listRemoves', 1, [1, 2, 3])", 184 | "UPDATE %s SET v = v + [4, 5, 6] WHERE pk = 'listRemoves' AND ck = 1", 185 | "UPDATE %s SET v = v - [1, 2, 3] WHERE pk = 'listRemoves' AND ck = 1", 186 | }, 187 | }, 188 | 189 | // Set test cases 190 | { 191 | schemaSets, 192 | "setOverwrites", 193 | []string{ 194 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 1, {1, 2, 3, 4})", 195 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 1, {4, 5, 6, 7})", 196 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, {8, 9, 10, 11})", 197 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 2, null)", 198 | "INSERT INTO %s (pk, ck, v) VALUES ('setOverwrites', 3, {12, 13, 14, 15})", 199 | "UPDATE %s SET v = null WHERE pk = 
'setOverwrites' AND ck = 3", 200 | }, 201 | }, 202 | { 203 | schemaSets, 204 | "setAppends", 205 | []string{ 206 | "INSERT INTO %s (pk, ck, v) VALUES ('setAppends', 1, {1, 2, 3, 4})", 207 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 1", 208 | "UPDATE %s SET v = v + {5, 6} WHERE pk = 'setAppends' AND ck = 2", 209 | }, 210 | }, 211 | { 212 | schemaSets, 213 | "setRemovals", 214 | []string{ 215 | "INSERT INTO %s (pk, ck, v) VALUES ('setRemovals', 1, {1, 2, 3, 4})", 216 | "UPDATE %s SET v = v - {1, 3} WHERE pk = 'setRemovals' AND ck = 1", 217 | "UPDATE %s SET v = v - {1138} WHERE pk = 'setRemovals' AND ck = 2", 218 | }, 219 | }, 220 | 221 | // Map test cases 222 | { 223 | schemaMaps, 224 | "mapOverwrites", 225 | []string{ 226 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {1: 2, 3: 4})", 227 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 1, {5: 6, 7: 8})", 228 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, {9: 10, 11: 12})", 229 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 2, null)", 230 | "INSERT INTO %s (pk, ck, v) VALUES ('mapOverwrites', 3, {13: 14, 15: 16})", 231 | "UPDATE %s SET v = null WHERE pk = 'mapOverwrites' AND ck = 3", 232 | }, 233 | }, 234 | { 235 | schemaMaps, 236 | "mapSets", 237 | []string{ 238 | "INSERT INTO %s (pk, ck, v) VALUES ('mapSets', 1, {1: 2, 3: 4, 5: 6})", 239 | "UPDATE %s SET v[1] = 42 WHERE pk = 'mapSets' AND ck = 1", 240 | "UPDATE %s SET v[3] = null WHERE pk = 'mapSets' AND ck = 1", 241 | "UPDATE %s SET v[3] = 123 WHERE pk = 'mapSets' AND ck = 1", 242 | "UPDATE %s SET v[5] = 321 WHERE pk = 'mapSets' AND ck = 2", 243 | }, 244 | }, 245 | { 246 | schemaMaps, 247 | "mapAppends", 248 | []string{ 249 | "INSERT INTO %s (pk, ck, v) VALUES ('mapAppends', 1, {1: 2, 3: 4})", 250 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 1", 251 | "UPDATE %s SET v = v + {5: 6} WHERE pk = 'mapAppends' AND ck = 2", 252 | }, 253 | }, 254 | { 255 | schemaMaps, 256 | "mapRemovals", 257 | 
[]string{ 258 | "INSERT INTO %s (pk, ck, v) VALUES ('mapRemovals', 1, {1: 2, 3: 4})", 259 | "UPDATE %s SET v = v - {1} WHERE pk = 'mapRemovals' AND ck = 1", 260 | "UPDATE %s SET v = v - {1138} WHERE pk = 'mapRemovals' AND ck = 2", 261 | }, 262 | }, 263 | 264 | // Tuple test cases 265 | { 266 | schemaTuples, 267 | "tupleInserts", 268 | []string{ 269 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 1, (7, 'abc'))", 270 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, (9, 'def'))", 271 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleInserts', 2, null)", 272 | }, 273 | }, 274 | { 275 | schemaTuples, 276 | "tupleUpdates", 277 | []string{ 278 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 1, (7, 'abc'))", 279 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 2, (9, 'def'))", 280 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 3, (11, 'ghi'))", 281 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 4, (13, 'jkl'))", 282 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 5, (15, 'mno'))", 283 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 6, (17, 'pqr'))", 284 | "INSERT INTO %s (pk, ck, v) VALUES ('tupleUpdates', 7, (19, 'stu'))", 285 | "UPDATE %s SET v = (111, 'zyx') WHERE pk = 'tupleUpdates' AND ck = 1", 286 | "UPDATE %s SET v = null WHERE pk = 'tupleUpdates' AND ck = 2", 287 | "INSERT INTO %s (pk, ck) VALUES ('tupleUpdates', 3)", 288 | "UPDATE %s SET v = (null, null) WHERE pk = 'tupleUpdates' AND ck = 4", 289 | "UPDATE %s SET v = (null, 'asdf') WHERE pk = 'tupleUpdates' AND ck = 5", 290 | "UPDATE %s SET v = (123, null) WHERE pk = 'tupleUpdates' AND ck = 6", 291 | "UPDATE %s SET v = (null, '') WHERE pk = 'tupleUpdates' AND ck = 7", 292 | }, 293 | }, 294 | { 295 | schemaTuplesInTuples, 296 | "tuplesInTuples", 297 | []string{ 298 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 1, ((1, 'abc'), 7))", 299 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 2, ((3, 'def'), 9))", 300 | "INSERT INTO %s (pk, ck, v) VALUES 
('tuplesInTuples', 3, ((3, 'ghi'), 9))", 301 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuples', 4, ((3, 'jkl'), 9))", 302 | "UPDATE %s SET v = ((100, 'zyx'), 111) WHERE pk = 'tuplesInTuples' AND ck = 1", 303 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuples' AND ck = 2", 304 | "UPDATE %s SET v = ((200, null), 999) WHERE pk = 'tuplesInTuples' AND ck = 3", 305 | "UPDATE %s SET v = ((300, ''), 333) WHERE pk = 'tuplesInTuples' AND ck = 4", 306 | }, 307 | }, 308 | { 309 | schemaTuplesInTuplesInTuples, 310 | "tuplesInTuplesInTuples", 311 | []string{ 312 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 1, (((1, 9), 'abc'), 7))", 313 | "INSERT INTO %s (pk, ck, v) VALUES ('tuplesInTuplesInTuples', 2, (((3, 8), 'def'), 9))", 314 | "UPDATE %s SET v = (((100, 200), 'zyx'), 111) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 1", 315 | "UPDATE %s SET v = null WHERE pk = 'tuplesInTuplesInTuples' AND ck = 2", 316 | "UPDATE %s SET v = (null, 123) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 3", 317 | "UPDATE %s SET v = ((null, 'xyz'), 321) WHERE pk = 'tuplesInTuplesInTuples' AND ck = 4", 318 | }, 319 | }, 320 | 321 | // UDT test cases 322 | { 323 | schemaUDTs, 324 | "udt", 325 | []string{ 326 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 1, (2, 3, 'abc'))", 327 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 2, {a: 6, c: 'zxcv'})", 328 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 3, (9, 4, 'def'))", 329 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 4, (123, 321, 'ghi'))", 330 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (333, 222, 'jkl'))", 331 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 6, (432, 678, 'mno'))", 332 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 7, (765, 345, 'pqr'))", 333 | "UPDATE %s SET v.b = 41414 WHERE pk = 'udt' AND ck = 2", 334 | "UPDATE %s SET v = null WHERE pk = 'udt' AND ck = 3", 335 | "UPDATE %s SET v = {b: 123456, c: 'tyu'} WHERE pk = 'udt' AND ck = 4", 336 | "INSERT INTO %s (pk, ck, v) VALUES ('udt', 5, (999, 888, 'zxc'))", 337 | 
"UPDATE %s SET v.c = null WHERE pk = 'udt' AND ck = 6", 338 | "UPDATE %s SET v = {a: 923, b: 123456, c: ''} WHERE pk = 'udt' AND ck = 7", 339 | }, 340 | }, 341 | } 342 | 343 | func TestReplicator(t *testing.T) { 344 | filter := os.Getenv("REPLICATOR_TEST_FILTER") 345 | if filter == "" { 346 | filter = ".*" 347 | } 348 | re := regexp.MustCompile(filter) 349 | 350 | // Collect all schemas 351 | schemas := make(map[string]string) 352 | for _, tc := range testCases { 353 | schemas[tc.schema.tableName] = tc.schema.createQuery 354 | } 355 | 356 | sourceAddress := testutils.GetSourceClusterContactPoint() 357 | destinationAddress := testutils.GetDestinationClusterContactPoint() 358 | keyspaceName := testutils.GetUniqueName("test_keyspace") 359 | 360 | sourceSession := createSessionAndSetupSchema(t, sourceAddress, keyspaceName, true, schemas) 361 | defer sourceSession.Close() 362 | 363 | destinationSession := createSessionAndSetupSchema(t, destinationAddress, keyspaceName, false, schemas) 364 | defer destinationSession.Close() 365 | 366 | // Execute all of the queries 367 | for _, tc := range testCases { 368 | if !re.MatchString(tc.pk) { 369 | continue 370 | } 371 | for _, qStr := range tc.queries { 372 | execQuery(t, sourceSession, fmt.Sprintf(qStr, tc.schema.tableName)) 373 | } 374 | } 375 | 376 | t.Log("running replicators") 377 | 378 | adv := scyllacdc.AdvancedReaderConfig{ 379 | ChangeAgeLimit: time.Minute, 380 | PostNonEmptyQueryDelay: 3 * time.Second, 381 | PostEmptyQueryDelay: 3 * time.Second, 382 | PostFailedQueryDelay: 3 * time.Second, 383 | QueryTimeWindowSize: 5 * time.Minute, 384 | ConfidenceWindowSize: time.Millisecond, 385 | } 386 | 387 | schemaNames := make([]string, 0) 388 | for tbl := range schemas { 389 | schemaNames = append(schemaNames, fmt.Sprintf("%s.%s", keyspaceName, tbl)) 390 | } 391 | 392 | logger := log.New(os.Stderr, "", log.Ldate|log.Lmicroseconds|log.Lshortfile) 393 | replicator, err := newReplicator( 394 | context.Background(), 395 | 
sourceAddress, 396 | destinationAddress, 397 | schemaNames, 398 | &adv, 399 | gocql.Quorum, 400 | gocql.Quorum, 401 | "", 402 | logger, 403 | ) 404 | if err != nil { 405 | t.Fatal(err) 406 | } 407 | 408 | ctx := context.Background() 409 | 410 | errC := make(chan error) 411 | go func() { errC <- replicator.Run(ctx) }() 412 | 413 | time.Sleep(time.Second) 414 | 415 | replicator.StopAt(time.Now().Add(time.Second)) 416 | if err := <-errC; err != nil { 417 | t.Fatal(err) 418 | } 419 | 420 | t.Log("validating results") 421 | 422 | // Compare 423 | sourceSet := fetchFullSet(t, sourceSession, schemas) 424 | destinationSet := fetchFullSet(t, destinationSession, schemas) 425 | 426 | failedCount := 0 427 | 428 | for _, tc := range testCases { 429 | sourceData := sourceSet[tc.pk] 430 | destinationData := destinationSet[tc.pk] 431 | 432 | if len(sourceData) != len(destinationData) { 433 | t.Logf( 434 | "%s: source len %d, destination len %d\n", 435 | tc.pk, 436 | len(sourceData), 437 | len(destinationData), 438 | ) 439 | t.Log(" source:") 440 | for _, row := range sourceData { 441 | t.Logf(" %v", row) 442 | } 443 | t.Log(" dest:") 444 | for _, row := range destinationData { 445 | t.Logf(" %v", row) 446 | } 447 | t.Fail() 448 | failedCount++ 449 | continue 450 | } 451 | 452 | failed := false 453 | for i := 0; i < len(sourceData); i++ { 454 | if !reflect.DeepEqual(sourceData[i], destinationData[i]) { 455 | t.Logf("%s: mismatch", tc.pk) 456 | t.Logf(" source: %v", sourceData[i]) 457 | t.Logf(" dest: %v", destinationData[i]) 458 | failed = true 459 | } 460 | } 461 | 462 | if failed { 463 | t.Fail() 464 | failedCount++ 465 | } else { 466 | t.Logf("%s: OK", tc.pk) 467 | } 468 | } 469 | 470 | if failedCount > 0 { 471 | t.Logf("failed %d/%d test cases", failedCount, len(testCases)) 472 | } 473 | } 474 | 475 | func createSessionAndSetupSchema(t *testing.T, addr, keyspaceName string, withCdc bool, schemas map[string]string) *gocql.Session { 476 | t.Helper() 477 | 478 | 
testutils.CreateKeyspace(t, addr, keyspaceName) 479 | 480 | cfg := gocql.NewCluster(addr) 481 | cfg.Keyspace = keyspaceName 482 | session, err := cfg.CreateSession() 483 | if err != nil { 484 | t.Fatal(err) 485 | } 486 | 487 | for _, udt := range udts { 488 | execQuery(t, session, udt) 489 | } 490 | 491 | for _, tbl := range schemas { 492 | tblQuery := tbl 493 | if withCdc { 494 | tblQuery += " WITH cdc = {'enabled': true, 'preimage': true, 'postimage': true}" 495 | } 496 | execQuery(t, session, tblQuery) 497 | } 498 | 499 | err = session.AwaitSchemaAgreement(context.Background()) 500 | if err != nil { 501 | t.Fatal(err) 502 | } 503 | 504 | return session 505 | } 506 | 507 | func execQuery(t *testing.T, session *gocql.Session, query string) { 508 | t.Helper() 509 | 510 | t.Logf("executing query %s", query) 511 | err := session.Query(query).Exec() 512 | if err != nil { 513 | t.Fatal(err) 514 | } 515 | } 516 | 517 | func fetchFullSet(t *testing.T, session *gocql.Session, schemas map[string]string) map[string][]map[string]interface{} { 518 | t.Helper() 519 | 520 | groups := make(map[string][]map[string]interface{}) 521 | 522 | for tbl := range schemas { 523 | data, err := session.Query("SELECT * FROM " + tbl).Iter().SliceMap() 524 | if err != nil { 525 | t.Fatal(err) 526 | } 527 | 528 | for _, row := range data { 529 | pk := row["pk"].(string) 530 | groups[pk] = append(groups[pk], row) 531 | } 532 | } 533 | 534 | return groups 535 | } 536 | -------------------------------------------------------------------------------- /examples/replicator/utils.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "strings" 4 | 5 | // Re-implementation of the type parsing logic from the driver. 6 | // Unlike the driver, this implementation differentiates frozen types 7 | // from non-frozen ones. 
// Type identifies a CQL type as understood by this re-implementation
// of the driver's type parser. The numeric values match the CQL binary
// protocol type codes.
type Type int

const (
	TypeCustom    Type = 0x0000
	TypeAscii     Type = 0x0001
	TypeBigInt    Type = 0x0002
	TypeBlob      Type = 0x0003
	TypeBoolean   Type = 0x0004
	TypeCounter   Type = 0x0005
	TypeDecimal   Type = 0x0006
	TypeDouble    Type = 0x0007
	TypeFloat     Type = 0x0008
	TypeInt       Type = 0x0009
	TypeText      Type = 0x000A
	TypeTimestamp Type = 0x000B
	TypeUUID      Type = 0x000C
	TypeVarchar   Type = 0x000D
	TypeVarint    Type = 0x000E
	TypeTimeUUID  Type = 0x000F
	TypeInet      Type = 0x0010
	TypeDate      Type = 0x0011
	TypeTime      Type = 0x0012
	TypeSmallInt  Type = 0x0013
	TypeTinyInt   Type = 0x0014
	TypeDuration  Type = 0x0015
	TypeList      Type = 0x0020
	TypeMap       Type = 0x0021
	TypeSet       Type = 0x0022
	TypeUDT       Type = 0x0030
	TypeTuple     Type = 0x0031
)

// IsCollection reports whether t is a multi-cell type: list, map, set
// or UDT. Note that tuples are deliberately not treated as collections.
func (t Type) IsCollection() bool {
	return t == TypeList || t == TypeMap || t == TypeSet || t == TypeUDT
}

// TypeInfo describes a parsed CQL type, including whether it appeared
// inside a frozen<...> wrapper.
type TypeInfo interface {
	Type() Type
	IsFrozen() bool
	Unfrozen() TypeInfo
}

// FrozenType wraps the type that appeared inside frozen<...>.
type FrozenType struct {
	Inner TypeInfo
}

func (ft *FrozenType) Type() Type         { return ft.Inner.Type() }
func (ft *FrozenType) IsFrozen() bool     { return true }
func (ft *FrozenType) Unfrozen() TypeInfo { return ft.Inner }

// MapType represents map<Key, Value>.
type MapType struct {
	Key   TypeInfo
	Value TypeInfo
}

func (mt *MapType) Type() Type         { return TypeMap }
func (mt *MapType) IsFrozen() bool     { return false }
func (mt *MapType) Unfrozen() TypeInfo { return mt }

// ListType represents list<Element>.
type ListType struct {
	Element TypeInfo
}

func (lt *ListType) Type() Type         { return TypeList }
func (lt *ListType) IsFrozen() bool     { return false }
func (lt *ListType) Unfrozen() TypeInfo { return lt }

// SetType represents set<Element>.
type SetType struct {
	Element TypeInfo
}

func (st *SetType) Type() Type         { return TypeSet }
func (st *SetType) IsFrozen() bool     { return false }
func (st *SetType) Unfrozen() TypeInfo { return st }

// TupleType represents tuple<...> with an arbitrary number of elements.
type TupleType struct {
	Elements []TypeInfo
}

func (tt *TupleType) Type() Type         { return TypeTuple }
func (tt *TupleType) IsFrozen() bool     { return false }
func (tt *TupleType) Unfrozen() TypeInfo { return tt }

// NativeType represents a non-parameterized, non-UDT type.
type NativeType struct {
	RealType Type
}

func (nt *NativeType) Type() Type         { return nt.RealType }
func (nt *NativeType) IsFrozen() bool     { return false }
func (nt *NativeType) Unfrozen() TypeInfo { return nt }

// UDTType represents a user-defined type, identified by name only.
type UDTType struct {
	Name string
}

func (ut *UDTType) Type() Type         { return TypeUDT }
func (ut *UDTType) IsFrozen() bool     { return false }
func (ut *UDTType) Unfrozen() TypeInfo { return ut }

// stripWrapper returns the contents between prefix and the trailing '>'
// if str starts with prefix (e.g. "list<" + "int" + ">"), and reports
// whether the prefix matched.
func stripWrapper(str, prefix string) (string, bool) {
	if !strings.HasPrefix(str, prefix) {
		return "", false
	}
	return strings.TrimSuffix(strings.TrimPrefix(str, prefix), ">"), true
}

// parseType parses a CQL type name such as "map<int, frozen<list<text>>>"
// into a TypeInfo tree. Unknown plain names are assumed to be UDTs.
func parseType(str string) TypeInfo {
	if inner, ok := stripWrapper(str, "frozen<"); ok {
		return &FrozenType{parseType(inner)}
	}
	if inner, ok := stripWrapper(str, "list<"); ok {
		return &ListType{parseType(inner)}
	}
	if inner, ok := stripWrapper(str, "set<"); ok {
		return &SetType{parseType(inner)}
	}
	if inner, ok := stripWrapper(str, "map<"); ok {
		kv := parseTypeList(inner)
		return &MapType{Key: kv[0], Value: kv[1]}
	}
	if inner, ok := stripWrapper(str, "tuple<"); ok {
		return &TupleType{Elements: parseTypeList(inner)}
	}
	if typ := parseNativeType(str); typ != TypeUDT {
		return &NativeType{RealType: typ}
	}
	return &UDTType{Name: str}
}

// parseTypeList splits a comma-separated list of type names, honoring
// nested <...> brackets, and parses each element.
func parseTypeList(str string) []TypeInfo {
	var (
		parsed  []TypeInfo
		depth   int
		current strings.Builder
	)
	flush := func() {
		parsed = append(parsed, parseType(strings.TrimSpace(current.String())))
		current.Reset()
	}
	for _, r := range str {
		if r == ',' && depth == 0 {
			flush()
			continue
		}
		if r == '<' {
			depth++
		} else if r == '>' {
			depth--
		}
		current.WriteRune(r)
	}
	if current.Len() != 0 {
		flush()
	}
	return parsed
}

// nativeTypes maps plain CQL type names to their Type constants.
// Names absent from this map are assumed to denote user-defined types.
var nativeTypes = map[string]Type{
	"ascii":     TypeAscii,
	"bigint":    TypeBigInt,
	"blob":      TypeBlob,
	"boolean":   TypeBoolean,
	"counter":   TypeCounter,
	"date":      TypeDate,
	"decimal":   TypeDecimal,
	"double":    TypeDouble,
	"duration":  TypeDuration,
	"float":     TypeFloat,
	"int":       TypeInt,
	"smallint":  TypeSmallInt,
	"tinyint":   TypeTinyInt,
	"time":      TypeTime,
	"timestamp": TypeTimestamp,
	"uuid":      TypeUUID,
	"varchar":   TypeVarchar,
	"text":      TypeText,
	"varint":    TypeVarint,
	"timeuuid":  TypeTimeUUID,
	"inet":      TypeInet,
}

// parseNativeType resolves a non-parameterized type name. Unknown names
// are assumed to be UDTs.
func parseNativeType(str string) Type {
	if t, ok := nativeTypes[str]; ok {
		return t
	}
	return TypeUDT
}
272 | } 273 | } 274 | -------------------------------------------------------------------------------- /examples/simple-printer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "os" 8 | 9 | "github.com/gocql/gocql" 10 | 11 | scyllacdc "github.com/scylladb/scylla-cdc-go" 12 | ) 13 | 14 | // Make sure you create the following table before you run this example: 15 | // CREATE TABLE ks.tbl (pk int, ck int, v int, PRIMARY KEY (pk, ck)) WITH cdc = {'enabled': 'true'}; 16 | 17 | func main() { 18 | if err := run(context.Background(), []string{"127.0.0.1"}, "local-dc", "ks.tbl"); err != nil { 19 | log.Fatal(err) 20 | } 21 | } 22 | 23 | func run(ctx context.Context, hosts []string, localDC, tableName string) error { 24 | cluster := gocql.NewCluster(hosts...) 25 | cluster.PoolConfig.HostSelectionPolicy = gocql.TokenAwareHostPolicy(gocql.DCAwareRoundRobinPolicy(localDC)) 26 | session, err := cluster.CreateSession() 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | defer session.Close() 31 | 32 | cfg := &scyllacdc.ReaderConfig{ 33 | Session: session, 34 | TableNames: []string{tableName}, 35 | ChangeConsumerFactory: changeConsumerFactory, 36 | Logger: log.New(os.Stderr, "", log.Ldate|log.Lshortfile), 37 | } 38 | 39 | reader, err := scyllacdc.NewReader(ctx, cfg) 40 | if err != nil { 41 | return err 42 | } 43 | 44 | return reader.Run(ctx) 45 | } 46 | 47 | func consumeChange(ctx context.Context, tableName string, c scyllacdc.Change) error { 48 | for _, changeRow := range c.Delta { 49 | pkRaw, _ := changeRow.GetValue("pk") 50 | ckRaw, _ := changeRow.GetValue("ck") 51 | v := changeRow.GetAtomicChange("v") 52 | 53 | pk := pkRaw.(*int) 54 | ck := ckRaw.(*int) 55 | 56 | fmt.Printf("Operation: %s, pk: %s, ck: %s\n", changeRow.GetOperation(), 57 | nullableIntToStr(pk), nullableIntToStr(ck)) 58 | 59 | if v.IsDeleted { 60 | fmt.Printf(" Column v was set to null/deleted\n") 61 
| } else { 62 | vInt := v.Value.(*int) 63 | if vInt != nil { 64 | fmt.Printf(" Column v was set to %d\n", *vInt) 65 | } else { 66 | fmt.Print(" Column v was not changed\n") 67 | } 68 | } 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func nullableIntToStr(i *int) string { 75 | if i == nil { 76 | return "null" 77 | } 78 | return fmt.Sprintf("%d", *i) 79 | } 80 | 81 | var changeConsumerFactory = scyllacdc.MakeChangeConsumerFactoryFromFunc(consumeChange) 82 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/scylladb/scylla-cdc-go 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/gocql/gocql v0.0.0-20201215165327-e49edf966d90 7 | golang.org/x/sync v0.8.0 8 | ) 9 | 10 | replace github.com/gocql/gocql => github.com/scylladb/gocql v1.14.4 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= 2 | github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= 3 | github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= 4 | github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= 5 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 7 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= 9 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 10 | 
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= 11 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 12 | github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= 13 | github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= 14 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 15 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 16 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 17 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 18 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 21 | github.com/scylladb/gocql v1.14.4 h1:MhevwCfyAraQ6RvZYFO3pF4Lt0YhvQlfg8Eo2HEqVQA= 22 | github.com/scylladb/gocql v1.14.4/go.mod h1:ZLEJ0EVE5JhmtxIW2stgHq/v1P4fWap0qyyXSKyV8K0= 23 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 24 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 25 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 26 | golang.org/x/net v0.0.0-20220526153639-5463443f8c37 h1:lUkvobShwKsOesNfWWlCS5q7fnbG1MEliIzwu886fn8= 27 | golang.org/x/net v0.0.0-20220526153639-5463443f8c37/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 28 | golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= 29 | golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 30 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 31 | golang.org/x/sys 
v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 32 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 33 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 34 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 35 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 36 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 37 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 38 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 39 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 40 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 41 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 42 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 43 | sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 44 | sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= 45 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | type Logger interface { 4 | Printf(format string, v ...interface{}) 5 | } 6 | 7 | type noLogger struct{} 8 | 9 | func (noLogger) Printf(_ string, _ ...interface{}) {} 10 | -------------------------------------------------------------------------------- /progress.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/gocql/gocql" 9 | 
	"golang.org/x/sync/semaphore"
)

// ProgressManager allows the library to load and save progress for each
// stream and table separately.
type ProgressManager interface {
	// GetCurrentGeneration returns the time of the generation that was
	// last saved by StartGeneration. The library will call this function
	// at the beginning in order to determine from which generation it should
	// start reading first.
	//
	// If there is no information available about the time of the generation
	// from which reading should start, GetCurrentGeneration can return
	// a zero time value. In that case, reading will start from the point
	// determined by AdvancedReaderConfig.ChangeAgeLimit.
	//
	// If this function returns an error, the library will stop with an error.
	GetCurrentGeneration(ctx context.Context) (time.Time, error)

	// StartGeneration is called after all changes have been read from the
	// previous generation and the library is about to start processing
	// the next one. The ProgressManager should save this information so that
	// GetCurrentGeneration will return it after the library is restarted.
	//
	// If this function returns an error, the library will stop with an error.
	StartGeneration(ctx context.Context, gen time.Time) error

	// GetProgress retrieves information about the progress of a given stream,
	// in a given table. If there was no progress saved for this stream
	// during this generation, GetProgress can return a zero Progress value
	// and the library will start processing changes from the stream
	// starting from the beginning of the generation.
	//
	// This method needs to be thread-safe, as the library is allowed to
	// call it concurrently for different combinations of `table` and `streamID`.
	// The library won't issue concurrent calls to this method with the same
	// `table` and `streamID` parameters.
	//
	// If this function returns an error, the library will stop with an error.
	GetProgress(ctx context.Context, gen time.Time, table string, streamID StreamID) (Progress, error)

	// SaveProgress stores information about the last cdc log record which was
	// processed successfully. If the reader is restarted, it should resume
	// work for this stream starting from the row _after_ the last saved
	// timestamp.
	//
	// This method is only called by ChangeConsumers, indirectly through
	// the ProgressReporter struct. Within a generation, ChangeConsumers
	// are run concurrently, therefore SaveProgress should be safe to call
	// concurrently.
	//
	// Contrary to other methods, an error returned does not immediately
	// result in the library stopping with an error. The error is propagated
	// to the ChangeConsumer, and it can decide what to do with the error next.
	SaveProgress(ctx context.Context, gen time.Time, table string, streamID StreamID, progress Progress) error
}

// ProgressManagerWithStartTime is an extension to the ProgressManager interface.
type ProgressManagerWithStartTime interface {
	ProgressManager

	// GetApplicationReadStartTime returns the timestamp from which
	// the application started reading data. The library uses this timestamp
	// as a lower bound to determine where it should start reading. For example,
	// if there is no generation saved or there is no progress information
	// saved for a stream, reading will be restarted from the given timestamp
	// (or higher if the generation timestamp is higher).
	//
	// If this function returns a zero time value, the library will start
	// reading from `time.Now() - AdvancedReaderConfig.ChangeAgeLimit`.
	// (The original comment said "zero timeuuid"; the method returns a
	// time.Time, so a zero time value is what is actually checked.)
	// If this function returns an error, the library will stop with an error.
	GetApplicationReadStartTime(ctx context.Context) (time.Time, error)

	// SaveApplicationReadStartTime stores information about the timestamp
	// from which the application originally started reading data.
	// It is called by the library if there was no start timestamp saved.
	//
	// If this function returns an error, the library will stop with an error.
	SaveApplicationReadStartTime(ctx context.Context, startTime time.Time) error
}

// ProgressReporter is a helper object for the ChangeConsumer. It allows
// the consumer to save its progress.
type ProgressReporter struct {
	progressManager ProgressManager // backend that actually persists progress
	gen             time.Time       // generation the consumer is reading from
	tableName       string          // fully qualified table the consumer reads
	streamID        StreamID        // stream the consumer is assigned to
}

// MarkProgress saves progress for the consumer associated with the ProgressReporter.
//
// The associated ChangeConsumer is allowed to call it anytime between its
// creation by ChangeConsumerFactory and the moment it is stopped (the call to
// (ChangeConsumer).End() finishes).
func (pr *ProgressReporter) MarkProgress(ctx context.Context, progress Progress) error {
	return pr.progressManager.SaveProgress(ctx, pr.gen, pr.tableName, pr.streamID, progress)
}

// Progress represents the point up to which the library has processed changes
// in a given stream.
type Progress struct {
	// LastProcessedRecordTime represents the value of the cdc$time column
	// of the last processed record in the stream.
	LastProcessedRecordTime gocql.UUID
}

// noProgressManager does not actually save any progress, and always reports
// zero progress. This implementation can be used when saving progress
// is not necessary for the application.
type noProgressManager struct{}

// GetCurrentGeneration is needed to implement the ProgressManager interface.
122 | func (noProgressManager) GetCurrentGeneration(ctx context.Context) (time.Time, error) { 123 | return time.Time{}, nil 124 | } 125 | 126 | // StartGeneration is needed to implement the ProgressManager interface. 127 | func (noProgressManager) StartGeneration(ctx context.Context, gen time.Time) error { 128 | return nil 129 | } 130 | 131 | // GetProgress is needed to implement the ProgressManager interface. 132 | func (noProgressManager) GetProgress(ctx context.Context, gen time.Time, table string, streamID StreamID) (Progress, error) { 133 | return Progress{}, nil 134 | } 135 | 136 | // SaveProgress is needed to implement the ProgressManager interface. 137 | func (noProgressManager) SaveProgress(ctx context.Context, gen time.Time, table string, streamID StreamID, progress Progress) error { 138 | return nil 139 | } 140 | 141 | // TableBackedProgressManager is a ProgressManager which saves progress in a Scylla table. 142 | // 143 | // The schema is as follows: 144 | // 145 | // CREATE TABLE IF NOT EXISTS ( 146 | // generation timestamp, 147 | // application_name text, 148 | // table_name text, 149 | // stream_id blob, 150 | // last_timestamp timeuuid, 151 | // current_generation timestamp, 152 | // PRIMARY KEY ((generation, application_name, table_name, stream_id)) 153 | // ) 154 | // 155 | // Progress for each stream is stored in a separate row, indexed by generation, 156 | // application_name, table_name and stream_id. 157 | // 158 | // For storing information about current generation, special rows with stream 159 | // set to empty bytes is used. 160 | type TableBackedProgressManager struct { 161 | session *gocql.Session 162 | progressTableName string 163 | applicationName string 164 | 165 | // TTL to use when writing progress for a stream (a week by default). 166 | // TODO: maybe not? maybe we should clean up this data manually? 
167 | // Progress data may be large if generations are very large 168 | ttl int32 169 | 170 | concurrentQueryLimiter *semaphore.Weighted 171 | } 172 | 173 | // NewTableBackedProgressManager creates a new TableBackedProgressManager. 174 | func NewTableBackedProgressManager(session *gocql.Session, progressTableName, applicationName string) (*TableBackedProgressManager, error) { 175 | tbpm := &TableBackedProgressManager{ 176 | session: session, 177 | progressTableName: progressTableName, 178 | applicationName: applicationName, 179 | 180 | ttl: 7 * 24 * 60 * 60, // 1 week 181 | 182 | concurrentQueryLimiter: semaphore.NewWeighted(100), // TODO: Make units configurable 183 | } 184 | 185 | if err := tbpm.ensureTableExists(); err != nil { 186 | return nil, err 187 | } 188 | return tbpm, nil 189 | } 190 | 191 | // SetTTL sets the TTL used to expire progress. By default, it's 7 days. 192 | func (tbpm *TableBackedProgressManager) SetTTL(ttl int32) { 193 | tbpm.ttl = ttl 194 | } 195 | 196 | // SetMaxConcurrency sets the maximum allowed concurrency for write operations. 197 | // By default, it's 100. 198 | // This function must not be called after Reader for this manager is started. 199 | func (tbpm *TableBackedProgressManager) SetMaxConcurrency(maxConcurrentOps int64) { 200 | tbpm.concurrentQueryLimiter = semaphore.NewWeighted(maxConcurrentOps) 201 | } 202 | 203 | func (tbpm *TableBackedProgressManager) ensureTableExists() error { 204 | return tbpm.session.Query( 205 | fmt.Sprintf( 206 | "CREATE TABLE IF NOT EXISTS %s "+ 207 | "(generation timestamp, application_name text, table_name text, stream_id blob, last_timestamp timeuuid, current_generation timestamp, "+ 208 | "PRIMARY KEY ((generation, application_name, table_name, stream_id)))", 209 | tbpm.progressTableName, 210 | ), 211 | ).Exec() 212 | } 213 | 214 | // GetCurrentGeneration is needed to implement the ProgressManager interface. 
215 | func (tbpm *TableBackedProgressManager) GetCurrentGeneration(ctx context.Context) (time.Time, error) { 216 | var gen time.Time 217 | err := tbpm.session.Query( 218 | fmt.Sprintf("SELECT current_generation FROM %s WHERE generation = ? AND application_name = ? AND table_name = ? AND stream_id = ?", tbpm.progressTableName), 219 | time.Time{}, tbpm.applicationName, "", []byte{}, 220 | ).Scan(&gen) 221 | 222 | if err != nil && err != gocql.ErrNotFound { 223 | return time.Time{}, err 224 | } 225 | return gen, nil 226 | } 227 | 228 | // StartGeneration is needed to implement the ProgressManager interface. 229 | func (tbpm *TableBackedProgressManager) StartGeneration(ctx context.Context, gen time.Time) error { 230 | // Update the progress in the special partition 231 | return tbpm.session.Query( 232 | fmt.Sprintf( 233 | "INSERT INTO %s (generation, application_name, table_name, stream_id, current_generation) "+ 234 | "VALUES (?, ?, ?, ?, ?)", 235 | tbpm.progressTableName, 236 | ), 237 | time.Time{}, tbpm.applicationName, "", []byte{}, gen, 238 | ).Exec() 239 | } 240 | 241 | // GetProgress is needed to implement the ProgressManager interface. 242 | func (tbpm *TableBackedProgressManager) GetProgress(ctx context.Context, gen time.Time, tableName string, streamID StreamID) (Progress, error) { 243 | err := tbpm.concurrentQueryLimiter.Acquire(ctx, 1) 244 | if err != nil { 245 | return Progress{}, err 246 | } 247 | defer tbpm.concurrentQueryLimiter.Release(1) 248 | 249 | var timestamp gocql.UUID 250 | err = tbpm.session.Query( 251 | fmt.Sprintf("SELECT last_timestamp FROM %s WHERE generation = ? AND application_name = ? AND table_name = ? AND stream_id = ?", tbpm.progressTableName), 252 | gen, tbpm.applicationName, tableName, streamID, 253 | ).Scan(×tamp) 254 | 255 | if err != nil && err != gocql.ErrNotFound { 256 | return Progress{}, err 257 | } 258 | return Progress{timestamp}, nil 259 | } 260 | 261 | // SaveProgress is needed to implement the ProgressManager interface. 
262 | func (tbpm *TableBackedProgressManager) SaveProgress(ctx context.Context, gen time.Time, tableName string, streamID StreamID, progress Progress) error { 263 | err := tbpm.concurrentQueryLimiter.Acquire(ctx, 1) 264 | if err != nil { 265 | return err 266 | } 267 | defer tbpm.concurrentQueryLimiter.Release(1) 268 | 269 | return tbpm.session.Query( 270 | fmt.Sprintf("INSERT INTO %s (generation, application_name, table_name, stream_id, last_timestamp) VALUES (?, ?, ?, ?, ?) USING TTL ?", tbpm.progressTableName), 271 | gen, tbpm.applicationName, tableName, streamID, progress.LastProcessedRecordTime, tbpm.ttl, 272 | ).Exec() 273 | } 274 | 275 | // SaveApplicationReadStartTime is needed to implement the ProgressManagerWithStartTime interface. 276 | func (tbpm *TableBackedProgressManager) SaveApplicationReadStartTime(ctx context.Context, startTime time.Time) error { 277 | // Store information about the timestamp in the `last_timestamp` column, 278 | // in the special partition with "zero generation". 279 | return tbpm.session.Query( 280 | fmt.Sprintf( 281 | "INSERT INTO %s (generation, application_name, table_name, stream_id, last_timestamp) "+ 282 | "VALUES (?, ?, ?, ?, ?)", 283 | tbpm.progressTableName, 284 | ), 285 | time.Time{}, tbpm.applicationName, "", []byte{}, gocql.MinTimeUUID(startTime), 286 | ).Exec() 287 | } 288 | 289 | // GetApplicationReadStartTime is needed to implement the ProgressManagerWithStartTime interface. 290 | func (tbpm *TableBackedProgressManager) GetApplicationReadStartTime(ctx context.Context) (time.Time, error) { 291 | // Retrieve the information from the special column 292 | var timestamp gocql.UUID 293 | err := tbpm.session.Query( 294 | fmt.Sprintf("SELECT last_timestamp FROM %s WHERE generation = ? AND application_name = ? AND table_name = ? 
AND stream_id = ?", tbpm.progressTableName), 295 | time.Time{}, tbpm.applicationName, "", []byte{}, 296 | ).Scan(×tamp) 297 | if err != nil && err != gocql.ErrNotFound { 298 | return time.Time{}, err 299 | } 300 | return timestamp.Time(), nil 301 | } 302 | 303 | var ( 304 | _ ProgressManager = (*TableBackedProgressManager)(nil) 305 | _ ProgressManagerWithStartTime = (*TableBackedProgressManager)(nil) 306 | ) 307 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | import ( 4 | "context" 5 | "encoding/binary" 6 | "errors" 7 | "strings" 8 | "sync/atomic" 9 | "time" 10 | 11 | "github.com/gocql/gocql" 12 | "golang.org/x/sync/errgroup" 13 | ) 14 | 15 | // ReaderConfig defines parameters used for creation of the CDC Reader object. 16 | type ReaderConfig struct { 17 | // An active gocql session to the cluster. 18 | Session *gocql.Session 19 | 20 | // Names of the tables for which to read changes. This should be the name 21 | // of the base table, not the cdc log table. 22 | // Can be prefixed with keyspace name. 23 | TableNames []string 24 | 25 | // Consistency to use when querying CDC log. 26 | // If not specified, LOCAL_QUORUM consistency will be used. 27 | Consistency gocql.Consistency 28 | 29 | // Creates ChangeProcessors, which process information fetched from the CDC log. 30 | // A callback which processes information fetched from the CDC log. 31 | ChangeConsumerFactory ChangeConsumerFactory 32 | 33 | // An object which allows the reader to read and write information about 34 | // current progress. 35 | ProgressManager ProgressManager 36 | 37 | // A logger. If set, it will receive log messages useful for debugging of the library. 38 | Logger Logger 39 | 40 | // Advanced parameters. 
41 | Advanced AdvancedReaderConfig 42 | } 43 | 44 | func (rc *ReaderConfig) validate() error { 45 | if len(rc.TableNames) == 0 { 46 | return errors.New("no table names specified to read from") 47 | } 48 | if rc.ChangeConsumerFactory == nil { 49 | return errors.New("no change consumer factory specified") 50 | } 51 | 52 | return nil 53 | } 54 | 55 | func (rc *ReaderConfig) setDefaults() { 56 | if rc.Consistency == 0 { 57 | // Consistency 0 is ANY. It doesn't make sense 58 | // to use it for reading, so default to LOCAL_QUORUM instead 59 | rc.Consistency = gocql.LocalQuorum 60 | } 61 | if rc.ProgressManager == nil { 62 | rc.ProgressManager = noProgressManager{} 63 | } 64 | if rc.Logger == nil { 65 | rc.Logger = noLogger{} 66 | } 67 | rc.Advanced.setDefaults() 68 | } 69 | 70 | // AdvancedReaderConfig contains advanced parameters that control behavior 71 | // of the CDC Reader. It is not recommended to change them unless really 72 | // necessary. They have carefully selected default values that should work for 73 | // most cases. Changing these parameters need to be done carefully. 74 | type AdvancedReaderConfig struct { 75 | // ConfidenceWindowSize defines a minimal age a change must have in order 76 | // to be read. 77 | // 78 | // Due to the eventually consistent nature of Scylla, newer writes may 79 | // appear in CDC log earlier than some older writes. This can cause the 80 | // Reader to skip the older write, therefore the need for this parameter. 81 | // 82 | // If the parameter is left as 0, the library will automatically choose 83 | // a default confidence window size. 84 | ConfidenceWindowSize time.Duration 85 | 86 | // The library uses select statements to fetch changes from CDC Log tables. 87 | // Each select fetches changes from a single table and fetches only changes 88 | // from a limited set of CDC streams. If such select returns one or more 89 | // changes then next select to this table and set of CDC streams will be 90 | // issued after a delay. 
This parameter specifies the length of the delay. 91 | // 92 | // If the parameter is left as 0, the library will automatically adjust 93 | // the length of the delay. 94 | PostNonEmptyQueryDelay time.Duration 95 | 96 | // The library uses select statements to fetch changes from CDC Log tables. 97 | // Each select fetches changes from a single table and fetches only changes 98 | // from a limited set of CDC streams. If such select returns no changes then 99 | // next select to this table and set of CDC streams will be issued after 100 | // a delay. This parameter specifies the length of the delay. 101 | // 102 | // If the parameter is left as 0, the library will automatically adjust 103 | // the length of the delay. 104 | PostEmptyQueryDelay time.Duration 105 | 106 | // If the library tries to read from the CDC log and the read operation 107 | // fails, it will wait some time before attempting to read again. This 108 | // parameter specifies the length of the delay. 109 | // 110 | // If the parameter is left as 0, the library will automatically adjust 111 | // the length of the delay. 112 | PostFailedQueryDelay time.Duration 113 | 114 | // Changes are queried using select statements with restriction on the time 115 | // those changes appeared. The restriction is bounding the time from both 116 | // lower and upper bounds. This parameter defines the width of the time 117 | // window used for the restriction. 118 | // 119 | // If the parameter is left as 0, the library will automatically adjust 120 | // the size of the restriction window. 121 | QueryTimeWindowSize time.Duration 122 | 123 | // When the library starts for the first time it has to start consuming 124 | // changes from some point in time. This parameter defines how far in the 125 | // past it needs to look. If the value of the parameter is set to an hour, 126 | // then the library will only read historical changes that are no older than 127 | // an hour. 
128 | // 129 | // Note of caution: data in CDC Log table is automatically deleted so 130 | // setting this parameter to something bigger than TTL used on CDC Log won’t 131 | // cause changes older than this TTL to appear. 132 | // 133 | // If the parameter is left as 0, the library will automatically adjust 134 | // the size of the restriction window. 135 | ChangeAgeLimit time.Duration 136 | } 137 | 138 | func (arc *AdvancedReaderConfig) setDefaults() { 139 | setIfZero := func(p *time.Duration, v time.Duration) { 140 | if *p == 0 { 141 | *p = v 142 | } 143 | } 144 | setIfZero(&arc.ConfidenceWindowSize, 30*time.Second) 145 | 146 | setIfZero(&arc.PostNonEmptyQueryDelay, 10*time.Second) 147 | setIfZero(&arc.PostEmptyQueryDelay, 30*time.Second) 148 | setIfZero(&arc.PostFailedQueryDelay, 1*time.Second) 149 | 150 | setIfZero(&arc.QueryTimeWindowSize, 30*time.Second) 151 | setIfZero(&arc.ChangeAgeLimit, 1*time.Minute) 152 | } 153 | 154 | // Copy makes a shallow copy of the ReaderConfig. 155 | func (rc *ReaderConfig) Copy() *ReaderConfig { 156 | newRC := &ReaderConfig{} 157 | *newRC = *rc 158 | return newRC 159 | } 160 | 161 | const ( 162 | cdcTableSuffix string = "_scylla_cdc_log" 163 | ) 164 | 165 | // Reader reads changes from CDC logs of the specified tables. 166 | type Reader struct { 167 | config *ReaderConfig 168 | genFetcher *generationFetcher 169 | readFrom time.Time 170 | stoppedCh chan struct{} 171 | stopTime atomic.Value 172 | } 173 | 174 | // NewReader creates a new CDC reader using the specified configuration. 
175 | func NewReader(ctx context.Context, config *ReaderConfig) (*Reader, error) { 176 | config = config.Copy() 177 | 178 | config.setDefaults() 179 | if err := config.validate(); err != nil { 180 | return nil, err 181 | } 182 | 183 | readFrom, err := determineStartTimestamp(ctx, config) 184 | if err != nil { 185 | return nil, err 186 | } 187 | 188 | genFetcher, err := newGenerationFetcher( 189 | config.Session, 190 | readFrom, 191 | config.Logger, 192 | ) 193 | if err != nil { 194 | return nil, err 195 | } 196 | 197 | reader := &Reader{ 198 | config: config, 199 | genFetcher: genFetcher, 200 | readFrom: readFrom, 201 | stoppedCh: make(chan struct{}), 202 | } 203 | return reader, nil 204 | } 205 | 206 | func determineStartTimestamp(ctx context.Context, config *ReaderConfig) (time.Time, error) { 207 | mostRecentGeneration, err := config.ProgressManager.GetCurrentGeneration(ctx) 208 | if err != nil { 209 | return time.Time{}, err 210 | } 211 | if mostRecentGeneration.IsZero() { 212 | config.Logger.Printf("no information about the last generation was found") 213 | } else { 214 | config.Logger.Printf("last saved progress was at generation %v", mostRecentGeneration) 215 | } 216 | 217 | var applicationStartTime time.Time 218 | if withStartTime, ok := config.ProgressManager.(ProgressManagerWithStartTime); ok { 219 | applicationStartTime, err = withStartTime.GetApplicationReadStartTime(ctx) 220 | if err != nil { 221 | return time.Time{}, err 222 | } 223 | if applicationStartTime.IsZero() { 224 | config.Logger.Printf("no information about the application start time was found") 225 | } else { 226 | config.Logger.Printf("application started reading from time point %v", mostRecentGeneration) 227 | } 228 | } 229 | 230 | // Choose the maximum of those two 231 | readFrom := mostRecentGeneration 232 | if readFrom.Before(applicationStartTime) { 233 | readFrom = applicationStartTime 234 | } 235 | 236 | // If the timestamp is still zero, calculate the start time based on 
ChangeAgeLimit 237 | if readFrom.IsZero() { 238 | config.Logger.Printf("neither last generation nor application start time is available, will use ChangeAgeLimit") 239 | readFrom = time.Now().Add(-config.Advanced.ChangeAgeLimit) 240 | 241 | // Need to save this timestamp, if the ProgressManager supports that 242 | if withStartTime, ok := config.ProgressManager.(ProgressManagerWithStartTime); ok { 243 | if err := withStartTime.SaveApplicationReadStartTime(ctx, readFrom); err != nil { 244 | return time.Time{}, err 245 | } 246 | } 247 | } 248 | 249 | config.Logger.Printf("the application will start reading from %v or later (depending on per-stream saved progress)", readFrom) 250 | return readFrom, nil 251 | } 252 | 253 | // Run runs the CDC reader. This call is blocking and returns after an error occurs, or the reader 254 | // is stopped gracefully. 255 | func (r *Reader) Run(ctx context.Context) error { 256 | l := r.config.Logger 257 | 258 | runErrG, runCtx := errgroup.WithContext(ctx) 259 | 260 | runErrG.Go(func() error { 261 | select { 262 | case <-runCtx.Done(): 263 | return runCtx.Err() 264 | case <-r.stoppedCh: 265 | } 266 | r.genFetcher.Stop() 267 | return nil 268 | }) 269 | runErrG.Go(func() error { 270 | return r.genFetcher.Run(runCtx) 271 | }) 272 | runErrG.Go(func() error { 273 | gen, err := r.genFetcher.Get(runCtx) 274 | if gen == nil { 275 | return err 276 | } 277 | 278 | if r.readFrom.Before(gen.startTime) { 279 | r.readFrom = gen.startTime 280 | } 281 | 282 | for { 283 | l.Printf("starting reading generation %v from timestamp %v", gen.startTime, r.readFrom) 284 | 285 | if err := r.config.ProgressManager.StartGeneration(ctx, gen.startTime); err != nil { 286 | return err 287 | } 288 | 289 | // Start batch readers for this generation 290 | split := r.splitStreams(gen.streams) 291 | 292 | l.Printf("grouped %d streams into %d batches", len(gen.streams), len(split)) 293 | 294 | genErrG, genCtx := errgroup.WithContext(runCtx) 295 | 296 | readers := 
make([]*streamBatchReader, 0, len(split)*len(r.config.TableNames)) 297 | for _, fullTableName := range r.config.TableNames { 298 | // TODO: This is ugly? 299 | splitName := strings.SplitN(fullTableName, ".", 2) 300 | keyspaceName := splitName[0] 301 | tableName := splitName[1] 302 | 303 | // Fetch the current table's TTL 304 | startTime := r.readFrom 305 | ttl, err := fetchScyllaCDCExtensionTTL(r.config.Session, keyspaceName, tableName) 306 | if err == nil { 307 | if ttl != 0 { 308 | l.Printf("the TTL for %s.%s is %d seconds", keyspaceName, tableName, ttl) 309 | ttlBound := time.Now().Add(-time.Duration(ttl) * time.Second) 310 | if startTime.Before(ttlBound) { 311 | startTime = ttlBound 312 | } 313 | } else { 314 | l.Printf("the table %s.%s has not TTL set", keyspaceName, tableName) 315 | } 316 | } else { 317 | l.Printf("failed to fetch TTL for table %s.%s, assuming no TTL; error: %s", keyspaceName, tableName, err) 318 | } 319 | 320 | for _, group := range split { 321 | readers = append(readers, newStreamBatchReader( 322 | r.config, 323 | gen.startTime, 324 | group, 325 | keyspaceName, 326 | tableName, 327 | gocql.MinTimeUUID(startTime), 328 | )) 329 | } 330 | } 331 | 332 | sleepAmount := r.config.Advanced.PostNonEmptyQueryDelay / time.Duration(len(readers)) 333 | for i := range readers { 334 | reader := readers[i] 335 | select { 336 | case <-ctx.Done(): 337 | return ctx.Err() 338 | case <-time.After(sleepAmount): 339 | } 340 | genErrG.Go(func() error { 341 | return reader.run(genCtx) 342 | }) 343 | } 344 | 345 | var nextGen *generation 346 | genErrG.Go(func() error { 347 | var err error 348 | nextGen, err = r.genFetcher.Get(genCtx) 349 | if err != nil { 350 | return err 351 | } 352 | for _, reader := range readers { 353 | if nextGen == nil { 354 | // The reader was stopped 355 | stopAt, _ := r.stopTime.Load().(time.Time) 356 | if stopAt.IsZero() { 357 | reader.stopNow() 358 | } else { 359 | reader.close(gocql.MaxTimeUUID(stopAt)) 360 | r.readFrom = stopAt 361 | } 
362 | } else { 363 | reader.close(gocql.MinTimeUUID(nextGen.startTime)) 364 | r.readFrom = nextGen.startTime 365 | } 366 | } 367 | return nil 368 | }) 369 | 370 | if err := genErrG.Wait(); err != nil { 371 | return err 372 | } 373 | l.Printf("stopped reading from generation %v", gen.startTime) 374 | if nextGen == nil { 375 | break 376 | } 377 | gen = nextGen 378 | } 379 | 380 | return nil 381 | }) 382 | 383 | return runErrG.Wait() 384 | } 385 | 386 | // Stop tells the reader to stop as soon as possible. There is no guarantee 387 | // related to how much data will be processed in each stream when the reader 388 | // stops. If you want to e.g. make sure that all cdc log data with timestamps 389 | // up to the current moment was processed, use (*Reader).StopAt(time.Now()). 390 | // This function does not wait until the reader stops. 391 | func (r *Reader) Stop() { 392 | close(r.stoppedCh) 393 | } 394 | 395 | // StopAt tells the reader to stop reading changes after reaching given timestamp. 396 | // Does not guarantee that the reader won't read any changes after the timestamp, 397 | // but the reader will stop after all tables and streams are advanced to or past 398 | // the timestamp. 399 | // This function does not wait until the reader stops. 
400 | func (r *Reader) StopAt(at time.Time) { 401 | r.stopTime.Store(at) 402 | close(r.stoppedCh) 403 | } 404 | 405 | func (r *Reader) splitStreams(streams []StreamID) [][]StreamID { 406 | vnodesIdxToStreams := make(map[int64][]StreamID, 0) 407 | for _, stream := range streams { 408 | idx := getVnodeIndexForStream(stream) 409 | vnodesIdxToStreams[idx] = append(vnodesIdxToStreams[idx], stream) 410 | } 411 | 412 | groups := make([][]StreamID, 0) 413 | 414 | // Idx -1 means that we don't know the vnode for given stream, 415 | // therefore we will put those streams into a separate group 416 | for _, stream := range vnodesIdxToStreams[-1] { 417 | groups = append(groups, []StreamID{stream}) 418 | } 419 | delete(vnodesIdxToStreams, -1) 420 | 421 | for _, group := range vnodesIdxToStreams { 422 | groups = append(groups, group) 423 | } 424 | return groups 425 | } 426 | 427 | // Computes vnode index from given stream ID. 428 | // Returns -1 if the stream ID format is unrecognized. 429 | func getVnodeIndexForStream(streamID StreamID) int64 { 430 | if len(streamID) != 16 { 431 | // Don't know how to handle other sizes 432 | return -1 433 | } 434 | 435 | lowerQword := binary.BigEndian.Uint64(streamID[8:16]) 436 | version := lowerQword & (1<<4 - 1) 437 | if version != 1 { 438 | // Unrecognized version 439 | return -1 440 | } 441 | 442 | vnodeIdx := (lowerQword >> 4) & (1<<22 - 1) 443 | return int64(vnodeIdx) 444 | } 445 | -------------------------------------------------------------------------------- /stream_batch.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | import ( 4 | "context" 5 | "sync/atomic" 6 | "time" 7 | 8 | "github.com/gocql/gocql" 9 | ) 10 | 11 | type streamBatchReader struct { 12 | config *ReaderConfig 13 | generationTime time.Time 14 | streams []StreamID 15 | keyspaceName string 16 | tableName string 17 | 18 | lastTimestamp gocql.UUID 19 | endTimestamp atomic.Value 20 | 21 | consumers 
map[string]ChangeConsumer 22 | 23 | perStreamProgress map[string]gocql.UUID 24 | 25 | interruptCh chan struct{} 26 | } 27 | 28 | func newStreamBatchReader( 29 | config *ReaderConfig, 30 | generationTime time.Time, 31 | streams []StreamID, 32 | keyspaceName string, 33 | tableName string, 34 | startFrom gocql.UUID, 35 | ) *streamBatchReader { 36 | return &streamBatchReader{ 37 | config: config, 38 | generationTime: generationTime, 39 | streams: streams, 40 | keyspaceName: keyspaceName, 41 | tableName: tableName, 42 | 43 | lastTimestamp: startFrom, 44 | 45 | consumers: make(map[string]ChangeConsumer), 46 | 47 | perStreamProgress: make(map[string]gocql.UUID, len(streams)), 48 | 49 | interruptCh: make(chan struct{}, 1), 50 | } 51 | } 52 | 53 | func (sbr *streamBatchReader) run(ctx context.Context) (err error) { 54 | if err := sbr.loadProgressForStreams(ctx); err != nil { 55 | return err 56 | } 57 | 58 | defer func(err *error) { 59 | for s, c := range sbr.consumers { 60 | err2 := c.End() 61 | if err2 != nil { 62 | sbr.config.Logger.Printf("error while ending consumer for stream %s (will quit): %s", StreamID(s), err2) 63 | } 64 | if *err == nil { 65 | *err = err2 66 | } 67 | } 68 | }(&err) 69 | 70 | for _, s := range sbr.streams { 71 | input := CreateChangeConsumerInput{ 72 | TableName: sbr.getBaseTableName(), 73 | StreamID: s, 74 | 75 | ProgressReporter: &ProgressReporter{ 76 | progressManager: sbr.config.ProgressManager, 77 | gen: sbr.generationTime, 78 | tableName: sbr.getBaseTableName(), 79 | streamID: s, 80 | }, 81 | } 82 | consumer, err := sbr.config.ChangeConsumerFactory.CreateChangeConsumer(ctx, input) 83 | if err != nil { 84 | sbr.config.Logger.Printf("error while creating change consumer (will quit): %s", err) 85 | } 86 | 87 | sbr.consumers[string(s)] = consumer 88 | } 89 | 90 | crq := newChangeRowQuerier(sbr.config.Session, sbr.streams, sbr.keyspaceName, sbr.tableName, sbr.config.Consistency) 91 | 92 | wnd := sbr.getPollWindow() 93 | 94 | outer: 95 | for { 96 | 
var err error 97 | var hadRows bool 98 | 99 | windowProcessingStartTime := time.Now() 100 | 101 | if CompareTimeUUID(wnd.begin, wnd.end) < 0 { 102 | var iter *changeRowIterator 103 | iter, err = crq.queryRange(wnd.begin, wnd.end) 104 | if err != nil { 105 | sbr.config.Logger.Printf("error while sending a query (will retry): %s", err) 106 | } else { 107 | rowCount, consumerErr := sbr.processRows(ctx, iter) 108 | if err = iter.Close(); err != nil { 109 | sbr.config.Logger.Printf("error while querying (will retry): %s", err) 110 | } 111 | if consumerErr != nil { 112 | return consumerErr 113 | } 114 | hadRows = rowCount > 0 115 | } 116 | 117 | if err == nil { 118 | // If there were no errors, then we can safely advance 119 | // all streams to the window end 120 | sbr.advanceAllStreamsTo(wnd.end) 121 | 122 | if !hadRows { 123 | for _, c := range sbr.consumers { 124 | if enc, ok := c.(ChangeOrEmptyNotificationConsumer); ok { 125 | err = enc.Empty(ctx, wnd.end) 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | wnd = sbr.getPollWindow() 133 | 134 | var delay time.Duration 135 | switch { 136 | case err != nil: 137 | delay = sbr.config.Advanced.PostFailedQueryDelay 138 | case hadRows: 139 | delay = sbr.config.Advanced.PostNonEmptyQueryDelay 140 | default: 141 | delay = sbr.config.Advanced.PostEmptyQueryDelay 142 | } 143 | 144 | delayUntil := windowProcessingStartTime.Add(delay) 145 | if time.Until(delayUntil) < time.Duration(0) { 146 | sbr.config.Logger.Printf("the stream can't keep up! 
the next poll was supposed to happen %v ago", -time.Until(delayUntil)) 147 | } 148 | 149 | if sbr.reachedEndOfTheGeneration(wnd.begin) { 150 | break outer 151 | } 152 | 153 | delay: 154 | for { 155 | select { 156 | case <-ctx.Done(): 157 | return ctx.Err() 158 | case <-time.After(time.Until(delayUntil)): 159 | break delay 160 | case <-sbr.interruptCh: 161 | if sbr.reachedEndOfTheGeneration(wnd.begin) { 162 | break outer 163 | } 164 | } 165 | } 166 | } 167 | 168 | sbr.config.Logger.Printf("ending stream batch %v", sbr.streams) 169 | 170 | return nil 171 | } 172 | 173 | func (sbr *streamBatchReader) loadProgressForStreams(ctx context.Context) error { 174 | for _, stream := range sbr.streams { 175 | progress, err := sbr.config.ProgressManager.GetProgress(ctx, sbr.generationTime, sbr.getBaseTableName(), stream) 176 | if err != nil { 177 | return err 178 | } 179 | if CompareTimeUUID(sbr.lastTimestamp, progress.LastProcessedRecordTime) < 0 { 180 | sbr.config.Logger.Printf("loaded progress for stream %s: %s (%s)\n", stream, progress.LastProcessedRecordTime, progress.LastProcessedRecordTime.Time()) 181 | sbr.perStreamProgress[string(stream)] = progress.LastProcessedRecordTime 182 | } else { 183 | sbr.perStreamProgress[string(stream)] = sbr.lastTimestamp 184 | } 185 | } 186 | 187 | return nil 188 | } 189 | 190 | func (sbr *streamBatchReader) advanceAllStreamsTo(point gocql.UUID) { 191 | for id := range sbr.perStreamProgress { 192 | if CompareTimeUUID(sbr.perStreamProgress[id], point) < 0 { 193 | sbr.perStreamProgress[id] = point 194 | } 195 | } 196 | } 197 | 198 | type pollWindow struct { 199 | begin gocql.UUID 200 | end gocql.UUID 201 | 202 | touchesConfidenceWindow bool 203 | } 204 | 205 | func (sbr *streamBatchReader) getPollWindow() pollWindow { 206 | // Left range end is the minimum of all progresses of each stream 207 | windowStart := sbr.getPollWindowStart() 208 | 209 | // Right range end is the minimum of (left range + query window size, now - confidence window 
size) 210 | queryWindowRightEnd := windowStart.Time().Add(sbr.config.Advanced.QueryTimeWindowSize) 211 | confidenceWindowStart := sbr.getConfidenceLimitPoint() 212 | if queryWindowRightEnd.Before(confidenceWindowStart) { 213 | return pollWindow{ 214 | begin: windowStart, 215 | end: gocql.MinTimeUUID(queryWindowRightEnd), 216 | 217 | touchesConfidenceWindow: false, 218 | } 219 | } 220 | return pollWindow{ 221 | begin: windowStart, 222 | end: gocql.MinTimeUUID(confidenceWindowStart), 223 | 224 | touchesConfidenceWindow: true, 225 | } 226 | } 227 | 228 | func (sbr *streamBatchReader) getPollWindowStart() gocql.UUID { 229 | first := true 230 | var windowStart gocql.UUID 231 | for _, progress := range sbr.perStreamProgress { 232 | if first || CompareTimeUUID(windowStart, progress) > 0 { 233 | windowStart = progress 234 | } 235 | first = false 236 | } 237 | return windowStart 238 | } 239 | 240 | func (sbr *streamBatchReader) getConfidenceLimitPoint() time.Time { 241 | return time.Now().Add(-sbr.config.Advanced.ConfidenceWindowSize) 242 | } 243 | 244 | func (sbr *streamBatchReader) processRows(ctx context.Context, iter *changeRowIterator) (int, error) { 245 | rowCount := 0 246 | var change Change 247 | 248 | for { 249 | changeBatchCols, c := iter.Next() 250 | if c == nil { 251 | break 252 | } 253 | switch c.GetOperation() { 254 | case PreImage: 255 | change.PreImage = append(change.PreImage, c) 256 | case PostImage: 257 | change.PostImage = append(change.PostImage, c) 258 | default: 259 | change.Delta = append(change.Delta, c) 260 | } 261 | 262 | rowCount++ 263 | 264 | if c.cdcCols.endOfBatch { 265 | // Since we are reading in batches and we started from the lowest progress mark 266 | // of all streams in the batch, we might have to manually filter out changes 267 | // from streams that had a save point later than the earliest progress mark 268 | if CompareTimeUUID(sbr.perStreamProgress[string(changeBatchCols.streamID)], changeBatchCols.time) < 0 { 269 | change.StreamID = 
changeBatchCols.streamID 270 | change.Time = changeBatchCols.time 271 | consumer := sbr.consumers[string(changeBatchCols.streamID)] 272 | if err := consumer.Consume(ctx, change); err != nil { 273 | sbr.config.Logger.Printf("error while processing change (will quit): %s", err) 274 | return 0, err 275 | } 276 | 277 | // It's important to save progress here. If fetching of a page fails, 278 | // we will have to poll again, and filter out some rows. 279 | sbr.perStreamProgress[string(changeBatchCols.streamID)] = changeBatchCols.time 280 | } 281 | 282 | change.PreImage = nil 283 | change.Delta = nil 284 | change.PostImage = nil 285 | } 286 | } 287 | 288 | return rowCount, nil 289 | } 290 | 291 | func (sbr *streamBatchReader) getBaseTableName() string { 292 | return sbr.keyspaceName + "." + sbr.tableName 293 | } 294 | 295 | func (sbr *streamBatchReader) reachedEndOfTheGeneration(windowEnd gocql.UUID) bool { 296 | end, isClosed := sbr.endTimestamp.Load().(gocql.UUID) 297 | return isClosed && (end == gocql.UUID{} || CompareTimeUUID(end, windowEnd) <= 0) 298 | } 299 | 300 | // Only one of `close`, `stopNow` methods should be called, only once 301 | 302 | func (sbr *streamBatchReader) close(processUntil gocql.UUID) { 303 | sbr.endTimestamp.Store(processUntil) 304 | sbr.interruptCh <- struct{}{} 305 | } 306 | 307 | func (sbr *streamBatchReader) stopNow() { 308 | sbr.close(gocql.UUID{}) 309 | } 310 | -------------------------------------------------------------------------------- /testutils/utils.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "sync/atomic" 8 | "testing" 9 | "time" 10 | 11 | "github.com/gocql/gocql" 12 | ) 13 | 14 | var ( 15 | testStartTimestamp time.Time 16 | currentTestNumber uint32 17 | ) 18 | 19 | func init() { 20 | testStartTimestamp = time.Now() 21 | } 22 | 23 | func GetUniqueName(prefix string) string { 24 | unixNano := 
testStartTimestamp.UnixNano() 25 | uniqueID := atomic.AddUint32(¤tTestNumber, 1) - 1 26 | return fmt.Sprintf("%s_%d_%d", prefix, unixNano, uniqueID) 27 | } 28 | 29 | func GetSourceClusterContactPoint() string { 30 | uri := os.Getenv("SCYLLA_SRC_URI") 31 | if uri == "" { 32 | uri = "127.0.0.1" 33 | } 34 | return uri 35 | } 36 | 37 | func GetDestinationClusterContactPoint() string { 38 | uri := os.Getenv("SCYLLA_DST_URI") 39 | if uri == "" { 40 | uri = "127.0.0.2" 41 | } 42 | return uri 43 | } 44 | 45 | func CreateKeyspace(t *testing.T, contactPoint, keyspaceName string) { 46 | t.Helper() 47 | 48 | cluster := gocql.NewCluster(contactPoint) 49 | session, err := cluster.CreateSession() 50 | if err != nil { 51 | t.Fatalf("failed to create session: %v", err) 52 | } 53 | 54 | defer session.Close() 55 | err = session.Query(fmt.Sprintf("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}", keyspaceName)).Exec() 56 | if err != nil { 57 | t.Fatalf("failed to create keyspace %s: %v", keyspaceName, err) 58 | } 59 | 60 | err = session.AwaitSchemaAgreement(context.Background()) 61 | if err != nil { 62 | t.Fatalf("awaiting schema agreement failed: %v", err) 63 | } 64 | } 65 | 66 | func CreateUniqueKeyspace(t *testing.T, contactPoint string) string { 67 | t.Helper() 68 | 69 | keyspaceName := GetUniqueName("test_keyspace") 70 | CreateKeyspace(t, contactPoint, keyspaceName) 71 | return keyspaceName 72 | } 73 | -------------------------------------------------------------------------------- /topology.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | import ( 4 | "context" 5 | "encoding/hex" 6 | "errors" 7 | "fmt" 8 | "sort" 9 | "strings" 10 | "time" 11 | 12 | "github.com/gocql/gocql" 13 | ) 14 | 15 | var ( 16 | ErrNoGenerationsPresent = errors.New("there are no generations present") 17 | ErrNoSupportedGenerationTablesPresent = errors.New("no supported generation tables are 
present") 18 | ) 19 | 20 | const ( 21 | generationsTableNamePre4_4 = "system_distributed.cdc_streams_descriptions" 22 | 23 | timestampsTableSince4_4 = "system_distributed.cdc_generation_timestamps" 24 | streamsTableSince4_4 = "system_distributed.cdc_streams_descriptions_v2" 25 | 26 | // TODO: Switch to a model which reacts to cluster state changes 27 | // and forces a refresh when all worker goroutines did not report any 28 | // changes for some time 29 | generationFetchPeriod time.Duration = 15 * time.Second 30 | ) 31 | 32 | type generation struct { 33 | startTime time.Time 34 | streams []StreamID 35 | } 36 | 37 | // StreamID represents an ID of a stream from a CDC log (cdc$time column). 38 | type StreamID []byte 39 | 40 | // String is needed to implement the fmt.Stringer interface. 41 | func (sid StreamID) String() string { 42 | return hex.EncodeToString(sid) 43 | } 44 | 45 | type timeList []time.Time 46 | 47 | func (tl timeList) Len() int { 48 | return len(tl) 49 | } 50 | 51 | func (tl timeList) Less(i, j int) bool { 52 | return tl[i].Before(tl[j]) 53 | } 54 | 55 | func (tl timeList) Swap(i, j int) { 56 | tl[i], tl[j] = tl[j], tl[i] 57 | } 58 | 59 | type generationFetcher struct { 60 | session *gocql.Session 61 | lastTime time.Time 62 | logger Logger 63 | 64 | pushedFirst bool 65 | 66 | generationCh chan *generation 67 | refreshCh chan struct{} 68 | stopCh chan struct{} 69 | 70 | source generationSource 71 | } 72 | 73 | func newGenerationFetcher( 74 | session *gocql.Session, 75 | startFrom time.Time, 76 | logger Logger, 77 | ) (*generationFetcher, error) { 78 | source, err := chooseGenerationSource(session, logger) 79 | if err != nil { 80 | return nil, fmt.Errorf("failed to detect version of the generation tables used by the cluster: %v", err) 81 | } 82 | 83 | gf := &generationFetcher{ 84 | session: session, 85 | lastTime: startFrom, 86 | logger: logger, 87 | 88 | generationCh: make(chan *generation, 1), 89 | stopCh: make(chan struct{}), 90 | refreshCh: 
make(chan struct{}, 1), 91 | 92 | source: source, 93 | } 94 | return gf, nil 95 | } 96 | 97 | func chooseGenerationSource(session *gocql.Session, logger Logger) (generationSource, error) { 98 | hasPre4_4, err := isTableInSchema(session, generationsTableNamePre4_4) 99 | if err != nil { 100 | return nil, err 101 | } 102 | hasPost4_4, err := isTableInSchema(session, streamsTableSince4_4) 103 | if err != nil { 104 | return nil, err 105 | } 106 | 107 | if !hasPost4_4 && !hasPre4_4 { 108 | // There are no tables we know how to use - return an error 109 | return nil, ErrNoSupportedGenerationTablesPresent 110 | } 111 | 112 | if hasPost4_4 && !hasPre4_4 { 113 | // There is only 4.4+ table, we can immediately start 114 | // using the new table 115 | return &generationSourceSince4_4{ 116 | session: session, 117 | logger: logger, 118 | }, nil 119 | } 120 | 121 | // If we are here, then the pre-4.4 table is there for sure 122 | // If there is no 4.4+ table - we will start using it right away 123 | // If there is a 4.4+ table - the maybeUpgrade function 124 | // will take care of switching to the new table, but only after 125 | // generation rewriting completes 126 | 127 | return &generationSourcePre4_4{ 128 | session: session, 129 | logger: logger, 130 | }, nil 131 | } 132 | 133 | func (gf *generationFetcher) Run(ctx context.Context) error { 134 | l := gf.logger 135 | 136 | l.Printf("starting generation fetcher loop") 137 | 138 | outer: 139 | for { 140 | // Generation processing can take some time, so start calculating 141 | // the next poll time starting from now 142 | waitC := time.After(generationFetchPeriod) 143 | 144 | gf.tryFetchGenerations() 145 | 146 | select { 147 | // Give priority to the stop channel and the context 148 | case <-gf.stopCh: 149 | break outer 150 | case <-ctx.Done(): 151 | return ctx.Err() 152 | default: 153 | select { 154 | case <-gf.stopCh: 155 | break outer 156 | case <-ctx.Done(): 157 | return ctx.Err() 158 | case <-waitC: 159 | case 
// tryFetchGenerations performs one polling round: it picks a consistency
// level based on cluster size, optionally upgrades the generation source,
// fetches all generation timestamps and pushes any generations newer than
// gf.lastTime to the consumer, in timestamp order. All failures are logged
// and abort the round; the next round will retry.
func (gf *generationFetcher) tryFetchGenerations() {
	// Decide on the consistency to use
	size, err := gf.getClusterSize()
	if err != nil {
		gf.logger.Printf("an error occurred while determining cluster size: %s", err)
		return
	}

	// Single-node clusters cannot satisfy QUORUM after losing the node;
	// use ONE there, QUORUM otherwise.
	consistency := gocql.One
	if size >= 2 {
		consistency = gocql.Quorum
	}

	// Try switching to a new format before fetching any generations
	newSource, err := gf.source.maybeUpgrade()
	if err != nil {
		gf.logger.Printf("an error occurred while trying to switch to new generations format: %s", err)
	} else {
		gf.source = newSource
	}

	// Fetch some generation times
	times, err := gf.source.getGenerationTimes(consistency)
	if err != nil {
		gf.logger.Printf("an error occurred while fetching generation times: %s", err)
		return
	}
	// Generations must be delivered in chronological order.
	sort.Sort(timeList(times))

	// fetchAndPush loads the streams for one generation and pushes it;
	// it returns true when the caller should abort the round (either a
	// fetch error, or the fetcher was stopped while pushing).
	fetchAndPush := func(t time.Time) (shouldBreak bool) {
		streams, err := gf.source.getGeneration(t, consistency)
		if err != nil {
			gf.logger.Printf("an error occurred while fetching generation streams for %s: %s", t, err)
			return true
		}
		gen := &generation{t, streams}
		if shouldStop := gf.pushGeneration(gen); shouldStop {
			return true
		}
		return false
	}

	// prevTime tracks the generation timestamp preceding the current one
	// in the sorted list; used by maybePushFirst below.
	var prevTime time.Time

	maybePushFirst := func() bool {
		if !gf.pushedFirst {
			// When we start, we need to push the generation that is being
			// currently open. If we are here, then it means we arrived
			// at the timestamp of the first generation which is after
			// the timestamp from which we wish to start replicating.
			// We need to push the previous generation first.
			// If there was no previous generation, then it probably means
			// that the generation we arrived at is the very first generation
			// in the cluster
			if !prevTime.IsZero() {
				if shouldBreak := fetchAndPush(prevTime); shouldBreak {
					return true
				}
			}
			gf.pushedFirst = true
		}
		return false
	}

	for _, t := range times {
		if gf.lastTime.Before(t) {

			if shouldBreak := maybePushFirst(); shouldBreak {
				return
			}

			if shouldBreak := fetchAndPush(t); shouldBreak {
				return
			}
			gf.lastTime = t
		}
		prevTime = t
	}

	// Even if no generation was newer than lastTime, the currently-open
	// generation (the last one seen) may still need to be pushed once.
	_ = maybePushFirst()
}

// Get returns the next fetched generation, blocking until one is available
// or the context is cancelled. Returns nil after the fetcher stops and
// generationCh is closed.
func (gf *generationFetcher) Get(ctx context.Context) (*generation, error) {
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case gen := <-gf.generationCh:
		return gen, nil
	}
}

// Stop asks the fetcher loop to terminate.
func (gf *generationFetcher) Stop() {
	close(gf.stopCh)
}

// TriggerRefresh requests an immediate poll; it never blocks (the refresh
// channel has capacity 1, so coalescing multiple requests is fine).
func (gf *generationFetcher) TriggerRefresh() {
	select {
	case gf.refreshCh <- struct{}{}:
	default:
	}
}

// pushGeneration delivers a generation to the consumer, unless the fetcher
// was stopped. On success it records the generation's time as lastTime.
func (gf *generationFetcher) pushGeneration(gen *generation) (shouldStop bool) {
	gf.logger.Printf("pushing generation %v", gen.startTime)
	select {
	case <-gf.stopCh:
		return true
	case gf.generationCh <- gen:
		gf.lastTime = gen.startTime
		return false
	}
}

// Unfortunately, gocql does not expose information about the cluster,
// therefore we need to poll system.peers manually
func (gf *generationFetcher) getClusterSize() (int, error) {
	var size int
	err := gf.session.Query("SELECT COUNT(*) FROM system.peers").Scan(&size)
	if err != nil {
		return 0, err
	}
	// system.peers does not include the node we are connected to.
	return size + 1, nil
}
getGenerationTimes(consistency gocql.Consistency) ([]time.Time, error) 296 | 297 | maybeUpgrade() (generationSource, error) 298 | } 299 | 300 | type generationSourcePre4_4 struct { 301 | session *gocql.Session 302 | logger Logger 303 | } 304 | 305 | func (gs *generationSourcePre4_4) getGeneration(genTime time.Time, consistency gocql.Consistency) ([]StreamID, error) { 306 | var streams []StreamID 307 | err := gs.session.Query("SELECT streams FROM "+generationsTableNamePre4_4+" WHERE time = ?", genTime). 308 | Consistency(consistency). 309 | Scan(&streams) 310 | if err != nil { 311 | return nil, err 312 | } 313 | return streams, err 314 | } 315 | 316 | func (gs *generationSourcePre4_4) getGenerationTimes(consistency gocql.Consistency) ([]time.Time, error) { 317 | iter := gs.session.Query("SELECT time FROM " + generationsTableNamePre4_4). 318 | Consistency(consistency). 319 | Iter() 320 | var ( 321 | times []time.Time 322 | currTime time.Time 323 | ) 324 | for iter.Scan(&currTime) { 325 | times = append(times, currTime) 326 | } 327 | if err := iter.Close(); err != nil { 328 | return nil, err 329 | } 330 | return times, nil 331 | } 332 | 333 | // Follows the migration procedure from Scylla's documentation 334 | // https://docs.scylladb.com/using-scylla/cdc/cdc-querying-streams/ 335 | func (gs *generationSourcePre4_4) maybeUpgrade() (generationSource, error) { 336 | // Check if table is present 337 | hasNewStreamsTable, err := isTableInSchema(gs.session, streamsTableSince4_4) 338 | if err != nil { 339 | return gs, err 340 | } 341 | if !hasNewStreamsTable { 342 | // Don't upgrade, the new table is not there yet 343 | return gs, nil 344 | } 345 | 346 | // Was the migration completed? 347 | data := make(map[string]interface{}) 348 | err = gs.session.Query("SELECT streams_timestamp FROM system.cdc_local WHERE key = 'rewritten'"). 
349 | MapScan(data) 350 | 351 | if err == gocql.ErrNotFound { 352 | // The "rewritten" row is not present yet, this means that the generations 353 | // weren't rewritten yet 354 | // Try again later 355 | return gs, nil 356 | } 357 | 358 | if err != nil { 359 | // Some other error 360 | return gs, err 361 | } 362 | 363 | newGs := &generationSourceSince4_4{ 364 | session: gs.session, 365 | logger: gs.logger, 366 | } 367 | 368 | return newGs.maybeUpgrade() 369 | } 370 | 371 | type generationSourceSince4_4 struct { 372 | session *gocql.Session 373 | logger Logger 374 | } 375 | 376 | func (gs *generationSourceSince4_4) getGeneration(genTime time.Time, consistency gocql.Consistency) ([]StreamID, error) { 377 | var streams []StreamID 378 | iter := gs.session.Query("SELECT streams FROM "+streamsTableSince4_4+" WHERE time = ?", genTime). 379 | Consistency(consistency). 380 | Iter() 381 | 382 | var vnodeStreams []StreamID 383 | for iter.Scan(&vnodeStreams) { 384 | streams = append(streams, vnodeStreams...) 385 | } 386 | 387 | if err := iter.Close(); err != nil { 388 | return nil, err 389 | } 390 | return streams, nil 391 | } 392 | 393 | func (gs *generationSourceSince4_4) getGenerationTimes(consistency gocql.Consistency) ([]time.Time, error) { 394 | iter := gs.session.Query("SELECT time FROM " + timestampsTableSince4_4 + " WHERE key = 'timestamps'"). 395 | Consistency(consistency). 396 | Iter() 397 | var ( 398 | times []time.Time 399 | currTime time.Time 400 | ) 401 | for iter.Scan(&currTime) { 402 | times = append(times, currTime) 403 | } 404 | if err := iter.Close(); err != nil { 405 | return nil, err 406 | } 407 | return times, nil 408 | } 409 | 410 | func (gs *generationSourceSince4_4) maybeUpgrade() (generationSource, error) { 411 | // No newer format is known 412 | return gs, nil 413 | } 414 | 415 | // Takes a fully-qualified name of a table and returns if a table of given name 416 | // is in the schema. 417 | // Panics if the table name is not qualified, i.e. 
it does not contain a dot. 418 | func isTableInSchema(session *gocql.Session, tableName string) (bool, error) { 419 | decomposed := strings.SplitN(tableName, ".", 2) 420 | if len(decomposed) < 2 { 421 | panic("unqualified table name passed to inTableInSchema") 422 | } 423 | 424 | keyspace := decomposed[0] 425 | table := decomposed[1] 426 | 427 | meta, err := session.KeyspaceMetadata(keyspace) 428 | if err == gocql.ErrKeyspaceDoesNotExist { 429 | return false, nil 430 | } else if err != nil { 431 | return false, err 432 | } 433 | 434 | _, ok := meta.Tables[table] 435 | return ok, nil 436 | } 437 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package scyllacdc 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "regexp" 9 | "strconv" 10 | "strings" 11 | "sync" 12 | "time" 13 | 14 | "github.com/gocql/gocql" 15 | ) 16 | 17 | // PeriodicProgressReporter is a wrapper around ProgressReporter which can be 18 | // used to save progress in regular periods of time. 19 | type PeriodicProgressReporter struct { 20 | reporter *ProgressReporter 21 | interval time.Duration 22 | 23 | refreshCh chan struct{} 24 | stopCh chan struct{} 25 | finishCh chan struct{} 26 | mu *sync.Mutex 27 | timeToReport gocql.UUID 28 | 29 | logger Logger 30 | } 31 | 32 | // NewPeriodicProgressReporter creates a new PeriodicProgressReporter with 33 | // given report interval. 34 | func NewPeriodicProgressReporter(logger Logger, interval time.Duration, reporter *ProgressReporter) *PeriodicProgressReporter { 35 | return &PeriodicProgressReporter{ 36 | reporter: reporter, 37 | interval: interval, 38 | 39 | refreshCh: make(chan struct{}, 1), 40 | stopCh: make(chan struct{}), 41 | finishCh: make(chan struct{}), 42 | mu: &sync.Mutex{}, 43 | 44 | logger: logger, 45 | } 46 | } 47 | 48 | // Start spawns an internal goroutine and starts the progress reporting loop. 
49 | func (ppr *PeriodicProgressReporter) Start(ctx context.Context) { 50 | // Optimization: if the reporter is nil, or is NoProgressManager, 51 | // then don't start the goroutine at all. 52 | if _, ok := ppr.reporter.progressManager.(noProgressManager); ok { 53 | close(ppr.finishCh) 54 | return 55 | } 56 | 57 | go func() { 58 | defer close(ppr.finishCh) 59 | for { 60 | // Wait for the duration period 61 | select { 62 | case <-time.After(ppr.interval): 63 | 64 | case <-ctx.Done(): 65 | return 66 | case <-ppr.stopCh: 67 | return 68 | } 69 | 70 | // Wait for a signal to refresh 71 | select { 72 | case <-ppr.refreshCh: 73 | ppr.mu.Lock() 74 | timeToReport := ppr.timeToReport 75 | ppr.mu.Unlock() 76 | 77 | // TODO: Log errors? 78 | err := ppr.reporter.MarkProgress(ctx, Progress{timeToReport}) 79 | if err != nil { 80 | ppr.logger.Printf("failed to save progress for %s: %s", ppr.reporter.streamID, err) 81 | } 82 | 83 | case <-ctx.Done(): 84 | return 85 | case <-ppr.stopCh: 86 | return 87 | } 88 | } 89 | }() 90 | } 91 | 92 | // Update tells the PeriodicProgressReporter that a row has been processed. 93 | func (ppr *PeriodicProgressReporter) Update(newTime gocql.UUID) { 94 | ppr.mu.Lock() 95 | ppr.timeToReport = newTime 96 | ppr.mu.Unlock() 97 | 98 | // Fill the channel in a non-blocking manner 99 | select { 100 | case ppr.refreshCh <- struct{}{}: 101 | default: 102 | } 103 | } 104 | 105 | // Stop stops inner goroutine and waits until it finishes. 106 | func (ppr *PeriodicProgressReporter) Stop() { 107 | close(ppr.stopCh) 108 | <-ppr.finishCh 109 | } 110 | 111 | // SaveAndStop stops inner goroutine, waits until it finishes, and then 112 | // saves the most recent progress. 
113 | func (ppr *PeriodicProgressReporter) SaveAndStop(ctx context.Context) error { 114 | close(ppr.stopCh) 115 | <-ppr.finishCh 116 | 117 | // No need to lock the mutex for timeToReport 118 | if (ppr.timeToReport == gocql.UUID{}) { 119 | return nil 120 | } 121 | 122 | err := ppr.reporter.MarkProgress(ctx, Progress{ppr.timeToReport}) 123 | if err != nil { 124 | ppr.logger.Printf("failed to save progress for %s: %s", ppr.reporter.streamID, err) 125 | } else { 126 | ppr.logger.Printf("successfully saved final progress for %s: %s (%s)", ppr.reporter.streamID, ppr.timeToReport, ppr.timeToReport.Time()) 127 | } 128 | return err 129 | } 130 | 131 | func CompareTimeUUID(u1, u2 gocql.UUID) int { 132 | // Compare timestamps 133 | t1 := u1.Timestamp() 134 | t2 := u2.Timestamp() 135 | if t1 < t2 { 136 | return -1 137 | } 138 | if t1 > t2 { 139 | return 1 140 | } 141 | 142 | // Lexicographically compare the second half as signed bytes 143 | for i := 8; i < 16; i++ { 144 | d := int(int8(u1[i])) - int(int8(u2[i])) 145 | if d != 0 { 146 | return int(d) 147 | } 148 | } 149 | return 0 150 | } 151 | 152 | var validIDPattern = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_]*$") 153 | 154 | func escapeColumnNameIfNeeded(s string) string { 155 | if shouldEscape(s) { 156 | return escapeColumnName(s) 157 | } 158 | return s 159 | } 160 | 161 | func shouldEscape(s string) bool { 162 | // TODO: Check if it is a reserved keyword - for now, assume it's not 163 | return !validIDPattern.MatchString(s) 164 | } 165 | 166 | func escapeColumnName(s string) string { 167 | return "\"" + strings.ReplaceAll(s, "\"", "\\\"") + "\"" 168 | } 169 | 170 | func fetchScyllaCDCExtensionTTL( 171 | session *gocql.Session, 172 | keyspaceName string, 173 | tableName string, 174 | ) (int64, error) { 175 | // Extensions are not available in the metadata, 176 | // fetch and parse them manually until this is implemented in gocql 177 | var exts map[string][]byte 178 | err := session.Query( 179 | "SELECT extensions FROM 
// extensionParser is a small decoder for the binary serialization format used
// by Scylla for values of the "extensions" column in system_schema.tables.
// It consumes its input from the front as values are parsed.
type extensionParser struct {
	raw []byte
}

// newExtensionParser wraps the raw bytes in a parser positioned at the start.
func newExtensionParser(raw []byte) *extensionParser {
	return &extensionParser{raw: raw}
}

// parseStringMap reads a little-endian int32 entry count followed by that
// many (key, value) string pairs, and returns them as a map.
func (ep *extensionParser) parseStringMap() (map[string]string, error) {
	count, err := ep.parseInt()
	if err != nil {
		return nil, err
	}
	if count < 0 {
		return nil, errors.New("invalid map length")
	}

	result := make(map[string]string)
	for i := int32(0); i < count; i++ {
		key, err := ep.parseString()
		if err != nil {
			return nil, fmt.Errorf("failed to parse key #%d: %w", i, err)
		}
		value, err := ep.parseString()
		if err != nil {
			return nil, fmt.Errorf("failed to parse value #%d: %w", i, err)
		}
		result[key] = value
	}

	return result, nil
}

// parseInt consumes four bytes and decodes them as a little-endian int32.
func (ep *extensionParser) parseInt() (int32, error) {
	if len(ep.raw) < 4 {
		return 0, io.EOF
	}

	var x int32
	for k := 0; k < 4; k++ {
		x |= int32(ep.raw[k]) << (8 * k)
	}

	ep.raw = ep.raw[4:]
	return x, nil
}

// parseString consumes a little-endian int32 length followed by that many
// bytes of string data.
func (ep *extensionParser) parseString() (string, error) {
	n, err := ep.parseInt()
	if err != nil {
		return "", fmt.Errorf("failed to parse string length: %w", err)
	}
	if n < 0 {
		return "", errors.New("invalid string length")
	}
	if len(ep.raw) < int(n) {
		return "", io.EOF
	}

	out := string(ep.raw[:n])
	ep.raw = ep.raw[n:]
	return out, nil
}