├── .devcontainer
│   ├── Containerfile
│   └── devcontainer.json
├── .github
│   └── workflows
│       ├── pr.yml
│       └── tag.yaml
├── .gitignore
├── .golangci.yaml
├── .goreleaser.yaml
├── LICENSE
├── Makefile
├── README.md
├── cdc.go
├── change.go
├── cmd
│   ├── sqlite-cdc-setup
│   │   └── main.go
│   └── sqlite-cdc
│       └── main.go
├── dbmeta.go
├── go.mod
├── go.sum
├── handler.go
├── handlers
│   ├── debug.go
│   ├── http.go
│   └── stdio.go
├── images
│   ├── blob-data.plot
│   ├── blob-data.svg
│   ├── simple-tables.plot
│   ├── simple-tables.svg
│   ├── wide-tables.plot
│   └── wide-tables.svg
├── internal
│   └── tools
│       ├── go.mod
│       ├── go.sum
│       └── tools.go
├── signals.go
├── signals_test.go
├── trigger_test.go
└── triggers.go

/.devcontainer/Containerfile:
--------------------------------------------------------------------------------
FROM docker.io/golang:1-bookworm

RUN apt-get update && apt-get install --yes \
    curl wget gpg \
    make git vim less \
    sudo \
    bash-completion man \
    gnuplot

# Create a non-root user so Linux users can run the container as the current
# OS user. See https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# and https://github.com/devcontainers/spec/blob/main/docs/specs/devcontainer-reference.md#container-creation
# for more information.
ARG USERNAME=dev
ARG USER_UID=1010
ARG USER_GID=1010

RUN groupadd --gid $USER_GID $USERNAME \
    && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
    && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
    && chmod 0440 /etc/sudoers.d/$USERNAME
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
{
    "name": "sqlite-cdc",
    "build": {
        "dockerfile": "Containerfile",
        "context": ".."
6 | }, 7 | "remoteUser": "dev", 8 | "updateRemoteUserUID": true, 9 | "containerEnv": {}, 10 | "mounts": [], 11 | "customizations": { 12 | "vscode": { 13 | "settings": { 14 | "telemetry.telemetryLevel": "off", 15 | "telemetry.enableTelemetry": false, 16 | "files.insertFinalNewline": true, 17 | "files.trimTrailingWhitespace": true, 18 | "rewrap.wrappingColumn": 80, 19 | "go.formatTool": "goimports", 20 | "go.lintTool": "golangci-lint", 21 | "terminal.integrated.profiles.linux": { 22 | "bash": { 23 | "path": "/usr/bin/bash" 24 | } 25 | }, 26 | "terminal.integrated.defaultProfile.linux": "bash" 27 | }, 28 | "extensions": [ 29 | "golang.go", 30 | "streetsidesoftware.code-spell-checker", 31 | "stkb.rewrap", 32 | "ms-vscode.makefile-tools", 33 | "redhat.vscode-yaml", 34 | "ms-vscode.cpptools-extension-pack" 35 | ] 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Pull Request Checks 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | pull_request: 11 | branches: 12 | - "**" 13 | workflow_dispatch: {} 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: actions/setup-go@v5 21 | with: 22 | cache-dependency-path: | 23 | go.sum 24 | internal/tools/go.sum 25 | - uses: actions/cache@v4 26 | id: tools 27 | with: 28 | path: | 29 | .bin 30 | key: ${{ runner.os }}-${{ hashFiles('internal/tools/go.sum') }} 31 | - name: Tools 32 | if: steps.tools.outputs.cache-hit != 'true' 33 | run: make tools 34 | - name: Lint 35 | run: make test/lint 36 | - name: Unit Test 37 | run: make test/unit 38 | -------------------------------------------------------------------------------- /.github/workflows/tag.yaml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Tag And Release 5 | 6 | on: 7 | push: 8 | tags: 9 | - "**" 10 | workflow_dispatch: {} 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: actions/setup-go@v5 18 | with: 19 | cache-dependency-path: | 20 | go.sum 21 | internal/tools/go.sum 22 | - uses: actions/cache@v4 23 | id: tools 24 | with: 25 | path: | 26 | .bin 27 | key: ${{ runner.os }}-${{ hashFiles('internal/tools/go.sum') }} 28 | - name: Tools 29 | if: steps.tools.outputs.cache-hit != 'true' 30 | run: make tools 31 | - name: Lint 32 | run: make test/lint 33 | - name: Unit Test 34 | run: make test/unit 35 | - name: Generate builds 36 | run: make BUILD_MODE=tag build 37 | - name: Publish release 38 | run: make release 39 | env: 40 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | .coverage 13 | 14 | # Project-local glide cache, RE: 
https://github.com/Masterminds/glide/issues/736 15 | .glide/ 16 | Gopkg.lock 17 | 18 | # Vendor directories 19 | vendor/ 20 | 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | 30 | # Thumbnails 31 | ._* 32 | 33 | # Files that might appear in the root of a volume 34 | .DocumentRevisions-V100 35 | .fseventsd 36 | .Spotlight-V100 37 | .TemporaryItems 38 | .Trashes 39 | .VolumeIcon.icns 40 | .com.apple.timemachine.donotpresent 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | 50 | # Xcode 51 | # 52 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 53 | 54 | ## User settings 55 | xcuserdata/ 56 | 57 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) 58 | *.xcscmblueprint 59 | *.xccheckout 60 | 61 | ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) 62 | build/ 63 | DerivedData/ 64 | *.moved-aside 65 | *.pbxuser 66 | !default.pbxuser 67 | *.mode1v3 68 | !default.mode1v3 69 | *.mode2v3 70 | !default.mode2v3 71 | *.perspectivev3 72 | !default.perspectivev3 73 | 74 | # Swap 75 | [._]*.s[a-v][a-z] 76 | [._]*.sw[a-p] 77 | [._]s[a-v][a-z] 78 | [._]sw[a-p] 79 | 80 | # Session 81 | Session.vim 82 | 83 | # Temporary 84 | .netrwhist 85 | *~ 86 | # Auto-generated tag files 87 | tags 88 | 89 | # Cache files for Sublime Text 90 | *.tmlanguage.cache 91 | *.tmPreferences.cache 92 | *.stTheme.cache 93 | 94 | # Workspace files are user-specific 95 | *.sublime-workspace 96 | 97 | # Project files should be checked into the repository, unless a significant 98 | # proportion of contributors will probably not be using Sublime Text 99 | # *.sublime-project 100 | 101 | # SFTP configuration file 102 | sftp-config.json 103 | 104 | # Package control specific files 105 | Package Control.last-run 106 | Package Control.ca-list 107 | Package Control.ca-bundle 108 | Package Control.system-ca-bundle 109 | Package Control.cache/ 110 | Package Control.ca-certs/ 111 | Package Control.merged-ca-bundle 112 | Package Control.user-ca-bundle 113 | oscrypto-ca-bundle.crt 114 | bh_unicode_properties.cache 115 | 116 | # Sublime-github package stores a github token in this file 117 | # https://packagecontrol.io/packages/sublime-github 118 | GitHub.sublime-settings 119 | 120 | *~ 121 | 122 | # temporary files which can be created if a process still has a handle open of a deleted file 123 | .fuse_hidden* 124 | 125 | # KDE directory preferences 126 | .directory 127 | 128 | # Linux trash folder which might appear on any partition or disk 129 | .Trash-* 130 | 131 | # .nfs files are created when an open file is removed but is still being accessed 132 | .nfs* 133 | 134 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 135 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 136 | 137 | # User-specific stuff: 138 | .idea/**/workspace.xml 139 | .idea/**/tasks.xml 140 | .idea/dictionaries 141 | 142 | # Sensitive or high-churn files: 143 | .idea/**/dataSources/ 144 | .idea/**/dataSources.ids 145 | .idea/**/dataSources.xml 146 | .idea/**/dataSources.local.xml 147 | .idea/**/sqlDataSources.xml 148 | .idea/**/dynamic.xml 149 | .idea/**/uiDesigner.xml 150 | 151 | # Gradle: 152 | .idea/**/gradle.xml 153 | .idea/**/libraries 154 | 155 | # CMake 156 | 
cmake-build-debug/ 157 | 158 | # Mongo Explorer plugin: 159 | .idea/**/mongoSettings.xml 160 | 161 | ## File-based project format: 162 | *.iws 163 | 164 | ## Plugin-specific files: 165 | 166 | # IntelliJ 167 | out/ 168 | 169 | # mpeltonen/sbt-idea plugin 170 | .idea_modules/ 171 | 172 | # JIRA plugin 173 | atlassian-ide-plugin.xml 174 | 175 | # Cursive Clojure plugin 176 | .idea/replstate.xml 177 | 178 | # Crashlytics plugin (for Android Studio and IntelliJ) 179 | com_crashlytics_export_strings.xml 180 | crashlytics.properties 181 | crashlytics-build.properties 182 | fabric.properties 183 | 184 | # VSCODE 185 | .vscode 186 | 187 | # -*- mode: gitignore; -*- 188 | *~ 189 | \#*\# 190 | /.emacs.desktop 191 | /.emacs.desktop.lock 192 | *.elc 193 | auto-save-list 194 | tramp 195 | .\#* 196 | 197 | # Org-mode 198 | .org-id-locations 199 | *_archive 200 | 201 | # flymake-mode 202 | *_flymake.* 203 | 204 | # eshell files 205 | /eshell/history 206 | /eshell/lastdir 207 | 208 | # elpa packages 209 | /elpa/ 210 | 211 | # reftex files 212 | *.rel 213 | 214 | # AUCTeX auto folder 215 | /auto/ 216 | 217 | # cask packages 218 | .cask/ 219 | dist/ 220 | 221 | # Flycheck 222 | flycheck_*.el 223 | 224 | # server auth directory 225 | /server/ 226 | 227 | # projectiles files 228 | .projectile 229 | 230 | # directory configuration 231 | .dir-locals.el 232 | 233 | # project generated files 234 | .bin/ 235 | .coverage/ 236 | .build/ 237 | go.work 238 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | issues-exit-code: 2 4 | tests: true 5 | build-tags: [] 6 | modules-download-mode: readonly 7 | allow-parallel-runners: false 8 | 9 | output: 10 | formats: 11 | - format: colored-line-number 12 | print-issued-lines: true 13 | print-linter-name: true 14 | uniq-by-line: true 15 | path-prefix: "" 16 | sort-results: false 17 | 18 | linters-settings: 19 | errcheck: 20 | exclude-functions: 21 | - "(*database/sql.Tx).Rollback" 22 | linters: 23 | enable-all: false 24 | disable-all: false 25 | enable: 26 | - gochecknoglobals 27 | - gochecknoinits 28 | presets: 29 | - bugs 30 | - test 31 | disable: 32 | - exhaustruct 33 | - testpackage 34 | fast: false 35 | 36 | issues: 37 | exclude-rules: 38 | - path: _test\.go 39 | linters: 40 | - gocyclo 41 | - errcheck 42 | - dupl 43 | - gosec 44 | exclude-use-default: true 45 | exclude-case-sensitive: false 46 | exclude-dirs: 47 | - \.bin$ 48 | - \.coverage$ 49 | exclude-dirs-use-default: true 50 | include: [] 51 | max-issues-per-linter: 50 52 | max-same-issues: 3 53 | new: false 54 | new-from-patch: "" 55 | fix: false 56 | 57 | severity: 58 | default-severity: error 59 | case-sensitive: true 60 | rules: 61 | - linters: 62 | - dupl 63 | severity: info 64 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://goreleaser.com/static/schema.json 2 | 3 | version: 2 4 | 5 | builds: 6 | - id: sqlite-cdc 7 | binary: sqlite-cdc 8 | main: ./cmd/sqlite-cdc/main.go 9 | env: 10 | - CGO_ENABLED=0 11 | goos: 12 | - linux 13 | - windows 14 | - darwin 15 | flags: 16 | - -trimpath 17 | ldflags: 18 | - -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}} 19 | - id: sqlite-cdc-setup 20 | binary: sqlite-cdc-setup 21 | main: 
./cmd/sqlite-cdc-setup/main.go 22 | env: 23 | - CGO_ENABLED=0 24 | goos: 25 | - linux 26 | - windows 27 | - darwin 28 | flags: 29 | - -trimpath 30 | ldflags: 31 | - -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}} 32 | 33 | archives: 34 | - format: tar.gz 35 | # this name template makes the OS and Arch compatible with the results of `uname`. 36 | name_template: >- 37 | {{ .ProjectName }}_ 38 | {{- title .Os }}_ 39 | {{- if eq .Arch "amd64" }}x86_64 40 | {{- else if eq .Arch "386" }}i386 41 | {{- else }}{{ .Arch }}{{ end }} 42 | {{- if .Arm }}v{{ .Arm }}{{ end }} 43 | # use zip for windows archives 44 | format_overrides: 45 | - goos: windows 46 | format: zip 47 | 48 | checksum: 49 | split: true 50 | 51 | changelog: 52 | sort: asc 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test test/lint test/unit test/coverage 2 | .PHONY: tools tools/update 3 | .PHONY: generate fmt clean clean/test clean/tools 4 | 5 | PROJECT_PATH = $(shell pwd -L) 6 | GOFLAGS ::= ${GOFLAGS} 7 | GOTOOLS = $(shell grep '_' $(TOOLS_DIR)/tools.go | sed 's/[[:space:]]*_//g' | sed 's/\"//g') 8 | BUILD_DIR = $(PROJECT_PATH)/.build 9 | TOOLS_DIR = $(PROJECT_PATH)/internal/tools 10 | TOOLS_FILE = $(TOOLS_DIR)/tools.go 11 | DIST_DIR = $(PROJECT_PATH)/dist 12 | BIN_DIR = $(PROJECT_PATH)/.bin 13 | BENCH_DIR = $(BUILD_DIR)/.bench 14 | IMAGES_DIR = $(PROJECT_PATH)/images 15 | IMAGES_PLOTS = $(wildcard $(IMAGES_DIR)/*.plot) 16 | IMAGES_PLOTS_RENDERED = $(subst .plot,.svg,$(IMAGES_PLOTS)) 17 | COVER_DIR = $(BUILD_DIR)/.coverage 18 | COVERAGE_UNIT = $(COVER_DIR)/unit.out 19 | COVERAGE_UNIT_INTERCHANGE = $(COVERAGE_UNIT:.out=.interchange) 20 | COVERATE_UNIT_HTML = $(COVERAGE_UNIT:.out=.html) 21 | COVERAGE_UNIT_XML = $(COVERAGE_UNIT:.out=.xml) 22 | COVERAGE_COMBINED = $(COVER_DIR)/combined.out 23 | COVERAGE_COMBINED_INTERCHANGE = $(COVERAGE_COMBINED:.out=.interchange) 24 | COVERAGE_COMBINED_HTML = $(COVERAGE_COMBINED:.out=.html) 25 | COVERAGE_COMBINED_XML = $(COVERAGE_COMBINED:.out=.xml) 26 | GOIMPORT_LOCAL = github.com/kevinconway 27 | GOLANGCILINT_CONFIG = $(PROJECT_PATH)/.golangci.yaml 28 | GOCMD = GOFLAGS=$(GOFLAGS) go 29 | BUILD_MODE = local 30 | BUILD_FLAGS = --clean 31 | ifneq ($(BUILD_MODE),tag) 32 | BUILD_FLAGS = --clean --snapshot 33 | endif 34 | 35 | ####### 36 | # https://stackoverflow.com/a/10858332 37 | check_defined = \ 38 | $(strip $(foreach 1,$1, \ 39 | $(call __check_defined,$1,$(strip $(value 2))))) 40 | __check_defined = \ 41 | $(if $(value $1),, 
\ 42 | $(error Undefined $1$(if $2, ($2)))) 43 | ####### 44 | 45 | build: | $(BIN_DIR) $(DIST_DIR) 46 | @ $(BIN_DIR)/goreleaser build $(BUILD_FLAGS) 47 | 48 | release: | $(BIN_DIR) $(DIST_DIR) 49 | @ $(BIN_DIR)/goreleaser release --clean 50 | 51 | test: test/lint test/unit test/coverage 52 | 53 | test/lint: | $(BIN_DIR) 54 | @ GOFLAGS="$(GOFLAGS)" \ 55 | $(BIN_DIR)/golangci-lint run \ 56 | --config $(GOLANGCILINT_CONFIG) 57 | 58 | test/unit: $(COVERAGE_UNIT) | $(BIN_DIR) 59 | 60 | test/coverage: $(COVER_DIR) $(COVERAGE_UNIT) $(COVERAGE_UNIT_INTERCHANGE) $(COVERATE_UNIT_HTML) $(COVERAGE_UNIT_XML) $(COVERAGE_COMBINED) $(COVERAGE_COMBINED_INTERCHANGE) $(COVERAGE_COMBINED_HTML) $(COVERAGE_COMBINED_XML) | $(BIN_DIR) 61 | @ $(GOCMD) tool cover -func $(COVERAGE_COMBINED) 62 | 63 | BENCH_TRIGGER_SERIAL_INSERT = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialInserts.txt 64 | BENCH_TRIGGER_SERIAL_INSERT_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialInserts-cmp.csv 65 | BENCH_TRIGGER_SERIAL_UPDATE = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialUpdates.txt 66 | BENCH_TRIGGER_SERIAL_UPDATE_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialUpdates-cmp.csv 67 | BENCH_TRIGGER_SERIAL_DELETE = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialDeletes.txt 68 | BENCH_TRIGGER_SERIAL_DELETE_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialDeletes-cmp.csv 69 | BENCH_TRIGGER_SERIAL_WIDE_INSERT = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialInserts.txt 70 | BENCH_TRIGGER_SERIAL_WIDE_INSERT_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialInserts-cmp.csv 71 | BENCH_TRIGGER_SERIAL_WIDE_UPDATE = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialUpdates.txt 72 | BENCH_TRIGGER_SERIAL_WIDE_UPDATE_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialUpdates-cmp.csv 73 | BENCH_TRIGGER_SERIAL_WIDE_DELETE = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialDeletes.txt 74 | BENCH_TRIGGER_SERIAL_WIDE_DELETE_REPORT = $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialDeletes-cmp.csv 75 | BENCH_BLOB = $(BENCH_DIR)/BenchmarkBlobEncoding.txt 76 | BENCH_BLOB_REPORT = $(BENCH_DIR)/BenchmarkBlobEncoding-cmp.csv 77 | 78 | benchmarks: benchmarks/simple benchmarks/wide benchmarks/blob 79 | benchmarks/simple: $(BENCH_TRIGGER_SERIAL_INSERT) $(BENCH_TRIGGER_SERIAL_UPDATE) $(BENCH_TRIGGER_SERIAL_DELETE) 80 | benchmarks/wide: $(BENCH_TRIGGER_SERIAL_WIDE_INSERT) $(BENCH_TRIGGER_SERIAL_WIDE_UPDATE) $(BENCH_TRIGGER_SERIAL_WIDE_DELETE) 81 | benchmarks/blob: $(BENCH_BLOB) 82 | 83 | benchmark-reports: benchmark-reports/simple benchmark-reports/wide benchmark-reports/blob 84 | benchmark-reports/simple: $(BENCH_TRIGGER_SERIAL_INSERT_REPORT) $(BENCH_TRIGGER_SERIAL_UPDATE_REPORT) $(BENCH_TRIGGER_SERIAL_DELETE_REPORT) 85 | benchmark-reports/wide: $(BENCH_TRIGGER_SERIAL_WIDE_INSERT_REPORT) $(BENCH_TRIGGER_SERIAL_WIDE_UPDATE_REPORT) $(BENCH_TRIGGER_SERIAL_WIDE_DELETE_REPORT) 86 | benchmark-reports/blob: $(BENCH_BLOB_REPORT) 87 | 88 | $(BENCH_TRIGGER_SERIAL_INSERT): | $(BIN_DIR) $(BENCH_DIR) 89 | @ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencySimpleTableSerialInserts*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialInserts.txt 90 | $(BENCH_TRIGGER_SERIAL_UPDATE): | $(BIN_DIR) $(BENCH_DIR) 91 | @ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencySimpleTableSerialUpdates*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialUpdates.txt 92 | $(BENCH_TRIGGER_SERIAL_DELETE): | $(BIN_DIR) $(BENCH_DIR) 93 | 
	@ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencySimpleTableSerialDeletes*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialDeletes.txt
$(BENCH_TRIGGER_SERIAL_WIDE_INSERT): | $(BIN_DIR) $(BENCH_DIR)
	@ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencyWideTableSerialInserts*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialInserts.txt
$(BENCH_TRIGGER_SERIAL_WIDE_UPDATE): | $(BIN_DIR) $(BENCH_DIR)
	@ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencyWideTableSerialUpdates*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialUpdates.txt
$(BENCH_TRIGGER_SERIAL_WIDE_DELETE): | $(BIN_DIR) $(BENCH_DIR)
	@ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkTriggerLatencyWideTableSerialDeletes*' -count=20 > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialDeletes.txt
$(BENCH_BLOB): | $(BIN_DIR) $(BENCH_DIR)
	@ $(GOCMD) test -timeout 0 -run='^$$' -bench='^BenchmarkBlobEncoding*' -count=20 > $(BENCH_DIR)/BenchmarkBlobEncoding.txt

$(BENCH_TRIGGER_SERIAL_INSERT_REPORT): $(BENCH_TRIGGER_SERIAL_INSERT) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialInserts.txt > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialInserts-cmp.csv
$(BENCH_TRIGGER_SERIAL_UPDATE_REPORT): $(BENCH_TRIGGER_SERIAL_UPDATE) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialUpdates.txt > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialUpdates-cmp.csv
$(BENCH_TRIGGER_SERIAL_DELETE_REPORT): $(BENCH_TRIGGER_SERIAL_DELETE) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialDeletes.txt > $(BENCH_DIR)/BenchmarkTriggerLatencySimpleTableSerialDeletes-cmp.csv
$(BENCH_TRIGGER_SERIAL_WIDE_INSERT_REPORT): $(BENCH_TRIGGER_SERIAL_WIDE_INSERT) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialInserts.txt > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialInserts-cmp.csv
$(BENCH_TRIGGER_SERIAL_WIDE_UPDATE_REPORT): $(BENCH_TRIGGER_SERIAL_WIDE_UPDATE) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialUpdates.txt > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialUpdates-cmp.csv
$(BENCH_TRIGGER_SERIAL_WIDE_DELETE_REPORT): $(BENCH_TRIGGER_SERIAL_WIDE_DELETE) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /triggers -row /columns $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialDeletes.txt > $(BENCH_DIR)/BenchmarkTriggerLatencyWideTableSerialDeletes-cmp.csv
$(BENCH_BLOB_REPORT): $(BENCH_BLOB) | $(BIN_DIR)
	@ $(BIN_DIR)/benchstat -format csv -col /size $(BENCH_DIR)/BenchmarkBlobEncoding.txt > $(BENCH_DIR)/BenchmarkBlobEncoding-cmp.csv

images: $(IMAGES_PLOTS_RENDERED)

$(IMAGES_DIR)/%.svg: $(IMAGES_DIR)/%.plot
	@ gnuplot $^ > $@

tools: | $(BIN_DIR)
	@ cd $(TOOLS_DIR) && GOBIN=$(BIN_DIR) $(GOCMD) install $(GOTOOLS)
tools/update:
	@ cd $(TOOLS_DIR) && GOBIN=$(BIN_DIR) $(GOCMD) get -u
	@ cd $(TOOLS_DIR) && GOBIN=$(BIN_DIR) $(GOCMD) mod tidy

$(BIN_DIR):
	@ mkdir -p $(BIN_DIR)

generate:
	@ go generate ./...
fmt: | $(BIN_DIR)
	@ GOFLAGS="$(GOFLAGS)" \
		$(BIN_DIR)/goimports -w -v \
		-local $(GOIMPORT_LOCAL) \
		$(shell find . -type f -name '*.go' -not -path "./vendor/*")

clean: clean/test clean/tools clean/build
clean/build:
	@:$(call check_defined,BUILD_DIR)
	@ rm -rf "$(BUILD_DIR)"
	@:$(call check_defined,DIST_DIR)
	@ rm -rf "$(DIST_DIR)"
clean/test:
	@:$(call check_defined,COVER_DIR)
	@ rm -rf "$(COVER_DIR)"
clean/tools:
	@:$(call check_defined,BIN_DIR)
	@ rm -rf "$(BIN_DIR)"

$(COVERAGE_UNIT): $(shell find . -type f -name '*.go' -not -path "./vendor/*") | $(COVER_DIR)
	@ $(GOCMD) test \
		-v \
		-cover \
		-race \
		-coverprofile="$(COVERAGE_UNIT)" \
		./...

$(COVER_DIR)/%.interchange: $(COVER_DIR)/%.out
	@ GOFLAGS="$(GOFLAGS)" \
		$(BIN_DIR)/gocov convert $< > $@

$(COVER_DIR)/%.xml: $(COVER_DIR)/%.interchange
	@ cat $< | \
		GOFLAGS="$(GOFLAGS)" \
		$(BIN_DIR)/gocov-xml > $@

$(COVER_DIR)/%.html: $(COVER_DIR)/%.interchange
	@ cat $< | \
		GOFLAGS="$(GOFLAGS)" \
		$(BIN_DIR)/gocov-html > $@

$(COVERAGE_COMBINED):
	@ GOFLAGS="$(GOFLAGS)" \
		$(BIN_DIR)/gocovmerge $(COVER_DIR)/*.out > $(COVERAGE_COMBINED)

$(COVER_DIR): | $(BUILD_DIR)
	@ mkdir -p $(COVER_DIR)

$(BENCH_DIR): | $(BUILD_DIR)
	@ mkdir -p $(BENCH_DIR)

$(BUILD_DIR):
	@ mkdir -p $(BUILD_DIR)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SQLite-CDC

**A change-data-capture engine for SQLite.**

## Overview

This project implements change-data-capture, or CDC, for SQLite databases. The
current implementation works by installing triggers on target tables that
record the before and after states of a row modification to a change log
table. The engine then watches the SQLite files for changes, reads batches of
change records, and sends them to a configurable destination for processing.

Each entry in the log contains the following information:

- Timestamp of change
- Type of change (INSERT, UPDATE, or DELETE)
- Name of source table
- Before (when type is UPDATE or DELETE)
- After (when type is INSERT or UPDATE)

The before and after fields contain the values of all columns from the source
table before and after the row was modified.

The engine can read up to a configurable number of records as a batch and
deliver these changes to some processing logic or destination. These change
records can then be used to build higher level replication systems such as
read-only replicas of SQLite data in a networked database such as MySQL,
client-side caches of data with automated invalidation, real-time ETL and data
warehousing, or even more advanced systems such as bidirectional replication
between independent databases.

## Project Status

Consider this project in alpha or a work-in-progress state.

My original plan was to put this in production by the end of 2024 but I've
missed that goal. As of writing, I've only tested the project on small scale,
non-production workloads.
I've also added some benchmarks to help set expectations or estimates around
performance impacts but they aren't guaranteed to be accurate for production
workloads. Use at your own risk.

If you end up doing your own testing or experimentation with this project then
please let me know your results. I'd be grateful for any success or failure
you can share.

## Adding CDC To A Database

The included `cmd/sqlite-cdc-setup` command can be used to add or remove CDC
support from a database. This command attaches triggers to a target set of
tables that automatically populate a change log table named `__cdc_log` by
default.

```bash
go run cmd/sqlite-cdc-setup/main.go --db test.sqlite --table my_table --setup
go run cmd/sqlite-cdc-setup/main.go --db test.sqlite --table my_table --teardown
```

Use the `--help` flag to get a summary of the options. For convenience:
```
Usage of sqlite-cdc-setup:
  -blobs
        Enable support for blobs
  -db string
        SQLite file path
  -db-params string
        SQLite connection parameters. See for parameter syntax (default "_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)")
  -disable-subsec
        Disable subsecond time resolution to support old clients
  -log-table string
        Name of the table to store CDC log entries (default "__cdc_log")
  -setup
        Perform initial setup of the database for CDC before starting in any mode
  -table value
        A table name to monitor. Can be specified multiple times
  -teardown
        Perform teardown of the CDC tables and triggers. Setting the teardown flag prevents any other action. The process will perform the teardown and then exit
  -version
        Print version and exit
```
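The same setup can also be performed from Go rather than through the CLI. The
sketch below mirrors the calls that `cmd/sqlite-cdc-setup` makes internally;
the database file name and table name are placeholders, and the discarding
handler is used only because setup never delivers changes:

```golang
package main

import (
	"context"
	"database/sql"
	"io"
	"log"

	_ "modernc.org/sqlite"

	cdc "github.com/kevinconway/sqlite-cdc"
	"github.com/kevinconway/sqlite-cdc/handlers"
)

func main() {
	// WAL mode and a busy timeout match the defaults used by the CLIs.
	db, err := sql.Open("sqlite", "test.sqlite?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)")
	if err != nil {
		log.Fatalln(err)
	}
	defer db.Close()

	// The handler is unused during setup so a discarding handler is enough.
	engine, err := cdc.NewTriggerEngine(db, &handlers.STDIO{Output: io.Discard}, []string{"my_table"},
		cdc.WithLogTableName("__cdc_log"),
	)
	if err != nil {
		log.Fatalln(err)
	}
	// Setup installs the change log table and the capture triggers.
	if err := engine.Setup(context.Background()); err != nil {
		log.Fatalln(err)
	}
}
```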
### Running The Example Processor

The easiest way to test the changelog table and process the contents is to use
the `cmd/sqlite-cdc` command. To see CDC events streamed to the console use
something like:

```bash
go run cmd/sqlite-cdc/main.go --output json --db test.sqlite --table my_table --bootstrap --cdc
```

This will begin streaming JSON encoded change events to the console starting
with an event for every existing row followed by events from the changelog
table:

```json
{
    "table":"my_table",
    "timestamp":"2024-05-05T19:03:11.5681173-05:00",
    "operation":"INSERT",
    "before":null,
    "after":{"email":"noreply@example.com","favorite_color":"blue","id":1,"username":"example"}
}

{
    "table":"my_table",
    "timestamp":"2024-05-06T00:03:34Z",
    "operation":"UPDATE",
    "before":{"email":"noreply@example.com","favorite_color":"blue","id":1,"username":"example"},
    "after":{"email":"noreply@example.com","favorite_color":"green","id":1,"username":"example"}
}

{
    "table":"my_table",
    "timestamp":"2024-05-06T00:04:07Z",
    "operation":"DELETE",
    "before":{"email":"noreply@example.com","favorite_color":"green","id":1,"username":"example"},
    "after":null
}
```

Use the `--help` flag to see all the available options. Here's a snapshot of
what's available:
```
Usage of sqlite-cdc:
  -batch-size int
        The max number of log entries to collect in each batch (default 256)
  -blobs
        Enable support for blobs
  -bootstrap
        Read all existing records as if they are inserts and then exit. If this flag is set in addition to the cdc flag the cdc mode will begin after the bootstrap is complete
  -cdc
        Run a continuous extraction of the CDC log.
  -db string
        SQLite file path
  -db-params string
        SQLite connection parameters. See for parameter syntax (default "_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)")
  -disable-subsec
        Disable subsecond time resolution to support old clients
  -log-table string
        Name of the table to store CDC log entries (default "__cdc_log")
  -output string
        Write destination for log entries. Valid options are - for simplified stdout, json for full JSON stdout, or an HTTP URL that will receive POST requests containing batches of log entries. See for more. (default "-")
  -table value
        A table name to monitor. Can be specified multiple times
  -version
        Print version and exit
```

### Limits Of The Provided Processor

The example processor exists primarily for demonstrations and for quickly
performing an initial test of the system. In most cases, I expect that you
will use the example as a template for creating your own custom build that
incorporates your own event handling logic and integrates better with your
runtime environment by, for example, adding instrumentation.

As is, the example processor can perform all the CDC responsibilities but
supports only three outputs:

- Simplified logging to STDOUT (ex: `<timestamp>: <table> <operation>`)
- JSON logging to STDOUT (shown in the previous section)
- HTTP POST

If you absolutely must use the example as-is then the HTTP POST output enables
you to redirect batches of CDC events to a specified URL. That receiver can
then implement any logic you need. Note that the current version of the
project does not include any form of built-in retries so your POST endpoint
must implement those internally. See for details on the API contract.
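As a rough illustration, a receiver could look like the sketch below. It
assumes the POST body is a JSON array of objects shaped like the events shown
above; verify the exact contract against the handlers package documentation
before relying on it:

```golang
package main

import (
	"encoding/json"
	"log"
	"net/http"
	"time"
)

// change mirrors the JSON fields shown in the example output above.
type change struct {
	Table     string          `json:"table"`
	Timestamp time.Time       `json:"timestamp"`
	Operation string          `json:"operation"`
	Before    json.RawMessage `json:"before"`
	After     json.RawMessage `json:"after"`
}

func main() {
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		var batch []change
		if err := json.NewDecoder(r.Body).Decode(&batch); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		// A non-2xx status signals failure to the sender, which does not
		// retry, so durable processing belongs behind this endpoint.
		for _, c := range batch {
			log.Printf("%s %s at %s", c.Operation, c.Table, c.Timestamp)
		}
		w.WriteHeader(http.StatusOK)
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```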
## Extending The Engine

Custom builds need to provide two components: an event handler and a database
client.

The database client can be any SQLite driver for Go that implements the
standard library `database/sql` driver interface. See for benchmarks covering
most of the popular drivers available for Go.

The event handler is anything that implements:
```golang
type ChangesHandler interface {
	HandleChanges(ctx context.Context, changes Changes) error
}
```
Each call to `HandleChanges` receives a batch of change records. Each batch of
changes given to your event handler will be in change order. The engine makes
only one call to the handler at a time so that batches are also strictly
processed in change order. If the handler returns an error then the engine
considers this a critical fault and shuts down. If the handler returns success
then the entire batch is considered successful and the relevant changes are
removed from the log. Your handler is responsible for durability or
reliability behaviors such as retries with backoff.
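For example, a minimal handler that prints each change could look like the
following sketch. The qualified names assume your build imports this module
under the alias `cdc` as the example commands do:

```golang
package main

import (
	"context"
	"fmt"

	cdc "github.com/kevinconway/sqlite-cdc"
)

// printHandler implements cdc.ChangesHandler. Returning an error shuts the
// engine down, so any retry or backoff logic belongs inside HandleChanges.
type printHandler struct{}

func (printHandler) HandleChanges(ctx context.Context, changes cdc.Changes) error {
	for _, change := range changes {
		// Changes arrive in change order; a real handler might forward
		// them to a queue or apply them to a replica here.
		fmt.Println(change.String())
	}
	return nil
}
```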
### API Documentation


## Limitations

### BLOB Support

By default, the engine ignores all columns with a BLOB type. BLOB support can
be enabled by using the `WithBlobSupport(true)` option when constructing the
engine or the `--blobs` flag when using the example executable. However, BLOB
support is limited even when enabled.

When enabled, all values in a BLOB typed column are converted to an
upper-case, hexadecimal representation of the BLOB's value using the SQLite
[hex](https://www.sqlite.org/lang_corefunc.html#hex) function. This is done to
make the values representable in JSON because JSON has no native expression of
raw bytes.

Note, however, that SQLite distinguishes between column type and data type. By
default, column types are not enforced and SQLite is capable of storing any
data type within any column. For example, a column may be defined with type
INTEGER but that does not prevent a client from inserting values that are of
type TEXT or BLOB.

This project cannot handle BLOB type data in a non-BLOB type column. I suggest
using [STRICT tables](https://www.sqlite.org/stricttables.html) to avoid this
situation.

Note also that larger blob values take longer to convert to hex and can result
in poorer performance.
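A handler that needs the original bytes back can reverse the hex encoding with
the standard library. This is a sketch with a hypothetical row shape; the
`data` column name is illustrative only:

```golang
package main

import (
	"encoding/hex"
	"encoding/json"
	"fmt"
	"log"
)

func main() {
	// Suppose the "data" column is BLOB typed, so its value arrives as an
	// upper-case hex string inside the after image of a change record.
	after := []byte(`{"id":1,"data":"DEADBEEF"}`)

	var row struct {
		ID   int    `json:"id"`
		Data string `json:"data"`
	}
	if err := json.Unmarshal(after, &row); err != nil {
		log.Fatalln(err)
	}
	// encoding/hex accepts both upper- and lower-case hex digits.
	raw, err := hex.DecodeString(row.Data)
	if err != nil {
		log.Fatalln(err)
	}
	fmt.Printf("%d bytes: %v\n", len(raw), raw)
}
```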
### Very Wide Tables

The engine uses SQLite functions to produce JSON representations of a record.
SQLite has a hard limit of 127 arguments per function call. Because each
column consumes two arguments, a name and a value, this equates to a limit of
63 columns convertible per function call.

All rows from tables with fewer than 64 columns are converted to JSON in a
single function call. Tables with 64 or more columns engage in an alternative
conversion process. The wide table conversion process groups columns into sets
of 63, generates a JSON object from each set, and performs a series of JSON
patches to merge each column set with the previously generated JSON object.

Wide table conversion is limited to 1000 columns because this is the default
stack depth limit for SQLite. Wider tables result in poorer performance.
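The arithmetic behind the grouping can be sketched as a small helper. This
function is illustrative only and not part of the library; it assumes one
json_patch merge per column set beyond the first:

```golang
// jsonMergeGroups illustrates the grouping described above: columns are
// emitted in sets of at most 63 and each additional set costs one
// json_patch merge against the previously generated object.
func jsonMergeGroups(columns int) (groups, patches int) {
	groups = (columns + 62) / 63 // ceil(columns / 63)
	if groups > 0 {
		patches = groups - 1
	}
	return groups, patches
}
```

Under that assumption, a 256-column table yields ceil(256/63) = 5 objects and
4 merges, while a 1000-column table yields 16 objects and 15 merges.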
### WAL Mode Required

Running `sqlite-cdc` on your database necessarily adds reads and writes that
are concurrent with your existing usage. Practically, enabling WAL mode is the
only way for this to work.

### Clients With Version Less Than 3.42.0

The log table contains a timestamp that represents the time of change. SQLite
datetime resolution is second precision by default. Version 3.42.0 added a
`subsecond` modifier to time functions that enables millisecond precision.

If any of your clients are older than 3.42.0 then you must use the
`WithoutSubsecondTime` option when constructing an engine or the
`--disable-subsec` flag in the example executable.

It's important to note that the version is associated with the client and not
the database file. It is possible to have both old and new clients operating
on the same database file. The lowest versioned client determines the
limitations.

### Clients With Version Less Than 3.38.0

Any clients older than 3.38.0 must be compiled with JSON support enabled. This
is enabled by default on 3.38.0 and newer clients.

## Why Not Use The Session API?

SQLite can be built with an extension called
[session](https://sqlite.org/sessionintro.html) that provides an API for
recording database changes and formatting them into change sets. The resulting
change sets contain very similar information to the current design of the
change log that this project uses.

On the surface, the sessions extension appears to be a great fit for CDC.
Starting a session begins a capture of all changes to a select set of tables.
Each change record describes modifications to a table row. Those change
records accumulate in a log that can be fetched at any time. This is
conceptually the same as what this project does.

However, the sessions extension has three notable behaviors that make it the
wrong choice for general purpose CDC. The first is that session change records
are only materialized when reading the log and only one change is generated
per row that was modified. As a result, sessions are a poor way to receive
real-time change notifications and are only capable of producing the final
state of a record rather than a series of interim changes. The second behavior
is that the session log can only be fetched as a whole but never resets. As a
result, the session log grows unbounded and there is no mechanism to filter
out already handled changes. The third behavior is that a session is not
necessarily persisted beyond the lifetime of the database connection it's
associated with. As a result, there is no way to restore CDC from the last
known position if the system exited for any reason.

The sessions extension appears to be designed to operate in a specific kind of
environment where SQLite is being used as a data structure rather than a
typical SQL database. Sessions work best when more tightly integrated into
application logic and aren't well suited for this project's "bolt-on" or
sidecar model.

## Performance Impacts Of The CDC Triggers

I don't yet have any production performance metrics to share. Until then, the
code includes benchmarks that attempt to measure the performance impacts of
the change capture triggers and the different ways they manage or encode data.
The benchmarks are run with the following pragmas:

- journal_mode(wal)
- busy_timeout(5000)
- synchronous(normal)
- foreign_keys(on)

You can run the suite of benchmarks using either standard Go tooling or the
included `make benchmarks` rule. The Makefile rule generates a set of
benchmark outputs in `.build/.bench/` that contain 20 runs of each benchmark.
The `make benchmark-reports` rule will generate a comparative analysis of the
contents of each output file as a CSV. Note that the full suite of benchmarks
can take potentially more than an hour to complete.

The results below are based on running the benchmarks on a local workstation.
The workstation was not dedicated to the benchmarks and was subject to jitter.
I've included the percent differences but not the absolute values because the
absolute values will differ based on hardware while the percent differences
should be somewhat consistent across different hardware configurations. For a
sense of scale, the absolute values for individual operations without
triggers, in my test runs, were in the magnitude of 10s of microseconds.

My personal interpretation of the overall results is that the CDC triggers add
marginal write performance overhead for the majority of use cases.

### Simple Tables

The "simple" table benchmarks are run without concurrency and compare the cost
of insert, update, and delete statements. The tables used in these benchmarks
are defined with all integer columns, an integer primary key, and without
ROWID. The number of columns never exceeds 63, which is the maximum count that
can be converted to a CDC record by the triggers in a single step.

| Columns | Insert (% Difference) | Update (% Difference) | Delete (% Difference) |
| ------- | --------------------- | --------------------- | --------------------- |
| 2       | 97%                   | 100%                  | 113%                  |
| 4       | 96%                   | 96%                   | 105%                  |
| 8       | 93%                   | 99%                   | 111%                  |
| 16      | 99%                   | 111%                  | 127%                  |
| 32      | 106%                  | 158%                  | 153%                  |
| 64      | 105%                  | 179%                  | 203%                  |

![A graph showing the growth of performance impacts of the CDC triggers on inserts, updates, and deletes as the number of columns to merge grows.](./images/simple-tables.svg)

For most table sizes the average overhead appears to be around 100% compared
to a single operation, which matches expectations because the triggers add one
additional insert for each modifying operation. The overhead of the JSON
encoding of the data appears minimal for small tables but grows with column
count.

### Wide Tables

The wide table benchmarks are the same as the simple table benchmarks but the
tables are always larger than 63 columns. This engages a workaround for the
max SQLite function parameter limit which is 127. To generate JSON objects
with more than 63 key/value pairs the system computes objects in batches of up
to 63 columns and then merges the results using the json_patch function. The
performance impacts grow relative to the number of objects that must be
merged. Here's a table and graph illustrating the performance impacts of wide
tables:

| Columns | Insert (% Difference) | Update (% Difference) | Delete (% Difference) |
| ------- | --------------------- | --------------------- | --------------------- |
| 64      | 119%                  | 225%                  | 251%                  |
| 128     | 195%                  | 335%                  | 556%                  |
| 256     | 412%                  | 696%                  | 1,434%                |
| 512     | 1,011%                | 1,558%                | 4,948%                |
| 1000    | 3,263%                | 3,872%                | 33,814%               |

![A graph showing the growth of performance impacts as the number of objects to merge grows.](./images/wide-tables.svg)

The overhead for wide tables appears to grow linearly with the number of
objects that must be merged, with the exception of deletes with 1000 columns.
I don't have an explanation for that discontinuity yet.

### BLOB Data

All BLOB data must be encoded to be JSON compatible. The impact of the
encoding process grows with the size of the BLOB. Here's a table and graph
showing the growth of encoding time as the byte size increases:

| Blob Size | Percent Slower Than 16 Bytes |
| --------- | ---------------------------- |
| 16        | 0%                           |
| 64        | 67%                          |
| 256       | 64%                          |
| 1024      | 69%                          |
| 4096      | 74%                          |
| 16384     | 85%                          |
| 32768     | 102%                         |
| 65536     | 126%                         |
| 131072    | 187%                         |
| 262144    | 287%                         |
| 524288    | 510%                         |
| 1048576   | 937%                         |

![A graph showing the growth of performance impacts as the size of BLOB data grows.](./images/blob-data.svg)

For a sense of scale, the encoding of 16 bytes took an average of about 46
microseconds on my particular workstation.

## Compatibility With Other Replication Tools

I have not yet tested with any of the SQLite replication tools I know about:

- 
- 
- 
- 

This tool should be compatible with any SQLite implementation or modification
that supports using a standard SQLite client. This means that `rqlite` is not
compatible because it requires all database writes to happen through an HTTP
API rather than a SQLite client. SQLite derivatives that work through a custom
filesystem, such as `litefs`, may work but likely cannot rely on the
integrated filesystem watcher to detect SQLite file changes. The workaround is
to use the included time-interval-based polling for changes.

The `litestream` project _should_ be compatible because it uses a standard
filesystem and supports arbitrary SQLite clients. I also suspect `marmot` is
compatible but redundant because it implements a very similar trigger based
system to this project.

## Developing

This project only requires a Go installation to work on and is compatible with
standard Go tooling. For example, you can run `go test ./...` to run the
tests.

For convenience, I've included a devcontainer configuration and a `Makefile`
with the following rules:

- build
  - Create executables for both CLIs
- test
  - Run all test suites
- test/lint
  - Run static analysis
- test/unit
  - Run unit tests
- test/coverage
  - Generate a coverage report
- benchmarks
  - Run all benchmarks (NOTE: This can take a long time)
- benchmark-reports
  - Generate comparison reports from benchmarks
- tools
  - Download any Go tooling used for project automation
- tools/update
  - Update all tools to the latest version
- fmt
  - Run the project's auto formatter
- clean
  - Remove all build and test artifacts
- clean/test
  - Remove test artifacts and coverage reports
- clean/tools
  - Remove any downloaded tools

## License

The code for this project is licensed under the Apache 2.0 terms.

See `LICENSE` for full details.

## Prior Art

This project is inspired by and .
--------------------------------------------------------------------------------
/cdc.go:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: © 2024 Kevin Conway
// SPDX-License-Identifier: Apache-2.0

package cdc

import (
	"context"
)

// CDC represents a complete implementation of a CDC for SQLite.
//
// Installer methods should not be called if any Engine methods are running.
// Only one Engine method may be called per instance of CDC with the exception
// of Close which may be called at any time to end a running operation. The
// instance is no longer valid after an Engine method completes.
type CDC interface {
	Installer
	Engine
}

// Installer is an optional set of methods that a CDC implementation may offer.
//
// These methods must be present but may be no-ops if an implementation has no
// setup or teardown requirements. Generally, the setup method should be called
// before any Engine methods.
type Installer interface {
	// Perform any setup necessary to support CDC.
	Setup(ctx context.Context) error
	// Perform any teardown necessary to remove CDC.
	Teardown(ctx context.Context) error
}

// Engine represents the change handling portion of a CDC implementation.
//
// Only one of these methods may be called per instance of CDC with the
// exception of Close which may be called at any time to end a running
// operation. Once any of these methods returns then the instance is no longer
// valid.
type Engine interface {
	// CDC-only mode processes changes made to the database.
	//
	// This mode only operates on changes that have been captured by the CDC
	// implementation and does not process any unchanged records. This means
	// that CDC-only mode does not process existing, unmodified records like
	// Bootstrap does.
	//
	// This mode runs until stopped or it encounters an error.
	CDC(ctx context.Context) error
	// Bootstrap-only mode processes all existing records in the database.
	//
	// This mode only operates on the current state of existing records and
	// does not process any captured changes. All existing records are
	// processed as though they are INSERT operations. For convenience, the
	// first change handled by bootstrap mode for any table is always a
	// BOOTSTRAP operation with an empty before and after image. This signal
	// may be used in systems that manage state based on the stream of
	// captured changes to indicate that the previously accumulated state is
	// likely invalid and must be re-built from the bootstrap data.
	//
	// This mode runs until it completes a scan of each table and then exits.
	Bootstrap(ctx context.Context) error
	// Bootstrap-and-CDC mode is a combination of the bootstrap and CDC modes.
	//
	// This mode starts with a bootstrap and then enters CDC mode once it is
	// complete. Any changes to data during the bootstrap are captured and
	// emitted once the engine enters CDC mode.
	//
	// This mode runs until stopped or it encounters an error.
	BootstrapAndCDC(ctx context.Context) error
	// Stop any ongoing CDC operations and shut down.
71 | Close(ctx context.Context) error 72 | } 73 | -------------------------------------------------------------------------------- /change.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import ( 7 | "encoding/json" 8 | "fmt" 9 | "time" 10 | ) 11 | 12 | type Change struct { 13 | Table string `json:"table"` 14 | Timestamp time.Time `json:"timestamp"` 15 | Operation Operation `json:"operation"` 16 | Before json.RawMessage `json:"before"` 17 | After json.RawMessage `json:"after"` 18 | } 19 | 20 | type Operation string 21 | 22 | const ( 23 | Insert Operation = "INSERT" 24 | Update Operation = "UPDATE" 25 | Delete Operation = "DELETE" 26 | Bootstrap Operation = "BOOTSTRAP" 27 | ) 28 | 29 | func (c Change) String() string { 30 | return fmt.Sprintf("%s: %s %s", c.Timestamp.Format(time.RFC3339Nano), c.Table, c.Operation) 31 | } 32 | 33 | type Changes []Change 34 | -------------------------------------------------------------------------------- /cmd/sqlite-cdc-setup/main.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package main 5 | 6 | import ( 7 | "context" 8 | "database/sql" 9 | "flag" 10 | "fmt" 11 | "io" 12 | "log" 13 | "os" 14 | "os/signal" 15 | "strings" 16 | "time" 17 | 18 | _ "modernc.org/sqlite" 19 | 20 | cdc "github.com/kevinconway/sqlite-cdc" 21 | "github.com/kevinconway/sqlite-cdc/handlers" 22 | ) 23 | 24 | type strList []string 25 | 26 | func (l *strList) String() string { 27 | return strings.Join(*l, ",") 28 | } 29 | 30 | func (l *strList) Set(s string) error { 31 | *l = append(*l, s) 32 | return nil 33 | } 34 | 35 | type flags struct { 36 | dbFile string 37 | dbParams string 38 | tables strList 39 | logTableName string 40 | setup bool 41 | teardown bool 42 | disableSubsec bool 43 | blobs bool 44 | version bool 45 | } 46 | 47 | var ( 48 | version = "source" //nolint:gochecknoglobals 49 | commit = "unknown" //nolint:gochecknoglobals 50 | date = time.Now().Format(time.RFC3339Nano) //nolint:gochecknoglobals 51 | ) 52 | 53 | func main() { 54 | ctx, cancel := context.WithCancel(context.Background()) 55 | defer cancel() 56 | ctx, _ = signal.NotifyContext(ctx, os.Interrupt) 57 | 58 | f := flags{} 59 | fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) 60 | fs.StringVar(&f.dbFile, "db", "", "SQLite file path") 61 | fs.StringVar(&f.dbParams, "db-params", "_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)", "SQLite connection parameters. See for parameter syntax") 62 | fs.StringVar(&f.logTableName, "log-table", "__cdc_log", "Name of the table to store CDC log entries") 63 | fs.Var(&f.tables, "table", "A table name to monitor. Can be specified multiple times") 64 | fs.BoolVar(&f.setup, "setup", false, "Perform initial setup of the database for CDC before starting in any mode") 65 | fs.BoolVar(&f.teardown, "teardown", false, "Perform teardown of the CDC tables and triggers. Setting the teardown flag prevents any other action. 
The process will perform the teardown and then exit") 66 | fs.BoolVar(&f.disableSubsec, "disable-subsec", false, "Disable subsecond time resolution to support old clients") 67 | fs.BoolVar(&f.blobs, "blobs", false, "Enable support for blobs") 68 | fs.BoolVar(&f.version, "version", false, "Print version and exit") 69 | if err := fs.Parse(os.Args[1:]); err != nil { 70 | log.Fatalln(err) 71 | } 72 | 73 | if f.version { 74 | fmt.Printf("%s version:%s commit:(%s)\n", os.Args[0], version, commit) 75 | t, err := time.Parse(time.RFC3339Nano, date) 76 | if err != nil { 77 | fmt.Printf("Built on %s\n", date) 78 | return 79 | } 80 | fmt.Printf("Built on %s\n", t.Format(time.RubyDate)) 81 | return 82 | } 83 | 84 | if len(f.tables) < 1 { 85 | log.Fatalln("at least one table must be specified for setup or teardown operations") 86 | } 87 | if f.setup && f.teardown { 88 | log.Fatalln("setup and teardown flags are mutually exclusive") 89 | } 90 | 91 | dsn := fmt.Sprintf("%s?%s", f.dbFile, f.dbParams) 92 | db, err := sql.Open("sqlite", dsn) 93 | if err != nil { 94 | log.Fatalln(err) 95 | } 96 | defer db.Close() 97 | 98 | handler := &handlers.STDIO{Output: io.Discard} 99 | c, err := cdc.NewTriggerEngine(db, handler, f.tables, 100 | cdc.WithLogTableName(f.logTableName), 101 | cdc.WithoutSubsecondTime(f.disableSubsec), 102 | cdc.WithBlobSupport(f.blobs), 103 | ) 104 | if err != nil { 105 | log.Fatalln(err) 106 | } 107 | 108 | if f.setup { 109 | if err = c.Setup(ctx); err != nil { 110 | log.Fatalln(err) 111 | } 112 | return 113 | } 114 | 115 | if f.teardown { 116 | if err = c.Teardown(ctx); err != nil { 117 | log.Fatalln(err) 118 | } 119 | return 120 | } 121 | 122 | <-ctx.Done() 123 | } 124 | -------------------------------------------------------------------------------- /cmd/sqlite-cdc/main.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package main 5 | 6 | import ( 7 | "context" 8 | "database/sql" 9 | "flag" 10 | "fmt" 11 | "log" 12 | "net/http" 13 | "os" 14 | "os/signal" 15 | "strings" 16 | "time" 17 | 18 | _ "modernc.org/sqlite" 19 | 20 | cdc "github.com/kevinconway/sqlite-cdc" 21 | "github.com/kevinconway/sqlite-cdc/handlers" 22 | ) 23 | 24 | type strList []string 25 | 26 | func (l *strList) String() string { 27 | return strings.Join(*l, ",") 28 | } 29 | 30 | func (l *strList) Set(s string) error { 31 | *l = append(*l, s) 32 | return nil 33 | } 34 | 35 | type flags struct { 36 | dbFile string 37 | dbParams string 38 | tables strList 39 | logTableName string 40 | cdc bool 41 | bootstrap bool 42 | destination string 43 | batchSize int 44 | disableSubsec bool 45 | blobs bool 46 | version bool 47 | } 48 | 49 | var ( 50 | version = "source" //nolint:gochecknoglobals 51 | commit = "unknown" //nolint:gochecknoglobals 52 | date = time.Now().Format(time.RFC3339Nano) //nolint:gochecknoglobals 53 | ) 54 | 55 | func main() { 56 | ctx, cancel := context.WithCancel(context.Background()) 57 | defer cancel() 58 | ctx, _ = signal.NotifyContext(ctx, os.Interrupt) 59 | 60 | f := flags{} 61 | fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) 62 | fs.StringVar(&f.dbFile, "db", "", "SQLite file path") 63 | fs.StringVar(&f.dbParams, "db-params", "_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)", "SQLite connection parameters. 
See for parameter syntax") 64 | fs.StringVar(&f.logTableName, "log-table", "__cdc_log", "Name of the table to store CDC log entries") 65 | fs.Var(&f.tables, "table", "A table name to monitor. Can be specified multiple times") 66 | fs.BoolVar(&f.bootstrap, "bootstrap", false, "Read all existing records as if they are inserts and then exit. If this flag is set in addition to the cdc flag the cdc mode will begin after the bootstrap is complete") 67 | fs.BoolVar(&f.cdc, "cdc", false, "Run a continuous extraction of the CDC log.") 68 | fs.StringVar(&f.destination, "output", "-", "Write destination for log entries. Valid options are - for simplified stdout, json for full JSON stdout, or an HTTP URL that will receive POST requests containing batches of log entries. See for more.") 69 | fs.IntVar(&f.batchSize, "batch-size", 256, "The max number of log entries to collect in each batch") 70 | fs.BoolVar(&f.disableSubsec, "disable-subsec", false, "Disable subsecond time resolution to support old clients") 71 | fs.BoolVar(&f.blobs, "blobs", false, "Enable support for blobs") 72 | fs.BoolVar(&f.version, "version", false, "Print version and exit") 73 | if err := fs.Parse(os.Args[1:]); err != nil { 74 | log.Fatalln(err) 75 | } 76 | 77 | if f.version { 78 | fmt.Printf("%s version:%s commit:(%s)\n", os.Args[0], version, commit) 79 | t, err := time.Parse(time.RFC3339Nano, date) 80 | if err != nil { 81 | fmt.Printf("Built on %s\n", date) 82 | return 83 | } 84 | fmt.Printf("Built on %s\n", t.Format(time.RubyDate)) 85 | return 86 | } 87 | 88 | if len(f.tables) < 1 && f.bootstrap { 89 | log.Fatalln("at least one table must be specified if bootstrap mode is enabled") 90 | } 91 | 92 | dsn := fmt.Sprintf("%s?%s", f.dbFile, f.dbParams) 93 | db, err := sql.Open("sqlite", dsn) 94 | if err != nil { 95 | log.Fatalln(err) 96 | } 97 | defer db.Close() 98 | 99 | var handler cdc.ChangesHandler 100 | switch f.destination { 101 | case "json": 102 | handler = &handlers.Debug{Output: os.Stdout} 103 | case "-": 104 | handler = &handlers.STDIO{Output: os.Stdout} 105 | default: 106 | handler = &handlers.HTTPBasicPOST{ 107 | Client: http.DefaultClient, 108 | Endpoint: f.destination, 109 | } 110 | } 111 | 112 | c, err := cdc.NewTriggerEngine(db, handler, f.tables, 113 | cdc.WithMaxBatchSize(f.batchSize), 114 | cdc.WithLogTableName(f.logTableName), 115 | cdc.WithoutSubsecondTime(f.disableSubsec), 116 | cdc.WithBlobSupport(f.blobs), 117 | ) 118 | if err != nil { 119 | log.Fatalln(err) 120 | } 121 | 122 | switch { 123 | case f.bootstrap && !f.cdc: 124 | if err = c.Bootstrap(ctx); err != nil { 125 | log.Fatalln(err) 126 | } 127 | return 128 | case f.cdc && !f.bootstrap: 129 | go func() { 130 | defer cancel() 131 | if err = c.CDC(ctx); err != nil { 132 | log.Fatalln(err) 133 | } 134 | }() 135 | case f.cdc && f.bootstrap: 136 | go func() { 137 | defer cancel() 138 | if err = c.BootstrapAndCDC(ctx); err != nil { 139 | log.Fatalln(err) 140 | } 141 | }() 142 | case !f.cdc && !f.bootstrap: 143 | log.Fatalln("at least one of cdc or bootstrap must be set") 144 | default: 145 | panic("unreachable") 146 | } 147 | 148 | <-ctx.Done() 149 | } 150 | -------------------------------------------------------------------------------- /dbmeta.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import ( 7 | "database/sql" 8 | "fmt" 9 | "strings" 10 | ) 11 | 12 | type dbMeta struct { 13 | Filename string 
14 | 	WAL        bool
15 | 	ExtraFiles []string
16 | 	Tables     map[string]tableMeta
17 | }
18 | 
19 | func newDBMeta(db *sql.DB) (*dbMeta, error) {
20 | 	var filename string
21 | 	var extraFiles []string
22 | 	if err := db.QueryRow("SELECT file FROM pragma_database_list WHERE name='main'").Scan(&filename); err != nil {
23 | 		return nil, fmt.Errorf("%w: failed to determine database filename", err)
24 | 	}
25 | 
26 | 	var pragmaWAL string
27 | 	if err := db.QueryRow("PRAGMA journal_mode").Scan(&pragmaWAL); err != nil {
28 | 		return nil, fmt.Errorf("%w: failed to determine database journal mode", err)
29 | 	}
30 | 	wal := strings.ToLower(pragmaWAL) == "wal"
31 | 	if wal {
32 | 		extraFiles = append(extraFiles, filename+"-wal", filename+"-shm")
33 | 	}
34 | 
35 | 	tables := make(map[string]tableMeta)
36 | 	rows, err := db.Query("SELECT name, wr FROM pragma_table_list WHERE schema='main' AND type='table'")
37 | 	if err != nil {
38 | 		return nil, fmt.Errorf("%w: failed to list tables", err)
39 | 	}
40 | 	defer rows.Close()
41 | 	for rows.Next() {
42 | 		var name string
43 | 		var wr bool
44 | 		if err := rows.Scan(&name, &wr); err != nil {
45 | 			return nil, fmt.Errorf("%w: failed to read table metadata", err)
46 | 		}
47 | 		table := tableMeta{
48 | 			Name:         name,
49 | 			WithoutRowID: wr,
50 | 		}
51 | 		cRows, err := db.Query("SELECT name, type, pk FROM pragma_table_info(?)", table.Name)
52 | 		if err != nil {
53 | 			return nil, fmt.Errorf("%w: failed to list table columns for %s", err, table.Name)
54 | 		}
55 | 		defer cRows.Close()
56 | 		for cRows.Next() {
57 | 			var name string
58 | 			var type_ string
59 | 			var pk int
60 | 			if err := cRows.Scan(&name, &type_, &pk); err != nil {
61 | 				return nil, fmt.Errorf("%w: failed to read table column metadata for %s", err, table.Name)
62 | 			}
63 | 			table.Columns = append(table.Columns, columnMeta{
64 | 				Name: name,
65 | 				Type: type_,
66 | 				PK:   pk,
67 | 			})
68 | 		}
69 | 		if err := cRows.Err(); err != nil {
70 | 			return nil, fmt.Errorf("%w: failed to iterate table column metadata for %s", err, table.Name)
71 | 		}
72 | 		_ = cRows.Close()
73 | 		tables[table.Name] = table
74 | 	}
75 | 	if err := rows.Err(); err != nil {
76 | 		return nil, fmt.Errorf("%w: failed to iterate table metadata entries", err)
77 | 	}
78 | 
79 | 	return &dbMeta{
80 | 		Filename:   filename,
81 | 		WAL:        wal,
82 | 		Tables:     tables,
83 | 		ExtraFiles: extraFiles,
84 | 	}, nil
85 | }
86 | 
87 | type tableMeta struct {
88 | 	Name         string
89 | 	WithoutRowID bool
90 | 	Columns      []columnMeta
91 | }
92 | 
93 | type columnMeta struct {
94 | 	Name string
95 | 	Type string
96 | 	PK   int
97 | }
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/kevinconway/sqlite-cdc
2 | 
3 | go 1.22.1
4 | 
5 | require (
6 | 	github.com/fsnotify/fsnotify v1.8.0
7 | 	github.com/stretchr/testify v1.9.0
8 | 	modernc.org/sqlite v1.34.4
9 | )
10 | 
11 | require (
12 | 	github.com/davecgh/go-spew v1.1.1 // indirect
13 | 	github.com/dustin/go-humanize v1.0.1 // indirect
14 | 	github.com/google/uuid v1.6.0 // indirect
15 | 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
16 | 	github.com/mattn/go-isatty v0.0.20 // indirect
17 | 	github.com/ncruces/go-strftime v0.1.9 // indirect
18 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
19 | 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
20 | 	golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect
21 | 	golang.org/x/sys v0.29.0 // indirect
22 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
23 | 	modernc.org/gc/v3
v3.0.0-20250105121824-520be1a3aee6 // indirect 24 | modernc.org/libc v1.61.6 // indirect 25 | modernc.org/mathutil v1.7.1 // indirect 26 | modernc.org/memory v1.8.1 // indirect 27 | modernc.org/strutil v1.2.1 // indirect 28 | modernc.org/token v1.1.0 // indirect 29 | ) 30 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 4 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 5 | github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= 6 | github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= 7 | github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= 8 | github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= 9 | github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo= 10 | github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= 11 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 12 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 13 | github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= 14 | github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= 15 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 16 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 17 | github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= 18 | github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 19 | github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= 20 | github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= 21 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 22 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 23 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= 24 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= 25 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 26 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 27 | golang.org/x/exp v0.0.0-20241215155358-4a5509556b9e h1:4qufH0hlUYs6AO6XmZC3GqfDPGSXHVXUFR6OND+iJX4= 28 | golang.org/x/exp v0.0.0-20241215155358-4a5509556b9e/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= 29 | golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA= 30 | golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= 31 | golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= 32 | golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= 33 
| golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 34 | golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= 35 | golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 36 | golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= 37 | golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 38 | golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= 39 | golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 40 | golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= 41 | golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= 42 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 43 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 44 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 45 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 46 | modernc.org/cc/v4 v4.20.0 h1:45Or8mQfbUqJOG9WaxvlFYOAQO0lQ5RvqBcFCXngjxk= 47 | modernc.org/cc/v4 v4.20.0/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ= 48 | modernc.org/cc/v4 v4.23.1 h1:WqJoPL3x4cUufQVHkXpXX7ThFJ1C4ik80i2eXEXbhD8= 49 | modernc.org/cc/v4 v4.24.2 h1:uektamHbSXU7egelXcyVpMaaAsrRH4/+uMKUQAQUdOw= 50 | modernc.org/ccgo/v4 v4.16.0 h1:ofwORa6vx2FMm0916/CkZjpFPSR70VwTjUCe2Eg5BnA= 51 | modernc.org/ccgo/v4 v4.16.0/go.mod h1:dkNyWIjFrVIZ68DTo36vHK+6/ShBn4ysU61So6PIqCI= 52 | modernc.org/ccgo/v4 v4.23.1 h1:N49a7JiWGWV7lkPE4yYcvjkBGZQi93/JabRYjdWmJXc= 53 | modernc.org/ccgo/v4 v4.23.5 h1:6uAwu8u3pnla3l/+UVUrDDO1HIGxHTYmFH6w+X9nsyw= 54 | modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE= 55 | modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ= 56 | modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw= 57 | modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU= 58 | modernc.org/gc/v2 v2.5.0 h1:bJ9ChznK1L1mUtAQtxi0wi5AtAs5jQuw4PrPHO5pb6M= 59 | modernc.org/gc/v2 v2.6.0 h1:Tiw3pezQj7PfV8k4Dzyu/vhRHR2e92kOXtTFU8pbCl4= 60 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= 61 | modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= 62 | modernc.org/gc/v3 v3.0.0-20241213165251-3bc300f6d0c9 h1:ovz6yUKX71igz2yvk4NpiCL5fvdjZAI+DhuDEGx1xyU= 63 | modernc.org/gc/v3 v3.0.0-20241213165251-3bc300f6d0c9/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= 64 | modernc.org/gc/v3 v3.0.0-20250105121824-520be1a3aee6 h1:JoKwHjIFumiKrjMbp1cNbC5E9UyCgA/ZcID0xOWQ2N8= 65 | modernc.org/gc/v3 v3.0.0-20250105121824-520be1a3aee6/go.mod h1:LG5UO1Ran4OO0JRKz2oNiXhR5nNrgz0PzH7UKhz0aMU= 66 | modernc.org/libc v1.49.3 h1:j2MRCRdwJI2ls/sGbeSk0t2bypOG/uvPZUsGQFDulqg= 67 | modernc.org/libc v1.49.3/go.mod h1:yMZuGkn7pXbKfoT/M35gFJOAEdSKdxL0q64sF7KqCDo= 68 | modernc.org/libc v1.61.4 h1:wVyqEx6tlltte9lPTjq0kDAdtdM9c4JH8rU6M1ZVawA= 69 | modernc.org/libc v1.61.4/go.mod h1:VfXVuM/Shh5XsMNrh3C6OkfL78G3loa4ZC/Ljv9k7xc= 70 | modernc.org/libc v1.61.6 h1:L2jW0wxHPCyHK0YSHaGaVlY0WxjpG/TTVdg6gRJOPqw= 71 | modernc.org/libc v1.61.6/go.mod h1:G+DzuaCcReUYYg4nNSfigIfTDCENdj9EByglvaRx53A= 72 | modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= 73 | modernc.org/mathutil v1.6.0/go.mod 
h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= 74 | modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= 75 | modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= 76 | modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E= 77 | modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU= 78 | modernc.org/memory v1.8.1 h1:HS1HRg1jEohnuONobEq2WrLEhLyw8+J42yLFTnllm2A= 79 | modernc.org/memory v1.8.1/go.mod h1:ZbjSvMO5NQ1A2i3bWeDiVMxIorXwdClKE/0SZ+BMotU= 80 | modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= 81 | modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= 82 | modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= 83 | modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= 84 | modernc.org/sqlite v1.29.8 h1:nGKglNx9K5v0As+zF0/Gcl1kMkmaU1XynYyq92PbsC8= 85 | modernc.org/sqlite v1.29.8/go.mod h1:lQPm27iqa4UNZpmr4Aor0MH0HkCLbt1huYDfWylLZFk= 86 | modernc.org/sqlite v1.34.2 h1:J9n76TPsfYYkFkZ9Uy1QphILYifiVEwwOT7yP5b++2Y= 87 | modernc.org/sqlite v1.34.2/go.mod h1:dnR723UrTtjKpoHCAMN0Q/gZ9MT4r+iRvIBb9umWFkU= 88 | modernc.org/sqlite v1.34.4 h1:sjdARozcL5KJBvYQvLlZEmctRgW9xqIZc2ncN7PU0P8= 89 | modernc.org/sqlite v1.34.4/go.mod h1:3QQFCG2SEMtc2nv+Wq4cQCH7Hjcg+p/RMlS1XK+zwbk= 90 | modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= 91 | modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= 92 | modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= 93 | modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= 94 | modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= 95 | modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= 96 | -------------------------------------------------------------------------------- /handler.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import "context" 7 | 8 | // ChangesHandler implementations are given batches of database changes. Each 9 | // batch of changes is in the same order as the CDC log table. Batches may 10 | // contain changes from multiple tables. 11 | // 12 | // If a handler returns an error then the entire batch is considered failed and 13 | // retried. If a handler returns nil then the entire batch is considered 14 | // successful and the relevant entries are removed from the CDC log table. 15 | type ChangesHandler interface { 16 | HandleChanges(ctx context.Context, changes Changes) error 17 | } 18 | 19 | // ChangesHandlerFunc is an adaptor to allow the use of ordinary functions as 20 | // ChangesHandler implementations. Note that you should not use this type 21 | // directly and should instead always target the ChangesHandler type. 
For
22 | // example, the appropriate use of this adaptor is:
23 | //
24 | //	var handler ChangesHandler = ChangesHandlerFunc(func(ctx context.Context, changes Changes) error {
25 | //		// handle changes
26 | //	})
27 | type ChangesHandlerFunc func(ctx context.Context, changes Changes) error
28 | 
29 | func (fn ChangesHandlerFunc) HandleChanges(ctx context.Context, changes Changes) error {
30 | 	return fn(ctx, changes)
31 | }
--------------------------------------------------------------------------------
/handlers/debug.go:
--------------------------------------------------------------------------------
1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | package handlers
5 | 
6 | import (
7 | 	"context"
8 | 	"encoding/json"
9 | 	"fmt"
10 | 	"io"
11 | 
12 | 	cdc "github.com/kevinconway/sqlite-cdc"
13 | )
14 | 
15 | type Debug struct {
16 | 	Output io.Writer
17 | }
18 | 
19 | func (d *Debug) HandleChanges(ctx context.Context, changes cdc.Changes) error {
20 | 	for _, change := range changes {
21 | 		b, err := json.Marshal(change)
22 | 		if err != nil {
23 | 			return fmt.Errorf("%w: failed to marshal changes to JSON", err)
24 | 		}
25 | 		fmt.Fprintln(d.Output, string(b))
26 | 	}
27 | 	return nil
28 | }
--------------------------------------------------------------------------------
/handlers/http.go:
--------------------------------------------------------------------------------
1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | package handlers
5 | 
6 | import (
7 | 	"bytes"
8 | 	"context"
9 | 	"encoding/json"
10 | 	"fmt"
11 | 	"io"
12 | 	"net/http"
13 | 
14 | 	cdc "github.com/kevinconway/sqlite-cdc"
15 | )
16 | 
17 | // HTTPBasicPOST implements the cdc.ChangesHandler interface by making POST
18 | // requests to an HTTP endpoint.
19 | //
20 | // Targets of this handler will receive HTTP POST requests with the following
21 | // format:
22 | //
23 | //	{
24 | //	    "changes": [
25 | //	        {
26 | //	            "table": "table_name",
27 | //	            "timestamp": "2022-01-01T00:00:00Z",
28 | //	            "operation": "INSERT", // or "UPDATE" or "DELETE"
29 | //	            "before": { // present for updates and deletes
30 | //	                "key": "value",
31 | //	                ...
32 | //	            },
33 | //	            "after": { // present for inserts and updates
34 | //	                "key": "value",
35 | //	                ...
36 | //	            }
37 | //	        },
38 | //	        ...
39 | //	    ]
40 | //	}
41 | type HTTPBasicPOST struct {
42 | 	Client   *http.Client
43 | 	Endpoint string
44 | }
45 | 
46 | func (h *HTTPBasicPOST) HandleChanges(ctx context.Context, changes cdc.Changes) error {
47 | 	cs := jsonChanges{Changes: changes}
48 | 	b, err := json.Marshal(cs)
49 | 	if err != nil {
50 | 		return fmt.Errorf("%w: failed to marshal changes for POST", err)
51 | 	}
52 | 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, h.Endpoint, bytes.NewReader(b))
53 | 	if err != nil {
54 | 		return fmt.Errorf("%w: failed to create POST request", err)
55 | 	}
56 | 	resp, err := h.Client.Do(req)
57 | 	if resp != nil {
58 | 		defer resp.Body.Close()
59 | 	}
60 | 	if err != nil {
61 | 		return fmt.Errorf("%w: failed to POST changes", err)
62 | 	}
63 | 	if resp.StatusCode != http.StatusOK {
64 | 		b, err := io.ReadAll(resp.Body)
65 | 		if err != nil {
66 | 			return fmt.Errorf("%w: failed to read error response body", err)
67 | 		}
68 | 		return fmt.Errorf("unexpected HTTP status %d: %s", resp.StatusCode, string(b))
69 | 	}
70 | 	return nil
71 | }
72 | 
73 | type jsonChanges struct {
74 | 	Changes cdc.Changes `json:"changes"`
75 | }
--------------------------------------------------------------------------------
/handlers/stdio.go:
--------------------------------------------------------------------------------
1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway
2 | // SPDX-License-Identifier: Apache-2.0
3 | 
4 | package handlers
5 | 
6 | import (
7 | 	"context"
8 | 	"fmt"
9 | 	"io"
10 | 
11 | 	cdc "github.com/kevinconway/sqlite-cdc"
12 | )
13 | 
14 | type STDIO struct {
15 | 	Output io.Writer
16 | }
17 | 
18 | func (s *STDIO) HandleChanges(ctx context.Context, changes cdc.Changes) error {
19 | 	for _, change := range changes {
20 | 		fmt.Fprintln(s.Output, change.String())
21 | 	}
22 | 	return nil
23 | }
--------------------------------------------------------------------------------
/images/blob-data.plot:
--------------------------------------------------------------------------------
1 | set terminal svg background rgb "#FFFFFF"
2 | set title "Encoding Cost Of BLOB Data"
3 | set border lc rgb 'black'
4 | set key below horizontal tc rgb 'black'
5 | set linetype 1 lc rgb 'black'
6 | 
7 | set style fill solid
8 | set boxwidth .5
9 | 
10 | 
11 | set xlabel 'Byte Size' tc rgb 'black'
12 | set logscale x 2
13 | set xtic rotate by 90 right
14 | 
15 | set ylabel 'Percent Worse Than 16 bytes (%)' tc rgb 'black'
16 | set yrange [0.0:1000.0]
17 | set ytics nomirror 50
18 | 
19 | $data << EOF
20 | 16 0
21 | 64 67.0
22 | 256 64.8
23 | 1024 69.2
24 | 4096 74.0
25 | 16384 85.4
26 | 32768 102.3
27 | 65536 126.4
28 | 131072 187.3
29 | 262144 286.9
30 | 524288 510.1
31 | 1048576 936.9
32 | EOF
33 | 
34 | plot \
35 |     "$data" using 1:2:xtic(1) title "" with boxes axes x1y1
36 | 
--------------------------------------------------------------------------------
/images/blob-data.svg:
--------------------------------------------------------------------------------
[SVG markup stripped from this dump. The rendered chart, "Encoding Cost Of BLOB Data", plots Byte Size (log scale) against Percent Worse Than 16 bytes (%); see images/blob-data.plot above for the underlying data.]
--------------------------------------------------------------------------------
/images/simple-tables.plot:
--------------------------------------------------------------------------------
1 | set terminal svg background rgb "#FFFFFF"
2 | set title "Simple Tables With Triggers"
3 | set border lc rgb 'black'
4 | set key outside right vertical tc rgb 'black'
5 | set linetype 1 lc rgb 'black'
6 | set linetype 2 lc rgb 'blue'
7 | set linetype 3 lc rgb 'dark-grey'
8 | 
9 | 
10 | set style fill solid
11 | set boxwidth .5
12 | 
13 | 
14 | set xlabel 'Columns' tc rgb 'black'
15 | set style data histograms
16 | 
17 | set ylabel 'Percent Worse (%)' tc rgb 'black'
18 | set yrange [0.0:205.0]
19 | set ytics nomirror 50
20 | 
21 | $data << EOF
22 | 2 97 100 113
23 | 4 96 96 105
24 | 8 93 99 111
25 | 16 99 111 127
26 | 32 106 158 153
27 | 64 105 179 203
28 | EOF
29 | 
30 | plot \
31 |     "$data" using 2:xtic(1) title "INSERT", \
32 |     "" using 3 title "UPDATE", \
33 |     "" using 4 title "DELETE"
34 | 
--------------------------------------------------------------------------------
/images/simple-tables.svg:
--------------------------------------------------------------------------------
[SVG markup stripped from this dump. The rendered chart, "Simple Tables With Triggers", is a histogram of Percent Worse (%) by column count for INSERT, UPDATE, and DELETE; see images/simple-tables.plot above for the underlying data.]
--------------------------------------------------------------------------------
/images/wide-tables.plot:
--------------------------------------------------------------------------------
1 | set terminal svg background rgb "#FFFFFF"
2 | set title "Wide Tables With Triggers"
3 | set border lc rgb 'black'
4 | set key outside right vertical tc rgb 'black'
5 | set linetype 1 lc rgb 'black'
6 | set linetype 2 lc rgb 'blue'
7 | set linetype 3 lc rgb 'dark-grey'
8 | 
9 | set style fill solid
10 | set boxwidth .5
11 | 
12 | 
13 | set xlabel 'Columns' tc rgb 'black'
14 | set style data histograms
15 | 
16 | set ylabel 'Percent Worse (%)' tc rgb 'black'
17 | set yrange [0.0:50000.0]
18 | set logscale y 2
19 | 
20 | $data << EOF
21 | 64 119 225 251
22 | 128 195 335 556
23 | 256 412 696 1434
24 | 512 1011 1558 4948
25 | 1000 3263 3872 33814
26 | EOF
27 | 
28 | plot \
29 |     "$data" using 2:xtic(1) title "INSERT", \
30 |     "" using 3 title "UPDATE", \
31 |     "" using 4 title "DELETE"
--------------------------------------------------------------------------------
/images/wide-tables.svg:
--------------------------------------------------------------------------------
[SVG markup stripped from this dump. The rendered chart, "Wide Tables With Triggers", is a log-scale histogram of Percent Worse (%) by column count for INSERT, UPDATE, and DELETE; see images/wide-tables.plot above for the underlying data.]
--------------------------------------------------------------------------------
/internal/tools/tools.go:
--------------------------------------------------------------------------------
1 | package tools
2 | 
3 | import (
4 | 	_ "github.com/AlekSi/gocov-xml"
5 | 	_ "github.com/axw/gocov/gocov"
6 | 	_ "github.com/golangci/golangci-lint/cmd/golangci-lint"
7 | 	_ "github.com/goreleaser/goreleaser/v2"
8 | 	_ "github.com/matm/gocov-html/cmd/gocov-html"
9 | 	_ "github.com/wadey/gocovmerge"
10 | 	_ "golang.org/x/perf/cmd/benchstat"
11 | 	_ "golang.org/x/tools/cmd/goimports"
12 | )
13 | 
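Taken together, the handler interface and the trigger engine compose into a
small embedded consumer. The following is a minimal sketch rather than code
from this repository: it assumes a hypothetical `app.db` database and `users`
table, and it uses only APIs that appear in the files above
(`ChangesHandlerFunc`, `NewTriggerEngine`, `WithMaxBatchSize`, `Setup`, `CDC`).

```go
package main

import (
	"context"
	"database/sql"
	"log"

	_ "modernc.org/sqlite"

	cdc "github.com/kevinconway/sqlite-cdc"
)

func main() {
	ctx := context.Background()

	// The DSN mirrors the defaults used by the bundled CLIs.
	db, err := sql.Open("sqlite", "app.db?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)")
	if err != nil {
		log.Fatalln(err)
	}
	defer db.Close()

	// ChangesHandlerFunc adapts a plain function into a ChangesHandler.
	// Returning nil marks the whole batch as handled and clears it from the
	// CDC log table; returning an error causes the batch to be retried.
	handler := cdc.ChangesHandlerFunc(func(ctx context.Context, changes cdc.Changes) error {
		for _, change := range changes {
			log.Println(change.String())
		}
		return nil
	})

	c, err := cdc.NewTriggerEngine(db, handler, []string{"users"},
		cdc.WithMaxBatchSize(256),
	)
	if err != nil {
		log.Fatalln(err)
	}

	// Install the triggers and log table, then consume changes until closed.
	if err := c.Setup(ctx); err != nil {
		log.Fatalln(err)
	}
	if err := c.CDC(ctx); err != nil {
		log.Fatalln(err)
	}
}
```

The bundled `sqlite-cdc` CLI is effectively a more complete version of this
loop, adding signal handling, bootstrap mode, and an HTTP output option.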
-------------------------------------------------------------------------------- /signals.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import ( 7 | "context" 8 | "database/sql" 9 | "errors" 10 | "fmt" 11 | "path/filepath" 12 | "sync" 13 | "time" 14 | 15 | "github.com/fsnotify/fsnotify" 16 | ) 17 | 18 | // Signal implementations tell a CDC engine when to awaken while in CDC mode 19 | // in order to process new changes. 20 | type Signal interface { 21 | // Waker returns a channel that will receive wake signals. If the channel 22 | // is closed by the Signal then the Signal has entered a terminal state. 23 | Waker() <-chan SignalEvent 24 | // Start implementations establish any state required to generate wake 25 | // signals. Long running tasks, such as continuous monitoring, should be 26 | // started in goroutines so that this method does not block. 27 | Start(ctx context.Context) error 28 | // Close releases any resources used by the Signal including any goroutines 29 | // started. Once a Signal is closed then it is in a terminal state and 30 | // cannot be re-used. 31 | Close() error 32 | } 33 | 34 | // SignalEvent indicates that a Signal has been triggered. 35 | // 36 | // An engine should only check for changes if the Wake field is true. A non-nil 37 | // error represents a terminal error for the Signal. 38 | type SignalEvent struct { 39 | Wake bool 40 | Err error 41 | } 42 | 43 | // fSNotifySignal implements Signal using filesystem notifications to detect 44 | // changes. 45 | type fSNotifySignal struct { 46 | watcher *fsnotify.Watcher 47 | wake chan SignalEvent 48 | closed chan any 49 | closeOnce *sync.Once 50 | meta *dbMeta 51 | targets map[string]bool 52 | } 53 | 54 | // NewFSNotifySignal creates a new Signal implementation that uses filesystem 55 | // notifications to detect changes. This implementation uses the given database 56 | // client to determine the path of the main SQLite database as well as any 57 | // supplemental files such as the WAL when in WAL mode. 
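//
// A wiring sketch, as used in this repository's tests:
//
//	sig, err := NewFSNotifySignal(db)
//	// handle err
//	c, err := NewTriggerEngine(db, handler, tables, WithSignal(sig))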
58 | func NewFSNotifySignal(db *sql.DB) (Signal, error) { 59 | return newFSNotifySignal(db) 60 | } 61 | 62 | func newFSNotifySignal(db *sql.DB) (*fSNotifySignal, error) { 63 | meta, err := newDBMeta(db) 64 | if err != nil { 65 | return nil, fmt.Errorf("failed to get database metadata: %w", err) 66 | } 67 | 68 | watcher, err := fsnotify.NewWatcher() 69 | if err != nil { 70 | return nil, fmt.Errorf("failed to create fsnotify watcher: %w", err) 71 | } 72 | 73 | if err := watcher.Add(filepath.Dir(meta.Filename)); err != nil { 74 | return nil, fmt.Errorf("%w: failed to add %q to fsnotify watcher", err, filepath.Dir(meta.Filename)) 75 | } 76 | 77 | watchTargets := make(map[string]bool) 78 | watchTargets[meta.Filename] = true 79 | for _, f := range meta.ExtraFiles { 80 | watchTargets[f] = true 81 | } 82 | 83 | s := &fSNotifySignal{ 84 | watcher: watcher, 85 | wake: make(chan SignalEvent), 86 | closed: make(chan any), 87 | closeOnce: &sync.Once{}, 88 | meta: meta, 89 | targets: watchTargets, 90 | } 91 | return s, nil 92 | } 93 | 94 | func (s *fSNotifySignal) Start(ctx context.Context) error { 95 | go s.watch(ctx) 96 | return nil 97 | } 98 | 99 | func (s *fSNotifySignal) watch(ctx context.Context) { 100 | for { 101 | ok, err := s.watchStep(ctx) 102 | if errors.Is(err, errStop) { 103 | return 104 | } 105 | if err != nil { 106 | select { 107 | case s.wake <- SignalEvent{Err: err}: 108 | case <-s.closed: 109 | return 110 | } 111 | continue 112 | } 113 | if !ok { 114 | continue 115 | } 116 | select { 117 | case s.wake <- SignalEvent{Wake: true}: 118 | case <-s.closed: 119 | return 120 | } 121 | } 122 | } 123 | func (s *fSNotifySignal) watchStep(ctx context.Context) (bool, error) { 124 | select { 125 | case <-s.closed: 126 | return false, errStop 127 | case <-ctx.Done(): 128 | return false, s.Close() 129 | case event, ok := <-s.watcher.Events: 130 | if !ok { 131 | // The watcher was closed. 132 | return false, s.Close() 133 | } 134 | if event.Op == fsnotify.Chmod { 135 | return false, nil 136 | } 137 | if !s.targets[event.Name] { 138 | return false, nil 139 | } 140 | return true, nil 141 | case err, ok := <-s.watcher.Errors: 142 | if !ok { 143 | // The watcher was closed. 144 | return false, s.Close() 145 | } 146 | return false, fmt.Errorf("%w: fsnotify watcher error", err) 147 | } 148 | } 149 | 150 | // Waker returns a channel that will receive a signal when changes are detected. 151 | func (s *fSNotifySignal) Waker() <-chan SignalEvent { 152 | return s.wake 153 | } 154 | 155 | // Close stops the file system watcher and cleans up resources. 156 | func (s *fSNotifySignal) Close() error { 157 | s.closeOnce.Do(func() { 158 | close(s.closed) 159 | }) 160 | return s.watcher.Close() 161 | } 162 | 163 | type timeSignal struct { 164 | wake chan SignalEvent 165 | closed chan any 166 | closeOnce *sync.Once 167 | interval time.Duration 168 | } 169 | 170 | // NewTimeSignal returns a new time-based Signal that emits a wake event on an 171 | // interval. This can be used in environments where the file watching 172 | // implementation is unsupported or unreliable. 
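//
// This is the time-interval polling fallback mentioned in the README for
// SQLite derivatives, such as custom-filesystem setups, where the fsnotify
// watcher cannot observe file changes. A sketch:
//
//	sig, err := NewTimeSignal(10 * time.Millisecond)
//	// handle err
//	c, err := NewTriggerEngine(db, handler, tables, WithSignal(sig))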
173 | func NewTimeSignal(interval time.Duration) (Signal, error) { 174 | return newTimeSignal(interval), nil 175 | } 176 | 177 | func newTimeSignal(interval time.Duration) *timeSignal { 178 | return &timeSignal{ 179 | wake: make(chan SignalEvent, 1), 180 | closed: make(chan any), 181 | closeOnce: &sync.Once{}, 182 | interval: interval, 183 | } 184 | } 185 | 186 | func (s *timeSignal) Start(ctx context.Context) error { 187 | go s.watch(ctx) 188 | return nil 189 | } 190 | 191 | func (s *timeSignal) watch(ctx context.Context) { 192 | ticker := time.NewTicker(s.interval) 193 | defer ticker.Stop() 194 | 195 | for { 196 | ok, err := s.watchStep(ctx, ticker) 197 | if errors.Is(err, errStop) { 198 | return 199 | } 200 | if err != nil { 201 | select { 202 | case s.wake <- SignalEvent{Err: err}: 203 | case <-s.closed: 204 | return 205 | } 206 | continue 207 | } 208 | if !ok { 209 | continue 210 | } 211 | select { 212 | case s.wake <- SignalEvent{Wake: true}: 213 | case <-s.closed: 214 | return 215 | } 216 | } 217 | } 218 | 219 | func (s *timeSignal) watchStep(ctx context.Context, ticker *time.Ticker) (bool, error) { 220 | select { 221 | case <-s.closed: 222 | return false, errStop 223 | case <-ctx.Done(): 224 | return false, s.Close() 225 | case <-ticker.C: 226 | return true, nil 227 | } 228 | } 229 | 230 | func (s *timeSignal) Waker() <-chan SignalEvent { 231 | return s.wake 232 | } 233 | 234 | func (s *timeSignal) Close() error { 235 | s.closeOnce.Do(func() { 236 | close(s.closed) 237 | }) 238 | return nil 239 | } 240 | 241 | type channelSignal struct { 242 | input <-chan SignalEvent 243 | wake chan SignalEvent 244 | closed chan any 245 | closeOnce *sync.Once 246 | } 247 | 248 | // NewChannelSignal returns a Signal implementation that proxies events from 249 | // the given channel to the waker channel. This is useful for awakening based 250 | // on external triggers. 
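//
// A sketch, mirroring signals_test.go:
//
//	input := make(chan SignalEvent, 1)
//	sig, err := NewChannelSignal(input)
//	// elsewhere, after a relevant write:
//	input <- SignalEvent{Wake: true}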
251 | func NewChannelSignal(input <-chan SignalEvent) (Signal, error) { 252 | return newChannelSignal(input), nil 253 | } 254 | 255 | func newChannelSignal(input <-chan SignalEvent) *channelSignal { 256 | return &channelSignal{ 257 | input: input, 258 | wake: make(chan SignalEvent), 259 | closed: make(chan any), 260 | closeOnce: &sync.Once{}, 261 | } 262 | } 263 | 264 | func (s *channelSignal) Start(ctx context.Context) error { 265 | go s.watch(ctx) 266 | return nil 267 | } 268 | 269 | func (s *channelSignal) watch(ctx context.Context) { 270 | for { 271 | event, err := s.watchStep(ctx) 272 | if errors.Is(err, errStop) { 273 | return 274 | } 275 | if err != nil { 276 | select { 277 | case s.wake <- SignalEvent{Err: err}: 278 | case <-s.closed: 279 | return 280 | } 281 | continue 282 | } 283 | select { 284 | case s.wake <- event: 285 | case <-s.closed: 286 | return 287 | } 288 | } 289 | } 290 | 291 | func (s *channelSignal) watchStep(ctx context.Context) (SignalEvent, error) { 292 | select { 293 | case <-s.closed: 294 | return SignalEvent{}, errStop 295 | case <-ctx.Done(): 296 | _ = s.Close() 297 | return SignalEvent{}, errStop 298 | case event := <-s.input: 299 | return event, nil 300 | } 301 | } 302 | 303 | func (s *channelSignal) Waker() <-chan SignalEvent { 304 | return s.wake 305 | } 306 | 307 | func (s *channelSignal) Close() error { 308 | s.closeOnce.Do(func() { 309 | close(s.closed) 310 | }) 311 | return nil 312 | } 313 | 314 | type multiSignal struct { 315 | signals []Signal 316 | wake chan SignalEvent 317 | closed chan any 318 | closeOnce *sync.Once 319 | } 320 | 321 | // NewMultiSignal returns a Signal implementation that combines multiple 322 | // signals into a single channel. 323 | func NewMultiSignal(signals ...Signal) (Signal, error) { 324 | return newMultiSignal(signals...), nil 325 | } 326 | 327 | func newMultiSignal(signals ...Signal) *multiSignal { 328 | return &multiSignal{ 329 | signals: signals, 330 | wake: make(chan SignalEvent, len(signals)), 331 | closed: make(chan any), 332 | closeOnce: &sync.Once{}, 333 | } 334 | } 335 | 336 | func (s *multiSignal) Start(ctx context.Context) error { 337 | for _, signal := range s.signals { 338 | if err := signal.Start(ctx); err != nil { 339 | _ = s.Close() 340 | return err 341 | } 342 | } 343 | for _, signal := range s.signals { 344 | waker := signal.Waker() 345 | go func() { 346 | for event := range waker { 347 | if event.Err != nil { 348 | select { 349 | case s.wake <- event: 350 | case <-s.closed: 351 | return 352 | } 353 | continue 354 | } 355 | select { 356 | case s.wake <- event: 357 | case <-s.closed: 358 | return 359 | } 360 | } 361 | }() 362 | } 363 | return nil 364 | } 365 | 366 | func (s *multiSignal) Waker() <-chan SignalEvent { 367 | return s.wake 368 | } 369 | 370 | func (s *multiSignal) Close() error { 371 | s.closeOnce.Do(func() { 372 | close(s.closed) 373 | for _, signal := range s.signals { 374 | _ = signal.Close() 375 | } 376 | }) 377 | return nil 378 | } 379 | 380 | var errStop = fmt.Errorf("stop signal received") 381 | -------------------------------------------------------------------------------- /signals_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import ( 7 | "context" 8 | "testing" 9 | "time" 10 | 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestFSNotifySignal(t *testing.T) { 16 
| t.Parallel() 17 | 18 | // Create a test database 19 | db := testDB(t) 20 | t.Cleanup(func() { db.Close() }) 21 | _, err := db.Exec("CREATE TABLE test (col INT)") 22 | require.NoError(t, err) 23 | 24 | // Create our signal 25 | signal, err := NewFSNotifySignal(db) 26 | require.NoError(t, err) 27 | t.Cleanup(func() { signal.Close() }) 28 | 29 | // Start watching 30 | ctx, cancel := context.WithCancel(context.Background()) 31 | t.Cleanup(cancel) 32 | require.NoError(t, signal.Start(ctx)) 33 | 34 | // Get the waker channel for receiving signals 35 | waker := signal.Waker() 36 | 37 | _, err = db.Exec("INSERT INTO test (col) VALUES (1)") 38 | require.NoError(t, err) 39 | assert.True(t, wait(t, waker), "should receive wake signal for db file change") 40 | } 41 | 42 | func TestTimeSignal(t *testing.T) { 43 | t.Parallel() 44 | 45 | interval := 10 * time.Millisecond 46 | signal, err := NewTimeSignal(interval) 47 | require.NoError(t, err) 48 | t.Cleanup(func() { signal.Close() }) 49 | 50 | // Start watching 51 | ctx, cancel := context.WithCancel(context.Background()) 52 | t.Cleanup(cancel) 53 | require.NoError(t, signal.Start(ctx)) 54 | 55 | // Get the waker channel for receiving signals 56 | waker := signal.Waker() 57 | 58 | // Should receive multiple signals 59 | for x := 0; x < 3; x = x + 1 { 60 | assert.True(t, wait(t, waker), "should receive wake signal on interval") 61 | } 62 | 63 | // Test cancellation 64 | cancel() 65 | time.Sleep(interval * 2) 66 | require.False(t, wait(t, waker), "should not receive wake signal after cancellation") 67 | } 68 | 69 | func TestChannelSignal(t *testing.T) { 70 | t.Parallel() 71 | 72 | input := make(chan SignalEvent, 1) 73 | signal, err := NewChannelSignal(input) 74 | require.NoError(t, err) 75 | t.Cleanup(func() { signal.Close() }) 76 | 77 | ctx, cancel := context.WithCancel(context.Background()) 78 | t.Cleanup(cancel) 79 | require.NoError(t, signal.Start(ctx)) 80 | 81 | waker := signal.Waker() 82 | 83 | // Should receive all signals 84 | for x := 0; x < 3; x = x + 1 { 85 | input <- SignalEvent{Wake: true} 86 | assert.True(t, wait(t, waker), "should receive wake signal from input channel") 87 | } 88 | 89 | // Test cancellation 90 | cancel() 91 | require.False(t, wait(t, waker), "should not receive wake signal after cancellation") 92 | } 93 | 94 | func TestMultiSignal(t *testing.T) { 95 | t.Parallel() 96 | 97 | // Create multiple input channels 98 | input1 := make(chan SignalEvent, 1) 99 | signal1, err := NewChannelSignal(input1) 100 | require.NoError(t, err) 101 | 102 | input2 := make(chan SignalEvent, 1) 103 | signal2, err := NewChannelSignal(input2) 104 | require.NoError(t, err) 105 | 106 | input3 := make(chan SignalEvent, 1) 107 | signal3, err := NewChannelSignal(input3) 108 | require.NoError(t, err) 109 | 110 | // Create multi signal 111 | multi, err := NewMultiSignal(signal1, signal2, signal3) 112 | require.NoError(t, err) 113 | t.Cleanup(func() { multi.Close() }) 114 | 115 | ctx, cancel := context.WithCancel(context.Background()) 116 | t.Cleanup(cancel) 117 | require.NoError(t, multi.Start(ctx)) 118 | 119 | waker := multi.Waker() 120 | 121 | // Test signals from first channel 122 | input1 <- SignalEvent{Wake: true} 123 | assert.True(t, wait(t, waker), "should receive wake signal from first channel") 124 | 125 | // Test signals from second channel 126 | input2 <- SignalEvent{Wake: true} 127 | assert.True(t, wait(t, waker), "should receive wake signal from second channel") 128 | 129 | // Test signals from third channel 130 | input3 <- SignalEvent{Wake: 
true} 131 | assert.True(t, wait(t, waker), "should receive wake signal from third channel") 132 | 133 | // Test multiple concurrent signals 134 | input1 <- SignalEvent{Wake: true} 135 | input2 <- SignalEvent{Wake: true} 136 | input3 <- SignalEvent{Wake: true} 137 | 138 | for x := 0; x < 3; x = x + 1 { 139 | assert.True(t, wait(t, waker), "should receive wake signal from concurrent inputs") 140 | } 141 | 142 | // Test cancellation 143 | cancel() 144 | require.False(t, wait(t, waker), "should not receive wake signal after cancellation") 145 | } 146 | 147 | func wait(tb tOrB, ch <-chan SignalEvent) bool { 148 | tb.Helper() 149 | select { 150 | case event := <-ch: 151 | require.NoError(tb, event.Err) 152 | return event.Wake 153 | case <-time.After(time.Second): 154 | return false 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /trigger_test.go: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2024 Kevin Conway 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | package cdc 5 | 6 | import ( 7 | "context" 8 | "database/sql" 9 | "encoding/json" 10 | "fmt" 11 | "path/filepath" 12 | "strings" 13 | "sync" 14 | "testing" 15 | "time" 16 | 17 | "github.com/stretchr/testify/require" 18 | _ "modernc.org/sqlite" 19 | ) 20 | 21 | func TestBootstrapWithRowID(t *testing.T) { 22 | t.Parallel() 23 | db := testDB(t) 24 | defer db.Close() 25 | 26 | count := 1024 27 | createTable(t, db) 28 | generateRecords(t, db, count, 0) 29 | 30 | h := newHandler() 31 | batchSize := defaultMaxBatchSize 32 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true)) 33 | require.NoError(t, err) 34 | 35 | ctx, cancel := context.WithCancel(context.Background()) 36 | defer cancel() 37 | 38 | require.NoError(t, c.Bootstrap(ctx)) 39 | require.NoError(t, c.Close(ctx)) 40 | 41 | expectedBatches := count / batchSize 42 | if count%batchSize != 0 { 43 | expectedBatches = expectedBatches + 1 44 | } 45 | waitForChanges(t, h, expectedBatches, count, time.Second) 46 | } 47 | 48 | func TestCDCWithRowID(t *testing.T) { 49 | t.Parallel() 50 | db := testDB(t) 51 | t.Cleanup(func() { db.Close() }) 52 | 53 | count := 1024 54 | createTable(t, db) 55 | 56 | ctx, cancel := context.WithCancel(context.Background()) 57 | t.Cleanup(cancel) 58 | 59 | h := newHandler() 60 | batchSize := defaultMaxBatchSize 61 | fsSignal, err := NewFSNotifySignal(db) 62 | require.NoError(t, err) 63 | timeSignal, err := NewTimeSignal(10 * time.Millisecond) 64 | require.NoError(t, err) 65 | signal, err := NewMultiSignal(fsSignal, timeSignal) 66 | require.NoError(t, err) 67 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true), WithSignal(signal)) 68 | require.NoError(t, err) 69 | t.Cleanup(func() { c.Close(ctx) }) 70 | 71 | require.NoError(t, c.Setup(ctx)) 72 | 73 | cdcStatus := make(chan error, 1) 74 | go func(t *testing.T, c CDC) { 75 | t.Helper() 76 | cdcStatus <- c.CDC(ctx) 77 | }(t, c) 78 | time.Sleep(5 * time.Millisecond) // force a scheduler break to get CDC going 79 | generateRecords(t, db, count, 0) 80 | 81 | expectedBatches := count / batchSize 82 | if count%batchSize != 0 { 83 | expectedBatches = expectedBatches + 1 84 | } 85 | waitForChanges(t, h, expectedBatches, count, time.Second) 86 | require.NoError(t, c.Close(ctx)) 87 | require.NoError(t, <-cdcStatus) 88 | } 89 | 90 | func TestBootstrapWithoutRowID(t *testing.T) { 91 | t.Parallel() 
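	// WITHOUT ROWID tables have no implicit rowid and key on their declared
	// primary key instead, so the bootstrap scan is exercised separately here.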
92 | db := testDB(t) 93 | defer db.Close() 94 | 95 | count := 1024 96 | createTableWithoutRowID(t, db) 97 | generateRecords(t, db, count, 0) 98 | 99 | h := newHandler() 100 | batchSize := defaultMaxBatchSize 101 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true)) 102 | require.NoError(t, err) 103 | 104 | ctx, cancel := context.WithCancel(context.Background()) 105 | defer cancel() 106 | 107 | require.NoError(t, c.Bootstrap(ctx)) 108 | require.NoError(t, c.Close(ctx)) 109 | 110 | expectedBatches := count / batchSize 111 | if count%batchSize != 0 { 112 | expectedBatches = expectedBatches + 1 113 | } 114 | 115 | results := h.Changes() 116 | require.Len(t, results, expectedBatches) 117 | totalChanges := 0 118 | for _, changes := range results { 119 | totalChanges = totalChanges + len(changes) 120 | } 121 | require.Equal(t, count+1, totalChanges) // +1 for the BOOTSTRAP event 122 | } 123 | 124 | func TestCDCWithoutRowID(t *testing.T) { 125 | t.Parallel() 126 | db := testDB(t) 127 | t.Cleanup(func() { db.Close() }) 128 | 129 | count := 1024 130 | createTableWithoutRowID(t, db) 131 | 132 | ctx, cancel := context.WithCancel(context.Background()) 133 | t.Cleanup(cancel) 134 | 135 | h := newHandler() 136 | batchSize := defaultMaxBatchSize 137 | fsSignal, err := NewFSNotifySignal(db) 138 | require.NoError(t, err) 139 | timeSignal, err := NewTimeSignal(10 * time.Millisecond) 140 | require.NoError(t, err) 141 | signal, err := NewMultiSignal(fsSignal, timeSignal) 142 | require.NoError(t, err) 143 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true), WithSignal(signal)) 144 | require.NoError(t, err) 145 | t.Cleanup(func() { c.Close(ctx) }) 146 | 147 | require.NoError(t, c.Setup(ctx)) 148 | 149 | cdcStatus := make(chan error, 1) 150 | go func(t *testing.T, c CDC) { 151 | t.Helper() 152 | cdcStatus <- c.CDC(ctx) 153 | }(t, c) 154 | 155 | time.Sleep(5 * time.Millisecond) // force a scheduler break to get CDC going 156 | generateRecords(t, db, count, 0) 157 | 158 | expectedBatches := count / batchSize 159 | if count%batchSize != 0 { 160 | expectedBatches = expectedBatches + 1 161 | } 162 | waitForChanges(t, h, expectedBatches, count, time.Second) 163 | require.NoError(t, c.Close(ctx)) 164 | require.NoError(t, <-cdcStatus) 165 | } 166 | 167 | func TestBootstrapAndCDCWithRowID(t *testing.T) { 168 | t.Parallel() 169 | db := testDB(t) 170 | t.Cleanup(func() { db.Close() }) 171 | 172 | count := 1024 173 | createTable(t, db) 174 | 175 | ctx, cancel := context.WithCancel(context.Background()) 176 | t.Cleanup(cancel) 177 | 178 | h := newHandler() 179 | batchSize := defaultMaxBatchSize 180 | fsSignal, err := NewFSNotifySignal(db) 181 | require.NoError(t, err) 182 | timeSignal, err := NewTimeSignal(10 * time.Millisecond) 183 | require.NoError(t, err) 184 | signal, err := NewMultiSignal(fsSignal, timeSignal) 185 | require.NoError(t, err) 186 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true), WithSignal(signal)) 187 | require.NoError(t, err) 188 | t.Cleanup(func() { c.Close(ctx) }) 189 | 190 | require.NoError(t, c.Setup(ctx)) 191 | generateRecords(t, db, count, 0) 192 | 193 | cdcStatus := make(chan error, 1) 194 | go func(t *testing.T, c CDC) { 195 | t.Helper() 196 | cdcStatus <- c.BootstrapAndCDC(ctx) 197 | }(t, c) 198 | 199 | time.Sleep(5 * time.Millisecond) // force a scheduler break to get CDC going 200 | generateRecords(t, db, count, 
count) 201 | expectedBatches := (count * 2) / batchSize 202 | if (count*2)%batchSize != 0 { 203 | expectedBatches = expectedBatches + 1 204 | } 205 | waitForChanges(t, h, expectedBatches, count*2, time.Second) 206 | require.NoError(t, c.Close(ctx)) 207 | require.NoError(t, <-cdcStatus) 208 | } 209 | 210 | func TestBootstrapAndCDCWithoutRowID(t *testing.T) { 211 | t.Parallel() 212 | db := testDB(t) 213 | t.Cleanup(func() { db.Close() }) 214 | 215 | count := 1024 216 | createTableWithoutRowID(t, db) 217 | 218 | ctx, cancel := context.WithCancel(context.Background()) 219 | t.Cleanup(cancel) 220 | 221 | h := newHandler() 222 | batchSize := defaultMaxBatchSize 223 | fsSignal, err := NewFSNotifySignal(db) 224 | require.NoError(t, err) 225 | timeSignal, err := NewTimeSignal(10 * time.Millisecond) 226 | require.NoError(t, err) 227 | signal, err := NewMultiSignal(fsSignal, timeSignal) 228 | require.NoError(t, err) 229 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true), WithSignal(signal)) 230 | require.NoError(t, err) 231 | t.Cleanup(func() { c.Close(ctx) }) 232 | 233 | require.NoError(t, c.Setup(ctx)) 234 | generateRecords(t, db, count, 0) 235 | 236 | cdcStatus := make(chan error, 1) 237 | go func(t *testing.T, c CDC) { 238 | t.Helper() 239 | cdcStatus <- c.BootstrapAndCDC(ctx) 240 | }(t, c) 241 | 242 | time.Sleep(5 * time.Millisecond) // force a scheduler break to get CDC going 243 | generateRecords(t, db, count, count) 244 | expectedBatches := (count * 2) / batchSize 245 | if (count*2)%batchSize != 0 { 246 | expectedBatches = expectedBatches + 1 247 | } 248 | waitForChanges(t, h, expectedBatches, count*2, time.Second) 249 | require.NoError(t, c.Close(ctx)) 250 | require.NoError(t, <-cdcStatus) 251 | } 252 | 253 | func TestWideTables(t *testing.T) { 254 | t.Parallel() 255 | db := testDB(t) 256 | defer db.Close() 257 | 258 | columnCount := 1000 // This is the default max stack depth in SQLite 259 | var b strings.Builder 260 | b.WriteString("CREATE TABLE test (") 261 | for x := 0; x < columnCount; x = x + 1 { 262 | b.WriteString(fmt.Sprintf("col%d INT", x)) 263 | if x < columnCount-1 { 264 | b.WriteString(", ") 265 | } 266 | } 267 | b.WriteString(")") 268 | _, err := db.Exec(b.String()) 269 | require.NoError(t, err) 270 | 271 | b.Reset() 272 | params := make([]any, columnCount) 273 | b.WriteString("INSERT INTO test VALUES (") 274 | for x := 0; x < columnCount; x = x + 1 { 275 | params[x] = x 276 | b.WriteString("?") 277 | if x < columnCount-1 { 278 | b.WriteString(", ") 279 | } 280 | } 281 | b.WriteString(")") 282 | _, err = db.Exec(b.String(), params...) 
283 | require.NoError(t, err) 284 | 285 | h := newHandler() 286 | batchSize := defaultMaxBatchSize 287 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithMaxBatchSize(batchSize), WithBlobSupport(true)) 288 | require.NoError(t, err) 289 | 290 | ctx, cancel := context.WithCancel(context.Background()) 291 | defer cancel() 292 | 293 | require.NoError(t, c.Bootstrap(ctx)) 294 | require.NoError(t, c.Close(ctx)) 295 | 296 | results := h.Changes() 297 | require.Len(t, results, 1) 298 | require.Len(t, results[0], 2) 299 | ch := results[0][0] 300 | require.Equal(t, Bootstrap, ch.Operation) 301 | ch = results[0][1] 302 | afterMap := make(map[string]any) 303 | require.NoError(t, json.Unmarshal(ch.After, &afterMap)) 304 | require.Len(t, afterMap, columnCount) 305 | } 306 | 307 | var ( 308 | smallColumnCounts = []int{2, 4, 8, 16, 32, 63} //nolint:gochecknoglobals 309 | largeColumnCounts = []int{64, 128, 256, 512, 1000} //nolint:gochecknoglobals 310 | ) 311 | 312 | // This benchmark measures the added latency of the CDC triggers for inserts. 313 | // 314 | // The tables used in this benchmark have a simplistic structure where all 315 | // columns are integer types. The number of columns varies by the test case but 316 | // never exceeds the 63 column limit for generating a change event in a single 317 | // step. Each table is tested with the triggers on and off to highlight the 318 | // added latency of the triggers. 319 | // 320 | // All of the writes are applied serially so there is no impact from concurrent 321 | // writes. 322 | func BenchmarkTriggerLatencySimpleTableSerialInserts(b *testing.B) { 323 | for _, columnCount := range smallColumnCounts { 324 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpInsert) 325 | } 326 | } 327 | 328 | // This benchmark measures the added latency of the CDC triggers for updates. 329 | // 330 | // The tables used in this benchmark have a simplistic structure where all 331 | // columns are integer types. The number of columns varies by the test case but 332 | // never exceeds the 63 column limit for generating a change event in a single 333 | // step. Each table is tested with the triggers on and off to highlight the 334 | // added latency of the triggers. 335 | // 336 | // The table is populated with enough records that each step of the benchmark 337 | // operates on a unique row. All columns in each row are updated in each step. 338 | // 339 | // All of the writes are applied serially so there is no impact from concurrent 340 | // writes. 341 | func BenchmarkTriggerLatencySimpleTableSerialUpdates(b *testing.B) { 342 | for _, columnCount := range smallColumnCounts { 343 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpUpdate) 344 | } 345 | } 346 | 347 | // This benchmark measures the added latency of the CDC triggers for deletes. 348 | // 349 | // The tables used in this benchmark have a simplistic structure where all 350 | // columns are integer types. The number of columns varies by the test case but 351 | // never exceeds the 63 column limit for generating a change event in a single 352 | // step. Each table is tested with the triggers on and off to highlight the 353 | // added latency of the triggers. 354 | // 355 | // The table is populated with enough records that each step of the benchmark 356 | // operates on a unique row. Each step deletes a row. 357 | // 358 | // All of the writes are applied serially so there is no impact from concurrent 359 | // writes. 
360 | func BenchmarkTriggerLatencySimpleTableSerialDeletes(b *testing.B) { 361 | for _, columnCount := range smallColumnCounts { 362 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpDelete) 363 | } 364 | } 365 | 366 | // Measure the latency of inserts in wide tables. 367 | // 368 | // The tables used in this benchmark have a simplistic structure where all 369 | // columns are integer types. The number of columns varies by the test case but 370 | // always exceeds the 63 column limit for generating a change event in a single 371 | // step. Each table is tested with the triggers on and off to highlight the 372 | // added latency of the triggers. 373 | // 374 | // All of the writes are applied serially so there is no impact from concurrent 375 | // writes. 376 | func BenchmarkTriggerLatencyWideTableSerialInserts(b *testing.B) { 377 | for _, columnCount := range largeColumnCounts { 378 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpInsert) 379 | } 380 | } 381 | 382 | // Measure the latency of updates in wide tables. 383 | // 384 | // The tables used in this benchmark have a simplistic structure where all 385 | // columns are integer types. The number of columns varies by the test case but 386 | // always exceeds the 63 column limit for generating a change event in a single 387 | // step. Each table is tested with the triggers on and off to highlight the 388 | // added latency of the triggers. 389 | // 390 | // The table is populated with enough records that each step of the benchmark 391 | // operates on a unique row. All columns in each row are updated in each step. 392 | // 393 | // All of the writes are applied serially so there is no impact from concurrent 394 | // writes. 395 | func BenchmarkTriggerLatencyWideTableSerialUpdates(b *testing.B) { 396 | for _, columnCount := range largeColumnCounts { 397 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpUpdate) 398 | } 399 | } 400 | 401 | // Measure the latency of deletes in wide tables. 402 | // 403 | // The tables used in this benchmark have a simplistic structure where all 404 | // columns are integer types. The number of columns varies by the test case but 405 | // always exceeds the 63 column limit for generating a change event in a single 406 | // step. Each table is tested with the triggers on and off to highlight the 407 | // added latency of the triggers. 408 | // 409 | // The table is populated with enough records that each step of the benchmark 410 | // operates on a unique row. Each step deletes a row. 411 | // 412 | // All of the writes are applied serially so there is no impact from concurrent 413 | // writes. 414 | func BenchmarkTriggerLatencyWideTableSerialDeletes(b *testing.B) { 415 | for _, columnCount := range largeColumnCounts { 416 | benchmarkTriggerLatencyTableSerial(b, columnCount, OpDelete) 417 | } 418 | } 419 | 420 | // This benchmark measures the BLOB column type encoding process. 421 | // 422 | // BLOB type columns have to be encoded because JSON does not have a native 423 | // binary type. The encoding process uses the hex encoding SQLite function. 424 | // This benchmark attempts to measure the encoding time growth as the size of 425 | // the blob increases. 
426 | func BenchmarkBlobEncoding(b *testing.B) { 427 | blobSizes := []int{16, 64, 256, 1024, 4096, 16384, 32768, 65536, 131072, 262144, 524288, 1048576} 428 | for _, blobSize := range blobSizes { 429 | b.Run(fmt.Sprintf("size=%d", blobSize), func(b *testing.B) { 430 | db := testDB(b) 431 | defer db.Close() 432 | 433 | _, err := db.Exec(`CREATE TABLE test (col BLOB)`) 434 | require.NoError(b, err) 435 | 436 | blobBody := make([]byte, blobSize) 437 | for x := 0; x < blobSize; x = x + 1 { 438 | blobBody[x] = byte(x % 256) 439 | } 440 | 441 | q := `INSERT INTO test VALUES (?)` 442 | ctx, cancel := context.WithCancel(context.Background()) 443 | defer cancel() 444 | h := &handlerNull{} 445 | c, err := NewTriggerEngine(db, h, []string{testTableName}, WithBlobSupport(true)) 446 | require.NoError(b, err) 447 | defer c.Close(ctx) 448 | require.NoError(b, c.Setup(ctx)) 449 | 450 | b.ResetTimer() 451 | for n := 0; n < b.N; n = n + 1 { 452 | _, err = db.Exec(q, blobBody) 453 | require.NoError(b, err) 454 | } 455 | }) 456 | } 457 | } 458 | 459 | type OpType string 460 | 461 | const ( 462 | OpInsert OpType = "insert" 463 | OpUpdate OpType = "update" 464 | OpDelete OpType = "delete" 465 | ) 466 | 467 | func benchmarkTriggerLatencyTableSerial(b *testing.B, columnCount int, op OpType) { 468 | b.Helper() 469 | 470 | b.Run(fmt.Sprintf("triggers=off/columns=%d", columnCount), func(b *testing.B) { 471 | b.StopTimer() 472 | db := testDB(b) 473 | defer db.Close() 474 | 475 | query, params := setupBenchmarkTable(b, db, columnCount, op, b.N) 476 | 477 | b.ResetTimer() 478 | b.StartTimer() 479 | for n := 0; n < b.N; n = n + 1 { 480 | if op != OpInsert { 481 | params[len(params)-1] = n 482 | } 483 | if op == OpInsert { 484 | params[0] = n 485 | } 486 | _, err := db.Exec(query, params...) 487 | require.NoError(b, err) 488 | } 489 | }) 490 | 491 | b.Run(fmt.Sprintf("triggers=on/columns=%d", columnCount), func(b *testing.B) { 492 | b.StopTimer() 493 | db := testDB(b) 494 | defer db.Close() 495 | 496 | query, params := setupBenchmarkTable(b, db, columnCount, op, b.N) 497 | 498 | c, err := NewTriggerEngine(db, &handlerNull{}, []string{testTableName}, WithBlobSupport(true)) 499 | require.NoError(b, err) 500 | defer c.Close(context.Background()) 501 | require.NoError(b, c.Setup(context.Background())) 502 | 503 | b.ResetTimer() 504 | b.StartTimer() 505 | for n := 0; n < b.N; n = n + 1 { 506 | if op != OpInsert { 507 | params[len(params)-1] = n 508 | } 509 | if op == OpInsert { 510 | params[0] = n 511 | } 512 | _, err = db.Exec(query, params...) 
513 | require.NoError(b, err) 514 | } 515 | }) 516 | } 517 | 518 | func setupBenchmarkTable(b *testing.B, db *sql.DB, columnCount int, op OpType, n int) (string, []any) { 519 | b.Helper() 520 | 521 | var builder strings.Builder 522 | builder.WriteString(fmt.Sprintf("CREATE TABLE %s (", testTableName)) 523 | for x := 0; x < columnCount; x = x + 1 { 524 | builder.WriteString(fmt.Sprintf("col%d INT", x)) 525 | if x == 0 { 526 | builder.WriteString(" PRIMARY KEY") 527 | } 528 | if x < columnCount-1 { 529 | builder.WriteString(", ") 530 | } 531 | } 532 | builder.WriteString(") WITHOUT ROWID") 533 | _, err := db.Exec(builder.String()) 534 | require.NoError(b, err) 535 | 536 | // For updates and deletes, we need initial data 537 | if op != OpInsert { 538 | builder.Reset() 539 | params := make([]any, columnCount) 540 | builder.WriteString(fmt.Sprintf("INSERT INTO %s VALUES (", testTableName)) 541 | for x := 0; x < columnCount; x = x + 1 { 542 | params[x] = x 543 | builder.WriteString("?") 544 | if x < columnCount-1 { 545 | builder.WriteString(", ") 546 | } 547 | } 548 | builder.WriteString(")") 549 | query := builder.String() 550 | 551 | // Insert a record count equal to the requested benchmark operation count 552 | for x := 0; x < n; x = x + 1 { 553 | params[0] = x 554 | _, err = db.Exec(query, params...) 555 | require.NoError(b, err) 556 | } 557 | } 558 | 559 | // Prepare the operation query 560 | var query string 561 | var params []any 562 | switch op { 563 | case OpInsert: 564 | builder.Reset() 565 | params = make([]any, columnCount) 566 | builder.WriteString(fmt.Sprintf("INSERT INTO %s VALUES (", testTableName)) 567 | for x := 0; x < columnCount; x = x + 1 { 568 | params[x] = x 569 | builder.WriteString("?") 570 | if x < columnCount-1 { 571 | builder.WriteString(", ") 572 | } 573 | } 574 | builder.WriteString(")") 575 | query = builder.String() 576 | case OpUpdate: 577 | builder.Reset() 578 | params = make([]any, columnCount) 579 | builder.WriteString(fmt.Sprintf("UPDATE %s SET ", testTableName)) 580 | for x := 1; x < columnCount; x = x + 1 { 581 | // Set all the columns to a value greater than the number of rows 582 | // inserted during setup. This ensures that all columns are modified 583 | // and that no row is modified twice. 584 | params[x-1] = n + 1 585 | builder.WriteString(fmt.Sprintf("col%d = ?", x)) 586 | if x < columnCount-1 { 587 | builder.WriteString(", ") 588 | } 589 | } 590 | builder.WriteString(" WHERE col0 = ?") 591 | params[len(params)-1] = 0 592 | query = builder.String() 593 | case OpDelete: 594 | query = fmt.Sprintf("DELETE FROM %s WHERE col0 = ?", testTableName) 595 | params = make([]any, 1) 596 | } 597 | 598 | return query, params 599 | } 600 | 601 | func generateRecords(t tOrB, db *sql.DB, n int, offset int) { 602 | t.Helper() 603 | 604 | tx, err := db.Begin() 605 | require.NoError(t, err) 606 | defer tx.Rollback() 607 | 608 | textValue := "foo" 609 | blobValue := []byte{0xDE, 0xAD, 0xBE, 0xAF} 610 | realValue := 3.14 611 | numericValue := 1 612 | for x := 0; x < n; x = x + 1 { 613 | intValue := x + offset 614 | _, err := tx.Exec( 615 | `INSERT INTO `+testTableName+` VALUES ( 616 | ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 617 | ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 618 | ?, ?, ?, ?, ?, ?, ? 
)`,
			intValue, intValue, intValue, intValue, intValue, intValue, intValue, intValue, intValue,
			textValue, textValue, textValue, textValue, textValue, textValue, textValue, textValue,
			blobValue,
			realValue, realValue, realValue, realValue,
			numericValue, numericValue, numericValue, numericValue, numericValue,
		)
		require.NoError(t, err)
	}

	require.NoError(t, tx.Commit())
}

func createTable(t tOrB, db *sql.DB) {
	t.Helper()
	_, err := db.Exec(sqlCreateTestTable)
	require.NoError(t, err)
}

func createTableWithoutRowID(t tOrB, db *sql.DB) {
	t.Helper()
	_, err := db.Exec(sqlCreateTestTable + " WITHOUT ROWID")
	require.NoError(t, err)
}

const testTableName = "test"
const sqlCreateTestTable = `CREATE TABLE ` + testTableName + ` (
	a INT,
	b INTEGER,
	c TINYINT,
	d SMALLINT,
	e MEDIUMINT,
	f BIGINT,
	g UNSIGNED BIG INT,
	h INT2,
	i INT8,

	j CHARACTER(20),
	k VARCHAR(255),
	l VARYING CHARACTER(255),
	m NCHAR(55),
	n NATIVE CHARACTER(70),
	o NVARCHAR(100),
	p TEXT,
	q CLOB,

	r BLOB,

	s REAL,
	t DOUBLE,
	u DOUBLE PRECISION,
	v FLOAT,

	w NUMERIC,
	x DECIMAL(10,5),
	y BOOLEAN,
	z DATE,
	aa DATETIME,

	PRIMARY KEY (a,b,c)
)`

type tOrB interface {
	Errorf(format string, args ...interface{})
	FailNow()
	Helper()
	TempDir() string
}

type testCDC struct {
	db    *sql.DB
	cdc   CDC
	awake chan<- SignalEvent
}

func (c *testCDC) Cleanup() {
	_ = c.db.Close()
	_ = c.cdc.Close(context.Background())
}

func newTestCDC(t tOrB, handler ChangesHandler, options ...Option) *testCDC {
	t.Helper()
	db := testDB(t)
	awake := make(chan SignalEvent)
	signal, err := NewChannelSignal(awake)
	require.NoError(t, err)
	options = append(options, WithSignal(signal))
	cdc, err := NewTriggerEngine(db, handler, []string{testTableName}, options...)
	require.NoError(t, err)
	return &testCDC{
		db:    db,
		cdc:   cdc,
		awake: awake,
	}
}

func testDB(t tOrB) *sql.DB {
	t.Helper()
	dir := t.TempDir()

	db, err := sql.Open("sqlite", filepath.Join(dir, "test.sqlite")+"?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)&_pragma=synchronous(normal)&_pragma=foreign_keys(on)")
	require.NoError(t, err)
	return db
}

func waitForChanges(t tOrB, h *handler, expectedBatches int, expectedChanges int, timeout time.Duration) {
	t.Helper()

	results := make([]Changes, 0, expectedBatches)
	totalChanges := 0
	start := time.Now()
	didTimeout := false
	for len(results) < expectedBatches && totalChanges < expectedChanges {
		if time.Since(start) > timeout {
			didTimeout = true
			break
		}
		// Count each newly received batch exactly once. Appending first and
		// then re-counting the final batch on every pass would double count
		// whenever a poll returns no new batches.
		for _, batch := range h.Changes() {
			results = append(results, batch)
			totalChanges = totalChanges + len(batch)
		}
		time.Sleep(5 * time.Millisecond)
	}
	require.False(t, didTimeout, "CDC did not complete in time. wanted %d but got %d", expectedChanges, totalChanges)
}

type handler struct {
	changes []Changes
	lock    sync.Locker
}

func newHandler() *handler {
	return &handler{
		lock: &sync.Mutex{},
	}
}

func (h *handler) Changes() []Changes {
	h.lock.Lock()
	defer h.lock.Unlock()

	changes := h.changes
	h.changes = nil
	return changes
}

func (h *handler) HandleChanges(ctx context.Context, changes Changes) error {
	h.lock.Lock()
	defer h.lock.Unlock()
	h.changes = append(h.changes, changes)
	return nil
}

type handlerNull struct{}

func (h *handlerNull) HandleChanges(ctx context.Context, changes Changes) error {
	return nil
}
--------------------------------------------------------------------------------
/triggers.go:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: © 2024 Kevin Conway
// SPDX-License-Identifier: Apache-2.0

package cdc

import (
	"context"
	"database/sql"
	"fmt"
	"strings"
	"sync"
	"time"
)

const (
	defaultLogTableName = "__cdc_log"
	defaultMaxBatchSize = 50
)

type Option func(*TriggerEngine) error

// WithLogTableName specifies the name of the log table. This defaults to
// __cdc_log but may be customized if needed.
func WithLogTableName(name string) Option {
	return func(t *TriggerEngine) error {
		t.logTableName = name
		return nil
	}
}

// WithMaxBatchSize specifies the maximum number of changes to process in a
// single batch. This defaults to 50.
func WithMaxBatchSize(size int) Option {
	return func(t *TriggerEngine) error {
		t.maxBatchSize = size
		return nil
	}
}

// WithoutSubsecondTime disables the use of subsecond timestamps in the log
// table when given true. This is only needed for old versions of SQLite and
// should be avoided otherwise.
func WithoutSubsecondTime(v bool) Option {
	return func(t *TriggerEngine) error {
		t.subsec = !v
		return nil
	}
}

// WithBlobSupport enables or disables the storage of BLOB columns in the log
// table. This defaults to false because of the performance impacts of encoding
// BLOB type data.
func WithBlobSupport(v bool) Option {
	return func(t *TriggerEngine) error {
		t.blobs = v
		return nil
	}
}

// WithSignal installs a custom awakening signal that triggers the inspection
// of the log table when in CDC mode. The default signal is a combination of
// a filesystem watcher that signals when the SQLite files have changed and a
// 250ms timer used as a backstop for any missed filesystem events.
func WithSignal(signal Signal) Option {
	return func(t *TriggerEngine) error {
		t.signal = signal
		return nil
	}
}

// NewTriggerEngine returns a CDC implementation based on table triggers.
//
// This implementation works with any SQLite driver and uses only SQL operations
// to implement CDC. For each specified table to monitor, the implementation
// creates triggers for AFTER INSERT, AFTER UPDATE, and AFTER DELETE. These
// triggers populate a log table, named __cdc_log by default. The log table
// entries contain effectively the same information as the Change struct.
//
// The before and after images are stored as JSON objects in the log table. The
// JSON objects are generated from the column names and values in the table.
//
// See the TriggerEngine documentation for more details.
func NewTriggerEngine(db *sql.DB, handler ChangesHandler, tables []string, options ...Option) (CDC, error) {
	meta, err := newDBMeta(db)
	if err != nil {
		return nil, err
	}
	result := &TriggerEngine{
		db:           db,
		meta:         meta,
		handler:      handler,
		tables:       tables,
		fnOnce:       &sync.Once{},
		closed:       make(chan any),
		closeOnce:    &sync.Once{},
		logTableName: defaultLogTableName,
		maxBatchSize: defaultMaxBatchSize,
		subsec:       true,
		blobs:        false,
	}
	for _, opt := range options {
		if err := opt(result); err != nil {
			return nil, err
		}
	}

	if result.signal == nil {
		fsSignal, err := NewFSNotifySignal(db)
		if err != nil {
			return nil, fmt.Errorf("failed to create filesystem wake signal: %w", err)
		}
		timeSignal, err := NewTimeSignal(250 * time.Millisecond)
		if err != nil {
			return nil, fmt.Errorf("failed to create time wake signal: %w", err)
		}
		signal, err := NewMultiSignal(fsSignal, timeSignal)
		if err != nil {
			return nil, fmt.Errorf("failed to create multi wake signal: %w", err)
		}
		result.signal = signal
	}

	return result, nil
}

// TriggerEngine implements CDC using table triggers.
//
// This implementation targets a specified set of tables and captures changes by
// using AFTER triggers to populate a change log table. The setup and teardown
// methods manage both the triggers and the log table. Currently, all target
// tables must be set up and torn down together and cannot be targeted
// individually.
//
// The bootstrap mode is implemented by selecting batches of records from target
// tables. These are passed through to the bound ChangesHandler as they are
// selected. Each table bootstrap begins with the specified BOOTSTRAP operation
// event. Because this implementation of CDC uses table triggers and a
// persistent change log table, a bootstrap is usually only needed once after
// running setup. If your system encounters a critical fault and needs to
// rebuild state from a bootstrap then you can safely run bootstrap again.
// However, subsequent runs of bootstrap mode do not clear the change log table.
//
// The cdc mode is implemented by selecting batches of records from the change
// log table. The order of the log selection matches the natural sort order of
// the table, which itself matches the order in which changes were made to the
// data. The frequency with which cdc mode checks for changes is determined by
// the bound Signal implementation. The default signal is a combination of a
// filesystem watcher and a time-based interval. The filesystem watcher detects
// changes to the underlying SQLite files with the intent to handle changes as
// quickly as possible once they are persisted. However, the filesystem watcher
// is not infallible so a time-based interval signal is included as a backstop.
// Generally, we recommend including some form of time-based interval signal to
// augment any other signal choices.
//
// By default, all change log entries are recorded with a millisecond precision
// timestamp. This precision is only available in SQLite 3.42.0 and later. If
// any system accessing the SQLite database is older than 3.42.0 then you must
// disable the subsecond timestamp with the WithoutSubsecondTime option.
//
// By default, support for BLOB data is disabled and BLOB type columns are not
// included in change log records due to the performance impacts of encoding
// BLOB type data. If you need to handle BLOB type data then you must enable
// BLOB support with the WithBlobSupport option. Note, however, that this
// implementation's identification of BLOB data is based on the declared column
// type and not the underlying data type. Any BLOB data in a non-BLOB column
// will cause a fault in this implementation. We strongly recommend using
// STRICT tables to avoid accidental BLOB data in a non-BLOB column.
type TriggerEngine struct {
	db           *sql.DB
	meta         *dbMeta
	handler      ChangesHandler
	tables       []string
	fnOnce       *sync.Once
	signal       Signal
	closed       chan any
	closeOnce    *sync.Once
	logTableName string
	maxBatchSize int
	subsec       bool
	blobs        bool
}

func (c *TriggerEngine) CDC(ctx context.Context) error {
	var err error
	c.fnOnce.Do(func() {
		err = c.cdc(ctx)
	})
	return err
}

func (c *TriggerEngine) cdc(ctx context.Context) error {
	if err := c.signal.Start(ctx); err != nil {
		return fmt.Errorf("failed to start signal: %w", err)
	}

	waker := c.signal.Waker()
	for {
		select {
		case <-c.closed:
			return nil
		case <-ctx.Done():
			return c.Close(ctx)
		case event, ok := <-waker:
			if !ok {
				return c.Close(ctx)
			}
			if event.Err != nil {
				_ = c.Close(ctx)
				return fmt.Errorf("wake signal error: %w", event.Err)
			}
			if !event.Wake {
				continue
			}
			if err := c.drainChanges(ctx); err != nil {
				return fmt.Errorf("%w: failed to process changes from the log", err)
			}
		}
	}
}

func (c *TriggerEngine) drainChanges(ctx context.Context) error {
	changes := make(Changes, 0, c.maxBatchSize)
	for {
		rows, err := c.db.QueryContext(ctx, `SELECT id, timestamp, tablename, operation, before, after FROM `+c.logTableName+` ORDER BY id ASC LIMIT ?`, c.maxBatchSize) //nolint:gosec
		if err != nil {
			return fmt.Errorf("%w: failed to select changes from the log", err)
		}
		defer rows.Close()
		maxID := new(int64)
		for rows.Next() {
			timestamp := new(string)
			table := new(string)
			operation := new(string)
			before := &sql.NullString{}
			after := &sql.NullString{}
			if err := rows.Scan(maxID, timestamp, table, operation, before, after); err != nil {
				return fmt.Errorf("%w: failed to read change record from the log", err)
			}
			ts, err := time.Parse("2006-01-02 15:04:05.999999999", *timestamp)
			if err != nil {
				return fmt.Errorf("%w: failed to parse timestamp %s from the log", err, *timestamp)
			}
			ch := Change{
				Timestamp: ts,
				Table:     *table,
				Operation: strToOperation(*operation),
			}
			if before.Valid {
				ch.Before = []byte(before.String)
			}
			if after.Valid {
				ch.After = []byte(after.String)
			}
			changes = append(changes, ch)
		}
		if err := rows.Err(); err != nil {
			return fmt.Errorf("%w: failed to read changes from the log", err)
		}
		// Close the batch explicitly rather than relying on the deferred
		// close, which only runs once the drain loop returns.
		_ = rows.Close()
		if len(changes) < 1 {
			return nil
		}
		if err := c.handle(ctx, changes); err != nil {
			return fmt.Errorf("%w: failed to handle changes", err)
		}
		changes = changes[:0]
		tx, err := c.db.BeginTx(ctx, nil)
		if err != nil {
			return fmt.Errorf("%w: failed to create transaction to delete logs", err)
		}
		defer tx.Rollback()

		_, err = tx.ExecContext(ctx, `DELETE FROM `+c.logTableName+` WHERE id <= ?`, *maxID) //nolint:gosec
		if err != nil {
			return fmt.Errorf("%w: failed to delete handled logs", err)
		}
		if err = tx.Commit(); err != nil {
			return fmt.Errorf("%w: failed to commit deletion of logs", err)
		}
	}
}

func (c *TriggerEngine) Bootstrap(ctx context.Context) error {
	var err error
	c.fnOnce.Do(func() {
		err = c.bootstrap(ctx)
	})
	return err
}

func (c *TriggerEngine) bootstrap(ctx context.Context) error {
	for _, table := range c.tables {
		if err := c.bootstrapTable(ctx, table); err != nil {
			return fmt.Errorf("%w: failed to bootstrap table %s", err, table)
		}
	}
	return nil
}

func (c *TriggerEngine) bootstrapTable(ctx context.Context, table string) error {
	t, ok := c.meta.Tables[table]
	if !ok {
		return fmt.Errorf("table %q not found in database", table)
	}
	q := sqlSelectFirst(t, c.blobs)
	rows, err := c.db.QueryContext(ctx, q, c.maxBatchSize-1)
	if err != nil {
		return fmt.Errorf("%w: failed to select first bootstrap rows for %s", err, table)
	}
	defer rows.Close()
	chs := make(Changes, 0, c.maxBatchSize)
	chs = append(chs, Change{
		Table:     table,
		Timestamp: time.Now(),
		Operation: Bootstrap,
	})
	selections := append(sqlKeyValuesForTable(t), new(string))
	for rows.Next() {
		if err := rows.Scan(selections...); err != nil {
			return fmt.Errorf("%w: failed to read bootstrap row for %s", err, table)
		}
		body := selections[len(selections)-1].(*string)
		chs = append(chs, Change{
			Table:     table,
			Timestamp: time.Now(),
			Operation: Insert,
			After:     []byte(*body),
		})
	}
	if rows.Err() != nil {
		return fmt.Errorf("%w: failed to read bootstrap rows for %s", rows.Err(), table)
	}
	_ = rows.Close()
	if len(chs) < 1 {
		return nil
	}
	if len(chs) < c.maxBatchSize {
		return c.handle(ctx, chs)
	}
	if err := c.handle(ctx, chs); err != nil {
		return fmt.Errorf("%w: failed to handle bootstrap changes for %s", err, table)
	}
	keys := make([]any, len(selections)-1)
	copy(keys, selections[:len(selections)-1])
	params := make([]any, len(keys)+1)
	for {
		q = sqlSelectNext(t, c.blobs)
		copy(params, keys)
		params[len(params)-1] = c.maxBatchSize
		rows, err = c.db.QueryContext(ctx, q, params...)
350 | if err != nil { 351 | return fmt.Errorf("%w: failed to select bootstrap rows for %s", err, table) 352 | } 353 | defer rows.Close() 354 | chs = chs[:0] 355 | for rows.Next() { 356 | selections = append(sqlKeyValuesForTable(t), new(string)) 357 | if err := rows.Scan(selections...); err != nil { 358 | return fmt.Errorf("%w: failed to read bootstrap row for %s", err, table) 359 | } 360 | body := selections[len(selections)-1].(*string) 361 | chs = append(chs, Change{ 362 | Table: table, 363 | Timestamp: time.Now(), 364 | Operation: Insert, 365 | After: []byte(*body), 366 | }) 367 | copy(keys, selections[:len(selections)-1]) 368 | } 369 | if rows.Err() != nil { 370 | return fmt.Errorf("%w: failed to read bootstrap rows for %s", rows.Err(), table) 371 | } 372 | _ = rows.Close() 373 | if len(chs) < 1 { 374 | return nil 375 | } 376 | if len(chs) < c.maxBatchSize { 377 | return c.handle(ctx, chs) 378 | } 379 | if err := c.handle(ctx, chs); err != nil { 380 | return fmt.Errorf("%w: failed to handle bootstrap changes for %s", err, table) 381 | } 382 | } 383 | } 384 | 385 | func (c *TriggerEngine) BootstrapAndCDC(ctx context.Context) error { 386 | var err error 387 | c.fnOnce.Do(func() { 388 | err = c.bootstrap(ctx) 389 | if err != nil { 390 | return 391 | } 392 | err = c.cdc(ctx) 393 | }) 394 | return err 395 | } 396 | func (c *TriggerEngine) Setup(ctx context.Context) error { 397 | tx, err := c.db.BeginTx(ctx, nil) 398 | if err != nil { 399 | return fmt.Errorf("%w: failed to create setup transaction", err) 400 | } 401 | defer tx.Rollback() 402 | 403 | logSQL := sqlCreateLogTable(c.logTableName) 404 | if _, err = tx.Exec(logSQL); err != nil { 405 | return fmt.Errorf("%w: failed to create log table", err) 406 | } 407 | for _, table := range c.tables { 408 | t, ok := c.meta.Tables[table] 409 | if !ok { 410 | return fmt.Errorf("table %q not found in database", table) 411 | } 412 | if _, err = tx.Exec(sqlCreateTableTriggerInsert(c.logTableName, t, c.subsec, c.blobs)); err != nil { 413 | return fmt.Errorf("%w: failed to create table trigger for inserts on %s", err, table) 414 | } 415 | if _, err = tx.Exec(sqlCreateTableTriggerUpdate(c.logTableName, t, c.subsec, c.blobs)); err != nil { 416 | return fmt.Errorf("%w: failed to create table trigger for updates on %s", err, table) 417 | } 418 | if _, err = tx.Exec(sqlCreateTableTriggerDelete(c.logTableName, t, c.subsec, c.blobs)); err != nil { 419 | return fmt.Errorf("%w: failed to create table trigger for deletes on %s", err, table) 420 | } 421 | } 422 | if err = tx.Commit(); err != nil { 423 | return fmt.Errorf("%w: failed to commit setup transaction", err) 424 | } 425 | return nil 426 | } 427 | func (c *TriggerEngine) Teardown(ctx context.Context) error { 428 | tx, err := c.db.BeginTx(ctx, nil) 429 | if err != nil { 430 | return fmt.Errorf("%w: failed to create teardown transaction", err) 431 | } 432 | defer tx.Rollback() 433 | 434 | for _, table := range c.tables { 435 | t, ok := c.meta.Tables[table] 436 | if !ok { 437 | return fmt.Errorf("table %q not found in database", table) 438 | } 439 | if _, err = tx.Exec(sqlDeleteTableTriggerInsert(t)); err != nil { 440 | return fmt.Errorf("%w: failed to delete table trigger for inserts on %s", err, table) 441 | } 442 | if _, err = tx.Exec(sqlDeleteTableTriggerUpdate(t)); err != nil { 443 | return fmt.Errorf("%w: failed to delete table trigger for updates on %s", err, table) 444 | } 445 | if _, err = tx.Exec(sqlDeleteTableTriggerDelete(t)); err != nil { 446 | return fmt.Errorf("%w: failed to delete table 
trigger for deletes on %s", err, table)
		}
	}
	logSQL := sqlDeleteLogTable(c.logTableName)
	if _, err = tx.Exec(logSQL); err != nil {
		return fmt.Errorf("%w: failed to delete log table", err)
	}
	if err = tx.Commit(); err != nil {
		return fmt.Errorf("%w: failed to commit teardown transaction", err)
	}
	return nil
}

func (c *TriggerEngine) Close(ctx context.Context) error {
	var err error
	c.closeOnce.Do(func() {
		close(c.closed)
		if c.signal != nil {
			if cerr := c.signal.Close(); cerr != nil {
				err = fmt.Errorf("failed to close wake signal: %w", cerr)
				return
			}
		}
	})
	return err
}

func (c *TriggerEngine) handle(ctx context.Context, changes Changes) error {
	return c.handler.HandleChanges(ctx, changes)
}

func sqlCreateLogTable(name string) string {
	return `CREATE TABLE IF NOT EXISTS ` + name + ` (
		id INTEGER PRIMARY KEY,
		timestamp TEXT NOT NULL,
		tablename TEXT NOT NULL,
		operation TEXT NOT NULL,
		before TEXT,
		after TEXT
	)`
}

func sqlCreateTableTriggerInsert(logTable string, table tableMeta, subsec bool, blobs bool) string {
	return `CREATE TRIGGER IF NOT EXISTS ` + table.Name + `__cdc_insert AFTER INSERT ON ` + table.Name + ` BEGIN
		INSERT INTO ` + logTable + ` (timestamp, tablename, operation, before, after) VALUES
		(` + sqlDateTimeNow(subsec) + `, '` + table.Name + `', 'INSERT', NULL, ` + sqlJsonObject("NEW.", table.Columns, blobs) + `);
	END`
}

func sqlCreateTableTriggerUpdate(logTable string, table tableMeta, subsec bool, blobs bool) string {
	return `CREATE TRIGGER IF NOT EXISTS ` + table.Name + `__cdc_update AFTER UPDATE ON ` + table.Name + ` BEGIN
		INSERT INTO ` + logTable + ` (timestamp, tablename, operation, before, after) VALUES
		(` + sqlDateTimeNow(subsec) + `, '` + table.Name + `', 'UPDATE', ` + sqlJsonObject("OLD.", table.Columns, blobs) + `, ` + sqlJsonObject("NEW.", table.Columns, blobs) + `);
	END`
}

func sqlCreateTableTriggerDelete(logTable string, table tableMeta, subsec bool, blobs bool) string {
	return `CREATE TRIGGER IF NOT EXISTS ` + table.Name + `__cdc_delete AFTER DELETE ON ` + table.Name + ` BEGIN
		INSERT INTO ` + logTable + ` (timestamp, tablename, operation, before, after) VALUES
		(` + sqlDateTimeNow(subsec) + `, '` + table.Name + `', 'DELETE', ` + sqlJsonObject("OLD.", table.Columns, blobs) + `, NULL);
	END`
}

func sqlDateTimeNow(subsec bool) string {
	if subsec {
		return "datetime('now', 'subsec')"
	}
	return "datetime('now')"
}

func sqlDeleteTableTriggerInsert(table tableMeta) string {
	return `DROP TRIGGER IF EXISTS ` + table.Name + `__cdc_insert`
}

func sqlDeleteTableTriggerUpdate(table tableMeta) string {
	return `DROP TRIGGER IF EXISTS ` + table.Name + `__cdc_update`
}

func sqlDeleteTableTriggerDelete(table tableMeta) string {
	return `DROP TRIGGER IF EXISTS ` + table.Name + `__cdc_delete`
}

func sqlDeleteLogTable(table string) string {
	return `DROP TABLE IF EXISTS ` + table
}

const colChunkSize = 63

func sqlJsonObject(prefix string, columns []columnMeta, blobs bool) string {
	// Build the key/value fragments first so that BLOB columns skipped when
	// blob support is disabled cannot leave a trailing comma in the generated
	// SQL or skew the chunk accounting.
	pairs := make([]string, 0, len(columns))
	for _, column := range columns {
		if strings.ToUpper(column.Type) == "BLOB" {
			if blobs {
				pairs = append(pairs, fmt.Sprintf("'%s', hex(%s%s)", column.Name, prefix, column.Name))
			}
			continue
		}
		pairs = append(pairs, fmt.Sprintf("'%s', %s%s", column.Name, prefix, column.Name))
	}

	// Chunk the fragments into json_object() calls of at most colChunkSize
	// columns each to stay within the SQLite function argument limit.
	objects := make([]string, 0, (len(pairs)/colChunkSize)+1)
	for len(pairs) > colChunkSize {
		objects = append(objects, "json_object("+strings.Join(pairs[:colChunkSize], ", ")+")")
		pairs = pairs[colChunkSize:]
	}
	objects = append(objects, "json_object("+strings.Join(pairs, ", ")+")")
	if len(objects) == 1 {
		return objects[0]
	}

	// Merge multiple chunks into a single JSON document with nested
	// json_patch() calls.
	var b strings.Builder
	for offset, object := range objects {
		if offset+1 == len(objects) {
			b.WriteString(object)
			for x := 1; x < len(objects); x = x + 1 {
				b.WriteString(")")
			}
			continue
		}
		b.WriteString("json_patch(")
		b.WriteString(object)
		b.WriteString(", ")
	}
	return b.String()
}

func sqlSelectFirst(table tableMeta, blobs bool) string {
	if !table.WithoutRowID {
		return `SELECT rowid, ` + sqlJsonObject("", table.Columns, blobs) + ` AS body FROM ` + table.Name + ` ORDER BY rowid LIMIT ?`
	}
	var keyCount int
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyCount = keyCount + 1
		}
	}
	keyColumns := make([]string, keyCount)
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyColumns[column.PK-1] = column.Name
		}
	}
	return `SELECT ` + strings.Join(keyColumns, ", ") + `, ` + sqlJsonObject("", table.Columns, blobs) + ` AS body FROM ` + table.Name + ` ORDER BY ` + strings.Join(keyColumns, ", ") + ` LIMIT ?`
}

func sqlSelectNext(table tableMeta, blobs bool) string {
	if !table.WithoutRowID {
		return `SELECT rowid, ` + sqlJsonObject("", table.Columns, blobs) + ` AS body FROM ` + table.Name + ` WHERE rowid > ? ORDER BY rowid LIMIT ?`
	}
	var keyCount int
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyCount = keyCount + 1
		}
	}
	keyColumns := make([]string, keyCount)
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyColumns[column.PK-1] = column.Name
		}
	}
	var b strings.Builder
	b.WriteString(`SELECT ` + strings.Join(keyColumns, ", ") + `, ` + sqlJsonObject("", table.Columns, blobs) + ` AS body FROM ` + table.Name)
	// Use a row-value comparison (SQLite 3.15+) for keyset pagination.
	// Comparing each key column independently with AND would skip rows where
	// a leading key column is equal and a later key column is greater.
	b.WriteString(` WHERE (` + strings.Join(keyColumns, ", ") + `) > (`)
	for offset := range keyColumns {
		b.WriteString("?")
		if offset < keyCount-1 {
			b.WriteString(", ")
		}
	}
	b.WriteString(`)`)
	b.WriteString(` ORDER BY ` + strings.Join(keyColumns, ", ") + ` LIMIT ?`)

	return b.String()
}

func sqlKeyValuesForTable(table tableMeta) []any {
	if !table.WithoutRowID {
		return []any{new(int64)}
	}
	var keyCount int
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyCount = keyCount + 1
		}
	}
	// Order the scan targets by primary key position so they line up with
	// the key columns selected by sqlSelectFirst and sqlSelectNext. Indexing
	// by the table column offset would misalign, or panic, whenever the key
	// columns are not a prefix of the table definition.
	keyValues := make([]any, keyCount)
	for _, column := range table.Columns {
		if column.PK != 0 {
			keyValues[column.PK-1] = new(any)
		}
	}
	return keyValues
}

func strToOperation(operation string) Operation {
	switch strings.ToUpper(operation) {
	case "INSERT":
		return Insert
	case "UPDATE":
		return Update
	case "DELETE":
		return Delete
	}
	return Operation("UNKNOWN")
}
--------------------------------------------------------------------------------
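The tests above double as usage documentation. For orientation, the following
is a minimal sketch of wiring the trigger engine into an application. The
module import path, the database file, and the monitored "users" table are
assumptions for illustration (go.mod is not included in this listing); the
constructor, options, handler signature, and lifecycle calls are taken directly
from triggers.go and trigger_test.go.

// example_usage.go: a hypothetical, minimal wiring of the trigger engine.
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"os/signal"
	"syscall"

	cdc "github.com/kevinconway/sqlite-cdc" // assumed import path; match your go.mod
	_ "modernc.org/sqlite"
)

// printHandler implements cdc.ChangesHandler by printing each change batch.
type printHandler struct{}

func (printHandler) HandleChanges(ctx context.Context, changes cdc.Changes) error {
	for _, ch := range changes {
		fmt.Printf("%s %s %s before=%s after=%s\n", ch.Timestamp, ch.Table, ch.Operation, ch.Before, ch.After)
	}
	return nil
}

func main() {
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// The DSN mirrors the pragmas used by testDB in trigger_test.go. The
	// database file name is illustrative.
	db, err := sql.Open("sqlite", "app.sqlite?_pragma=journal_mode(wal)&_pragma=busy_timeout(5000)")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	engine, err := cdc.NewTriggerEngine(db, printHandler{}, []string{"users"},
		cdc.WithMaxBatchSize(50),
		cdc.WithBlobSupport(true),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer engine.Close(context.Background())

	// Install the change log table and the AFTER INSERT/UPDATE/DELETE triggers.
	if err := engine.Setup(ctx); err != nil {
		log.Fatal(err)
	}
	// Emit one BOOTSTRAP event plus an INSERT per existing row, then follow
	// the change log until the context is cancelled or Close is called.
	if err := engine.BootstrapAndCDC(ctx); err != nil {
		log.Fatal(err)
	}
}

Because Setup uses CREATE TRIGGER IF NOT EXISTS and CREATE TABLE IF NOT EXISTS,
rerunning this program against the same database should be safe; only a
Teardown call removes the triggers and the log table.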