├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── 1-feature-request.yml │ ├── 2-bug.yml │ └── config.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── dependabot-auto-merge-go.yml │ ├── lint.yml │ ├── project-automation.yml │ ├── release.yml │ ├── test.yml │ └── validate-generated-files.yml ├── .gitignore ├── .golangci.goheader.template ├── .golangci.yml ├── .goreleaser.yml ├── LICENSE.md ├── Makefile ├── README.md ├── cmd └── connector │ └── main.go ├── connector.go ├── connector.yaml ├── destination.go ├── destination └── config.go ├── destination_integration_test.go ├── go.mod ├── go.sum ├── internal ├── db_info.go ├── utils.go └── utils_test.go ├── scripts ├── bump_version.sh ├── common.sh └── tag.sh ├── source.go ├── source ├── config.go ├── config_test.go ├── cpool │ ├── cpool.go │ ├── json.go │ └── json_test.go ├── iterator.go ├── logrepl │ ├── cdc.go │ ├── cdc_test.go │ ├── cleaner.go │ ├── cleaner_test.go │ ├── combined.go │ ├── combined_test.go │ ├── handler.go │ ├── handler_test.go │ └── internal │ │ ├── error.go │ │ ├── publication.go │ │ ├── publication_test.go │ │ ├── relationset.go │ │ ├── relationset_test.go │ │ ├── replication_slot.go │ │ ├── replication_slot_test.go │ │ ├── subscription.go │ │ └── subscription_test.go ├── position │ ├── position.go │ ├── position_test.go │ └── type_string.go ├── schema │ ├── avro.go │ └── avro_test.go ├── snapshot │ ├── convert.go │ ├── fetch_worker.go │ ├── fetch_worker_test.go │ ├── iterator.go │ └── iterator_test.go └── types │ ├── numeric.go │ ├── types.go │ ├── types_test.go │ └── uuid.go ├── source_integration_test.go ├── test ├── conf.d │ └── postgresql.conf ├── docker-compose.yml └── helper.go └── tools ├── go.mod └── go.sum /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @ConduitIO/conduit-core 2 | -------------------------------------------------------------------------------- 
/.github/ISSUE_TEMPLATE/1-feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Request a new feature. 3 | title: "Feature: " 4 | labels: [feature, triage] 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Feature description 9 | description: A clear and concise description of what you want to happen and what problem will this solve. 10 | validations: 11 | required: true 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2-bug.yml: -------------------------------------------------------------------------------- 1 | name: Bug 2 | description: Report a bug. 3 | title: "Bug: <title>" 4 | labels: [bug, triage] 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Bug description 9 | description: A concise description of what you're experiencing and what you expected to happen instead. 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Steps to reproduce 15 | description: Steps to reproduce the behavior. 16 | placeholder: | 17 | 1. In this environment... 18 | 2. With this config... 19 | 3. Run '...' 20 | 4. See error... 21 | validations: 22 | required: true 23 | - type: input 24 | attributes: 25 | label: Version 26 | description: "Version of the Conduit connector as well as version of the Postgres connector you're using." 27 | placeholder: v0.1.0 28 | validations: 29 | required: true 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: ❓ Ask a Question 4 | url: https://github.com/ConduitIO/conduit/discussions 5 | about: Please ask and answer questions here. 
6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Docs: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates 2 | version: 2 3 | updates: 4 | 5 | # Maintain dependencies for GitHub Actions 6 | - package-ecosystem: "github-actions" 7 | directory: "/" 8 | schedule: 9 | interval: "daily" 10 | commit-message: 11 | prefix: ".github:" 12 | 13 | # Maintain dependencies for Go 14 | - package-ecosystem: "gomod" 15 | directory: "/" 16 | schedule: 17 | interval: "daily" 18 | commit-message: 19 | prefix: "go.mod:" 20 | 21 | # Maintain dependencies for Go tools 22 | - package-ecosystem: "gomod" 23 | directory: "/tools" 24 | schedule: 25 | interval: "weekly" 26 | commit-message: 27 | prefix: "Go tools:" 28 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | Please include a summary of the change and what type of change it is (new feature, bug fix, refactoring, documentation). 4 | Please also include relevant motivation and context. 5 | List any dependencies that are required for this change. 6 | 7 | Fixes # (issue) 8 | 9 | ### Quick checks: 10 | 11 | - [ ] I have followed the [Code Guidelines](https://github.com/ConduitIO/conduit/blob/main/docs/code_guidelines.md). 12 | - [ ] There is no other [pull request](https://github.com/ConduitIO/conduit-connector-postgres/pulls) for the same update/change. 13 | - [ ] I have written unit tests. 14 | - [ ] I have made sure that the PR is of reasonable size and can be easily reviewed. 
15 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-auto-merge-go.yml: -------------------------------------------------------------------------------- 1 | # This action automatically merges dependabot PRs that update go dependencies (only patch and minor updates). 2 | # Based on: https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request 3 | 4 | name: Dependabot auto-merge 5 | on: 6 | pull_request: 7 | # Run this action when dependabot labels the PR, we care about the 'go' label. 8 | types: [labeled] 9 | 10 | permissions: 11 | pull-requests: write 12 | contents: write 13 | 14 | jobs: 15 | dependabot-go: 16 | runs-on: ubuntu-latest 17 | if: ${{ github.actor == 'dependabot[bot]' && contains(github.event.pull_request.labels.*.name, 'go') }} 18 | steps: 19 | - name: Dependabot metadata 20 | id: metadata 21 | uses: dependabot/fetch-metadata@v2.4.0 22 | with: 23 | github-token: "${{ secrets.GITHUB_TOKEN }}" 24 | 25 | - name: Approve PR 26 | # Approve only patch and minor updates 27 | if: ${{ steps.metadata.outputs.update-type != 'version-update:semver-major' }} 28 | run: gh pr review --approve "$PR_URL" 29 | env: 30 | PR_URL: ${{ github.event.pull_request.html_url }} 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | 33 | - name: Enable auto-merge for Dependabot PRs 34 | # Enable auto-merging only for patch and minor updates 35 | if: ${{ steps.metadata.outputs.update-type != 'version-update:semver-major' }} 36 | run: gh pr merge --auto --squash "$PR_URL" 37 | env: 38 | PR_URL: ${{ github.event.pull_request.html_url }} 39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | 
on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | jobs: 9 | golangci-lint: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-go@v5 14 | with: 15 | go-version-file: 'go.mod' 16 | 17 | # This step sets up the variable steps.golangci-lint-version.outputs.v 18 | # to contain the version of golangci-lint (e.g. v1.54.2). 19 | # The version is taken from go.mod. 20 | - name: Golangci-lint version 21 | id: golangci-lint-version 22 | run: | 23 | GOLANGCI_LINT_VERSION=$( go list -modfile=tools/go.mod -m -f '{{.Version}}' github.com/golangci/golangci-lint/v2 ) 24 | echo "v=$GOLANGCI_LINT_VERSION" >> "$GITHUB_OUTPUT" 25 | 26 | - name: golangci-lint 27 | uses: golangci/golangci-lint-action@v8 28 | with: 29 | version: ${{ steps.golangci-lint-version.outputs.v }} 30 | -------------------------------------------------------------------------------- /.github/workflows/project-automation.yml: -------------------------------------------------------------------------------- 1 | name: project-management 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | 7 | jobs: 8 | project-mgmt: 9 | uses: ConduitIO/automation/.github/workflows/project-automation.yml@main 10 | secrets: 11 | project-automation-token: ${{ secrets.PROJECT_AUTOMATION }} 12 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | release: 13 | name: Release 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | with: 19 | fetch-depth: 0 20 | 21 | - name: Check Connector Tag 22 | uses: conduitio/automation/actions/check_connector_tag@main 23 | 24 | - name: Set up Go 25 | uses: actions/setup-go@v5 26 | with: 27 | go-version-file: 'go.mod' 28 | 29 | - name: Run 
GoReleaser 30 | uses: goreleaser/goreleaser-action@v6 31 | with: 32 | distribution: goreleaser 33 | version: latest 34 | args: release 35 | env: 36 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 37 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v5 16 | with: 17 | go-version-file: 'go.mod' 18 | 19 | - name: Test 20 | run: make test GOTEST_FLAGS="-v -count=1" 21 | -------------------------------------------------------------------------------- /.github/workflows/validate-generated-files.yml: -------------------------------------------------------------------------------- 1 | name: validate-generated-files 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | jobs: 9 | validate-generated-files: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v5 16 | with: 17 | go-version-file: 'go.mod' 18 | 19 | - name: Check generated files 20 | run: | 21 | export PATH=$PATH:$(go env GOPATH)/bin 22 | make install-tools generate 23 | git diff 24 | git diff --exit-code --numstat 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binary built with `make build` 2 | /conduit-connector-postgres 3 | .DS_Store 4 | .idea/ 5 | -------------------------------------------------------------------------------- /.golangci.goheader.template: -------------------------------------------------------------------------------- 1 | Copyright © {{ copyright-year }} Meroxa, Inc. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | linters: 3 | default: none 4 | enable: 5 | - bodyclose 6 | - copyloopvar 7 | - dogsled 8 | - durationcheck 9 | - errcheck 10 | - errname 11 | - forbidigo 12 | - goconst 13 | - gocritic 14 | - gocyclo 15 | - goheader 16 | - gomoddirectives 17 | - gomodguard 18 | - goprintffuncname 19 | - gosec 20 | - govet 21 | - ineffassign 22 | - makezero 23 | - noctx 24 | - nolintlint 25 | - predeclared 26 | - revive 27 | - rowserrcheck 28 | - sqlclosecheck 29 | - staticcheck 30 | - unconvert 31 | - unused 32 | - wastedassign 33 | - whitespace 34 | settings: 35 | forbidigo: 36 | forbid: 37 | - pattern: ^pgxpool\.New.*$ 38 | msg: Use github.com/conduitio/conduit-connector-postgres/source/cpool.New instead. 
39 | gocyclo: 40 | min-complexity: 20 41 | goheader: 42 | values: 43 | regexp: 44 | copyright-year: 20[2-9]\d 45 | template-path: .golangci.goheader.template 46 | nolintlint: 47 | require-explanation: true 48 | require-specific: true 49 | allow-unused: false 50 | exclusions: 51 | generated: lax 52 | presets: 53 | - comments 54 | - common-false-positives 55 | - legacy 56 | - std-error-handling 57 | rules: 58 | - linters: 59 | - gosec 60 | path: test/helper\.go 61 | - linters: 62 | - forbidigo 63 | path: source/cpool/cpool\.go 64 | - linters: 65 | - goconst 66 | path: (.+)_test\.go 67 | paths: 68 | - third_party$ 69 | - builtin$ 70 | - examples$ 71 | formatters: 72 | enable: 73 | - gofmt 74 | - goimports 75 | settings: 76 | gofmt: 77 | simplify: false 78 | exclusions: 79 | generated: lax 80 | paths: 81 | - third_party$ 82 | - builtin$ 83 | - examples$ 84 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | builds: 3 | - main: ./cmd/connector/main.go 4 | goos: 5 | - darwin 6 | - linux 7 | - windows 8 | env: 9 | - CGO_ENABLED=0 10 | ldflags: 11 | - "-s -w -X 'github.com/conduitio/conduit-connector-postgres.version={{ .Tag }}'" 12 | checksum: 13 | name_template: checksums.txt 14 | archives: 15 | - name_template: >- 16 | {{ .ProjectName }}_ 17 | {{- .Version }}_ 18 | {{- title .Os }}_ 19 | {{- if eq .Arch "amd64" }}x86_64 20 | {{- else if eq .Arch "386" }}i386 21 | {{- else }}{{ .Arch }}{{ end }} 22 | changelog: 23 | sort: asc 24 | use: github 25 | filters: 26 | exclude: 27 | - '^docs:' 28 | - '^test:' 29 | - '^go.mod:' 30 | - '^.github:' 31 | - Merge branch 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | 
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Meroxa, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION=$(shell git describe --tags --dirty --always) 2 | 3 | .PHONY: build 4 | build: 5 | go build -ldflags "-X 'github.com/conduitio/conduit-connector-postgres.version=${VERSION}'" -o conduit-connector-postgres cmd/connector/main.go 6 | 7 | .PHONY: test 8 | test: 9 | # run required docker containers, execute integration tests, stop containers after tests 10 | docker compose -f test/docker-compose.yml up --force-recreate --quiet-pull -d --wait 11 | go test $(GOTEST_FLAGS) -race ./...; ret=$$?; \ 12 | docker compose -f test/docker-compose.yml down --volumes; \ 13 | exit $$ret 14 | 15 | .PHONY: lint 16 | lint: 17 | golangci-lint run 18 | 19 | .PHONY: generate 20 | generate: 21 | go generate ./... 22 | conn-sdk-cli readmegen -w 23 | 24 | .PHONY: fmt 25 | fmt: 26 | gofumpt -l -w . 27 | gci write --skip-generated . 
28 | 29 | .PHONY: install-tools 30 | install-tools: 31 | @echo Installing tools from tools/go.mod 32 | @go list -modfile=tools/go.mod tool | xargs -I % go list -modfile=tools/go.mod -f "%@{{.Module.Version}}" % | xargs -tI % go install % 33 | @go mod tidy 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Conduit Connector PostgreSQL 2 | 3 | The PostgreSQL connector is a [Conduit](https://github.com/ConduitIO/conduit) 4 | plugin. It provides both a source and a destination PostgreSQL connector. 5 | 6 | <!-- readmegen:description --> 7 | ## Source 8 | 9 | The Postgres Source Connector connects to a database with the provided `url` and 10 | starts creating records for each change detected in the provided tables. 11 | 12 | Upon starting, the source takes a snapshot of the provided tables in the database, 13 | then switches into CDC mode. In CDC mode, the plugin reads from a buffer of CDC events. 14 | 15 | ### Snapshot 16 | 17 | When the connector first starts, snapshot mode is enabled. The connector acquires 18 | a read-only lock on the tables, and then reads all rows of the tables into Conduit. 19 | Once all rows in that initial snapshot are read the connector releases its lock and 20 | switches into CDC mode. 21 | 22 | This behavior is enabled by default, but can be turned off by adding 23 | `"snapshotMode": "never"` to the Source configuration. 24 | 25 | ### Change Data Capture 26 | 27 | This connector implements Change Data Capture (CDC) features for PostgreSQL by 28 | creating a logical replication slot and a publication that listens to changes in the 29 | configured tables. Every detected change is converted into a record. If there are no 30 | records available, the connector blocks until a record is available or the connector 31 | receives a stop signal. 
32 | 33 | #### Logical Replication Configuration 34 | 35 | When the connector switches to CDC mode, it attempts to run the initial setup commands 36 | to create its logical replication slot and publication. It will connect to an existing 37 | slot if one with the configured name exists. 38 | 39 | The Postgres user specified in the connection URL must have sufficient privileges to 40 | run all of these setup commands, or it will fail. 41 | 42 | Example pipeline configuration that's using logical replication: 43 | 44 | ```yaml 45 | version: 2.2 46 | pipelines: 47 | - id: pg-to-log 48 | status: running 49 | connectors: 50 | - id: pg 51 | type: source 52 | plugin: builtin:postgres 53 | settings: 54 | url: "postgres://exampleuser:examplepass@localhost:5433/exampledb?sslmode=disable" 55 | tables: "users" 56 | cdcMode: "logrepl" 57 | logrepl.publicationName: "examplepub" 58 | logrepl.slotName: "exampleslot" 59 | - id: log 60 | type: destination 61 | plugin: builtin:log 62 | settings: 63 | level: info 64 | ``` 65 | 66 | :warning: When the connector or pipeline is deleted, the connector will automatically 67 | attempt to delete the replication slot and publication. This is the default behaviour 68 | and can be disabled by setting `logrepl.autoCleanup` to `false`. 69 | 70 | ### Key Handling 71 | 72 | The connector will automatically look up the primary key column for the specified tables 73 | and use them as the key value. If that can't be determined, the connector will return 74 | an error. 75 | 76 | ## Destination 77 | 78 | The Postgres Destination takes a Conduit record and stores it using a SQL statement. 79 | The Destination is designed to handle different payloads and keys. Because of this, 80 | each record is individually parsed and upserted. 81 | 82 | ### Handling record operations 83 | 84 | Based on the `Operation` field in the record, the destination will either insert, 85 | update or delete the record in the target table. Snapshot records are always inserted. 
86 | 87 | If the target table already contains a record with the same key as a record being 88 | inserted, the record will be updated (upserted). This can overwrite and thus potentially 89 | lose data, so keys should be assigned correctly from the Source. 90 | 91 | If the target table does not contain a record with the same key as a record being 92 | deleted, the record will be ignored. 93 | 94 | If there is no key, the record will be simply appended. 95 | <!-- /readmegen:description --> 96 | 97 | ## Source Configuration Parameters 98 | 99 | <!-- readmegen:source.parameters.yaml --> 100 | ```yaml 101 | version: 2.2 102 | pipelines: 103 | - id: example 104 | status: running 105 | connectors: 106 | - id: example 107 | plugin: "postgres" 108 | settings: 109 | # Tables is a List of table names to read from, separated by a comma, 110 | # e.g.:"table1,table2". Use "*" if you'd like to listen to all tables. 111 | # Type: string 112 | # Required: yes 113 | tables: "" 114 | # URL is the connection string for the Postgres database. 115 | # Type: string 116 | # Required: yes 117 | url: "" 118 | # CDCMode determines how the connector should listen to changes. 119 | # Type: string 120 | # Required: no 121 | cdcMode: "auto" 122 | # LogreplAutoCleanup determines if the replication slot and 123 | # publication should be removed when the connector is deleted. 124 | # Type: bool 125 | # Required: no 126 | logrepl.autoCleanup: "true" 127 | # LogreplPublicationName determines the publication name in case the 128 | # connector uses logical replication to listen to changes (see 129 | # CDCMode). 130 | # Type: string 131 | # Required: no 132 | logrepl.publicationName: "conduitpub" 133 | # LogreplSlotName determines the replication slot name in case the 134 | # connector uses logical replication to listen to changes (see 135 | # CDCMode). Can only contain lower-case letters, numbers, and the 136 | # underscore character. 
137 | # Type: string 138 | # Required: no 139 | logrepl.slotName: "conduitslot" 140 | # WithAvroSchema determines whether the connector should attach an 141 | # avro schema on each record. 142 | # Type: bool 143 | # Required: no 144 | logrepl.withAvroSchema: "true" 145 | # Snapshot fetcher size determines the number of rows to retrieve at a 146 | # time. 147 | # Type: int 148 | # Required: no 149 | snapshot.fetchSize: "50000" 150 | # SnapshotMode is whether the plugin will take a snapshot of the 151 | # entire table before starting cdc mode. 152 | # Type: string 153 | # Required: no 154 | snapshotMode: "initial" 155 | # Maximum delay before an incomplete batch is read from the source. 156 | # Type: duration 157 | # Required: no 158 | sdk.batch.delay: "0" 159 | # Maximum size of batch before it gets read from the source. 160 | # Type: int 161 | # Required: no 162 | sdk.batch.size: "0" 163 | # Specifies whether to use a schema context name. If set to false, no 164 | # schema context name will be used, and schemas will be saved with the 165 | # subject name specified in the connector (not safe because of name 166 | # conflicts). 167 | # Type: bool 168 | # Required: no 169 | sdk.schema.context.enabled: "true" 170 | # Schema context name to be used. Used as a prefix for all schema 171 | # subject names. If empty, defaults to the connector ID. 172 | # Type: string 173 | # Required: no 174 | sdk.schema.context.name: "" 175 | # Whether to extract and encode the record key with a schema. 176 | # Type: bool 177 | # Required: no 178 | sdk.schema.extract.key.enabled: "false" 179 | # The subject of the key schema. If the record metadata contains the 180 | # field "opencdc.collection" it is prepended to the subject name and 181 | # separated with a dot. 182 | # Type: string 183 | # Required: no 184 | sdk.schema.extract.key.subject: "key" 185 | # Whether to extract and encode the record payload with a schema. 
186 | # Type: bool 187 | # Required: no 188 | sdk.schema.extract.payload.enabled: "false" 189 | # The subject of the payload schema. If the record metadata contains 190 | # the field "opencdc.collection" it is prepended to the subject name 191 | # and separated with a dot. 192 | # Type: string 193 | # Required: no 194 | sdk.schema.extract.payload.subject: "payload" 195 | # The type of the payload schema. 196 | # Type: string 197 | # Required: no 198 | sdk.schema.extract.type: "avro" 199 | ``` 200 | <!-- /readmegen:source.parameters.yaml --> 201 | 202 | ## Destination Configuration Parameters 203 | 204 | <!-- readmegen:destination.parameters.yaml --> 205 | ```yaml 206 | version: 2.2 207 | pipelines: 208 | - id: example 209 | status: running 210 | connectors: 211 | - id: example 212 | plugin: "postgres" 213 | settings: 214 | # URL is the connection string for the Postgres database. 215 | # Type: string 216 | # Required: yes 217 | url: "" 218 | # Key represents the column name for the key used to identify and 219 | # update existing rows. 220 | # Type: string 221 | # Required: no 222 | key: "" 223 | # Table is used as the target table into which records are inserted. 224 | # Type: string 225 | # Required: no 226 | table: "{{ index .Metadata "opencdc.collection" }}" 227 | # Maximum delay before an incomplete batch is written to the 228 | # destination. 229 | # Type: duration 230 | # Required: no 231 | sdk.batch.delay: "0" 232 | # Maximum size of batch before it gets written to the destination. 233 | # Type: int 234 | # Required: no 235 | sdk.batch.size: "0" 236 | # Allow bursts of at most X records (0 or less means that bursts are 237 | # not limited). Only takes effect if a rate limit per second is set. 238 | # Note that if `sdk.batch.size` is bigger than `sdk.rate.burst`, the 239 | # effective batch size will be equal to `sdk.rate.burst`. 
240 | # Type: int 241 | # Required: no 242 | sdk.rate.burst: "0" 243 | # Maximum number of records written per second (0 means no rate 244 | # limit). 245 | # Type: float 246 | # Required: no 247 | sdk.rate.perSecond: "0" 248 | # The format of the output record. See the Conduit documentation for a 249 | # full list of supported formats 250 | # (https://conduit.io/docs/using/connectors/configuration-parameters/output-format). 251 | # Type: string 252 | # Required: no 253 | sdk.record.format: "opencdc/json" 254 | # Options to configure the chosen output record format. Options are 255 | # normally key=value pairs separated with comma (e.g. 256 | # opt1=val2,opt2=val2), except for the `template` record format, where 257 | # options are a Go template. 258 | # Type: string 259 | # Required: no 260 | sdk.record.format.options: "" 261 | # Whether to extract and decode the record key with a schema. 262 | # Type: bool 263 | # Required: no 264 | sdk.schema.extract.key.enabled: "true" 265 | # Whether to extract and decode the record payload with a schema. 266 | # Type: bool 267 | # Required: no 268 | sdk.schema.extract.payload.enabled: "true" 269 | ``` 270 | <!-- /readmegen:destination.parameters.yaml --> 271 | 272 | ## Testing 273 | 274 | Run `make test` to run all the unit and integration tests, which require Docker 275 | to be installed and running. The command will handle starting and stopping 276 | docker containers for you. 277 | 278 | ## References 279 | 280 | - https://github.com/bitnami/bitnami-docker-postgresql-repmgr 281 | - https://github.com/Masterminds/squirrel 282 | 283 | ![scarf pixel](https://static.scarf.sh/a.png?x-pxid=1423de19-24e7-4d64-91cf-0b893ca28cc6) 284 | -------------------------------------------------------------------------------- /cmd/connector/main.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | postgres "github.com/conduitio/conduit-connector-postgres" 19 | sdk "github.com/conduitio/conduit-connector-sdk" 20 | ) 21 | 22 | func main() { 23 | sdk.Serve(postgres.Connector) 24 | } 25 | -------------------------------------------------------------------------------- /connector.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:generate conn-sdk-cli specgen 16 | 17 | package postgres 18 | 19 | import ( 20 | _ "embed" 21 | 22 | sdk "github.com/conduitio/conduit-connector-sdk" 23 | ) 24 | 25 | //go:embed connector.yaml 26 | var specs string 27 | 28 | var version = "(devel)" 29 | 30 | var Connector = sdk.Connector{ 31 | NewSpecification: sdk.YAMLSpecification(specs, version), 32 | NewSource: NewSource, 33 | NewDestination: NewDestination, 34 | } 35 | -------------------------------------------------------------------------------- /connector.yaml: -------------------------------------------------------------------------------- 1 | version: "1.0" 2 | specification: 3 | name: postgres 4 | summary: Conduit connector for PostgreSQL 5 | description: | 6 | ## Source 7 | 8 | The Postgres Source Connector connects to a database with the provided `url` and 9 | starts creating records for each change detected in the provided tables. 10 | 11 | Upon starting, the source takes a snapshot of the provided tables in the database, 12 | then switches into CDC mode. In CDC mode, the plugin reads from a buffer of CDC events. 13 | 14 | ### Snapshot 15 | 16 | When the connector first starts, snapshot mode is enabled. The connector acquires 17 | a read-only lock on the tables, and then reads all rows of the tables into Conduit. 18 | Once all rows in that initial snapshot are read the connector releases its lock and 19 | switches into CDC mode. 20 | 21 | This behavior is enabled by default, but can be turned off by adding 22 | `"snapshotMode": "never"` to the Source configuration. 23 | 24 | ### Change Data Capture 25 | 26 | This connector implements Change Data Capture (CDC) features for PostgreSQL by 27 | creating a logical replication slot and a publication that listens to changes in the 28 | configured tables. Every detected change is converted into a record. If there are no 29 | records available, the connector blocks until a record is available or the connector 30 | receives a stop signal. 
31 | 32 | #### Logical Replication Configuration 33 | 34 | When the connector switches to CDC mode, it attempts to run the initial setup commands 35 | to create its logical replication slot and publication. It will connect to an existing 36 | slot if one with the configured name exists. 37 | 38 | The Postgres user specified in the connection URL must have sufficient privileges to 39 | run all of these setup commands, or it will fail. 40 | 41 | Example pipeline configuration that's using logical replication: 42 | 43 | ```yaml 44 | version: 2.2 45 | pipelines: 46 | - id: pg-to-log 47 | status: running 48 | connectors: 49 | - id: pg 50 | type: source 51 | plugin: builtin:postgres 52 | settings: 53 | url: "postgres://exampleuser:examplepass@localhost:5433/exampledb?sslmode=disable" 54 | tables: "users" 55 | cdcMode: "logrepl" 56 | logrepl.publicationName: "examplepub" 57 | logrepl.slotName: "exampleslot" 58 | - id: log 59 | type: destination 60 | plugin: builtin:log 61 | settings: 62 | level: info 63 | ``` 64 | 65 | :warning: When the connector or pipeline is deleted, the connector will automatically 66 | attempt to delete the replication slot and publication. This is the default behaviour 67 | and can be disabled by setting `logrepl.autoCleanup` to `false`. 68 | 69 | ### Key Handling 70 | 71 | The connector will automatically look up the primary key column for the specified tables 72 | and use them as the key value. If that can't be determined, the connector will return 73 | an error. 74 | 75 | ## Destination 76 | 77 | The Postgres Destination takes a Conduit record and stores it using a SQL statement. 78 | The Destination is designed to handle different payloads and keys. Because of this, 79 | each record is individually parsed and upserted. 80 | 81 | ### Handling record operations 82 | 83 | Based on the `Operation` field in the record, the destination will either insert, 84 | update or delete the record in the target table. Snapshot records are always inserted. 
85 | 86 | If the target table already contains a record with the same key as a record being 87 | inserted, the record will be updated (upserted). This can overwrite and thus potentially 88 | lose data, so keys should be assigned correctly from the Source. 89 | 90 | If the target table does not contain a record with the same key as a record being 91 | deleted, the record will be ignored. 92 | 93 | If there is no key, the record will be simply appended. 94 | version: v0.13.0 95 | author: Meroxa, Inc. 96 | source: 97 | parameters: 98 | - name: tables 99 | description: |- 100 | Tables is a List of table names to read from, separated by a comma, e.g.:"table1,table2". 101 | Use "*" if you'd like to listen to all tables. 102 | type: string 103 | default: "" 104 | validations: 105 | - type: required 106 | value: "" 107 | - name: url 108 | description: URL is the connection string for the Postgres database. 109 | type: string 110 | default: "" 111 | validations: 112 | - type: required 113 | value: "" 114 | - name: cdcMode 115 | description: CDCMode determines how the connector should listen to changes. 116 | type: string 117 | default: auto 118 | validations: 119 | - type: inclusion 120 | value: auto,logrepl 121 | - name: logrepl.autoCleanup 122 | description: |- 123 | LogreplAutoCleanup determines if the replication slot and publication should be 124 | removed when the connector is deleted. 125 | type: bool 126 | default: "true" 127 | validations: [] 128 | - name: logrepl.publicationName 129 | description: |- 130 | LogreplPublicationName determines the publication name in case the 131 | connector uses logical replication to listen to changes (see CDCMode). 132 | type: string 133 | default: conduitpub 134 | validations: [] 135 | - name: logrepl.slotName 136 | description: |- 137 | LogreplSlotName determines the replication slot name in case the 138 | connector uses logical replication to listen to changes (see CDCMode). 
139 | Can only contain lower-case letters, numbers, and the underscore character. 140 | type: string 141 | default: conduitslot 142 | validations: 143 | - type: regex 144 | value: ^[a-z0-9_]+$ 145 | - name: logrepl.withAvroSchema 146 | description: |- 147 | WithAvroSchema determines whether the connector should attach an avro schema on each 148 | record. 149 | type: bool 150 | default: "true" 151 | validations: [] 152 | - name: snapshot.fetchSize 153 | description: Snapshot fetcher size determines the number of rows to retrieve at a time. 154 | type: int 155 | default: "50000" 156 | validations: [] 157 | - name: snapshotMode 158 | description: SnapshotMode is whether the plugin will take a snapshot of the entire table before starting cdc mode. 159 | type: string 160 | default: initial 161 | validations: 162 | - type: inclusion 163 | value: initial,never 164 | - name: sdk.batch.delay 165 | description: Maximum delay before an incomplete batch is read from the source. 166 | type: duration 167 | default: "0" 168 | validations: [] 169 | - name: sdk.batch.size 170 | description: Maximum size of batch before it gets read from the source. 171 | type: int 172 | default: "0" 173 | validations: 174 | - type: greater-than 175 | value: "-1" 176 | - name: sdk.schema.context.enabled 177 | description: |- 178 | Specifies whether to use a schema context name. If set to false, no schema context name will 179 | be used, and schemas will be saved with the subject name specified in the connector 180 | (not safe because of name conflicts). 181 | type: bool 182 | default: "true" 183 | validations: [] 184 | - name: sdk.schema.context.name 185 | description: |- 186 | Schema context name to be used. Used as a prefix for all schema subject names. 187 | If empty, defaults to the connector ID. 188 | type: string 189 | default: "" 190 | validations: [] 191 | - name: sdk.schema.extract.key.enabled 192 | description: Whether to extract and encode the record key with a schema. 
193 | type: bool 194 | default: "false" 195 | validations: [] 196 | - name: sdk.schema.extract.key.subject 197 | description: |- 198 | The subject of the key schema. If the record metadata contains the field 199 | "opencdc.collection" it is prepended to the subject name and separated 200 | with a dot. 201 | type: string 202 | default: key 203 | validations: [] 204 | - name: sdk.schema.extract.payload.enabled 205 | description: Whether to extract and encode the record payload with a schema. 206 | type: bool 207 | default: "false" 208 | validations: [] 209 | - name: sdk.schema.extract.payload.subject 210 | description: |- 211 | The subject of the payload schema. If the record metadata contains the 212 | field "opencdc.collection" it is prepended to the subject name and 213 | separated with a dot. 214 | type: string 215 | default: payload 216 | validations: [] 217 | - name: sdk.schema.extract.type 218 | description: The type of the payload schema. 219 | type: string 220 | default: avro 221 | validations: 222 | - type: inclusion 223 | value: avro 224 | destination: 225 | parameters: 226 | - name: url 227 | description: URL is the connection string for the Postgres database. 228 | type: string 229 | default: "" 230 | validations: 231 | - type: required 232 | value: "" 233 | - name: key 234 | description: Key represents the column name for the key used to identify and update existing rows. 235 | type: string 236 | default: "" 237 | validations: [] 238 | - name: table 239 | description: Table is used as the target table into which records are inserted. 240 | type: string 241 | default: '{{ index .Metadata "opencdc.collection" }}' 242 | validations: [] 243 | - name: sdk.batch.delay 244 | description: Maximum delay before an incomplete batch is written to the destination. 245 | type: duration 246 | default: "0" 247 | validations: [] 248 | - name: sdk.batch.size 249 | description: Maximum size of batch before it gets written to the destination. 
250 | type: int 251 | default: "0" 252 | validations: 253 | - type: greater-than 254 | value: "-1" 255 | - name: sdk.rate.burst 256 | description: |- 257 | Allow bursts of at most X records (0 or less means that bursts are not 258 | limited). Only takes effect if a rate limit per second is set. Note that 259 | if `sdk.batch.size` is bigger than `sdk.rate.burst`, the effective batch 260 | size will be equal to `sdk.rate.burst`. 261 | type: int 262 | default: "0" 263 | validations: 264 | - type: greater-than 265 | value: "-1" 266 | - name: sdk.rate.perSecond 267 | description: Maximum number of records written per second (0 means no rate limit). 268 | type: float 269 | default: "0" 270 | validations: 271 | - type: greater-than 272 | value: "-1" 273 | - name: sdk.record.format 274 | description: |- 275 | The format of the output record. See the Conduit documentation for a full 276 | list of supported formats (https://conduit.io/docs/using/connectors/configuration-parameters/output-format). 277 | type: string 278 | default: opencdc/json 279 | validations: [] 280 | - name: sdk.record.format.options 281 | description: |- 282 | Options to configure the chosen output record format. Options are normally 283 | key=value pairs separated with comma (e.g. opt1=val2,opt2=val2), except 284 | for the `template` record format, where options are a Go template. 285 | type: string 286 | default: "" 287 | validations: [] 288 | - name: sdk.schema.extract.key.enabled 289 | description: Whether to extract and decode the record key with a schema. 290 | type: bool 291 | default: "true" 292 | validations: [] 293 | - name: sdk.schema.extract.payload.enabled 294 | description: Whether to extract and decode the record payload with a schema. 
295 | type: bool 296 | default: "true" 297 | validations: [] 298 | -------------------------------------------------------------------------------- /destination/config.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2023 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package destination 16 | 17 | import ( 18 | "bytes" 19 | "context" 20 | "fmt" 21 | "strings" 22 | "text/template" 23 | 24 | "github.com/Masterminds/sprig/v3" 25 | "github.com/conduitio/conduit-commons/opencdc" 26 | sdk "github.com/conduitio/conduit-connector-sdk" 27 | "github.com/jackc/pgx/v5" 28 | ) 29 | 30 | type TableFn func(opencdc.Record) (string, error) 31 | 32 | type Config struct { 33 | sdk.DefaultDestinationMiddleware 34 | 35 | // URL is the connection string for the Postgres database. 36 | URL string `json:"url" validate:"required"` 37 | // Table is used as the target table into which records are inserted. 38 | Table string `json:"table" default:"{{ index .Metadata \"opencdc.collection\" }}"` 39 | // Key represents the column name for the key used to identify and update existing rows. 
40 | Key string `json:"key"` 41 | } 42 | 43 | func (c *Config) Validate(ctx context.Context) error { 44 | if _, err := pgx.ParseConfig(c.URL); err != nil { 45 | return fmt.Errorf("invalid url: %w", err) 46 | } 47 | 48 | if _, err := c.TableFunction(); err != nil { 49 | return fmt.Errorf("invalid table name or table function: %w", err) 50 | } 51 | 52 | err := c.DefaultDestinationMiddleware.Validate(ctx) 53 | if err != nil { 54 | return fmt.Errorf("middleware validation failed: %w", err) 55 | } 56 | 57 | return nil 58 | } 59 | 60 | // TableFunction returns a function that determines the table for each record individually. 61 | // The function might be returning a static table name. 62 | // If the table is neither static nor a template, an error is returned. 63 | func (c *Config) TableFunction() (f TableFn, err error) { 64 | // Not a template, i.e. it's a static table name 65 | if !strings.HasPrefix(c.Table, "{{") && !strings.HasSuffix(c.Table, "}}") { 66 | return func(_ opencdc.Record) (string, error) { 67 | return c.Table, nil 68 | }, nil 69 | } 70 | 71 | // Try to parse the table 72 | t, err := template.New("table").Funcs(sprig.FuncMap()).Parse(c.Table) 73 | if err != nil { 74 | // The table is not a valid Go template. 75 | return nil, fmt.Errorf("table is neither a valid static table nor a valid Go template: %w", err) 76 | } 77 | 78 | // The table is a valid template, return TableFn. 79 | var buf bytes.Buffer 80 | return func(r opencdc.Record) (string, error) { 81 | buf.Reset() 82 | if err := t.Execute(&buf, r); err != nil { 83 | return "", fmt.Errorf("failed to execute table template: %w", err) 84 | } 85 | return buf.String(), nil 86 | }, nil 87 | } 88 | -------------------------------------------------------------------------------- /destination_integration_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package postgres 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "math/big" 21 | "strings" 22 | "testing" 23 | 24 | "github.com/conduitio/conduit-commons/opencdc" 25 | "github.com/conduitio/conduit-connector-postgres/test" 26 | sdk "github.com/conduitio/conduit-connector-sdk" 27 | "github.com/google/go-cmp/cmp" 28 | "github.com/jackc/pgx/v5" 29 | "github.com/matryer/is" 30 | ) 31 | 32 | func TestDestination_Write(t *testing.T) { 33 | is := is.New(t) 34 | ctx := test.Context(t) 35 | conn := test.ConnectSimple(ctx, t, test.RegularConnString) 36 | 37 | // tables with capital letters should be quoted 38 | tableName := strings.ToUpper(test.RandomIdentifier(t)) 39 | test.SetupTestTableWithName(ctx, t, conn, tableName) 40 | 41 | d := NewDestination() 42 | err := sdk.Util.ParseConfig( 43 | ctx, 44 | map[string]string{ 45 | "url": test.RegularConnString, 46 | "table": "{{ index .Metadata \"opencdc.collection\" }}", 47 | }, 48 | d.Config(), 49 | Connector.NewSpecification().DestinationParams, 50 | ) 51 | is.NoErr(err) 52 | 53 | err = d.Open(ctx) 54 | is.NoErr(err) 55 | defer func() { 56 | err := d.Teardown(ctx) 57 | is.NoErr(err) 58 | }() 59 | 60 | tests := []struct { 61 | name string 62 | record opencdc.Record 63 | }{ 64 | { 65 | name: "snapshot", 66 | record: opencdc.Record{ 67 | Position: opencdc.Position("foo"), 68 | Operation: opencdc.OperationSnapshot, 69 | 
Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 70 | Key: opencdc.StructuredData{"id": 5000}, 71 | Payload: opencdc.Change{ 72 | After: opencdc.StructuredData{ 73 | "column1": "foo", 74 | "column2": 123, 75 | "column3": true, 76 | "column4": nil, 77 | "UppercaseColumn1": 222, 78 | }, 79 | }, 80 | }, 81 | }, 82 | { 83 | name: "create", 84 | record: opencdc.Record{ 85 | Position: opencdc.Position("foo"), 86 | Operation: opencdc.OperationCreate, 87 | Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 88 | Key: opencdc.StructuredData{"id": 5}, 89 | Payload: opencdc.Change{ 90 | After: opencdc.StructuredData{ 91 | "column1": "foo", 92 | "column2": 456, 93 | "column3": false, 94 | "column4": nil, 95 | "UppercaseColumn1": 333, 96 | }, 97 | }, 98 | }, 99 | }, 100 | { 101 | name: "insert on update (upsert)", 102 | record: opencdc.Record{ 103 | Position: opencdc.Position("foo"), 104 | Operation: opencdc.OperationUpdate, 105 | Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 106 | Key: opencdc.StructuredData{"id": 6}, 107 | Payload: opencdc.Change{ 108 | After: opencdc.StructuredData{ 109 | "column1": "bar", 110 | "column2": 567, 111 | "column3": true, 112 | "column4": nil, 113 | "UppercaseColumn1": 444, 114 | }, 115 | }, 116 | }, 117 | }, 118 | { 119 | name: "update on conflict", 120 | record: opencdc.Record{ 121 | Position: opencdc.Position("foo"), 122 | Operation: opencdc.OperationUpdate, 123 | Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 124 | Key: opencdc.StructuredData{"id": 1}, 125 | Payload: opencdc.Change{ 126 | After: opencdc.StructuredData{ 127 | "column1": "foobar", 128 | "column2": 567, 129 | "column3": true, 130 | "column4": nil, 131 | "UppercaseColumn1": 555, 132 | }, 133 | }, 134 | }, 135 | }, 136 | { 137 | name: "delete", 138 | record: opencdc.Record{ 139 | Position: opencdc.Position("foo"), 140 | Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 141 | Operation: 
opencdc.OperationDelete, 142 | Key: opencdc.StructuredData{"id": 4}, 143 | }, 144 | }, 145 | { 146 | name: "write a big.Rat", 147 | record: opencdc.Record{ 148 | Position: opencdc.Position("foo"), 149 | Operation: opencdc.OperationSnapshot, 150 | Metadata: map[string]string{opencdc.MetadataCollection: tableName}, 151 | Key: opencdc.StructuredData{"id": 123}, 152 | Payload: opencdc.Change{ 153 | After: opencdc.StructuredData{ 154 | "column1": "abcdef", 155 | "column2": 567, 156 | "column3": true, 157 | "column4": big.NewRat(123, 100), 158 | "UppercaseColumn1": 555, 159 | }, 160 | }, 161 | }, 162 | }, 163 | } 164 | for _, tt := range tests { 165 | t.Run(tt.name, func(t *testing.T) { 166 | is = is.New(t) 167 | id := tt.record.Key.(opencdc.StructuredData)["id"] 168 | 169 | i, err := d.Write(ctx, []opencdc.Record{tt.record}) 170 | is.NoErr(err) 171 | is.Equal(i, 1) 172 | 173 | got, err := queryTestTable(ctx, conn, tableName, id) 174 | switch tt.record.Operation { 175 | case opencdc.OperationCreate, opencdc.OperationSnapshot, opencdc.OperationUpdate: 176 | is.NoErr(err) 177 | is.Equal( 178 | "", 179 | cmp.Diff( 180 | tt.record.Payload.After, 181 | got, 182 | cmp.Comparer(func(x, y *big.Rat) bool { 183 | return x.Cmp(y) == 0 184 | }), 185 | ), 186 | ) // -want, +got 187 | case opencdc.OperationDelete: 188 | is.Equal(err, pgx.ErrNoRows) 189 | } 190 | }) 191 | } 192 | } 193 | 194 | func TestDestination_Batch(t *testing.T) { 195 | is := is.New(t) 196 | ctx := test.Context(t) 197 | conn := test.ConnectSimple(ctx, t, test.RegularConnString) 198 | 199 | tableName := strings.ToUpper(test.RandomIdentifier(t)) 200 | test.SetupTestTableWithName(ctx, t, conn, tableName) 201 | 202 | d := NewDestination() 203 | 204 | err := sdk.Util.ParseConfig( 205 | ctx, 206 | map[string]string{"url": test.RegularConnString, "table": tableName}, 207 | d.Config(), 208 | Connector.NewSpecification().DestinationParams, 209 | ) 210 | is.NoErr(err) 211 | 212 | err = d.Open(ctx) 213 | is.NoErr(err) 214 | 
defer func() { 215 | err := d.Teardown(ctx) 216 | is.NoErr(err) 217 | }() 218 | 219 | records := []opencdc.Record{ 220 | { 221 | Position: opencdc.Position("foo1"), 222 | Operation: opencdc.OperationCreate, 223 | Key: opencdc.StructuredData{"id": 5}, 224 | Payload: opencdc.Change{ 225 | After: opencdc.StructuredData{ 226 | "column1": "foo1", 227 | "column2": 1, 228 | "column3": false, 229 | "column4": nil, 230 | "UppercaseColumn1": 111, 231 | }, 232 | }, 233 | }, 234 | { 235 | Position: opencdc.Position("foo2"), 236 | Operation: opencdc.OperationCreate, 237 | Key: opencdc.StructuredData{"id": 6}, 238 | Payload: opencdc.Change{ 239 | After: opencdc.StructuredData{ 240 | "column1": "foo2", 241 | "column2": 2, 242 | "column3": true, 243 | "column4": nil, 244 | "UppercaseColumn1": 222, 245 | }, 246 | }, 247 | }, 248 | { 249 | Position: opencdc.Position("foo3"), 250 | Operation: opencdc.OperationCreate, 251 | Key: opencdc.StructuredData{"id": 7}, 252 | Payload: opencdc.Change{ 253 | After: opencdc.StructuredData{ 254 | "column1": "foo3", 255 | "column2": 3, 256 | "column3": false, 257 | "column4": nil, 258 | "UppercaseColumn1": 333, 259 | }, 260 | }, 261 | }, 262 | } 263 | 264 | i, err := d.Write(ctx, records) 265 | is.NoErr(err) 266 | is.Equal(i, len(records)) 267 | 268 | for _, rec := range records { 269 | got, err := queryTestTable(ctx, conn, tableName, rec.Key.(opencdc.StructuredData)["id"]) 270 | is.NoErr(err) 271 | is.Equal(rec.Payload.After, got) 272 | } 273 | } 274 | 275 | func queryTestTable(ctx context.Context, conn test.Querier, tableName string, id any) (opencdc.StructuredData, error) { 276 | row := conn.QueryRow( 277 | ctx, 278 | fmt.Sprintf(`SELECT column1, column2, column3, column4, "UppercaseColumn1" FROM %q WHERE id = $1`, tableName), 279 | id, 280 | ) 281 | 282 | var ( 283 | col1 string 284 | col2 int 285 | col3 bool 286 | col4Str *string 287 | uppercaseCol1 int 288 | ) 289 | 290 | err := row.Scan(&col1, &col2, &col3, &col4Str, &uppercaseCol1) 291 | if 
err != nil { 292 | return nil, err 293 | } 294 | 295 | // Handle the potential nil case for col4 296 | var col4 interface{} 297 | if col4Str != nil { 298 | r := new(big.Rat) 299 | r.SetString(*col4Str) 300 | col4 = r 301 | } 302 | 303 | return opencdc.StructuredData{ 304 | "column1": col1, 305 | "column2": col2, 306 | "column3": col3, 307 | "column4": col4, 308 | "UppercaseColumn1": uppercaseCol1, 309 | }, nil 310 | } 311 | -------------------------------------------------------------------------------- /internal/db_info.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2025 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | 22 | "github.com/jackc/pgx/v5" 23 | ) 24 | 25 | // DbInfo provides information about tables in a database. 26 | type DbInfo struct { 27 | conn *pgx.Conn 28 | cache map[string]*tableCache 29 | } 30 | 31 | // tableCache stores information about a table. 32 | // The information is cached and refreshed every 'cacheExpiration'. 
33 | type tableCache struct { 34 | columns map[string]int 35 | } 36 | 37 | func NewDbInfo(conn *pgx.Conn) *DbInfo { 38 | return &DbInfo{ 39 | conn: conn, 40 | cache: map[string]*tableCache{}, 41 | } 42 | } 43 | 44 | func (d *DbInfo) GetNumericColumnScale(ctx context.Context, table string, column string) (int, error) { 45 | // Check if table exists in cache and is not expired 46 | tableInfo, ok := d.cache[table] 47 | if ok { 48 | scale, ok := tableInfo.columns[column] 49 | if ok { 50 | return scale, nil 51 | } 52 | } else { 53 | // Table info has expired, refresh the cache 54 | d.cache[table] = &tableCache{ 55 | columns: map[string]int{}, 56 | } 57 | } 58 | 59 | // Fetch scale from database 60 | scale, err := d.numericScaleFromDb(ctx, table, column) 61 | if err != nil { 62 | return 0, err 63 | } 64 | 65 | d.cache[table].columns[column] = scale 66 | 67 | return scale, nil 68 | } 69 | 70 | func (d *DbInfo) numericScaleFromDb(ctx context.Context, table string, column string) (int, error) { 71 | // Query to get the column type and numeric scale 72 | query := ` 73 | SELECT 74 | data_type, 75 | numeric_scale 76 | FROM 77 | information_schema.columns 78 | WHERE 79 | table_name = $1 80 | AND column_name = $2 81 | ` 82 | 83 | var dataType string 84 | var numericScale *int 85 | 86 | err := d.conn.QueryRow(ctx, query, table, column).Scan(&dataType, &numericScale) 87 | if err != nil { 88 | if errors.Is(err, pgx.ErrNoRows) { 89 | return 0, fmt.Errorf("column %s not found in table %s", column, table) 90 | } 91 | return 0, fmt.Errorf("error querying column info: %w", err) 92 | } 93 | 94 | // Check if the column is of the numeric/decimal type 95 | if dataType != "numeric" && dataType != "decimal" { 96 | return 0, fmt.Errorf("column %s in table %s is not a numeric type (actual type: %s)", column, table, dataType) 97 | } 98 | 99 | // Handle case where numeric_scale is NULL 100 | if numericScale == nil { 101 | return 0, nil // The default scale is 0 when not specified 102 | } 103 | 
104 | return *numericScale, nil 105 | } 106 | -------------------------------------------------------------------------------- /internal/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2025 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "strconv" 19 | ) 20 | 21 | // WrapSQLIdent is used to wrap PostgreSQL identifier under quotes. 22 | // It allows to use uppercase letters and special characters (like `-`) in the 23 | // names of identifiers 24 | var WrapSQLIdent = strconv.Quote 25 | -------------------------------------------------------------------------------- /internal/utils_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2025 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "strings" 21 | "testing" 22 | 23 | "github.com/conduitio/conduit-connector-postgres/test" 24 | "github.com/matryer/is" 25 | ) 26 | 27 | func TestSQLIdentWrapping(t *testing.T) { 28 | is := is.New(t) 29 | ctx := test.Context(t) 30 | conn := test.ConnectSimple(ctx, t, test.RegularConnString) 31 | 32 | cases := []struct { 33 | ident string 34 | testName string 35 | expectError bool 36 | }{ 37 | {"just_a_name", "common case", false}, 38 | {"слон", "unicode chars", false}, 39 | {"test table", "spaces", false}, 40 | {"TEST_table", "uppercase letters", false}, 41 | {`'test_table'`, "single quotes", false}, 42 | {"tes`t_table", "apostrophe", false}, 43 | {`te"st_table`, "double quotes", true}, 44 | } 45 | 46 | for _, c := range cases { 47 | t.Run(c.testName, func(t *testing.T) { 48 | w := WrapSQLIdent(c.ident) 49 | 50 | t.Cleanup(func() { 51 | if c.expectError { 52 | return 53 | } 54 | 55 | query := fmt.Sprintf("DROP TABLE %s", w) 56 | _, err := conn.Exec(context.Background(), query) 57 | is.NoErr(err) 58 | }) 59 | 60 | query := fmt.Sprintf("CREATE TABLE %s (%s int)", w, w) 61 | _, err := conn.Exec(context.Background(), query) 62 | 63 | if c.expectError { 64 | is.True(err != nil) 65 | is.True(strings.Contains(err.Error(), `(SQLSTATE 42601)`)) // syntax error 66 | } else { 67 | is.NoErr(err) 68 | } 69 | }) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /scripts/bump_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Copyright © 2025 Meroxa, Inc. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Get the directory where the script is located 19 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 20 | 21 | source "${SCRIPT_DIR}/common.sh" 22 | 23 | TAG=$1 24 | 25 | if ! check_semver "$TAG"; then 26 | echo "$TAG is NOT a valid semver string" 27 | exit 1 28 | fi 29 | 30 | # Check if yq is installed 31 | if ! command -v yq &> /dev/null; then 32 | echo "Error: yq is not installed. Please install it and try again." 33 | exit 1 34 | fi 35 | 36 | V_TAG="v$TAG" 37 | 38 | BRANCH=$(git rev-parse --abbrev-ref HEAD) 39 | CURRENT_TAG=$(get_spec_version connector.yaml) 40 | MSG="You are about to bump the version from ${CURRENT_TAG} to ${V_TAG} on branch '${BRANCH}'.\n" 41 | while true; do 42 | printf "${MSG}" 43 | read -p "Are you sure you want to continue? [y/n]" yn 44 | echo 45 | case $yn in 46 | [Yy]* ) 47 | BRANCH_NAME="update-version-$V_TAG" 48 | git checkout -b "$BRANCH_NAME" 49 | yq e ".specification.version = \"${V_TAG}\"" -i connector.yaml 50 | git commit -am "Update version to $V_TAG" 51 | git push origin "$BRANCH_NAME" 52 | 53 | # Check if gh is installed 54 | if command -v gh &> /dev/null; then 55 | echo "Creating pull request..." 56 | gh pr create \ 57 | --base main \ 58 | --title "Update version to $V_TAG" \ 59 | --body "Automated version update to $V_TAG" \ 60 | --head "$BRANCH_NAME" 61 | else 62 | echo "GitHub CLI (gh) is not installed. To create a PR, please install gh or create it manually." 63 | echo "Branch '$BRANCH_NAME' has been pushed to origin." 
64 | fi 65 | 66 | echo "Once the change has been merged, you can use scripts/tag.sh to push a new tag." 67 | break;; 68 | [Nn]* ) exit;; 69 | * ) echo "Please answer yes or no.";; 70 | esac 71 | done 72 | -------------------------------------------------------------------------------- /scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright © 2025 Meroxa, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | check_semver() { 18 | local version=$1 19 | local SV_REGEX="^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?$" 20 | 21 | if ! [[ $version =~ $SV_REGEX ]]; then 22 | echo "$version is NOT a valid semver string" 23 | return 1 24 | fi 25 | return 0 26 | } 27 | 28 | get_spec_version() { 29 | local yaml_file=$1 30 | 31 | if command -v yq &> /dev/null; then 32 | yq '.specification.version' "$yaml_file" 33 | else 34 | sed -n '/specification:/,/version:/ s/.*version: //p' "$yaml_file" | tail -1 35 | fi 36 | } 37 | -------------------------------------------------------------------------------- /scripts/tag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright © 2025 Meroxa, Inc. 
4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Get the directory where the script is located 18 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 19 | 20 | source "${SCRIPT_DIR}/common.sh" 21 | 22 | HAS_UNCOMMITTED=$(git status --porcelain=v1 2>/dev/null | wc -l | awk '{print $1}') 23 | if (( $HAS_UNCOMMITTED != 0 )); then 24 | echo "You have uncommitted changes, cannot tag." 25 | exit 1 26 | fi 27 | 28 | LAST_COMMIT=$(git log -1 --oneline) 29 | BRANCH=$(git rev-parse --abbrev-ref HEAD) 30 | CURRENT_TAG=$(git describe --tags --abbrev=0) 31 | V_TAG=$(get_spec_version connector.yaml) 32 | MSG="You are about to bump the version from ${CURRENT_TAG} to ${V_TAG}. 33 | Current commit is '${LAST_COMMIT}' on branch '${BRANCH}'. 34 | The release process is automatic and quick, so if you make a mistake, 35 | everyone will see it very soon." 36 | 37 | while true; do 38 | printf "${MSG}" 39 | read -p "Are you sure you want to continue? [y/n]" yn 40 | echo 41 | case $yn in 42 | [Yy]* ) 43 | git tag -a $V_TAG -m "Release: $V_TAG" 44 | git push origin $V_TAG 45 | break;; 46 | [Nn]* ) exit;; 47 | * ) echo "Please answer yes or no.";; 48 | esac 49 | done 50 | -------------------------------------------------------------------------------- /source.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package postgres 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | "time" 22 | 23 | "github.com/conduitio/conduit-commons/config" 24 | "github.com/conduitio/conduit-commons/csync" 25 | "github.com/conduitio/conduit-commons/lang" 26 | "github.com/conduitio/conduit-commons/opencdc" 27 | "github.com/conduitio/conduit-connector-postgres/internal" 28 | "github.com/conduitio/conduit-connector-postgres/source" 29 | "github.com/conduitio/conduit-connector-postgres/source/cpool" 30 | "github.com/conduitio/conduit-connector-postgres/source/logrepl" 31 | sdk "github.com/conduitio/conduit-connector-sdk" 32 | "github.com/jackc/pgx/v5" 33 | "github.com/jackc/pgx/v5/pgxpool" 34 | ) 35 | 36 | // Source is a Postgres source plugin. 
37 | type Source struct { 38 | sdk.UnimplementedSource 39 | 40 | iterator source.Iterator 41 | config source.Config 42 | pool *pgxpool.Pool 43 | tableKeys map[string]string 44 | } 45 | 46 | func (s *Source) Config() sdk.SourceConfig { 47 | return &s.config 48 | } 49 | 50 | func NewSource() sdk.Source { 51 | return sdk.SourceWithMiddleware( 52 | &Source{ 53 | tableKeys: make(map[string]string), 54 | config: source.Config{ 55 | DefaultSourceMiddleware: sdk.DefaultSourceMiddleware{ 56 | // disable schema extraction by default, postgres will build its own schema 57 | SourceWithSchemaExtraction: sdk.SourceWithSchemaExtraction{ 58 | PayloadEnabled: lang.Ptr(false), 59 | KeyEnabled: lang.Ptr(false), 60 | }, 61 | }, 62 | }, 63 | }, 64 | ) 65 | } 66 | 67 | func (s *Source) Open(ctx context.Context, pos opencdc.Position) error { 68 | pool, err := cpool.New(ctx, s.config.URL) 69 | if err != nil { 70 | return fmt.Errorf("failed to create a connection pool to database: %w", err) 71 | } 72 | s.pool = pool 73 | 74 | logger := sdk.Logger(ctx) 75 | if s.readingAllTables() { 76 | logger.Info().Msg("Detecting all tables...") 77 | s.config.Tables, err = s.getAllTables(ctx) 78 | if err != nil { 79 | return fmt.Errorf("failed to connect to get all tables: %w", err) 80 | } 81 | logger.Info(). 82 | Strs("tables", s.config.Tables). 83 | Int("count", len(s.config.Tables)). 
84 | Msg("Successfully detected tables") 85 | } 86 | 87 | // ensure we have keys for all tables 88 | for _, tableName := range s.config.Tables { 89 | s.tableKeys[tableName], err = s.getPrimaryKey(ctx, tableName) 90 | if err != nil { 91 | return fmt.Errorf("failed to find primary key for table %s: %w", tableName, err) 92 | } 93 | } 94 | 95 | switch s.config.CDCMode { 96 | case source.CDCModeAuto: 97 | // TODO add logic that checks if the DB supports logical replication 98 | // (since that's the only thing we support at the moment) 99 | fallthrough 100 | case source.CDCModeLogrepl: 101 | i, err := logrepl.NewCombinedIterator(ctx, s.pool, logrepl.Config{ 102 | Position: pos, 103 | SlotName: s.config.LogreplSlotName, 104 | PublicationName: s.config.LogreplPublicationName, 105 | Tables: s.config.Tables, 106 | TableKeys: s.tableKeys, 107 | WithSnapshot: s.config.SnapshotMode == source.SnapshotModeInitial, 108 | WithAvroSchema: s.config.WithAvroSchema, 109 | BatchSize: *s.config.BatchSize, 110 | }) 111 | if err != nil { 112 | return fmt.Errorf("failed to create logical replication iterator: %w", err) 113 | } 114 | s.iterator = i 115 | default: 116 | // shouldn't happen, config was validated 117 | return fmt.Errorf("unsupported CDC mode %q", s.config.CDCMode) 118 | } 119 | return nil 120 | } 121 | 122 | func (s *Source) ReadN(ctx context.Context, n int) ([]opencdc.Record, error) { 123 | return s.iterator.NextN(ctx, n) 124 | } 125 | 126 | func (s *Source) Ack(ctx context.Context, pos opencdc.Position) error { 127 | return s.iterator.Ack(ctx, pos) 128 | } 129 | 130 | func (s *Source) Teardown(ctx context.Context) error { 131 | logger := sdk.Logger(ctx) 132 | 133 | var errs []error 134 | if s.iterator != nil { 135 | logger.Debug().Msg("Tearing down iterator...") 136 | if err := s.iterator.Teardown(ctx); err != nil { 137 | logger.Warn().Err(err).Msg("Failed to tear down iterator") 138 | errs = append(errs, fmt.Errorf("failed to tear down iterator: %w", err)) 139 | } 140 | } 
141 | if s.pool != nil { 142 | logger.Debug().Msg("Closing connection pool...") 143 | err := csync.RunTimeout(ctx, s.pool.Close, time.Minute) 144 | if err != nil { 145 | errs = append(errs, fmt.Errorf("failed to close DB connection pool: %w", err)) 146 | } 147 | } 148 | return errors.Join(errs...) 149 | } 150 | 151 | func (s *Source) LifecycleOnDeleted(ctx context.Context, cfg config.Config) error { 152 | var oldConfig source.Config 153 | err := sdk.Util.ParseConfig(ctx, cfg, &oldConfig, Connector.NewSpecification().SourceParams) 154 | if err != nil { 155 | return fmt.Errorf("lifecycle delete event: failed to parse configuration: %w", err) 156 | } 157 | 158 | switch oldConfig.CDCMode { 159 | case source.CDCModeAuto: 160 | fallthrough // TODO: Adjust as `auto` changes. 161 | case source.CDCModeLogrepl: 162 | if !oldConfig.LogreplAutoCleanup { 163 | sdk.Logger(ctx).Warn().Msg("Skipping logrepl auto cleanup") 164 | return nil 165 | } 166 | 167 | return logrepl.Cleanup(ctx, logrepl.CleanupConfig{ 168 | URL: oldConfig.URL, 169 | SlotName: oldConfig.LogreplSlotName, 170 | PublicationName: oldConfig.LogreplPublicationName, 171 | }) 172 | default: 173 | sdk.Logger(ctx).Warn().Msgf("cannot handle CDC mode %q", oldConfig.CDCMode) 174 | return nil 175 | } 176 | } 177 | 178 | func (s *Source) readingAllTables() bool { 179 | return len(s.config.Tables) == 1 && s.config.Tables[0] == source.AllTablesWildcard 180 | } 181 | 182 | func (s *Source) getAllTables(ctx context.Context) ([]string, error) { 183 | query := "SELECT tablename FROM pg_tables WHERE schemaname = 'public'" 184 | 185 | rows, err := s.pool.Query(ctx, query) 186 | if err != nil { 187 | return nil, err 188 | } 189 | defer rows.Close() 190 | 191 | var tables []string 192 | for rows.Next() { 193 | var tableName string 194 | if err := rows.Scan(&tableName); err != nil { 195 | return nil, fmt.Errorf("failed to scan table name: %w", err) 196 | } 197 | tables = append(tables, tableName) 198 | } 199 | if err := rows.Err(); 
err != nil { 200 | return nil, fmt.Errorf("rows error: %w", err) 201 | } 202 | return tables, nil 203 | } 204 | 205 | // getPrimaryKey queries the db for the name of the primary key column for a 206 | // table if one exists and returns it. 207 | func (s *Source) getPrimaryKey(ctx context.Context, tableName string) (string, error) { 208 | query := `SELECT a.attname FROM pg_index i 209 | JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) 210 | WHERE i.indrelid = $1::regclass AND i.indisprimary` 211 | 212 | rows, err := s.pool.Query(ctx, query, internal.WrapSQLIdent(tableName)) 213 | if err != nil { 214 | return "", fmt.Errorf("failed to query table keys: %w", err) 215 | } 216 | defer rows.Close() 217 | 218 | if !rows.Next() { 219 | if rows.Err() != nil { 220 | return "", fmt.Errorf("query failed: %w", rows.Err()) 221 | } 222 | return "", fmt.Errorf("no table keys found: %w", pgx.ErrNoRows) 223 | } 224 | 225 | var colName string 226 | err = rows.Scan(&colName) 227 | if err != nil { 228 | return "", fmt.Errorf("failed to scan row: %w", err) 229 | } 230 | 231 | if rows.Next() { 232 | // we only support single column primary keys for now 233 | return "", errors.New("composite keys are not supported") 234 | } 235 | 236 | return colName, nil 237 | } 238 | -------------------------------------------------------------------------------- /source/config.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package source 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | 22 | sdk "github.com/conduitio/conduit-connector-sdk" 23 | "github.com/jackc/pgx/v5" 24 | ) 25 | 26 | type SnapshotMode string 27 | 28 | const ( 29 | // SnapshotModeInitial creates a snapshot in the first run of the pipeline. 30 | SnapshotModeInitial SnapshotMode = "initial" 31 | // SnapshotModeNever skips snapshot creation altogether. 32 | SnapshotModeNever SnapshotMode = "never" 33 | ) 34 | 35 | type CDCMode string 36 | 37 | const ( 38 | // CDCModeAuto tries to set up logical replication and falls back to long 39 | // polling if that is impossible. 40 | CDCModeAuto CDCMode = "auto" 41 | // CDCModeLogrepl uses logical replication to listen to changes. 42 | CDCModeLogrepl CDCMode = "logrepl" 43 | 44 | // AllTablesWildcard can be used if you'd like to listen to all tables. 45 | AllTablesWildcard = "*" 46 | ) 47 | 48 | type Config struct { 49 | sdk.DefaultSourceMiddleware 50 | 51 | // URL is the connection string for the Postgres database. 52 | URL string `json:"url" validate:"required"` 53 | 54 | // Tables is a List of table names to read from, separated by a comma, e.g.:"table1,table2". 55 | // Use "*" if you'd like to listen to all tables. 56 | Tables []string `json:"tables" validate:"required"` 57 | 58 | // SnapshotMode is whether the plugin will take a snapshot of the entire table before starting cdc mode. 
59 | SnapshotMode SnapshotMode `json:"snapshotMode" validate:"inclusion=initial|never" default:"initial"` 60 | 61 | // Snapshot fetcher size determines the number of rows to retrieve at a time. 62 | SnapshotFetchSize int `json:"snapshot.fetchSize" default:"50000"` 63 | 64 | // CDCMode determines how the connector should listen to changes. 65 | CDCMode CDCMode `json:"cdcMode" validate:"inclusion=auto|logrepl" default:"auto"` 66 | 67 | // LogreplPublicationName determines the publication name in case the 68 | // connector uses logical replication to listen to changes (see CDCMode). 69 | LogreplPublicationName string `json:"logrepl.publicationName" default:"conduitpub"` 70 | // LogreplSlotName determines the replication slot name in case the 71 | // connector uses logical replication to listen to changes (see CDCMode). 72 | // Can only contain lower-case letters, numbers, and the underscore character. 73 | LogreplSlotName string `json:"logrepl.slotName" validate:"regex=^[a-z0-9_]+$" default:"conduitslot"` 74 | 75 | // LogreplAutoCleanup determines if the replication slot and publication should be 76 | // removed when the connector is deleted. 77 | LogreplAutoCleanup bool `json:"logrepl.autoCleanup" default:"true"` 78 | 79 | // WithAvroSchema determines whether the connector should attach an avro schema on each 80 | // record. 81 | WithAvroSchema bool `json:"logrepl.withAvroSchema" default:"true"` 82 | } 83 | 84 | // Validate validates the provided config values. 85 | func (c *Config) Validate(ctx context.Context) error { 86 | var errs []error 87 | if _, err := pgx.ParseConfig(c.URL); err != nil { 88 | errs = append(errs, fmt.Errorf("invalid url: %w", err)) 89 | } 90 | 91 | err := c.DefaultSourceMiddleware.Validate(ctx) 92 | if err != nil { 93 | errs = append(errs, err) 94 | } 95 | 96 | return errors.Join(errs...) 
97 | } 98 | -------------------------------------------------------------------------------- /source/config_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2023 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package source 16 | 17 | import ( 18 | "context" 19 | "testing" 20 | 21 | "github.com/matryer/is" 22 | ) 23 | 24 | func TestConfig_Validate(t *testing.T) { 25 | testCases := []struct { 26 | name string 27 | cfg Config 28 | wantErr bool 29 | }{ 30 | { 31 | name: "valid config", 32 | cfg: Config{ 33 | URL: "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb", 34 | Tables: []string{"table1", "table2"}, 35 | CDCMode: CDCModeLogrepl, 36 | }, 37 | wantErr: false, 38 | }, { 39 | name: "invalid postgres url", 40 | cfg: Config{ 41 | URL: "postgresql", 42 | Tables: []string{"table1", "table2"}, 43 | CDCMode: CDCModeLogrepl, 44 | }, 45 | wantErr: true, 46 | }, 47 | } 48 | for _, tc := range testCases { 49 | t.Run(tc.name, func(t *testing.T) { 50 | is := is.New(t) 51 | err := tc.cfg.Validate(context.Background()) 52 | if tc.wantErr { 53 | is.True(err != nil) 54 | return 55 | } 56 | is.True(err == nil) 57 | }) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /source/cpool/cpool.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 
Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package cpool 16 | 17 | import ( 18 | "context" 19 | "encoding/json" 20 | "fmt" 21 | 22 | "github.com/jackc/pgx/v5" 23 | "github.com/jackc/pgx/v5/pgtype" 24 | "github.com/jackc/pgx/v5/pgxpool" 25 | ) 26 | 27 | type replicationCtxKey struct{} 28 | 29 | func WithReplication(ctx context.Context) context.Context { 30 | return context.WithValue(ctx, replicationCtxKey{}, true) 31 | } 32 | 33 | // New returns new pgxpool.Pool with added hooks. 34 | func New(ctx context.Context, conninfo string) (*pgxpool.Pool, error) { 35 | config, err := pgxpool.ParseConfig(conninfo) 36 | if err != nil { 37 | return nil, fmt.Errorf("failed to parse pool config: %w", err) 38 | } 39 | 40 | config.BeforeAcquire = beforeAcquireHook 41 | config.BeforeConnect = beforeConnectHook 42 | config.AfterConnect = afterConnectHook 43 | config.AfterRelease = afterReleaseHook 44 | 45 | pool, err := pgxpool.NewWithConfig(ctx, config) 46 | if err != nil { 47 | return nil, err 48 | } 49 | 50 | return pool, nil 51 | } 52 | 53 | func afterConnectHook(_ context.Context, conn *pgx.Conn) error { 54 | // Override the JSON and JSONB codec to return bytes rather than the 55 | // unmarshalled representation of map. 
56 | conn.TypeMap().RegisterType(&pgtype.Type{ 57 | Name: "json", 58 | OID: pgtype.JSONOID, 59 | Codec: &pgtype.JSONCodec{Marshal: json.Marshal, Unmarshal: jsonNoopUnmarshal}, 60 | }) 61 | conn.TypeMap().RegisterType(&pgtype.Type{ 62 | Name: "jsonb", 63 | OID: pgtype.JSONBOID, 64 | Codec: &pgtype.JSONBCodec{Marshal: json.Marshal, Unmarshal: jsonNoopUnmarshal}, 65 | }) 66 | 67 | return nil 68 | } 69 | 70 | // beforeAcquireHook ensures purpose specific connections are returned: 71 | // * If a replication connection is requested, ensure the connection has replication enabled. 72 | // * If a regular connection is requested, return non-replication connections. 73 | func beforeAcquireHook(ctx context.Context, conn *pgx.Conn) bool { 74 | replReq := ctx.Value(replicationCtxKey{}) != nil 75 | replOn := conn.Config().RuntimeParams["replication"] != "" 76 | 77 | return replReq == replOn 78 | } 79 | 80 | // beforeConnectHook customizes the configuration of the new connection. 81 | func beforeConnectHook(ctx context.Context, config *pgx.ConnConfig) error { 82 | if config.RuntimeParams["application_name"] == "" { 83 | config.RuntimeParams["application_name"] = "conduit-connector-postgres" 84 | } 85 | 86 | if v := ctx.Value(replicationCtxKey{}); v != nil { 87 | config.RuntimeParams["replication"] = "database" 88 | } 89 | 90 | return nil 91 | } 92 | 93 | // afterReleaseHook marks all replication connections for disposal. 94 | func afterReleaseHook(conn *pgx.Conn) bool { 95 | return conn.Config().RuntimeParams["replication"] == "" 96 | } 97 | -------------------------------------------------------------------------------- /source/cpool/json.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package cpool 16 | 17 | import ( 18 | "encoding/json" 19 | "reflect" 20 | ) 21 | 22 | // noopUnmarshal will copy source into dst. 23 | // this is to be used with the pgtype JSON codec 24 | func jsonNoopUnmarshal(src []byte, dst any) error { 25 | dstptr, ok := (dst.(*any)) 26 | if dst == nil || !ok { 27 | return &json.InvalidUnmarshalError{Type: reflect.TypeOf(dst)} 28 | } 29 | 30 | v := make([]byte, len(src)) 31 | copy(v, src) 32 | 33 | // set the slice to the value of the ptr. 34 | *dstptr = v 35 | 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /source/cpool/json_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cpool 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/matryer/is" 21 | ) 22 | 23 | func Test_jsonNoopUnmarshal(t *testing.T) { 24 | is := is.New(t) 25 | 26 | var dst any 27 | data := []byte(`{"foo":"bar"}`) 28 | 29 | is.NoErr(jsonNoopUnmarshal(data, &dst)) 30 | is.Equal(data, dst.([]byte)) 31 | 32 | var err error 33 | 34 | err = jsonNoopUnmarshal(data, dst) 35 | is.True(err != nil) 36 | if err != nil { 37 | is.Equal(err.Error(), "json: Unmarshal(non-pointer []uint8)") 38 | } 39 | 40 | err = jsonNoopUnmarshal(data, nil) 41 | is.True(err != nil) 42 | if err != nil { 43 | is.Equal(err.Error(), "json: Unmarshal(nil)") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /source/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package source 16 | 17 | import ( 18 | "context" 19 | 20 | "github.com/conduitio/conduit-commons/opencdc" 21 | "github.com/conduitio/conduit-connector-postgres/source/logrepl" 22 | ) 23 | 24 | // Iterator is an object that can iterate over a queue of records. 25 | type Iterator interface { 26 | // NextN takes and returns up to n records from the queue. NextN is allowed to 27 | // block until either at least one record is available or the context gets canceled. 
28 | NextN(context.Context, int) ([]opencdc.Record, error) 29 | // Ack signals that a record at a specific position was successfully 30 | // processed. 31 | Ack(context.Context, opencdc.Position) error 32 | // Teardown attempts to gracefully teardown the iterator. 33 | Teardown(context.Context) error 34 | } 35 | 36 | var _ Iterator = (*logrepl.CDCIterator)(nil) 37 | -------------------------------------------------------------------------------- /source/logrepl/cdc.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | "time" 22 | 23 | "github.com/conduitio/conduit-commons/opencdc" 24 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal" 25 | "github.com/conduitio/conduit-connector-postgres/source/position" 26 | sdk "github.com/conduitio/conduit-connector-sdk" 27 | "github.com/jackc/pglogrepl" 28 | "github.com/jackc/pgx/v5/pgxpool" 29 | ) 30 | 31 | // CDCConfig holds configuration values for CDCIterator. 
32 | type CDCConfig struct { 33 | LSN pglogrepl.LSN 34 | SlotName string 35 | PublicationName string 36 | Tables []string 37 | TableKeys map[string]string 38 | WithAvroSchema bool 39 | // BatchSize is the maximum size of a batch that will be read from the DB 40 | // in one go and processed by the CDCHandler. 41 | BatchSize int 42 | } 43 | 44 | // CDCIterator asynchronously listens for events from the logical replication 45 | // slot and returns them to the caller through NextN. 46 | type CDCIterator struct { 47 | config CDCConfig 48 | sub *internal.Subscription 49 | 50 | // batchesCh is a channel shared between this iterator and a CDCHandler, 51 | // to which the CDCHandler is sending batches of records. 52 | // Using a shared queue here would be the fastest option. However, 53 | // we also need to watch for a context that can get cancelled, 54 | // and for the subscription that can end, so using a channel is 55 | // the best option at the moment. 56 | batchesCh chan []opencdc.Record 57 | 58 | // recordsForNextRead contains records from the previous batch (returned by the CDCHandler), 59 | // that weren't return by this iterator's ReadN method. 60 | recordsForNextRead []opencdc.Record 61 | } 62 | 63 | // NewCDCIterator initializes logical replication by creating the publication and subscription manager. 64 | func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCIterator, error) { 65 | if err := internal.CreatePublication( 66 | ctx, 67 | pool, 68 | c.PublicationName, 69 | internal.CreatePublicationOptions{Tables: c.Tables}, 70 | ); err != nil { 71 | // If creating the publication fails with code 42710, this means 72 | // the publication already exists. 73 | if !internal.IsPgDuplicateErr(err) { 74 | return nil, err 75 | } 76 | 77 | sdk.Logger(ctx).Warn(). 
78 | Msgf("Publication %q already exists.", c.PublicationName) 79 | } 80 | 81 | // Using a buffered channel here so that the handler can send a batch 82 | // to the channel and start building a new batch. 83 | // This is useful when the first batch in the channel didn't reach BatchSize (which is sdk.batch.size). 84 | // The handler can prepare the next batch, and the CDCIterator can use them 85 | // to return the maximum number of records. 86 | batchesCh := make(chan []opencdc.Record, 1) 87 | handler := NewCDCHandler( 88 | ctx, 89 | internal.NewRelationSet(), 90 | c.TableKeys, 91 | batchesCh, 92 | c.WithAvroSchema, 93 | c.BatchSize, 94 | // todo make configurable 95 | time.Second, 96 | ) 97 | 98 | sub, err := internal.CreateSubscription( 99 | ctx, 100 | pool, 101 | c.SlotName, 102 | c.PublicationName, 103 | c.Tables, 104 | c.LSN, 105 | handler.Handle, 106 | ) 107 | if err != nil { 108 | return nil, fmt.Errorf("failed to initialize subscription: %w", err) 109 | } 110 | 111 | return &CDCIterator{ 112 | config: c, 113 | batchesCh: batchesCh, 114 | sub: sub, 115 | }, nil 116 | } 117 | 118 | // StartSubscriber starts the logical replication service in the background. 119 | // Blocks until the subscription becomes ready. 120 | func (i *CDCIterator) StartSubscriber(ctx context.Context) error { 121 | sdk.Logger(ctx).Info(). 122 | Str("slot", i.config.SlotName). 123 | Str("publication", i.config.PublicationName). 124 | Msgf("Starting logical replication at %s", i.sub.StartLSN) 125 | 126 | go func() { 127 | if err := i.sub.Run(ctx); err != nil { 128 | sdk.Logger(ctx).Error(). 129 | Err(err). 130 | Msg("replication exited with an error") 131 | } 132 | }() 133 | 134 | <-i.sub.Ready() 135 | 136 | sdk.Logger(ctx).Info(). 137 | Str("slot", i.config.SlotName). 138 | Str("publication", i.config.PublicationName). 139 | Msg("Logical replication started") 140 | 141 | return nil 142 | } 143 | 144 | // NextN returns up to n records from the internal channel with records. 
145 | // NextN is allowed to block until either at least one record is available 146 | // or the context gets canceled. 147 | func (i *CDCIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) { 148 | if !i.subscriberReady() { 149 | return nil, errors.New("logical replication has not been started") 150 | } 151 | 152 | if n <= 0 { 153 | return nil, fmt.Errorf("n must be greater than 0, got %d", n) 154 | } 155 | 156 | // First, we check if there are any records from the previous batch 157 | // that we can start with. 158 | recs := make([]opencdc.Record, len(i.recordsForNextRead), n) 159 | copy(recs, i.recordsForNextRead) 160 | i.recordsForNextRead = nil 161 | 162 | // NextN needs to wait until at least 1 record is available. 163 | if len(recs) == 0 { 164 | batch, err := i.nextRecordsBatchBlocking(ctx) 165 | if err != nil { 166 | return nil, fmt.Errorf("failed to fetch next batch of records (blocking): %w", err) 167 | } 168 | recs = batch 169 | } 170 | 171 | // We add any already available batches (i.e., we're not blocking waiting for any new batches to arrive) 172 | // to return at most n records. 173 | for len(recs) < n { 174 | batch, err := i.nextRecordsBatch(ctx) 175 | if err != nil { 176 | return nil, fmt.Errorf("failed to fetch next batch of records: %w", err) 177 | } 178 | if batch == nil { 179 | break 180 | } 181 | recs = i.appendRecordsWithLimit(recs, batch, n) 182 | } 183 | 184 | sdk.Logger(ctx).Trace(). 185 | Int("records", len(recs)). 186 | Int("records_for_next_read", len(i.recordsForNextRead)). 187 | Msg("CDCIterator.NextN returning records") 188 | return recs, nil 189 | } 190 | 191 | // nextRecordsBatchBlocking waits for the next batch of records to arrive, 192 | // or for the context to be done, or for the subscription to be done, 193 | // whichever comes first. 
194 | func (i *CDCIterator) nextRecordsBatchBlocking(ctx context.Context) ([]opencdc.Record, error) { 195 | select { 196 | case <-ctx.Done(): 197 | return nil, ctx.Err() 198 | case <-i.sub.Done(): 199 | if err := i.sub.Err(); err != nil { 200 | return nil, fmt.Errorf("logical replication error: %w", err) 201 | } 202 | if err := ctx.Err(); err != nil { 203 | // subscription is done because the context is canceled, we went 204 | // into the wrong case by chance 205 | return nil, err 206 | } 207 | // subscription stopped without an error and the context is still 208 | // open, this is a strange case, shouldn't actually happen 209 | return nil, fmt.Errorf("subscription stopped, no more data to fetch (this smells like a bug)") 210 | case batch := <-i.batchesCh: 211 | sdk.Logger(ctx).Trace(). 212 | Int("records", len(batch)). 213 | Msg("CDCIterator.NextN received batch of records (blocking)") 214 | return batch, nil 215 | } 216 | } 217 | 218 | func (i *CDCIterator) nextRecordsBatch(ctx context.Context) ([]opencdc.Record, error) { 219 | select { 220 | case <-ctx.Done(): 221 | // Return what we have with the error 222 | return nil, ctx.Err() 223 | case <-i.sub.Done(): 224 | if err := i.sub.Err(); err != nil { 225 | return nil, fmt.Errorf("logical replication error: %w", err) 226 | } 227 | if err := ctx.Err(); err != nil { 228 | // Return what we have with the context error 229 | return nil, err 230 | } 231 | // Return what we have with subscription stopped error 232 | return nil, fmt.Errorf("subscription stopped, no more data to fetch (this smells like a bug)") 233 | case batch := <-i.batchesCh: 234 | sdk.Logger(ctx).Trace(). 235 | Int("records", len(batch)). 
236 | Msg("CDCIterator.NextN received batch of records") 237 | 238 | return batch, nil 239 | default: 240 | // No more records currently available 241 | return nil, nil 242 | } 243 | } 244 | 245 | // appendRecordsWithLimit appends records to dst from src, until the given limit is reached, 246 | // or all records from src have been moved. 247 | // If some records from src are not moved (probably because they lack emotions), 248 | // they are saved to recordsForNextRead. 249 | func (i *CDCIterator) appendRecordsWithLimit(dst []opencdc.Record, src []opencdc.Record, limit int) []opencdc.Record { 250 | if len(src) == 0 || len(dst) > limit { 251 | return src 252 | } 253 | 254 | needed := limit - len(dst) 255 | if needed > len(src) { 256 | needed = len(src) 257 | } 258 | 259 | dst = append(dst, src[:needed]...) 260 | i.recordsForNextRead = src[needed:] 261 | 262 | return dst 263 | } 264 | 265 | // Ack forwards the acknowledgment to the subscription. 266 | func (i *CDCIterator) Ack(_ context.Context, sdkPos opencdc.Position) error { 267 | pos, err := position.ParseSDKPosition(sdkPos) 268 | if err != nil { 269 | return err 270 | } 271 | 272 | if pos.Type != position.TypeCDC { 273 | return fmt.Errorf("invalid type %q for CDC position", pos.Type.String()) 274 | } 275 | 276 | lsn, err := pos.LSN() 277 | if err != nil { 278 | return err 279 | } 280 | 281 | if lsn == 0 { 282 | return fmt.Errorf("cannot ack zero position") 283 | } 284 | 285 | i.sub.Ack(lsn) 286 | return nil 287 | } 288 | 289 | // Teardown stops the CDC subscription and blocks until the subscription is done 290 | // or the context gets canceled. If the subscription stopped with an unexpected 291 | // error, the error is returned. 292 | func (i *CDCIterator) Teardown(ctx context.Context) error { 293 | if i.sub != nil { 294 | return i.sub.Teardown(ctx) 295 | } 296 | 297 | return nil 298 | } 299 | 300 | // subscriberReady returns true when the subscriber is running. 
301 | func (i *CDCIterator) subscriberReady() bool { 302 | select { 303 | case <-i.sub.Ready(): 304 | return true 305 | default: 306 | return false 307 | } 308 | } 309 | 310 | // TXSnapshotID returns the transaction snapshot which is received 311 | // when the replication slot is created. The value can be empty, when the 312 | // iterator is resuming. 313 | func (i *CDCIterator) TXSnapshotID() string { 314 | return i.sub.TXSnapshotID 315 | } 316 | -------------------------------------------------------------------------------- /source/logrepl/cleaner.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | 22 | "github.com/conduitio/conduit-connector-postgres/source/cpool" 23 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal" 24 | sdk "github.com/conduitio/conduit-connector-sdk" 25 | ) 26 | 27 | type CleanupConfig struct { 28 | URL string 29 | SlotName string 30 | PublicationName string 31 | } 32 | 33 | // Cleanup drops the provided replication slot and publication. 34 | // It will terminate any backends consuming the replication slot before deletion. 
35 | func Cleanup(ctx context.Context, c CleanupConfig) error { 36 | logger := sdk.Logger(ctx) 37 | 38 | pool, err := cpool.New(ctx, c.URL) 39 | if err != nil { 40 | return fmt.Errorf("failed to connect to database: %w", err) 41 | } 42 | defer pool.Close() 43 | 44 | var errs []error 45 | 46 | logger.Debug(). 47 | Str("slot", c.SlotName). 48 | Str("publication", c.PublicationName). 49 | Msg("removing replication slot and publication") 50 | 51 | if c.SlotName != "" { 52 | // Terminate any outstanding backends which are consuming the slot before deleting it. 53 | if _, err := pool.Exec( 54 | ctx, 55 | "SELECT pg_terminate_backend(active_pid) FROM pg_replication_slots WHERE slot_name=$1 AND active=true", c.SlotName, 56 | ); err != nil { 57 | errs = append(errs, fmt.Errorf("failed to terminate active backends on slot: %w", err)) 58 | } 59 | 60 | if _, err := pool.Exec( 61 | ctx, 62 | "SELECT pg_drop_replication_slot($1)", c.SlotName, 63 | ); err != nil { 64 | errs = append(errs, fmt.Errorf("failed to clean up replication slot %q: %w", c.SlotName, err)) 65 | } 66 | } else { 67 | logger.Warn().Msg("cleanup: skipping replication slot cleanup, name is empty") 68 | } 69 | 70 | if c.PublicationName != "" { 71 | if err := internal.DropPublication( 72 | ctx, 73 | pool, 74 | c.PublicationName, 75 | internal.DropPublicationOptions{IfExists: true}, 76 | ); err != nil { 77 | errs = append(errs, fmt.Errorf("failed to clean up publication %q: %w", c.PublicationName, err)) 78 | } 79 | } else { 80 | logger.Warn().Msg("cleanup: skipping publication cleanup, name is empty") 81 | } 82 | 83 | return errors.Join(errs...) 84 | } 85 | -------------------------------------------------------------------------------- /source/logrepl/cleaner_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "strings" 21 | "testing" 22 | 23 | "github.com/conduitio/conduit-connector-postgres/test" 24 | "github.com/matryer/is" 25 | ) 26 | 27 | func Test_Cleanup(t *testing.T) { 28 | conn := test.ConnectSimple(context.Background(), t, test.RepmgrConnString) 29 | 30 | tests := []struct { 31 | desc string 32 | setup func(t *testing.T) 33 | conf CleanupConfig 34 | 35 | wantErr error 36 | }{ 37 | { 38 | desc: "drops slot and pub", 39 | conf: CleanupConfig{ 40 | URL: test.RepmgrConnString, 41 | SlotName: "conduitslot1", 42 | PublicationName: "conduitpub1", 43 | }, 44 | setup: func(t *testing.T) { 45 | table := test.SetupTestTable(context.Background(), t, conn) 46 | test.CreatePublication(t, conn, "conduitpub1", []string{table}) 47 | test.CreateReplicationSlot(t, conn, "conduitslot1") 48 | }, 49 | }, 50 | { 51 | desc: "drops pub slot unspecified", 52 | conf: CleanupConfig{ 53 | URL: test.RepmgrConnString, 54 | PublicationName: "conduitpub2", 55 | }, 56 | setup: func(t *testing.T) { 57 | table := test.SetupTestTable(context.Background(), t, conn) 58 | test.CreatePublication(t, conn, "conduitpub2", []string{table}) 59 | }, 60 | }, 61 | { 62 | desc: "drops slot pub unspecified", 63 | conf: CleanupConfig{ 64 | URL: test.RepmgrConnString, 65 | SlotName: "conduitslot3", 66 | }, 67 | setup: func(t *testing.T) { 68 
| test.CreateReplicationSlot(t, conn, "conduitslot3") 69 | }, 70 | }, 71 | { 72 | desc: "drops pub slot missing", 73 | conf: CleanupConfig{ 74 | URL: test.RepmgrConnString, 75 | SlotName: "conduitslot4", 76 | PublicationName: "conduitpub4", 77 | }, 78 | setup: func(t *testing.T) { 79 | table := test.SetupTestTable(context.Background(), t, conn) 80 | test.CreatePublication(t, conn, "conduitpub4", []string{table}) 81 | }, 82 | wantErr: errors.New(`replication slot "conduitslot4" does not exist`), 83 | }, 84 | { 85 | desc: "drops slot, pub missing", // no op 86 | conf: CleanupConfig{ 87 | URL: test.RepmgrConnString, 88 | SlotName: "conduitslot5", 89 | PublicationName: "conduitpub5", 90 | }, 91 | setup: func(t *testing.T) { 92 | test.CreateReplicationSlot(t, conn, "conduitslot5") 93 | }, 94 | }, 95 | } 96 | 97 | for _, tc := range tests { 98 | t.Run(tc.desc, func(t *testing.T) { 99 | is := is.New(t) 100 | 101 | if tc.setup != nil { 102 | tc.setup(t) 103 | } 104 | 105 | err := Cleanup(context.Background(), tc.conf) 106 | 107 | if tc.wantErr != nil { 108 | is.True(strings.Contains(err.Error(), tc.wantErr.Error())) 109 | } else { 110 | is.NoErr(err) 111 | } 112 | }) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /source/logrepl/combined.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | 22 | "github.com/conduitio/conduit-commons/opencdc" 23 | "github.com/conduitio/conduit-connector-postgres/source/position" 24 | "github.com/conduitio/conduit-connector-postgres/source/snapshot" 25 | sdk "github.com/conduitio/conduit-connector-sdk" 26 | "github.com/jackc/pgx/v5/pgxpool" 27 | ) 28 | 29 | type iterator interface { 30 | NextN(context.Context, int) ([]opencdc.Record, error) 31 | Ack(context.Context, opencdc.Position) error 32 | Teardown(context.Context) error 33 | } 34 | 35 | type CombinedIterator struct { 36 | conf Config 37 | pool *pgxpool.Pool 38 | 39 | cdcIterator *CDCIterator 40 | snapshotIterator *snapshot.Iterator 41 | activeIterator iterator 42 | } 43 | 44 | type Config struct { 45 | Position opencdc.Position 46 | SlotName string 47 | PublicationName string 48 | Tables []string 49 | TableKeys map[string]string 50 | WithSnapshot bool 51 | WithAvroSchema bool 52 | BatchSize int 53 | } 54 | 55 | // Validate performs validation tasks on the config. 56 | func (c Config) Validate() error { 57 | var errs []error 58 | // make sure we have all table keys 59 | for _, tableName := range c.Tables { 60 | if c.TableKeys[tableName] == "" { 61 | errs = append(errs, fmt.Errorf("missing key for table %q", tableName)) 62 | } 63 | } 64 | 65 | return errors.Join(errs...) 66 | } 67 | 68 | // NewCombinedIterator will initialize and start the Snapshot and CDC iterators. 69 | // Failure to parse the position or validate the config will return an error. 70 | func NewCombinedIterator(ctx context.Context, pool *pgxpool.Pool, conf Config) (*CombinedIterator, error) { 71 | pos, err := position.ParseSDKPosition(conf.Position) 72 | if err != nil { 73 | sdk.Logger(ctx).Debug(). 74 | Err(err). 
75 | Msgf("failed to parse position: %s", string(conf.Position)) 76 | 77 | return nil, fmt.Errorf("failed to create logrepl iterator: %w", err) 78 | } 79 | 80 | if err := conf.Validate(); err != nil { 81 | return nil, fmt.Errorf("failed to validate logrepl config: %w", err) 82 | } 83 | 84 | c := &CombinedIterator{ 85 | conf: conf, 86 | pool: pool, 87 | } 88 | 89 | // Initialize the CDC iterator. 90 | if err := c.initCDCIterator(ctx, pos); err != nil { 91 | return nil, err 92 | } 93 | 94 | // Initialize the snapshot iterator when snapshotting is enabled and not completed. 95 | // The CDC iterator must be initialized first when snapshotting is requested. 96 | if err := c.initSnapshotIterator(ctx, pos); err != nil { 97 | return nil, err 98 | } 99 | 100 | switch { 101 | case c.snapshotIterator != nil: 102 | c.activeIterator = c.snapshotIterator 103 | default: 104 | if err := c.cdcIterator.StartSubscriber(ctx); err != nil { 105 | return nil, fmt.Errorf("failed to start CDC iterator: %w", err) 106 | } 107 | 108 | c.activeIterator = c.cdcIterator 109 | } 110 | 111 | return c, nil 112 | } 113 | 114 | // NextN retrieves up to n records from the active iterator. 115 | // If the end of the snapshot is reached during this call, it will switch to the CDC iterator 116 | // and continue retrieving records from there. 
117 | func (c *CombinedIterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) { 118 | if n <= 0 { 119 | return nil, fmt.Errorf("n must be greater than 0, got %d", n) 120 | } 121 | 122 | records, err := c.activeIterator.NextN(ctx, n) 123 | if err != nil { 124 | if !errors.Is(err, snapshot.ErrIteratorDone) { 125 | return nil, fmt.Errorf("failed to fetch records in batch: %w", err) 126 | } 127 | 128 | // Snapshot iterator is done, handover to CDC iterator 129 | if err := c.useCDCIterator(ctx); err != nil { 130 | return nil, err 131 | } 132 | 133 | sdk.Logger(ctx).Debug().Msg("Snapshot completed, switching to CDC mode") 134 | return c.NextN(ctx, n) 135 | } 136 | 137 | return records, nil 138 | } 139 | 140 | func (c *CombinedIterator) Ack(ctx context.Context, p opencdc.Position) error { 141 | return c.activeIterator.Ack(ctx, p) 142 | } 143 | 144 | // Teardown will stop and teardown the CDC and Snapshot iterators. 145 | func (c *CombinedIterator) Teardown(ctx context.Context) error { 146 | logger := sdk.Logger(ctx) 147 | 148 | var errs []error 149 | 150 | if c.cdcIterator != nil { 151 | if err := c.cdcIterator.Teardown(ctx); err != nil { 152 | logger.Warn().Err(err).Msg("Failed to tear down cdc iterator") 153 | errs = append(errs, fmt.Errorf("failed to teardown cdc iterator: %w", err)) 154 | } 155 | } 156 | 157 | if c.snapshotIterator != nil { 158 | if err := c.snapshotIterator.Teardown(ctx); err != nil { 159 | logger.Warn().Err(err).Msg("Failed to tear down snapshot iterator") 160 | errs = append(errs, fmt.Errorf("failed to teardown snapshot iterator: %w", err)) 161 | } 162 | } 163 | 164 | return errors.Join(errs...) 165 | } 166 | 167 | // initCDCIterator initializes the CDC iterator, which will create the replication slot. 168 | // When snapshotting is disabled or the last known position is of CDC type, the iterator 169 | // will start to consume CDC events from the created slot. 
170 | // Returns error when: 171 | // * LSN position cannot be parsed, 172 | // * The CDC iterator fails to initalize or fail to start. 173 | func (c *CombinedIterator) initCDCIterator(ctx context.Context, pos position.Position) error { 174 | lsn, err := pos.LSN() 175 | if err != nil { 176 | return fmt.Errorf("failed to parse LSN in position: %w", err) 177 | } 178 | 179 | cdcIterator, err := NewCDCIterator(ctx, c.pool, CDCConfig{ 180 | LSN: lsn, 181 | SlotName: c.conf.SlotName, 182 | PublicationName: c.conf.PublicationName, 183 | Tables: c.conf.Tables, 184 | TableKeys: c.conf.TableKeys, 185 | WithAvroSchema: c.conf.WithAvroSchema, 186 | BatchSize: c.conf.BatchSize, 187 | }) 188 | if err != nil { 189 | return fmt.Errorf("failed to create CDC iterator: %w", err) 190 | } 191 | 192 | c.cdcIterator = cdcIterator 193 | 194 | return nil 195 | } 196 | 197 | // initSnapshotIterator initializes the Snapshot iterator. The CDC iterator must be initalized. 198 | func (c *CombinedIterator) initSnapshotIterator(ctx context.Context, pos position.Position) error { 199 | if !c.conf.WithSnapshot || pos.Type == position.TypeCDC { 200 | return nil 201 | } 202 | 203 | if c.cdcIterator == nil { 204 | return fmt.Errorf("CDC iterator needs to be initialized before snapshot") 205 | } 206 | 207 | snapshotIterator, err := snapshot.NewIterator(ctx, c.pool, snapshot.Config{ 208 | Position: c.conf.Position, 209 | Tables: c.conf.Tables, 210 | TableKeys: c.conf.TableKeys, 211 | TXSnapshotID: c.cdcIterator.TXSnapshotID(), 212 | FetchSize: c.conf.BatchSize, 213 | WithAvroSchema: c.conf.WithAvroSchema, 214 | }) 215 | if err != nil { 216 | return fmt.Errorf("failed to create snapshot iterator: %w", err) 217 | } 218 | 219 | sdk.Logger(ctx).Info().Msg("Initial snapshot requested, starting..") 220 | 221 | c.snapshotIterator = snapshotIterator 222 | 223 | return nil 224 | } 225 | 226 | // useCDCIterator will activate and start the CDC iterator. 
The snapshot iterator 227 | // will be torn down if initialized. 228 | func (c *CombinedIterator) useCDCIterator(ctx context.Context) error { 229 | if c.snapshotIterator != nil { 230 | if err := c.snapshotIterator.Teardown(ctx); err != nil { 231 | return fmt.Errorf("failed to teardown snapshot iterator during switch: %w", err) 232 | } 233 | } 234 | 235 | c.activeIterator, c.snapshotIterator = c.cdcIterator, nil 236 | 237 | if err := c.cdcIterator.StartSubscriber(ctx); err != nil { 238 | return fmt.Errorf("failed to start CDC iterator: %w", err) 239 | } 240 | 241 | return nil 242 | } 243 | -------------------------------------------------------------------------------- /source/logrepl/handler.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | "sync" 22 | "time" 23 | 24 | "github.com/conduitio/conduit-commons/opencdc" 25 | cschema "github.com/conduitio/conduit-commons/schema" 26 | "github.com/conduitio/conduit-connector-postgres/source/logrepl/internal" 27 | "github.com/conduitio/conduit-connector-postgres/source/position" 28 | "github.com/conduitio/conduit-connector-postgres/source/schema" 29 | sdk "github.com/conduitio/conduit-connector-sdk" 30 | sdkschema "github.com/conduitio/conduit-connector-sdk/schema" 31 | "github.com/jackc/pglogrepl" 32 | ) 33 | 34 | // CDCHandler is responsible for handling logical replication messages, 35 | // converting them to a record and sending them to a channel. 36 | type CDCHandler struct { 37 | tableKeys map[string]string 38 | relationSet *internal.RelationSet 39 | 40 | // batchSize is the largest number of records this handler will send at once. 41 | batchSize int 42 | flushInterval time.Duration 43 | 44 | // recordBatch holds the batch that is currently being built. 45 | recordBatch []opencdc.Record 46 | recordBatchLock sync.Mutex 47 | 48 | // out is a sending channel with batches of records. 
49 | out chan<- []opencdc.Record 50 | lastTXLSN pglogrepl.LSN 51 | withAvroSchema bool 52 | keySchemas map[string]cschema.Schema 53 | payloadSchemas map[string]cschema.Schema 54 | } 55 | 56 | func NewCDCHandler( 57 | ctx context.Context, 58 | rs *internal.RelationSet, 59 | tableKeys map[string]string, 60 | out chan<- []opencdc.Record, 61 | withAvroSchema bool, 62 | batchSize int, 63 | flushInterval time.Duration, 64 | ) *CDCHandler { 65 | h := &CDCHandler{ 66 | tableKeys: tableKeys, 67 | relationSet: rs, 68 | recordBatch: make([]opencdc.Record, 0, batchSize), 69 | out: out, 70 | withAvroSchema: withAvroSchema, 71 | keySchemas: make(map[string]cschema.Schema), 72 | payloadSchemas: make(map[string]cschema.Schema), 73 | batchSize: batchSize, 74 | flushInterval: flushInterval, 75 | } 76 | 77 | go h.scheduleFlushing(ctx) 78 | 79 | return h 80 | } 81 | 82 | func (h *CDCHandler) scheduleFlushing(ctx context.Context) { 83 | ticker := time.NewTicker(h.flushInterval) 84 | defer ticker.Stop() 85 | 86 | for range time.Tick(h.flushInterval) { 87 | h.flush(ctx) 88 | } 89 | } 90 | 91 | func (h *CDCHandler) flush(ctx context.Context) { 92 | h.recordBatchLock.Lock() 93 | defer h.recordBatchLock.Unlock() 94 | 95 | if len(h.recordBatch) == 0 { 96 | return 97 | } 98 | 99 | if errors.Is(ctx.Err(), context.Canceled) { 100 | close(h.out) 101 | sdk.Logger(ctx).Warn(). 102 | Err(ctx.Err()). 103 | Int("records", len(h.recordBatch)). 104 | Msg("CDCHandler flushing records cancelled") 105 | return 106 | } 107 | 108 | h.out <- h.recordBatch 109 | sdk.Logger(ctx).Debug(). 110 | Int("records", len(h.recordBatch)). 111 | Msg("CDCHandler sending batch of records") 112 | h.recordBatch = make([]opencdc.Record, 0, h.batchSize) 113 | } 114 | 115 | // Handle is the handler function that receives all logical replication messages. 116 | // Returns non-zero LSN when a record was emitted for the message. 
117 | func (h *CDCHandler) Handle(ctx context.Context, m pglogrepl.Message, lsn pglogrepl.LSN) (pglogrepl.LSN, error) { 118 | sdk.Logger(ctx).Trace(). 119 | Str("lsn", lsn.String()). 120 | Str("messageType", m.Type().String()). 121 | Msg("handler received pglogrepl.Message") 122 | 123 | switch m := m.(type) { 124 | case *pglogrepl.RelationMessage: 125 | // We have to add the Relations to our Set so that we can decode our own output 126 | h.relationSet.Add(m) 127 | case *pglogrepl.InsertMessage: 128 | if err := h.handleInsert(ctx, m, lsn); err != nil { 129 | return 0, fmt.Errorf("logrepl handler insert: %w", err) 130 | } 131 | return lsn, nil 132 | case *pglogrepl.UpdateMessage: 133 | if err := h.handleUpdate(ctx, m, lsn); err != nil { 134 | return 0, fmt.Errorf("logrepl handler update: %w", err) 135 | } 136 | return lsn, nil 137 | case *pglogrepl.DeleteMessage: 138 | if err := h.handleDelete(ctx, m, lsn); err != nil { 139 | return 0, fmt.Errorf("logrepl handler delete: %w", err) 140 | } 141 | return lsn, nil 142 | case *pglogrepl.BeginMessage: 143 | h.lastTXLSN = m.FinalLSN 144 | case *pglogrepl.CommitMessage: 145 | if h.lastTXLSN != 0 && h.lastTXLSN != m.CommitLSN { 146 | return 0, fmt.Errorf("out of order commit %s, expected %s", m.CommitLSN, h.lastTXLSN) 147 | } 148 | } 149 | 150 | return 0, nil 151 | } 152 | 153 | // handleInsert formats a Record with INSERT event data from Postgres and sends 154 | // it to the output channel. 
155 | func (h *CDCHandler) handleInsert( 156 | ctx context.Context, 157 | msg *pglogrepl.InsertMessage, 158 | lsn pglogrepl.LSN, 159 | ) error { 160 | rel, err := h.relationSet.Get(msg.RelationID) 161 | if err != nil { 162 | return fmt.Errorf("failed getting relation %v: %w", msg.RelationID, err) 163 | } 164 | 165 | newValues, err := h.relationSet.Values(msg.RelationID, msg.Tuple) 166 | if err != nil { 167 | return fmt.Errorf("failed to decode new values: %w", err) 168 | } 169 | 170 | if err := h.updateAvroSchema(ctx, rel); err != nil { 171 | return fmt.Errorf("failed to update avro schema: %w", err) 172 | } 173 | 174 | rec := sdk.Util.Source.NewRecordCreate( 175 | h.buildPosition(lsn), 176 | h.buildRecordMetadata(rel), 177 | h.buildRecordKey(newValues, rel.RelationName), 178 | h.buildRecordPayload(newValues), 179 | ) 180 | h.attachSchemas(rec, rel.RelationName) 181 | h.addToBatch(ctx, rec) 182 | 183 | return nil 184 | } 185 | 186 | // handleUpdate formats a record with UPDATE event data from Postgres and sends 187 | // it to the output channel. 
188 | func (h *CDCHandler) handleUpdate( 189 | ctx context.Context, 190 | msg *pglogrepl.UpdateMessage, 191 | lsn pglogrepl.LSN, 192 | ) error { 193 | rel, err := h.relationSet.Get(msg.RelationID) 194 | if err != nil { 195 | // wrap with the relation ID for context, consistent with handleInsert 196 | return fmt.Errorf("failed getting relation %v: %w", msg.RelationID, err) 197 | } 198 | 199 | newValues, err := h.relationSet.Values(msg.RelationID, msg.NewTuple) 200 | if err != nil { 201 | return fmt.Errorf("failed to decode new values: %w", err) 202 | } 203 | 204 | if err := h.updateAvroSchema(ctx, rel); err != nil { 205 | return fmt.Errorf("failed to update avro schema: %w", err) 206 | } 207 | 208 | oldValues, err := h.relationSet.Values(msg.RelationID, msg.OldTuple) 209 | if err != nil { 210 | // this is not a critical error, old values are optional, just log it 211 | // we use level "trace" intentionally to not clog up the logs in production 212 | sdk.Logger(ctx).Trace().Err(err).Msg("could not parse old values from UpdateMessage") 213 | } 214 | 215 | rec := sdk.Util.Source.NewRecordUpdate( 216 | h.buildPosition(lsn), 217 | h.buildRecordMetadata(rel), 218 | h.buildRecordKey(newValues, rel.RelationName), 219 | h.buildRecordPayload(oldValues), 220 | h.buildRecordPayload(newValues), 221 | ) 222 | h.attachSchemas(rec, rel.RelationName) 223 | h.addToBatch(ctx, rec) 224 | 225 | return nil 226 | } 227 | 228 | // handleDelete formats a record with DELETE event data from Postgres and sends 229 | // it to the output channel. Deleted records only contain the key and no payload.
229 | func (h *CDCHandler) handleDelete( 230 | ctx context.Context, 231 | msg *pglogrepl.DeleteMessage, 232 | lsn pglogrepl.LSN, 233 | ) error { 234 | rel, err := h.relationSet.Get(msg.RelationID) 235 | if err != nil { 236 | // wrap with the relation ID for context, consistent with handleInsert 237 | return fmt.Errorf("failed getting relation %v: %w", msg.RelationID, err) 238 | } 239 | 240 | oldValues, err := h.relationSet.Values(msg.RelationID, msg.OldTuple) 241 | if err != nil { 242 | return fmt.Errorf("failed to decode old values: %w", err) 243 | } 244 | 245 | if err := h.updateAvroSchema(ctx, rel); err != nil { 246 | return fmt.Errorf("failed to update avro schema: %w", err) 247 | } 248 | 249 | rec := sdk.Util.Source.NewRecordDelete( 250 | h.buildPosition(lsn), 251 | h.buildRecordMetadata(rel), 252 | h.buildRecordKey(oldValues, rel.RelationName), 253 | h.buildRecordPayload(oldValues), 254 | ) 255 | h.attachSchemas(rec, rel.RelationName) 256 | h.addToBatch(ctx, rec) 257 | 258 | return nil 259 | } 260 | 261 | // addToBatch appends the record to the current batch and triggers a flush once 262 | // the batch reaches the configured batch size. 263 | func (h *CDCHandler) addToBatch(ctx context.Context, rec opencdc.Record) { 264 | h.recordBatchLock.Lock() 265 | 266 | h.recordBatch = append(h.recordBatch, rec) 267 | currentBatchSize := len(h.recordBatch) 268 | 269 | sdk.Logger(ctx).Trace(). 270 | Int("current_batch_size", currentBatchSize). 271 | Msg("CDCHandler added record to batch") 272 | 273 | h.recordBatchLock.Unlock() 274 | 275 | // NOTE(review): the size check runs outside the lock; if records are ever 276 | // added concurrently this could race — confirm single-goroutine usage. 277 | if currentBatchSize >= h.batchSize { 278 | h.flush(ctx) 279 | } 280 | } 281 | 282 | // buildRecordMetadata returns the record metadata, containing the collection 283 | // (table) name taken from the relation. 284 | func (h *CDCHandler) buildRecordMetadata(rel *pglogrepl.RelationMessage) map[string]string { 285 | m := map[string]string{ 286 | opencdc.MetadataCollection: rel.RelationName, 287 | } 288 | 289 | return m 290 | } 291 | 292 | // buildRecordKey takes the values from the message and extracts the key that 293 | // matches the configured keyColumnName.
289 | func (h *CDCHandler) buildRecordKey(values map[string]any, table string) opencdc.Data { 290 | keyColumn := h.tableKeys[table] 291 | key := make(opencdc.StructuredData) 292 | for k, v := range values { 293 | if keyColumn == k { 294 | key[k] = v 295 | break // TODO add support for composite keys 296 | } 297 | } 298 | return key 299 | } 300 | 301 | // buildRecordPayload takes the values from the message and extracts the payload 302 | // for the record. 303 | func (h *CDCHandler) buildRecordPayload(values map[string]any) opencdc.Data { 304 | if len(values) == 0 { 305 | return nil 306 | } 307 | return opencdc.StructuredData(values) 308 | } 309 | 310 | // buildPosition stores the LSN in position and converts it to bytes. 311 | func (*CDCHandler) buildPosition(lsn pglogrepl.LSN) opencdc.Position { 312 | return position.Position{ 313 | Type: position.TypeCDC, 314 | LastLSN: lsn.String(), 315 | }.ToSDKPosition() 316 | } 317 | 318 | // updateAvroSchema generates and stores avro schema based on the relation's row 319 | // when usage of avro schema is requested. 
320 | func (h *CDCHandler) updateAvroSchema(ctx context.Context, rel *pglogrepl.RelationMessage) error { 321 | if !h.withAvroSchema { 322 | return nil 323 | } 324 | // Payload schema 325 | avroPayloadSch, err := schema.Avro.ExtractLogrepl(rel.RelationName+"_payload", rel) 326 | if err != nil { 327 | return fmt.Errorf("failed to extract payload schema: %w", err) 328 | } 329 | ps, err := sdkschema.Create( 330 | ctx, 331 | cschema.TypeAvro, 332 | avroPayloadSch.Name(), 333 | []byte(avroPayloadSch.String()), 334 | ) 335 | if err != nil { 336 | return fmt.Errorf("failed creating payload schema for relation %v: %w", rel.RelationName, err) 337 | } 338 | h.payloadSchemas[rel.RelationName] = ps 339 | 340 | // Key schema 341 | avroKeySch, err := schema.Avro.ExtractLogrepl(rel.RelationName+"_key", rel, h.tableKeys[rel.RelationName]) 342 | if err != nil { 343 | return fmt.Errorf("failed to extract key schema: %w", err) 344 | } 345 | ks, err := sdkschema.Create( 346 | ctx, 347 | cschema.TypeAvro, 348 | avroKeySch.Name(), 349 | []byte(avroKeySch.String()), 350 | ) 351 | if err != nil { 352 | return fmt.Errorf("failed creating key schema for relation %v: %w", rel.RelationName, err) 353 | } 354 | h.keySchemas[rel.RelationName] = ks 355 | 356 | return nil 357 | } 358 | 359 | func (h *CDCHandler) attachSchemas(rec opencdc.Record, relationName string) { 360 | if !h.withAvroSchema { 361 | return 362 | } 363 | cschema.AttachPayloadSchemaToRecord(rec, h.payloadSchemas[relationName]) 364 | cschema.AttachKeySchemaToRecord(rec, h.keySchemas[relationName]) 365 | } 366 | -------------------------------------------------------------------------------- /source/logrepl/handler_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2025 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package logrepl 16 | 17 | import ( 18 | "context" 19 | "testing" 20 | "time" 21 | 22 | "github.com/conduitio/conduit-commons/cchan" 23 | "github.com/conduitio/conduit-commons/opencdc" 24 | "github.com/matryer/is" 25 | ) 26 | 27 | func TestHandler_Batching_BatchSizeReached(t *testing.T) { 28 | ctx := context.Background() 29 | is := is.New(t) 30 | 31 | ch := make(chan []opencdc.Record, 1) 32 | underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, time.Second) 33 | want := make([]opencdc.Record, 5) 34 | for i := 0; i < cap(want); i++ { 35 | rec := newTestRecord(i) 36 | underTest.addToBatch(ctx, rec) 37 | want[i] = rec 38 | } 39 | 40 | recs, gotRecs, err := cchan.ChanOut[[]opencdc.Record](ch).RecvTimeout(ctx, time.Second) 41 | is.NoErr(err) 42 | is.True(gotRecs) 43 | is.Equal(recs, want) 44 | } 45 | 46 | // TestHandler_Batching_FlushInterval tests if the handler flushes 47 | // a batch once the flush interval passes, even if the batch size is not reached. 
48 | func TestHandler_Batching_FlushInterval(t *testing.T) { 49 | ctx := context.Background() 50 | is := is.New(t) 51 | 52 | ch := make(chan []opencdc.Record, 1) 53 | flushInterval := time.Second 54 | underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, flushInterval) 55 | 56 | want := make([]opencdc.Record, 3) 57 | for i := 0; i < cap(want); i++ { 58 | rec := newTestRecord(i) 59 | underTest.addToBatch(ctx, rec) 60 | want[i] = rec 61 | } 62 | 63 | start := time.Now() 64 | recs, gotRecs, err := cchan.ChanOut[[]opencdc.Record](ch).RecvTimeout(ctx, 1200*time.Millisecond) 65 | 66 | is.NoErr(err) 67 | is.True(gotRecs) 68 | is.Equal(recs, want) 69 | is.True(time.Since(start) >= flushInterval) 70 | } 71 | 72 | func TestHandler_Batching_ContextCancelled(t *testing.T) { 73 | ctx, cancel := context.WithCancel(context.Background()) 74 | is := is.New(t) 75 | 76 | ch := make(chan []opencdc.Record, 1) 77 | underTest := NewCDCHandler(ctx, nil, nil, ch, false, 5, time.Second) 78 | cancel() 79 | <-ctx.Done() 80 | underTest.addToBatch(ctx, newTestRecord(0)) 81 | 82 | _, recordReceived := <-ch 83 | is.True(!recordReceived) 84 | } 85 | 86 | func newTestRecord(id int) opencdc.Record { 87 | return opencdc.Record{ 88 | Key: opencdc.StructuredData{ 89 | "id": id, 90 | }, 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /source/logrepl/internal/error.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "errors" 19 | 20 | "github.com/jackc/pgerrcode" 21 | "github.com/jackc/pgx/v5/pgconn" 22 | ) 23 | 24 | func IsPgDuplicateErr(err error) bool { 25 | var pgerr *pgconn.PgError 26 | return errors.As(err, &pgerr) && pgerr.Code == pgerrcode.DuplicateObject 27 | } 28 | -------------------------------------------------------------------------------- /source/logrepl/internal/publication.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "strings" 21 | 22 | "github.com/conduitio/conduit-connector-postgres/internal" 23 | "github.com/jackc/pgx/v5/pgxpool" 24 | ) 25 | 26 | // CreatePublicationOptions contains additional options for creating a publication. 
27 | // Tables is the list of tables to publish; at least one table is required, 28 | // otherwise publication creation fails. PublicationParams are passed through 29 | // to the WITH (...) clause of CREATE PUBLICATION, e.g. "publish = 'insert'". 30 | type CreatePublicationOptions struct { 31 | Tables []string 32 | PublicationParams []string 33 | } 34 | 35 | // CreatePublication creates a publication. 36 | func CreatePublication(ctx context.Context, conn *pgxpool.Pool, name string, opts CreatePublicationOptions) error { 37 | if len(opts.Tables) == 0 { 38 | return fmt.Errorf("publication %q requires at least one table", name) 39 | } 40 | 41 | // quote each table identifier to tolerate special characters 42 | wrappedTablesNames := make([]string, 0, len(opts.Tables)) 43 | for _, t := range opts.Tables { 44 | wrappedTablesNames = append(wrappedTablesNames, internal.WrapSQLIdent(t)) 45 | } 46 | 47 | forTableString := fmt.Sprintf("FOR TABLE %s", strings.Join(wrappedTablesNames, ", ")) 48 | 49 | var publicationParams string 50 | if len(opts.PublicationParams) > 0 { 51 | publicationParams = fmt.Sprintf("WITH (%s)", strings.Join(opts.PublicationParams, ", ")) 52 | } 53 | 54 | if _, err := conn.Exec( 55 | ctx, 56 | fmt.Sprintf("CREATE PUBLICATION %q %s %s", name, forTableString, publicationParams), 57 | ); err != nil { 58 | return fmt.Errorf("failed to create publication %q: %w", name, err) 59 | } 60 | 61 | return nil 62 | } 63 | 64 | // DropPublicationOptions contains additional options for dropping a publication. 65 | type DropPublicationOptions struct { 66 | IfExists bool 67 | } 68 | 69 | // DropPublication drops a publication.
68 | func DropPublication(ctx context.Context, conn *pgxpool.Pool, name string, opts DropPublicationOptions) error { 69 | var ifExistsString string 70 | if opts.IfExists { 71 | ifExistsString = "IF EXISTS" 72 | } 73 | 74 | if _, err := conn.Exec( 75 | ctx, 76 | fmt.Sprintf("DROP PUBLICATION %s %q", ifExistsString, name), 77 | ); err != nil { 78 | return fmt.Errorf("failed to drop publication %q: %w", name, err) 79 | } 80 | 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /source/logrepl/internal/publication_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package internal 16 | 17 | import ( 18 | "fmt" 19 | "strings" 20 | "testing" 21 | 22 | "github.com/conduitio/conduit-connector-postgres/test" 23 | "github.com/matryer/is" 24 | ) 25 | 26 | func TestCreatePublication(t *testing.T) { 27 | ctx := test.Context(t) 28 | pool := test.ConnectPool(ctx, t, test.RegularConnString) 29 | 30 | pubNames := []string{"testpub", "123", "test-hyphen", "test:semicolon", "test.dot", "test=equal"} 31 | pubParams := [][]string{ 32 | nil, 33 | {"publish = 'insert'"}, 34 | {"publish = 'insert,update,delete'"}, 35 | } 36 | 37 | tables := []string{ 38 | test.SetupTestTable(ctx, t, pool), 39 | test.SetupTestTable(ctx, t, pool), 40 | } 41 | 42 | for _, givenPubName := range pubNames { 43 | for i, givenPubParams := range pubParams { 44 | testName := fmt.Sprintf("%s_%d", givenPubName, i) 45 | t.Run(testName, func(t *testing.T) { 46 | is := is.New(t) 47 | err := CreatePublication( 48 | ctx, 49 | pool, 50 | givenPubName, 51 | CreatePublicationOptions{ 52 | Tables: tables, 53 | PublicationParams: givenPubParams, 54 | }, 55 | ) 56 | is.NoErr(err) 57 | // cleanup 58 | is.NoErr(DropPublication(ctx, pool, givenPubName, DropPublicationOptions{})) 59 | }) 60 | } 61 | } 62 | 63 | // Without tables 64 | t.Run("fails without tables", func(t *testing.T) { 65 | is := is.New(t) 66 | 67 | err := CreatePublication(ctx, nil, "testpub", CreatePublicationOptions{}) 68 | is.Equal(err.Error(), `publication "testpub" requires at least one table`) 69 | }) 70 | } 71 | 72 | func TestCreatePublicationForTables(t *testing.T) { 73 | ctx := test.Context(t) 74 | pub := test.RandomIdentifier(t) 75 | pool := test.ConnectPool(ctx, t, test.RegularConnString) 76 | 77 | tables := [][]string{ 78 | {test.SetupTestTable(ctx, t, pool)}, 79 | {test.SetupTestTable(ctx, t, pool), test.SetupTestTable(ctx, t, pool)}, 80 | } 81 | 82 | for _, givenTables := range tables { 83 | testName := strings.Join(givenTables, ",") 84 | t.Run(testName, func(t *testing.T) { 85 | is := is.New(t) 86 
| err := CreatePublication( 87 | ctx, 88 | pool, 89 | pub, 90 | CreatePublicationOptions{ 91 | Tables: givenTables, 92 | }, 93 | ) 94 | is.NoErr(err) 95 | // cleanup 96 | is.NoErr(DropPublication(ctx, pool, pub, DropPublicationOptions{})) 97 | }) 98 | } 99 | } 100 | 101 | func TestDropPublication(t *testing.T) { 102 | ctx := test.Context(t) 103 | is := is.New(t) 104 | pub := test.RandomIdentifier(t) 105 | 106 | pool := test.ConnectPool(ctx, t, test.RegularConnString) 107 | err := DropPublication( 108 | ctx, 109 | pool, 110 | pub, 111 | DropPublicationOptions{ 112 | IfExists: false, // fail if pub doesn't exist 113 | }, 114 | ) 115 | test.IsPgError(is, err, "42704") 116 | 117 | // drop again with IfExists set, which must succeed for a missing publication 118 | err = DropPublication( 119 | ctx, 120 | pool, 121 | pub, 122 | DropPublicationOptions{ 123 | IfExists: true, // don't fail if pub doesn't exist 124 | }, 125 | ) 126 | is.NoErr(err) 127 | } 128 | -------------------------------------------------------------------------------- /source/logrepl/internal/relationset.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | package internal 16 | 17 | import ( 18 | "errors" 19 | "fmt" 20 | 21 | "github.com/conduitio/conduit-connector-postgres/source/types" 22 | "github.com/jackc/pglogrepl" 23 | "github.com/jackc/pgx/v5/pgtype" 24 | ) 25 | 26 | // RelationSet can be used to build a cache of relations returned by logical 27 | // replication. 28 | type RelationSet struct { 29 | relations map[uint32]*pglogrepl.RelationMessage 30 | connInfo *pgtype.Map 31 | } 32 | 33 | // NewRelationSet creates a new relation set. 34 | func NewRelationSet() *RelationSet { 35 | return &RelationSet{ 36 | relations: map[uint32]*pglogrepl.RelationMessage{}, 37 | connInfo: pgtype.NewMap(), 38 | } 39 | } 40 | 41 | // Add caches the relation message, keyed by its relation ID. 42 | func (rs *RelationSet) Add(r *pglogrepl.RelationMessage) { 43 | rs.relations[r.RelationID] = r 44 | } 45 | 46 | // Get returns the cached relation for the given ID, or an error if unknown. 47 | func (rs *RelationSet) Get(id uint32) (*pglogrepl.RelationMessage, error) { 48 | msg, ok := rs.relations[id] 49 | if !ok { 50 | return nil, fmt.Errorf("no relation for %d", id) 51 | } 52 | return msg, nil 53 | } 54 | 55 | // Values decodes the tuple data of a row into a map of column name to value, 56 | // using the cached relation to determine column names and types. 57 | func (rs *RelationSet) Values(id uint32, row *pglogrepl.TupleData) (map[string]any, error) { 58 | if row == nil { 59 | return nil, errors.New("no tuple data") 60 | } 61 | 62 | rel, err := rs.Get(id) 63 | if err != nil { 64 | return nil, err 65 | } 66 | 67 | values := map[string]any{} 68 | 69 | // NOTE: assumes row and rel describe the same number of columns 70 | for i, tuple := range row.Columns { 71 | col := rel.Columns[i] 72 | v, decodeErr := rs.decodeValue(col, tuple.Data) 73 | if decodeErr != nil { 74 | // wrap decodeErr (not the nil err from Get above) so the cause is preserved 75 | return nil, fmt.Errorf("failed to decode value for column %q: %w", col.Name, decodeErr) 76 | } 77 | 78 | values[col.Name] = v 79 | } 80 | 81 | return values, nil 82 | } 83 | 84 | // oidToCodec resolves the codec for a type OID, falling back to the unknown-OID codec. 85 | func (rs *RelationSet) oidToCodec(id uint32) pgtype.Codec { 86 | dt, ok := rs.connInfo.TypeForOID(id) 87 | if !ok { 88 | return rs.oidToCodec(pgtype.UnknownOID) 89 | } 90 | return dt.Codec 91 | } 92 | 93 | func (rs *RelationSet) decodeValue(col *pglogrepl.RelationMessageColumn,
data []byte) (any, error) { 88 | decoder := rs.oidToCodec(col.DataType) 89 | // This workaround is due to an issue in pgx v5.7.1. 90 | // Namely, that version introduces an XML codec 91 | // (see: https://github.com/jackc/pgx/pull/2083/files#diff-8288d41e69f73d01a874b40de086684e5894da83a627e845e484b06d5e053a44). 92 | // The XML codec, however, always return nil when deserializing input bytes 93 | // (see: https://github.com/jackc/pgx/pull/2083#discussion_r1755768269). 94 | var val any 95 | var err error 96 | 97 | switch col.DataType { 98 | case pgtype.XMLOID, pgtype.XMLArrayOID, pgtype.JSONBOID, pgtype.JSONOID: 99 | val, err = decoder.DecodeDatabaseSQLValue(rs.connInfo, col.DataType, pgtype.TextFormatCode, data) 100 | default: 101 | val, err = decoder.DecodeValue(rs.connInfo, col.DataType, pgtype.TextFormatCode, data) 102 | } 103 | 104 | if err != nil { 105 | return nil, fmt.Errorf("failed to decode value of pgtype %v: %w", col.DataType, err) 106 | } 107 | 108 | v, err := types.Format(col.DataType, val) 109 | if err != nil { 110 | return nil, fmt.Errorf("failed to format column %q type %T: %w", col.Name, val, err) 111 | } 112 | 113 | return v, nil 114 | } 115 | -------------------------------------------------------------------------------- /source/logrepl/internal/replication_slot.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | 22 | "github.com/jackc/pglogrepl" 23 | "github.com/jackc/pgx/v5" 24 | "github.com/jackc/pgx/v5/pgxpool" 25 | ) 26 | 27 | var ErrMissingSlot = errors.New("replication slot missing") 28 | 29 | type ReadReplicationSlotResult struct { 30 | Name string 31 | ConfirmedFlushLSN pglogrepl.LSN 32 | RestartLSN pglogrepl.LSN 33 | } 34 | 35 | // ReadReplicationSlot returns state of an existing replication slot. 36 | func ReadReplicationSlot(ctx context.Context, conn *pgxpool.Pool, name string) (ReadReplicationSlotResult, error) { 37 | var r ReadReplicationSlotResult 38 | 39 | qr := conn.QueryRow(ctx, "SELECT slot_name, confirmed_flush_lsn, restart_lsn FROM pg_replication_slots WHERE slot_name=$1", name) 40 | if err := qr.Scan(&r.Name, &r.ConfirmedFlushLSN, &r.RestartLSN); err != nil { 41 | if errors.Is(err, pgx.ErrNoRows) { 42 | return ReadReplicationSlotResult{}, fmt.Errorf("%s: %w", name, ErrMissingSlot) 43 | } 44 | return ReadReplicationSlotResult{}, fmt.Errorf("failed to read replication slot %q: %w", name, err) 45 | } 46 | 47 | return r, nil 48 | } 49 | -------------------------------------------------------------------------------- /source/logrepl/internal/replication_slot_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "errors" 19 | "fmt" 20 | "testing" 21 | 22 | "github.com/conduitio/conduit-connector-postgres/test" 23 | "github.com/matryer/is" 24 | ) 25 | 26 | func Test_ReadReplicationSlot(t *testing.T) { 27 | var ( 28 | ctx = test.Context(t) 29 | pool = test.ConnectPool(ctx, t, test.RepmgrConnString) 30 | slotName = test.RandomIdentifier(t) 31 | ) 32 | 33 | t.Run("read replication slot", func(t *testing.T) { 34 | is := is.New(t) 35 | 36 | test.CreateReplicationSlot(t, pool, slotName) 37 | res, err := ReadReplicationSlot(ctx, pool, slotName) 38 | is.NoErr(err) 39 | is.Equal(res.Name, slotName) 40 | is.True(res.ConfirmedFlushLSN > 0) 41 | is.True(res.RestartLSN > 0) 42 | }) 43 | 44 | t.Run("fails when slot is missing", func(t *testing.T) { 45 | is := is.New(t) 46 | 47 | _, err := ReadReplicationSlot(ctx, pool, slotName) 48 | is.True(err != nil) 49 | is.True(errors.Is(err, ErrMissingSlot)) 50 | }) 51 | 52 | t.Run("fails when conn errors", func(t *testing.T) { 53 | is := is.New(t) 54 | pool := test.ConnectPool(ctx, t, test.RepmgrConnString) 55 | pool.Close() 56 | 57 | _, err := ReadReplicationSlot(ctx, pool, slotName) 58 | is.True(err != nil) 59 | is.Equal(err.Error(), fmt.Sprintf("failed to read replication slot %q: closed pool", slotName)) 60 | }) 61 | } 62 | -------------------------------------------------------------------------------- /source/logrepl/internal/subscription_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 
© 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package internal 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | "sync/atomic" 22 | "testing" 23 | "time" 24 | 25 | "github.com/conduitio/conduit-connector-postgres/test" 26 | "github.com/jackc/pglogrepl" 27 | "github.com/jackc/pgx/v5/pgxpool" 28 | "github.com/matryer/is" 29 | ) 30 | 31 | func TestSubscription_Create(t *testing.T) { 32 | ctx := test.Context(t) 33 | is := is.New(t) 34 | pool := test.ConnectPool(ctx, t, test.RepmgrConnString) 35 | pool.Close() 36 | 37 | _, err := CreateSubscription(ctx, pool, "slotname", "pubname", nil, 0, nil) 38 | is.Equal(err.Error(), "could not establish replication connection: closed pool") 39 | } 40 | 41 | func TestSubscription_WithRepmgr(t *testing.T) { 42 | var ( 43 | ctx = test.Context(t) 44 | pool = test.ConnectPool(ctx, t, test.RepmgrConnString) 45 | table1 = test.SetupTestTable(ctx, t, pool) 46 | table2 = test.SetupTestTable(ctx, t, pool) 47 | ) 48 | 49 | sub, messages := setupSubscription(ctx, t, pool, table1, table2) 50 | 51 | fetchAndAssertMessageTypes := func(is *is.I, m chan pglogrepl.Message, msgTypes ...pglogrepl.MessageType) []pglogrepl.Message { 52 | out := make([]pglogrepl.Message, len(msgTypes)) 53 | for i, msgType := range msgTypes { 54 | select { 55 | case msg := <-m: 56 | is.Equal(msg.Type(), msgType) 57 | out[i] = msg 58 | case <-time.After(time.Second): 59 | 
is.Fail() // timeout while waiting to receive message 60 | } 61 | } 62 | return out 63 | } 64 | 65 | t.Run("first insert table1", func(t *testing.T) { 66 | is := is.New(t) 67 | query := `INSERT INTO %s (id, column1, column2, column3) 68 | VALUES (6, 'bizz', 456, false)` 69 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table1)) 70 | is.NoErr(err) 71 | 72 | _ = fetchAndAssertMessageTypes( 73 | is, 74 | messages, 75 | // first insert should contain the relation as well 76 | pglogrepl.MessageTypeBegin, 77 | pglogrepl.MessageTypeRelation, 78 | pglogrepl.MessageTypeInsert, 79 | pglogrepl.MessageTypeCommit, 80 | ) 81 | }) 82 | 83 | t.Run("second insert table1", func(t *testing.T) { 84 | is := is.New(t) 85 | query := `INSERT INTO %s (id, column1, column2, column3) 86 | VALUES (7, 'bizz', 456, false)` 87 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table1)) 88 | is.NoErr(err) 89 | 90 | _ = fetchAndAssertMessageTypes( 91 | is, 92 | messages, 93 | // second insert does not ship the relation 94 | pglogrepl.MessageTypeBegin, 95 | pglogrepl.MessageTypeInsert, 96 | pglogrepl.MessageTypeCommit, 97 | ) 98 | }) 99 | 100 | t.Run("first update table2", func(t *testing.T) { 101 | is := is.New(t) 102 | query := `UPDATE %s SET column1 = 'foo' WHERE id = 1` 103 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table2)) 104 | is.NoErr(err) 105 | 106 | _ = fetchAndAssertMessageTypes( 107 | is, 108 | messages, 109 | // first insert should contain the relation as well 110 | pglogrepl.MessageTypeBegin, 111 | pglogrepl.MessageTypeRelation, 112 | pglogrepl.MessageTypeUpdate, 113 | pglogrepl.MessageTypeCommit, 114 | ) 115 | }) 116 | 117 | t.Run("update all table 2", func(t *testing.T) { 118 | is := is.New(t) 119 | query := `UPDATE %s SET column1 = 'bar'` // update all rows 120 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table2)) 121 | is.NoErr(err) 122 | 123 | _ = fetchAndAssertMessageTypes( 124 | is, 125 | messages, 126 | // we already got the relation so second update is without relation 127 | 
pglogrepl.MessageTypeBegin, 128 | pglogrepl.MessageTypeUpdate, 129 | pglogrepl.MessageTypeUpdate, 130 | pglogrepl.MessageTypeUpdate, 131 | pglogrepl.MessageTypeUpdate, 132 | pglogrepl.MessageTypeCommit, 133 | ) 134 | }) 135 | 136 | t.Run("Last WAL written is behind keepalive", func(t *testing.T) { 137 | is := is.New(t) 138 | time.Sleep(2 * time.Second) 139 | 140 | walFlushed := pglogrepl.LSN(atomic.LoadUint64((*uint64)(&sub.walFlushed))) 141 | serverWALEnd := pglogrepl.LSN(atomic.LoadUint64((*uint64)(&sub.serverWALEnd))) 142 | 143 | is.True(serverWALEnd >= sub.walWritten) 144 | is.True(sub.walWritten > walFlushed) 145 | }) 146 | 147 | t.Run("no more messages", func(t *testing.T) { 148 | isNoMoreMessages(t, messages, time.Millisecond*500) 149 | }) 150 | } 151 | 152 | func TestSubscription_ClosedContext(t *testing.T) { 153 | ctx := test.Context(t) 154 | ctx, cancel := context.WithCancel(ctx) 155 | 156 | var ( 157 | is = is.New(t) 158 | pool = test.ConnectPool(ctx, t, test.RepmgrConnString) 159 | table = test.SetupTestTable(ctx, t, pool) 160 | ) 161 | 162 | sub, messages := setupSubscription(ctx, t, pool, table) 163 | 164 | // insert to get new messages into publication 165 | query := `INSERT INTO %s (id, column1, column2, column3) 166 | VALUES (6, 'bizz', 456, false)` 167 | _, err := pool.Exec(ctx, fmt.Sprintf(query, table)) 168 | is.NoErr(err) 169 | 170 | cancel() 171 | // do not fetch messages, just close context instead 172 | select { 173 | case <-time.After(time.Second): 174 | is.Fail() // timed out while waiting for subscription to close 175 | case <-sub.Done(): 176 | // all good 177 | } 178 | 179 | is.True(errors.Is(sub.Err(), context.Canceled)) 180 | isNoMoreMessages(t, messages, time.Millisecond*500) 181 | } 182 | 183 | func TestSubscription_Ack(t *testing.T) { 184 | is := is.New(t) 185 | 186 | s := &Subscription{} 187 | s.Ack(12345) 188 | 189 | is.Equal(s.walFlushed, pglogrepl.LSN(12345)) 190 | } 191 | 192 | func TestSubscription_Stop(t *testing.T) { 193 | 
	t.Run("with stop function", func(t *testing.T) {
		is := is.New(t)

		var stopped bool

		s := &Subscription{
			stop: func() {
				stopped = true
			},
		}

		s.Stop()
		is.True(stopped)
	})

	t.Run("with missing stop function", func(*testing.T) {
		// Stop must be a safe no-op when no stop callback was installed.
		s := &Subscription{}
		s.Stop()
	})
}

// setupSubscription creates a random replication slot and publication for the
// given tables, starts a Subscription whose handler forwards every received
// logical replication message to the returned channel, and registers cleanup
// of both the subscription and the slot.
func setupSubscription(
	ctx context.Context,
	t *testing.T,
	pool *pgxpool.Pool,
	tables ...string,
) (*Subscription, chan pglogrepl.Message) {
	is := is.New(t)

	slotName := test.RandomIdentifier(t)
	publication := test.RandomIdentifier(t)

	test.CreatePublication(t, pool, publication, tables)

	// Unbuffered: the handler blocks until the test consumes each message.
	messages := make(chan pglogrepl.Message)
	sub, err := CreateSubscription(
		ctx,
		pool,
		slotName,
		publication,
		tables,
		0,
		func(ctx context.Context, msg pglogrepl.Message, lsn pglogrepl.LSN) (pglogrepl.LSN, error) {
			select {
			case <-ctx.Done():
				return 0, ctx.Err()
			case messages <- msg:
				return lsn, nil
			}
		},
	)
	is.NoErr(err)

	sub.StatusTimeout = 1 * time.Second

	go func() {
		err := sub.Run(ctx)
		if !errors.Is(err, context.Canceled) {
			t.Logf("unexpected error: %+v", err)
			is.Fail()
		}
	}()

	// wait for subscription to be ready
	select {
	case <-sub.Ready():
		// all good
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out while waiting for subscription to be ready")
	}

	t.Cleanup(func() {
		// stop subscription
		cctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		is.NoErr(sub.Teardown(cctx))
		cancel()

		// Drop the replication slot so repeated test runs don't leak slots.
		_, err := pool.Exec(
			context.Background(),
			"SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name=$1",
			slotName,
		)
		is.NoErr(err)
	})

	return sub, messages
}

// isNoMoreMessages waits for the duration of the timeout and logs any new
// messages if they are received. If a message is received that is not a "begin"
// or "commit" message, the test is marked as failed.
func isNoMoreMessages(t *testing.T, messages <-chan pglogrepl.Message, timeout time.Duration) {
	is := is.New(t)

	// there should be no more messages, wait shortly to make sure and log any
	// messages that we receive in the meantime
	var messagesReceived bool
	timeoutChan := time.After(timeout)
	for {
		select {
		case msg := <-messages:
			// empty begin/commit blocks are expected, work is being done to
			// reduce them (https://commitfest.postgresql.org/33/3093/)
			if msg.Type() == pglogrepl.MessageTypeBegin ||
				msg.Type() == pglogrepl.MessageTypeCommit {
				t.Logf("got message of type %s: %+v", msg.Type(), msg)
			} else {
				t.Logf("unexpected message of type %s: %+v", msg.Type(), msg)
				messagesReceived = true
			}
		case <-timeoutChan:
			if messagesReceived {
				is.Fail() // expected no more messages
			}
			return
		}
	}
}
--------------------------------------------------------------------------------
/source/position/position.go:
--------------------------------------------------------------------------------
// Copyright © 2024 Meroxa, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package position 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | 21 | "github.com/conduitio/conduit-commons/opencdc" 22 | "github.com/jackc/pglogrepl" 23 | ) 24 | 25 | //go:generate stringer -type=Type -trimprefix Type 26 | 27 | type Type int 28 | 29 | const ( 30 | TypeInitial Type = iota 31 | TypeSnapshot 32 | TypeCDC 33 | ) 34 | 35 | type Position struct { 36 | Type Type `json:"type"` 37 | Snapshots SnapshotPositions `json:"snapshots,omitempty"` 38 | LastLSN string `json:"last_lsn,omitempty"` 39 | } 40 | 41 | type SnapshotPositions map[string]SnapshotPosition 42 | 43 | type SnapshotPosition struct { 44 | LastRead int64 `json:"last_read"` 45 | SnapshotEnd int64 `json:"snapshot_end"` 46 | } 47 | 48 | func ParseSDKPosition(sdkPos opencdc.Position) (Position, error) { 49 | var p Position 50 | 51 | if len(sdkPos) == 0 { 52 | return p, nil 53 | } 54 | 55 | if err := json.Unmarshal(sdkPos, &p); err != nil { 56 | return p, fmt.Errorf("invalid position: %w", err) 57 | } 58 | return p, nil 59 | } 60 | 61 | func (p Position) ToSDKPosition() opencdc.Position { 62 | v, err := json.Marshal(p) 63 | if err != nil { 64 | // This should never happen, all Position structs should be valid. 65 | panic(err) 66 | } 67 | return v 68 | } 69 | 70 | // LSN returns the last LSN (Log Sequence Number) in the position. 71 | func (p Position) LSN() (pglogrepl.LSN, error) { 72 | if p.LastLSN == "" { 73 | return 0, nil 74 | } 75 | 76 | lsn, err := pglogrepl.ParseLSN(p.LastLSN) 77 | if err != nil { 78 | return 0, err 79 | } 80 | 81 | return lsn, nil 82 | } 83 | -------------------------------------------------------------------------------- /source/position/position_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package position

import (
	"testing"

	"github.com/conduitio/conduit-commons/opencdc"
	"github.com/matryer/is"
)

// Test_ToSDKPosition pins the exact JSON wire format of a fully populated
// Position.
func Test_ToSDKPosition(t *testing.T) {
	is := is.New(t)

	p := Position{
		Type: TypeSnapshot,
		Snapshots: SnapshotPositions{
			"orders": {LastRead: 1, SnapshotEnd: 2},
		},
		LastLSN: "4/137515E8",
	}

	sdkPos := p.ToSDKPosition()
	is.Equal(
		string(sdkPos),
		`{"type":1,"snapshots":{"orders":{"last_read":1,"snapshot_end":2}},"last_lsn":"4/137515E8"}`,
	)
}

// Test_PositionLSN covers the invalid-LSN error path and parsing of a
// well-formed "hi/lo" LSN string.
func Test_PositionLSN(t *testing.T) {
	is := is.New(t)

	invalid := Position{LastLSN: "invalid"}
	_, err := invalid.LSN()
	is.True(err != nil)
	is.Equal(err.Error(), "failed to parse LSN: expected integer")

	valid := Position{LastLSN: "4/137515E8"}
	lsn, noErr := valid.LSN()
	is.NoErr(noErr)
	// 4<<32 + 0x137515E8 == 17506309608.
	is.Equal(uint64(lsn), uint64(17506309608))
}

// Test_ParseSDKPosition checks round-tripping from JSON and the error on
// malformed input.
func Test_ParseSDKPosition(t *testing.T) {
	is := is.New(t)

	valid := opencdc.Position(
		[]byte(
			`{"type":1,"snapshots":{"orders":{"last_read":1,"snapshot_end":2}},"last_lsn":"4/137515E8"}`,
		),
	)

	p, validErr := ParseSDKPosition(valid)
	is.NoErr(validErr)

	is.Equal(p, Position{
		Type: TypeSnapshot,
		Snapshots: SnapshotPositions{
			"orders": {LastRead: 1, SnapshotEnd: 2},
		},
		LastLSN: "4/137515E8",
	})

	_, invalidErr := ParseSDKPosition(opencdc.Position("{"))
	is.True(invalidErr != nil)
	is.Equal(invalidErr.Error(), "invalid position: unexpected end of JSON input")
}
--------------------------------------------------------------------------------
/source/position/type_string.go:
--------------------------------------------------------------------------------
// Code generated by "stringer -type=Type -trimprefix Type"; DO NOT EDIT.

package position

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[TypeInitial-0]
	_ = x[TypeSnapshot-1]
	_ = x[TypeCDC-2]
}

const _Type_name = "InitialSnapshotCDC"

var _Type_index = [...]uint8{0, 7, 15, 18}

func (i Type) String() string {
	if i < 0 || i >= Type(len(_Type_index)-1) {
		return "Type(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _Type_name[_Type_index[i]:_Type_index[i+1]]
}
--------------------------------------------------------------------------------
/source/schema/avro.go:
--------------------------------------------------------------------------------
// Copyright © 2024 Meroxa, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package schema 16 | 17 | import ( 18 | "cmp" 19 | "fmt" 20 | "slices" 21 | 22 | "github.com/hamba/avro/v2" 23 | "github.com/jackc/pglogrepl" 24 | "github.com/jackc/pgx/v5/pgconn" 25 | "github.com/jackc/pgx/v5/pgtype" 26 | ) 27 | 28 | var Avro = &avroExtractor{ 29 | pgMap: pgtype.NewMap(), 30 | avroMap: map[string]*avro.PrimitiveSchema{ 31 | "bool": avro.NewPrimitiveSchema(avro.Boolean, nil), 32 | "bytea": avro.NewPrimitiveSchema(avro.Bytes, nil), 33 | "float4": avro.NewPrimitiveSchema(avro.Float, nil), 34 | "float8": avro.NewPrimitiveSchema(avro.Double, nil), 35 | "int8": avro.NewPrimitiveSchema(avro.Long, nil), 36 | "int4": avro.NewPrimitiveSchema(avro.Int, nil), 37 | "int2": avro.NewPrimitiveSchema(avro.Int, nil), 38 | "text": avro.NewPrimitiveSchema(avro.String, nil), 39 | "varchar": avro.NewPrimitiveSchema(avro.String, nil), 40 | "jsonb": avro.NewPrimitiveSchema(avro.Bytes, nil), 41 | "json": avro.NewPrimitiveSchema(avro.Bytes, nil), 42 | "timestamptz": avro.NewPrimitiveSchema( 43 | avro.Long, 44 | avro.NewPrimitiveLogicalSchema(avro.TimestampMicros), 45 | ), 46 | "timestamp": avro.NewPrimitiveSchema( 47 | avro.Long, 48 | avro.NewPrimitiveLogicalSchema(avro.LocalTimestampMicros), 49 | ), 50 | "date": avro.NewPrimitiveSchema( 51 | avro.Int, 52 | avro.NewPrimitiveLogicalSchema(avro.Date), 53 | ), 54 | "uuid": avro.NewPrimitiveSchema( 55 | avro.String, 56 | avro.NewPrimitiveLogicalSchema(avro.UUID), 57 | ), 58 | }, 59 | } 60 | 61 | type avroExtractor struct { 62 | pgMap *pgtype.Map 63 | avroMap map[string]*avro.PrimitiveSchema 64 | } 65 | 66 | // ExtractLogrepl extracts an Avro schema from the given pglogrepl.RelationMessage. 67 | // If `fieldNames` are specified, then only the given fields will be included in the schema. 
68 | func (a avroExtractor) ExtractLogrepl(schemaName string, rel *pglogrepl.RelationMessage, fieldNames ...string) (*avro.RecordSchema, error) { 69 | var fields []pgconn.FieldDescription 70 | 71 | for i := range rel.Columns { 72 | fields = append(fields, pgconn.FieldDescription{ 73 | Name: rel.Columns[i].Name, 74 | DataTypeOID: rel.Columns[i].DataType, 75 | TypeModifier: rel.Columns[i].TypeModifier, 76 | }) 77 | } 78 | 79 | return a.Extract(schemaName, fields, fieldNames...) 80 | } 81 | 82 | // Extract extracts an Avro schema from the given Postgres field descriptions. 83 | // If `fieldNames` are specified, then only the given fields will be included in the schema. 84 | func (a *avroExtractor) Extract(schemaName string, fields []pgconn.FieldDescription, fieldNames ...string) (*avro.RecordSchema, error) { 85 | var avroFields []*avro.Field 86 | 87 | for _, f := range fields { 88 | if len(fieldNames) > 0 && !slices.Contains(fieldNames, f.Name) { 89 | continue 90 | } 91 | 92 | t, ok := a.pgMap.TypeForOID(f.DataTypeOID) 93 | if !ok { 94 | return nil, fmt.Errorf("field %q with OID %d cannot be resolved", f.Name, f.DataTypeOID) 95 | } 96 | 97 | s, err := a.extractType(t, f.TypeModifier) 98 | if err != nil { 99 | return nil, err 100 | } 101 | 102 | af, err := avro.NewField(f.Name, s) 103 | if err != nil { 104 | return nil, fmt.Errorf("failed to create avro field %q: %w", f.Name, err) 105 | } 106 | 107 | avroFields = append(avroFields, af) 108 | } 109 | 110 | slices.SortFunc(avroFields, func(a, b *avro.Field) int { 111 | return cmp.Compare(a.Name(), b.Name()) 112 | }) 113 | 114 | sch, err := avro.NewRecordSchema(schemaName, "", avroFields) 115 | if err != nil { 116 | return nil, fmt.Errorf("failed to create avro schema: %w", err) 117 | } 118 | 119 | return sch, nil 120 | } 121 | 122 | func (a *avroExtractor) extractType(t *pgtype.Type, typeMod int32) (avro.Schema, error) { 123 | if ps, ok := a.avroMap[t.Name]; ok { 124 | return ps, nil 125 | } 126 | 127 | switch t.OID { 
128 | case pgtype.NumericOID: 129 | scale := int((typeMod - 4) & 65535) 130 | precision := int(((typeMod - 4) >> 16) & 65535) 131 | return avro.NewPrimitiveSchema( 132 | avro.Bytes, 133 | avro.NewDecimalLogicalSchema(precision, scale), 134 | ), nil 135 | default: 136 | return nil, fmt.Errorf("cannot resolve field type %q ", t.Name) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /source/schema/avro_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 

package schema

import (
	"cmp"
	"context"
	"fmt"
	"math/big"
	"slices"
	"testing"
	"time"

	"github.com/conduitio/conduit-connector-postgres/source/types"
	"github.com/conduitio/conduit-connector-postgres/test"
	"github.com/hamba/avro/v2"
	"github.com/jackc/pgx/v5/pgconn"
	"github.com/jackc/pgx/v5/pgtype"
	"github.com/matryer/is"
)

// Test_AvroExtract extracts an Avro schema from live query metadata for a
// table with one column per supported type, then round-trips a row through
// avro.Marshal/Unmarshal and compares the decoded values to the originals.
func Test_AvroExtract(t *testing.T) {
	ctx := test.Context(t)
	is := is.New(t)

	c := test.ConnectSimple(ctx, t, test.RegularConnString)
	table := setupAvroTestTable(ctx, t, c)
	insertAvroTestRow(ctx, t, c, table)

	rows, err := c.Query(ctx, "SELECT * FROM "+table)
	is.NoErr(err)
	defer rows.Close()

	rows.Next()

	values, err := rows.Values()
	is.NoErr(err)

	fields := rows.FieldDescriptions()

	sch, err := Avro.Extract(table, fields)
	is.NoErr(err)

	t.Run("schema is parsable", func(t *testing.T) {
		is := is.New(t)
		is.NoErr(err)
		is.Equal(sch, avroTestSchema(t, table))

		_, err = avro.Parse(sch.String())
		is.NoErr(err)
	})

	t.Run("serde row", func(t *testing.T) {
		is := is.New(t)

		row := avrolizeMap(fields, values)

		sch, err := avro.Parse(sch.String())
		is.NoErr(err)

		data, err := avro.Marshal(sch, row)
		is.NoErr(err)
		is.True(len(data) > 0)

		decoded := make(map[string]any)
		is.NoErr(avro.Unmarshal(sch, data, &decoded))

		is.Equal(len(decoded), len(row))
		is.Equal(row["col_boolean"], decoded["col_boolean"])
		is.Equal(row["col_bytea"], decoded["col_bytea"])
		is.Equal(row["col_varchar"], decoded["col_varchar"])
		is.Equal(row["col_date"], decoded["col_date"])
		is.Equal(row["col_float4"], decoded["col_float4"])
		is.Equal(row["col_float8"], decoded["col_float8"])

		// int2/int4 decode as Go int; widen the originals before comparing.
		colInt2 := int(row["col_int2"].(int16))
		is.Equal(colInt2, decoded["col_int2"])

		colInt4 := int(row["col_int4"].(int32))
		is.Equal(colInt4, decoded["col_int4"])

		is.Equal(row["col_int8"], decoded["col_int8"])

		// Decimals are compared by rational string to avoid representation
		// differences between equal values.
		numRow := row["col_numeric"].(*big.Rat)
		numDecoded := decoded["col_numeric"].(*big.Rat)
		is.Equal(numRow.RatString(), numDecoded.RatString())

		is.Equal(row["col_text"], decoded["col_text"])

		// Timestamps are compared in UTC to neutralize zone differences.
		rowTS, colTS := row["col_timestamp"].(time.Time), decoded["col_timestamp"].(time.Time)
		is.Equal(rowTS.UTC().String(), colTS.UTC().String())

		rowTSTZ, colTSTZ := row["col_timestamptz"].(time.Time), decoded["col_timestamptz"].(time.Time)
		is.Equal(rowTSTZ.UTC().String(), colTSTZ.UTC().String())

		is.Equal(row["col_uuid"], decoded["col_uuid"])
	})
}

// setupAvroTestTable creates a table with one column per supported Postgres
// type and returns its randomized name.
func setupAvroTestTable(ctx context.Context, t *testing.T, conn test.Querier) string {
	is := is.New(t)
	table := test.RandomIdentifier(t)

	query := `
	CREATE TABLE %s (
		col_boolean boolean,
		col_bytea bytea,
		col_varchar varchar(10),
		col_date date,
		col_float4 float4,
		col_float8 float8,
		col_int2 int2,
		col_int4 int4,
		col_int8 int8,
		col_numeric numeric(8,2),
		col_text text,
		col_timestamp timestamp,
		col_timestamptz timestamptz,
		col_uuid uuid
	)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)

	return table
}

// insertAvroTestRow inserts one row with a representative value per column,
// including boundary values for the integer types.
func insertAvroTestRow(ctx context.Context, t *testing.T, conn test.Querier, table string) {
	is := is.New(t)
	query := `
	INSERT INTO %s (
		col_boolean,
		col_bytea,
		col_varchar,
		col_date,
		col_float4,
		col_float8,
		col_int2,
		col_int4,
		col_int8,
		col_numeric,
		col_text,
		col_timestamp,
		col_timestamptz,
		col_uuid
	) VALUES (
		true, -- col_boolean
		'\x07', -- col_bytea
		'9', -- col_varchar
		'2022-03-14', -- col_date
		15, -- col_float4
		16.16, -- col_float8
		32767, -- col_int2
		2147483647, -- col_int4
		9223372036854775807, -- col_int8
		'292929.29', -- col_numeric
		'foo bar baz', -- col_text
		'2022-03-14 15:16:17', -- col_timestamp
		'2022-03-14 15:16:17-08', -- col_timestamptz
		'bd94ee0b-564f-4088-bf4e-8d5e626caf66' -- col_uuid
	)`
	query = fmt.Sprintf(query, table)
	_, err := conn.Exec(ctx, query)
	is.NoErr(err)
}

// avroTestSchema builds the expected Avro record schema for the test table,
// mirroring the extractor's name-sorted field order.
func avroTestSchema(t *testing.T, table string) avro.Schema {
	is := is.New(t)

	fields := []*avro.Field{
		assert(avro.NewField("col_boolean", avro.NewPrimitiveSchema(avro.Boolean, nil))),
		assert(avro.NewField("col_bytea", avro.NewPrimitiveSchema(avro.Bytes, nil))),
		assert(avro.NewField("col_varchar", avro.NewPrimitiveSchema(avro.String, nil))),
		assert(avro.NewField("col_float4", avro.NewPrimitiveSchema(avro.Float, nil))),
		assert(avro.NewField("col_float8", avro.NewPrimitiveSchema(avro.Double, nil))),
		assert(avro.NewField("col_int2", avro.NewPrimitiveSchema(avro.Int, nil))),
		assert(avro.NewField("col_int4", avro.NewPrimitiveSchema(avro.Int, nil))),
		assert(avro.NewField("col_int8", avro.NewPrimitiveSchema(avro.Long, nil))),
		assert(avro.NewField("col_text", avro.NewPrimitiveSchema(avro.String, nil))),
		assert(avro.NewField("col_numeric", avro.NewPrimitiveSchema(
			avro.Bytes,
			avro.NewDecimalLogicalSchema(8, 2),
		))),
		assert(avro.NewField("col_date", avro.NewPrimitiveSchema(
			avro.Int,
			avro.NewPrimitiveLogicalSchema(avro.Date),
		))),
		assert(avro.NewField("col_timestamp", avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.LocalTimestampMicros),
		))),
		assert(avro.NewField("col_timestamptz", avro.NewPrimitiveSchema(
			avro.Long,
			avro.NewPrimitiveLogicalSchema(avro.TimestampMicros),
		))),
		assert(avro.NewField("col_uuid", avro.NewPrimitiveSchema(
			avro.String,
			avro.NewPrimitiveLogicalSchema(avro.UUID),
		))),
	}

	slices.SortFunc(fields, func(a, b *avro.Field) int {
		return cmp.Compare(a.Name(), b.Name())
	})

	s, err := avro.NewRecordSchema(table, "", fields)
	is.NoErr(err)

	return s
}

// avrolizeMap converts raw row values to the Go types the Avro codec expects
// (big.Rat for numerics, string for UUIDs).
func avrolizeMap(fields []pgconn.FieldDescription, values []any) map[string]any {
	row := make(map[string]any)

	for i, f := range fields {
		switch f.DataTypeOID {
		case pgtype.NumericOID:
			n := new(big.Rat)
			n.SetString(fmt.Sprint(types.Format(0, values[i])))
			row[f.Name] = n
		case pgtype.UUIDOID:
			row[f.Name] = fmt.Sprint(values[i])
		default:
			row[f.Name] = values[i]
		}
	}

	return row
}

// assert panics on err, letting schema fixtures be built inline.
func assert[T any](a T, err error) T {
	if err != nil {
		panic(err)
	}

	return a
}
--------------------------------------------------------------------------------
/source/snapshot/convert.go:
--------------------------------------------------------------------------------
// Copyright © 2024 Meroxa, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package snapshot 16 | 17 | import ( 18 | "fmt" 19 | ) 20 | 21 | func keyInt64(id any) (int64, error) { 22 | switch t := id.(type) { 23 | case int: 24 | return int64(t), nil 25 | case int8: 26 | return int64(t), nil 27 | case int16: 28 | return int64(t), nil 29 | case int32: 30 | return int64(t), nil 31 | case int64: 32 | return t, nil 33 | default: 34 | return 0, fmt.Errorf("invalid type for key %T", id) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /source/snapshot/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 

package snapshot

import (
	"context"
	"errors"
	"fmt"

	"github.com/conduitio/conduit-commons/csync"
	"github.com/conduitio/conduit-commons/opencdc"
	cschema "github.com/conduitio/conduit-commons/schema"
	"github.com/conduitio/conduit-connector-postgres/source/position"
	sdk "github.com/conduitio/conduit-connector-sdk"
	"github.com/jackc/pgx/v5/pgxpool"
	"gopkg.in/tomb.v2"
)

// ErrIteratorDone is returned by NextN once all fetch workers have finished
// and every produced record has been acked.
var ErrIteratorDone = errors.New("snapshot complete")

// Config holds the snapshot iterator settings.
type Config struct {
	// Position is the SDK position to resume from (may be empty).
	Position opencdc.Position
	// Tables lists the tables to snapshot.
	Tables []string
	// TableKeys maps a table name to the column used as the record key.
	TableKeys map[string]string
	// TXSnapshotID pins the fetch workers to an exported transaction snapshot.
	TXSnapshotID string
	// FetchSize is the number of rows fetched per batch.
	FetchSize int
	// WithAvroSchema attaches Avro key/payload schemas to produced records.
	WithAvroSchema bool
}

// Iterator produces snapshot records by running one fetch worker per table
// inside a shared tomb and multiplexing their batches onto one channel.
type Iterator struct {
	db *pgxpool.Pool

	workersTomb *tomb.Tomb
	workers     []*FetchWorker
	// acks counts produced-but-unacked records; NextN waits on it before
	// reporting the snapshot as done.
	acks csync.WaitGroup

	conf Config

	// lastPosition accumulates per-table snapshot progress across records.
	lastPosition position.Position

	// data receives batches from the fetch workers; closed once all workers
	// have exited (see startWorkers).
	data chan []FetchData
}

// NewIterator parses the resume position, validates a fetch worker per
// configured table, and starts the workers.
func NewIterator(ctx context.Context, db *pgxpool.Pool, c Config) (*Iterator, error) {
	p, err := position.ParseSDKPosition(c.Position)
	if err != nil {
		return nil, fmt.Errorf("failed to parse position: %w", err)
	}

	if p.Snapshots == nil {
		p.Snapshots = make(position.SnapshotPositions)
	}

	t, _ := tomb.WithContext(ctx)
	i := &Iterator{
		db:           db,
		workersTomb:  t,
		conf:         c,
		data:         make(chan []FetchData),
		lastPosition: p,
	}

	if err := i.initFetchers(ctx); err != nil {
		return nil, fmt.Errorf("failed to initialize table fetchers: %w", err)
	}

	i.startWorkers()

	return i, nil
}

// NextN takes and returns up to n records from the queue. NextN is allowed to
// block until either at least one record is available or the context gets canceled.
func (i *Iterator) NextN(ctx context.Context, n int) ([]opencdc.Record, error) {
	if n <= 0 {
		return nil, fmt.Errorf("n must be greater than 0, got %d", n)
	}

	var records []opencdc.Record

	// Get first record (blocking)
	select {
	case <-ctx.Done():
		return nil, fmt.Errorf("iterator stopped: %w", ctx.Err())
	case batch, ok := <-i.data:
		if !ok { // closed
			// Workers are done: surface a worker error if any, otherwise wait
			// for all outstanding acks before declaring the snapshot complete.
			if err := i.workersTomb.Err(); err != nil {
				return nil, fmt.Errorf("fetchers exited unexpectedly: %w", err)
			}
			if err := i.acks.Wait(ctx); err != nil {
				return nil, fmt.Errorf("failed to wait for acks: %w", err)
			}
			return nil, ErrIteratorDone
		}

		for _, d := range batch {
			i.acks.Add(1)
			records = append(records, i.buildRecord(d))
		}
	}

	// Try to get remaining records non-blocking
	for len(records) < n {
		select {
		case <-ctx.Done():
			return records, ctx.Err()
		case batch, ok := <-i.data:
			if !ok { // closed
				return records, nil
			}
			for _, d := range batch {
				i.acks.Add(1)
				records = append(records, i.buildRecord(d))
			}
		default:
			// No more records currently available
			return records, nil
		}
	}

	return records, nil
}

// Ack marks one previously returned record as processed.
func (i *Iterator) Ack(_ context.Context, _ opencdc.Position) error {
	i.acks.Done()
	return nil
}

// Teardown kills the worker tomb; workers observe the kill via their context.
func (i *Iterator) Teardown(_ context.Context) error {
	if i.workersTomb != nil {
		i.workersTomb.Kill(errors.New("tearing down snapshot iterator"))
	}

	return nil
}

// buildRecord folds the fetched row's table position into the iterator's
// running position and wraps the row in a snapshot record.
func (i *Iterator) buildRecord(d FetchData) opencdc.Record {
	// merge this position with latest position
	i.lastPosition.Type = position.TypeSnapshot
	i.lastPosition.Snapshots[d.Table] = d.Position

	pos := i.lastPosition.ToSDKPosition()
	metadata := make(opencdc.Metadata)
	metadata[opencdc.MetadataCollection] = d.Table

	rec := sdk.Util.Source.NewRecordSnapshot(pos, metadata, d.Key, d.Payload)
	if i.conf.WithAvroSchema {
		cschema.AttachKeySchemaToRecord(rec, d.KeySchema)
		cschema.AttachPayloadSchemaToRecord(rec, d.PayloadSchema)
	}

	return rec
}

// initFetchers builds and validates one fetch worker per configured table,
// joining all validation failures into a single error.
func (i *Iterator) initFetchers(ctx context.Context) error {
	var errs []error

	i.workers = make([]*FetchWorker, len(i.conf.Tables))

	for j, t := range i.conf.Tables {
		w := NewFetchWorker(i.db, i.data, FetchConfig{
			Table:          t,
			Key:            i.conf.TableKeys[t],
			TXSnapshotID:   i.conf.TXSnapshotID,
			Position:       i.lastPosition,
			FetchSize:      i.conf.FetchSize,
			WithAvroSchema: i.conf.WithAvroSchema,
		})

		if err := w.Validate(ctx); err != nil {
			errs = append(errs, fmt.Errorf("failed to validate table fetcher %q config: %w", t, err))
		}

		i.workers[j] = w
	}

	return errors.Join(errs...)
}

// startWorkers launches every fetch worker in the tomb and closes the data
// channel once all of them have exited, which unblocks NextN.
func (i *Iterator) startWorkers() {
	for _, worker := range i.workers {
		i.workersTomb.Go(func() error {
			ctx := i.workersTomb.Context(nil) //nolint:staticcheck // This is the correct usage of tomb.Context
			if err := worker.Run(ctx); err != nil {
				return fmt.Errorf("fetcher for table %q exited: %w", worker.conf.Table, err)
			}
			return nil
		})
	}
	go func() {
		<-i.workersTomb.Dead()
		close(i.data)
	}()
}
--------------------------------------------------------------------------------
/source/snapshot/iterator_test.go:
--------------------------------------------------------------------------------
// Copyright © 2024 Meroxa, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snapshot

import (
	"context"
	"errors"
	"testing"
	"time"

	"github.com/conduitio/conduit-commons/opencdc"
	"github.com/conduitio/conduit-connector-postgres/source/position"
	"github.com/conduitio/conduit-connector-postgres/test"
	"github.com/matryer/is"
)

// Test_Iterator_NextN exercises batching, ack bookkeeping, context
// cancellation, worker shutdown, and argument validation of the snapshot
// iterator against a 4-row test table.
func Test_Iterator_NextN(t *testing.T) {
	var (
		ctx   = test.Context(t)
		pool  = test.ConnectPool(ctx, t, test.RegularConnString)
		table = test.SetupTestTable(ctx, t, pool)
	)

	t.Run("success", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
			FetchSize: 2,
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// Get first 2 records
		records, err := i.NextN(ctx, 2)
		is.NoErr(err)
		is.Equal(len(records), 2)
		for _, r := range records {
			is.Equal(r.Operation, opencdc.OperationSnapshot)
			is.Equal(r.Metadata[opencdc.MetadataCollection], table)
		}

		// Get the remaining 2 records
		records, err = i.NextN(ctx, 2)
		is.NoErr(err)
		is.Equal(len(records), 2)
		for _, r := range records {
			is.Equal(r.Operation, opencdc.OperationSnapshot)
			is.Equal(r.Metadata[opencdc.MetadataCollection], table)
		}

		// Ack all records
		for j := 1; j <= 4; j++ {
			err = i.Ack(ctx, nil)
			is.NoErr(err)
		}

		// Should return ErrIteratorDone
		_, err = i.NextN(ctx, 1)
		is.Equal(err, ErrIteratorDone)
	})

	t.Run("next waits for acks", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		// Get all 4 records in multiple calls since NextN is non-blocking
		var allRecords []opencdc.Record
		for len(allRecords) < 4 {
			records, err := i.NextN(ctx, 4)
			is.NoErr(err)
			allRecords = append(allRecords, records...)
		}
		is.Equal(len(allRecords), 4)

		// Only ack 3 records
		for j := 1; j <= 3; j++ {
			err = i.Ack(ctx, nil)
			is.NoErr(err)
		}

		ctxTimeout, cancel := context.WithTimeout(ctx, time.Millisecond*10)
		defer cancel()

		// No more records, but NextN blocks because we haven't acked all records
		_, err = i.NextN(ctxTimeout, 1)
		is.True(errors.Is(err, context.DeadlineExceeded))

		// Ack the last record
		err = i.Ack(ctx, nil)
		is.NoErr(err)

		// Now NextN won't block
		_, err = i.NextN(ctx, 1)
		is.Equal(err, ErrIteratorDone)
	})

	t.Run("context cancelled", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		cancelCtx, cancel := context.WithCancel(ctx)
		cancel()

		_, err = i.NextN(cancelCtx, 1)
		is.Equal(err.Error(), "iterator stopped: context canceled")
	})

	t.Run("tomb exited", func(t *testing.T) {
		is := is.New(t)
		cancelCtx, cancel := context.WithCancel(ctx)

		// The iterator's workers run under cancelCtx; cancelling it makes the
		// tomb die and NextN surface the cancellation.
		i, err := NewIterator(cancelCtx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		cancel()

		_, err = i.NextN(ctx, 1)
		is.True(errors.Is(err, context.Canceled))
	})

	t.Run("invalid n", func(t *testing.T) {
		is := is.New(t)

		i, err := NewIterator(ctx, pool, Config{
			Position: position.Position{}.ToSDKPosition(),
			Tables:   []string{table},
			TableKeys: map[string]string{
				table: "id",
			},
		})
		is.NoErr(err)
		defer func() {
			is.NoErr(i.Teardown(ctx))
		}()

		_, err = i.NextN(ctx, 0)
		is.True(err != nil)
		is.Equal(err.Error(), "n must be greater than 0, got 0")

		_, err = i.NextN(ctx, -1)
		is.True(err != nil)
		is.Equal(err.Error(), "n must be greater than 0, got -1")
	})
}
--------------------------------------------------------------------------------
/source/types/numeric.go:
--------------------------------------------------------------------------------
// Copyright © 2024 Meroxa, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package types 16 | 17 | import ( 18 | "math/big" 19 | 20 | "github.com/jackc/pgx/v5/pgtype" 21 | ) 22 | 23 | type NumericFormatter struct{} 24 | 25 | // BigRatFromNumeric converts a pgtype.Numeric to a big.Rat. 26 | func (NumericFormatter) BigRatFromNumeric(num pgtype.Numeric) (*big.Rat, error) { 27 | if num.Int == nil { 28 | return nil, nil 29 | } 30 | v := new(big.Rat) 31 | driverVal, err := num.Value() 32 | if err != nil { 33 | return nil, err 34 | } 35 | v.SetString(driverVal.(string)) 36 | return v, nil 37 | } 38 | -------------------------------------------------------------------------------- /source/types/types.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package types 16 | 17 | import ( 18 | "github.com/jackc/pgx/v5/pgtype" 19 | ) 20 | 21 | var ( 22 | Numeric = NumericFormatter{} 23 | UUID = UUIDFormatter{} 24 | ) 25 | 26 | func Format(oid uint32, v any) (any, error) { 27 | if oid == pgtype.UUIDOID { 28 | return UUID.Format(v) 29 | } 30 | 31 | switch t := v.(type) { 32 | case pgtype.Numeric: 33 | return Numeric.BigRatFromNumeric(t) 34 | case *pgtype.Numeric: 35 | return Numeric.BigRatFromNumeric(*t) 36 | case []uint8: 37 | if oid == pgtype.XMLOID { 38 | return string(t), nil 39 | } 40 | return t, nil 41 | default: 42 | return t, nil 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /source/types/types_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2024 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package types 16 | 17 | import ( 18 | "math/big" 19 | "testing" 20 | "time" 21 | 22 | "github.com/jackc/pgx/v5/pgtype" 23 | "github.com/matryer/is" 24 | ) 25 | 26 | func Test_Format(t *testing.T) { 27 | now := time.Now().UTC() 28 | 29 | tests := []struct { 30 | name string 31 | input []any 32 | inputOID []uint32 33 | expect []any 34 | withBuiltin bool 35 | }{ 36 | { 37 | name: "int float string bool", 38 | input: []any{ 39 | 1021, 199.2, "foo", true, 40 | }, 41 | inputOID: []uint32{ 42 | 0, 0, 0, 0, 43 | }, 44 | expect: []any{ 45 | 1021, 199.2, "foo", true, 46 | }, 47 | }, 48 | { 49 | name: "pgtype.Numeric", 50 | input: []any{ 51 | pgxNumeric(t, "12.2121"), pgxNumeric(t, "101"), pgxNumeric(t, "0"), &pgtype.Numeric{}, nil, 52 | }, 53 | inputOID: []uint32{ 54 | 0, 0, 0, 0, 0, 55 | }, 56 | expect: []any{ 57 | big.NewRat(122121, 10000), big.NewRat(101, 1), big.NewRat(0, 1), nil, nil, 58 | }, 59 | }, 60 | { 61 | name: "builtin time.Time", 62 | input: []any{ 63 | now, 64 | }, 65 | inputOID: []uint32{ 66 | 0, 67 | }, 68 | expect: []any{ 69 | now, 70 | }, 71 | withBuiltin: true, 72 | }, 73 | { 74 | name: "uuid", 75 | input: []any{ 76 | [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66}, nil, 77 | }, 78 | inputOID: []uint32{ 79 | pgtype.UUIDOID, pgtype.UUIDOID, 80 | }, 81 | expect: []any{ 82 | "bd94ee0b-564f-4088-bf4e-8d5e626caf66", "", 83 | }, 84 | }, 85 | } 86 | _ = time.Now() 87 | 88 | for _, tc := range tests { 89 | t.Run(tc.name, func(t *testing.T) { 90 | is := is.New(t) 91 | 92 | for i, in := range tc.input { 93 | v, err := Format(tc.inputOID[i], in) 94 | is.NoErr(err) 95 | is.Equal(v, tc.expect[i]) 96 | } 97 | }) 98 | } 99 | } 100 | 101 | // as per https://github.com/jackc/pgx/blob/master/pgtype/numeric_test.go#L66 102 | func pgxNumeric(t *testing.T, num string) pgtype.Numeric { 103 | is := is.New(t) 104 | is.Helper() 105 | 106 | var n pgtype.Numeric 107 | plan := pgtype.NumericCodec{}.PlanScan(nil, 
pgtype.NumericOID, pgtype.TextFormatCode, &n) 108 | is.True(plan != nil) 109 | is.NoErr(plan.Scan([]byte(num), &n)) 110 | 111 | return n 112 | } 113 | -------------------------------------------------------------------------------- /source/types/uuid.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package types 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/jackc/pgx/v5/pgtype" 21 | ) 22 | 23 | type UUIDFormatter struct{} 24 | 25 | // Format takes a slice of bytes and returns a UUID in string format 26 | // Returns error when byte array cannot be parsed. 27 | func (UUIDFormatter) Format(v any) (string, error) { 28 | if v == nil { 29 | return "", nil 30 | } 31 | 32 | b, ok := v.([16]byte) 33 | if !ok { 34 | return "", fmt.Errorf("failed to parse uuid byte array %v", v) 35 | } 36 | 37 | uuid := pgtype.UUID{Bytes: b, Valid: true} 38 | 39 | uv, err := uuid.Value() 40 | if err != nil { 41 | return "", fmt.Errorf("failed to format uuid: %w", err) 42 | } 43 | 44 | return uv.(string), nil 45 | } 46 | -------------------------------------------------------------------------------- /source_integration_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package postgres 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "strings" 21 | "testing" 22 | 23 | "github.com/conduitio/conduit-commons/config" 24 | "github.com/conduitio/conduit-connector-postgres/source" 25 | "github.com/conduitio/conduit-connector-postgres/source/logrepl" 26 | "github.com/conduitio/conduit-connector-postgres/test" 27 | sdk "github.com/conduitio/conduit-connector-sdk" 28 | "github.com/matryer/is" 29 | ) 30 | 31 | func TestSource_Open(t *testing.T) { 32 | is := is.New(t) 33 | ctx := test.Context(t) 34 | conn := test.ConnectSimple(ctx, t, test.RepmgrConnString) 35 | 36 | // Be sure primary key discovering works correctly on 37 | // table names with capital letters 38 | tableName := strings.ToUpper(test.RandomIdentifier(t)) 39 | test.SetupTestTableWithName(ctx, t, conn, tableName) 40 | 41 | slotName := "conduitslot1" 42 | publicationName := "conduitpub1" 43 | 44 | s := NewSource() 45 | err := sdk.Util.ParseConfig( 46 | ctx, 47 | map[string]string{ 48 | "url": test.RepmgrConnString, 49 | "tables": tableName, 50 | "snapshotMode": "initial", 51 | "cdcMode": "logrepl", 52 | "logrepl.slotName": slotName, 53 | "logrepl.publicationName": publicationName, 54 | }, 55 | s.Config(), 56 | Connector.NewSpecification().SourceParams, 57 | ) 58 | is.NoErr(err) 59 | 60 | err = s.Open(ctx, nil) 61 | is.NoErr(err) 62 | 63 | defer func() { 64 | 
is.NoErr(logrepl.Cleanup(context.Background(), logrepl.CleanupConfig{ 65 | URL: test.RepmgrConnString, 66 | SlotName: slotName, 67 | PublicationName: publicationName, 68 | })) 69 | is.NoErr(s.Teardown(ctx)) 70 | }() 71 | } 72 | 73 | func TestSource_ParseConfig(t *testing.T) { 74 | testCases := []struct { 75 | name string 76 | cfg config.Config 77 | wantErr bool 78 | }{ 79 | { 80 | name: "valid postgres replication slot name", 81 | cfg: config.Config{ 82 | "url": "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb", 83 | "tables": "table1,table2", 84 | "cdcMode": "logrepl", 85 | "logrepl.slotName": "valid_slot_name", 86 | }, 87 | wantErr: false, 88 | }, { 89 | name: "invalid postgres replication slot name", 90 | cfg: config.Config{ 91 | "url": "postgresql://meroxauser:meroxapass@127.0.0.1:5432/meroxadb", 92 | "tables": "table1,table2", 93 | "cdcMode": "logrepl", 94 | "logrepl.slotName": "invalid:slot.name", 95 | }, 96 | wantErr: true, 97 | }, 98 | } 99 | 100 | for _, tc := range testCases { 101 | t.Run(tc.name, func(t *testing.T) { 102 | is := is.New(t) 103 | 104 | var cfg source.Config 105 | err := sdk.Util.ParseConfig(context.Background(), tc.cfg, cfg, Connector.NewSpecification().SourceParams) 106 | 107 | if tc.wantErr { 108 | is.True(err != nil) 109 | return 110 | } 111 | is.NoErr(err) 112 | }) 113 | } 114 | } 115 | 116 | func TestSource_Read(t *testing.T) { 117 | ctx := test.Context(t) 118 | is := is.New(t) 119 | 120 | conn := test.ConnectSimple(ctx, t, test.RegularConnString) 121 | table := setupSourceTable(ctx, t, conn) 122 | insertSourceRow(ctx, t, conn, table) 123 | 124 | s := NewSource() 125 | err := sdk.Util.ParseConfig( 126 | ctx, 127 | map[string]string{ 128 | "url": test.RepmgrConnString, 129 | "tables": table, 130 | "snapshotMode": "initial", 131 | "cdcMode": "logrepl", 132 | }, 133 | s.Config(), 134 | Connector.NewSpecification().SourceParams, 135 | ) 136 | is.NoErr(err) 137 | 138 | err = s.Open(ctx, nil) 139 | is.NoErr(err) 140 | 141 | recs, 
err := s.ReadN(ctx, 1) 142 | is.NoErr(err) 143 | 144 | fmt.Println(recs) 145 | } 146 | 147 | // setupSourceTable creates a new table with all types and returns its name. 148 | func setupSourceTable(ctx context.Context, t *testing.T, conn test.Querier) string { 149 | is := is.New(t) 150 | table := test.RandomIdentifier(t) 151 | // todo still need to support: 152 | // bit, varbit, box, char(n), cidr, circle, inet, interval, line, lseg, 153 | // macaddr, macaddr8, money, path, pg_lsn, pg_snapshot, point, polygon, 154 | // time, timetz, tsquery, tsvector, xml 155 | query := ` 156 | CREATE TABLE %s ( 157 | id bigserial PRIMARY KEY, 158 | col_boolean boolean, 159 | col_bytea bytea, 160 | col_varchar varchar(10), 161 | col_date date, 162 | col_float4 float4, 163 | col_float8 float8, 164 | col_int2 int2, 165 | col_int4 int4, 166 | col_int8 int8, 167 | col_json json, 168 | col_jsonb jsonb, 169 | col_numeric numeric(8,2), 170 | col_serial2 serial2, 171 | col_serial4 serial4, 172 | col_serial8 serial8, 173 | col_text text, 174 | col_timestamp timestamp, 175 | col_timestamptz timestamptz, 176 | col_uuid uuid 177 | )` 178 | query = fmt.Sprintf(query, table) 179 | _, err := conn.Exec(ctx, query) 180 | is.NoErr(err) 181 | 182 | t.Cleanup(func() { 183 | query := `DROP TABLE %s` 184 | query = fmt.Sprintf(query, table) 185 | _, err := conn.Exec(context.Background(), query) 186 | is.NoErr(err) 187 | }) 188 | return table 189 | } 190 | 191 | func insertSourceRow(ctx context.Context, t *testing.T, conn test.Querier, table string) { 192 | is := is.New(t) 193 | query := ` 194 | INSERT INTO %s ( 195 | col_boolean, 196 | col_bytea, 197 | col_varchar, 198 | col_date, 199 | col_float4, 200 | col_float8, 201 | col_int2, 202 | col_int4, 203 | col_int8, 204 | col_json, 205 | col_jsonb, 206 | col_numeric, 207 | col_serial2, 208 | col_serial4, 209 | col_serial8, 210 | col_text, 211 | col_timestamp, 212 | col_timestamptz, 213 | col_uuid 214 | ) VALUES ( 215 | true, -- col_boolean 216 | '\x07', -- 
col_bytea 217 | '9', -- col_varchar 218 | '2022-03-14', -- col_date 219 | 15, -- col_float4 220 | 16.16, -- col_float8 221 | 32767, -- col_int2 222 | 2147483647, -- col_int4 223 | 9223372036854775807, -- col_int8 224 | '{"foo": "bar"}', -- col_json 225 | '{"foo": "baz"}', -- col_jsonb 226 | '292929.29', -- col_numeric 227 | 32767, -- col_serial2 228 | 2147483647, -- col_serial4 229 | 9223372036854775807, -- col_serial8 230 | 'foo bar baz', -- col_text 231 | '2022-03-14 15:16:17', -- col_timestamp 232 | '2022-03-14 15:16:17-08', -- col_timestamptz 233 | 'bd94ee0b-564f-4088-bf4e-8d5e626caf66' -- col_uuid 234 | )` 235 | query = fmt.Sprintf(query, table) 236 | _, err := conn.Exec(ctx, query) 237 | is.NoErr(err) 238 | } 239 | -------------------------------------------------------------------------------- /test/conf.d/postgresql.conf: -------------------------------------------------------------------------------- 1 | wal_level=logical 2 | max_wal_senders=5 3 | max_replication_slots=5 4 | log_statement='all' 5 | log_connections=true 6 | log_disconnections=true 7 | log_duration=true 8 | log_replication_commands=true 9 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | pg-0: 3 | image: docker.io/bitnami/postgresql-repmgr:17.5.0 4 | ports: 5 | - "5433:5432" 6 | volumes: 7 | - "pg_0_data:/bitnami/postgresql" 8 | - "./conf.d/:/bitnami/postgresql/conf/conf.d/" 9 | healthcheck: 10 | test: [ "CMD", "pg_isready", "-q", "-d", "meroxadb", "-U", "meroxauser" ] 11 | timeout: 30s 12 | interval: 10s 13 | retries: 5 14 | environment: 15 | - POSTGRESQL_POSTGRES_PASSWORD=meroxaadmin 16 | - POSTGRESQL_USERNAME=meroxauser 17 | - POSTGRESQL_PASSWORD=meroxapass 18 | - POSTGRESQL_DATABASE=meroxadb 19 | - REPMGR_USERNAME=repmgr 20 | - REPMGR_PASSWORD=repmgrmeroxa 21 | - REPMGR_PRIMARY_HOST=pg-0 22 | - REPMGR_PRIMARY_PORT=5432 23 | - 
REPMGR_PARTNER_NODES=pg-0 24 | - REPMGR_NODE_NAME=pg-0 25 | - REPMGR_NODE_NETWORK_NAME=pg-0 26 | - REPMGR_PORT_NUMBER=5432 27 | volumes: 28 | pg_0_data: 29 | driver: local 30 | -------------------------------------------------------------------------------- /test/helper.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2022 Meroxa, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package test 16 | 17 | import ( 18 | "context" 19 | "errors" 20 | "fmt" 21 | "strconv" 22 | "strings" 23 | "testing" 24 | "time" 25 | 26 | "github.com/conduitio/conduit-commons/csync" 27 | "github.com/conduitio/conduit-connector-postgres/source/cpool" 28 | "github.com/jackc/pgx/v5" 29 | "github.com/jackc/pgx/v5/pgconn" 30 | "github.com/jackc/pgx/v5/pgxpool" 31 | "github.com/matryer/is" 32 | "github.com/rs/zerolog" 33 | ) 34 | 35 | // RepmgrConnString is a replication user connection string for the test postgres. 36 | const RepmgrConnString = "postgres://repmgr:repmgrmeroxa@127.0.0.1:5433/meroxadb?sslmode=disable" 37 | 38 | // RegularConnString is a non-replication user connection string for the test postgres. 39 | const RegularConnString = "postgres://meroxauser:meroxapass@127.0.0.1:5433/meroxadb?sslmode=disable" 40 | 41 | // TestTableAvroSchemaV1 is the Avro schema representation of the test table 42 | // defined through testTableCreateQuery. 
// The fields are sorted by name.
const TestTableAvroSchemaV1 = `{
  "type": "record",
  "name": "%s",
  "fields":
    [
      {"name":"UppercaseColumn1","type":"int"},
      {"name":"column1","type":"string"},
      {"name":"column2","type":"int"},
      {"name":"column3","type":"boolean"},
      {
        "name": "column4",
        "type":
          {
            "type": "bytes",
            "logicalType": "decimal",
            "precision": 16,
            "scale": 3
          }
      },
      {
        "name": "column5",
        "type":
          {
            "type": "bytes",
            "logicalType": "decimal",
            "precision": 5
          }
      },
      {"name":"column6","type":"bytes"},
      {"name":"column7","type":"bytes"},
      {"name":"id","type":"long"},
      {"name":"key","type":"bytes"}
    ]
}`

// TestTableAvroSchemaV2 is TestTableAvroSchemaV1 with `column101` (local-timestamp-micros) added.
const TestTableAvroSchemaV2 = `{
  "type": "record",
  "name": "%s",
  "fields":
    [
      {"name":"UppercaseColumn1","type":"int"},
      {"name":"column1","type":"string"},
      {"name":"column101","type":{"type":"long","logicalType":"local-timestamp-micros"}},
      {"name":"column2","type":"int"},
      {"name":"column3","type":"boolean"},
      {
        "name": "column4",
        "type":
          {
            "type": "bytes",
            "logicalType": "decimal",
            "precision": 16,
            "scale": 3
          }
      },
      {
        "name": "column5",
        "type":
          {
            "type": "bytes",
            "logicalType": "decimal",
            "precision": 5
          }
      },
      {"name":"column6","type":"bytes"},
      {"name":"column7","type":"bytes"},
      {"name":"id","type":"long"},
      {"name":"key","type":"bytes"}
    ]
}`

// TestTableAvroSchemaV3 is TestTableAvroSchemaV2 with `column4` and `column5` dropped.
117 | const TestTableAvroSchemaV3 = `{ 118 | "type": "record", 119 | "name": "%s", 120 | "fields": 121 | [ 122 | {"name":"UppercaseColumn1","type":"int"}, 123 | {"name":"column1","type":"string"}, 124 | {"name":"column101","type":{"type":"long","logicalType":"local-timestamp-micros"}}, 125 | {"name":"column2","type":"int"}, 126 | {"name":"column3","type":"boolean"}, 127 | {"name":"column6","type":"bytes"}, 128 | {"name":"column7","type":"bytes"}, 129 | {"name":"id","type":"long"}, 130 | {"name":"key","type":"bytes"} 131 | ] 132 | }` 133 | 134 | // TestTableKeyAvroSchema is the Avro schema for the test table's key column. 135 | const TestTableKeyAvroSchema = `{ 136 | "type": "record", 137 | "name": "%s", 138 | "fields": 139 | [ 140 | {"name":"id","type":"long"} 141 | ] 142 | }` 143 | 144 | // When updating this table, TestTableAvroSchemaV1 needs to be updated too. 145 | const testTableCreateQuery = ` 146 | CREATE TABLE %q ( 147 | id bigserial PRIMARY KEY, 148 | key bytea, 149 | column1 varchar(256), 150 | column2 integer, 151 | column3 boolean, 152 | column4 numeric(16,3), 153 | column5 numeric(5), 154 | column6 jsonb, 155 | column7 json, 156 | "UppercaseColumn1" integer 157 | )` 158 | 159 | type Querier interface { 160 | Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) 161 | Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) 162 | QueryRow(ctx context.Context, sql string, args ...any) pgx.Row 163 | } 164 | 165 | func ConnectPool(ctx context.Context, t *testing.T, connString string) *pgxpool.Pool { 166 | is := is.New(t) 167 | pool, err := cpool.New(ctx, connString) 168 | is.NoErr(err) 169 | t.Cleanup(func() { 170 | // close connection with fresh context 171 | is := is.New(t) 172 | is.NoErr(csync.RunTimeout(context.Background(), pool.Close, time.Second*10)) 173 | }) 174 | return pool 175 | } 176 | 177 | func ConnectSimple(ctx context.Context, t *testing.T, connString string) *pgx.Conn { 178 | is := is.New(t) 179 | 
pool := ConnectPool(ctx, t, connString) 180 | conn, err := pool.Acquire(ctx) 181 | is.NoErr(err) 182 | t.Cleanup(func() { 183 | conn.Release() 184 | }) 185 | return conn.Conn() 186 | } 187 | 188 | // SetupTestTable creates a new table and returns its name. 189 | func SetupEmptyTestTable(ctx context.Context, t *testing.T, conn Querier) string { 190 | table := RandomIdentifier(t) 191 | SetupEmptyTestTableWithName(ctx, t, conn, table) 192 | return table 193 | } 194 | 195 | func SetupEmptyTestTableWithName(ctx context.Context, t *testing.T, conn Querier, table string) { 196 | is := is.New(t) 197 | 198 | query := fmt.Sprintf(testTableCreateQuery, table) 199 | _, err := conn.Exec(ctx, query) 200 | is.NoErr(err) 201 | 202 | t.Cleanup(func() { 203 | query := `DROP TABLE %q` 204 | query = fmt.Sprintf(query, table) 205 | _, err := conn.Exec(context.Background(), query) 206 | is.NoErr(err) 207 | }) 208 | } 209 | 210 | func SetupTestTableWithName(ctx context.Context, t *testing.T, conn Querier, table string) { 211 | is := is.New(t) 212 | SetupEmptyTestTableWithName(ctx, t, conn, table) 213 | 214 | query := ` 215 | INSERT INTO %q (key, column1, column2, column3, column4, column5, column6, column7, "UppercaseColumn1") 216 | VALUES ('1', 'foo', 123, false, 12.2, 4, '{"foo": "bar"}', '{"foo": "baz"}', 1), 217 | ('2', 'bar', 456, true, 13.42, 8, '{"foo": "bar"}', '{"foo": "baz"}', 2), 218 | ('3', 'baz', 789, false, null, 9, '{"foo": "bar"}', '{"foo": "baz"}', 3), 219 | ('4', null, null, null, 91.1, null, null, null, null)` 220 | query = fmt.Sprintf(query, table) 221 | _, err := conn.Exec(ctx, query) 222 | is.NoErr(err) 223 | } 224 | 225 | // SetupTestTable creates a new table and returns its name. 
226 | func SetupTestTable(ctx context.Context, t *testing.T, conn Querier) string { 227 | table := RandomIdentifier(t) 228 | SetupTestTableWithName(ctx, t, conn, table) 229 | return table 230 | } 231 | 232 | func CreateReplicationSlot(t *testing.T, conn Querier, slotName string) { 233 | is := is.New(t) 234 | 235 | _, err := conn.Exec( 236 | context.Background(), 237 | "SELECT pg_create_logical_replication_slot($1, $2)", 238 | slotName, 239 | "pgoutput", 240 | ) 241 | is.NoErr(err) 242 | 243 | t.Cleanup(func() { 244 | _, err := conn.Exec( 245 | context.Background(), 246 | "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name=$1", 247 | slotName, 248 | ) 249 | is.NoErr(err) 250 | }) 251 | } 252 | 253 | func CreatePublication(t *testing.T, conn Querier, pubName string, tables []string) { 254 | is := is.New(t) 255 | 256 | quotedTables := make([]string, 0, len(tables)) 257 | for _, t := range tables { 258 | // don't use internal.WrapSQLIdent to prevent import cycle 259 | quotedTables = append(quotedTables, strconv.Quote(t)) 260 | } 261 | 262 | _, err := conn.Exec( 263 | context.Background(), 264 | fmt.Sprintf("CREATE PUBLICATION %s FOR TABLE %s", pubName, strings.Join(quotedTables, ",")), 265 | ) 266 | is.NoErr(err) 267 | 268 | t.Cleanup(func() { 269 | _, err := conn.Exec(context.Background(), fmt.Sprintf("DROP PUBLICATION IF EXISTS %q", pubName)) 270 | is.NoErr(err) 271 | }) 272 | } 273 | 274 | func RandomIdentifier(t *testing.T) string { 275 | return fmt.Sprintf("conduit_%v_%d", 276 | strings.ReplaceAll(strings.ToLower(t.Name()), "/", "_"), 277 | time.Now().UnixMicro()%1000) 278 | } 279 | 280 | func IsPgError(is *is.I, err error, wantCode string) { 281 | is.True(err != nil) 282 | var pgerr *pgconn.PgError 283 | ok := errors.As(err, &pgerr) 284 | is.True(ok) // expected err to be a *pgconn.PgError 285 | is.Equal(pgerr.Code, wantCode) 286 | } 287 | 288 | func Context(t *testing.T) context.Context { 289 | ctx := context.Background() 290 | 
if testing.Short() || !testing.Verbose() { 291 | return ctx 292 | } 293 | 294 | writer := zerolog.NewTestWriter(t) 295 | logger := zerolog.New(writer).Level(zerolog.InfoLevel) 296 | return logger.WithContext(ctx) 297 | } 298 | --------------------------------------------------------------------------------